In [None]:
# Have Jupyter automatically reload imported modules (such as config) before each cell runs
%load_ext autoreload
%autoreload 2

In [None]:
import folium as fl
from folium.plugins import MarkerCluster
from config import *

In [None]:
# Read the GTFS stop list and the edge table; the identifier columns listed in
# each dtype mapping are loaded as str.
stops = pd.read_csv(
    os.path.join(gtfs_path, 'stops.txt'),
    dtype={"stop_id": str, "parent_station": str},
)
data = pd.read_csv(
    data_path,
    dtype={"from_stop": str, "to_stop": str, "trip_id": str},
)
print(f"Using \"{gtfs_dataset}\" dataset with {len(data.index)} rows/edges")

In [None]:
# Work on numpy arrays instead of the DataFrames themselves;
# searching them is much faster than filtering a DataFrame.

# Stops: converted table plus the stop_id column used for masking
stops_np, stops_index = convert_df_to_np(stops)
stops_ids = stops['stop_id'].to_numpy()

# Edges: converted table, the trip_id column and the number of rows
data_np, data_index = convert_df_to_np(data)
data_trip_ids = data['trip_id'].to_numpy()
data_len = data.shape[0]


def get_stop_details(stop_id):
    """Return the record of the stop identified by ``stop_id``.

    The module-level ``stops_np`` array is masked with ``stops_ids == stop_id``
    and the first selected row is converted with ``convert_np_to_dict`` using
    ``stops_index``.

    :param stop_id: value compared against the entries of ``stops_ids``
    :return: whatever ``convert_np_to_dict`` produces for the first matching row
    :raises ValueError: if no row matches ``stop_id``
    """
    # stop = stops[stops['stop_id'] == stop_id]  ->  4 times slower
    matches = stops_np[stops_ids == stop_id]
    # Decide "not found" from the number of selected rows. ``matches.any()``
    # would reduce over the values stored in the rows instead, which is not an
    # emptiness test (a row of falsy values would be reported as missing).
    if len(matches) == 0:
        raise ValueError(f"stop_id {stop_id} couldn't be found")
    return convert_np_to_dict(matches[0], stops_index)


def get_line(points, color="red", weight=2, opacity=0.5):
    """Build a folium ``PolyLine`` through ``points``.

    The styling arguments default to the values that used to be hard-coded, so
    ``get_line(points)`` produces the same line as before.

    :param points: coordinates forwarded unchanged to ``fl.PolyLine``
    :param color: stroke colour forwarded to ``fl.PolyLine``
    :param weight: stroke weight forwarded to ``fl.PolyLine``
    :param opacity: stroke opacity forwarded to ``fl.PolyLine``
    :return: the new ``fl.PolyLine`` (not yet attached to any map)
    """
    return fl.PolyLine(points, color=color, weight=weight, opacity=opacity)


In [None]:
# Loads the map roughly in the middle of all stations
connections_map = fl.Map(location=[stops['stop_lat'].median(), stops['stop_lon'].median()], zoom_start=8)
stop_clusters = MarkerCluster()
added_stops = set()  # stop_ids that already have a marker in stop_clusters
added_edges = set()  # (from_stop, to_stop) pairs that already have a line on the map
counter = 0  # stays 0 when data_np is empty

for counter, row in enumerate(data_np, start=1):
    # Progress is reported for every row visited, including rows skipped below
    if counter % 10000 == 0 or counter == data_len:
        print('Progress : {}%'.format(np.round(counter / data_len * 100, 2)))
    row = convert_np_to_dict(row, data_index)
    if row['trip_id'] in ('0', '-1'):  # Removes added connections
        continue
    # Draw each connection once, regardless of its direction
    if (row['from_stop'], row['to_stop']) in added_edges or (row['to_stop'], row['from_stop']) in added_edges:
        continue
    from_stop = get_stop_details(row['from_stop'])
    to_stop = get_stop_details(row['to_stop'])

    for endpoint in (from_stop, to_stop):
        if endpoint['stop_id'] in added_stops:
            continue
        # Register the id right away: if both ends of the edge are the same
        # stop, the second pass must not add a duplicate marker.
        added_stops.add(endpoint['stop_id'])
        stop_clusters.add_child(
            fl.Marker(location=[endpoint['stop_lat'], endpoint['stop_lon']], popup=endpoint['stop_name'])
        )

    points = [
        [from_stop['stop_lat'], from_stop['stop_lon']],
        [to_stop['stop_lat'], to_stop['stop_lon']],
    ]
    connections_map.add_child(get_line(points))
    added_edges.add((from_stop['stop_id'], to_stop['stop_id']))


connections_map.add_child(stop_clusters)
connections_map