In [1]:
import math
import os
import time

from pymongo import MongoClient

EARTH_RADIUS = 6371 # Radius of the Earth in kilometers; calculate_distance() scales its result by this value


def deg_to_rad(x):
    """Convert an angle expressed in degrees to radians.

    Args:
        x: Angle in degrees.

    Returns:
        The same angle in radians.
    """
    scaled_by_pi = x * math.pi
    return scaled_by_pi / 180


def calculate_distance(from_coordinates:tuple, to_coordinates:tuple):
    """Return the great-circle distance between two points (haversine formula).

    Args:
        from_coordinates: Start point; index 0 is the latitude and index 1 the
            longitude, both in degrees.
        to_coordinates: End point, same layout as ``from_coordinates``.

    Returns:
        The distance along the sphere, in the unit of ``EARTH_RADIUS``.
    """
    from_lat, from_lng = from_coordinates[0], from_coordinates[1]
    to_lat, to_lng = to_coordinates[0], to_coordinates[1]
    half_d_lat = deg_to_rad((to_lat - from_lat) / 2)
    half_d_lon = deg_to_rad((to_lng - from_lng) / 2)
    a = (
        math.sin(half_d_lat) ** 2
        + math.cos(deg_to_rad(from_lat)) * math.cos(deg_to_rad(to_lat)) * math.sin(half_d_lon) ** 2
    )
    # Floating-point rounding can leave `a` a hair outside [0, 1] (e.g. for
    # near-antipodal points), which would make math.sqrt(1 - a) raise
    # ValueError. Clamp with explicit comparisons so a NaN input still
    # propagates as NaN instead of being silently turned into a distance.
    if a > 1.0:
        a = 1.0
    elif a < 0.0:
        a = 0.0
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return EARTH_RADIUS * c


def to_dictionary(df):
    """Turn every row of a table into a plain stop dictionary.

    Columns are read by position, not by name: column 1 is the stop id,
    column 2 the stop sequence, column 3 the stop name, column 4 the latitude
    and column 5 the longitude. Column 0 is ignored.

    Args:
        df: Object exposing ``to_numpy()`` that yields one row per stop.

    Returns:
        A list with one dictionary per row, in row order.
    """
    def _row_to_stop(row):
        # Cast to built-in int/float so the values are plain Python numbers.
        return {
            'stop_id': int(row[1]),
            'stop_sequence': int(row[2]),
            'stop_name': row[3],
            'latitude': float(row[4]),
            'longitude': float(row[5])
        }

    return [_row_to_stop(row) for row in df.to_numpy()]

    
def get_stops(bus_route):
    """Build the ordered stop list of one trip of a bus route.

    Reads the module-level ``trips``, ``stop_times`` and ``stops`` tables,
    which must already be loaded before this function is called.

    Among all trips of the route that have stop times, the one whose
    ``trip_id`` sorts first is used; its stops are returned ordered by
    ``stop_sequence``.

    Args:
        bus_route: Value matched against ``trips['route_id']``; must be
            convertible with ``int()``.

    Returns:
        A dict with ``'route_id'`` (int) and ``'routes'`` (list of stop
        dictionaries produced by ``to_dictionary``). ``'routes'`` is an empty
        list when the route has no stop times.
    """
    route_trips = trips[trips['route_id'] == bus_route]
    trip_ids = route_trips['trip_id'].unique()
    route_stop_times = stop_times[stop_times['trip_id'].isin(trip_ids)]

    # A route without any stop times used to fail on `.iloc[0]` with an
    # opaque IndexError; report it as a route with no stops instead.
    if route_stop_times.empty:
        return {'route_id': int(bus_route), 'routes': []}

    # Choose the trip first so only its rows are merged and sorted, instead of
    # merging and sorting every trip of the route and discarding all but one.
    single_trip_id = route_stop_times["trip_id"].sort_values().iloc[0]
    trip_stop_times = route_stop_times[route_stop_times["trip_id"] == single_trip_id]
    trip_stop_times = trip_stop_times.merge(stops, on="stop_id", how="left")
    # Stable sort keeps the incoming row order for equal stop_sequence values.
    trip_stop_times = trip_stop_times.sort_values(by="stop_sequence", kind="stable")

    return {
        'route_id': int(bus_route),
        'routes': to_dictionary(trip_stop_times[["trip_id", "stop_id", "stop_sequence", "stop_name", "stop_lat", "stop_lon"]])
    }


# Connection string for the MongoDB server. Set the MONGO_URI environment
# variable to supply credentials without editing the notebook; the literal
# below is only the fallback used when the variable is not set.
MONGO_URI = os.environ.get("MONGO_URI", "mongodb://log6953fe:log6953fe@localhost:27017")

mongo_client = MongoClient(MONGO_URI)
database = mongo_client["log6953fe_db"]
# Collection holding one document per route, as produced by get_stops().
bus_stops_collection = database["bus_stops"]


In [60]:
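# NOTE: one-off load step, kept commented out. It inserts one get_stops()
# document per unique route_id into bus_stops_collection and prints the
# elapsed time. To re-run it, `np` (numpy) and a `routes` table with a
# 'route_id' column must be in scope; neither is set up in the cells shown here.
# start_time = time.time()
# for i in np.unique(routes['route_id']):
#     bus_stops_collection.insert_one(get_stops(i))

# print(f"=={(time.time() - start_time): .4f} seconds ==")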

In [2]:
# Load every stored route document and annotate it, in place, with the largest
# distance between two consecutive stops of the route.
results = list(bus_stops_collection.find())
for route_doc in results:
    # Use a dedicated name: rebinding `routes` here would shadow any `routes`
    # table that other cells rely on.
    route_stops = route_doc['routes']
    # Pair each stop with the one that follows it.
    distances = [
        calculate_distance(
            (previous_stop['latitude'], previous_stop['longitude']),
            (next_stop['latitude'], next_stop['longitude']),
        )
        for previous_stop, next_stop in zip(route_stops, route_stops[1:])
    ]
    # A route with fewer than two stops has no consecutive pair; max() on an
    # empty list would raise ValueError, so such routes get 0.0.
    route_doc['maximum_distance'] = max(distances, default=0.0)


{'_id': ObjectId('67da1327c8a006ea989c8616'),
 'route_id': 1,
 'routes': [{'stop_id': 43,
   'stop_sequence': 1,
   'stop_name': 'Station Angrignon',
   'latitude': 45.446466,
   'longitude': -73.603118},
  {'stop_id': 42,
   'stop_sequence': 2,
   'stop_name': 'Station Monk',
   'latitude': 45.451158,
   'longitude': -73.593242},
  {'stop_id': 41,
   'stop_sequence': 3,
   'stop_name': 'Station Jolicoeur',
   'latitude': 45.45701,
   'longitude': -73.581691},
  {'stop_id': 40,
   'stop_sequence': 4,
   'stop_name': 'Station Verdun',
   'latitude': 45.459441,
   'longitude': -73.572021},
  {'stop_id': 39,
   'stop_sequence': 5,
   'stop_name': "Station De l'Église",
   'latitude': 45.461894,
   'longitude': -73.567074},
  {'stop_id': 38,
   'stop_sequence': 6,
   'stop_name': 'Station LaSalle',
   'latitude': 45.471063,
   'longitude': -73.566267},
  {'stop_id': 37,
   'stop_sequence': 7,
   'stop_name': 'Station Charlevoix',
   'latitude': 45.478465,
   'longitude': -73.569336},
  {'s