In [1]:
import geopy.distance
import numpy as np
from sklearn.neighbors import BallTree, KDTree
import pandas as pd
from datetime import datetime
from datetime import timedelta


In [2]:
def custom_distance(a, b):
    manhattan_distance = (geopy.distance.distance((a[0], a[1]), (a[0], b[1])) + geopy.distance.distance((a[0], a[1]), (b[0], a[1]))).km
    return manhattan_distance /4.5 * 3600

class Coordinate:
    def __init__(self, lat, lon, stop_id, time):
        self.lat = lat
        self.lon = lon
        self.stop_id =stop_id
        self.time = time

class Node:
    def __init__(self, stop_id, neighbors):
        self.stop_id = stop_id
        self.neighbors = neighbors

In [3]:
class Queue_Node:
    def __init__(self, stop_id, time):
        self.stop_id = stop_id
        self.time = time

In [13]:
f = open("./input.txt", 'r')
inputs = f.read().split(',')
f.close()
destination_lat = float(inputs[0])
destination_lon = float(inputs[1])
max_commute_time = float(inputs[2]) * 60
max_walking_to_stop_time = 15 * 60

stops = pd.read_csv("./gtfs_puget_sound_consolidated/stops.txt")

stops_tree = BallTree(stops[["stop_lat", "stop_lon"]].values, metric = lambda a,b: custom_distance(a,b))
closest_stops_indices, distances = stops_tree.query_radius([[destination_lat, destination_lon]], min(max_walking_to_stop_time,max_commute_time), True)

In [75]:
stops = stops[["stop_lat", "stop_lon", "stop_id"]]

In [50]:
stop_set = set()
# set to track the stops traversed
for i in range(len(closest_stops_indices[0])):
    index = closest_stops_indices[0][i]
    lat = stops.loc[index]["stop_lat"]
    lon = stops.loc[index]["stop_lon"]
    stop_id = stops.loc[index]["stop_id"]
    distance = distances[0][i]
    coord = Coordinate(lat, lon, stop_id, distance)
    stop_set.add(stop_id)

In [73]:
sets = [set() for i in stops.index]

In [76]:
stops.insert(3, "routes", sets, allow_duplicates=False) 
stops

Unnamed: 0,stop_lat,stop_lon,stop_id,routes
0,47.605137,-122.336533,1-100,{}
1,47.606136,-122.334969,1-101,{}
2,47.616711,-122.330597,1-1010,{}
3,47.606911,-122.333092,1-102,{}
4,47.617626,-122.329407,1-1020,{}
...,...,...,...,...
13219,48.005856,-122.189018,97-95,{}
13220,48.004234,-122.189056,97-96,{}
13221,48.000927,-122.189163,97-97,{}
13222,47.997681,-122.189201,97-98,{}


In [18]:
# process stop_times to construct a graph
stop_times = pd.read_csv("./gtfs_puget_sound_consolidated/stop_times.txt", low_memory=False)
stop_times["arrival_time"]= pd.to_timedelta(stop_times["arrival_time"])
stop_times = stop_times[["trip_id", "stop_id", "arrival_time"]]

In [19]:
trips = pd.read_csv("./gtfs_puget_sound_consolidated/trips.txt", low_memory=False)
trips = trips[["route_id", "trip_id", "direction_id"]]

In [20]:
#building graph with routes that go through the starting stops
unique_trip_id_set = set(stop_times.query('stop_id in @stop_set')["trip_id"])
relevant_trips = trips.query('trip_id in @unique_trip_id_set')

In [21]:
stop_times = stop_times.query('trip_id in @unique_trip_id_set')

In [22]:
def custom_route_id_with_directiion(row):
    return row['route_id'] + "-" + str(row['direction_id'])

relevant_trips['route_id_with_direction'] = relevant_trips.apply(custom_route_id_with_directiion, axis=1)
relevant_trips

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  relevant_trips['route_id_with_direction'] = relevant_trips.apply(custom_route_id_with_directiion, axis=1)


Unnamed: 0,route_id,trip_id,direction_id,route_id_with_direction
1617,100031,473763466,0.0,100031-0.0
1618,100031,473763496,0.0,100031-0.0
1619,100031,473763516,0.0,100031-0.0
1620,100031,473763526,0.0,100031-0.0
1621,100031,473763566,0.0,100031-0.0
...,...,...,...,...
104109,SCM,SB_winter2024_Sun_2020,0.0,SCM-0.0
104110,SCM,SB_winter2024_Sun_2030,0.0,SCM-0.0
104111,SCM,SB_winter2024_Sun_2040,0.0,SCM-0.0
104112,SCM,SB_winter2024_Sun_2050,0.0,SCM-0.0


In [81]:
# build a new df for route information and average route traveling stop-to-stop time
travel_graph= pd.DataFrame(columns = ["route_id_with_direction", "stop_list", "arrival_time_list", "max_total_travel_time"])

In [82]:
for index, row in relevant_trips.iterrows():
    route_id_with_direction = row["route_id_with_direction"]
    trip_id = row["trip_id"]
    current_trip = stop_times.loc[stop_times["trip_id"] == trip_id].sort_values(by=["arrival_time"])
    current_stops = current_trip["stop_id"].tolist()
    if not travel_graph["route_id_with_direction"].str.contains(route_id_with_direction).any():     
        travel_time = np.max(current_trip["arrival_time"]) - np.min(current_trip["arrival_time"])
        new_row = {"route_id_with_direction": route_id_with_direction, "stop_list": current_stops, "arrival_time_list": current_trip["arrival_time"].tolist(), "max_total_travel_time": travel_time}
        travel_graph = travel_graph._append(new_row, ignore_index=True)
        for stop_id in current_stops:
            print(stop_id)
            stops.loc[stops["stop_id"] == stop_id]["routes"].values[0].add(route_id_with_direction)
    else:
        max_total_travel_time = travel_graph.loc[travel_graph["route_id_with_direction"] == route_id_with_direction]["max_total_travel_time"].values[0]
        trip_stop_times = stop_times.loc[(stop_times["trip_id"] == trip_id)]
        total_travel_time = np.max(trip_stop_times["arrival_time"]) - np.min(trip_stop_times["arrival_time"])
        # update the travel time if this trip's total travel time is longer
        if total_travel_time > max_total_travel_time:
            travel_graph.loc[travel_graph["route_id_with_direction"] == route_id_with_direction]["max_total_travel_time"].values[0] = total_travel_time
            travel_graph.loc[travel_graph["route_id_with_direction"] == route_id_with_direction]["arrival_time_list"].values[0] = trip_stop_times
            travel_graph.loc[travel_graph["route_id_with_direction"] == route_id_with_direction]["stop_list"].values[0] = current_stops
            
            

  travel_graph = travel_graph._append(new_row, ignore_index=True)


In [148]:
map = stops.iloc[closest_stops_indices[0]]
map = map.rename(columns={'stop_lat': 'lat','stop_lon':'lon'})[['lat','lon']]
map = map.assign(time = distances[0])

In [119]:
max_commute_time_timedelta = timedelta(seconds=max_commute_time)

In [149]:
for i in map.index:
    stop_id = str(stops.loc[i]["stop_id"])
    routes = stops.loc[i]["routes"]
    # traverse on the routes that go through the given stop_id
    for route in routes:
        route_info = travel_graph.loc[travel_graph["route_id_with_direction"] == route]
        stop_list = route_info["stop_list"].values[0]
        arrival_time_list = route_info["arrival_time_list"].values[0]
        current_stop = stop_list[0]
        travel_start_time = arrival_time_list[0]
        destination_stop_index = stop_list.index(stop_id)
        for x in range(0, destination_stop_index):
            if arrival_time_list[destination_stop_index] - arrival_time_list[x] <= max_commute_time_timedelta:
                lat = stops.loc[stops["stop_id"]==stop_list[x]]["stop_lat"].values[0]
                lon = stops.loc[stops["stop_id"]==stop_list[x]]["stop_lon"].values[0]
                time = (arrival_time_list[destination_stop_index] - arrival_time_list[x]).total_seconds()
                map.loc[-1] = [lat, lon,time]
                map.index = map.index + 1
        
            

In [150]:
map

Unnamed: 0,lat,lon,time
3957,47.621288,-122.347710,611.372519
3958,47.619213,-122.347725,796.838130
1916,47.618641,-122.347847,855.051254
1908,47.618504,-122.347878,869.100921
14527,47.621263,-122.349955,748.587402
...,...,...,...
4,47.633392,-122.325836,182.000000
3,47.630054,-122.329338,87.000000
2,47.628349,-122.331520,37.000000
1,47.623417,-122.337044,360.000000


In [164]:
import folium
import webbrowser

In [177]:
my_convexhull_map = folium.Map(location=(47.608013, -122.335167), zoom_start=10)#location - the center of the map, zoom_start - the resolution

fg = folium.FeatureGroup(name="Stops")
for index, row in map.iterrows():
    fg.add_child(
        folium.CircleMarker(
            (row['lat'], row['lon']),
            radius = 7,
            color="cornflowerblue",
            stroke=False,
            fill=True,
            fill_opacity=0.6,
            opacity=1,
            popup=(folium.Popup("Transit stops")),
        )
    )

my_convexhull_map.add_child(fg)

fg = folium.FeatureGroup(name="Work Destination")
fg.add_child(
    folium.CircleMarker(
        (destination_lat, destination_lon),
        radius = 10,
        color="#FF7043",
        stroke=False,
        fill=True,
        fill_opacity=1,
        opacity=1,
        popup=(folium.Popup("Work Destination")),
    )
)
my_convexhull_map.add_child(fg)


In [None]:
my_convexhull_map.save("map.html")
webbrowser.open("map.html")
# Add layer control and show map
folium.LayerControl(collapsed=False).add_to(my_convexhull_map)
my_convexhull_map