In [1]:
import numpy as np
import pandas as pd
import geopandas as gpd

In [45]:
# Load the raw inputs describing the areas around each red line stop and
# preprocess the walking-time and stop lookup tables.

speed = 20  # assumed train speed, in mph

walks = pd.read_csv("../raw_data/walk_to_red_line.csv", index_col=False)
stops = pd.read_csv("../raw_data/red_line_stops_lookup.csv", index_col='id')
segments = gpd.read_file("../shape_files/red_line_segments.shp")
gtfs = pd.read_csv("../raw_data/GTFS_ODMatrix_TravelTime.csv")

# walks = walking times to the nearest red line stop.
# tract_id is the state code followed by the county code left-padded to 3
# characters and the tract code left-padded to 6 characters.
state_fp = walks['centroids_15min: STATEFP'].astype(str)
county_fp = walks['centroids_15min: COUNTYFP'].astype(str).str.rjust(3, "0")
tract_ce = walks['centroids_15min: TRACTCE'].astype(str).str.rjust(6, "0")
walks['tract_id'] = state_fp + county_fp + tract_ce

# keep only rows with a walk of at most 15 minutes, and only the columns used later
within_15_min = walks['Minimum Travel Time (Minutes)'] <= 15
walks = walks.loc[
    within_15_min,
    ['tract_id', 'Near Layer: Name', 'Minimum Travel Time (Minutes)'],
]

# stops = id, name and stop number for each stop of proposed route
# NOTE(review): the numbers are assigned positionally, so this presumably relies
# on the row order of red_line_stops_lookup.csv -- confirm against the file.
stops['stop_num'] = [1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 19, 18, 4, 15]

# segments = regions near each red line stop
# 'OriginDest' holds "<origin id>:<dest id>"; split it into two integer columns.
segments[['origin', 'dest']] = segments['OriginDest'].str.split(':', expand=True).astype(int)

# Drop rows where either endpoint id is 0. The explicit copy makes the column
# assignments below act on an independent frame (no SettingWithCopyWarning).
segments = segments.loc[(segments['origin'] != 0) & (segments['dest'] != 0)].copy()

# Translate stop ids into stop numbers. Label-based .loc keeps the original
# behaviour of raising KeyError for an id that is missing from `stops`.
stop_num_by_id = stops['stop_num']
segments['origin'] = stop_num_by_id.loc[segments['origin']].to_numpy()
segments['dest'] = stop_num_by_id.loc[segments['dest']].to_numpy()

segments = segments[['origin', 'dest', 'mileage']].copy()
# Ride time in minutes at the assumed train speed (`speed`, mph, defined at the
# top of this cell). The previous code referenced `assumed_train_speed`, which is
# not defined anywhere in live code and raised NameError on a fresh kernel.
segments['minutes'] = (segments['mileage'] / speed) * 60

In [None]:
# raw_path = "../raw_data/"

# assumed_train_speed = 20

# segments = gpd.read_file(raw_path + "red_line_segments.shp")
# stop_lookup_df = pd.read_csv(raw_path + "red_line_stops_lookup.csv")
# walk_df = pd.read_csv(raw_path + "walk_to_red_line.csv", index_col=False)
# gtfs_df = pd.read_csv(raw_path + "GTFS_ODMatrix_TravelTime.csv")

# walk_df["centroids_15min: STATEFP"] = walk_df["centroids_15min: STATEFP"].astype(str)
# walk_df["centroids_15min: COUNTYFP"] = (
#     walk_df["centroids_15min: COUNTYFP"].astype(str).str.pad(3, "left", "0")
# )
# walk_df["centroids_15min: TRACTCE"] = (
#     walk_df["centroids_15min: TRACTCE"].astype(str).str.pad(6, "left", "0")
# )
# walk_df["tract_id"] = (
#     walk_df["centroids_15min: STATEFP"]
#     + walk_df["centroids_15min: COUNTYFP"]
#     + walk_df["centroids_15min: TRACTCE"]
# )

# time_mask = walk_df["Minimum Travel Time (Minutes)"] <= 15
# walk_df = walk_df[time_mask].copy()
# walk_df = walk_df[
#     [
#         "tract_id",
#         "Near Layer: Name",
#         "Minimum Travel Time (Minutes)",
#     ]
# ].copy()

# stop_lookup_df.set_index("id", inplace=True)
# stop_lookup = stop_lookup_df.to_dict()["Name"]
# stop_list = [1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 16, 17, 19, 18, 4, 15]
# stop_lookup_df["stop_num_W-E"] = stop_list


# segments["origin"] = segments["OriginDest"].astype(str).apply(lambda x: x.split(":")[0])
# segments["dest"] = segments["OriginDest"].astype(str).apply(lambda x: x.split(":")[1])
# mask1 = segments["origin"] != "0"
# mask2 = segments["dest"] != "0"
# segments = segments[mask1 & mask2].copy()
# segments["origin"] = segments["origin"].apply(lambda x: stop_lookup[int(x)])
# segments["dest"] = segments["dest"].apply(lambda x: stop_lookup[int(x)])
# segments = segments[["origin", "dest", "mileage"]].copy()
# segments["speed"] = assumed_train_speed
# segments["minutes"] = (segments["mileage"] / segments["speed"]) * 60


# Stop lookups, derived from `stops` (loaded in the preprocessing cell) so this
# cell runs on a fresh kernel and can be re-run: nothing is mutated in place.
unique_stops = stops["Name"].unique()

# stop name -> stop number
stop_west_east = stops.set_index("Name")["stop_num"].to_dict()

# Stop table ordered by, and indexed on, the stop number. The column keeps its
# historical "stop_num_W-E" label as the index name.
stop_lookup_df = (
    stops.rename(columns={"stop_num": "stop_num_W-E"})
    .sort_values(by="stop_num_W-E")
    .set_index("stop_num_W-E")
)
# %%

def _ride_minutes(segment_times, origin_num, dest_num):
    """Sum the ``minutes`` of the segments ridden between two stop numbers.

    Parameters
    ----------
    segment_times : DataFrame with numeric ``origin`` and ``dest`` stop numbers
        and a ``minutes`` column (the ``segments`` frame built during
        preprocessing).
    origin_num, dest_num : stop numbers of the boarding and alighting stops.

    Returns
    -------
    Total minutes over the selected segments (0 when none match).

    The selection mirrors the earlier positional slicing, assuming stop numbers
    run 1..N without gaps: riding toward lower numbers picks segments whose
    ``dest`` lies in (dest_num, origin_num]; riding toward higher numbers picks
    segments whose ``origin`` lies in [origin_num, dest_num).
    """
    if origin_num > dest_num:
        crossed = segment_times["dest"].between(dest_num + 1, origin_num)
    else:
        crossed = segment_times["origin"].between(origin_num, dest_num - 1)
    return segment_times.loc[crossed, "minutes"].sum()


# One record per ordered pair of distinct stops. Segments are matched on stop
# number because `segments.origin` / `segments.dest` hold stop numbers, not names.
station_travel_time_arr = [
    {
        "origin_station": orig,
        "destination_station": dest,
        "time_traveled": _ride_minutes(
            segments, stop_west_east[orig], stop_west_east[dest]
        ),
    }
    for orig in unique_stops
    for dest in unique_stops
    if orig != dest
]

station_travel_time = pd.DataFrame(station_travel_time_arr)

# Build the trip table from the raw frames loaded in the preprocessing cell
# (`gtfs`, `walks`). Nothing is mutated in place, so the block works on a fresh
# kernel and gives the same result when re-run.

# Tract ids are the last 11 characters of the GeoID strings.
od_times = gtfs.assign(
    destination_tract_id=gtfs["GeoID_Destination"].astype(str).str[-11:],
    origin_tract_id=gtfs["GoeID_Origin"].astype(str).str[-11:],
)[["origin_tract_id", "destination_tract_id", "TransitTime (minutes)"]]

# The same walking table is used twice: once for the walk to the boarding
# station and once for the walk from the alighting station.
walk_toward = walks.rename(
    columns={
        "Near Layer: Name": "origin_station",
        "Minimum Travel Time (Minutes)": "walk_time_toward",
    }
)
walk_away = walks.rename(
    columns={
        "Near Layer: Name": "destination_station",
        "Minimum Travel Time (Minutes)": "walk_time_away",
    }
)
# Kept under the old name, in its final ("away") form, for any later cell that
# still reads `walk_df`.
walk_df = walk_away

# Inner merges: only tract pairs with a qualifying walk at both ends and a known
# station-to-station ride time are kept.
gtfs_df = (
    od_times
    .merge(walk_toward, left_on="origin_tract_id", right_on="tract_id")
    .drop(columns=["tract_id"])
    .merge(walk_away, left_on="destination_tract_id", right_on="tract_id")
    .drop(columns=["tract_id"])
    .merge(station_travel_time, on=["origin_station", "destination_station"])
)
# New door-to-door time = walk to station + ride + walk from station.
# The existing transit time column is renamed so old and new sit side by side.
gtfs_df = gtfs_df.assign(
    TransitTimeNew=(
        gtfs_df["walk_time_toward"]
        + gtfs_df["time_traveled"]
        + gtfs_df["walk_time_away"]
    )
).rename(columns={"TransitTime (minutes)": "TransitTimeOld"})

# Explicit copy: adding a column to a plain slice of gtfs_df triggers
# SettingWithCopyWarning.
final_df = gtfs_df[
    ["origin_tract_id", "destination_tract_id", "TransitTimeOld", "TransitTimeNew"]
].copy()

# Best available time per tract pair. DataFrame.min(axis=1) is vectorised and
# skips NaN, so a missing old time yields the new time rather than a result that
# depends on column order.
final_df["min"] = final_df[["TransitTimeOld", "TransitTimeNew"]].min(axis=1)
final_df.to_csv("../processed_data/transit_time_data.csv", index=False)