In [None]:
import pandas as pd
import geopandas as gpd
import shapely
import networkx as nx
import matplotlib.pyplot as plt

In [None]:
# Origin-destination matrix; the first CSV column is used as the row index.
od_matrix = pd.read_csv('../data/algo_testing_data.csv', index_col=0)
# 'hash' is an order-independent key for an airport pair, so (A, B) and (B, A)
# map to the same frozenset and can be matched regardless of direction.
od_matrix['hash'] = list(map(frozenset, zip(od_matrix['Origin'], od_matrix['Dest'])))
od_matrix

In [None]:
# Read the solved network and the city point layer; the city points supply the
# coordinates used to draw each solution segment as a line.
sol = pd.read_csv('../out/solution.csv')
cities_geom = gpd.read_file('../data/us-major-cities/USA_Major_Cities.shp')
og_crs = cities_geom.crs
cities_cols = ['NAME', 'ST', 'geometry']
sol_cols = sol.columns  # column names of the raw solution table

# Keep a single point per (NAME, ST). Combined with the left joins below, this
# guarantees `temp` has exactly one row per row of `sol`, in the same order, so the
# geometry can be attached back to `sol` without any risk of row misalignment.
# (Inner joins would drop or duplicate rows and silently shift every geometry.)
city_pts = cities_geom[cities_cols].drop_duplicates(subset=['NAME', 'ST'])

# first add the coords of the origin city
temp = sol.merge(
    city_pts,
    how='left',
    left_on=['city_origin', 'state_origin'],
    right_on=['NAME', 'ST'],
    validate='m:1',
).rename(columns={'geometry': 'pt_origin'})
# then add the coords of the dest city
temp = temp.merge(
    city_pts,
    how='left',
    left_on=['city_dest', 'state_dest'],
    right_on=['NAME', 'ST'],
    validate='m:1',
).rename(columns={'geometry': 'pt_dest'})
# merge() returns a fresh positional index; restore sol's labels so that the
# index-aligned geometry assignment below lines up row for row.
temp.index = sol.index

# A segment can only be drawn when both endpoints were found in the city layer.
has_both_pts = temp['pt_origin'].notna() & temp['pt_dest'].notna()
n_unmatched = int((~has_both_pts).sum())
if n_unmatched:
    print(f'{n_unmatched} solution segment(s) have no matching city point and get no geometry')

# create line segments from city points (explicit copy: we add a column to it)
sol_geom = temp[['pt_origin', 'pt_dest']].copy()
sol_geom['geometry'] = [
    shapely.LineString((pt_origin, pt_dest)) if matched else None
    for pt_origin, pt_dest, matched in zip(sol_geom['pt_origin'], sol_geom['pt_dest'], has_both_pts)
]

sol_geo = gpd.GeoDataFrame(sol, geometry=sol_geom['geometry'], crs=og_crs)  # type: ignore
# State polygons for the basemap, reprojected to EPSG:3395 for plotting.
states = gpd.read_file('../data/us-states/States_shapefile.shp').to_crs(epsg=3395)
# Restrict the basemap to states other than Alaska and Hawaii.
states: gpd.GeoDataFrame = states[~states['State_Code'].isin(['AK', 'HI'])]  # type: ignore

# Single large axes: states as the background, solution segments drawn on top and
# coloured by their 'co2_g' value.
fig, ax1 = plt.subplots(figsize=(20, 20))
ax1 = states.plot(ax=ax1, cmap='Pastel2')
sol_segments_projected = sol_geo.to_crs(epsg=3395)
sol_segments_projected.plot(ax=ax1, column='co2_g', cmap='inferno')

In [None]:
# Graph of the solution network: one edge per row of sol_geo between its 'Origin'
# and 'Dest' values, with 'NonStopKm' stored as an edge attribute.
G = nx.from_pandas_edgelist(sol_geo, 'Origin', 'Dest', edge_attr='NonStopKm')
# NOTE(review): planar_layout presumably requires G to be planar — confirm this
# holds for every solution this notebook is run on.
node_positions = nx.planar_layout(G)
nx.draw_networkx(G, pos=node_positions)

In [None]:
# Build a dataframe of shortest paths between every pair of cities in the rail network.
# paths_df contains one row for each leg (edge) of each shortest path:
#   hash        - order-independent identifier for the leg: (JFK,LAX) == (LAX,JFK)
#   Origin/Dest - endpoints of the leg, in travel order along the path
#   path_origin/path_dest - endpoints of the whole path the leg belongs to
# all_pairs_dijkstra_path lazily yields (source, {target: [node, ...]}) pairs,
# using 'NonStopKm' as the edge weight.
paths_tuples = nx.all_pairs_dijkstra_path(G, weight='NonStopKm')
visited_paths: set[frozenset] = set()
pathsList = []  # todo remove
# Legs are collected as plain records and turned into a DataFrame once at the end;
# growing a DataFrame with pd.concat inside the loop is quadratic and concatenating
# onto an empty frame is deprecated in recent pandas.
leg_records = []

for origin, paths_dict in paths_tuples:
    for dest, path in paths_dict.items():
        pathKey = frozenset([origin, dest])
        # skip 0-length and already-seen paths (A->B and B->A count as the same path)
        # TODO skip paths between airports in same city
        if origin == dest or pathKey in visited_paths:
            continue
        visited_paths.add(pathKey)
        pathsList.append(path)

        # consecutive node pairs along the path are its legs
        for leg_origin, leg_dest in zip(path, path[1:]):
            leg_records.append({
                'hash': frozenset((leg_origin, leg_dest)),
                'Origin': leg_origin,
                'Dest': leg_dest,
                'path_origin': path[0],
                'path_dest': path[-1],
            })

# Passing `columns` fixes the column order and keeps the schema even when the
# graph produced no paths at all.
paths_df = pd.DataFrame(
    leg_records,
    columns=['hash', 'Origin', 'Dest', 'path_origin', 'path_dest'],
)

paths_df

In [None]:
# Attach the OD-matrix attributes to every rail leg in paths_df, matching on the
# order-independent 'hash' key. The OD-matrix's own Origin/Dest columns are left
# out so they do not collide with the leg's Origin/Dest columns.
# validate='m:1' asserts that each hash appears at most once in the OD-matrix.
od_segment_attrs = od_matrix.drop(columns=['Origin', 'Dest'])
paths_df = pd.merge(paths_df, od_segment_attrs, how='left', on='hash', validate='m:1')
paths_df

In [None]:
# Identify each path by the unordered pair of its endpoints, then group its legs.
paths_df['path_hash'] = list(map(frozenset, zip(paths_df['path_origin'], paths_df['path_dest'])))
path_groups = paths_df.groupby('path_hash')[['NonStopKm', 'hsr_time_hr', 'co2_g_hsr']]

# Per-path rail metrics:
# - sum of NonStopKm over the legs   -> hsr_km (distance of the rail journey)
# - sum of hsr_time_hr over the legs -> travel time by HSR
# - sum of co2_g_hsr over the legs   -> co2 emitted by the HSR journey
# - count of non-null NonStopKm      -> hsr_legs (number of legs in the journey)
hsr_metrics = (
    path_groups.sum()
    .assign(hsr_legs=path_groups['NonStopKm'].count())
    .rename(columns={'NonStopKm': 'hsr_km'})
    .reset_index()
)
hsr_metrics

In [None]:
# Link each rail path's aggregated metrics with the direct-flight metrics for the
# same endpoint pair: the path's 'path_hash' is matched against the OD-matrix 'hash'.
# City/state/population columns of the OD-matrix are currently not carried over.
flight_cols = [
    'hash',
    'Origin', 'Dest',
    'Passengers',
    'flight_time_hr',
    'co2_g_flight',
]
hsr_metrics = pd.merge(
    hsr_metrics,
    od_matrix[flight_cols],
    how='left',
    left_on='path_hash',
    right_on='hash',
    validate='1:1',  # each path pairs with at most one OD row and vice versa
).drop(columns=['hash'])  # 'hash' duplicates 'path_hash' after the join
hsr_metrics

In [None]:
# Optional debugging snapshot of the unadjusted table:
# hsr_metrics.to_csv('../out/temp.csv', index=False)

# Working copy of hsr_metrics in which flight time includes 3 extra hours for
# travel time to/from airport, security, gate times, etc. hsr_metrics itself is
# left untouched.
hsr_metrics_dev = hsr_metrics.assign(
    flight_time_hr=hsr_metrics['flight_time_hr'] + 3
)

hsr_metrics_dev