# Visualize Commute Routes
This notebook visualizes the commute routes generated by `main.py`. The routes are loaded from the per-region `commute_routes_<region>.csv` files stored in a Google Cloud Storage bucket, combined into a single DataFrame, and plotted on an interactive map using Plotly.

In [70]:
import gcsfs
import pandas as pd
import plotly.express as px
import pytz

In [71]:
# Google Cloud project and bucket that hold the exported commute CSVs
project_id = 'dig-es-nws-gemini-projects'
bucket_name = 'marketplace-commutes'

# Filename keyword -> timezone name stamped on every row loaded from that file.
# Keywords are tried in this order and the first one found in the filename wins.
REGION_TIMEZONES = {
    'atlantic': 'America/Halifax',
    'central': 'America/Winnipeg',
    'eastern': 'America/Toronto',
    'mountain': 'America/Edmonton',
    'pacific': 'America/Vancouver',
}

# Create the GCS filesystem client for this project
gcs = gcsfs.GCSFileSystem(project=project_id)

# Find every object in the bucket that matches *.csv
files = gcs.glob(f'gs://{bucket_name}/*.csv')

print(files)

# Load each CSV and tag its rows with the timezone inferred from the filename
all_dfs = []
for gcs_path in files:
    region_df = pd.read_csv(f"gs://{gcs_path}")
    # Only the last path component is inspected, not the bucket name
    basename = gcs_path.split('/')[-1]
    region_df['timezone'] = next(
        (tz_name for keyword, tz_name in REGION_TIMEZONES.items() if keyword in basename),
        'UTC',  # fallback when no region keyword appears in the filename
    )
    all_dfs.append(region_df)


# Stack the per-file frames into one frame with a fresh 0..n-1 index
df = pd.concat(all_dfs, ignore_index=True)

# Preview the first rows of the combined frame
df.head()

['marketplace-commutes/commute_routes_atlantic.csv', 'marketplace-commutes/commute_routes_central.csv', 'marketplace-commutes/commute_routes_eastern.csv', 'marketplace-commutes/commute_routes_mountain.csv', 'marketplace-commutes/commute_routes_pacific.csv']


Unnamed: 0,origin,destination,travel_mode,distance_km,duration_min,warnings,line_geometry,timestamp,timezone
0,"Larry Uteck, Halifax",Hospital in Halifax,DRIVE,10.113,20,,"LINESTRING (-63.67528 44.70207, -63.67451 44.7...",2025-08-26 20:10:05,America/Halifax
1,"Windsor Street, Halifax","St. Margaret's Bay Road, Halifax",DRIVE,5.678,13,,"LINESTRING (-63.60553 44.65555, -63.60527 44.6...",2025-08-26 20:10:05,America/Halifax
2,"Armdale, Halifax","Bayers Lake, Halifax",DRIVE,7.775,12,,"LINESTRING (-63.60788 44.63002, -63.60795 44.6...",2025-08-26 20:10:06,America/Halifax
3,"North End, Halifax","Macdonald Bridge, Halifax",BICYCLE,1.723,10,,"LINESTRING (-63.60227 44.65962, -63.60234 44.6...",2025-08-26 20:10:06,America/Halifax
4,"Larry Uteck, Halifax",Hospital in Halifax,DRIVE,10.113,21,,"LINESTRING (-63.67528 44.70207, -63.67451 44.7...",2025-08-26 20:20:05,America/Halifax


In [72]:
# Sanity check: list the distinct values present in the timezone column
df.loc[:, "timezone"].unique()

array(['America/Halifax', 'America/Winnipeg', 'America/Toronto',
       'America/Edmonton', 'America/Vancouver'], dtype=object)

In [73]:
# --- Data Preparation ---

# Parse the raw timestamp strings into timezone-aware UTC datetimes.
# NOTE(review): this treats the stored timestamps as UTC — confirm that is
# how main.py writes them.
df['timestamp'] = pd.to_datetime(df['timestamp'], utc=True)

# Create a unique identifier for each route
# NOTE(review): travel_mode is not part of the id, so two modes sharing the
# same origin/destination pair would be drawn as one line — confirm intended.
df['route_id'] = df['origin'] + ' to ' + df['destination']

# Convert every UTC timestamp into the timezone recorded on its own row.
# The conversion is row-wise because the timezone differs between rows.
df['timestamp_local'] = df.apply(lambda row: row['timestamp'].tz_convert(row['timezone']), axis=1)

# Local hour of day, used downstream to separate peak from off-peak samples
df['hour'] = df['timestamp_local'].apply(lambda x: x.hour)

# --- Visualization ---
# One line per route: commute duration against local time.
# The labels dict must be keyed by the plotted column names; the x column is
# 'timestamp_local', so that is the key that carries the axis label.
fig = px.line(df, x='timestamp_local', y='duration_min', color='route_id',
              title='Commute Time Over Time',
              labels={'timestamp_local': 'Timestamp', 'duration_min': 'Commute Time (minutes)'})

fig.show()

In [74]:
# Peak periods are the local-hour windows [7, 9) and [17, 19); every other hour
# is off-peak. The mask is built once, with explicit parentheses, so the peak
# and off-peak subsets are always exact complements of each other.
# NOTE(review): only the hour is checked, so weekend rows inside these windows
# are also counted as peak — confirm that is intended.
morning_peak = (df['hour'] >= 7) & (df['hour'] < 9)
evening_peak = (df['hour'] >= 17) & (df['hour'] < 19)
is_peak = morning_peak | evening_peak

# Filter for off-peak hours
off_peak_df = df[~is_peak]

# Calculate the median travel time for each route during off-peak hours
off_peak_stats = off_peak_df.groupby('route_id')['duration_min'].median().reset_index()
off_peak_stats.columns = ['route_id', 'median_off_peak_duration_min']

# Filter for peak hours
peak_df = df[is_peak]

# Calculate the median and 95th percentile travel time for each route during peak hours
peak_stats = peak_df.groupby('route_id')['duration_min'].agg(['median', lambda x: x.quantile(0.95)]).reset_index()
peak_stats.columns = ['route_id', 'median_peak_duration_min', 'p95_peak_duration_min']

# Merge the stats. The left join keeps every route that has off-peak data;
# routes without any peak samples get NaN peak columns (and a NaN score).
final_stats = pd.merge(off_peak_stats, peak_stats, on='route_id', how='left')

# Add calculated columns
# How much slower the typical peak trip is than the typical off-peak trip
final_stats['peak_vs_off_peak_median_diff'] = final_stats['median_peak_duration_min'] - final_stats['median_off_peak_duration_min']
# How much worse a bad (95th percentile) peak trip is than a typical peak trip
final_stats['p95_vs_median_peak_diff'] = final_stats['p95_peak_duration_min'] - final_stats['median_peak_duration_min']

# Calculate the score and sort by it.
# Score = (peak slowdown + peak variability) relative to the off-peak median.
# A zero off-peak median would make the score +/-inf (or NaN for 0/0) and push
# a meaningless row to the top of the ranking, so non-positive baselines are
# masked to NaN; sort_values places NaN scores at the bottom.
baseline = final_stats['median_off_peak_duration_min']
baseline = baseline.where(baseline > 0)
final_stats['score'] = (final_stats['peak_vs_off_peak_median_diff'] + final_stats['p95_vs_median_peak_diff']) / baseline
final_stats = final_stats.sort_values(by='score', ascending=False)


print("Travel time statistics:")
display(final_stats)

Travel time statistics:


Unnamed: 0,route_id,median_off_peak_duration_min,median_peak_duration_min,p95_peak_duration_min,peak_vs_off_peak_median_diff,p95_vs_median_peak_diff,score
33,"Westshore, Victoria to Downtown Victoria",1.0,1.0,18.2,0.0,17.2,17.2
35,"Windsor Street, Halifax to St. Margaret's Bay ...",9.0,9.0,14.65,0.0,5.65,0.627778
6,"Charleswood, Winnipeg to St. James, Winnipeg",17.0,21.0,25.0,4.0,4.0,0.470588
28,"St. James, Winnipeg to Transcona, Winnipeg",27.0,28.0,38.3,1.0,10.3,0.418519
22,"Orleans, Ottawa to Downtown Ottawa",21.0,22.0,28.65,1.0,6.65,0.364286
17,"Gardiner Expressway and Highway 427, Toronto t...",20.0,23.0,26.65,3.0,3.65,0.3325
7,"Cloverdale, Surrey to Richmond/Vancouver",54.0,61.5,71.0,7.5,9.5,0.314815
18,"Larry Uteck, Halifax to Hospital in Halifax",16.0,19.0,21.0,3.0,2.0,0.3125
2,"88th Ave, Surrey to Coquitlam",33.0,36.0,43.0,3.0,7.0,0.30303
31,"Tillicum Centre, Victoria to Patricia Bay High...",9.5,10.0,12.0,0.5,2.0,0.263158
