In [None]:
import pandas as pd
import random
from collections import defaultdict

Input Files


In [None]:

# CSV files to combine into a single DataFrame. Each path is read with
# pd.read_csv relative to the notebook's working directory.
all_paths = [
    # Add your file paths here or load them however needed
    "enriched_routes_by_timestamp_cap_to_wfg_with_daytime.csv",
    "enriched_routes_by_timestamp_cg_to_bh_with_daytime.csv",
    "enriched_routes_by_timestamp_ppm_to_cap_with_daytime.csv",
    "enriched_routes_by_timestamp_sam_to_ss_with_daytime.csv",
    "enriched_routes_by_timestamp_sam_to_whs_with_daytime.csv",
    "enriched_routes_by_timestamp_sn_to_bh_with_daytime.csv",
    "enriched_routes_by_timestamp_whs_to_if_with_daytime.csv",
    "enriched_routes_by_timestamp_wp_to_idcts_with_daytime.csv",
    "enriched_routes_by_timestamp_wp_to_sft_with_daytime.csv",
    "enriched_routes_by_timestamp_wp_to_tmp_with_daytime.csv",
    "enriched_routes_by_timestamp_bh_to_cap_with_daytime.csv".replace("bh_to_cap", "bh_to_cts"),
    "enriched_routes_by_timestamp_cap_to_saq_with_daytime.csv",
    "enriched_routes_sn_to_wp_with_daytime.csv",
    "enriched_routes_wp_to_st_with_daytime.csv"
]

# For a quick experiment on a smaller sample, temporarily trim `all_paths`
# (e.g. keep only the last two entries) instead of keeping a second,
# disabled copy of the list in the cell.

# Read every file, then stack the rows; ignore_index=True discards the
# per-file row labels so the combined frame gets a fresh 0..n-1 index.
frames = [pd.read_csv(path) for path in all_paths]
df = pd.concat(frames, ignore_index=True)

PREPROCESS DATA

In [None]:
# Build the per-state table used for learning: one row per
# (day_of_week, hour_of_day) with the mean duration of each travel mode.

# NOTE(review): the saved output of this notebook shows values such as
# "8/10/2025" under `day_of_week`, so the source `day` column appears to hold
# calendar dates rather than weekday names — confirm this is intended.
df = df.rename(columns={'day': 'day_of_week'})

# Timestamps that cannot be parsed become NaT (errors='coerce'), which yields
# a missing hour instead of raising.
df['hour_of_day'] = pd.to_datetime(df['time'], errors='coerce').dt.hour

# Driving rows use the traffic-aware duration; every other mode uses the plain
# duration. Series.where keeps `duration_in_traffic` where the condition holds
# and falls back to `duration_seconds` elsewhere (vectorised equivalent of the
# former row-wise apply).
is_driving = df['mode'] == 'driving'
df['duration'] = df['duration_in_traffic'].where(is_driving, df['duration_seconds'])

# Pivot data: rows are (day_of_week, hour_of_day), columns are travel modes,
# cells are the mean duration over all matching records.
pivot = df.pivot_table(index=['day_of_week', 'hour_of_day'], columns='mode', values='duration', aggfunc='mean').reset_index()

# Keep only states for which all four modes have a duration, so any mode can
# be evaluated in any remaining state.
pivot = pivot.dropna(subset=['driving', 'transit', 'walking', 'bicycling'])


Q-LEARNING SETUP

In [None]:
# Seed Python's RNG so the epsilon-greedy exploration (random.random /
# random.choice) is repeatable across Restart & Run All.
SEED = 42  # set to None to get a different random sequence on every run
random.seed(SEED)

# Candidate actions. Defined once and reused below so the Q-table keys and the
# exploration choices cannot drift apart.
modes = ['driving', 'transit', 'bicycling', 'walking']

# Q-table: state -> {mode: value}. Entries are created lazily, with every mode
# starting at 0.0, the first time a state is looked up.
Q = defaultdict(lambda: dict.fromkeys(modes, 0.0))
alpha = 0.1      # learning rate
gamma = 0.9      # discount factor (not used here)
epsilon = 0.1    # exploration rate

# Multiplicative penalty per mode: the duration is scaled by (1 + weight)
# where it is used in the training cell.
penalty_weight = {
    "driving": 0.50,     # Higher penalty to discourage over-selection
    "transit": 0.05,
    "bicycling": 0.02,
    "walking": 0.01
}

# One state per pivot row, in row order: (day_of_week, hour_of_day).
states = list(zip(pivot['day_of_week'], pivot['hour_of_day']))

Training Loop


In [None]:
# Epsilon-greedy training of the Q-table (single-step update, no next-state
# term).
#
# Q[state][mode] estimates the penalty-adjusted travel COST
#     duration * (1 + penalty_weight[mode])
# so lower is better and the greedy action is the mode with the smallest
# estimate. Storing the cost (rather than its negation) keeps every min()
# over Q in this notebook meaning "cheapest mode"; combining a negated reward
# with min() would instead select the slowest mode.
#
# A single pass would update only one action per state, leaving the other
# three at their initial 0.0. Repeating the pass lets every action be tried:
# untried actions sit at 0.0, below any positive cost estimate, so the greedy
# step visits them before settling on the cheapest one.
N_EPISODES = 500  # passes over all states

# The penalised cost of each (state, mode) pair does not change between
# episodes, so compute it once. `states` and the rows of `pivot` are aligned
# by position.
state_costs = [
    (state, {mode: row[mode] * (1 + penalty_weight[mode]) for mode in modes})
    for state, (row_label, row) in zip(states, pivot.iterrows())
]

for episode in range(N_EPISODES):
    for state, costs in state_costs:
        q_values = Q[state]

        if random.random() < epsilon:
            # Explore: pick any mode uniformly at random.
            action = random.choice(modes)
        else:
            # Exploit: pick the mode with the lowest estimated cost.
            action = min(q_values, key=q_values.get)

        # Move the estimate a fraction `alpha` of the way toward the observed cost.
        q_values[action] += alpha * (costs[action] - q_values[action])

EXTRACT LEARNED POLICY

In [None]:
# For every state present in the Q-table, select the mode whose Q-value is the
# smallest.
# NOTE(review): min() yields the best mode only if Q holds costs (lower is
# better); if Q holds rewards this should be max() — confirm against the
# training cell.
optimal_policy = {}
for state, action_values in Q.items():
    optimal_policy[state] = min(action_values, key=action_values.get)

# Flatten the {(day, hour): mode} mapping into one record per state.
policy_records = []
for (day, hour), mode in optimal_policy.items():
    policy_records.append({'day_of_week': day, 'hour_of_day': hour, 'best_mode': mode})

policy_df = pd.DataFrame(policy_records)

Display Result

In [None]:
# Order the policy by state before showing and saving it.
# NOTE(review): the saved output lists "8/10/2025" before "8/9/2025", so
# `day_of_week` appears to sort as text rather than chronologically — confirm
# whether that ordering is acceptable.
sort_keys = ["day_of_week", "hour_of_day"]
policy_df = policy_df.sort_values(by=sort_keys)
print(policy_df)

# Write the sorted policy to CSV without the DataFrame index column.
output_csv = "learned_route_policy_all_routes_run10.csv"
policy_df.to_csv(output_csv, index=False)


    day_of_week  hour_of_day  best_mode
0     8/10/2025            6    driving
1     8/10/2025            7    driving
2     8/10/2025            8    driving
3     8/10/2025            9    driving
4     8/10/2025           10    driving
..          ...          ...        ...
114    8/9/2025           18    driving
115    8/9/2025           19    driving
116    8/9/2025           20  bicycling
117    8/9/2025           21    driving
118    8/9/2025           22    driving

[119 rows x 3 columns]
