In [21]:
import pandas as pd

In [22]:
from eval import evaluate_heuristic

In [25]:
from training_claude import load_agent, evaluate_agent
from rl_env import FlexSimEnv

In [77]:
# Accumulator: each key collects one history entry per evaluated scenario.
main_history = {table: [] for table in ('pax', 'state', 'vehicles', 'idle')}

# slope scenarios: the heuristic is evaluated once per slope value,
# each run tagged with its own scenario label.
slopes = [1.0, 2.0, 3.0]
slope_scenario_names = ['slope_1', 'slope_2', 'slope_3']

for slope_value, slope_label in zip(slopes, slope_scenario_names):
    heuristic_history = evaluate_heuristic(
        slope=slope_value,
        n_episodes=30,
        output_history=True,
        scenario_name=slope_label,
    )
    # Keys the accumulator has not seen yet get a fresh list on first use.
    for table in heuristic_history:
        main_history.setdefault(table, []).append(heuristic_history[table])

# RL scenarios: one saved model file per scenario label.
model_paths = [
    'models/dqn_weight_2.pth',
    'models/dqn_weight_2_5.pth',
    'models/dqn_weight_3.pth',
]
scenario_names = ['dqn_2', 'dqn_2_5', 'dqn_3']

# A single environment instance is shared by all agent evaluations.
env = FlexSimEnv()
for checkpoint, label in zip(model_paths, scenario_names):
    agent = load_agent(checkpoint)
    agent_history, agent_summary = evaluate_agent(
        env,
        agent,
        num_episodes=30,
        output_history=True,
        scenario_name=label,
    )
    print(agent_summary)
    # Append this run's entries to the shared accumulator, creating the
    # list for any key that is not present yet.
    for table in agent_history:
        main_history.setdefault(table, []).append(agent_history[table])

{'mean_reward': -0.576, 'std_reward': 1.196, 'deviation_opportunities': 40.4, 'deviations': 27.8, 'avg_picked_requests': 1.7, 'early_trips': 4.6, 'late_trips': 1.9}
{'mean_reward': -0.699, 'std_reward': 1.3, 'deviation_opportunities': 39.4, 'deviations': 22.5, 'avg_picked_requests': 1.9, 'early_trips': 6.1, 'late_trips': 1.6}
{'mean_reward': -0.527, 'std_reward': 1.203, 'deviation_opportunities': 39.4, 'deviations': 29.5, 'avg_picked_requests': 1.6, 'early_trips': 4.6, 'late_trips': 2.0}


## Bring together

In [78]:
# Concatenate the per-scenario entries collected under each key into one
# table per key. pd.concat raises ValueError when given an empty list, and
# main_history is pre-seeded with empty lists, so a key that no evaluation
# run filled is mapped to an empty DataFrame instead of aborting the merge.
history_merged = {
    ky: pd.concat(frames, axis=0) if frames else pd.DataFrame()
    for ky, frames in main_history.items()
}

In [82]:
# Work on a copy so the merged passenger table itself is not modified.
pax = history_merged['pax'].copy()
# Hour bucket of each arrival: arrival_time / 3600, truncated to an integer
# (assumes arrival_time is expressed in seconds -- TODO confirm).
pax['arrival_hour'] = (pax['arrival_time'] / 3600).astype(int)
# A row is flagged as denied when flex == 1 and boarding_time is missing.
# Both conditions are combined as boolean masks and cast once at the end, so
# the column is a 0/1 integer; casting only the left operand before `&` left
# the resulting dtype to pandas' mixed int/bool coercion.
pax['denied'] = (pax['boarding_time'].isna() & (pax['flex'] == 1)).astype(int)

In [83]:
# 95th percentile of wait_time for each (scenario, arrival_hour) group.
wait_by_group = pax.groupby(['scenario', 'arrival_hour'])['wait_time']
wait_by_group.quantile(0.95)

scenario  arrival_hour
dqn_2     0               615.00
          1               602.05
          2               593.00
dqn_2_5   0               607.10
          1               607.00
          2               609.00
          3                26.00
dqn_3     0               623.00
          1               600.00
          2               606.65
slope_1   0               619.15
          1               615.25
          2               606.00
slope_2   0               624.50
          1               605.40
          2               606.00
slope_3   0               607.00
          1               603.00
          2               585.90
Name: wait_time, dtype: float64

In [84]:
# Total of the `denied` flag for each (scenario, arrival_hour) group.
denied_by_group = pax.groupby(['scenario', 'arrival_hour'])['denied']
denied_by_group.sum()

scenario  arrival_hour
dqn_2     0               124
          1               108
          2                78
dqn_2_5   0               132
          1               142
          2               106
          3                 0
dqn_3     0                87
          1                91
          2                59
slope_1   0                24
          1                22
          2                28
slope_2   0                50
          1                57
          2                21
slope_3   0                54
          1                56
          2                40
Name: denied, dtype: int64