# Task: T-test, normal vs evacuation

In [211]:
import pandas as pd
import math

In [212]:
# US101 dataset: one fitted-value CSV per date label (10-19 and 10-26).
v_fitted_10_19_path, v_fitted_10_26_path = (
    'Group 2 US101SB/v_fitted_10-19.csv',
    'Group 2 US101SB/v_fitted_10-26.csv',
)
# SR91 dataset: one fitted-value CSV per date-range label (0831-0901 and 0907-0908).
v_fitted_0831_0901_path, v_fitted_0907_0908_path = (
    'SR_91_Bottleneck_B_150-200/v_fitted_0831-0901.csv',
    'SR_91_Bottleneck_B_150-200/v_fitted_0907-0908.csv',
)

In [213]:
# Read every fitted-value CSV into its own DataFrame; the targets on the left are
# listed in the same order as the path variables on the right.
v_fitted_10_19, v_fitted_10_26, v_fitted_0831_0901, v_fitted_0907_0908 = (
    pd.read_csv(csv_path)
    for csv_path in (
        v_fitted_10_19_path,
        v_fitted_10_26_path,
        v_fitted_0831_0901_path,
        v_fitted_0907_0908_path,
    )
)

## US101

In [214]:
# Stack the two US101 tables into one frame; rows stay distinguishable through
# their `date` column, and ignore_index=True gives the result a fresh 0..n-1 index.
v_fitted = pd.concat([v_fitted_10_19, v_fitted_10_26], ignore_index=True)
# Seeded preview so the same five rows are shown on every Restart & Run All.
v_fitted.sample(5, random_state=0)

Unnamed: 0,date,v_observed,k_observed,greenshields,drake,five_pl,s3
3290,10-26,68.0,2.368667,64.003907,66.320362,66.647605,66.679487
6,10-19,65.0,0.366667,66.949412,67.220399,20.602775,67.22622
920,10-19,68.0,4.282667,63.993101,66.436678,18.81316,67.226063
60,10-19,65.0,0.410667,66.916195,67.218918,20.515093,67.22622
2435,10-19,62.333333,14.916,55.965666,58.250552,18.813003,63.650097


In [215]:
from scipy.stats import ttest_ind

# For every model column, compare the fitted values of the two US101 date labels
# ('10-19' = normal, '10-26' = evacuation) with a two-sample t-test.
model_list = ['greenshields', 'drake', 'five_pl', 's3']
for model in model_list:
    v_fitted_model = v_fitted[['date', model]]
    # Split the model's fitted values by date label (single .loc lookup instead of
    # chained boolean indexing).
    v_fitted_model_normal = v_fitted_model.loc[v_fitted_model['date'] == '10-19', model]
    v_fitted_model_evacuation = v_fitted_model.loc[v_fitted_model['date'] == '10-26', model]
    # equal_var=False requests Welch's t-test, which does not assume equal variances.
    t_stat, p_val = ttest_ind(v_fitted_model_normal, v_fitted_model_evacuation, equal_var=False)
    # `.2e` = scientific notation with two decimals (a bare `2e` would only set a
    # minimum field width and leave the default six decimals).
    print(f'The p-value for the t-test of the {model} model is {p_val:.2e}.')


The p-value for the t-test of the greenshields model is 5.127328e-127.
The p-value for the t-test of the drake model is 2.047672e-89.
The p-value for the t-test of the five_pl model is 0.000000e+00.
The p-value for the t-test of the s3 model is 3.139047e-90.


## SR91

In [216]:
# Stack the two SR91 tables into one frame; rows stay distinguishable through
# their `date` column, and ignore_index=True gives the result a fresh 0..n-1 index.
# Note that this reassigns `v_fitted`, which held the US101 data before this cell.
v_fitted = pd.concat([v_fitted_0831_0901, v_fitted_0907_0908], ignore_index=True)
# Seeded preview so the same five rows are shown on every Restart & Run All.
v_fitted.sample(5, random_state=0)

Unnamed: 0,date,v_observed,k_observed,greenshields,drake,five_pl,s3
1497,0907-0908,76.6,20.9,61.216086,61.846103,68.942724,67.667994
2035,0907-0908,31.8,31.9,53.848814,47.677321,49.044393,47.57693
2269,0907-0908,19.1875,49.5,42.061178,25.094428,20.721098,23.258541
1468,0907-0908,71.128205,20.9,61.216086,61.846103,68.942724,67.667994
549,0907-0908,78.666667,3.3,73.003721,74.847856,74.975587,75.211698


In [217]:
from scipy.stats import ttest_ind

# For every model column, compare the fitted values of the two SR91 date labels
# ('0831-0901' = normal, '0907-0908' = evacuation) with a two-sample t-test.
# NOTE(review): the saved output of this cell ends with a SciPy warning line;
# presumably the precision-loss warning raised for near-constant samples — confirm
# before relying on these p-values.
model_list = ['greenshields', 'drake', 'five_pl', 's3']
for model in model_list:
    v_fitted_model = v_fitted[['date', model]]
    # Split the model's fitted values by date label (single .loc lookup instead of
    # chained boolean indexing).
    v_fitted_model_normal = v_fitted_model.loc[v_fitted_model['date'] == '0831-0901', model]
    v_fitted_model_evacuation = v_fitted_model.loc[v_fitted_model['date'] == '0907-0908', model]
    # equal_var=False requests Welch's t-test, which does not assume equal variances.
    t_stat, p_val = ttest_ind(v_fitted_model_normal, v_fitted_model_evacuation, equal_var=False)
    # `.2e` = scientific notation with two decimals (a bare `2e` would only set a
    # minimum field width and leave the default six decimals).
    print(f'The p-value for the t-test of the {model} model is {p_val:.2e}.')

The p-value for the t-test of the greenshields model is 2.005825e-272.
The p-value for the t-test of the drake model is 1.365156e-267.
The p-value for the t-test of the five_pl model is 6.448733e-204.
The p-value for the t-test of the s3 model is 6.805086e-210.


  res = hypotest_fun_out(*samples, **kwds)


# Task: MSE by density range

## US101

In [218]:
# Rebuild the US101 frame: `v_fitted` was reassigned to the SR91 data in an earlier cell.
v_fitted = pd.concat([v_fitted_10_19, v_fitted_10_26], ignore_index=True)
# Seeded preview so the same five rows are shown on every Restart & Run All.
v_fitted.sample(5, random_state=0)

Unnamed: 0,date,v_observed,k_observed,greenshields,drake,five_pl,s3
3856,10-26,70.428571,6.116,59.765657,64.30292,66.535335,66.462035
3963,10-26,63.0,6.93,58.84502,63.642862,66.481767,66.301279
1379,10-19,69.8,7.010667,61.933649,65.131278,18.813003,67.217491
1546,10-19,67.526316,8.191333,61.042327,64.382625,18.813003,67.195194
1017,10-19,66.3,4.810667,63.594498,66.231531,18.813016,67.225815


In [219]:
# Bin `k_observed` into 10-unit, left-closed groups: [0, 10), [10, 20), ...
# The bins always cover at least 0-100; the upper edge grows with the data so that
# no observation falls outside the bins (pd.cut would label such a row NaN).
k_upper_edge = max(100, (math.floor(v_fitted['k_observed'].max()) // 10 + 1) * 10)
v_fitted['k_groups'] = pd.cut(v_fitted['k_observed'], bins=range(0, k_upper_edge + 1, 10), right=False)
print(v_fitted['k_groups'].value_counts().sort_index())

# MSE between `v_observed` and each model's fitted column, per k group.
from sklearn.metrics import mean_squared_error
model_list = ['greenshields', 'drake', 'five_pl', 's3']
mse_list = []
# observed=True yields only the groups that actually contain rows (and groupby skips
# NaN keys), so mean_squared_error is never called on an empty selection.
for k_group, v_fitted_k_group in v_fitted.groupby('k_groups', observed=True):
    for model in model_list:
        mse = mean_squared_error(v_fitted_k_group['v_observed'], v_fitted_k_group[model])
        mse_list.append([k_group, model, mse])
mse_df = pd.DataFrame(mse_list, columns=['k_groups', 'model', 'mse'])

# Reshape to one row per model and one column per k group, then save as Excel.
mse_pivot = mse_df.pivot(index='model', columns='k_groups', values='mse')
mse_pivot.to_excel('Group 2 US101SB/mse_pivot_us101.xlsx')

k_groups
[0, 10)      3420
[10, 20)     1459
[20, 30)      234
[30, 40)      129
[40, 50)       60
[50, 60)       36
[60, 70)       13
[70, 80)        3
[80, 90)        1
[90, 100)       0
Name: count, dtype: int64


## SR91

In [220]:
# Switch `v_fitted` to the SR91 data (it held the US101 frame in the previous section).
v_fitted = pd.concat([v_fitted_0831_0901, v_fitted_0907_0908], ignore_index=True)
# Seeded preview so the same five rows are shown on every Restart & Run All.
v_fitted.sample(5, random_state=0)

Unnamed: 0,date,v_observed,k_observed,greenshields,drake,five_pl,s3
257,0831-0901,78.571429,3.3,69.336469,69.33647,69.33647,69.33647
122,0831-0901,74.6,2.2,69.33647,69.33647,69.33647,69.33647
1733,0907-0908,66.391304,25.3,58.269177,56.462768,62.88971,60.454281
1961,0907-0908,50.047619,28.6,56.058995,52.138168,56.461245,54.058885
1221,0907-0908,79.448276,15.4,64.899722,67.632627,72.828826,73.049771


In [221]:
# Bin `k_observed` into 10-unit, left-closed groups: [0, 10), [10, 20), ...
# The last bin edge is the first multiple of 10 above the largest observed value,
# so the number of groups follows the data instead of stopping at 100.
k_maximum = math.floor(v_fitted['k_observed'].max())
v_fitted['k_groups'] = pd.cut(v_fitted['k_observed'], bins=range(0, k_maximum + 11, 10), right=False)
print(v_fitted['k_groups'].value_counts().sort_index())

# MSE between `v_observed` and each model's fitted column, per k group.
from sklearn.metrics import mean_squared_error
model_list = ['greenshields', 'drake', 'five_pl', 's3']
mse_list = []
# observed=True yields only the groups that actually contain rows (and groupby skips
# NaN keys), so mean_squared_error is never called on an empty selection.
for k_group, v_fitted_k_group in v_fitted.groupby('k_groups', observed=True):
    for model in model_list:
        mse = mean_squared_error(v_fitted_k_group['v_observed'], v_fitted_k_group[model])
        mse_list.append([k_group, model, mse])
mse_df = pd.DataFrame(mse_list, columns=['k_groups', 'model', 'mse'])

# Reshape to one row per model and one column per k group, then save as Excel.
mse_pivot = mse_df.pivot(index='model', columns='k_groups', values='mse')
mse_pivot.to_excel('SR_91_Bottleneck_B_150-200/mse_pivot_sr91.xlsx')

k_groups
[0, 10)       910
[10, 20)      539
[20, 30)      552
[30, 40)      182
[40, 50)       96
[50, 60)       76
[60, 70)       62
[70, 80)       72
[80, 90)       73
[90, 100)      84
[100, 110)     80
[110, 120)     69
[120, 130)     47
[130, 140)     20
[140, 150)      3
[150, 160)      1
Name: count, dtype: int64
