In [5]:
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

In [6]:
# Load files
with open('results/mu_synth_1.pickle', 'rb') as handle:
    V, p, mg_U = pickle.load(handle)

In [7]:
# Load data
data_train = pd.read_csv('data/data_rum_1_train.csv')
data_test = pd.read_csv('data/data_rum_1_test.csv')

# Set scalars
J = 3
K = 2

# Set variables
Xvars = ['TRAIN_COST','TRAIN_TT','SM_COST','SM_TT','CAR_COST','CAR_TT']
X_train = data_train[Xvars].to_numpy()
X_test = data_test[Xvars].to_numpy()
X = np.r_[X_train,X_test]
transformer = MinMaxScaler().fit(X)

X_train = transformer.transform(X_train)
X_test = transformer.transform(X_test)
X = transformer.transform(X)

y_train = data_train['CHOICE'].to_numpy() - 1
y_test = data_test['CHOICE'].to_numpy() - 1
y = np.r_[y_train,y_test]

In [8]:
mg_U_array = np.mean(np.stack(mg_U,axis=3),axis=3)
V_array = np.mean(np.stack(V,axis=2),axis=2)

In [10]:
# Get marginal utilities
mu_train_cost = mg_U_array[:,0,0]
mu_train_tt   = mg_U_array[:,1,0]
mu_sm_cost    = mg_U_array[:,2,1]
mu_sm_tt      = mg_U_array[:,3,1]
mu_car_cost   = mg_U_array[:,4,2]
mu_car_tt     = mg_U_array[:,5,2]

mu_array = np.c_[mu_train_cost,mu_train_tt,mu_sm_cost,mu_sm_tt,mu_car_cost,mu_car_tt]

# Compute 'true' MU
beta = [-2.,-3.]

true_mu_train_cost = beta[0]
true_mu_train_tt   = beta[1]
true_mu_sm_cost    = beta[0]
true_mu_sm_tt      = beta[1]
true_mu_car_cost   = beta[0]
true_mu_car_tt     = beta[1]

true_mu_array = np.c_[true_mu_train_cost,true_mu_train_tt,true_mu_sm_cost,true_mu_sm_tt,true_mu_car_cost,true_mu_car_tt]

# Create statistics
mu_mean   = np.mean(mu_array,axis=0)
mu_std    = np.std(mu_array,axis=0)
mu_median = np.median(mu_array,axis=0)
mu_mean_bias = np.mean(mu_array-true_mu_array,axis=0)
mu_rmse = np.sqrt(np.mean((mu_array-true_mu_array)**2,axis=0))

true_mu_mean   = np.mean(true_mu_array,axis=0)
true_mu_median = np.median(true_mu_array,axis=0)

In [11]:
# Get percentiles of VTT
mu_perc=np.quantile(mu_array,q=[0,.05,.1,.2,.3,.4,.5,.6,.7,.8,.9,.95,1],axis=0)
pd.DataFrame(mu_perc,columns=Xvars)

Unnamed: 0,TRAIN_COST,TRAIN_TT,SM_COST,SM_TT,CAR_COST,CAR_TT
0,-2.263721,-3.64579,-2.084186,-2.897225,-2.727747,-3.424845
1,-2.226755,-3.58511,-2.072387,-2.880394,-2.693589,-3.393254
2,-2.190234,-3.53404,-2.060878,-2.864082,-2.660135,-3.364389
3,-2.124131,-3.443753,-2.033038,-2.828043,-2.58865,-3.301381
4,-2.047149,-3.331021,-2.002494,-2.788311,-2.519253,-3.23383
5,-1.958615,-3.196592,-1.963948,-2.735237,-2.420823,-3.136932
6,-1.866151,-3.05801,-1.927946,-2.688013,-2.303972,-3.031832
7,-1.770495,-2.921886,-1.893409,-2.637761,-2.157315,-2.888082
8,-1.643386,-2.746618,-1.844282,-2.576728,-2.000178,-2.744479
9,-1.517833,-2.546983,-1.759355,-2.46869,-1.832984,-2.573682


In [12]:
df_mu = pd.DataFrame(np.c_[mu_mean,true_mu_mean,mu_median,true_mu_median,mu_mean_bias,mu_rmse],index=Xvars,columns=['Mean','Mean true','Median','Median true','Mean bias','RMSE'])
# df_mu.to_csv('results/asu_synth_1_mu.csv')
df_mu

Unnamed: 0,Mean,Mean true,Median,Median true,Mean bias,RMSE
TRAIN_COST,-1.799327,-2.0,-1.866151,-2.0,0.200673,0.405803
TRAIN_TT,-2.959662,-3.0,-3.05801,-3.0,0.040338,0.52971
SM_COST,-1.881157,-2.0,-1.927946,-2.0,0.118843,0.228931
SM_TT,-2.626955,-3.0,-2.688013,-3.0,0.373045,0.456033
CAR_COST,-2.197798,-2.0,-2.303972,-2.0,-0.197798,0.468852
CAR_TT,-2.919926,-3.0,-3.031832,-3.0,0.080074,0.43269


In [15]:
# Get VTT
vtt_train = mg_U_array[:,1,0]/mg_U_array[:,0,0]
vtt_sm = mg_U_array[:,3,1]/mg_U_array[:,2,1]
vtt_car = mg_U_array[:,5,2]/mg_U_array[:,4,2]

vtt_array = np.c_[vtt_train,vtt_sm,vtt_car]

# Compute 'true' VTT
true_vtt_train = true_mu_train_tt/true_mu_train_cost
true_vtt_sm = true_mu_sm_tt/true_mu_sm_cost
true_vtt_car = true_mu_car_tt/true_mu_car_cost

true_vtt_array = np.c_[true_vtt_train,true_vtt_sm,true_vtt_car]

# Create statistics
vtt_mean   = np.mean(vtt_array,axis=0)
vtt_std    = np.std(vtt_array,axis=0)
vtt_median = np.median(vtt_array,axis=0)
vtt_mean_bias = np.mean(vtt_array-true_vtt_array,axis=0)
vtt_rmse = np.sqrt(np.mean((vtt_array-true_vtt_array)**2,axis=0))

true_vtt_mean   = np.mean(true_vtt_array,axis=0)
true_vtt_median = np.median(true_vtt_array,axis=0)

vtt_names = ['TRAIN', 'SM', 'CAR']

In [16]:
# Get percentiles of VTT
vtt_perc=np.quantile(vtt_array,q=[0,.05,.1,.2,.3,.4,.5,.6,.7,.8,.9,.95,1],axis=0)
pd.DataFrame(vtt_perc,columns=vtt_names)

Unnamed: 0,TRAIN,SM,CAR
0,1.53658,1.211189,1.255554
1,1.607389,1.382195,1.260049
2,1.610225,1.386289,1.26479
3,1.615872,1.389007,1.274497
4,1.620478,1.39007,1.284782
5,1.62702,1.39134,1.297451
6,1.635026,1.393104,1.316494
7,1.644946,1.394991,1.338304
8,1.657823,1.397515,1.370216
9,1.678561,1.402579,1.40641


In [17]:
# Drop outliers
vtt_train_clean = vtt_train[(vtt_train>=0) & (vtt_train <= vtt_perc[-2,0])]
vtt_sm_clean = vtt_sm[(vtt_sm>=0) & (vtt_sm <= vtt_perc[-2,1])]
vtt_car_clean = vtt_car[(vtt_car>=0) & (vtt_car <= vtt_perc[-2,2])]

true_vtt_train_clean = true_vtt_train
true_vtt_sm_clean = true_vtt_sm
true_vtt_car_clean = true_vtt_car

# Create clean VTT statistic arrays
mean_vtt_array = np.r_[np.mean(vtt_train_clean),np.mean(vtt_sm_clean),np.mean(vtt_car_clean)]
median_vtt_array = np.r_[np.median(vtt_train_clean),np.median(vtt_sm_clean),np.median(vtt_car_clean)]
std_vtt_array = np.r_[np.std(vtt_train_clean),np.std(vtt_sm_clean),np.std(vtt_car_clean)]

bias_vtt_array = np.r_[
    np.mean(vtt_train_clean - true_vtt_train_clean),
    np.mean(vtt_sm_clean - true_vtt_sm_clean),
    np.mean(vtt_car_clean - true_vtt_car_clean)
]

rmse_vtt_array = np.r_[
    np.mean((vtt_train_clean - true_vtt_train_clean)**2),
    np.mean((vtt_sm_clean - true_vtt_sm_clean)**2),
    np.mean((vtt_car_clean - true_vtt_car_clean)**2)
]

min_vtt_array = np.r_[np.min(vtt_train_clean),np.min(vtt_sm_clean),np.min(vtt_car_clean)]
max_vtt_array = np.r_[np.max(vtt_train_clean),np.max(vtt_sm_clean),np.max(vtt_car_clean)]

In [18]:
df_vtt = pd.DataFrame(np.c_[mean_vtt_array,true_vtt_mean,median_vtt_array,true_vtt_median,bias_vtt_array,rmse_vtt_array],index=vtt_names,columns=['Mean','Mean true','Median','Median true','Mean bias','RMSE'])
df_vtt.to_csv('results/asu_synth_1_vtt.csv')
df_vtt

Unnamed: 0,Mean,Mean true,Median,Median true,Mean bias,RMSE
TRAIN,1.643942,1.5,1.633276,1.5,0.143942,0.022024
SM,1.394254,1.5,1.392676,1.5,-0.105746,0.011371
CAR,1.331356,1.5,1.310546,1.5,-0.168644,0.032711
