In [7]:
import numpy as np
import pandas as pd
import pickle
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt


In [8]:
# Load files
with open('results/mu_synth_4.pickle', 'rb') as handle:
    V, p, mg_U = pickle.load(handle)

In [9]:
mg_U_array = np.mean(np.stack(mg_U,axis=3),axis=3)
V_array = np.mean(np.stack(V,axis=2),axis=2)

In [10]:
# Load data
data_train = pd.read_csv('data/data_rum_4_train.csv')
data_test = pd.read_csv('data/data_rum_4_test.csv')

In [11]:
# Set scalars
J = 3
K = 2

# Set variables
Xvars = ['TRAIN_COST','TRAIN_TT','SM_COST','SM_TT','CAR_COST','CAR_TT']
X_train = data_train[Xvars].to_numpy()
X_test = data_test[Xvars].to_numpy()
X = np.r_[X_train,X_test]
transformer = MinMaxScaler().fit(X)

X_train = transformer.transform(X_train)
X_test = transformer.transform(X_test)
X = transformer.transform(X)

y_train = data_train['CHOICE'].to_numpy() - 1
y_test = data_test['CHOICE'].to_numpy() - 1
y = np.r_[y_train,y_test]

In [13]:
# Get marginal utilities
mu_train_cost = mg_U_array[:,0,0]
mu_train_tt   = mg_U_array[:,1,0]
mu_sm_cost    = mg_U_array[:,2,1]
mu_sm_tt      = mg_U_array[:,3,1]
mu_car_cost   = mg_U_array[:,4,2]
mu_car_tt     = mg_U_array[:,5,2]

mu_array = np.c_[mu_train_cost,mu_train_tt,mu_sm_cost,mu_sm_tt,mu_car_cost,mu_car_tt]

# Compute 'true' MU
beta = [-3.,-5.]

X_test_mu = transformer.inverse_transform(X_test).reshape((len(X_test),J,K))
true_mu_train_cost = beta[0]/(X_test_mu[:,0,0] + 0.1)
true_mu_train_tt   = beta[1]/(X_test_mu[:,0,1] + 0.1)
true_mu_sm_cost    = beta[0]/(X_test_mu[:,1,0] + 0.1)
true_mu_sm_tt      = beta[1]/(X_test_mu[:,1,1] + 0.1)
true_mu_car_cost   = beta[0]/(X_test_mu[:,2,0] + 0.1)
true_mu_car_tt     = beta[1]/(X_test_mu[:,2,1] + 0.1)

true_mu_array = np.c_[true_mu_train_cost,true_mu_train_tt,true_mu_sm_cost,true_mu_sm_tt,true_mu_car_cost,true_mu_car_tt]

# Create statistics
mu_mean   = np.mean(mu_array,axis=0)
mu_std    = np.std(mu_array,axis=0)
mu_median = np.median(mu_array,axis=0)
mu_mean_bias = np.mean(mu_array-true_mu_array,axis=0)
mu_rmse = np.sqrt(np.mean((mu_array-true_mu_array)**2,axis=0))

true_mu_mean   = np.mean(true_mu_array,axis=0)
true_mu_median = np.median(true_mu_array,axis=0)

In [14]:
# Get percentiles of VTT
mu_perc=np.quantile(mu_array,q=[0,.05,.1,.2,.3,.4,.5,.6,.7,.8,.9,.95,1],axis=0)
pd.DataFrame(mu_perc,columns=Xvars)

Unnamed: 0,TRAIN_COST,TRAIN_TT,SM_COST,SM_TT,CAR_COST,CAR_TT
0,-9.923254,-13.305468,-4.60805,-7.148941,-15.879719,-11.120002
1,-4.785953,-4.824412,-4.47005,-6.848319,-6.863217,-5.934516
2,-4.36594,-4.194782,-4.321202,-6.635783,-5.49369,-5.034145
3,-3.953961,-3.689996,-3.898678,-6.253268,-4.577562,-4.392139
4,-3.562324,-3.262683,-3.435144,-5.80181,-3.973823,-3.912467
5,-3.251947,-2.906146,-3.100634,-5.380119,-3.495369,-3.517526
6,-2.978325,-2.625145,-2.7813,-5.006749,-2.987946,-3.057586
7,-2.691184,-2.371001,-2.491232,-4.675666,-2.533041,-2.60837
8,-2.373435,-2.109238,-2.1346,-4.290961,-2.155981,-2.254586
9,-2.044119,-1.794413,-1.717548,-3.772162,-1.80332,-1.90571


In [15]:
# Create dataframe with results
df_mu = pd.DataFrame(np.c_[mu_mean,true_mu_mean,mu_median,true_mu_median,mu_mean_bias,mu_rmse],index=Xvars,columns=['Mean','Mean true','Median','Median true','Mean bias','RMSE'])
# df_mu.to_csv('results/ann_synth_4_mu.csv')
df_mu

Unnamed: 0,Mean,Mean true,Median,Median true,Mean bias,RMSE
TRAIN_COST,-3.021431,-5.692981,-2.978325,-3.26087,2.671549,7.003774
TRAIN_TT,-2.820541,-3.235772,-2.625145,-2.808989,0.415231,0.935614
SM_COST,-2.766411,-5.156657,-2.7813,-2.777778,2.390246,7.000391
SM_TT,-4.913009,-6.032699,-5.006749,-5.434783,1.119691,2.184374
CAR_COST,-3.337341,-3.6201,-2.987946,-3.191489,0.282759,0.857116
CAR_TT,-3.223037,-3.901054,-3.057586,-3.333333,0.678017,1.031301


In [19]:
# Get VTT
vtt_train = mg_U_array[:,1,0]/mg_U_array[:,0,0]
vtt_sm = mg_U_array[:,3,1]/mg_U_array[:,2,1]
vtt_car = mg_U_array[:,5,2]/mg_U_array[:,4,2]

vtt_array = np.c_[vtt_train,vtt_sm,vtt_car]

# Compute 'true' VTT
true_vtt_train = true_mu_train_tt/true_mu_train_cost
true_vtt_sm = true_mu_sm_tt/true_mu_sm_cost
true_vtt_car = true_mu_car_tt/true_mu_car_cost

true_vtt_array = np.c_[true_vtt_train,true_vtt_sm,true_vtt_car]

# Create statistics
vtt_mean   = np.mean(vtt_array,axis=0)
vtt_std    = np.std(vtt_array,axis=0)
vtt_median = np.median(vtt_array,axis=0)
vtt_mean_bias = np.mean(vtt_array-true_vtt_array,axis=0)
vtt_rmse = np.sqrt(np.mean((vtt_array-true_vtt_array)**2,axis=0))

true_vtt_mean   = np.mean(true_vtt_array,axis=0)
true_vtt_median = np.median(true_vtt_array,axis=0)

vtt_names = ['TRAIN', 'SM', 'CAR']

In [20]:
# Get percentiles of VTT
vtt_perc=np.quantile(vtt_array,q=[0,.05,.1,.2,.3,.4,.5,.6,.7,.8,.9,.95,1],axis=0)
pd.DataFrame(vtt_perc,columns=vtt_names)

Unnamed: 0,TRAIN,SM,CAR
0,0.466948,0.144929,0.700264
1,0.780822,1.23754,0.86117
2,0.807015,1.36062,0.914256
3,0.839167,1.497176,0.962618
4,0.865348,1.55915,0.985807
5,0.888957,1.632969,1.003021
6,0.910266,1.740612,1.018206
7,0.930359,1.864345,1.03201
8,0.953053,2.028302,1.044892
9,0.981085,2.366292,1.06032


In [21]:
# Drop outliers
vtt_train_clean = vtt_train[(vtt_train>=0) & (vtt_train <= vtt_perc[-2,0])]
vtt_sm_clean = vtt_sm[(vtt_sm>=0) & (vtt_sm <= vtt_perc[-2,1])]
vtt_car_clean = vtt_car[(vtt_car>=0) & (vtt_car <= vtt_perc[-2,2])]

true_vtt_train_clean = true_vtt_train[(vtt_train>=0) & (vtt_train <= vtt_perc[-2,0])]
true_vtt_sm_clean = true_vtt_sm[(vtt_sm>=0) & (vtt_sm <= vtt_perc[-2,1])]
true_vtt_car_clean = true_vtt_car[(vtt_car>=0) & (vtt_car <= vtt_perc[-2,2])]

# Create clean VTT statistic arrays
mean_vtt_array = np.r_[np.mean(vtt_train_clean),np.mean(vtt_sm_clean),np.mean(vtt_car_clean)]
median_vtt_array = np.r_[np.median(vtt_train_clean),np.median(vtt_sm_clean),np.median(vtt_car_clean)]
std_vtt_array = np.r_[np.std(vtt_train_clean),np.std(vtt_sm_clean),np.std(vtt_car_clean)]

bias_vtt_array = np.r_[
    np.mean(vtt_train_clean - true_vtt_train_clean),
    np.mean(vtt_sm_clean - true_vtt_sm_clean),
    np.mean(vtt_car_clean - true_vtt_car_clean)
]

rmse_vtt_array = np.r_[
    np.mean((vtt_train_clean - true_vtt_train_clean)**2),
    np.mean((vtt_sm_clean - true_vtt_sm_clean)**2),
    np.mean((vtt_car_clean - true_vtt_car_clean)**2)
]

min_vtt_array = np.r_[np.min(vtt_train_clean),np.min(vtt_sm_clean),np.min(vtt_car_clean)]
max_vtt_array = np.r_[np.max(vtt_train_clean),np.max(vtt_sm_clean),np.max(vtt_car_clean)]

In [22]:
df_vtt = pd.DataFrame(np.c_[mean_vtt_array,true_vtt_mean,median_vtt_array,true_vtt_median,bias_vtt_array,rmse_vtt_array],index=vtt_names,columns=['Mean','Mean true','Median','Median true','Mean bias','RMSE'])
df_vtt.to_csv('results/ann_synth_4_vtt.csv')
df_vtt

Unnamed: 0,Mean,Mean true,Median,Median true,Mean bias,RMSE
TRAIN,0.903318,1.005046,0.904491,0.849465,0.004604,0.196949
SM,1.856167,2.276807,1.718953,1.839679,-0.152548,0.950446
CAR,1.000167,1.153914,1.015189,1.074074,-0.13332,0.183952
