In [1]:
# Load modules
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

import numpy as np
import pandas as pd
import pickle

from train_ann_functions import AsuSharedNN, ll, normaliser_shared

In [2]:
# Read the training and test CSV files into DataFrames (train first, then test)
data_train, data_test = map(
    pd.read_csv,
    ('data/data_rum_1_train.csv', 'data/data_rum_1_test.csv'),
)

In [3]:
# Scalars
J = 3  # enters the null log-likelihood log(1/J) in the training cell
K = 2  # not referenced in the visible cells -- presumably attributes per alternative; TODO confirm

# Explanatory variables: one COST and one TT column for each of TRAIN, SM and CAR
Xvars = [
    'TRAIN_COST', 'TRAIN_TT',
    'SM_COST', 'SM_TT',
    'CAR_COST', 'CAR_TT',
]
X_train = data_train.loc[:, Xvars]
X_test = data_test.loc[:, Xvars]
X = pd.concat([X_train, X_test], axis=0)

# Fit the normaliser on the pooled (train + test) rows.
# NOTE(review): whatever fit() learns is therefore learned from test rows too -- confirm this is intended.
transformer = normaliser_shared(X_shared=[1, 0, 1, 0, 1, 0], shared_locations=[[0, 2, 4]])
transformer.fit(X)

# Apply the fitted normaliser to each sample; every call receives the untransformed frame
X_train, X_test, X = (
    transformer.transform(frame) for frame in (X_train, X_test, X)
)

# Shift CHOICE down by one (presumably 1-based codes -> 0-based labels; TODO confirm)
y_train, y_test = (
    frame['CHOICE'].to_numpy() - 1 for frame in (data_train, data_test)
)
y = np.concatenate([y_train, y_test])

In [4]:
# Number of repetitions: a fresh model is built and fitted in each one
# NOTE(review): no random seed is set in the visible cells, so results presumably vary
# from run to run -- confirm whether AsuSharedNN seeds internally.
R = 100

# Per-repetition fit metrics
ll_full_list = []
ll_train_list = []
ll_test_list = []
r2_test_list = []

# Per-repetition test-sample outputs: gradients, utilities and choice probabilities
mg_U = []
V = []
p = []

for r in range(R):
    # Set model
    model = AsuSharedNN(topology=(6,),activation='tanh',from_logits=True)
    model.fit(X_train,y_train,[1,1,2,2,3,3],[1,0,1,0,1,0],early_stopping=True,validation_split=0.1)

    # Get log-likelihood on the pooled, training and test samples.
    # The test-sample probabilities are also stored below, so predict them once and reuse.
    ll_full = ll(y,model.predict_proba(X))
    ll_train = ll(y_train,model.predict_proba(X_train))
    p_test = model.predict_proba(X_test)
    ll_test = ll(y_test,p_test)

    # Rho-squared on the test sample against the equal-shares null: LL0 = N_test * log(1/J)
    r2 = 1 - ll_test/(len(X_test)*np.log(1/J))

    ll_full_list.append(ll_full)
    ll_train_list.append(ll_train)
    ll_test_list.append(ll_test)
    r2_test_list.append(r2)

    # Progress line; round() (not a fixed-width format) keeps the printed text unchanged
    print(f'{r+1}/{R} / Log-lik (full): {round(ll_full,2)} / LL (train) = {round(ll_train,2)} / LL (test) = {round(ll_test,2)} / Rho-sq (test): {round(r2,4)}')

    mg_U.append(model.gradient(X_test,transformer))
    V.append(model.predict_utility(X_test))
    p.append(p_test)

1/100 / Log-lik (full): -5816.36 / LL (train) = -4630.43 / LL (test) = -1185.93 / Rho-sq (test): 0.4029
2/100 / Log-lik (full): -5815.64 / LL (train) = -4629.46 / LL (test) = -1186.17 / Rho-sq (test): 0.4028
3/100 / Log-lik (full): -5817.93 / LL (train) = -4631.2 / LL (test) = -1186.73 / Rho-sq (test): 0.4025
4/100 / Log-lik (full): -5812.38 / LL (train) = -4627.46 / LL (test) = -1184.92 / Rho-sq (test): 0.4035
5/100 / Log-lik (full): -5820.73 / LL (train) = -4633.94 / LL (test) = -1186.79 / Rho-sq (test): 0.4025
6/100 / Log-lik (full): -5814.58 / LL (train) = -4627.72 / LL (test) = -1186.86 / Rho-sq (test): 0.4025
7/100 / Log-lik (full): -5823.99 / LL (train) = -4636.1 / LL (test) = -1187.89 / Rho-sq (test): 0.402
8/100 / Log-lik (full): -5813.47 / LL (train) = -4627.25 / LL (test) = -1186.22 / Rho-sq (test): 0.4028
9/100 / Log-lik (full): -5817.96 / LL (train) = -4631.89 / LL (test) = -1186.07 / Rho-sq (test): 0.4029
10/100 / Log-lik (full): -5817.13 / LL (train) = -4630.71 / LL (tes

In [5]:
# Turn each per-repetition metric list into an array
ll_full_array, ll_train_array, ll_test_array, r2_test_array = map(
    np.array,
    (ll_full_list, ll_train_list, ll_test_list, r2_test_list),
)

# Assemble a table with one row per repetition and one column per metric
metric_columns = ['Log-lik (full)','Log-lik (train)','Log-lik (test)','Rho-sq (test)']
metrics = pd.DataFrame(
    np.column_stack((ll_full_array, ll_train_array, ll_test_array, r2_test_array)),
    columns=metric_columns,
)

# Write the table to disk, then display the column means as the cell output
metrics.to_csv('results/asushared_synth_1_metrics.csv')
metrics.mean()

Log-lik (full)    -5818.228796
Log-lik (train)   -4631.586587
Log-lik (test)    -1186.642209
Rho-sq (test)         0.402584
dtype: float64

In [6]:
# Write the per-repetition test-sample outputs (gradients, utilities, probabilities)
# to disk as a single pickled list, in that order
results_path = 'results/asushared_synth_1.pickle'
payload = [mg_U, V, p]
with open(results_path, 'wb') as fout:
    pickle.dump(payload, fout, protocol=pickle.HIGHEST_PROTOCOL)