In [1]:
# Load modules
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 

import numpy as np
import pandas as pd
import pickle

from train_ann_functions import AsuSharedNN, ll, normaliser_shared

In [2]:
# Read the training sample and the test sample from their CSV files (in that order)
data_train, data_test = map(
    pd.read_csv,
    ('data/data_rum_4_train.csv', 'data/data_rum_4_test.csv'),
)

In [3]:
# Scalars. J is used by the training cell as the number of alternatives in the
# equal-shares null log-likelihood, np.log(1/J).
# NOTE(review): K is not referenced in the visible cells - confirm its role.
J, K = 3, 2

# Attribute columns: one *_COST and one *_TT column per alternative
Xvars = ['TRAIN_COST','TRAIN_TT','SM_COST','SM_TT','CAR_COST','CAR_TT']
X_train, X_test = data_train[Xvars], data_test[Xvars]
X = pd.concat([X_train, X_test], axis=0)

# The scaler is fitted on the stacked train+test attributes.
# X_shared flags columns 0, 2 and 4 (the *_COST columns) and shared_locations
# lists the same positions as one group; presumably those columns are scaled
# jointly - TODO confirm in train_ann_functions.
# NOTE(review): fitting on train+test lets test-sample statistics into the
# scaling; confirm this is intended.
transformer = normaliser_shared(X_shared=[1,0,1,0,1,0], shared_locations=[[0,2,4]])
transformer.fit(X)

# Apply the fitted scaler to the train, test and full samples
X_train, X_test, X = [transformer.transform(sample) for sample in (X_train, X_test, X)]

# CHOICE is shifted down by one (presumably 1-based codes to 0-based labels - confirm)
y_train, y_test = (sample['CHOICE'].to_numpy() - 1 for sample in (data_train, data_test))
y = np.concatenate([y_train, y_test])

In [4]:
# Number of times the model is re-created and re-fitted
R = 100

# Per-repetition results: log-likelihoods, test rho-squared, and the test-sample
# gradients (mg_U), utilities (V) and choice probabilities (p)
ll_full_list = []
ll_train_list = []
ll_test_list = []
r2_test_list = []
mg_U = []
V = []
p = []

# Log-likelihood of the test sample when every one of the J alternatives is
# equally likely; it does not depend on the fitted model, so compute it once.
ll_null_test = len(X_test)*np.log(1/J)

# NOTE(review): no random seed is set in the visible cells, so the repetitions
# are presumably not reproducible run-to-run - confirm whether that is intended.
for r in range(R):
    # Set model
    # NOTE(review): the positional lists [1,1,2,2,3,3] and [1,0,1,0,1,0] presumably
    # assign each column to an alternative and flag the shared columns - confirm
    # against AsuSharedNN.fit.
    model = AsuSharedNN(topology=(5,5),activation='tanh',from_logits=True)
    model.fit(X_train,y_train,[1,1,2,2,3,3],[1,0,1,0,1,0],early_stopping=True,validation_split=0.1)

    # Get log-likelihood; the test probabilities are predicted once and reused
    # both for the test log-likelihood and for the stored results
    ll_full = ll(y,model.predict_proba(X))
    ll_train = ll(y_train,model.predict_proba(X_train))
    p_test = model.predict_proba(X_test)
    ll_test = ll(y_test,p_test)
    r2 = 1 - ll_test/ll_null_test

    ll_full_list.append(ll_full)
    ll_train_list.append(ll_train)
    ll_test_list.append(ll_test)
    r2_test_list.append(r2)

    print(f'{r+1}/{R} / Log-lik (full): {round(ll_full,2)} / LL (train) = {round(ll_train,2)} / LL (test) = {round(ll_test,2)} / Rho-sq (test): {round(r2,4)}')

    # Keep the test-sample outputs of this repetition
    mg_U.append(model.gradient(X_test,transformer))
    V.append(model.predict_utility(X_test))
    p.append(p_test)

1/100 / Log-lik (full): -4459.28 / LL (train) = -3536.79 / LL (test) = -922.49 / Rho-sq (test): 0.5356
2/100 / Log-lik (full): -4458.03 / LL (train) = -3535.86 / LL (test) = -922.17 / Rho-sq (test): 0.5357
3/100 / Log-lik (full): -4459.82 / LL (train) = -3537.43 / LL (test) = -922.39 / Rho-sq (test): 0.5356
4/100 / Log-lik (full): -4459.38 / LL (train) = -3537.02 / LL (test) = -922.35 / Rho-sq (test): 0.5356
5/100 / Log-lik (full): -4457.42 / LL (train) = -3535.45 / LL (test) = -921.97 / Rho-sq (test): 0.5358
6/100 / Log-lik (full): -4464.41 / LL (train) = -3541.19 / LL (test) = -923.22 / Rho-sq (test): 0.5352
7/100 / Log-lik (full): -4466.65 / LL (train) = -3543.72 / LL (test) = -922.94 / Rho-sq (test): 0.5353
8/100 / Log-lik (full): -4463.22 / LL (train) = -3540.83 / LL (test) = -922.39 / Rho-sq (test): 0.5356
9/100 / Log-lik (full): -4456.25 / LL (train) = -3534.3 / LL (test) = -921.95 / Rho-sq (test): 0.5358
10/100 / Log-lik (full): -4466.06 / LL (train) = -3542.96 / LL (test) = -9

In [5]:
# Stack metrics (one entry per repetition)
ll_full_array = np.array(ll_full_list)
ll_train_array = np.array(ll_train_list)
ll_test_array = np.array(ll_test_list)
r2_test_array = np.array(r2_test_list)

# Create the metrics table: one row per repetition, one column per metric
metrics = pd.DataFrame({
    'Log-lik (full)': ll_full_array,
    'Log-lik (train)': ll_train_array,
    'Log-lik (test)': ll_test_array,
    'Rho-sq (test)': r2_test_array,
})

# Make sure the output folder exists so the write cannot fail after the long training run
os.makedirs('results', exist_ok=True)
metrics.to_csv('results/asushared_synth_4_metrics.csv')

# Display the average of each metric across repetitions
metrics.mean()

Log-lik (full)    -4462.585260
Log-lik (train)   -3539.923887
Log-lik (test)     -922.661374
Rho-sq (test)         0.535485
dtype: float64

In [6]:
# Save pickle file: the per-repetition test-sample gradients, utilities and
# probabilities are stored together as a single list [mg_U, V, p].
# The output folder is created first so open() cannot fail on a missing directory.
os.makedirs('results', exist_ok=True)
with open('results/asushared_synth_4.pickle', 'wb') as handle:
    pickle.dump([mg_U,V,p], handle, protocol=pickle.HIGHEST_PROTOCOL)