## Train and Submit (Using another computer)

In [1]:
import numpy as np
import pandas as pd
import scipy as sp
import xgboost as xgb
from sklearn.metrics import log_loss
from datetime import datetime

### Load Data

In [2]:
# Directory holding the preprocessed arrays read by this notebook.
path = './droped/'

# Feature matrices, stored in SciPy's sparse .npz format.
train_X, test_X = (
    sp.sparse.load_npz(path + fname) for fname in ('train_X.npz', 'test_X.npz')
)

# Training targets (encoded into class indices in the preparation cell).
train_y = np.load(path + 'train_y.npy')

# Per-row identifiers; test_VN is used as the 'VisitNumber' index of the
# submission frame.
train_VN, test_VN = (
    np.load(path + fname) for fname in ('train_VN.npy', 'test_VN.npy')
)

### Preparation

In [4]:
# Encode the raw labels as consecutive integers 0..K-1, ordered by sorted
# label value. `classes[k]` is the original label behind encoded index k.
# This yields the same encoding as pd.get_dummies(train_y).values.argmax(1)
# but also exposes the number of distinct classes.
classes, y = np.unique(train_y, return_inverse=True)
N = train_X.shape[0]

# Boosting rounds trained for every model in the ensemble.
num_round = 550

# 'num_class' is derived from the labels actually present instead of being
# hard-coded, so the width of the predicted probability matrix always matches
# the encoding of `y`.
# NOTE(review): 'silent' is deprecated in newer XGBoost releases in favour of
# 'verbosity' -- confirm against the installed version before changing it.
xgb_params = {'objective':'multi:softprob', 'num_class':len(classes),
              'eta':.2, 'max_depth':5, 'colsample_bytree':.4, 'subsample':.7, 'lambda': 5,
              'silent':1}

# Build the DMatrix objects once; they are reused by every model.
dtrain = xgb.DMatrix(train_X, label = y)
dtest = xgb.DMatrix(test_X)

# Number of differently-seeded models whose predictions are averaged.
nModels = 20

In [5]:
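# Disabled write test -- presumably used to check that the D:/ checkpoint
# location is writable before starting the long training run (TODO confirm).
# np.save('D:/id' + str(0) + '.npy', np.array([0,0,0]))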

### Training and Ensembling

In [6]:
# Running sums of the predicted class probabilities over all models trained
# so far. The test accumulator is sized from the test matrix itself: using the
# training row count N only works when both sets have the same number of rows.
n_classes = xgb_params['num_class']
sum_test_pr = np.zeros((test_X.shape[0], n_classes))
sum_train_pr = np.zeros((N, n_classes))

print('Training Time\tTrLoss\tTrLoss_Avg')
for j in range(nModels):
    t0 = datetime.now()
    # A distinct, deterministic seed per model: the models differ only in
    # their random row/column subsampling.
    xgb_params['seed'] = 10202*j + 50604
    bst = xgb.train(xgb_params, dtrain, num_round)
    test_pr = bst.predict(dtest)
    train_pr = bst.predict(dtrain)
    sum_test_pr += test_pr
    sum_train_pr += train_pr
    # Columns: wall time, this model's training loss, training loss of the
    # average over the j+1 models trained so far.
    print(datetime.now() - t0, '\t', log_loss(y, train_pr), '\t', log_loss(y, sum_train_pr / (j+1)))
    # Checkpoint after every model. The file holds the running SUM of test
    # predictions, so divide by (j + 1) to recover the average.
    np.save('D:/id' + str(j) + '.npy', sum_test_pr)

# Average the ensemble and write the submission: one probability column per
# original label (sorted, matching the encoding of y), indexed by VisitNumber.
avg_test_pr = sum_test_pr / nModels
col_list = ['TripType_' + str(lb) for lb in np.unique(train_y)]
result = pd.DataFrame(avg_test_pr, columns=col_list, index=test_VN)
result.index.name = 'VisitNumber'
result.to_csv('submission_ensembled.csv')

Training Time	TrLoss	TrLoss_Avg
0:20:47.842203 	 0.397623186535 	 0.39762319351
0:20:21.332338 	 0.397950371359 	 0.395741392924
0:20:11.885828 	 0.39719846512 	 0.394698363597
0:20:16.239720 	 0.397820199493 	 0.39442775907
0:20:09.829959 	 0.397107566228 	 0.394065758414
0:20:20.011718 	 0.397283718518 	 0.393873628313
0:20:11.381488 	 0.3979832295 	 0.393830559476
0:20:14.703368 	 0.39825500002 	 0.39386203738
0:20:08.974889 	 0.397724720987 	 0.39382433659
0:20:18.878964 	 0.396578646633 	 0.393683164883
0:20:09.309613 	 0.397009273026 	 0.393607643307
0:22:17.513251 	 0.397805882042 	 0.393590728397
0:20:29.376931 	 0.397595129021 	 0.393547496087
0:20:26.428478 	 0.397209123043 	 0.393496156018
0:20:36.343566 	 0.396940869417 	 0.393443164134
0:20:30.540712 	 0.396650156455 	 0.393362912949
0:20:31.255680 	 0.397073483134 	 0.393330483343
0:20:18.151477 	 0.397495412416 	 0.393316751544
0:20:09.479227 	 0.397332492842 	 0.393299372382
0:20:32.658611 	 0.398158458655 	 0.393328665