## **Setup**

In [62]:
from google.colab import drive

# Folders inside the mounted Drive, written relative to mount_path. Both keep their
# trailing slash so a file name can be appended directly.
results_path = 'My Drive/AI For Good - AI Blitz 3/AutoDrive/Results/'
model_path = 'My Drive/AI For Good - AI Blitz 3/AutoDrive/Models/'

# Mount Google Drive; everything above is then reachable under mount_path.
mount_path = '/content/gdrive/'
drive.mount(mount_path)

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).


In [10]:
import os
import pickle
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import datetime
import pytz
from zipfile import ZipFile
from tempfile import TemporaryDirectory
import requests
import cv2
import shutil
import glob

# from sklearn.metrics import roc_auc_score, roc_curve, auc
from sklearn.model_selection import train_test_split

# Plot options: apply the 'fivethirtyeight' matplotlib style sheet to all figures.
# Optional global font-size override, currently disabled:
# plt.rcParams.update({'font.size': 11})
plt.style.use('fivethirtyeight')

# Data Loading

In [63]:
# Table with one row per image: 'filename' and the regression target 'canSteering'.
# Presumably the combined train + validation labels (going by the file name) - confirm.
# NOTE: unpickling can execute arbitrary code; only load pickles you created yourself.
combdf = pd.read_pickle(mount_path + results_path + 'train_val_combdf.pkl')

In [4]:
# Feature matrix used to train the boosting model, one row per row of `combdf`.
# Presumably the stacked predictions of upstream models (going by the file name) - confirm.
# The path is built from `results_path` rather than repeating the folder literal, so the
# results location is configured in exactly one place (the setup cell).
allpreds = np.load(f'{mount_path}{results_path}train_val_preds.npy')

In [9]:
# Quick look at what was loaded: label table shape and head, then feature matrix shape.
print(combdf.shape)
display(combdf.head())
print(allpreds.shape)

# The features and the labels are passed to `fit` together further down, so they must
# describe the same rows. Fail early instead of training on misaligned data.
assert len(combdf) == len(allpreds), (
    f'row count mismatch: {len(combdf)} labels vs {len(allpreds)} feature rows'
)

(62695, 2)


Unnamed: 0,filename,canSteering
0,33856.jpg,180.0
1,61909.jpg,-194.370014
2,36269.jpg,-39.000471
3,60259.jpg,-185.300714
4,50681.jpg,44.939983


(62695, 9)


# Train Boosting regressor

In [24]:
import xgboost as xgb
from sklearn.model_selection import GridSearchCV

### Grid search for best parameters

In [29]:
# Exhaustive search over 3 learning rates x 4 depths x 4 ensemble sizes = 48 candidates,
# each evaluated with 3-fold cross-validation (144 fits in total).
grid_values = {'learning_rate':[0.01,0.1,0.5],'max_depth':[1,3,5,10],'n_estimators':[50,100,200,500]}

clf=xgb.XGBRegressor()

# scikit-learn always maximises the score, so the mean squared error is negated:
# the best candidate is the one whose (negative) score is closest to zero.
grid_clf = GridSearchCV(clf, param_grid = grid_values, scoring='neg_mean_squared_error', cv=3,
                        verbose=10, n_jobs=-1)
grid_clf.fit(allpreds,combdf['canSteering'])

# The labels used to say "f1", which is a classification metric that is not computed here;
# the reported number is the cross-validated negative MSE of a regressor.
print('Grid best score (neg. MSE): ', grid_clf.best_score_)
print('Grid best parameter (min. MSE): ', grid_clf.best_params_)

Fitting 3 folds for each of 48 candidates, totalling 144 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.
[Parallel(n_jobs=-1)]: Done   1 tasks      | elapsed:    2.9s
[Parallel(n_jobs=-1)]: Done   4 tasks      | elapsed:    5.7s
[Parallel(n_jobs=-1)]: Done   9 tasks      | elapsed:   17.7s
[Parallel(n_jobs=-1)]: Done  14 tasks      | elapsed:   39.7s
[Parallel(n_jobs=-1)]: Done  21 tasks      | elapsed:  1.2min
[Parallel(n_jobs=-1)]: Done  28 tasks      | elapsed:  2.1min
[Parallel(n_jobs=-1)]: Done  37 tasks      | elapsed:  4.2min
[Parallel(n_jobs=-1)]: Done  46 tasks      | elapsed:  7.8min
[Parallel(n_jobs=-1)]: Done  57 tasks      | elapsed:  8.8min
[Parallel(n_jobs=-1)]: Done  68 tasks      | elapsed:  9.9min
[Parallel(n_jobs=-1)]: Done  81 tasks      | elapsed: 11.8min
[Parallel(n_jobs=-1)]: Done  94 tasks      | elapsed: 16.4min
[Parallel(n_jobs=-1)]: Done 109 tasks      | elapsed: 18.0min
[Parallel(n_jobs=-1)]: Done 124 tasks      | elapsed: 19.6min
[Parallel(n_jobs=-1)]: Done 141 tasks      | elapsed: 23

Grid best score (f1):  -785.8225306492999
Grid best parameter (max. f1):  {'learning_rate': 0.01, 'max_depth': 10, 'n_estimators': 500}


### Fit the model w/ the best parameters

Check on a validation set

In [50]:
# Hold out part of the data so the tuned hyper-parameters can be checked on unseen rows.
# The split has to be created here: without it X_train / X_val / y_train / y_val are
# undefined on a fresh kernel (Restart & Run All would fail on this cell).
# NOTE(review): the split used for the logged run is not shown in the notebook; the 20%
# hold-out and the seed below are placeholder choices - adjust if a specific split is needed.
X_train, X_val, y_train, y_val = train_test_split(
    allpreds, combdf['canSteering'], test_size=0.2, random_state=42)

# Best parameters reported by the grid search above.
xgb_model=xgb.XGBRegressor(learning_rate= 0.01, max_depth= 10, n_estimators= 500)

# eval_set makes XGBoost log the validation RMSE after every boosting round.
xgb_model.fit(X=X_train,y=y_train, eval_set=[(X_val,y_val)], eval_metric='rmse')

[0]	validation_0-rmse:211.508
[1]	validation_0-rmse:209.477
[2]	validation_0-rmse:207.464
[3]	validation_0-rmse:205.473
[4]	validation_0-rmse:203.504
[5]	validation_0-rmse:201.553
[6]	validation_0-rmse:199.622
[7]	validation_0-rmse:197.712
[8]	validation_0-rmse:195.822
[9]	validation_0-rmse:193.952
[10]	validation_0-rmse:192.098
[11]	validation_0-rmse:190.266
[12]	validation_0-rmse:188.449
[13]	validation_0-rmse:186.655
[14]	validation_0-rmse:184.877
[15]	validation_0-rmse:183.119
[16]	validation_0-rmse:181.375
[17]	validation_0-rmse:179.654
[18]	validation_0-rmse:177.945
[19]	validation_0-rmse:176.258
[20]	validation_0-rmse:174.584
[21]	validation_0-rmse:172.932
[22]	validation_0-rmse:171.299
[23]	validation_0-rmse:169.676
[24]	validation_0-rmse:168.074
[25]	validation_0-rmse:166.487
[26]	validation_0-rmse:164.914
[27]	validation_0-rmse:163.359
[28]	validation_0-rmse:161.821
[29]	validation_0-rmse:160.296
[30]	validation_0-rmse:158.788
[31]	validation_0-rmse:157.296
[32]	validation_0-

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0,
             importance_type='gain', learning_rate=0.01, max_delta_step=0,
             max_depth=10, min_child_weight=1, missing=None, n_estimators=500,
             n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
             silent=None, subsample=1, verbosity=1)

Fit on the whole dataset

In [51]:
# Final model: same hyper-parameters as the validation run, but trained on every
# available row so that no labelled data is left unused for the submission.
xgb_model = xgb.XGBRegressor(
    learning_rate=0.01,
    max_depth=10,
    n_estimators=500,
)

# Last expression of the cell, so the fitted estimator's repr is displayed.
xgb_model.fit(allpreds, combdf['canSteering'])



XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0,
             importance_type='gain', learning_rate=0.01, max_delta_step=0,
             max_depth=10, min_child_weight=1, missing=None, n_estimators=500,
             n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, seed=None,
             silent=None, subsample=1, verbosity=1)

# Make predictions on test set

In [52]:
# Feature matrix for the test images, fed to the trained model in the next cell.
# Presumably produced by the same upstream models as the training features - confirm.
# Built from `results_path` instead of repeating the folder literal, so the results
# location is configured in exactly one place (the setup cell).
testpreds = np.load(f'{mount_path}{results_path}all_preds.npy')

In [53]:
# Predict the target for every test row with the model fitted on the whole dataset.
subpreds=xgb_model.predict(testpreds)

# Prep submission

In [68]:
# Test-set table; its 'filename' column is used to label the predictions below.
# NOTE: unpickling can execute arbitrary code; only load pickles you created yourself.
testdf = pd.read_pickle(mount_path + results_path + 'testdf.pkl')

In [69]:
# Build the submission table: one predicted steering value per test image.
# Each prediction must be matched to exactly one file name.
assert len(subpreds) == len(testdf), (
    f'prediction count mismatch: {len(subpreds)} predictions vs {len(testdf)} test rows'
)

submitdf=pd.DataFrame(data=subpreds,columns=['canSteering'])
# Assign by position, not by index label: assigning the Series itself would align on
# testdf's index and silently produce NaN / shuffled names if that index is not 0..n-1.
# Assumes the predictions are in the same row order as testdf - confirm against the
# code that produced the test feature matrix.
submitdf['filename']=testdf['filename'].values
submitdf.head()

Unnamed: 0,canSteering,filename
0,-26.495888,0.jpg
1,20.703915,1.jpg
2,-38.039177,2.jpg
3,-37.272419,3.jpg
4,-36.139706,4.jpg


In [72]:
# Timestamp (US/Eastern, yymmdd_HHMM) so successive submissions get distinct file names.
eastern = pytz.timezone('US/Eastern')
datestr = datetime.datetime.now(eastern).strftime("%y%m%d_%H%M")

# Write to the Colab working directory; the row index is kept as the first CSV column.
submitdf.to_csv(f'/content/{datestr}_submission.csv', index=True)