### Install XGBoost, Pandas, and NumPy if you haven't already
[https://pypi.org/project/xgboost/](https://pypi.org/project/xgboost/)

### Download the files from this folder and place them in the same directory as this notebook, or place them elsewhere and update the file paths in the code
[https://drive.google.com/drive/folders/1UtbN5iiR_NNPiZ25SVpD-QE4IX8B97Qs?usp=sharing](https://drive.google.com/drive/folders/1UtbN5iiR_NNPiZ25SVpD-QE4IX8B97Qs?usp=sharing)

You must be signed in with a UCSD email account to access that link.

In [1]:
import pandas as pd
import numpy as np
import xgboost as xgb

In [2]:
# Columns that are handed to XGBoost as pandas categoricals.
cat_list = ["ORIGIN_CALL", "ORIGIN_STAND", "TAXI_ID"]

# Read the raw training table once and keep it separate from the filtered
# frame so this cell gives the same result every time it is re-run.
train_raw = pd.read_csv("./combined_trainvf_trig.csv")

# Keep only rows with DIST < 20 and 30 < TARGET < 15000. Building a single
# mask and casting with .astype(dict) returns a new frame, so no column is
# ever assigned on a filtered slice (avoids SettingWithCopyWarning).
keep_rows = (
    (train_raw["DIST"] < 20)
    & (train_raw["TARGET"] > 30)
    & (train_raw["TARGET"] < 15000)
)
df_train = train_raw.loc[keep_rows].astype({col: "category" for col in cat_list})

df_test = pd.read_csv("./combined_testvf_trig.csv")
for col in cat_list:
    # Re-use the training categories so test codes line up with the training
    # ones; test values absent from the training categories become NaN.
    df_test[col] = df_test[col].astype("category").cat.set_categories(df_train[col].cat.categories)

### Note: If the model-loading cell below doesn't work, try running the notebook on DataHub with a GPU

In [3]:
# Load models
def load_snapshot_model(model_path):
    """Build an XGBRegressor with the shared settings and load saved weights.

    Parameters
    ----------
    model_path : str
        Path to a model file accepted by ``XGBRegressor.load_model``.

    Returns
    -------
    xgb.XGBRegressor
        The regressor after ``load_model(model_path)`` has been called on it.
    """
    # NOTE(review): "gpu_hist" requires a GPU-enabled XGBoost build; newer
    # XGBoost releases appear to prefer tree_method="hist" with device="cuda"
    # -- confirm against the installed version before changing.
    model = xgb.XGBRegressor(tree_method="gpu_hist",
                             booster="dart",
                             n_estimators=100,
                             enable_categorical=True,
                             max_cat_to_onehot=100,
                             objective="reg:gamma")
    model.load_model(model_path)
    return model


xgb_u = load_snapshot_model("./best_validation.json")    # Best validation / Halloween snapshot
xgb_aug = load_snapshot_model("./best_aug.json")         # August snapshot
xgb_sep = load_snapshot_model("./best_sep.json")         # September snapshot
xgb_oct = load_snapshot_model("./early_oct.json")        # Oct. 6 snapshot
xgb_xmas = load_snapshot_model("./best_christmas.json")  # Dec. 21 snapshot

### Submission 1

In [4]:
# Submission 1: score every row of df_test with xgb_u (the model loaded from
# best_validation.json); the resulting array is shown as the cell output.
xgb_u.predict(df_test)

array([ 834.36957,  773.02026,  802.2202 ,  724.85205,  784.04126,
       1062.1941 ,  802.844  ,  784.9671 ,  776.72626,  843.73   ,
        747.79425,  819.26135, 1120.386  , 1548.1792 ,  851.00867,
        958.89197, 1017.8074 , 1685.2157 ,  913.5704 , 1253.2373 ,
        765.38605,  840.5893 ,  845.07965,  905.5813 ,  848.08386,
        806.92145,  834.70306,  855.97784,  771.65466,  895.7491 ,
        802.6993 ,  810.7188 ,  855.5143 ,  951.7547 , 1437.8717 ,
        926.70294,  962.37006,  742.64813,  862.902  ,  838.0691 ,
        654.036  ,  771.78937, 1636.9084 ,  794.95544,  759.26324,
        728.7717 ,  761.2157 ,  676.37646,  651.3116 ,  697.4943 ,
        997.7938 ,  807.8231 ,  779.61383,  861.4121 ,  945.41956,
        693.3123 ,  766.9139 ,  777.97687, 1200.9869 ,  788.1438 ,
        814.2785 ,  813.5458 ,  846.3193 , 1048.8154 ,  633.52905,
        932.74817,  815.3516 ,  722.5624 , 1056.415  ,  678.0027 ,
       1227.401  , 1005.9026 ,  838.7327 , 1219.524  ,  700.87

### Submission 2

In [5]:
# Submission 2: each test row is scored by the snapshot model that matches
# its WK_OF_YR value, and the per-snapshot predictions are stitched back
# together in the original row order.
#
# Every mask is built once and used for BOTH selecting the rows to predict
# and writing the predictions back, so the two can never disagree.
# (Previously the Christmas and August models predicted on MONTH == 12 /
# MONTH == 8 rows but were written back through WK_OF_YR == 51 / 33 masks.)
week_of_year = df_test["WK_OF_YR"].to_numpy()
is_xmas = week_of_year == 51
is_aug = week_of_year == 33
is_sep = week_of_year == 40
is_oct = week_of_year == 41
is_hal = week_of_year == 44

preds_xgbx = xgb_xmas.predict(df_test[is_xmas])
preds_hal = xgb_u.predict(df_test[is_hal])
preds_aug = xgb_aug.predict(df_test[is_aug])
preds_sep = xgb_sep.predict(df_test[is_sep])
preds_oct = xgb_oct.predict(df_test[is_oct])

# Size the output from the data instead of a hard-coded row count.
preds_cmb = np.zeros(len(df_test))
preds_cmb[is_xmas] = preds_xgbx
preds_cmb[is_aug] = preds_aug
preds_cmb[is_sep] = preds_sep
preds_cmb[is_oct] = preds_oct
preds_cmb[is_hal] = preds_hal

# Rows that match none of the snapshot weeks keep the placeholder 0.0;
# surface that instead of silently submitting zeros.
unassigned = ~(is_xmas | is_aug | is_sep | is_oct | is_hal)
if unassigned.any():
    print(f"Warning: {int(unassigned.sum())} test rows matched no snapshot week and were left at 0")

print(preds_cmb)

[1151.36242676 1136.22497559  946.02111816 1089.12634277 1065.50610352
 1322.95422363 1258.92370605 1196.51245117 1156.26733398 1049.93884277
  973.6282959  1275.0645752  1815.05480957 2648.80297852 1002.25140381
 1976.90307617 1156.40795898 1944.12353516 1101.4140625  2347.2253418
  925.56359863  957.7230835   877.29602051 1321.0246582  1270.69995117
  940.95581055 1222.34655762 1048.37792969 1114.77612305 1108.49121094
 1036.72436523 1030.22143555 1269.73327637 1600.17028809 5157.4609375
 1772.94494629 1285.40881348 1085.78283691 1394.7232666  1250.11462402
  801.58087158 1128.67321777 3591.04394531 1149.91296387  983.18389893
  967.12908936  929.66082764  940.01538086  658.55651855  930.80255127
 1508.13244629 1326.33728027 1199.11181641 1535.19677734 1524.87329102
  943.50238037  961.29547119 1064.85656738 2341.76293945 1001.43450928
 1042.69445801 1370.9744873  1288.3684082  1579.54711914  781.46734619
  956.0713501  1124.26330566 1040.58703613 2015.71142578  879.25183105
 3294.12