# Modeling

In [1]:
# Imports
%matplotlib inline
import pickle
import gc
import matplotlib.pyplot as plt
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split

## Read processed data

In [2]:
# Load the preprocessed frame from disk.
df = pd.read_pickle("data-pre/df.pkl")

# Columns kept out of the model inputs: every column whose name starts with
# 'target' and does not end in a decimal digit, plus the 'ID' column.
# NOTE(review): 'target*' columns ending in a digit are presumably lag
# features that are meant to stay in -- confirm against the preprocessing step.
drop_columns = [
    name
    for name in df.columns
    if name[-1] not in '0123456789' and name.startswith('target')
]
drop_columns.append('ID')

# Everything that is not explicitly dropped is used as a feature.
features = df.columns.difference(drop_columns)

In [3]:
# Seed for the train/validation split so that the split (and therefore the
# validation scores and the early-stopping point) is reproducible across runs.
SPLIT_SEED = 42

# Row masks: blocks before 34 are used for fitting, block 34 is predicted.
f0 = df.date_block_num < 34
f1 = df.date_block_num == 34

# Filter the labelled rows once instead of materializing df[f0] twice.
labelled = df[f0]

# 80/20 random split, stratified on the target value.
# NOTE(review): this is a random split, so rows from the same date_block_num
# land in both train and val; a time-based holdout may give a more honest
# validation score for this kind of data -- confirm.
train, val = train_test_split(
    labelled,
    test_size=0.2,
    stratify=labelled['target'],
    random_state=SPLIT_SEED,
)
del labelled  # drop the extra reference so the intermediate frame can be freed
test = df[f1]

# Wrap each split in a DMatrix; the test matrix is built without labels.
Train = xgb.DMatrix(train[features], train['target'])
Val = xgb.DMatrix(val[features], val['target'])
Test = xgb.DMatrix(test[features])

In [4]:
# The full frame is no longer referenced by name after this point; drop it
# and run a garbage-collection pass to release memory before training.
del df
# gc.collect() returns the number of unreachable objects it found, which is
# what the notebook displays as this cell's output.
gc.collect()

47

## XGBoost Training

In [5]:
%%time

xgb_params = {
    'eval_metric': 'rmse',
    'lambda': '0.171', 
    'gamma': '0.124',
    'booster': 'gbtree', 
    'alpha': '0.170',
    'objective': 'reg:squarederror',
    'colsample_bytree': '0.715',
    'subsample': '0.874', 
    'min_child_weight': 26,
    'eta': '0.148',
    'max_depth': 6,
    'tree_method': 'gpu_hist',
}


model = xgb.train(xgb_params, Train, 1500, [(Train, 'Train'), (Val, 'Val')], early_stopping_rounds=10, verbose_eval=1)

[0]	Train-rmse:1.16808	Val-rmse:1.16830
Multiple eval metrics have been passed: 'Val-rmse' will be used for early stopping.

Will train until Val-rmse hasn't improved in 10 rounds.
[1]	Train-rmse:1.11203	Val-rmse:1.11211
[2]	Train-rmse:1.06709	Val-rmse:1.06709
[3]	Train-rmse:1.03280	Val-rmse:1.03297
[4]	Train-rmse:1.00679	Val-rmse:1.00713
[5]	Train-rmse:0.98742	Val-rmse:0.98784
[6]	Train-rmse:0.97121	Val-rmse:0.97186
[7]	Train-rmse:0.95979	Val-rmse:0.96050
[8]	Train-rmse:0.94863	Val-rmse:0.94940
[9]	Train-rmse:0.93939	Val-rmse:0.94023
[10]	Train-rmse:0.93233	Val-rmse:0.93324
[11]	Train-rmse:0.92702	Val-rmse:0.92815
[12]	Train-rmse:0.92283	Val-rmse:0.92407
[13]	Train-rmse:0.91916	Val-rmse:0.92044
[14]	Train-rmse:0.91572	Val-rmse:0.91709
[15]	Train-rmse:0.91319	Val-rmse:0.91465
[16]	Train-rmse:0.91112	Val-rmse:0.91265
[17]	Train-rmse:0.90890	Val-rmse:0.91052
[18]	Train-rmse:0.90727	Val-rmse:0.90892
[19]	Train-rmse:0.90566	Val-rmse:0.90749
[20]	Train-rmse:0.90422	Val-rmse:0.90621
[21]	Tra

[195]	Train-rmse:0.84493	Val-rmse:0.85680
[196]	Train-rmse:0.84487	Val-rmse:0.85678
[197]	Train-rmse:0.84468	Val-rmse:0.85664
[198]	Train-rmse:0.84455	Val-rmse:0.85657
[199]	Train-rmse:0.84450	Val-rmse:0.85655
[200]	Train-rmse:0.84438	Val-rmse:0.85648
[201]	Train-rmse:0.84428	Val-rmse:0.85640
[202]	Train-rmse:0.84411	Val-rmse:0.85624
[203]	Train-rmse:0.84405	Val-rmse:0.85624
[204]	Train-rmse:0.84378	Val-rmse:0.85601
[205]	Train-rmse:0.84370	Val-rmse:0.85596
[206]	Train-rmse:0.84351	Val-rmse:0.85578
[207]	Train-rmse:0.84321	Val-rmse:0.85561
[208]	Train-rmse:0.84310	Val-rmse:0.85556
[209]	Train-rmse:0.84289	Val-rmse:0.85537
[210]	Train-rmse:0.84276	Val-rmse:0.85529
[211]	Train-rmse:0.84270	Val-rmse:0.85526
[212]	Train-rmse:0.84205	Val-rmse:0.85468
[213]	Train-rmse:0.84190	Val-rmse:0.85457
[214]	Train-rmse:0.84184	Val-rmse:0.85455
[215]	Train-rmse:0.84176	Val-rmse:0.85454
[216]	Train-rmse:0.84172	Val-rmse:0.85451
[217]	Train-rmse:0.84152	Val-rmse:0.85433
[218]	Train-rmse:0.84133	Val-rmse:

[391]	Train-rmse:0.82348	Val-rmse:0.84259
[392]	Train-rmse:0.82342	Val-rmse:0.84258
[393]	Train-rmse:0.82333	Val-rmse:0.84253
[394]	Train-rmse:0.82325	Val-rmse:0.84249
[395]	Train-rmse:0.82321	Val-rmse:0.84247
[396]	Train-rmse:0.82285	Val-rmse:0.84217
[397]	Train-rmse:0.82279	Val-rmse:0.84216
[398]	Train-rmse:0.82276	Val-rmse:0.84215
[399]	Train-rmse:0.82269	Val-rmse:0.84211
[400]	Train-rmse:0.82267	Val-rmse:0.84210
[401]	Train-rmse:0.82254	Val-rmse:0.84200
[402]	Train-rmse:0.82248	Val-rmse:0.84194
[403]	Train-rmse:0.82228	Val-rmse:0.84182
[404]	Train-rmse:0.82213	Val-rmse:0.84169
[405]	Train-rmse:0.82205	Val-rmse:0.84163
[406]	Train-rmse:0.82197	Val-rmse:0.84157
[407]	Train-rmse:0.82193	Val-rmse:0.84158
[408]	Train-rmse:0.82179	Val-rmse:0.84147
[409]	Train-rmse:0.82174	Val-rmse:0.84145
[410]	Train-rmse:0.82166	Val-rmse:0.84141
[411]	Train-rmse:0.82164	Val-rmse:0.84141
[412]	Train-rmse:0.82153	Val-rmse:0.84133
[413]	Train-rmse:0.82144	Val-rmse:0.84128
[414]	Train-rmse:0.82139	Val-rmse:

[587]	Train-rmse:0.80583	Val-rmse:0.83085
[588]	Train-rmse:0.80581	Val-rmse:0.83085
[589]	Train-rmse:0.80570	Val-rmse:0.83073
[590]	Train-rmse:0.80562	Val-rmse:0.83066
[591]	Train-rmse:0.80557	Val-rmse:0.83065
[592]	Train-rmse:0.80551	Val-rmse:0.83062
[593]	Train-rmse:0.80536	Val-rmse:0.83050
[594]	Train-rmse:0.80534	Val-rmse:0.83051
[595]	Train-rmse:0.80530	Val-rmse:0.83049
[596]	Train-rmse:0.80486	Val-rmse:0.83013
[597]	Train-rmse:0.80477	Val-rmse:0.83006
[598]	Train-rmse:0.80475	Val-rmse:0.83006
[599]	Train-rmse:0.80472	Val-rmse:0.83006
[600]	Train-rmse:0.80465	Val-rmse:0.83005
[601]	Train-rmse:0.80461	Val-rmse:0.83004
[602]	Train-rmse:0.80458	Val-rmse:0.83003
[603]	Train-rmse:0.80454	Val-rmse:0.83002
[604]	Train-rmse:0.80451	Val-rmse:0.83001
[605]	Train-rmse:0.80449	Val-rmse:0.83002
[606]	Train-rmse:0.80446	Val-rmse:0.83003
[607]	Train-rmse:0.80437	Val-rmse:0.82995
[608]	Train-rmse:0.80432	Val-rmse:0.82994
[609]	Train-rmse:0.80428	Val-rmse:0.82993
[610]	Train-rmse:0.80424	Val-rmse:

[783]	Train-rmse:0.79439	Val-rmse:0.82470
[784]	Train-rmse:0.79436	Val-rmse:0.82468
[785]	Train-rmse:0.79431	Val-rmse:0.82465
[786]	Train-rmse:0.79426	Val-rmse:0.82461
[787]	Train-rmse:0.79416	Val-rmse:0.82452
[788]	Train-rmse:0.79406	Val-rmse:0.82445
[789]	Train-rmse:0.79389	Val-rmse:0.82432
[790]	Train-rmse:0.79386	Val-rmse:0.82430
[791]	Train-rmse:0.79381	Val-rmse:0.82428
[792]	Train-rmse:0.79377	Val-rmse:0.82427
[793]	Train-rmse:0.79370	Val-rmse:0.82422
[794]	Train-rmse:0.79367	Val-rmse:0.82421
[795]	Train-rmse:0.79363	Val-rmse:0.82418
[796]	Train-rmse:0.79360	Val-rmse:0.82417
[797]	Train-rmse:0.79358	Val-rmse:0.82417
[798]	Train-rmse:0.79356	Val-rmse:0.82416
[799]	Train-rmse:0.79346	Val-rmse:0.82408
[800]	Train-rmse:0.79342	Val-rmse:0.82408
[801]	Train-rmse:0.79330	Val-rmse:0.82400
[802]	Train-rmse:0.79324	Val-rmse:0.82394
[803]	Train-rmse:0.79320	Val-rmse:0.82393
[804]	Train-rmse:0.79316	Val-rmse:0.82391
[805]	Train-rmse:0.79313	Val-rmse:0.82391
[806]	Train-rmse:0.79309	Val-rmse:

[979]	Train-rmse:0.78545	Val-rmse:0.82011
[980]	Train-rmse:0.78542	Val-rmse:0.82010
[981]	Train-rmse:0.78540	Val-rmse:0.82010
[982]	Train-rmse:0.78536	Val-rmse:0.82007
[983]	Train-rmse:0.78533	Val-rmse:0.82007
[984]	Train-rmse:0.78530	Val-rmse:0.82007
[985]	Train-rmse:0.78527	Val-rmse:0.82007
[986]	Train-rmse:0.78525	Val-rmse:0.82008
[987]	Train-rmse:0.78522	Val-rmse:0.82007
[988]	Train-rmse:0.78519	Val-rmse:0.82006
[989]	Train-rmse:0.78516	Val-rmse:0.82006
[990]	Train-rmse:0.78514	Val-rmse:0.82005
[991]	Train-rmse:0.78511	Val-rmse:0.82005
[992]	Train-rmse:0.78507	Val-rmse:0.82005
[993]	Train-rmse:0.78504	Val-rmse:0.82005
[994]	Train-rmse:0.78502	Val-rmse:0.82005
[995]	Train-rmse:0.78501	Val-rmse:0.82005
[996]	Train-rmse:0.78493	Val-rmse:0.81998
[997]	Train-rmse:0.78491	Val-rmse:0.81995
[998]	Train-rmse:0.78486	Val-rmse:0.81994
[999]	Train-rmse:0.78481	Val-rmse:0.81992
[1000]	Train-rmse:0.78478	Val-rmse:0.81989
[1001]	Train-rmse:0.78476	Val-rmse:0.81988
[1002]	Train-rmse:0.78473	Val-rm

[1171]	Train-rmse:0.77846	Val-rmse:0.81686
[1172]	Train-rmse:0.77843	Val-rmse:0.81686
[1173]	Train-rmse:0.77841	Val-rmse:0.81685
[1174]	Train-rmse:0.77837	Val-rmse:0.81686
[1175]	Train-rmse:0.77834	Val-rmse:0.81685
[1176]	Train-rmse:0.77832	Val-rmse:0.81684
[1177]	Train-rmse:0.77831	Val-rmse:0.81684
[1178]	Train-rmse:0.77828	Val-rmse:0.81685
[1179]	Train-rmse:0.77821	Val-rmse:0.81682
[1180]	Train-rmse:0.77817	Val-rmse:0.81680
[1181]	Train-rmse:0.77814	Val-rmse:0.81679
[1182]	Train-rmse:0.77811	Val-rmse:0.81677
[1183]	Train-rmse:0.77808	Val-rmse:0.81676
[1184]	Train-rmse:0.77806	Val-rmse:0.81676
[1185]	Train-rmse:0.77802	Val-rmse:0.81674
[1186]	Train-rmse:0.77801	Val-rmse:0.81673
[1187]	Train-rmse:0.77798	Val-rmse:0.81672
[1188]	Train-rmse:0.77796	Val-rmse:0.81671
[1189]	Train-rmse:0.77794	Val-rmse:0.81672
[1190]	Train-rmse:0.77793	Val-rmse:0.81672
[1191]	Train-rmse:0.77790	Val-rmse:0.81670
[1192]	Train-rmse:0.77785	Val-rmse:0.81668
[1193]	Train-rmse:0.77783	Val-rmse:0.81667
[1194]	Trai

[1362]	Train-rmse:0.77115	Val-rmse:0.81290
[1363]	Train-rmse:0.77110	Val-rmse:0.81285
[1364]	Train-rmse:0.77109	Val-rmse:0.81285
[1365]	Train-rmse:0.77107	Val-rmse:0.81285
[1366]	Train-rmse:0.77105	Val-rmse:0.81285
[1367]	Train-rmse:0.77104	Val-rmse:0.81285
[1368]	Train-rmse:0.77102	Val-rmse:0.81285
[1369]	Train-rmse:0.77101	Val-rmse:0.81286
[1370]	Train-rmse:0.77098	Val-rmse:0.81286
[1371]	Train-rmse:0.77096	Val-rmse:0.81286
[1372]	Train-rmse:0.77093	Val-rmse:0.81285
[1373]	Train-rmse:0.77091	Val-rmse:0.81284
[1374]	Train-rmse:0.77089	Val-rmse:0.81285
[1375]	Train-rmse:0.77086	Val-rmse:0.81285
[1376]	Train-rmse:0.77084	Val-rmse:0.81285
[1377]	Train-rmse:0.77082	Val-rmse:0.81285
[1378]	Train-rmse:0.77079	Val-rmse:0.81285
[1379]	Train-rmse:0.77076	Val-rmse:0.81283
[1380]	Train-rmse:0.77074	Val-rmse:0.81282
[1381]	Train-rmse:0.77073	Val-rmse:0.81282
[1382]	Train-rmse:0.77070	Val-rmse:0.81281
[1383]	Train-rmse:0.77067	Val-rmse:0.81281
[1384]	Train-rmse:0.77055	Val-rmse:0.81271
[1385]	Trai

## Predict test data

In [6]:
# Predict on the test DMatrix and clamp the predictions to [0, 20].
# NOTE(review): the [0, 20] range presumably mirrors how the target was
# clipped upstream -- confirm against the preprocessing step.
# assign() builds a new frame instead of writing a column into `test`, which
# is a boolean-mask slice of the original frame; this avoids pandas'
# SettingWithCopyWarning and keeps the cell safe to re-run.
test = test.assign(item_cnt_month=model.predict(Test).clip(0, 20))

## Save submission & model

In [7]:
# Write the submission file: one row per ID, ordered by ID, without the index.
(
    test[['ID', 'item_cnt_month']]
    .sort_values('ID')
    .to_csv('submissions/submission.csv', index=False)
)

# Persist the trained booster. The context manager guarantees the file handle
# is flushed and closed, which the bare open(...) call did not.
with open('xgb.pickle', 'wb') as model_file:
    pickle.dump(model, model_file)