In [71]:
import pandas as pd
import numpy as np

import xgboost as xgb

import pickle
from sklearn.model_selection import train_test_split

order_items_df = pd.read_csv("order_items.csv")
test_trips_df = pd.read_csv("test_trips.csv")
train_trips_df = pd.read_csv("train_trips.csv")

# Import data and take a look at it

In [72]:
train_trips_df.shape

(117063, 6)

In [73]:
train_trips_df.head()

Unnamed: 0,trip_id,shopper_id,fulfillment_model,store_id,shopping_started_at,shopping_ended_at
0,3119519,48539,model_1,6,2015-09-01 07:03:56,2015-09-01 07:30:56
1,3119513,3775,model_1,1,2015-09-01 07:04:33,2015-09-01 07:40:33
2,3119516,4362,model_1,1,2015-09-01 07:23:21,2015-09-01 07:41:21
3,3119792,47659,model_1,1,2015-09-01 07:29:52,2015-09-01 08:55:52
4,3119922,11475,model_1,1,2015-09-01 07:32:21,2015-09-01 09:01:21


In [74]:
train_trips_df['fulfillment_model'].value_counts()

model_2    77972
model_1    39091
Name: fulfillment_model, dtype: int64

# Data Preprocessing

In [75]:
#Convert character variables to numeric 
train_trips_df['fulfillment_model'] = train_trips_df['fulfillment_model'].map({'model_2':2,'model_1':1})

In [76]:
train_trips_df['fulfillment_model'].value_counts()

2    77972
1    39091
Name: fulfillment_model, dtype: int64

In [77]:
# There a total 14 stores 
train_trips_df.store_id.value_counts().reset_index().sort_values(by='index')

Unnamed: 0,index,store_id
1,1,28793
0,3,46925
2,5,12461
7,6,3255
4,29,5623
3,31,6804
11,54,542
12,78,147
10,90,969
5,105,4587


In [78]:
store_ids = {1:1, 3:2, 5:3, 6:4, 29:5, 31:6, 54:7, 78:8, 90:9, 105:10, 115:11, 123:12, 126:13, 148:14}

#Convert character variables to numeric 
train_trips_df['store_id'] = train_trips_df['store_id'].map(store_ids)

In [79]:
#First, convert datetime strings into datetime
train_trips_df["shopping_started_at"] = pd.to_datetime(train_trips_df["shopping_started_at"], format='%Y-%m-%d %H:%M:%S')
train_trips_df["shopping_ended_at"] = pd.to_datetime(train_trips_df["shopping_ended_at"], format='%Y-%m-%d %H:%M:%S')

In [80]:
#Now construct other variables, like month, date, etc.
train_trips_df["start_month"] = train_trips_df["shopping_started_at"].dt.month
train_trips_df["start_day"] = train_trips_df["shopping_started_at"].dt.day
train_trips_df["start_weekday"] = train_trips_df["shopping_started_at"].dt.weekday 
train_trips_df["start_hour"] = train_trips_df["shopping_started_at"].dt.hour
train_trips_df["start_minute"] = train_trips_df["shopping_started_at"].dt.minute

In [81]:
train_trips_df['trip_duration'] = pd.to_datetime(train_trips_df['shopping_ended_at']) - pd.to_datetime(train_trips_df['shopping_started_at']) 

#Convert duration to seconds for easier interpretation
train_trips_df['trip_duration'] = pd.to_timedelta(train_trips_df["trip_duration"]).astype('timedelta64[s]').astype(int)

In [82]:
train_trips_df.head(5)

Unnamed: 0,trip_id,shopper_id,fulfillment_model,store_id,shopping_started_at,shopping_ended_at,start_month,start_day,start_weekday,start_hour,start_minute,trip_duration
0,3119519,48539,1,4,2015-09-01 07:03:56,2015-09-01 07:30:56,9,1,1,7,3,1620
1,3119513,3775,1,1,2015-09-01 07:04:33,2015-09-01 07:40:33,9,1,1,7,4,2160
2,3119516,4362,1,1,2015-09-01 07:23:21,2015-09-01 07:41:21,9,1,1,7,23,1080
3,3119792,47659,1,1,2015-09-01 07:29:52,2015-09-01 08:55:52,9,1,1,7,29,5160
4,3119922,11475,1,1,2015-09-01 07:32:21,2015-09-01 09:01:21,9,1,1,7,32,5340


# Add Order details as features 

In [83]:
order_items_df.head()

Unnamed: 0,trip_id,item_id,department_name,quantity
0,3119513,368671,Produce,10.0
1,3120462,368671,Produce,10.0
2,3120473,368671,Produce,10.0
3,3121910,368671,Produce,6.0
4,3122332,368671,Produce,10.0


In [84]:
# One trip id is having multiple order from multiple deparments . we can have total quantity as one of the feature

quantity_trip = order_items_df.groupby(by=['trip_id'])["quantity"].sum()

In [85]:
# to Convert pd.series into data frame and then inner join to get total quantity for a trip
quantity_trip = quantity_trip.to_frame().reset_index()

train_trips_df = pd.merge(train_trips_df, quantity_trip,  on = "trip_id",how='inner')


In [86]:
train_trips_df.head(5)

Unnamed: 0,trip_id,shopper_id,fulfillment_model,store_id,shopping_started_at,shopping_ended_at,start_month,start_day,start_weekday,start_hour,start_minute,trip_duration,quantity
0,3119519,48539,1,4,2015-09-01 07:03:56,2015-09-01 07:30:56,9,1,1,7,3,1620,39.0
1,3119513,3775,1,1,2015-09-01 07:04:33,2015-09-01 07:40:33,9,1,1,7,4,2160,183.0
2,3119516,4362,1,1,2015-09-01 07:23:21,2015-09-01 07:41:21,9,1,1,7,23,1080,9.0
3,3119792,47659,1,1,2015-09-01 07:29:52,2015-09-01 08:55:52,9,1,1,7,29,5160,92.0
4,3119922,11475,1,1,2015-09-01 07:32:21,2015-09-01 09:01:21,9,1,1,7,32,5340,186.0


# Modeling

In [87]:
X = train_trips_df.drop(["trip_duration","trip_id", "shopper_id", "shopping_started_at", "shopping_ended_at"], axis=1)
y = train_trips_df["trip_duration"]

Unnamed: 0,fulfillment_model,store_id,start_month,start_day,start_weekday,start_hour,start_minute,quantity
0,1,4,9,1,1,7,3,39.0
1,1,1,9,1,1,7,4,183.0
2,1,1,9,1,1,7,23,9.0
3,1,1,9,1,1,7,29,92.0
4,1,1,9,1,1,7,32,186.0


In [91]:
#Split the data into training, test, and valdiation sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=2018)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=2019)

In [92]:
#Define evaluation metric
def rmsle(y_true, y_pred):
    assert len(y_true) == len(y_pred)
    return np.square(np.log(y_pred + 1) - np.log(y_true + 1)).mean() ** 0.5

In [93]:
#XGBoost parameters 
params = {
    'booster':            'gbtree',
    'objective':          'reg:linear',
    'learning_rate':      0.05,
    'max_depth':          14,
    'subsample':          0.9,
    'colsample_bytree':   0.7,
    'colsample_bylevel':  0.7,
    'silent':             1,
    'feval':              'rmsle'
}

In [94]:
nrounds = 2000

In [95]:
#Define train and validation sets
dtrain = xgb.DMatrix(X_train, np.log(y_train+1))
dval = xgb.DMatrix(X_val, np.log(y_val+1))

#this is for tracking the error
watchlist = [(dval, 'eval'), (dtrain, 'train')]

In [96]:
#Train model
gbm = xgb.train(params,
                dtrain,
                num_boost_round = nrounds,
                evals = watchlist,
                verbose_eval = True
                )

[0]	eval-rmse:6.80773	train-rmse:6.80944
[1]	eval-rmse:6.46992	train-rmse:6.47164
[2]	eval-rmse:6.14917	train-rmse:6.15091
[3]	eval-rmse:5.84384	train-rmse:5.84557
[4]	eval-rmse:5.55389	train-rmse:5.55562
[5]	eval-rmse:5.27919	train-rmse:5.28093
[6]	eval-rmse:5.01846	train-rmse:5.02024
[7]	eval-rmse:4.77008	train-rmse:4.77186
[8]	eval-rmse:4.53495	train-rmse:4.53675
[9]	eval-rmse:4.3111	train-rmse:4.31289
[10]	eval-rmse:4.09854	train-rmse:4.10036
[11]	eval-rmse:3.89675	train-rmse:3.89856
[12]	eval-rmse:3.70506	train-rmse:3.70687
[13]	eval-rmse:3.52371	train-rmse:3.52557
[14]	eval-rmse:3.35107	train-rmse:3.35296
[15]	eval-rmse:3.18733	train-rmse:3.18921
[16]	eval-rmse:3.03232	train-rmse:3.0343
[17]	eval-rmse:2.88532	train-rmse:2.88735
[18]	eval-rmse:2.74538	train-rmse:2.74742
[19]	eval-rmse:2.61259	train-rmse:2.61463
[20]	eval-rmse:2.48672	train-rmse:2.48876
[21]	eval-rmse:2.36794	train-rmse:2.37003
[22]	eval-rmse:2.25484	train-rmse:2.25693
[23]	eval-rmse:2.14758	train-rmse:2.14968
[24]

[188]	eval-rmse:0.478417	train-rmse:0.291592
[189]	eval-rmse:0.478486	train-rmse:0.290983
[190]	eval-rmse:0.478532	train-rmse:0.290406
[191]	eval-rmse:0.478533	train-rmse:0.28999
[192]	eval-rmse:0.478522	train-rmse:0.289441
[193]	eval-rmse:0.478581	train-rmse:0.28878
[194]	eval-rmse:0.478628	train-rmse:0.288409
[195]	eval-rmse:0.47873	train-rmse:0.287333
[196]	eval-rmse:0.4788	train-rmse:0.286244
[197]	eval-rmse:0.478834	train-rmse:0.285575
[198]	eval-rmse:0.478887	train-rmse:0.28481
[199]	eval-rmse:0.478977	train-rmse:0.283832
[200]	eval-rmse:0.479007	train-rmse:0.283464
[201]	eval-rmse:0.479029	train-rmse:0.28268
[202]	eval-rmse:0.47912	train-rmse:0.281863
[203]	eval-rmse:0.479145	train-rmse:0.281511
[204]	eval-rmse:0.479202	train-rmse:0.280813
[205]	eval-rmse:0.479282	train-rmse:0.280278
[206]	eval-rmse:0.479345	train-rmse:0.279584
[207]	eval-rmse:0.479428	train-rmse:0.278788
[208]	eval-rmse:0.47945	train-rmse:0.278552
[209]	eval-rmse:0.479548	train-rmse:0.277838
[210]	eval-rmse:0.4

[372]	eval-rmse:0.488941	train-rmse:0.192997
[373]	eval-rmse:0.488974	train-rmse:0.192838
[374]	eval-rmse:0.488984	train-rmse:0.192773
[375]	eval-rmse:0.489005	train-rmse:0.192689
[376]	eval-rmse:0.489067	train-rmse:0.192197
[377]	eval-rmse:0.489133	train-rmse:0.191681
[378]	eval-rmse:0.489175	train-rmse:0.191414
[379]	eval-rmse:0.489229	train-rmse:0.191082
[380]	eval-rmse:0.48929	train-rmse:0.190653
[381]	eval-rmse:0.489313	train-rmse:0.190464
[382]	eval-rmse:0.489384	train-rmse:0.189896
[383]	eval-rmse:0.489394	train-rmse:0.189851
[384]	eval-rmse:0.489446	train-rmse:0.189255
[385]	eval-rmse:0.489517	train-rmse:0.188261
[386]	eval-rmse:0.489591	train-rmse:0.188026
[387]	eval-rmse:0.489632	train-rmse:0.187729
[388]	eval-rmse:0.489649	train-rmse:0.187639
[389]	eval-rmse:0.489698	train-rmse:0.187361
[390]	eval-rmse:0.489736	train-rmse:0.186772
[391]	eval-rmse:0.489765	train-rmse:0.186676
[392]	eval-rmse:0.489773	train-rmse:0.18653
[393]	eval-rmse:0.489854	train-rmse:0.186
[394]	eval-rmse

[556]	eval-rmse:0.49672	train-rmse:0.131792
[557]	eval-rmse:0.496754	train-rmse:0.131532
[558]	eval-rmse:0.496795	train-rmse:0.131283
[559]	eval-rmse:0.496831	train-rmse:0.130955
[560]	eval-rmse:0.496872	train-rmse:0.130368
[561]	eval-rmse:0.496942	train-rmse:0.129995
[562]	eval-rmse:0.496989	train-rmse:0.129855
[563]	eval-rmse:0.497023	train-rmse:0.129784
[564]	eval-rmse:0.497064	train-rmse:0.129504
[565]	eval-rmse:0.497081	train-rmse:0.129463
[566]	eval-rmse:0.497102	train-rmse:0.129057
[567]	eval-rmse:0.497138	train-rmse:0.128855
[568]	eval-rmse:0.497193	train-rmse:0.128536
[569]	eval-rmse:0.497252	train-rmse:0.128119
[570]	eval-rmse:0.497261	train-rmse:0.127893
[571]	eval-rmse:0.497305	train-rmse:0.127439
[572]	eval-rmse:0.497341	train-rmse:0.12734
[573]	eval-rmse:0.497363	train-rmse:0.127171
[574]	eval-rmse:0.497394	train-rmse:0.127086
[575]	eval-rmse:0.49742	train-rmse:0.126898
[576]	eval-rmse:0.497456	train-rmse:0.126699
[577]	eval-rmse:0.497474	train-rmse:0.126498
[578]	eval-rm

[739]	eval-rmse:0.501982	train-rmse:0.091098
[740]	eval-rmse:0.502014	train-rmse:0.090813
[741]	eval-rmse:0.502049	train-rmse:0.090683
[742]	eval-rmse:0.502063	train-rmse:0.090348
[743]	eval-rmse:0.502094	train-rmse:0.089985
[744]	eval-rmse:0.502123	train-rmse:0.089754
[745]	eval-rmse:0.50216	train-rmse:0.089542
[746]	eval-rmse:0.502195	train-rmse:0.089287
[747]	eval-rmse:0.502201	train-rmse:0.089269
[748]	eval-rmse:0.502216	train-rmse:0.089233
[749]	eval-rmse:0.502245	train-rmse:0.089007
[750]	eval-rmse:0.502292	train-rmse:0.088734
[751]	eval-rmse:0.502338	train-rmse:0.088509
[752]	eval-rmse:0.502368	train-rmse:0.088281
[753]	eval-rmse:0.502394	train-rmse:0.088041
[754]	eval-rmse:0.502426	train-rmse:0.087968
[755]	eval-rmse:0.502433	train-rmse:0.08791
[756]	eval-rmse:0.502476	train-rmse:0.087507
[757]	eval-rmse:0.502517	train-rmse:0.08728
[758]	eval-rmse:0.502541	train-rmse:0.087128
[759]	eval-rmse:0.502561	train-rmse:0.08706
[760]	eval-rmse:0.502577	train-rmse:0.086918
[761]	eval-rms

[923]	eval-rmse:0.50566	train-rmse:0.063636
[924]	eval-rmse:0.505673	train-rmse:0.063577
[925]	eval-rmse:0.505709	train-rmse:0.063284
[926]	eval-rmse:0.505733	train-rmse:0.063019
[927]	eval-rmse:0.505739	train-rmse:0.062771
[928]	eval-rmse:0.505754	train-rmse:0.062659
[929]	eval-rmse:0.505764	train-rmse:0.06248
[930]	eval-rmse:0.505782	train-rmse:0.062283
[931]	eval-rmse:0.505784	train-rmse:0.062197
[932]	eval-rmse:0.505792	train-rmse:0.062182
[933]	eval-rmse:0.505802	train-rmse:0.061973
[934]	eval-rmse:0.505816	train-rmse:0.061928
[935]	eval-rmse:0.505836	train-rmse:0.061623
[936]	eval-rmse:0.505849	train-rmse:0.061498
[937]	eval-rmse:0.505868	train-rmse:0.06141
[938]	eval-rmse:0.505887	train-rmse:0.061338
[939]	eval-rmse:0.505892	train-rmse:0.061332
[940]	eval-rmse:0.505915	train-rmse:0.061094
[941]	eval-rmse:0.505937	train-rmse:0.060961
[942]	eval-rmse:0.505954	train-rmse:0.060814
[943]	eval-rmse:0.505966	train-rmse:0.060645
[944]	eval-rmse:0.505979	train-rmse:0.060578
[945]	eval-rm

[1104]	eval-rmse:0.507956	train-rmse:0.046522
[1105]	eval-rmse:0.507964	train-rmse:0.046476
[1106]	eval-rmse:0.507968	train-rmse:0.046469
[1107]	eval-rmse:0.507976	train-rmse:0.046322
[1108]	eval-rmse:0.507991	train-rmse:0.046269
[1109]	eval-rmse:0.508004	train-rmse:0.046217
[1110]	eval-rmse:0.508028	train-rmse:0.046102
[1111]	eval-rmse:0.508036	train-rmse:0.046077
[1112]	eval-rmse:0.508039	train-rmse:0.046072
[1113]	eval-rmse:0.508043	train-rmse:0.046062
[1114]	eval-rmse:0.508053	train-rmse:0.045972
[1115]	eval-rmse:0.508062	train-rmse:0.045809
[1116]	eval-rmse:0.508075	train-rmse:0.045673
[1117]	eval-rmse:0.508081	train-rmse:0.045651
[1118]	eval-rmse:0.508096	train-rmse:0.045546
[1119]	eval-rmse:0.508098	train-rmse:0.045541
[1120]	eval-rmse:0.508106	train-rmse:0.045401
[1121]	eval-rmse:0.50812	train-rmse:0.045256
[1122]	eval-rmse:0.508121	train-rmse:0.045254
[1123]	eval-rmse:0.508132	train-rmse:0.045236
[1124]	eval-rmse:0.508144	train-rmse:0.045142
[1125]	eval-rmse:0.508153	train-rms

[1284]	eval-rmse:0.509355	train-rmse:0.037245
[1285]	eval-rmse:0.509356	train-rmse:0.037241
[1286]	eval-rmse:0.509372	train-rmse:0.037144
[1287]	eval-rmse:0.50938	train-rmse:0.037067
[1288]	eval-rmse:0.509384	train-rmse:0.037022
[1289]	eval-rmse:0.509389	train-rmse:0.037002
[1290]	eval-rmse:0.509393	train-rmse:0.036982
[1291]	eval-rmse:0.509404	train-rmse:0.036923
[1292]	eval-rmse:0.509409	train-rmse:0.036899
[1293]	eval-rmse:0.509413	train-rmse:0.036882
[1294]	eval-rmse:0.509417	train-rmse:0.036867
[1295]	eval-rmse:0.509421	train-rmse:0.03683
[1296]	eval-rmse:0.50943	train-rmse:0.036799
[1297]	eval-rmse:0.509438	train-rmse:0.036708
[1298]	eval-rmse:0.509439	train-rmse:0.036651
[1299]	eval-rmse:0.509445	train-rmse:0.036581
[1300]	eval-rmse:0.50945	train-rmse:0.036521
[1301]	eval-rmse:0.509457	train-rmse:0.036481
[1302]	eval-rmse:0.509464	train-rmse:0.03643
[1303]	eval-rmse:0.509476	train-rmse:0.036367
[1304]	eval-rmse:0.509482	train-rmse:0.036332
[1305]	eval-rmse:0.509488	train-rmse:0.

[1463]	eval-rmse:0.510308	train-rmse:0.031589
[1464]	eval-rmse:0.510308	train-rmse:0.031589
[1465]	eval-rmse:0.510312	train-rmse:0.031571
[1466]	eval-rmse:0.510319	train-rmse:0.031536
[1467]	eval-rmse:0.510319	train-rmse:0.031531
[1468]	eval-rmse:0.510325	train-rmse:0.031511
[1469]	eval-rmse:0.510329	train-rmse:0.031483
[1470]	eval-rmse:0.510332	train-rmse:0.031478
[1471]	eval-rmse:0.510336	train-rmse:0.031452
[1472]	eval-rmse:0.51034	train-rmse:0.031432
[1473]	eval-rmse:0.510345	train-rmse:0.031405
[1474]	eval-rmse:0.510352	train-rmse:0.031352
[1475]	eval-rmse:0.510354	train-rmse:0.031345
[1476]	eval-rmse:0.510359	train-rmse:0.031291
[1477]	eval-rmse:0.510368	train-rmse:0.03123
[1478]	eval-rmse:0.510372	train-rmse:0.031207
[1479]	eval-rmse:0.510379	train-rmse:0.031156
[1480]	eval-rmse:0.510383	train-rmse:0.031124
[1481]	eval-rmse:0.510387	train-rmse:0.031108
[1482]	eval-rmse:0.510394	train-rmse:0.031077
[1483]	eval-rmse:0.510396	train-rmse:0.031067
[1484]	eval-rmse:0.510401	train-rmse

[1642]	eval-rmse:0.510924	train-rmse:0.028129
[1643]	eval-rmse:0.510925	train-rmse:0.028108
[1644]	eval-rmse:0.510926	train-rmse:0.028098
[1645]	eval-rmse:0.510931	train-rmse:0.028081
[1646]	eval-rmse:0.510935	train-rmse:0.028063
[1647]	eval-rmse:0.510937	train-rmse:0.028055
[1648]	eval-rmse:0.51094	train-rmse:0.028046
[1649]	eval-rmse:0.510943	train-rmse:0.028032
[1650]	eval-rmse:0.510944	train-rmse:0.02803
[1651]	eval-rmse:0.510944	train-rmse:0.027997
[1652]	eval-rmse:0.510948	train-rmse:0.027988
[1653]	eval-rmse:0.510948	train-rmse:0.027987
[1654]	eval-rmse:0.510951	train-rmse:0.027972
[1655]	eval-rmse:0.510954	train-rmse:0.027959
[1656]	eval-rmse:0.510954	train-rmse:0.027955
[1657]	eval-rmse:0.510958	train-rmse:0.027931
[1658]	eval-rmse:0.510959	train-rmse:0.027923
[1659]	eval-rmse:0.510962	train-rmse:0.027896
[1660]	eval-rmse:0.510966	train-rmse:0.027873
[1661]	eval-rmse:0.510967	train-rmse:0.027871
[1662]	eval-rmse:0.510973	train-rmse:0.027857
[1663]	eval-rmse:0.510974	train-rmse

[1821]	eval-rmse:0.511336	train-rmse:0.026241
[1822]	eval-rmse:0.51134	train-rmse:0.026241
[1823]	eval-rmse:0.511342	train-rmse:0.026231
[1824]	eval-rmse:0.511344	train-rmse:0.026216
[1825]	eval-rmse:0.511347	train-rmse:0.026203
[1826]	eval-rmse:0.511348	train-rmse:0.026202
[1827]	eval-rmse:0.51135	train-rmse:0.026194
[1828]	eval-rmse:0.511352	train-rmse:0.026184
[1829]	eval-rmse:0.511356	train-rmse:0.026173
[1830]	eval-rmse:0.511359	train-rmse:0.026172
[1831]	eval-rmse:0.511362	train-rmse:0.026172
[1832]	eval-rmse:0.511364	train-rmse:0.026161
[1833]	eval-rmse:0.511365	train-rmse:0.02616
[1834]	eval-rmse:0.511364	train-rmse:0.026155
[1835]	eval-rmse:0.511368	train-rmse:0.026147
[1836]	eval-rmse:0.51137	train-rmse:0.026145
[1837]	eval-rmse:0.511369	train-rmse:0.026142
[1838]	eval-rmse:0.511371	train-rmse:0.026137
[1839]	eval-rmse:0.511371	train-rmse:0.026134
[1840]	eval-rmse:0.511375	train-rmse:0.026107
[1841]	eval-rmse:0.511375	train-rmse:0.026103
[1842]	eval-rmse:0.51138	train-rmse:0.

In [97]:
#Test predictions
pred = np.exp(gbm.predict(xgb.DMatrix(X_test))) - 1

In [98]:
#Use mean absolute error to get a basic estimate of the error
mae = (abs(pred - y_test)).mean()
mae

912.7095947265625

In [99]:
#Take a look at feature importance
feature_scores = gbm.get_fscore()
feature_scores

{'fulfillment_model': 329247,
 'quantity': 1508739,
 'start_day': 1023570,
 'start_hour': 979100,
 'start_minute': 1280929,
 'start_month': 394456,
 'start_weekday': 562531,
 'store_id': 793895}

In [100]:
#This is not very telling, so let's scale the features
summ = 0
for key in feature_scores:
    summ = summ + feature_scores[key]

for key in feature_scores:
    feature_scores[key] = feature_scores[key] / summ

feature_scores

{'fulfillment_model': 0.04790812382220242,
 'quantity': 0.21953382970045546,
 'start_day': 0.14893778318615425,
 'start_hour': 0.14246703549103984,
 'start_minute': 0.1863856166933941,
 'start_month': 0.05739656516357226,
 'start_weekday': 0.08185284847493629,
 'store_id': 0.11551819746824539}

In [None]:
thresh = 0.08
pred
# y_pred [y_pred > thresh] = 1
# y_pred [y_pred <= thresh] = 0

In [102]:
from sklearn.metrics import explained_variance_score

In [103]:
print(explained_variance_score(pred,y_test))

-0.8390771310810956


In [104]:
from sklearn.metrics import r2_score
r2_score(y_test, pred)

0.2047684233859295