In this jupyter notebook are logistic regression, desigion tree, random forest and xgboost models fitted to black friday dataset and explained by eli5, lime and shap.

In [25]:
import eli5
import lime
import shap

import pandas as pd

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

from sklearn.metrics import accuracy_score, classification_report

## Data loading

In [21]:
BLACK_FRIDAY_PATH = "../../datasets/preprocessed/pre_black_friday.csv"

In [22]:
black_df = pd.read_csv(BLACK_FRIDAY_PATH) 
black_df.head()

Unnamed: 0.1,Unnamed: 0,Purchase_x,Occupation,Marital_Status,Gender_F,Age_0-17,Age_18-25,Age_26-35,Age_36-45,Age_46-50,Age_51-55,Age_55+,City_Category_A,City_Category_B,City_Category_C,Stay_In_Current_City_Years_0,Stay_In_Current_City_Years_1,Stay_In_Current_City_Years_2,Stay_In_Current_City_Years_3,Stay_In_Current_City_Years_4+
0,0,0,10,0,1,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0
1,34,0,16,0,0,0,0,0,0,0,0,1,0,0,1,0,0,0,0,1
2,110,0,15,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,1,0
3,139,0,7,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,0
4,152,0,20,1,0,0,0,1,0,0,0,0,1,0,0,0,1,0,0,0


In [23]:
X = df.drop(["Unnamed: 0","Purchase_x"], axis=1)
Y = df["Purchase_x"]

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=.3, random_state=42)

## Models Definition

In [30]:
models = {
    "Logistic Regression": LogisticRegression(C=1e5, solver='lbfgs', multi_class='multinomial', max_iter=10000),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest Classifier": RandomForestClassifier(n_estimators=10),
    "XGboost": XGBClassifier()
}

In [37]:
for name, model in models.items():
    print(f"Fitting {name}")
    y_pred = model.predict(X_test)
    print(accuracy_score(y_test, y_pred))
    model.fit(X_train, y_train)
    display(eli5.show_weights(model))

Fitting Logistic Regression
0.7426470588235294


Weight?,Feature,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0
Weight?,Feature,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Weight?,Feature,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
Weight?,Feature,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3
Weight?,Feature,Unnamed: 2_level_4,Unnamed: 3_level_4,Unnamed: 4_level_4,Unnamed: 5_level_4,Unnamed: 6_level_4,Unnamed: 7_level_4
Weight?,Feature,Unnamed: 2_level_5,Unnamed: 3_level_5,Unnamed: 4_level_5,Unnamed: 5_level_5,Unnamed: 6_level_5,Unnamed: 7_level_5
Weight?,Feature,Unnamed: 2_level_6,Unnamed: 3_level_6,Unnamed: 4_level_6,Unnamed: 5_level_6,Unnamed: 6_level_6,Unnamed: 7_level_6
Weight?,Feature,Unnamed: 2_level_7,Unnamed: 3_level_7,Unnamed: 4_level_7,Unnamed: 5_level_7,Unnamed: 6_level_7,Unnamed: 7_level_7
+0.000,x0,,,,,,
+0.000,x1,,,,,,
+0.000,x13,,,,,,
+0.000,<BIAS>,,,,,,
+0.000,x2,,,,,,
+0.000,x3,,,,,,
+0.000,x15,,,,,,
+0.000,x5,,,,,,
+0.000,x16,,,,,,
+0.000,x7,,,,,,

Weight?,Feature
0.0,x0
0.0,x1
0.0,x13
0.0,<BIAS>
0.0,x2
0.0,x3
0.0,x15
0.0,x5
0.0,x16
0.0,x7

Weight?,Feature
0.0,x0
0.0,x12
0.0,x6
-0.0,x11
-0.0,x4
-0.0,x17
-0.0,x7
-0.0,x16
-0.0,x9
-0.0,x14

Weight?,Feature
0.0,x0
0.0,x12
0.0,x11
-0.0,x6
-0.0,x14
-0.0,x4
-0.0,x9
-0.0,x8
-0.0,x18
-0.0,x17

Weight?,Feature
0.0,x0
0.0,x12
-0.0,x8
-0.0,x4
-0.0,x11
-0.0,x9
-0.0,x10
-0.0,x14
-0.0,x18
-0.0,x7

Weight?,Feature
0.0,x0
-0.0,x11
-0.0,x4
-0.0,x14
-0.0,x8
-0.0,x10
-0.0,x9
-0.0,x17
-0.0,x18
-0.0,x16

Weight?,Feature
-0.0,x8
-0.0,x4
-0.0,x10
-0.0,x9
-0.0,x11
-0.0,x18
-0.0,x14
-0.0,x17
-0.0,x16
-0.0,x7

Weight?,Feature
-0.0,x4
-0.0,x10
-0.0,x9
-0.0,x14
-0.0,x8
-0.0,x11
-0.0,x18
-0.0,x7
-0.0,x17
-0.0,x16

Weight?,Feature
-0.0,x4
-0.0,x10
-0.0,x9
-0.0,x8
-0.0,x14
-0.0,x17
-0.0,x18
-0.0,x16
-0.0,x11
-0.0,x7


Fitting Decision Tree
0.6153846153846154


Weight,Feature
0.4476,x0
0.1576,x1
0.0698,x13
0.0493,x2
0.0378,x6
0.0317,x15
0.0259,x7
0.0237,x16
0.0229,x14
0.0206,x8


Fitting Random Forest Classifier
0.6940045248868778


Weight,Feature
0.4602  ± 0.0421,x0
0.2361  ± 0.0400,x1
0.0378  ± 0.0172,x2
0.0373  ± 0.0518,x13
0.0313  ± 0.0476,x12
0.0293  ± 0.0303,x3
0.0213  ± 0.0194,x16
0.0202  ± 0.0131,x15
0.0166  ± 0.0137,x7
0.0162  ± 0.0099,x17


Fitting XGboost
0.7420814479638009


Weight,Feature
0.3355,City_Category_C
0.1286,City_Category_B
0.058,Age_55+
0.054,Gender_F
0.0462,Age_26-35
0.0416,Unnamed: 0
0.0387,Stay_In_Current_City_Years_4+
0.0354,Stay_In_Current_City_Years_3
0.0344,Occupation
0.0278,Age_51-55
