## Neural Net

In [2]:
import warnings
import time
from tqdm import tqdm
from numba.core.errors import NumbaDeprecationWarning, NumbaPendingDeprecationWarning

from _util.custom_plotting import corr_heatmap, histogram_boxplot, horizontal_bar, heatmap_boxplot, simple_bar
from _util.make_confusion_matrix import make_cm
from _util.model_comparisons import *
from _util.custom_mem_opt import custom_mem_opt

from collections import Counter
from sklearn.datasets import make_classification
from imblearn.over_sampling import SMOTE

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pprint import PrettyPrinter

import gc

from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn.metrics import roc_auc_score, roc_curve, precision_score, recall_score, f1_score
from sklearn.preprocessing import MinMaxScaler

import torch
import torch.nn as nn

pd.options.mode.chained_assignment = None
pp = PrettyPrinter(width=41, compact=True)
root = './_pkls/'

### Set some options

In [3]:
warnings.simplefilter(action='ignore', category=NumbaDeprecationWarning)
warnings.simplefilter(action='ignore', category=FutureWarning)
np.seterr(divide = 'ignore') 

# Ensure that the current MacOS version is at least 12.3+, and 
# the current current PyTorch installation was built with MPS activated.
print(torch.backends.mps.is_available(), ", ", torch.backends.mps.is_built())

pd.options.mode.chained_assignment = None  # default='warn'
%matplotlib inline

pp = PrettyPrinter(width=100)

True ,  True


In [4]:
df = pd.read_pickle(root + 'final_data.pkl')
df.head()

Unnamed: 0,user_id,product_id,total_product_orders_by_user,total_product_reorders_by_user,user_product_reorder_percentage,avg_add_to_cart_by_user,avg_days_since_last_bought,last_ordered_in,is_reorder_3,is_reorder_2,...,total_reorders_by_user,reorder_propotion_by_user,average_order_size,reorder_in_order,orders_3,orders_2,orders_1,reorder_3,reorder_2,reorder_1
0,1,196,10.0,9.0,0.9,1.4,17.6,10.0,1.0,1.0,...,41,0.694915,5.9,0.705833,6.0,6.0,9.0,0.666667,1.0,0.666667
1,1,10258,9.0,8.0,0.888889,3.333333,19.555555,10.0,1.0,1.0,...,41,0.694915,5.9,0.705833,6.0,6.0,9.0,0.666667,1.0,0.666667
2,1,10326,1.0,0.0,0.0,5.0,28.0,5.0,0.0,0.0,...,41,0.694915,5.9,0.705833,6.0,6.0,9.0,0.666667,1.0,0.666667
3,1,12427,10.0,9.0,0.9,3.3,17.6,10.0,1.0,1.0,...,41,0.694915,5.9,0.705833,6.0,6.0,9.0,0.666667,1.0,0.666667
4,1,13032,3.0,2.0,0.666667,6.333333,21.666666,10.0,1.0,0.0,...,41,0.694915,5.9,0.705833,6.0,6.0,9.0,0.666667,1.0,0.666667


In [5]:
df = custom_mem_opt(df)

Memory usage of properties dataframe is : 4283.495386123657  MB

___MEMORY USAGE AFTER COMPLETION:___
Memory usage is:  1204.2280359268188  MB
This is  28.11320959578722 % of the initial size


In [6]:
df['order_diff'] = df.order_number - df.last_ordered_in
df.drop(['user_id', 'product_id'], axis = 1, inplace = True)

In [7]:
df.head()

Unnamed: 0,total_product_orders_by_user,total_product_reorders_by_user,user_product_reorder_percentage,avg_add_to_cart_by_user,avg_days_since_last_bought,last_ordered_in,is_reorder_3,is_reorder_2,is_reorder_1,order_number,...,reorder_propotion_by_user,average_order_size,reorder_in_order,orders_3,orders_2,orders_1,reorder_3,reorder_2,reorder_1,order_diff
0,10.0,9.0,0.899902,1.400391,17.59375,10.0,1.0,1.0,1.0,11.0,...,0.694824,5.898438,0.706055,6.0,6.0,9.0,0.666504,1.0,0.666504,1.0
1,9.0,8.0,0.888672,3.333984,19.5625,10.0,1.0,1.0,1.0,11.0,...,0.694824,5.898438,0.706055,6.0,6.0,9.0,0.666504,1.0,0.666504,1.0
2,1.0,0.0,0.0,5.0,28.0,5.0,0.0,0.0,0.0,11.0,...,0.694824,5.898438,0.706055,6.0,6.0,9.0,0.666504,1.0,0.666504,6.0
3,10.0,9.0,0.899902,3.300781,17.59375,10.0,1.0,1.0,1.0,11.0,...,0.694824,5.898438,0.706055,6.0,6.0,9.0,0.666504,1.0,0.666504,1.0
4,3.0,2.0,0.666504,6.332031,21.671875,10.0,1.0,0.0,0.0,11.0,...,0.694824,5.898438,0.706055,6.0,6.0,9.0,0.666504,1.0,0.666504,1.0


In [8]:
print(df.shape)

(8474661, 68)


In [9]:
df.reordered = df.reordered.astype(int)
df.groupby('reordered', group_keys=False).apply(lambda x: x.sample(frac=0.30))

Unnamed: 0,total_product_orders_by_user,total_product_reorders_by_user,user_product_reorder_percentage,avg_add_to_cart_by_user,avg_days_since_last_bought,last_ordered_in,is_reorder_3,is_reorder_2,is_reorder_1,order_number,...,reorder_propotion_by_user,average_order_size,reorder_in_order,orders_3,orders_2,orders_1,reorder_3,reorder_2,reorder_1,order_diff
4408752,2.0,1.0,0.500000,19.000000,16.000000,12.0,1.0,0.0,0.0,15.0,...,0.447021,15.500000,0.425781,22.0,19.0,17.0,0.772949,0.789551,0.706055,3.0
1575663,1.0,0.0,0.000000,10.000000,5.000000,28.0,0.0,0.0,0.0,31.0,...,0.581543,13.929688,0.591309,17.0,27.0,32.0,0.470703,0.703613,0.625000,3.0
4782957,8.0,7.0,0.875000,7.250000,17.500000,25.0,1.0,1.0,0.0,26.0,...,0.745605,15.242188,0.698242,13.0,24.0,25.0,0.461426,0.916504,0.919922,1.0
1342324,10.0,9.0,0.899902,2.199219,14.203125,12.0,1.0,1.0,0.0,13.0,...,0.605469,18.578125,0.632812,15.0,21.0,21.0,0.733398,0.619141,0.809570,1.0
3455542,2.0,1.0,0.500000,6.000000,30.000000,11.0,1.0,0.0,0.0,14.0,...,0.180176,8.539062,0.291992,16.0,14.0,6.0,0.062500,0.071411,0.333252,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7308519,4.0,3.0,0.750000,4.500000,24.500000,7.0,1.0,0.0,0.0,10.0,...,0.515137,14.664062,0.563477,10.0,11.0,9.0,1.000000,0.909180,1.000000,3.0
3716043,1.0,0.0,0.000000,29.000000,8.000000,13.0,0.0,0.0,0.0,14.0,...,0.514648,15.695312,0.579102,10.0,16.0,31.0,1.000000,0.562500,0.354736,1.0
537849,1.0,0.0,0.000000,13.000000,19.000000,32.0,0.0,0.0,0.0,38.0,...,0.575684,6.945312,0.626465,3.0,3.0,12.0,0.333252,0.666504,0.583496,6.0
8024146,1.0,0.0,0.000000,3.000000,9.000000,3.0,0.0,0.0,0.0,4.0,...,0.399902,21.671875,0.392578,17.0,18.0,30.0,0.000000,0.777832,0.399902,1.0


In [10]:
print(df.shape)

(8474661, 68)


In [11]:
df_val, df_red = train_test_split(df, test_size=0.3, stratify=df['reordered'])
#df_red = df.groupby('reordered', group_keys=False).apply(lambda x: x.sample(frac=0.3))

In [12]:
y_val = df_val.reordered
X_val = df_val.drop(["reordered", 'order_dow', 'order_hour_of_day', 'days_since_prior_order'], axis = 1)
print(X_val.shape)

(5932262, 64)
