**Deep Learning Based Product Reordering Prediction**


In [1]:
# 1. Import Required Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
from scipy.stats import skew

In [2]:
# 2. Load Data
orders = pd.read_csv('orders.csv')
prior = pd.read_csv('order_products__prior.csv')
train = pd.read_csv('order_products__train.csv')
products = pd.read_csv('products.csv')
aisles = pd.read_csv('aisles.csv')
departments = pd.read_csv('departments.csv')


In [3]:
prior_sampled = prior.sample(frac=0.02, random_state=42)  # 10% sample
orders_sampled = orders[orders['order_id'].isin(prior_sampled['order_id'])]

merged = prior_sampled.merge(orders_sampled, on='order_id', how='left')
merged = merged.merge(products, on='product_id', how='left')
merged = merged.merge(aisles, on='aisle_id', how='left')
merged = merged.merge(departments, on='department_id', how='left')


In [4]:
# 4. Feature Engineering
user_total_orders = merged.groupby('user_id')['order_number'].max().reset_index(name='user_total_orders')
order_size = merged.groupby('order_id')['product_id'].count().reset_index(name='order_size')
product_reorder_rate = merged.groupby('product_id')['reordered'].mean().reset_index(name='product_reorder_rate')
user_reorder_ratio = merged.groupby('user_id')['reordered'].mean().reset_index(name='user_reorder_ratio')
user_product_orders = merged.groupby(['user_id', 'product_id']).size().reset_index(name='user_product_order_count')


In [5]:
# Merge features into main dataframe
merged = merged.merge(user_total_orders, on='user_id', how='left')
merged = merged.merge(order_size, on='order_id', how='left')
merged = merged.merge(product_reorder_rate, on='product_id', how='left')
merged = merged.merge(user_reorder_ratio, on='user_id', how='left')
merged = merged.merge(user_product_orders, on=['user_id', 'product_id'], how='left')

# Time-based features
merged['is_weekend'] = merged['order_dow'].apply(lambda x: 1 if x in [0, 6] else 0)
merged['is_morning_order'] = merged['order_hour_of_day'].apply(lambda x: 1 if 5 <= x <= 11 else 0)


In [6]:
# 5. Prepare Final Dataset
merged = merged.drop(columns=["product_name","eval_set","aisle","department"] , errors='ignore')
features = ['user_total_orders', 'order_size', 'product_reorder_rate', 'user_reorder_ratio',
            'user_product_order_count', 'days_since_prior_order', 'order_hour_of_day', 'order_dow',
            'is_weekend', 'is_morning_order']
X = merged[features].fillna(0)
y = merged['reordered']

In [7]:
# Analyze skewness
skewed_feats = X.apply(lambda x: skew(x.dropna())).sort_values(ascending=False)
print("Feature Skewness:\n", skewed_feats)

# Log transform skewed features with skewness > 0.75
high_skew = skewed_feats[skewed_feats > 0.75].index
X[high_skew] = X[high_skew].apply(lambda x: np.log1p(x))



Feature Skewness:
 user_product_order_count    3.755845
order_size                  2.094216
days_since_prior_order      1.057430
user_total_orders           1.044069
is_weekend                  0.721496
is_morning_order            0.691302
order_dow                   0.182293
order_hour_of_day          -0.045956
user_reorder_ratio         -0.646925
product_reorder_rate       -1.017942
dtype: float64


In [8]:
# Normalize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [9]:
# 6. Train-Test Split
x_train, x_val, y_train, y_val = train_test_split(X_scaled, y, test_size=0.2, random_state=42)


In [10]:
# 7. Build Deep Neural Network
model = Sequential()
model.add(Dense(128, input_dim=x_train.shape[1], activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.3))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [15]:
# 8. Train Model
callbacks = [EarlyStopping(patience=3, restore_best_weights=True)]
model.fit(x_train, y_train, epochs=10, batch_size=32, validation_data=(x_val, y_val), callbacks=callbacks)

Epoch 1/10
[1m16218/16218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 3ms/step - accuracy: 0.8053 - loss: 0.4009 - val_accuracy: 0.8078 - val_loss: 0.3904
Epoch 2/10
[1m16218/16218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 3ms/step - accuracy: 0.8058 - loss: 0.3986 - val_accuracy: 0.8099 - val_loss: 0.3896
Epoch 3/10
[1m16218/16218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m47s[0m 3ms/step - accuracy: 0.8062 - loss: 0.3975 - val_accuracy: 0.8083 - val_loss: 0.3883
Epoch 4/10
[1m16218/16218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m48s[0m 3ms/step - accuracy: 0.8060 - loss: 0.3978 - val_accuracy: 0.8097 - val_loss: 0.3873
Epoch 5/10
[1m16218/16218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m49s[0m 3ms/step - accuracy: 0.8057 - loss: 0.3973 - val_accuracy: 0.8088 - val_loss: 0.3897
Epoch 6/10
[1m16218/16218[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 3ms/step - accuracy: 0.8063 - loss: 0.3960 - val_accuracy: 0.8099 - val_loss: 0.387

<keras.src.callbacks.history.History at 0x17aef7b95b0>

In [16]:
# 9. Evaluate Model
y_pred = model.predict(x_val)
y_pred_class = (y_pred > 0.5).astype(int)
print(classification_report(y_val, y_pred_class))
print("ROC-AUC:", roc_auc_score(y_val, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_val, y_pred_class))



[1m4055/4055[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 1ms/step
              precision    recall  f1-score   support

           0       0.82      0.69      0.75     53271
           1       0.81      0.89      0.85     76467

    accuracy                           0.81    129738
   macro avg       0.81      0.79      0.80    129738
weighted avg       0.81      0.81      0.81    129738

ROC-AUC: 0.8924475258352587
Confusion Matrix:
 [[36792 16479]
 [ 8202 68265]]


In [17]:
# 10. Save Model
model.save('product_reorder_dnn_model.h5')

