In [None]:
# Importing Libraries
import pandas as pd
import numpy as np
import json
import pickle
import os
from tqdm.auto import tqdm
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split


In [None]:
# Set device to GPU (CUDA) if available, otherwise CPU
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {DEVICE}")

# Model and Training Hyperparameters
LEARNING_RATE = 0.001  # A small learning rate to ensure smooth and stable training
EPOCHS = 120  # Increase epochs since we have early stopping
BATCH_SIZE = 1024  # Batch size for training
N_NEGATIVE_SAMPLES = (
    15  # Number of negative samples to use for training per positive sample
)
EARLY_STOPPING_PATIENCE = 7  # Stop if validation loss doesn't improve for 7 epochs


Using device: cuda


In [None]:
# Loading necessary data
order_data = pd.read_csv("../Datasets/order_data_cleaned.csv")
customer_data = pd.read_csv("../Datasets/customer_data_cleaned.csv")
test_data = pd.read_csv("../Datasets/test_data_question.csv")

# Processing Data

In [None]:
# Function to extract all item names from "orders" JSON string
def extract_items(order_json_string):
    try:
        data = json.loads(order_json_string)
        return [item["item_name"] for item in data["orders"][0]["item_details"]]
    except (json.JSONDecodeError, IndexError, KeyError):
        return []


# This creates a column called "item_list" which contains all the item's in a list format included in that order
order_data["item_list"] = order_data["ORDERS"].apply(extract_items)
# this creates a set (to remove duplicates) to get all unique items in order_data
all_items_in_orders = set(
    [item for sublist in order_data["item_list"] for item in sublist]
)
# this creates a set (to remove duplicates) to get all unique items in test_data
all_items_in_test = (
    set(test_data["item1"].unique())
    | set(test_data["item2"].unique())
    | set(test_data["item3"].unique())
)
# this is basically a union list of all items from both sets created to get an idea of the overall number of unique items in the complete entire database
ARM_VOCABULARY = sorted(list(all_items_in_orders | all_items_in_test))
N_ARMS = len(ARM_VOCABULARY)

# CONTEXT_FEATURES is a dictionary that maps feature names to their unique values
# this basically stores all the unique different feature values for features that are included for our valuation/prediction
CONTEXT_FEATURES = {
    "CUSTOMER_TYPE": sorted(
        customer_data["CUSTOMER_TYPE"].astype(str).unique().tolist()
    ),
    "STORE_NUMBER": sorted(order_data["STORE_NUMBER"].unique().tolist()),
    "ITEMS": ARM_VOCABULARY,
}
# this creates a OneHotEncoder for the CUSTOMER_TYPE feature
customer_type_encoder = OneHotEncoder(
    categories=[CONTEXT_FEATURES["CUSTOMER_TYPE"]],
    handle_unknown="ignore",
    sparse_output=False,
)
# this creates a OneHotEncoder for the STORE_NUMBER feature
store_number_encoder = OneHotEncoder(
    categories=[CONTEXT_FEATURES["STORE_NUMBER"]],
    handle_unknown="ignore",
    sparse_output=False,
)
# the encoders are then fitted onto the respective feature sets which are converted into a 2D array of 1 column as OneHotEncoder expects input in 2D format
customer_type_encoder.fit(np.array(CONTEXT_FEATURES["CUSTOMER_TYPE"]).reshape(-1, 1))
store_number_encoder.fit(np.array(CONTEXT_FEATURES["STORE_NUMBER"]).reshape(-1, 1))

# this creates a dictionary where each item is mapped to a unique index value (Dict so that O(1) access is possible)
ARM_MAP = {item: i for i, item in enumerate(ARM_VOCABULARY)}

In [None]:
def get_context_vector(customer_type, store_number, items_in_cart):
    customer_vec = customer_type_encoder.transform(np.array([[customer_type]]))
    store_vec = store_number_encoder.transform(np.array([[store_number]]))
    items_vec = np.zeros((1, len(CONTEXT_FEATURES["ITEMS"])))
    for item in items_in_cart:
        if item in ARM_MAP:
            items_vec[0, ARM_MAP[item]] = 1
    return np.concatenate(
        [np.array([[1]]), customer_vec, store_vec, items_vec], axis=1
    ).flatten()


dummy_context = get_context_vector("Guest", order_data["STORE_NUMBER"].iloc[0], [])
N_FEATURES = len(dummy_context)
print(f"Number of arms: {N_ARMS}, Number of context features: {N_FEATURES}")


Number of arms: 138, Number of context features: 182


# Custom Dataset

In [None]:
# Custom Dataset class to handle data efficiently for train_test_split purposes and training purposes.
class BanditDataset(Dataset):
    def __init__(self, contexts, arm_indices, rewards):
        self.contexts = torch.FloatTensor(contexts)
        self.arm_indices = torch.LongTensor(arm_indices)
        self.rewards = torch.FloatTensor(rewards)

    def __len__(self):
        return len(self.rewards)

    def __getitem__(self, idx):
        return self.contexts[idx], self.arm_indices[idx], self.rewards[idx]

In [None]:
# creating a merged dataset on order and custome data on CUSTOMER_ID for creating the dataset
order_data_merged = pd.merge(order_data, customer_data, on="CUSTOMER_ID", how="left")
order_data_merged["CUSTOMER_TYPE"].fillna("Guest", inplace=True)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  order_data_merged["CUSTOMER_TYPE"].fillna("Guest", inplace=True)


# Creating Dataset

In [None]:
# Lists to store context, arm_index, reward gained from the training data
contexts, arm_indices, rewards = [], [], []
# Iterating through each row in the order_data_merged column
for _, row in tqdm(order_data_merged.iterrows(), total=len(order_data_merged)):
    # checking if number of items are more than 2 to be able to create context
    items_in_order = row["item_list"]
    if len(items_in_order) < 2:
        continue
    # for each item in the order starting from the second item
    for i in range(1, len(items_in_order)):
        # choose the first 'i' items that is if i == 3 then choose the first 2 items as context
        context_items = items_in_order[:i]
        # chosen item is the i-th item to tell the model that this is the item to select for +ve reward
        chosen_item = items_in_order[i]
        # code to handle unknown cases so that it does not crash
        if chosen_item not in ARM_MAP:
            continue
        # get context vector for the current row along with the context items
        context_vector = get_context_vector(
            row["CUSTOMER_TYPE"], row["STORE_NUMBER"], context_items
        )
        # append the necessary contexts ,arm_index and reward to the list for creation of training data
        contexts.append(context_vector)
        arm_indices.append(ARM_MAP[chosen_item])
        rewards.append(1.0)
        # create a list of possible negative samples that are not in the chosen items
        possible_neg_items = [item for item in ARM_VOCABULARY if item != chosen_item]
        # sample randomly '15' negative samples
        neg_items = np.random.choice(
            possible_neg_items, size=N_NEGATIVE_SAMPLES, replace=False
        )
        # for each negatuve item sampled append it lists to ensure that the model gets enough information on 'what not to suggest'.
        for neg_item in neg_items:
            contexts.append(context_vector)
            arm_indices.append(ARM_MAP[neg_item])
            rewards.append(0.0)

# Split data into training and validation sets
(
    contexts_train,
    contexts_val,
    arm_indices_train,
    arm_indices_val,
    rewards_train,
    rewards_val,
) = train_test_split(contexts, arm_indices, rewards, test_size=0.1, random_state=42)

# Create datasets for training and validation via custom created dataset
train_dataset = BanditDataset(contexts_train, arm_indices_train, rewards_train)
val_dataset = BanditDataset(contexts_val, arm_indices_val, rewards_val)

# created data loaders to be used during training and evaluation phase
train_loader = DataLoader(
    train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2
)
val_loader = DataLoader(
    val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2
)


100%|██████████| 1414410/1414410 [06:51<00:00, 3439.80it/s]
  self.contexts = torch.FloatTensor(contexts)


In [None]:
# save the train and val loaders
# with open("bandit_bnn_train_val_loaders.pkl", "wb") as f:
#     pickle.dump((train_loader, val_loader), f)

In [18]:
# # read train and val loaders
# with open("bandit_bnn_train_val_loaders.pkl", "rb") as f:
#     train_loader, val_loader = pickle.load(f)

In [None]:
# THIS CELL HERE IS TO CHECK THE BALANCE OF THE CLASSES FOR POSITIVE AND NEGATIVE REWARDS

# Convert the rewards list to a pandas Series for easy counting
rewards_series = pd.Series(rewards)

# Count the occurrences of each class (1.0 for positive, 0.0 for negative)
class_counts = rewards_series.value_counts()

print("\n--- Class Balance Check ---")
print(class_counts)

# Calculate and print the ratio
if 0.0 in class_counts and 1.0 in class_counts:
    ratio = class_counts[0.0] / class_counts[1.0]
    print(f"Positive-to-Negative Ratio: 1 to {ratio:.1f}")
print("---------------------------\n")


--- Class Balance Check ---
0.0    21090660
1.0     1406044
Name: count, dtype: int64
Positive-to-Negative Ratio: 1 to 15.0
---------------------------



# Model Architecture

In [None]:
# Custom Bayesian Neural Network (with MC Dropout)
class BanditBNN(nn.Module):
    def __init__(self, n_features, n_arms, dropout_rate=0.5):
        super(BanditBNN, self).__init__()
        self.hidden1 = nn.Linear(n_features, 160)
        self.dropout1 = nn.Dropout(p=dropout_rate)
        self.hidden2 = nn.Linear(160, 140)
        self.dropout2 = nn.Dropout(p=dropout_rate)
        self.output = nn.Linear(140, n_arms)

    def forward(self, x):
        x = torch.relu(self.hidden1(x))
        x = self.dropout1(x)
        x = torch.relu(self.hidden2(x))
        x = self.dropout2(x)
        return self.output(x)


In [None]:
# Establish model name and path and initialize the optimzer and loss function
MODEL_PATH = "bandit_bnn_model_final.pth"
model = BanditBNN(N_FEATURES, N_ARMS).to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
# used BCEWithLogitsLoss as our problem is of a binary classification nature and this loss function is stable over using sigmoid() and BCELoss()
criterion = nn.BCEWithLogitsLoss()

# Train Loop

In [None]:
# Train Loop function
def train_model():
    print("Starting GPU training with validation and early stopping...")
    print(f"LR : {LEARNING_RATE} ")
    # Initialize variables for early stopping
    best_val_loss = float("inf")
    patience_counter = 0
    save_epoch = 0
    for epoch in range(EPOCHS):
        # Training Phase
        model.train()
        total_train_loss = 0
        for context_batch, arm_idx_batch, reward_batch in tqdm(
            train_loader, desc=f"Epoch {epoch + 1}/{EPOCHS} [Train]"
        ):
            # Pass the inputs to the 'device' which is CUDA in in this case meaning "GPU".
            context_batch, arm_idx_batch, reward_batch = (
                context_batch.to(DEVICE),
                arm_idx_batch.to(DEVICE),
                reward_batch.to(DEVICE),
            )
            # clear previous accumulated gradients to prevent gradients from adding up across batches
            optimizer.zero_grad()
            # get the scores for all arms (returns a 138 size tenson of scores which are probabilities between 0-1)
            all_arm_scores = model(context_batch)
            # this code here gets the score for each chosen arm
            # that is for each chosen arm index which we are storing at the start
            # we gather the score at each index from the all_arm_scores matrix and we extract the score at that particular index
            # this returns a list of score's which is usefull for calculating the loss for eaach batch.
            chosen_arm_score = all_arm_scores.gather(
                1, arm_idx_batch.unsqueeze(1)
            ).squeeze()
            # calculating the loss
            loss = criterion(chosen_arm_score, reward_batch)
            # perform backpropagation
            loss.backward()
            # updating the weights based on the loss
            optimizer.step()
            # appending the training loss
            total_train_loss += loss.item()
        # creating the average training loss for that batch
        avg_train_loss = total_train_loss / len(train_loader)

        # Validation Phase
        model.eval()  # Set model to evaluation mode (disables dropout)
        total_val_loss = 0
        with torch.no_grad():
            for context_batch, arm_idx_batch, reward_batch in tqdm(
                val_loader, desc=f"Epoch {epoch + 1}/{EPOCHS} [Val]"
            ):
                context_batch, arm_idx_batch, reward_batch = (
                    context_batch.to(DEVICE),
                    arm_idx_batch.to(DEVICE),
                    reward_batch.to(DEVICE),
                )
                all_arm_scores = model(context_batch)
                chosen_arm_score = all_arm_scores.gather(
                    1, arm_idx_batch.unsqueeze(1)
                ).squeeze()
                loss = criterion(chosen_arm_score, reward_batch)
                total_val_loss += loss.item()
        # creates the average validation loss for that batch
        avg_val_loss = total_val_loss / len(val_loader)

        print(
            f"Epoch {epoch + 1} finished. Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}"
        )

        #  Early Stopping Logic on validation loss to avoid overfitting
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            # Save the models state (weights and biases)
            torch.save(model.state_dict(), MODEL_PATH)
            print(f"Validation loss improved. Saving best model to {MODEL_PATH}")
            save_epoch = epoch + 1  # Save the epoch when the model was saved to print
            patience_counter = 0
        else:
            patience_counter += 1
            print(
                f"Validation loss did not improve. Patience: {patience_counter}/{EARLY_STOPPING_PATIENCE}"
            )

        if patience_counter >= EARLY_STOPPING_PATIENCE:
            print("Early stopping triggered.")
            break
    print(f"Training completed. Best model saved at epoch {save_epoch}.")

# Train Model

In [23]:
train_model()

Starting GPU training with validation and early stopping...
LR : 0.001 


Epoch 1/120 [Train]: 100%|██████████| 19773/19773 [01:00<00:00, 328.03it/s]
Epoch 1/120 [Val]: 100%|██████████| 2197/2197 [00:06<00:00, 363.87it/s]


Epoch 1 finished. Train Loss: 0.1549, Val Loss: 0.1477
Validation loss improved. Saving best model to bandit_bnn_model_fourth_third.pth


Epoch 2/120 [Train]: 100%|██████████| 19773/19773 [00:59<00:00, 331.44it/s]
Epoch 2/120 [Val]: 100%|██████████| 2197/2197 [00:06<00:00, 341.89it/s]


Epoch 2 finished. Train Loss: 0.1513, Val Loss: 0.1474
Validation loss improved. Saving best model to bandit_bnn_model_fourth_third.pth


Epoch 3/120 [Train]: 100%|██████████| 19773/19773 [01:05<00:00, 303.09it/s]
Epoch 3/120 [Val]: 100%|██████████| 2197/2197 [00:06<00:00, 324.41it/s]


Epoch 3 finished. Train Loss: 0.1508, Val Loss: 0.1470
Validation loss improved. Saving best model to bandit_bnn_model_fourth_third.pth


Epoch 4/120 [Train]: 100%|██████████| 19773/19773 [00:59<00:00, 333.95it/s]
Epoch 4/120 [Val]: 100%|██████████| 2197/2197 [00:06<00:00, 358.15it/s]


Epoch 4 finished. Train Loss: 0.1505, Val Loss: 0.1472
Validation loss did not improve. Patience: 1/7


Epoch 5/120 [Train]: 100%|██████████| 19773/19773 [01:00<00:00, 324.27it/s]
Epoch 5/120 [Val]: 100%|██████████| 2197/2197 [00:06<00:00, 355.35it/s]


Epoch 5 finished. Train Loss: 0.1504, Val Loss: 0.1472
Validation loss did not improve. Patience: 2/7


Epoch 6/120 [Train]: 100%|██████████| 19773/19773 [01:02<00:00, 316.62it/s]
Epoch 6/120 [Val]: 100%|██████████| 2197/2197 [00:06<00:00, 350.40it/s]


Epoch 6 finished. Train Loss: 0.1503, Val Loss: 0.1471
Validation loss did not improve. Patience: 3/7


Epoch 7/120 [Train]: 100%|██████████| 19773/19773 [01:00<00:00, 327.46it/s]
Epoch 7/120 [Val]: 100%|██████████| 2197/2197 [00:06<00:00, 339.12it/s]


Epoch 7 finished. Train Loss: 0.1504, Val Loss: 0.1471
Validation loss did not improve. Patience: 4/7


Epoch 8/120 [Train]: 100%|██████████| 19773/19773 [00:59<00:00, 332.28it/s]
Epoch 8/120 [Val]: 100%|██████████| 2197/2197 [00:06<00:00, 364.94it/s]


Epoch 8 finished. Train Loss: 0.1505, Val Loss: 0.1473
Validation loss did not improve. Patience: 5/7


Epoch 9/120 [Train]: 100%|██████████| 19773/19773 [00:58<00:00, 335.20it/s]
Epoch 9/120 [Val]: 100%|██████████| 2197/2197 [00:06<00:00, 333.10it/s]


Epoch 9 finished. Train Loss: 0.1506, Val Loss: 0.1472
Validation loss did not improve. Patience: 6/7


Epoch 10/120 [Train]: 100%|██████████| 19773/19773 [00:59<00:00, 329.81it/s]
Epoch 10/120 [Val]: 100%|██████████| 2197/2197 [00:08<00:00, 262.61it/s]


Epoch 10 finished. Train Loss: 0.1507, Val Loss: 0.1475
Validation loss did not improve. Patience: 7/7
Early stopping triggered.
Training completed. Best model saved at epoch 3.


# Load Model

In [None]:
# Saving and Loading Models
def load_model():
    if os.path.exists(MODEL_PATH):
        print(f"Loading best model from {MODEL_PATH}")
        model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))
    else:
        print("Model file not found. Training from scratch.")
        train_model()


load_model()

Loading best model from bandit_bnn_model_fourth_third.pth


  model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))


# Recommendations function

In [None]:
# Function that returns 'top_n' item recommendations based on customer context and items in cart along with the store number.
def get_recommendations_gpu(customer_type, store_number, items_in_cart, top_n=3):
    context_vector = get_context_vector(customer_type, store_number, items_in_cart)
    context_tensor = torch.FloatTensor(context_vector).to(DEVICE)

    # Enable dropout for Monte Carlo Dropout which resembles Thompson Sampling.
    model.train()
    with torch.no_grad():
        scores = model(context_tensor)

    # Get top_n recommendations
    _, top_indices = torch.topk(scores, top_n)

    # Get the item names for the top_n indices
    return [ARM_VOCABULARY[i] for i in top_indices.cpu().numpy()]


# Example Usage
print("\n--- Generating example recommendation ---")
sample_row = test_data.iloc[0]
cart = [sample_row["item1"], sample_row["item2"], sample_row["item3"]]
recs = get_recommendations_gpu(
    sample_row["CUSTOMER_TYPE"], sample_row["STORE_NUMBER"], cart
)
print(f"Cart: {cart}")
print(f"Top 3 Recommendations: {recs}")



--- Generating example recommendation ---
Cart: ['Chicken Sub Combo', 'Ranch Dip - Regular', '10 pc Spicy Wings Combo']
Top 3 Recommendations: ['8 pc Grilled Wings Combo', '2 pc Crispy Strips', '6 pc Grilled Wings Combo']


# Testing the model on a dataframe with 4 items in order
NOTE: This data is taken from order_data_merged which was used to create the training and validation data so this is not a complete 'test' please refer to evaluate_BNN.ipynb last cell.

In [35]:
# --- 7. Final Evaluation on Holdout Set (Recall@3) ---
print("\n--- Calculating Recall@3 on a Holdout Set ---")

# Create a holdout test set from orders with exactly 4 items to mimic the competition test case
holdout_df = order_data_merged[order_data_merged["item_list"].apply(len) == 4].copy()
print(f"Created a holdout set with {len(holdout_df)} orders.")

hits = 0
total = len(holdout_df)

if total > 0:
    for _, row in tqdm(holdout_df.iterrows(), total=total, desc="Evaluating Recall@3"):
        # The first 3 items are the context (the cart)
        context_cart = row["item_list"][:-1]
        # The 4th item is the ground truth we want to predict
        ground_truth_item = row["item_list"][-1]

        # Generate top 3 recommendations
        recommendations = get_recommendations_gpu(
            row["CUSTOMER_TYPE"], row["STORE_NUMBER"], context_cart
        )

        # Check if the ground truth is in our recommendations
        if ground_truth_item in recommendations:
            hits += 1

    recall_at_3 = hits / total
    print(
        f"\nFinal Recall@3 Score on Holdout Set: {recall_at_3:.4f} ({hits}/{total} hits)"
    )
else:
    print("No orders with exactly 4 items found to create a holdout set.")


--- Calculating Recall@3 on a Holdout Set ---
Created a holdout set with 93553 orders.


Evaluating Recall@3: 100%|██████████| 93553/93553 [00:49<00:00, 1905.28it/s]


Final Recall@3 Score on Holdout Set: 0.3705 (34663/93553 hits)



