##### Preparing the Data

In [1]:
# Colab-only setup: mount Google Drive at /content/drive so the dataset CSVs
# read later from that path are accessible.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import itertools
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm

In [3]:
# Both CSVs live in the same Drive folder. Keeping the folder in one constant
# means the notebook can be pointed at another copy of the data with one edit.
DATA_DIR = '/content/drive/MyDrive/Rec. Sys'

reviews_df = pd.read_csv(f'{DATA_DIR}/reviews.csv')
recipes_df = pd.read_csv(f'{DATA_DIR}/recipes.csv')

In [4]:
recipes_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 522517 entries, 0 to 522516
Data columns (total 28 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   RecipeId                    522517 non-null  int64  
 1   Name                        522517 non-null  object 
 2   AuthorId                    522517 non-null  int64  
 3   AuthorName                  522517 non-null  object 
 4   CookTime                    439972 non-null  object 
 5   PrepTime                    522517 non-null  object 
 6   TotalTime                   522517 non-null  object 
 7   DatePublished               522517 non-null  object 
 8   Description                 522512 non-null  object 
 9   Images                      522516 non-null  object 
 10  RecipeCategory              521766 non-null  object 
 11  Keywords                    505280 non-null  object 
 12  RecipeIngredientQuantities  522514 non-null  object 
 13  RecipeIngredie

In [5]:
recipes_df['RecipeCategory'].head()

Unnamed: 0,RecipeCategory
0,Frozen Desserts
1,Chicken Breast
2,Beverages
3,Soy/Tofu
4,Vegetable


In [6]:
recipes_df['Keywords'].head()

Unnamed: 0,Keywords
0,"c(""Dessert"", ""Low Protein"", ""Low Cholesterol"",..."
1,"c(""Chicken Thigh & Leg"", ""Chicken"", ""Poultry"",..."
2,"c(""Low Protein"", ""Low Cholesterol"", ""Healthy"",..."
3,"c(""Beans"", ""Vegetable"", ""Low Cholesterol"", ""We..."
4,"c(""Low Protein"", ""Vegan"", ""Low Cholesterol"", ""..."


In [7]:
reviews_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1401982 entries, 0 to 1401981
Data columns (total 8 columns):
 #   Column         Non-Null Count    Dtype 
---  ------         --------------    ----- 
 0   ReviewId       1401982 non-null  int64 
 1   RecipeId       1401982 non-null  int64 
 2   AuthorId       1401982 non-null  int64 
 3   AuthorName     1401982 non-null  object
 4   Rating         1401982 non-null  int64 
 5   Review         1401768 non-null  object
 6   DateSubmitted  1401982 non-null  object
 7   DateModified   1401982 non-null  object
dtypes: int64(4), object(4)
memory usage: 85.6+ MB


In [8]:
recipes_df.head()

Unnamed: 0,RecipeId,Name,AuthorId,AuthorName,CookTime,PrepTime,TotalTime,DatePublished,Description,Images,...,SaturatedFatContent,CholesterolContent,SodiumContent,CarbohydrateContent,FiberContent,SugarContent,ProteinContent,RecipeServings,RecipeYield,RecipeInstructions
0,38,Low-Fat Berry Blue Frozen Dessert,1533,Dancer,PT24H,PT45M,PT24H45M,1999-08-09T21:46:00Z,Make and share this Low-Fat Berry Blue Frozen ...,"c(""https://img.sndimg.com/food/image/upload/w_...",...,1.3,8.0,29.8,37.1,3.6,30.2,3.2,4.0,,"c(""Toss 2 cups berries with sugar."", ""Let stan..."
1,39,Biryani,1567,elly9812,PT25M,PT4H,PT4H25M,1999-08-29T13:12:00Z,Make and share this Biryani recipe from Food.com.,"c(""https://img.sndimg.com/food/image/upload/w_...",...,16.6,372.8,368.4,84.4,9.0,20.4,63.4,6.0,,"c(""Soak saffron in warm milk for 5 minutes and..."
2,40,Best Lemonade,1566,Stephen Little,PT5M,PT30M,PT35M,1999-09-05T19:52:00Z,This is from one of my first Good House Keepi...,"c(""https://img.sndimg.com/food/image/upload/w_...",...,0.0,0.0,1.8,81.5,0.4,77.2,0.3,4.0,,"c(""Into a 1 quart Jar with tight fitting lid, ..."
3,41,Carina's Tofu-Vegetable Kebabs,1586,Cyclopz,PT20M,PT24H,PT24H20M,1999-09-03T14:54:00Z,This dish is best prepared a day in advance to...,"c(""https://img.sndimg.com/food/image/upload/w_...",...,3.8,0.0,1558.6,64.2,17.3,32.1,29.3,2.0,4 kebabs,"c(""Drain the tofu, carefully squeezing out exc..."
4,42,Cabbage Soup,1538,Duckie067,PT30M,PT20M,PT50M,1999-09-19T06:19:00Z,Make and share this Cabbage Soup recipe from F...,"""https://img.sndimg.com/food/image/upload/w_55...",...,0.1,0.0,959.3,25.1,4.8,17.7,4.3,4.0,,"c(""Mix everything together and bring to a boil..."


In [9]:
reviews_df.head()

Unnamed: 0,ReviewId,RecipeId,AuthorId,AuthorName,Rating,Review,DateSubmitted,DateModified
0,2,992,2008,gayg msft,5,better than any you can get at a restaurant!,2000-01-25T21:44:00Z,2000-01-25T21:44:00Z
1,7,4384,1634,Bill Hilbrich,4,"I cut back on the mayo, and made up the differ...",2001-10-17T16:49:59Z,2001-10-17T16:49:59Z
2,9,4523,2046,Gay Gilmore ckpt,2,i think i did something wrong because i could ...,2000-02-25T09:00:00Z,2000-02-25T09:00:00Z
3,13,7435,1773,Malarkey Test,5,easily the best i have ever had. juicy flavor...,2000-03-13T21:15:00Z,2000-03-13T21:15:00Z
4,14,44,2085,Tony Small,5,An excellent dish.,2000-03-28T12:51:00Z,2000-03-28T12:51:00Z


In [10]:
reviews_df.columns

Index(['ReviewId', 'RecipeId', 'AuthorId', 'AuthorName', 'Rating', 'Review',
       'DateSubmitted', 'DateModified'],
      dtype='object')

In [11]:
recipes_df.columns

Index(['RecipeId', 'Name', 'AuthorId', 'AuthorName', 'CookTime', 'PrepTime',
       'TotalTime', 'DatePublished', 'Description', 'Images', 'RecipeCategory',
       'Keywords', 'RecipeIngredientQuantities', 'RecipeIngredientParts',
       'AggregatedRating', 'ReviewCount', 'Calories', 'FatContent',
       'SaturatedFatContent', 'CholesterolContent', 'SodiumContent',
       'CarbohydrateContent', 'FiberContent', 'SugarContent', 'ProteinContent',
       'RecipeServings', 'RecipeYield', 'RecipeInstructions'],
      dtype='object')

In [12]:
# Renaming for consistency: both frames expose the join key as `recipe_id`, and
# the review author becomes `user_id`. The result is reassigned rather than
# renamed with inplace=True, so the cell produces new frames instead of mutating
# objects that earlier cells have already displayed.
reviews_df = reviews_df.rename(columns={'RecipeId': 'recipe_id', 'AuthorId': 'user_id'})
recipes_df = recipes_df.rename(columns={'RecipeId': 'recipe_id'})

In [13]:
# Attach the recipe metadata columns to every review. The inner join keeps only
# reviews whose recipe_id is also present in recipes_df.
recipe_metadata = recipes_df[
    ['recipe_id', 'Name', 'RecipeIngredientParts', 'RecipeInstructions', 'Keywords', 'RecipeCategory']
]
full_df = reviews_df.merge(recipe_metadata, how='inner', on='recipe_id')

In [14]:
# Recipes with a missing RecipeCategory get the placeholder label "Unknown",
# so the `category` column has no nulls when it is label-encoded.
full_df['category'] = full_df['RecipeCategory'].fillna("Unknown")

In [15]:
# Encoding the users, items, category feature
# One LabelEncoder per column; they are kept as separate objects because their
# fitted `classes_` are read later to size the model's embedding tables and to
# translate between raw and encoded ids.
user_encoder = LabelEncoder()
item_encoder = LabelEncoder()
category_encoder = LabelEncoder()

In [16]:
# Fit each encoder on the full merged frame (before the train/test split) and
# store the resulting integer codes, which are used as embedding indices.
full_df['user_id_enc'] = user_encoder.fit_transform(full_df['user_id'])
full_df['item_id_enc'] = item_encoder.fit_transform(full_df['recipe_id'])
full_df['category_encoded'] = category_encoder.fit_transform(full_df['category'])

In [17]:
from sklearn.model_selection import train_test_split

In [18]:
# Random 70/30 row-level split of the merged reviews; random_state pins the shuffle.
# NOTE(review): because rows are split at random, a user or recipe may end up only
# in the test split and never be seen during training - confirm this is acceptable.
train_df, test_df = train_test_split(full_df, test_size=0.3, random_state=42)

In [19]:
test_df.columns

Index(['ReviewId', 'recipe_id', 'user_id', 'AuthorName', 'Rating', 'Review',
       'DateSubmitted', 'DateModified', 'Name', 'RecipeIngredientParts',
       'RecipeInstructions', 'Keywords', 'RecipeCategory', 'category',
       'user_id_enc', 'item_id_enc', 'category_encoded'],
      dtype='object')

##### Dataset Including the Recipe Category Feature

In [20]:
class HybridRatingsDataset(Dataset):
    """Tensor-backed dataset yielding (user, item, category, rating) samples.

    The constructor reads the columns `user_id_enc`, `item_id_enc`,
    `category_encoded` and `Rating` from the given DataFrame. The three id
    columns are stored as int64 tensors and the rating as float32.
    """

    def __init__(self, df):
        def column_as_tensor(name, dtype):
            # Convert one DataFrame column into a tensor of the requested dtype.
            return torch.tensor(df[name].values, dtype=dtype)

        self.user = column_as_tensor('user_id_enc', torch.long)
        self.item = column_as_tensor('item_id_enc', torch.long)
        self.category = column_as_tensor('category_encoded', torch.long)
        self.rating = column_as_tensor('Rating', torch.float32)

    def __len__(self):
        # One sample per rating row.
        return self.rating.shape[0]

    def __getitem__(self, idx):
        fields = (self.user, self.item, self.category, self.rating)
        return tuple(field[idx] for field in fields)

##### Defining the Hybrid Model

In [21]:
import torch.nn as nn
import torch.nn.functional as F

In [22]:
class HybridNCF(nn.Module):
    """Neural collaborative filtering model with an extra category embedding.

    User, item and category ids are each embedded into `emb_dim` dimensions,
    concatenated, and passed through a ReLU MLP that ends in a single output
    unit (the predicted rating).

    Args:
        n_users: Number of rows in the user embedding table.
        n_items: Number of rows in the item embedding table.
        n_categories: Number of rows in the category embedding table.
        emb_dim: Width of each embedding vector.
        hidden: Sequence of hidden-layer widths. Every entry becomes one
            Linear + ReLU stage; the default reproduces the 64 -> 32 stack.
    """

    def __init__(self, n_users, n_items, n_categories, emb_dim=64, hidden=(64, 32)):
        super().__init__()
        self.user_embed = nn.Embedding(n_users, emb_dim)
        self.item_embed = nn.Embedding(n_items, emb_dim)
        self.cat_embed = nn.Embedding(n_categories, emb_dim)

        # Build the MLP from the hidden widths so all entries are honoured.
        # For two widths this yields the same Sequential layout (indices 0..4)
        # as a hand-written Linear/ReLU/Linear/ReLU/Linear stack.
        layers = []
        in_features = emb_dim * 3
        for width in hidden:
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.ReLU())
            in_features = width
        layers.append(nn.Linear(in_features, 1))
        self.mlp = nn.Sequential(*layers)

    def forward(self, user, item, category):
        """Return one predicted rating per input triple, shape `user.shape`."""
        ue = self.user_embed(user)
        ie = self.item_embed(item)
        ce = self.cat_embed(category)
        x = torch.cat([ue, ie, ce], dim=-1)
        # Drop only the trailing size-1 output axis. A bare squeeze() would also
        # remove the batch axis when the batch holds a single sample.
        return self.mlp(x).squeeze(-1)

##### Training Loop and Model Evaluation

In [23]:
from collections import defaultdict
from sklearn.metrics import root_mean_squared_error, mean_absolute_error, ndcg_score

In [24]:
# Training hyperparameters.
# embedding_dim / hidden_layers are passed to HybridNCF as emb_dim / hidden.
embedding_dim = 64
hidden_layers = [64, 32]
# Step size for the Adam optimizer.
learning_rate = 0.001
# Mini-batch size used by both DataLoaders.
batch_size = 256
# Number of full passes over the training loader.
epochs = 15

In [25]:
# Use a CUDA GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cpu


In [26]:
# Convert each split's encoded id columns and ratings into tensors.
train_dataset = HybridRatingsDataset(train_df)
test_dataset = HybridRatingsDataset(test_df)

In [27]:
# Only the training loader shuffles; the test loader iterates in dataset order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

In [28]:
# Vocabulary sizes taken from the fitted encoders; they size the embedding
# tables when the model is constructed.
n_users = len(user_encoder.classes_)
n_items = len(item_encoder.classes_)
n_categories = len(category_encoder.classes_)

In [29]:
# Regression setup: the model output is compared to the rating with MSE and
# all model parameters are optimised with Adam.
model = HybridNCF(n_users, n_items, n_categories, emb_dim=embedding_dim, hidden=hidden_layers).to(device)
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [30]:
def precision_recall_at_k(actual, predicted, k):
    """Compute Precision@k and Recall@k for one ranked recommendation list.

    Args:
        actual: Iterable of relevant item ids.
        predicted: Ranked sequence of recommended item ids, best first.
        k: Cut-off; only the first `k` predictions are considered.

    Returns:
        (precision, recall): distinct hits in the top-k divided by `k`, and
        divided by the number of distinct relevant items (0 if there are none).
    """
    relevant = set(actual)
    top_k = set(predicted[:k])
    n_hits = len(top_k.intersection(relevant))

    if relevant:
        recall = n_hits / len(relevant)
    else:
        recall = 0
    return n_hits / k, recall

In [31]:
def dcg_at_k(r, k):
    """Discounted cumulative gain of the first `k` relevance scores.

    Uses the exponential-gain form: sum_i (2**rel_i - 1) / log2(i + 1),
    with positions i starting at 1.

    Args:
        r: Sequence of relevance scores in ranked order.
        k: Cut-off; only the first `k` scores contribute.

    Returns:
        The DCG as a float (0.0 for an empty list).
    """
    # np.asarray(..., dtype=float) replaces np.asfarray, which NumPy 2.0 removed.
    gains = np.asarray(r, dtype=float)[:k]
    if gains.size == 0:
        return 0.0
    # Position i (1-based) is discounted by log2(i + 1), i.e. log2 of 2, 3, 4, ...
    discounts = np.log2(np.arange(2, gains.size + 2))
    return float(np.sum((2 ** gains - 1) / discounts))

In [32]:
def ndcg_at_k(predicted, actual, k=5):
    """Normalised DCG@k with binary relevance.

    Args:
        predicted: Ranked sequence of recommended item ids, best first.
        actual: Iterable of relevant item ids.
        k: Cut-off for both the predicted list and the ideal ranking.

    Returns:
        DCG of the top-k predictions divided by the DCG of an ideal ranking,
        or 0.0 when there are no relevant items.
    """
    actual_set = set(actual)
    relevance = [1 if item in actual_set else 0 for item in predicted[:k]]
    # The ideal ranking places every relevant item (at most k of them) first.
    # Sorting the *predicted* relevance instead would normalise by the hits
    # that happened to be retrieved and score any list with a hit as 1.0.
    ideal_relevance = [1] * min(len(actual_set), k)
    dcg = dcg_at_k(relevance, k)
    idcg = dcg_at_k(ideal_relevance, k)
    return dcg / idcg if idcg > 0 else 0.0

In [33]:
from collections import defaultdict

In [34]:
# Per-epoch metric histories filled in by the training cell. Re-running this
# cell resets them to empty lists.
train_losses = []
val_losses = []
rmse_scores = []
mae_scores = []
precision_scores = []
recall_scores = []
# NOTE(review): nothing in the training cell appends to ndcg_scores.
ndcg_scores = []

In [35]:
# Encoded category of every encoded item, taken from the full merged frame.
# Building it from test_df alone would leave items that occur only in the
# training split without an entry, so they would fall back to a default category.
item_id_to_category = dict(zip(full_df['item_id_enc'], full_df['category_encoded']))

In [36]:
# Size the candidate catalogue from the item encoder (the same value the model's
# item embedding table was built with). Deriving it from test_df's largest id
# would shrink n_items for every later cell that reads it.
n_items = len(item_encoder.classes_)
# Category code per item id, in item-id order; ids missing from the mapping fall back to 0.
item_categories = [item_id_to_category.get(i, 0) for i in range(n_items)]

In [37]:
from random import sample

In [None]:
# ---------------------------------------------------------------
# Evaluation settings and lookups that do not change between epochs
# ---------------------------------------------------------------
# Number of recommendations considered for Precision@k / Recall@k.
k = 5
# Chunk size used when scoring the item catalogue for one user. It has its own
# name so the DataLoader `batch_size` hyperparameter is not overwritten.
eval_batch_size = 512
# Upper bound on how many test users are ranked per epoch (ranking is slow).
max_eval_users = 1000
log_path = "sample_eval_log.txt"


def _interactions_by_user(df):
    """Map each encoded user id to the set of encoded item ids they rated in `df`."""
    by_user = defaultdict(set)
    for user_id, item_id in zip(df['user_id_enc'].tolist(), df['item_id_enc'].tolist()):
        by_user[user_id].add(item_id)
    return by_user


# Ground-truth (test) and already-seen (train) items per user. Both depend only
# on the fixed split, so they are built once instead of once per epoch.
user_item_truth = _interactions_by_user(test_df)
user_train_items = _interactions_by_user(train_df)
eval_user_count = min(max_eval_users, len(user_item_truth))

# Candidate items and their categories as tensors, reused for every user.
num_eval_items = int(n_items)
all_item_ids = torch.arange(num_eval_items, dtype=torch.long, device=device)
all_item_categories = torch.tensor(item_categories[:num_eval_items], dtype=torch.long, device=device)

for epoch in range(epochs):
    model.train()
    total_train_loss = 0

    for user, item, cat, rating in train_loader:
        user, item, cat, rating = user.to(device), item.to(device), cat.to(device), rating.to(device)

        # reshape(-1) keeps predictions 1-D even if a batch holds a single sample.
        pred = model(user, item, cat).reshape(-1)
        loss = loss_fn(pred, rating)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_train_loss += loss.item()

    avg_train_loss = total_train_loss / len(train_loader)
    train_losses.append(avg_train_loss)

    # --------------------------------
    # Validation and Ranking Metrics
    # --------------------------------
    model.eval()
    total_val_loss = 0
    all_preds = []
    all_true = []

    with torch.no_grad():
        for user, item, cat, rating in test_loader:
            user, item, cat, rating = user.to(device), item.to(device), cat.to(device), rating.to(device)

            pred = model(user, item, cat).reshape(-1)
            loss = loss_fn(pred, rating)
            total_val_loss += loss.item()

            all_preds.extend(pred.cpu().numpy())
            all_true.extend(rating.cpu().numpy())

    avg_val_loss = total_val_loss / len(test_loader)
    val_losses.append(avg_val_loss)

    # Calculate RMSE and MAE
    rmse = root_mean_squared_error(all_true, all_preds)
    mae = mean_absolute_error(all_true, all_preds)
    rmse_scores.append(rmse)
    mae_scores.append(mae)

    # --------------------------------------------------
    # Ranking Evaluation
    # --------------------------------------------------
    # Subsample users for faster eval (a fresh random sample every epoch).
    sampled_users = sample(list(user_item_truth.keys()), eval_user_count)

    # Predict top-K items: score the whole catalogue, hide items the user
    # already rated in the training split, then keep the k best.
    user_item_scores = defaultdict(dict)

    with torch.no_grad():
        for user in tqdm(sampled_users, desc="Evaluating sampled users"):
            scores = torch.empty(num_eval_items, device=device)
            for start in range(0, num_eval_items, eval_batch_size):
                end = min(start + eval_batch_size, num_eval_items)
                user_batch = torch.full((end - start,), user, dtype=torch.long, device=device)
                scores[start:end] = model(
                    user_batch, all_item_ids[start:end], all_item_categories[start:end]
                ).reshape(-1)

            # Training items can never be recommended: push them to -inf.
            seen = [i for i in user_train_items.get(user, ()) if i < num_eval_items]
            if seen:
                scores[torch.tensor(seen, dtype=torch.long, device=device)] = float('-inf')

            top_scores, top_items = torch.topk(scores, min(k, num_eval_items))
            # Insertion order is best-first; masked items are dropped in case
            # fewer than k unseen candidates exist.
            user_item_scores[user] = {
                item_id: score
                for item_id, score in zip(top_items.tolist(), top_scores.tolist())
                if score != float('-inf')
            }

    # Evaluate top-K predictions
    precision_total, recall_total = 0, 0
    for user in sampled_users:
        true_items = user_item_truth[user]
        hits = set(user_item_scores[user]) & true_items

        precision_total += len(hits) / k
        recall_total += len(hits) / len(true_items)

    avg_precision = precision_total / eval_user_count if eval_user_count else 0.0
    avg_recall = recall_total / eval_user_count if eval_user_count else 0.0
    precision_scores.append(avg_precision)
    recall_scores.append(avg_recall)

    # Per-user details for the first few sampled users, computed once and
    # reused for both the console output and the log file.
    sample_report = []
    for user in sampled_users[:5]:
        true_items = user_item_truth[user]
        predicted_items = list(user_item_scores[user].keys())
        sample_report.append((user, true_items, predicted_items, set(predicted_items) & true_items))

    # Sample user output
    print("\n-- Sample User Evaluation --")
    for user, true_items, predicted_items, hits in sample_report:
        print(f"\nUser ID: {user}")
        print(f" Ground Truth Items (from test): {sorted(true_items)}")
        print(f" Top-{k} Predicted Items       : {predicted_items}")
        print(f" Hits                          : {sorted(hits)}")
        print(f" Precision@{k}                 : {len(hits)/k:.4f}")
        print(f" Recall@{k}                    : {len(hits)/len(true_items):.4f}")

    print(f"\nEpoch {epoch+1}/{epochs}, Train Loss: {avg_train_loss:.4f}, "
          f"Val Loss: {avg_val_loss:.4f}, RMSE: {rmse:.4f}, MAE: {mae:.4f}, "
          f"Precision@{k}: {avg_precision:.4f}, Recall@{k}: {avg_recall:.4f}")

    # Log to file (append mode, so earlier epochs and runs are kept)
    with open(log_path, "a") as f:
        f.write(f"\nEpoch {epoch + 1}/{epochs}\n")
        f.write(f"Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, "
                f"RMSE: {rmse:.4f}, MAE: {mae:.4f}, "
                f"Precision@{k}: {avg_precision:.4f}, Recall@{k}: {avg_recall:.4f}\n")

        f.write("--- Sample User Evaluation ---\n")
        for user, true_items, predicted_items, hits in sample_report:
            f.write(f"\nUser ID: {user}\n")
            f.write(f"  Ground Truth Items (from test): {sorted(true_items)}\n")
            f.write(f"  Top-{k} Predicted Items       : {predicted_items}\n")
            f.write(f"  Hits                          : {sorted(hits)}\n")
            f.write(f"  Precision@{k}: {len(hits)/k:.4f}, Recall@{k}: {len(hits)/len(true_items):.4f}\n")


Evaluating sampled users: 100%|██████████| 1000/1000 [07:40<00:00,  2.17it/s]



-- Sample User Evaluation --

User ID: 91210
 Ground Truth Items (from test): [62809]
 Top-5 Predicted Items       : [np.int64(174679), np.int64(148910), np.int64(144683), np.int64(56200), np.int64(38586)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 38206
 Ground Truth Items (from test): [17404, 28293, 105085]
 Top-5 Predicted Items       : [np.int64(140276), np.int64(16664), np.int64(220229), np.int64(136609), np.int64(29674)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 48427
 Ground Truth Items (from test): [3133]
 Top-5 Predicted Items       : [np.int64(126524), np.int64(40003), np.int64(194306), np.int64(126110), np.int64(219204)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 60302
 Ground Truth Items (from test): [23847, 43169, 103411, 142372]
 Top-5 P

Evaluating sampled users: 100%|██████████| 1000/1000 [07:57<00:00,  2.09it/s]



-- Sample User Evaluation --

User ID: 103566
 Ground Truth Items (from test): [135211]
 Top-5 Predicted Items       : [np.int64(263457), np.int64(120661), np.int64(22845), np.int64(163772), np.int64(53884)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 137509
 Ground Truth Items (from test): [207353]
 Top-5 Predicted Items       : [np.int64(11090), np.int64(25541), np.int64(206937), np.int64(135668), np.int64(14030)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 121914
 Ground Truth Items (from test): [69232]
 Top-5 Predicted Items       : [np.int64(263457), np.int64(27722), np.int64(17763), np.int64(104888), np.int64(219524)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 1043
 Ground Truth Items (from test): [11283]
 Top-5 Predicted Items       : [np.int64(26

Evaluating sampled users: 100%|██████████| 1000/1000 [08:11<00:00,  2.03it/s]



-- Sample User Evaluation --

User ID: 201064
 Ground Truth Items (from test): [269752]
 Top-5 Predicted Items       : [np.int64(117296), np.int64(263522), np.int64(82710), np.int64(164339), np.int64(263457)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 158349
 Ground Truth Items (from test): [94137]
 Top-5 Predicted Items       : [np.int64(19156), np.int64(217940), np.int64(27821), np.int64(153161), np.int64(146509)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 43954
 Ground Truth Items (from test): [55837]
 Top-5 Predicted Items       : [np.int64(84285), np.int64(31244), np.int64(244316), np.int64(17763), np.int64(9496)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 157444
 Ground Truth Items (from test): [860]
 Top-5 Predicted Items       : [np.int64(26345

Evaluating sampled users: 100%|██████████| 1000/1000 [08:08<00:00,  2.05it/s]



-- Sample User Evaluation --

User ID: 79713
 Ground Truth Items (from test): [3854]
 Top-5 Predicted Items       : [np.int64(166665), np.int64(18880), np.int64(228526), np.int64(18388), np.int64(101857)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 86736
 Ground Truth Items (from test): [38447]
 Top-5 Predicted Items       : [np.int64(233941), np.int64(56573), np.int64(159805), np.int64(263457), np.int64(13974)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 131060
 Ground Truth Items (from test): [239787]
 Top-5 Predicted Items       : [np.int64(32405), np.int64(178566), np.int64(188164), np.int64(61998), np.int64(187277)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 106496
 Ground Truth Items (from test): [62135]
 Top-5 Predicted Items       : [np.int64(176

Evaluating sampled users: 100%|██████████| 1000/1000 [08:18<00:00,  2.00it/s]



-- Sample User Evaluation --

User ID: 43368
 Ground Truth Items (from test): [32406]
 Top-5 Predicted Items       : [np.int64(217940), np.int64(263522), np.int64(132691), np.int64(95341), np.int64(233941)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 142979
 Ground Truth Items (from test): [18447, 20322]
 Top-5 Predicted Items       : [np.int64(181987), np.int64(93641), np.int64(181486), np.int64(269615), np.int64(85463)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 35543
 Ground Truth Items (from test): [56880, 108779]
 Top-5 Predicted Items       : [np.int64(217940), np.int64(16978), np.int64(86241), np.int64(270019), np.int64(69706)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 216445
 Ground Truth Items (from test): [1109]
 Top-5 Predicted Items       :

Evaluating sampled users: 100%|██████████| 1000/1000 [08:14<00:00,  2.02it/s]



-- Sample User Evaluation --

User ID: 122579
 Ground Truth Items (from test): [19099]
 Top-5 Predicted Items       : [np.int64(72011), np.int64(70809), np.int64(217391), np.int64(167591), np.int64(126650)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 198692
 Ground Truth Items (from test): [93763]
 Top-5 Predicted Items       : [np.int64(166603), np.int64(174680), np.int64(239031), np.int64(186992), np.int64(134052)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 120391
 Ground Truth Items (from test): [233906]
 Top-5 Predicted Items       : [np.int64(61793), np.int64(43435), np.int64(156712), np.int64(183416), np.int64(102099)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 78401
 Ground Truth Items (from test): [146830]
 Top-5 Predicted Items       : [np.int6

Evaluating sampled users: 100%|██████████| 1000/1000 [08:02<00:00,  2.07it/s]



-- Sample User Evaluation --

User ID: 210152
 Ground Truth Items (from test): [180408]
 Top-5 Predicted Items       : [np.int64(15261), np.int64(32508), np.int64(165988), np.int64(169339), np.int64(16978)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 246371
 Ground Truth Items (from test): [43365]
 Top-5 Predicted Items       : [np.int64(153161), np.int64(84966), np.int64(110187), np.int64(39391), np.int64(33328)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 132427
 Ground Truth Items (from test): [17396, 32304]
 Top-5 Predicted Items       : [np.int64(199038), np.int64(267522), np.int64(223074), np.int64(246745), np.int64(110187)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 131414
 Ground Truth Items (from test): [2663, 171334]
 Top-5 Predicted Items     

Evaluating sampled users: 100%|██████████| 1000/1000 [08:06<00:00,  2.06it/s]



-- Sample User Evaluation --

User ID: 84195
 Ground Truth Items (from test): [464, 50215]
 Top-5 Predicted Items       : [np.int64(164065), np.int64(26171), np.int64(157546), np.int64(4906), np.int64(198243)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 19420
 Ground Truth Items (from test): [10157]
 Top-5 Predicted Items       : [np.int64(179909), np.int64(238749), np.int64(133611), np.int64(174680), np.int64(53067)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 92111
 Ground Truth Items (from test): [126491]
 Top-5 Predicted Items       : [np.int64(197179), np.int64(153424), np.int64(146852), np.int64(15259), np.int64(129635)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 144684
 Ground Truth Items (from test): [119432]
 Top-5 Predicted Items       : [np.in

Evaluating sampled users: 100%|██████████| 1000/1000 [08:16<00:00,  2.02it/s]



-- Sample User Evaluation --

User ID: 128185
 Ground Truth Items (from test): [96475]
 Top-5 Predicted Items       : [np.int64(228526), np.int64(199038), np.int64(74731), np.int64(58478), np.int64(63631)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 55016
 Ground Truth Items (from test): [9290, 23348, 29711, 39707, 42696, 46945, 56887, 61513, 64457, 75086, 83607, 84010, 97777, 126977, 161051]
 Top-5 Predicted Items       : [np.int64(78142), np.int64(171784), np.int64(81218), np.int64(170790), np.int64(169522)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 261905
 Ground Truth Items (from test): [40448]
 Top-5 Predicted Items       : [np.int64(199038), np.int64(228526), np.int64(144607), np.int64(109989), np.int64(77038)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

U

Evaluating sampled users: 100%|██████████| 1000/1000 [08:21<00:00,  1.99it/s]



-- Sample User Evaluation --

User ID: 45672
 Ground Truth Items (from test): [14231]
 Top-5 Predicted Items       : [np.int64(103563), np.int64(101759), np.int64(217940), np.int64(167742), np.int64(193896)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 58625
 Ground Truth Items (from test): [615, 9408, 11471, 13240, 17568, 19065, 19765, 37506, 38712, 42690, 45896, 49127, 55379, 56636, 58959, 65880, 76939, 100376, 107185, 107356, 114916, 137433, 149549, 151703, 155231, 163101, 167751, 168142, 212205, 216281, 220331]
 Top-5 Predicted Items       : [np.int64(160008), np.int64(56204), np.int64(171061), np.int64(130530), np.int64(112493)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 65721
 Ground Truth Items (from test): [5534]
 Top-5 Predicted Items       : [np.int64(227503), np.int64(77038), np.int64(147284), np.int64(270705), np.int64(

Evaluating sampled users: 100%|██████████| 1000/1000 [08:14<00:00,  2.02it/s]



-- Sample User Evaluation --

User ID: 243109
 Ground Truth Items (from test): [4581]
 Top-5 Predicted Items       : [np.int64(38297), np.int64(153161), np.int64(217881), np.int64(39391), np.int64(160799)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 81532
 Ground Truth Items (from test): [860, 33581]
 Top-5 Predicted Items       : [np.int64(132433), np.int64(35198), np.int64(50469), np.int64(160008), np.int64(228848)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 133190
 Ground Truth Items (from test): [48447]
 Top-5 Predicted Items       : [np.int64(167105), np.int64(224535), np.int64(37714), np.int64(43323), np.int64(12164)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 39141
 Ground Truth Items (from test): [108073]
 Top-5 Predicted Items       : [np.int64

Evaluating sampled users: 100%|██████████| 1000/1000 [08:24<00:00,  1.98it/s]



-- Sample User Evaluation --

User ID: 10811
 Ground Truth Items (from test): [13574, 45776, 76568, 78385, 102415, 159065, 159883, 191733, 202438]
 Top-5 Predicted Items       : [np.int64(82217), np.int64(123963), np.int64(92038), np.int64(116147), np.int64(214534)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 22905
 Ground Truth Items (from test): [36345, 131193, 247286]
 Top-5 Predicted Items       : [np.int64(82217), np.int64(199038), np.int64(101197), np.int64(33328), np.int64(19012)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 2857
 Ground Truth Items (from test): [4864, 11795, 11806, 11909]
 Top-5 Predicted Items       : [np.int64(31700), np.int64(94934), np.int64(35198), np.int64(23404), np.int64(140024)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 

Evaluating sampled users: 100%|██████████| 1000/1000 [08:30<00:00,  1.96it/s]



-- Sample User Evaluation --

User ID: 36766
 Ground Truth Items (from test): [69003]
 Top-5 Predicted Items       : [np.int64(134501), np.int64(196290), np.int64(129635), np.int64(38347), np.int64(262504)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 3524
 Ground Truth Items (from test): [4950, 6283, 6466, 13666]
 Top-5 Predicted Items       : [np.int64(199038), np.int64(73493), np.int64(238816), np.int64(131395), np.int64(119441)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 83052
 Ground Truth Items (from test): [106922]
 Top-5 Predicted Items       : [np.int64(37714), np.int64(5596), np.int64(199592), np.int64(40179), np.int64(79964)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 30978
 Ground Truth Items (from test): [1985, 23536, 69152]
 Top-5 Predicted

Evaluating sampled users: 100%|██████████| 1000/1000 [08:11<00:00,  2.03it/s]



-- Sample User Evaluation --

User ID: 73785
 Ground Truth Items (from test): [170137]
 Top-5 Predicted Items       : [np.int64(236831), np.int64(77038), np.int64(19492), np.int64(85108), np.int64(58221)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 89678
 Ground Truth Items (from test): [24608, 86241]
 Top-5 Predicted Items       : [np.int64(199038), np.int64(23661), np.int64(164065), np.int64(132080), np.int64(38297)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 196774
 Ground Truth Items (from test): [113714, 183169]
 Top-5 Predicted Items       : [np.int64(47216), np.int64(16978), np.int64(256213), np.int64(131569), np.int64(22853)]
 Hits                          : []
 Precision@5                 : 0.0000
 Recall@5                    : 0.0000

User ID: 257522
 Ground Truth Items (from test): [20673]
 Top-5 Predicted Items       :

Evaluating sampled users:   8%|▊         | 85/1000 [00:41<07:24,  2.06it/s]

##### Loss Visualization

In [None]:
# Plot every per-epoch metric history on shared axes. The y-axis carries a
# neutral label because the curves mix MSE losses with RMSE, MAE and ranking
# metrics. Epochs are numbered from 1 to match the training log.
fig, ax = plt.subplots(figsize=(10, 5), dpi=200)

metric_series = [
    (train_losses, 'Training Loss', {'marker': 'o'}),
    (val_losses, 'Validation Loss', {'marker': 'x'}),
    (rmse_scores, 'RMSE', {'linestyle': '--'}),
    (mae_scores, 'MAE', {'linestyle': '--'}),
    (precision_scores, f'Precision@{k}', {'linestyle': '-.'}),
    (recall_scores, f'Recall@{k}', {'linestyle': ':'}),
]
for values, label, line_style in metric_series:
    # Histories can differ in length if training was interrupted mid-epoch,
    # so each series gets its own x range.
    ax.plot(range(1, len(values) + 1), values, label=label, **line_style)

ax.set_title('HybridNCF Training Progress')
ax.set_xlabel('Epoch')
ax.set_ylabel('Metric value')
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()

In [None]:
recall_scores

In [None]:
def _last(values):
    """Return the most recent entry of a metric history, or NaN if it is empty."""
    return values[-1] if values else float('nan')


# "Final" means the value from the last completed epoch. Averaging over all
# epochs would blend in the scores of the barely-trained early epochs.
print(f"Final RMSE: {_last(rmse_scores):.4f}")
print(f"Final MAE: {_last(mae_scores):.4f}")
print(f"Final Precision@{k}: {_last(precision_scores):.4f}")
print(f"Final Recall@{k}: {_last(recall_scores):.4f}")

##### Top-N Recommendation with Full Meal Info

In [None]:
def recommend_top_n(user_id_val, model, n):
    """Return the ``n`` highest-scoring recipes for one user, with meal details.

    Every encoded item (indices ``0 .. n_items - 1``) is scored for the user in
    a single forward pass, and the top ``n`` are looked up in ``recipes_df``.

    Args:
        user_id_val: Raw (un-encoded) user ID; must be in ``user_encoder.classes_``.
        model: Model invoked as ``model(user_tensor, item_tensor, category_tensor)``.
        n: Number of recommendations requested. Values larger than ``n_items``
            are clamped to ``n_items``.

    Returns:
        A list of dicts with keys ``"recipe_id"``, ``"name"``, ``"ingredients"``
        and ``"instructions"``, ordered from highest to lowest score. If a
        recommended recipe ID has no row in ``recipes_df``, its detail fields
        are ``None``.

    Raises:
        ValueError: If ``user_id_val`` is not known to ``user_encoder``.
    """
    model.eval()

    if user_id_val not in user_encoder.classes_:
        raise ValueError(f"User ID {user_id_val} not in training data")

    user_id = user_encoder.transform([user_id_val])[0]
    user_tensor = torch.tensor([user_id] * n_items, dtype=torch.long).to(device)

    item_indices = np.arange(n_items)
    item_tensor = torch.tensor(item_indices, dtype=torch.long).to(device)
    recipe_ids = item_encoder.inverse_transform(item_indices)

    recipe_info = recipes_df[['recipe_id', 'RecipeCategory', 'Name', 'RecipeIngredientParts', 'RecipeInstructions']].copy()
    recipe_info['RecipeCategory'] = recipe_info['RecipeCategory'].fillna('Unknown')

    # Mapping known categories, assigning 'Unknown' for unseen categories.
    # NOTE(review): this assumes 'Unknown' is itself one of
    # category_encoder.classes_ - confirm against where the encoder is fitted.
    known_categories = set(category_encoder.classes_)
    recipe_info['SafeCategory'] = recipe_info['RecipeCategory'].apply(
        lambda x: x if x in known_categories else 'Unknown'
    )
    recipe_info['category_encoded'] = category_encoder.transform(recipe_info['SafeCategory'])

    # One row per recipe_id (the first occurrence, matching the previous
    # `.values[0]` / `.iloc[0]` behaviour), indexed for direct lookup. This
    # replaces a full-DataFrame boolean scan per item with a single alignment.
    recipe_lookup = recipe_info.drop_duplicates(subset='recipe_id').set_index('recipe_id', drop=False)

    # Items without a recipe row fall back to category code 0, as before.
    category_encoded = (
        recipe_lookup['category_encoded']
        .reindex(recipe_ids)
        .fillna(0)
        .astype('int64')
        .to_numpy()
    )
    category_tensor = torch.tensor(category_encoded, dtype=torch.long).to(device)

    # torch.topk raises when k exceeds the number of scored items.
    top_k = min(n, n_items)

    with torch.no_grad():
        scores = model(user_tensor, item_tensor, category_tensor)
        top_indices = torch.topk(scores, top_k).indices.cpu().numpy()

    top_recipes = []
    for idx in top_indices:
        rid = recipe_ids[idx]
        if rid in recipe_lookup.index:
            recipe_row = recipe_lookup.loc[rid]
            top_recipes.append({
                "recipe_id": recipe_row['recipe_id'],
                "name": recipe_row['Name'],
                "ingredients": recipe_row['RecipeIngredientParts'],
                "instructions": recipe_row['RecipeInstructions']
            })
        else:
            # The item was scorable (category fallback above) but has no
            # metadata row; report it without details instead of raising.
            top_recipes.append({
                "recipe_id": rid,
                "name": None,
                "ingredients": None,
                "instructions": None
            })

    return top_recipes

In [None]:
from collections import defaultdict

In [None]:
# Creating dictionary: user_id -> list of recipe_ids they interacted with in test set.
# defaultdict(list) gives every new user_id an empty list on first access,
# so entries can be appended without checking whether the key exists.
test_interactions = defaultdict(list)

In [None]:
# Collect each user's recipe IDs from test_df.
# The two columns are iterated directly rather than via DataFrame.iterrows():
# iterrows builds a Series per row and coerces the row to one common dtype
# (integer IDs can turn into floats when other columns are float), and it is
# much slower on large frames.
for user, recipe in zip(test_df['user_id'], test_df['recipe_id']):
    test_interactions[user].append(recipe)

In [None]:
# Freeze the defaultdict into a plain dict (a missing key now raises KeyError
# instead of silently creating an empty entry), then list the unique test users.
test_interactions = {**test_interactions}
test_users = [*test_interactions]

##### Example for User

In [None]:
# Show the first 15 values of test_df's user_id column.
test_df['user_id'].head(15)

In [None]:
# Per-user NDCG values are collected in this list by the evaluation loop that follows.
ndcg_scores = []

In [None]:
# Average NDCG@n over every test user the model can score.
n = 5

# Reset the accumulator here so re-running this cell does not append to the
# scores left over from a previous run.
ndcg_scores = []
skipped_users = 0

for user_id in test_users:
    try:
        recommendations = recommend_top_n(user_id, model, n=n)
    except ValueError:
        # recommend_top_n raises ValueError for users missing from user_encoder;
        # count them so the skip is visible rather than silent.
        skipped_users += 1
        continue

    predicted_ids = [rec["recipe_id"] for rec in recommendations]
    actual_ids = test_interactions[user_id]

    # Use the same cutoff as the number of recommendations requested, so the
    # reported "NDCG@n" label always matches what was computed.
    ndcg = ndcg_at_k(predicted_ids, actual_ids, k=n)
    ndcg_scores.append(ndcg)

# np.mean([]) would emit a warning; report NaN explicitly when nothing was scored.
average_ndcg = np.mean(ndcg_scores) if ndcg_scores else float("nan")
print(f"Average NDCG@{n}: {average_ndcg:.4f}")
if skipped_users:
    print(f"Skipped {skipped_users} of {len(test_users)} test users (ValueError from recommend_top_n)")

In [None]:
# Pretty-print whatever `recommendations` currently holds, i.e. the list
# produced by the most recent successful recommend_top_n call in the loop above.
for rank, recipe in enumerate(recommendations, start=1):
    name = recipe['name']
    ingredients = recipe['ingredients']
    instructions = recipe['instructions']
    print(f"\n📌 Recommendation {rank}: {name}")
    print(f"📝 Ingredients: {ingredients}")
    print(f"📖 Instructions: {instructions}")

##### Saving the Trained Model

In [None]:
# Bundle the weights with everything needed to rebuild the recommender:
# the three fitted encoders, the recipe table and the architecture settings.
# NOTE(review): the encoders and the DataFrame are pickled next to the tensors,
# so reloading presumably needs torch.load(..., weights_only=False) on recent
# PyTorch versions and should only be done with trusted files - confirm.
checkpoint = {
    'model_state_dict': model.state_dict(),
    'user_encoder': user_encoder,
    'item_encoder': item_encoder,
    'category_encoder': category_encoder,
    'recipes_df': recipes_df,
    'embedding_dim': embedding_dim,
    'hidden_layers': hidden_layers
}
torch.save(checkpoint, 'hybrid_food_recommender.pth')