**Movie Recommendation System - Collaborative Denoising Auto-Encoders**

In [None]:
# ===== Block 1: Setup Environment =====

import numpy as np
import pandas as pd
import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
from scipy import sparse
import random

# Mount Google Drive at /content/drive so later cells can read and write
# files under /content/drive/MyDrive (interaction matrix, checkpoints, model).
from google.colab import drive
drive.mount('/content/drive')

# Fix random seeds for reproducibility
def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch random number generators.

    Also switches cuDNN to deterministic mode and disables its benchmark
    auto-tuner.

    Args:
        seed: Integer seed applied to every generator (default 42).
    """
    # Each generator is independent, so the seeding order is irrelevant.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

set_seed(seed=42)

# Check device: use the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print("Using device:", device)


Mounted at /content/drive
Using device: cuda


In [None]:
# ===== Block 2: Data Download and Preprocessing =====

import pandas as pd
import numpy as np
from scipy import sparse
import os

# Step 1: Download MovieLens 25M dataset
!wget -q --show-progress https://files.grouplens.org/datasets/movielens/ml-25m.zip
!unzip -q ml-25m.zip

# Step 2: Load ratings.csv
ratings = pd.read_csv('ml-25m/ratings.csv')

# Step 3: Filter users with at least 5 ratings
user_counts = ratings['userId'].value_counts()
filtered_users = user_counts[user_counts >= 5].index
# .copy() makes the filtered frame independent of the raw one, so the column
# assignments in Step 4 write to this frame directly and cannot trigger
# pandas' SettingWithCopyWarning.
ratings = ratings[ratings['userId'].isin(filtered_users)].copy()

# Step 4: Map userId and movieId to continuous indices
# Indices follow order of first appearance in the filtered ratings.
user2idx = {user_id: idx for idx, user_id in enumerate(ratings['userId'].unique())}
item2idx = {item_id: idx for idx, item_id in enumerate(ratings['movieId'].unique())}

ratings['userId'] = ratings['userId'].map(user2idx)
ratings['movieId'] = ratings['movieId'].map(item2idx)

# Each mapping has exactly one entry per distinct id, so its length equals the
# number of distinct users / items without another pass over the ratings.
num_users = len(user2idx)
num_items = len(item2idx)

print(f"Total users: {num_users}, Total items: {num_items}")

# Step 5: Create Interaction Matrix
# Build the binary user x item matrix in one vectorised call instead of
# assigning one element at a time in a Python loop over every rating.
user_indices = ratings['userId'].to_numpy()
item_indices = ratings['movieId'].to_numpy()
interaction_matrix = sparse.csr_matrix(
    (np.ones(len(ratings), dtype=np.float64), (user_indices, item_indices)),
    shape=(num_users, num_items),
)
# The (data, (row, col)) constructor sums duplicate (user, item) pairs;
# resetting the stored values keeps the matrix strictly binary, matching the
# original "set the entry to 1" semantics.
interaction_matrix.data[:] = 1

# Step 6: Save Interaction Matrix
os.makedirs('/content/drive/MyDrive/CDAE_Final_Model', exist_ok=True)
# The matrix is already CSR, which is the format the original saved.
sparse.save_npz('/content/drive/MyDrive/CDAE_Final_Model/interaction_matrix.npz', interaction_matrix)

print("Interaction matrix saved successfully at /content/drive/MyDrive/CDAE_Final_Model/interaction_matrix.npz")


replace ml-25m/tags.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: A
Total users: 162541, Total items: 59047
Interaction matrix saved successfully at /content/drive/MyDrive/CDAE_Final_Model/interaction_matrix.npz


In [None]:
# ===== Block 3: Model Definition (CDAE) =====

class CDAE(nn.Module):
    """Collaborative Denoising Auto-Encoder with a single hidden layer.

    The input interaction vector is corrupted with dropout, projected to the
    hidden size, offset by a per-user embedding, and decoded back to one
    sigmoid score per item.

    Args:
        num_users: Number of rows in the user embedding table.
        num_items: Length of each input/output interaction vector.
        embedding_dim: Size of the hidden layer and of the user embedding.
        dropout_rate: Dropout probability applied to the input vector.
    """

    def __init__(self, num_users, num_items, embedding_dim=200, dropout_rate=0.5):
        super().__init__()
        self.num_users, self.num_items = num_users, num_items
        self.embedding_dim = embedding_dim

        # Sub-modules are created in the same order as before so that a fixed
        # seed produces the same initial parameters.
        self.user_embedding = nn.Embedding(num_users, embedding_dim)  # per-user hidden offset
        self.encoder = nn.Linear(num_items, embedding_dim)            # items -> hidden
        self.decoder = nn.Linear(embedding_dim, num_items)            # hidden -> items
        self.dropout = nn.Dropout(dropout_rate)                       # input corruption

    def forward(self, x, user_id):
        """Return sigmoid scores for every item.

        Args:
            x: Interaction vectors fed to the encoder (last dim is num_items).
            user_id: Indices into the user embedding table, one per row of x.
        """
        corrupted = self.dropout(x)
        hidden = torch.sigmoid(self.encoder(corrupted) + self.user_embedding(user_id))
        return torch.sigmoid(self.decoder(hidden))


In [None]:
# ===== Block 4: Training Setup =====

from torch.utils.data import Dataset, DataLoader

# Dataset Class
class InteractionDataset(Dataset):
    """Serve the rows of a sparse interaction matrix one at a time.

    Each item is a pair ``(dense_row, row_index)``: the row densified to a
    float32 vector, and its index wrapped in a length-1 int64 tensor.
    """

    def __init__(self, interaction_matrix):
        # Kept sparse; a row is only densified when it is requested.
        self.interactions = interaction_matrix

    def __len__(self):
        n_rows = self.interactions.shape[0]
        return n_rows

    def __getitem__(self, idx):
        dense_row = self.interactions[idx].toarray().flatten()
        vector = torch.tensor(dense_row, dtype=torch.float32)
        row_index = torch.tensor([idx], dtype=torch.long)
        return vector, row_index

# Load saved interaction matrix
from scipy import sparse
interaction_matrix = sparse.load_npz('/content/drive/MyDrive/CDAE_Final_Model/interaction_matrix.npz')

# Take the model dimensions from the matrix that was just loaded, so this cell
# does not depend on num_users/num_items still being in the kernel from the
# preprocessing cell.
num_users, num_items = interaction_matrix.shape

# Create train-validation split
full_dataset = InteractionDataset(interaction_matrix)

train_size = int(0.9 * len(full_dataset))
val_size = len(full_dataset) - train_size
# A dedicated seeded generator makes the 90/10 user split identical on every
# run, regardless of how much of the global RNG stream earlier cells consumed.
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset = torch.utils.data.random_split(
    full_dataset, [train_size, val_size], generator=split_generator
)
# NOTE(review): the split is over users (matrix rows), so validation users are
# never seen by train_loader and their user_embedding rows get no training
# signal — confirm this is the intended evaluation protocol.

train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)
validation_loader = DataLoader(val_dataset, batch_size=512, shuffle=False)

# Initialize model, optimizer, scheduler
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = CDAE(num_users=num_users, num_items=num_items).to(device)
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-5)
# Halve the learning rate every 30 scheduler steps (stepped once per epoch).
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.5)

# Loss Function
# BCELoss expects probabilities; CDAE.forward already ends in a sigmoid.
criterion = nn.BCELoss()


In [None]:
!pip install tqdm




In [None]:
# ===== Block 5: Training Loop with Evaluation and Progress Bar =====

import os
import numpy as np
from tqdm import tqdm  # <-- Progress bar import

# ===== Setup paths =====
# Periodic checkpoints written during training go here.
checkpoint_dir = '/content/drive/MyDrive/CDAE_Checkpoints'
os.makedirs(checkpoint_dir, exist_ok=True)

# The model saved after the last epoch goes here.
final_model_dir = '/content/drive/MyDrive/CDAE_Final_Model'
os.makedirs(final_model_dir, exist_ok=True)

# ===== Define evaluation metrics =====
def recall_at_k(predictions, ground_truth, k=10):
    """Mean Recall@k over the rows of a batch.

    For each row, recall is the number of relevant items among the k
    highest-scored items divided by the row's total number of relevant items.
    Rows with no relevant items contribute 0.

    Args:
        predictions: 2-D tensor of item scores, one row per user.
        ground_truth: 2-D tensor of the same shape; entries > 0 are relevant.
        k: Cut-off rank. Values larger than the number of items are clamped.

    Returns:
        Python float: the mean recall over the rows.
    """
    # topk raises if k exceeds the number of columns, so clamp it.
    k = min(k, predictions.size(1))
    top_k = predictions.topk(k, dim=1)[1]
    relevant = (ground_truth > 0).float()
    hits = relevant.gather(1, top_k)
    # Count relevant items rather than summing raw values, so non-binary
    # ground truth does not distort the denominator; clamp avoids 0/0.
    recall = hits.sum(1) / torch.clamp(relevant.sum(1), min=1.0)
    return recall.mean().item()

def ndcg_at_k(predictions, ground_truth, k=10):
    """Mean NDCG@k over the rows of a batch.

    DCG sums the ground-truth gains of the k highest-scored items, each
    divided by log2(rank + 1). It is normalised by the DCG of the ideal
    ordering, i.e. the k largest ground-truth gains in descending order.

    Args:
        predictions: 2-D tensor of item scores, one row per user.
        ground_truth: 2-D tensor of the same shape holding per-item gains.
        k: Cut-off rank. Values larger than the number of items are clamped.

    Returns:
        Python float: the mean NDCG over the rows.
    """
    # Clamp so topk and the discount vector stay consistent with the row width.
    k = min(k, predictions.size(1))
    top_k = predictions.topk(k, dim=1)[1]
    gains = ground_truth.gather(1, top_k).float()
    # Discount for rank r (1-based) is log2(r + 1): log2(2), log2(3), ...
    discounts = torch.log2(torch.arange(2, k + 2, device=ground_truth.device).float())
    dcg = (gains / discounts).sum(1)
    # topk returns the k largest gains already sorted in descending order,
    # which avoids fully sorting every row just to keep its first k values.
    ideal_gains = ground_truth.topk(k, dim=1)[0].float()
    ideal_dcg = (ideal_gains / discounts).sum(1)
    # Rows without any gain have ideal_dcg == 0; the clamp makes them score 0.
    ndcg = (dcg / torch.clamp(ideal_dcg, min=1e-10)).mean().item()
    return ndcg

# ===== Training Settings =====
EPOCHS = 130
start_epoch = 0

# ===== Training Loop =====
for epoch in range(start_epoch, EPOCHS):
    model.train()
    epoch_loss = 0.0

    # Progress bar for training batches
    loop = tqdm(train_loader, leave=True, desc=f"Epoch {epoch+1}/{EPOCHS}")

    for user_vectors, user_ids in loop:
        user_vectors = user_vectors.to(device)
        # The dataset yields ids of shape (1,), so a batch is (B, 1).
        # reshape(-1) always gives a 1-D vector, whereas squeeze() would
        # collapse a batch of one to a 0-d scalar.
        user_ids = user_ids.reshape(-1).to(device)

        optimizer.zero_grad()
        outputs = model(user_vectors, user_ids)

        # Reconstruction target is the uncorrupted input vector.
        loss = criterion(outputs, user_vectors)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

        loop.set_postfix(loss=loss.item())

    # One scheduler step per epoch.
    scheduler.step()

    # ===== Evaluation after each epoch =====
    # NOTE(review): user_vectors is both the model input and the ground truth
    # here, so these metrics measure how well observed interactions are
    # reconstructed, not held-out item prediction — confirm this is intended.
    model.eval()
    recall_total = 0.0
    ndcg_total = 0.0
    users_seen = 0

    with torch.no_grad():
        for user_vectors, user_ids in validation_loader:
            user_vectors = user_vectors.to(device)
            user_ids = user_ids.reshape(-1).to(device)

            outputs = model(user_vectors, user_ids)

            # The metric functions return a per-batch mean. Weighting by batch
            # size gives a true per-user average even though the last batch is
            # smaller than the others.
            batch_users = user_vectors.size(0)
            users_seen += batch_users
            recall_total += recall_at_k(outputs, user_vectors, k=10) * batch_users
            ndcg_total += ndcg_at_k(outputs, user_vectors, k=10) * batch_users

    mean_recall = recall_total / max(users_seen, 1)
    mean_ndcg = ndcg_total / max(users_seen, 1)

    print(f"\nEpoch {epoch+1}/{EPOCHS} Completed | Avg Loss: {epoch_loss/len(train_loader):.4f} | Recall@10: {mean_recall:.4f} | NDCG@10: {mean_ndcg:.4f}\n")

    # ===== Save checkpoint every 15 epochs =====
    if (epoch + 1) % 15 == 0:
        checkpoint_path = os.path.join(checkpoint_dir, f'cdae_checkpoint_epoch_{epoch+1}.pth')
        # 'epoch' stores the 0-based index of the epoch just completed, while
        # the file name uses the 1-based epoch number.
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'scheduler_state_dict': scheduler.state_dict()
        }, checkpoint_path)
        print(f"Checkpoint saved at {checkpoint_path}")

# ===== Save Final Model after Training =====
final_model_path = os.path.join(final_model_dir, 'cdae_final_model.pth')
torch.save({
    'epoch': EPOCHS,
    'model_state_dict': model.state_dict(),
    'optimizer_state_dict': optimizer.state_dict(),
    'scheduler_state_dict': scheduler.state_dict()
}, final_model_path)
print(f"Final model saved successfully at {final_model_path}")


Epoch 1/130: 100%|██████████| 286/286 [01:32<00:00,  3.08it/s, loss=0.011]



Epoch 1/130 Completed | Avg Loss: 0.0164 | Recall@10: 0.0507 | NDCG@10: 0.4484



Epoch 2/130: 100%|██████████| 286/286 [01:23<00:00,  3.43it/s, loss=0.0127]



Epoch 2/130 Completed | Avg Loss: 0.0120 | Recall@10: 0.0504 | NDCG@10: 0.4458



Epoch 3/130: 100%|██████████| 286/286 [01:22<00:00,  3.45it/s, loss=0.0125]



Epoch 3/130 Completed | Avg Loss: 0.0116 | Recall@10: 0.0520 | NDCG@10: 0.4487



Epoch 4/130: 100%|██████████| 286/286 [01:22<00:00,  3.47it/s, loss=0.0112]



Epoch 4/130 Completed | Avg Loss: 0.0114 | Recall@10: 0.0510 | NDCG@10: 0.4447



Epoch 5/130: 100%|██████████| 286/286 [01:22<00:00,  3.45it/s, loss=0.0132]



Epoch 5/130 Completed | Avg Loss: 0.0112 | Recall@10: 0.0521 | NDCG@10: 0.4496



Epoch 6/130: 100%|██████████| 286/286 [01:23<00:00,  3.43it/s, loss=0.0137]



Epoch 6/130 Completed | Avg Loss: 0.0111 | Recall@10: 0.0522 | NDCG@10: 0.4508



Epoch 7/130: 100%|██████████| 286/286 [01:23<00:00,  3.43it/s, loss=0.00926]



Epoch 7/130 Completed | Avg Loss: 0.0108 | Recall@10: 0.0534 | NDCG@10: 0.4596



Epoch 8/130: 100%|██████████| 286/286 [01:23<00:00,  3.44it/s, loss=0.00965]



Epoch 8/130 Completed | Avg Loss: 0.0102 | Recall@10: 0.0603 | NDCG@10: 0.4937



Epoch 9/130: 100%|██████████| 286/286 [01:23<00:00,  3.44it/s, loss=0.00948]



Epoch 9/130 Completed | Avg Loss: 0.0096 | Recall@10: 0.0669 | NDCG@10: 0.5225



Epoch 10/130: 100%|██████████| 286/286 [01:23<00:00,  3.43it/s, loss=0.0102]



Epoch 10/130 Completed | Avg Loss: 0.0091 | Recall@10: 0.0705 | NDCG@10: 0.5476



Epoch 11/130: 100%|██████████| 286/286 [01:23<00:00,  3.42it/s, loss=0.00782]



Epoch 11/130 Completed | Avg Loss: 0.0086 | Recall@10: 0.0770 | NDCG@10: 0.5861



Epoch 12/130: 100%|██████████| 286/286 [01:23<00:00,  3.42it/s, loss=0.00777]



Epoch 12/130 Completed | Avg Loss: 0.0083 | Recall@10: 0.0822 | NDCG@10: 0.6156



Epoch 13/130: 100%|██████████| 286/286 [01:23<00:00,  3.43it/s, loss=0.00717]



Epoch 13/130 Completed | Avg Loss: 0.0079 | Recall@10: 0.0861 | NDCG@10: 0.6421



Epoch 14/130: 100%|██████████| 286/286 [01:23<00:00,  3.42it/s, loss=0.00766]



Epoch 14/130 Completed | Avg Loss: 0.0077 | Recall@10: 0.0895 | NDCG@10: 0.6589



Epoch 15/130: 100%|██████████| 286/286 [01:24<00:00,  3.39it/s, loss=0.00707]



Epoch 15/130 Completed | Avg Loss: 0.0075 | Recall@10: 0.0910 | NDCG@10: 0.6674

Checkpoint saved at /content/drive/MyDrive/CDAE_Checkpoints/cdae_checkpoint_epoch_15.pth


Epoch 16/130: 100%|██████████| 286/286 [01:26<00:00,  3.31it/s, loss=0.00645]



Epoch 16/130 Completed | Avg Loss: 0.0073 | Recall@10: 0.0927 | NDCG@10: 0.6765



Epoch 17/130: 100%|██████████| 286/286 [01:23<00:00,  3.41it/s, loss=0.00636]



Epoch 17/130 Completed | Avg Loss: 0.0072 | Recall@10: 0.0942 | NDCG@10: 0.6855



Epoch 18/130: 100%|██████████| 286/286 [01:23<00:00,  3.42it/s, loss=0.00645]



Epoch 18/130 Completed | Avg Loss: 0.0071 | Recall@10: 0.0955 | NDCG@10: 0.6922



Epoch 19/130: 100%|██████████| 286/286 [01:23<00:00,  3.42it/s, loss=0.0076]



Epoch 19/130 Completed | Avg Loss: 0.0070 | Recall@10: 0.0970 | NDCG@10: 0.6995



Epoch 20/130: 100%|██████████| 286/286 [01:23<00:00,  3.42it/s, loss=0.00656]



Epoch 20/130 Completed | Avg Loss: 0.0069 | Recall@10: 0.0981 | NDCG@10: 0.7041



Epoch 21/130: 100%|██████████| 286/286 [01:35<00:00,  3.00it/s, loss=0.00686]



Epoch 21/130 Completed | Avg Loss: 0.0068 | Recall@10: 0.0988 | NDCG@10: 0.7073



Epoch 22/130: 100%|██████████| 286/286 [01:32<00:00,  3.09it/s, loss=0.00691]



Epoch 22/130 Completed | Avg Loss: 0.0067 | Recall@10: 0.1001 | NDCG@10: 0.7131



Epoch 23/130: 100%|██████████| 286/286 [01:27<00:00,  3.25it/s, loss=0.00584]



Epoch 23/130 Completed | Avg Loss: 0.0067 | Recall@10: 0.1011 | NDCG@10: 0.7176



Epoch 24/130: 100%|██████████| 286/286 [01:25<00:00,  3.35it/s, loss=0.00657]



Epoch 24/130 Completed | Avg Loss: 0.0066 | Recall@10: 0.1017 | NDCG@10: 0.7206



Epoch 25/130: 100%|██████████| 286/286 [01:24<00:00,  3.38it/s, loss=0.00599]



Epoch 25/130 Completed | Avg Loss: 0.0066 | Recall@10: 0.1026 | NDCG@10: 0.7245



Epoch 26/130: 100%|██████████| 286/286 [01:24<00:00,  3.39it/s, loss=0.00657]



Epoch 26/130 Completed | Avg Loss: 0.0066 | Recall@10: 0.1037 | NDCG@10: 0.7282



Epoch 27/130: 100%|██████████| 286/286 [01:24<00:00,  3.39it/s, loss=0.0066]



Epoch 27/130 Completed | Avg Loss: 0.0065 | Recall@10: 0.1044 | NDCG@10: 0.7313



Epoch 28/130: 100%|██████████| 286/286 [01:25<00:00,  3.36it/s, loss=0.00672]



Epoch 28/130 Completed | Avg Loss: 0.0065 | Recall@10: 0.1048 | NDCG@10: 0.7325



Epoch 29/130: 100%|██████████| 286/286 [01:24<00:00,  3.37it/s, loss=0.00716]



Epoch 29/130 Completed | Avg Loss: 0.0065 | Recall@10: 0.1057 | NDCG@10: 0.7365



Epoch 30/130: 100%|██████████| 286/286 [01:24<00:00,  3.37it/s, loss=0.00732]



Epoch 30/130 Completed | Avg Loss: 0.0064 | Recall@10: 0.1059 | NDCG@10: 0.7375

Checkpoint saved at /content/drive/MyDrive/CDAE_Checkpoints/cdae_checkpoint_epoch_30.pth


Epoch 31/130: 100%|██████████| 286/286 [01:25<00:00,  3.33it/s, loss=0.00577]



Epoch 31/130 Completed | Avg Loss: 0.0064 | Recall@10: 0.1064 | NDCG@10: 0.7396



Epoch 32/130: 100%|██████████| 286/286 [01:24<00:00,  3.40it/s, loss=0.00661]



Epoch 32/130 Completed | Avg Loss: 0.0064 | Recall@10: 0.1066 | NDCG@10: 0.7401



Epoch 33/130: 100%|██████████| 286/286 [01:23<00:00,  3.42it/s, loss=0.00613]



Epoch 33/130 Completed | Avg Loss: 0.0064 | Recall@10: 0.1072 | NDCG@10: 0.7431



Epoch 34/130: 100%|██████████| 286/286 [01:23<00:00,  3.44it/s, loss=0.00644]



Epoch 34/130 Completed | Avg Loss: 0.0064 | Recall@10: 0.1071 | NDCG@10: 0.7426



Epoch 35/130: 100%|██████████| 286/286 [01:23<00:00,  3.42it/s, loss=0.0053]



Epoch 35/130 Completed | Avg Loss: 0.0063 | Recall@10: 0.1076 | NDCG@10: 0.7443



Epoch 36/130: 100%|██████████| 286/286 [01:23<00:00,  3.44it/s, loss=0.00668]



Epoch 36/130 Completed | Avg Loss: 0.0063 | Recall@10: 0.1074 | NDCG@10: 0.7437



Epoch 37/130: 100%|██████████| 286/286 [01:23<00:00,  3.43it/s, loss=0.00678]



Epoch 37/130 Completed | Avg Loss: 0.0063 | Recall@10: 0.1080 | NDCG@10: 0.7457



Epoch 38/130: 100%|██████████| 286/286 [01:23<00:00,  3.43it/s, loss=0.00605]



Epoch 38/130 Completed | Avg Loss: 0.0063 | Recall@10: 0.1081 | NDCG@10: 0.7455



Epoch 39/130: 100%|██████████| 286/286 [01:23<00:00,  3.42it/s, loss=0.00681]



Epoch 39/130 Completed | Avg Loss: 0.0063 | Recall@10: 0.1083 | NDCG@10: 0.7466



Epoch 40/130: 100%|██████████| 286/286 [01:23<00:00,  3.43it/s, loss=0.00564]



Epoch 40/130 Completed | Avg Loss: 0.0063 | Recall@10: 0.1089 | NDCG@10: 0.7486



Epoch 41/130: 100%|██████████| 286/286 [01:23<00:00,  3.43it/s, loss=0.00628]



Epoch 41/130 Completed | Avg Loss: 0.0063 | Recall@10: 0.1085 | NDCG@10: 0.7476



Epoch 42/130: 100%|██████████| 286/286 [01:23<00:00,  3.44it/s, loss=0.00664]



Epoch 42/130 Completed | Avg Loss: 0.0063 | Recall@10: 0.1090 | NDCG@10: 0.7496



Epoch 43/130: 100%|██████████| 286/286 [01:22<00:00,  3.45it/s, loss=0.0057]



Epoch 43/130 Completed | Avg Loss: 0.0063 | Recall@10: 0.1093 | NDCG@10: 0.7506



Epoch 44/130: 100%|██████████| 286/286 [01:23<00:00,  3.43it/s, loss=0.00746]



Epoch 44/130 Completed | Avg Loss: 0.0063 | Recall@10: 0.1094 | NDCG@10: 0.7510



Epoch 45/130: 100%|██████████| 286/286 [01:22<00:00,  3.45it/s, loss=0.00602]



Epoch 45/130 Completed | Avg Loss: 0.0062 | Recall@10: 0.1098 | NDCG@10: 0.7523

Checkpoint saved at /content/drive/MyDrive/CDAE_Checkpoints/cdae_checkpoint_epoch_45.pth


Epoch 46/130: 100%|██████████| 286/286 [01:26<00:00,  3.32it/s, loss=0.00696]



Epoch 46/130 Completed | Avg Loss: 0.0062 | Recall@10: 0.1100 | NDCG@10: 0.7535



Epoch 47/130: 100%|██████████| 286/286 [01:25<00:00,  3.36it/s, loss=0.00606]



Epoch 47/130 Completed | Avg Loss: 0.0062 | Recall@10: 0.1101 | NDCG@10: 0.7541



Epoch 48/130: 100%|██████████| 286/286 [01:24<00:00,  3.37it/s, loss=0.00599]



Epoch 48/130 Completed | Avg Loss: 0.0062 | Recall@10: 0.1103 | NDCG@10: 0.7546



Epoch 49/130: 100%|██████████| 286/286 [01:25<00:00,  3.35it/s, loss=0.00763]



Epoch 49/130 Completed | Avg Loss: 0.0062 | Recall@10: 0.1104 | NDCG@10: 0.7550



Epoch 50/130: 100%|██████████| 286/286 [01:25<00:00,  3.33it/s, loss=0.00536]



Epoch 50/130 Completed | Avg Loss: 0.0062 | Recall@10: 0.1107 | NDCG@10: 0.7561



Epoch 51/130: 100%|██████████| 286/286 [01:25<00:00,  3.36it/s, loss=0.00541]



Epoch 51/130 Completed | Avg Loss: 0.0062 | Recall@10: 0.1109 | NDCG@10: 0.7576



Epoch 52/130: 100%|██████████| 286/286 [01:25<00:00,  3.33it/s, loss=0.00649]



Epoch 52/130 Completed | Avg Loss: 0.0062 | Recall@10: 0.1109 | NDCG@10: 0.7575



Epoch 53/130: 100%|██████████| 286/286 [01:24<00:00,  3.37it/s, loss=0.00616]



Epoch 53/130 Completed | Avg Loss: 0.0062 | Recall@10: 0.1113 | NDCG@10: 0.7583



Epoch 54/130: 100%|██████████| 286/286 [01:24<00:00,  3.37it/s, loss=0.00585]



Epoch 54/130 Completed | Avg Loss: 0.0062 | Recall@10: 0.1118 | NDCG@10: 0.7598



Epoch 55/130: 100%|██████████| 286/286 [01:25<00:00,  3.36it/s, loss=0.00697]



Epoch 55/130 Completed | Avg Loss: 0.0062 | Recall@10: 0.1119 | NDCG@10: 0.7610



Epoch 56/130: 100%|██████████| 286/286 [01:25<00:00,  3.35it/s, loss=0.0066]



Epoch 56/130 Completed | Avg Loss: 0.0062 | Recall@10: 0.1120 | NDCG@10: 0.7608



Epoch 57/130: 100%|██████████| 286/286 [01:25<00:00,  3.36it/s, loss=0.006]



Epoch 57/130 Completed | Avg Loss: 0.0062 | Recall@10: 0.1120 | NDCG@10: 0.7616



Epoch 58/130: 100%|██████████| 286/286 [01:24<00:00,  3.37it/s, loss=0.00566]



Epoch 58/130 Completed | Avg Loss: 0.0062 | Recall@10: 0.1124 | NDCG@10: 0.7625



Epoch 59/130: 100%|██████████| 286/286 [01:26<00:00,  3.31it/s, loss=0.00558]



Epoch 59/130 Completed | Avg Loss: 0.0061 | Recall@10: 0.1127 | NDCG@10: 0.7639



Epoch 60/130: 100%|██████████| 286/286 [01:25<00:00,  3.34it/s, loss=0.00659]



Epoch 60/130 Completed | Avg Loss: 0.0061 | Recall@10: 0.1131 | NDCG@10: 0.7658

Checkpoint saved at /content/drive/MyDrive/CDAE_Checkpoints/cdae_checkpoint_epoch_60.pth


Epoch 61/130: 100%|██████████| 286/286 [01:26<00:00,  3.32it/s, loss=0.00635]



Epoch 61/130 Completed | Avg Loss: 0.0061 | Recall@10: 0.1130 | NDCG@10: 0.7657



Epoch 62/130: 100%|██████████| 286/286 [01:24<00:00,  3.37it/s, loss=0.00593]



Epoch 62/130 Completed | Avg Loss: 0.0061 | Recall@10: 0.1131 | NDCG@10: 0.7663



Epoch 63/130: 100%|██████████| 286/286 [01:24<00:00,  3.39it/s, loss=0.00594]



Epoch 63/130 Completed | Avg Loss: 0.0061 | Recall@10: 0.1134 | NDCG@10: 0.7671



Epoch 64/130: 100%|██████████| 286/286 [01:23<00:00,  3.41it/s, loss=0.00599]



Epoch 64/130 Completed | Avg Loss: 0.0061 | Recall@10: 0.1133 | NDCG@10: 0.7666



Epoch 65/130: 100%|██████████| 286/286 [01:24<00:00,  3.38it/s, loss=0.00556]



Epoch 65/130 Completed | Avg Loss: 0.0061 | Recall@10: 0.1134 | NDCG@10: 0.7672



Epoch 66/130: 100%|██████████| 286/286 [01:24<00:00,  3.38it/s, loss=0.00595]



Epoch 66/130 Completed | Avg Loss: 0.0061 | Recall@10: 0.1134 | NDCG@10: 0.7674



Epoch 67/130: 100%|██████████| 286/286 [01:24<00:00,  3.40it/s, loss=0.00568]



Epoch 67/130 Completed | Avg Loss: 0.0061 | Recall@10: 0.1138 | NDCG@10: 0.7687



Epoch 68/130: 100%|██████████| 286/286 [01:23<00:00,  3.42it/s, loss=0.00611]



Epoch 68/130 Completed | Avg Loss: 0.0061 | Recall@10: 0.1139 | NDCG@10: 0.7688



Epoch 69/130: 100%|██████████| 286/286 [01:24<00:00,  3.40it/s, loss=0.00643]



Epoch 69/130 Completed | Avg Loss: 0.0061 | Recall@10: 0.1140 | NDCG@10: 0.7697



Epoch 70/130: 100%|██████████| 286/286 [01:24<00:00,  3.37it/s, loss=0.00664]



Epoch 70/130 Completed | Avg Loss: 0.0061 | Recall@10: 0.1141 | NDCG@10: 0.7702



Epoch 71/130: 100%|██████████| 286/286 [01:24<00:00,  3.40it/s, loss=0.00634]



Epoch 71/130 Completed | Avg Loss: 0.0061 | Recall@10: 0.1140 | NDCG@10: 0.7701



Epoch 72/130: 100%|██████████| 286/286 [01:24<00:00,  3.38it/s, loss=0.00658]



Epoch 72/130 Completed | Avg Loss: 0.0061 | Recall@10: 0.1142 | NDCG@10: 0.7707



Epoch 73/130: 100%|██████████| 286/286 [01:24<00:00,  3.38it/s, loss=0.00568]



Epoch 73/130 Completed | Avg Loss: 0.0061 | Recall@10: 0.1143 | NDCG@10: 0.7711



Epoch 74/130: 100%|██████████| 286/286 [01:24<00:00,  3.36it/s, loss=0.00631]



Epoch 74/130 Completed | Avg Loss: 0.0061 | Recall@10: 0.1144 | NDCG@10: 0.7713



Epoch 75/130: 100%|██████████| 286/286 [01:24<00:00,  3.38it/s, loss=0.00517]



Epoch 75/130 Completed | Avg Loss: 0.0061 | Recall@10: 0.1145 | NDCG@10: 0.7717

Checkpoint saved at /content/drive/MyDrive/CDAE_Checkpoints/cdae_checkpoint_epoch_75.pth


Epoch 76/130: 100%|██████████| 286/286 [01:26<00:00,  3.32it/s, loss=0.0058]



Epoch 76/130 Completed | Avg Loss: 0.0061 | Recall@10: 0.1146 | NDCG@10: 0.7722



Epoch 77/130: 100%|██████████| 286/286 [01:24<00:00,  3.40it/s, loss=0.00642]



Epoch 77/130 Completed | Avg Loss: 0.0061 | Recall@10: 0.1146 | NDCG@10: 0.7725



Epoch 78/130: 100%|██████████| 286/286 [01:24<00:00,  3.38it/s, loss=0.00568]



Epoch 78/130 Completed | Avg Loss: 0.0061 | Recall@10: 0.1147 | NDCG@10: 0.7728



Epoch 79/130: 100%|██████████| 286/286 [01:24<00:00,  3.39it/s, loss=0.00591]



Epoch 79/130 Completed | Avg Loss: 0.0061 | Recall@10: 0.1149 | NDCG@10: 0.7737



Epoch 80/130: 100%|██████████| 286/286 [01:23<00:00,  3.41it/s, loss=0.00642]



Epoch 80/130 Completed | Avg Loss: 0.0061 | Recall@10: 0.1151 | NDCG@10: 0.7744



Epoch 81/130: 100%|██████████| 286/286 [01:24<00:00,  3.40it/s, loss=0.0054]



Epoch 81/130 Completed | Avg Loss: 0.0061 | Recall@10: 0.1150 | NDCG@10: 0.7744



Epoch 82/130: 100%|██████████| 286/286 [01:24<00:00,  3.40it/s, loss=0.00588]



Epoch 82/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1151 | NDCG@10: 0.7746



Epoch 83/130: 100%|██████████| 286/286 [01:24<00:00,  3.40it/s, loss=0.00639]



Epoch 83/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1153 | NDCG@10: 0.7753



Epoch 84/130: 100%|██████████| 286/286 [01:24<00:00,  3.40it/s, loss=0.00637]



Epoch 84/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1152 | NDCG@10: 0.7749



Epoch 85/130: 100%|██████████| 286/286 [01:23<00:00,  3.41it/s, loss=0.00593]



Epoch 85/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1155 | NDCG@10: 0.7760



Epoch 86/130: 100%|██████████| 286/286 [01:23<00:00,  3.42it/s, loss=0.00689]



Epoch 86/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1154 | NDCG@10: 0.7760



Epoch 87/130: 100%|██████████| 286/286 [01:23<00:00,  3.41it/s, loss=0.00624]



Epoch 87/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1156 | NDCG@10: 0.7767



Epoch 88/130: 100%|██████████| 286/286 [01:23<00:00,  3.41it/s, loss=0.00574]



Epoch 88/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1158 | NDCG@10: 0.7771



Epoch 89/130: 100%|██████████| 286/286 [01:24<00:00,  3.40it/s, loss=0.00556]



Epoch 89/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1158 | NDCG@10: 0.7776



Epoch 90/130: 100%|██████████| 286/286 [01:23<00:00,  3.41it/s, loss=0.00631]



Epoch 90/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1157 | NDCG@10: 0.7768

Checkpoint saved at /content/drive/MyDrive/CDAE_Checkpoints/cdae_checkpoint_epoch_90.pth


Epoch 91/130: 100%|██████████| 286/286 [01:25<00:00,  3.33it/s, loss=0.00577]



Epoch 91/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1159 | NDCG@10: 0.7778



Epoch 92/130: 100%|██████████| 286/286 [01:27<00:00,  3.27it/s, loss=0.0062]



Epoch 92/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1161 | NDCG@10: 0.7785



Epoch 93/130: 100%|██████████| 286/286 [01:26<00:00,  3.30it/s, loss=0.00597]



Epoch 93/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1161 | NDCG@10: 0.7785



Epoch 94/130: 100%|██████████| 286/286 [01:25<00:00,  3.36it/s, loss=0.00718]



Epoch 94/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1161 | NDCG@10: 0.7785



Epoch 95/130: 100%|██████████| 286/286 [01:24<00:00,  3.38it/s, loss=0.00591]



Epoch 95/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1161 | NDCG@10: 0.7788



Epoch 96/130: 100%|██████████| 286/286 [01:24<00:00,  3.38it/s, loss=0.00594]



Epoch 96/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1160 | NDCG@10: 0.7786



Epoch 97/130: 100%|██████████| 286/286 [01:24<00:00,  3.39it/s, loss=0.0077]



Epoch 97/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1162 | NDCG@10: 0.7791



Epoch 98/130: 100%|██████████| 286/286 [01:24<00:00,  3.37it/s, loss=0.00604]



Epoch 98/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1163 | NDCG@10: 0.7794



Epoch 99/130: 100%|██████████| 286/286 [01:24<00:00,  3.38it/s, loss=0.00587]



Epoch 99/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1163 | NDCG@10: 0.7793



Epoch 100/130: 100%|██████████| 286/286 [01:24<00:00,  3.38it/s, loss=0.00679]



Epoch 100/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1162 | NDCG@10: 0.7794



Epoch 101/130: 100%|██████████| 286/286 [01:24<00:00,  3.37it/s, loss=0.00537]



Epoch 101/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1164 | NDCG@10: 0.7797



Epoch 102/130: 100%|██████████| 286/286 [01:25<00:00,  3.34it/s, loss=0.00614]



Epoch 102/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1163 | NDCG@10: 0.7797



Epoch 103/130: 100%|██████████| 286/286 [01:25<00:00,  3.35it/s, loss=0.00593]



Epoch 103/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1163 | NDCG@10: 0.7797



Epoch 104/130: 100%|██████████| 286/286 [01:25<00:00,  3.36it/s, loss=0.00617]



Epoch 104/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1166 | NDCG@10: 0.7806



Epoch 105/130: 100%|██████████| 286/286 [01:25<00:00,  3.36it/s, loss=0.00583]



Epoch 105/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1165 | NDCG@10: 0.7806

Checkpoint saved at /content/drive/MyDrive/CDAE_Checkpoints/cdae_checkpoint_epoch_105.pth


Epoch 106/130: 100%|██████████| 286/286 [01:25<00:00,  3.33it/s, loss=0.00635]



Epoch 106/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1166 | NDCG@10: 0.7807



Epoch 107/130: 100%|██████████| 286/286 [01:25<00:00,  3.36it/s, loss=0.00569]



Epoch 107/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1168 | NDCG@10: 0.7814



Epoch 108/130: 100%|██████████| 286/286 [01:25<00:00,  3.33it/s, loss=0.00571]



Epoch 108/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1168 | NDCG@10: 0.7814



Epoch 109/130: 100%|██████████| 286/286 [01:25<00:00,  3.36it/s, loss=0.00625]



Epoch 109/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1167 | NDCG@10: 0.7812



Epoch 110/130: 100%|██████████| 286/286 [01:25<00:00,  3.36it/s, loss=0.00609]



Epoch 110/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1168 | NDCG@10: 0.7813



Epoch 111/130: 100%|██████████| 286/286 [01:25<00:00,  3.34it/s, loss=0.00641]



Epoch 111/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1170 | NDCG@10: 0.7820



Epoch 112/130: 100%|██████████| 286/286 [01:25<00:00,  3.34it/s, loss=0.00566]



Epoch 112/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1169 | NDCG@10: 0.7821



Epoch 113/130: 100%|██████████| 286/286 [01:25<00:00,  3.36it/s, loss=0.00628]



Epoch 113/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1170 | NDCG@10: 0.7823



Epoch 114/130: 100%|██████████| 286/286 [01:24<00:00,  3.38it/s, loss=0.00713]



Epoch 114/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1169 | NDCG@10: 0.7819



Epoch 115/130: 100%|██████████| 286/286 [01:24<00:00,  3.39it/s, loss=0.00585]



Epoch 115/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1170 | NDCG@10: 0.7822



Epoch 116/130: 100%|██████████| 286/286 [01:25<00:00,  3.34it/s, loss=0.00583]



Epoch 116/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1170 | NDCG@10: 0.7824



Epoch 117/130: 100%|██████████| 286/286 [01:24<00:00,  3.37it/s, loss=0.00582]



Epoch 117/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1171 | NDCG@10: 0.7825



Epoch 118/130: 100%|██████████| 286/286 [01:24<00:00,  3.40it/s, loss=0.00563]



Epoch 118/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1171 | NDCG@10: 0.7829



Epoch 119/130: 100%|██████████| 286/286 [01:24<00:00,  3.39it/s, loss=0.00588]



Epoch 119/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1172 | NDCG@10: 0.7833



Epoch 120/130: 100%|██████████| 286/286 [01:24<00:00,  3.38it/s, loss=0.00652]



Epoch 120/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1172 | NDCG@10: 0.7829

Checkpoint saved at /content/drive/MyDrive/CDAE_Checkpoints/cdae_checkpoint_epoch_120.pth


Epoch 121/130: 100%|██████████| 286/286 [01:26<00:00,  3.29it/s, loss=0.00527]



Epoch 121/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1173 | NDCG@10: 0.7835



Epoch 122/130: 100%|██████████| 286/286 [01:24<00:00,  3.39it/s, loss=0.00614]



Epoch 122/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1173 | NDCG@10: 0.7835



Epoch 123/130: 100%|██████████| 286/286 [01:24<00:00,  3.37it/s, loss=0.00569]



Epoch 123/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1174 | NDCG@10: 0.7838



Epoch 124/130: 100%|██████████| 286/286 [01:24<00:00,  3.38it/s, loss=0.00633]



Epoch 124/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1173 | NDCG@10: 0.7836



Epoch 125/130: 100%|██████████| 286/286 [01:24<00:00,  3.38it/s, loss=0.0061]



Epoch 125/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1174 | NDCG@10: 0.7839



Epoch 126/130: 100%|██████████| 286/286 [01:23<00:00,  3.41it/s, loss=0.00595]



Epoch 126/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1174 | NDCG@10: 0.7840



Epoch 127/130: 100%|██████████| 286/286 [01:24<00:00,  3.39it/s, loss=0.00516]



Epoch 127/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1174 | NDCG@10: 0.7838



Epoch 128/130: 100%|██████████| 286/286 [01:24<00:00,  3.39it/s, loss=0.00644]



Epoch 128/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1176 | NDCG@10: 0.7844



Epoch 129/130: 100%|██████████| 286/286 [01:24<00:00,  3.40it/s, loss=0.00573]



Epoch 129/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1175 | NDCG@10: 0.7843



Epoch 130/130: 100%|██████████| 286/286 [01:23<00:00,  3.40it/s, loss=0.00612]



Epoch 130/130 Completed | Avg Loss: 0.0060 | Recall@10: 0.1175 | NDCG@10: 0.7842

Final model saved successfully at /content/drive/MyDrive/CDAE_Final_Model/cdae_final_model.pth


In [None]:
# ===== Block 6: Final Full Evaluation (Recall/NDCG @ 10, 100, 500) =====

import os
import numpy as np

# ===== Paths =====
evaluation_result_path = '/content/drive/MyDrive/CDAE_Final_Model/final_evaluation_results.txt'

# ===== Load Final Model (Optional, if you need) =====
# (Assume model already loaded, trained and moved to eval mode)

# ===== Define evaluation metrics (already defined if above) =====
# recall_at_k and ndcg_at_k functions already exist

# ===== Evaluate =====
# One accumulator per cut-off instead of six copy-pasted lists.
EVAL_KS = (10, 100, 500)

model.eval()
recall_totals = {k: 0.0 for k in EVAL_KS}
ndcg_totals = {k: 0.0 for k in EVAL_KS}
users_seen = 0

with torch.no_grad():
    for user_vectors, user_ids in validation_loader:
        user_vectors = user_vectors.to(device)
        # reshape(-1) keeps the ids 1-D even for a batch of a single user.
        user_ids = user_ids.reshape(-1).to(device)

        outputs = model(user_vectors, user_ids)

        # The metric functions return per-batch means; weight them by batch
        # size so the smaller last batch does not bias the overall average.
        batch_users = user_vectors.size(0)
        users_seen += batch_users
        for k in EVAL_KS:
            recall_totals[k] += recall_at_k(outputs, user_vectors, k=k) * batch_users
            ndcg_totals[k] += ndcg_at_k(outputs, user_vectors, k=k) * batch_users

# ===== Mean Scores =====
mean_recall = {k: recall_totals[k] / max(users_seen, 1) for k in EVAL_KS}
mean_ndcg = {k: ndcg_totals[k] / max(users_seen, 1) for k in EVAL_KS}

# Keep the individual names the original cell exposed.
mean_recall_10, mean_recall_100, mean_recall_500 = (mean_recall[k] for k in EVAL_KS)
mean_ndcg_10, mean_ndcg_100, mean_ndcg_500 = (mean_ndcg[k] for k in EVAL_KS)

# ===== Save Results to Text File =====
# Same layout as before: a header, then one Recall/NDCG pair per cut-off with
# a blank line between pairs.
report = "===== Final Evaluation Results =====\n" + "\n".join(
    f"Recall@{k}: {mean_recall[k]:.4f}\nNDCG@{k}: {mean_ndcg[k]:.4f}\n" for k in EVAL_KS
)
with open(evaluation_result_path, 'w') as f:
    f.write(report)

# Show the numbers in the notebook as well as writing them to the file.
print(report)
print(f"Evaluation completed! Results saved at: {evaluation_result_path}")


Evaluation completed! Results saved at: /content/drive/MyDrive/CDAE_Final_Model/final_evaluation_results.txt


Recommendation

In [None]:
# Mount Google Drive at /content/drive (Colab only) so files stored on Drive
# can be read through the local filesystem.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import scipy.sparse as sparse
import os


In [None]:
# ===== Setup Paths =====
# Folder on the mounted Drive that holds the saved artifacts.
final_model_dir = '/content/drive/MyDrive/CDAE_Final_Model'
movies_csv_path = os.path.join(final_model_dir, 'movies_backup.csv')  # Updated

# ===== Load interaction matrix =====
# load_npz returns the matrix in whatever sparse format it was saved in.
# recommend_movies() indexes it by row (interaction_matrix[user_id]), which
# not every format supports (e.g. COO), so normalise to CSR here.
interaction_matrix = sparse.load_npz(os.path.join(final_model_dir, 'interaction_matrix.npz')).tocsr()

# ===== Load movie titles =====
movies_df = pd.read_csv(movies_csv_path)

# Fail early with a clear message if the columns that recommend_movies()
# reads are absent, instead of failing later inside the lookup.
missing_columns = {'movie_encoded', 'title'} - set(movies_df.columns)
if missing_columns:
    raise KeyError(f"{movies_csv_path} is missing required columns: {sorted(missing_columns)}")

# ===== Define Model Architecture (MATCHES TRAINING) =====
class CDAE(nn.Module):
    """Single-hidden-layer auto-encoder with an additive per-user embedding.

    The forward pass applies dropout to the input vector, encodes it with a
    linear layer, adds the embedding looked up for ``user_id``, applies a
    sigmoid, decodes with a second linear layer and applies a final sigmoid.

    Args:
        num_users: Number of rows in the user embedding table.
        num_items: Width of the input and output vectors.
        embedding_dim: Size of the hidden layer and of each user embedding.
        dropout_rate: Probability handed to ``nn.Dropout`` for the input.
    """

    def __init__(self, num_users, num_items, embedding_dim=200, dropout_rate=0.5):  # Match
        super().__init__()
        self.num_users, self.num_items, self.embedding_dim = num_users, num_items, embedding_dim

        # Sub-module names and creation order are left as they were so the
        # state_dict keys (and random initialisation order) do not change.
        self.user_embedding = nn.Embedding(num_users, embedding_dim)
        self.encoder = nn.Linear(num_items, embedding_dim)
        self.decoder = nn.Linear(embedding_dim, num_items)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x, user_id):
        """Return the sigmoid-activated reconstruction of ``x`` for ``user_id``."""
        corrupted = self.dropout(x)
        hidden = torch.sigmoid(self.encoder(corrupted) + self.user_embedding(user_id))
        return torch.sigmoid(self.decoder(hidden))

# ===== Setup Device =====
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# ===== Initialize Correct Model =====
# The interaction matrix shape supplies (num_users, num_items); the remaining
# hyper-parameters are the same literals as before.
num_users, num_items = interaction_matrix.shape
model = CDAE(num_users=num_users, num_items=num_items, embedding_dim=200, dropout_rate=0.5)
model = model.to(device)

# ===== Load Saved Weights =====
# NOTE(review): depending on the PyTorch version, torch.load may unpickle
# arbitrary objects — only load checkpoints from a trusted source.
checkpoint_file = os.path.join(final_model_dir, 'cdae_final_model.pth')
model_checkpoint = torch.load(checkpoint_file, map_location=device)
model.load_state_dict(model_checkpoint['model_state_dict'])
model.eval()  # inference mode: disables dropout

print("Model loaded successfully with correct architecture!")


Model loaded successfully with correct architecture!


In [None]:
def recommend_movies(user_id, top_n=10):
    """
    Recommend up to ``top_n`` not-yet-watched movie titles for a user.

    Reads the module-level ``model``, ``interaction_matrix``, ``movies_df``
    and ``device``.

    Args:
        user_id: Row index of the user in ``interaction_matrix``
            (0 <= user_id < number of rows).
        top_n: Maximum number of titles to return.

    Returns:
        list: Titles ordered from highest to lowest predicted score. The list
        can be shorter than ``top_n`` when the user has fewer unwatched items
        or when an item index has no matching ``movie_encoded`` row in
        ``movies_df``.

    Raises:
        IndexError: If ``user_id`` is outside the rows of ``interaction_matrix``.
    """
    num_known_users = interaction_matrix.shape[0]
    # Reject bad ids up front: a negative id would be accepted by the sparse
    # matrix (indexing from the end) but not by the embedding lookup.
    if not 0 <= user_id < num_known_users:
        raise IndexError(
            f"user_id {user_id} is out of range; expected 0 <= user_id < {num_known_users}"
        )
    if top_n <= 0:
        # Slicing with [-0:] would select every item, so return nothing instead.
        return []

    model.eval()
    with torch.no_grad():
        user_vector = interaction_matrix[user_id].toarray().flatten()
        input_vector = torch.tensor(user_vector, dtype=torch.float32, device=device).unsqueeze(0)
        user_tensor = torch.tensor([user_id], dtype=torch.long, device=device)

        scores = model(input_vector, user_tensor).cpu().numpy().flatten()

    # Push already watched movies to the bottom of the ranking ...
    watched_mask = user_vector > 0
    scores[watched_mask] = -np.inf

    top_indices = scores.argsort()[::-1][:top_n]
    # ... and drop any that still made the cut because top_n exceeds the
    # number of unwatched items.
    top_indices = top_indices[~watched_mask[top_indices]]

    # Resolve all titles with a single pass over movies_df; the first row wins
    # when an encoded id appears more than once.
    matches = movies_df[movies_df['movie_encoded'].isin(top_indices)].drop_duplicates('movie_encoded')
    title_by_index = dict(zip(matches['movie_encoded'], matches['title']))

    # Item indices without a title row are skipped.
    return [title_by_index[int(i)] for i in top_indices if int(i) in title_by_index]


In [None]:
# Recommend movies for one example user (a fixed ID, so the output is reproducible)
user_id = 123  # Example user within [0, num_users-1]
top_n = 10     # Single source for both the request size and the printed header
recommended = recommend_movies(user_id, top_n=top_n)

print(f"\nTop {top_n} recommended movies for User ID {user_id}:\n")
for rank, title in enumerate(recommended, start=1):
    print(f"{rank}. {title}")



Top 10 recommended movies for User ID 123:

1. Murder, My Sweet (1944)
2. Brother of Sleep (Schlafes Bruder) (1995)
3. White Balloon, The (Badkonake sefid) (1995)
4. Grace of My Heart (1996)
5. Bed of Roses (1996)
6. Die Hard (1988)
7. Outlaw, The (1943)
8. Beautiful Thing (1996)
9. Dumbo (1941)
10. Star Wars: Episode IV - A New Hope (1977)
