<a href="https://www.kaggle.com/code/bangarurajesh/attnetion-based-rec-sys?scriptVersionId=208973393" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

## data.py

In [1]:
data_dir = '/kaggle/input/movielens100k-original'


In [2]:
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import LabelEncoder

def load_data(data_dir):
    # Load ratings
    ratings = pd.read_csv(f'{data_dir}/ratings.dat', sep='::', header=None, engine='python',
                          names=['user_id', 'movie_id', 'rating', 'timestamp'], encoding='ISO-8859-1')
    
    # Load user metadata (user demographics)
    users = pd.read_csv(f'{data_dir}/users.dat', sep='::', header=None, engine='python',
                        names=['user_id', 'gender', 'age', 'occupation', 'zip'], encoding='ISO-8859-1')
    
    # Load item metadata (movie genres)
    movies = pd.read_csv(f'{data_dir}/movies.dat', sep='::', header=None, engine='python',
                         names=['movie_id', 'title', 'genres'], encoding='ISO-8859-1')

    # Encode user metadata features
    gender_encoder = LabelEncoder()
    users['gender'] = gender_encoder.fit_transform(users['gender'])

    age_encoder = LabelEncoder()
    users['age'] = age_encoder.fit_transform(users['age'])



    occupation_encoder = LabelEncoder()
    users['occupation'] = occupation_encoder.fit_transform(users['occupation'])

    # Encode movie genres
    genre_encoder = LabelEncoder()
    movies['genres'] = movies['genres'].apply(lambda x: x.split('|')[0])  # Use the first genre as primary
    movies['genres'] = genre_encoder.fit_transform(movies['genres'])

    # Merge data
    data = pd.merge(ratings, users, on='user_id')
    data = pd.merge(data, movies, on='movie_id')

    return data, len(users), len(movies), len(gender_encoder.classes_), len(age_encoder.classes_), \
           len(occupation_encoder.classes_), len(genre_encoder.classes_)

class MovieLensDataset(Dataset):
    def __init__(self, dataframe):
        self.data = dataframe

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        user_id = torch.tensor(row['user_id'], dtype=torch.long)
        item_id = torch.tensor(row['movie_id'], dtype=torch.long)
        rating = torch.tensor(row['rating'], dtype=torch.float)

        # User metadata
        gender = torch.tensor(row['gender'], dtype=torch.long)
        age = torch.tensor(row['age'], dtype=torch.long)
        occupation = torch.tensor(row['occupation'], dtype=torch.long)

        # Item metadata
        genre = torch.tensor(row['genres'], dtype=torch.long)

        return user_id, item_id, gender, age, occupation, genre, rating

def get_dataloaders(data, batch_size=128, shuffle=True):
    dataset = MovieLensDataset(data)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

In [3]:
ratings = pd.read_csv(f'{data_dir}/u.data',sep = "\t",header=None)
ratings.columns = ['user_id','movie_id','ratings','time_stamp']
movies = pd.read_csv(f'{data_dir}/u.item',sep = "|",header = None,encoding='ISO-8859-1')
movies.columns = ['movie_id', 'movie title' ,'release date','video release date', 'IMDb URL', 'unknown', 'Action', 
                'Adventure', 'Animation', 'Children\'s', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 
                'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']
users = pd.read_csv(f'{data_dir}/u.user',sep = "|",header = None,encoding='ISO-8859-1')
users.columns = ['user_id','age','gender','occupation','zipcode']


In [4]:
ratings.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100000 entries, 0 to 99999
Data columns (total 4 columns):
 #   Column      Non-Null Count   Dtype
---  ------      --------------   -----
 0   user_id     100000 non-null  int64
 1   movie_id    100000 non-null  int64
 2   ratings     100000 non-null  int64
 3   time_stamp  100000 non-null  int64
dtypes: int64(4)
memory usage: 3.1 MB


In [5]:
ratings.drop(columns = ['time_stamp'],inplace = True)
users.drop(columns = ['zipcode'],inplace = True)
movies.drop(columns = ['video release date', 'IMDb URL'],inplace = True)

In [6]:
ratings.head()

Unnamed: 0,user_id,movie_id,ratings
0,196,242,3
1,186,302,3
2,22,377,1
3,244,51,2
4,166,346,1


In [7]:
movies.head()

Unnamed: 0,movie_id,movie title,release date,unknown,Action,Adventure,Animation,Children's,Comedy,Crime,...,Fantasy,Film-Noir,Horror,Musical,Mystery,Romance,Sci-Fi,Thriller,War,Western
0,1,Toy Story (1995),01-Jan-1995,0,0,0,1,1,1,0,...,0,0,0,0,0,0,0,0,0,0
1,2,GoldenEye (1995),01-Jan-1995,0,1,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
2,3,Four Rooms (1995),01-Jan-1995,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,4,Get Shorty (1995),01-Jan-1995,0,1,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
4,5,Copycat (1995),01-Jan-1995,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0


In [8]:
users.head()

Unnamed: 0,user_id,age,gender,occupation
0,1,24,M,technician
1,2,53,F,other
2,3,23,M,writer
3,4,24,M,technician
4,5,33,F,other


In [24]:
## removing the movies that are not rated at all and also the users that were not participated in rating movies ###
encoder = LabelEncoder()
ratings['movie_id']=encoder.fit_transform(ratings['movie_id'])
rated_movies = set(encoder.classes_)
n_original = len(movies)
movies = movies[movies['movie_id'].isin(rated_movies)].copy()
n_after = len(movies)
print(" {} movies are removed due to not being rated ".format(n_original-n_after))

movies['movie_id'] = encoder.transform(movies['movie_id'].values)

 0 movies are removed due to not being rated 


In [10]:
# same thing I will be doing for users that is users lazy to rate even a single movie 
u_encoder = LabelEncoder()
ratings['user_id'] = u_encoder.fit_transform(ratings['user_id'])
n_original = len(users['user_id'])
responsible_users = set(u_encoder.classes_)
users = users[users['user_id'].isin(responsible_users)].copy()
n_after = len(users['user_id'])
print("{} users have been removed under displinary actions".format(n_original-n_after))
users['user_id'] = users['user_id']
users['user_id'] = u_encoder.transform(users['user_id'].values)





1 users have been removed under displinary actions


In [11]:
## Just to check all the three things once
print(ratings.head())
print(movies.head())
print(users.head())
ratings =ratings.sort_values(by = 'user_id').reset_index(drop = True)

   user_id  movie_id  ratings
0      195       242        3
1      185       302        3
2       21       377        1
3      243        51        2
4      165       346        1
   movie_id        movie title release date  unknown  Action  Adventure  \
0         1   Toy Story (1995)  01-Jan-1995        0       0          0   
1         2   GoldenEye (1995)  01-Jan-1995        0       1          1   
2         3  Four Rooms (1995)  01-Jan-1995        0       0          0   
3         4  Get Shorty (1995)  01-Jan-1995        0       1          0   
4         5     Copycat (1995)  01-Jan-1995        0       0          0   

   Animation  Children's  Comedy  Crime  ...  Fantasy  Film-Noir  Horror  \
0          1           1       1      0  ...        0          0       0   
1          0           0       0      0  ...        0          0       0   
2          0           0       0      0  ...        0          0       0   
3          0           0       1      0  ...        0          0 

In [20]:
# To make the data more clean and far away from real world 
# we will removies movies that are not rated atleast by 20 users
# we will also fire users that are lazy to rate atleast 20 movies
# just to make the calculations simple 
# we will skip this step once our model is ready to entre real world
r_org = len(ratings) # before firing any lazy people or removing movies
i = 0
for user in ratings['user_id'].unique():
    if (len(ratings[ratings['user_id']==user])< 20):
        i+=1
        ratings.drop(ratings[ratings['user_id'] == user].index , inplace = True)
        users.drop(users[users['user_id'] == user].index , inplace = True)
print("{} ratings have been removed as the {} users have been fired".format(r_org- len(ratings),i))
r_m = len(ratings)  ## remaing ratings after firing lazy users
j = 0 
for movie in ratings['movie_id'].unique():
    if(len(ratings[ratings['movie_id']==movie])<20):
        j+=1
        ratings.drop(ratings[ratings['movie_id']==movie].index,inplace = True)
        movies.drop(movies[movies['movie_id'] == movie].index, inplace = True)
print("{} rating have been removied due to removing of {} less rated movies".format(r_m - len(ratings), j))

print("IN TOTAL {} RATINGS CORRESPONDING TO {} USERS AND {} MOVIES HAVE BEEN REMOVED".format(r_org-len(ratings),i,j))
## IN THESE STEPS THERE ARE FEW MOVIES AND USERS THAT COULD HAVE STAYED BUT GOT REMOVED DUE TO RATING LESS RATED MOVIES OR RATED BY A LAZY USERS


0 ratings have been removed as the 0 users have been fired
5032 rating have been removied due to removing of 743 less rated movies
IN TOTAL 5032 RATINGS CORRESPONDING TO 0 USERS AND 743 MOVIES HAVE BEEN REMOVED


# Data visualization 

This is a good way to know the pattrens in the data at hand 
due to lazyness I am skipping this as of now , Later I will replace this writing with all the inside pattrens in the data


## Data Loading

In [None]:
def load_data(data_dir):
    # Load ratings
    ratings = pd.read_csv(f'{data_dir}/ratings.dat', sep='::', header=None, engine='python',
                          names=['user_id', 'movie_id', 'rating', 'timestamp'], encoding='ISO-8859-1')
    
    # Load user metadata (user demographics)
    users = pd.read_csv(f'{data_dir}/users.dat', sep='::', header=None, engine='python',
                        names=['user_id', 'gender', 'age', 'occupation', 'zip'], encoding='ISO-8859-1')
    
    # Load item metadata (movie genres)
    movies = pd.read_csv(f'{data_dir}/movies.dat', sep='::', header=None, engine='python',
                         names=['movie_id', 'title', 'genres'], encoding='ISO-8859-1')

    # Encode user metadata features
    gender_encoder = LabelEncoder()
    users['gender'] = gender_encoder.fit_transform(users['gender'])

    age_encoder = LabelEncoder()
    users['age'] = age_encoder.fit_transform(users['age'])



    occupation_encoder = LabelEncoder()
    users['occupation'] = occupation_encoder.fit_transform(users['occupation'])

    # Encode movie genres
    genre_encoder = LabelEncoder()
    movies['genres'] = movies['genres'].apply(lambda x: x.split('|')[0])  # Use the first genre as primary
    movies['genres'] = genre_encoder.fit_transform(movies['genres'])

    # Merge data
    data = pd.merge(ratings, users, on='user_id')
    data = pd.merge(data, movies, on='movie_id')

    return data, len(users), len(movies), len(gender_encoder.classes_), len(age_encoder.classes_), \
           len(occupation_encoder.classes_), len(genre_encoder.classes_)

class MovieLensDataset(Dataset):
    def __init__(self, dataframe):
        self.data = dataframe

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        user_id = torch.tensor(row['user_id'], dtype=torch.long)
        item_id = torch.tensor(row['movie_id'], dtype=torch.long)
        rating = torch.tensor(row['rating'], dtype=torch.float)

        # User metadata
        gender = torch.tensor(row['gender'], dtype=torch.long)
        age = torch.tensor(row['age'], dtype=torch.long)
        occupation = torch.tensor(row['occupation'], dtype=torch.long)

        # Item metadata
        genre = torch.tensor(row['genres'], dtype=torch.long)

        return user_id, item_id, gender, age, occupation, genre, rating

def get_dataloaders(data, batch_size=128, shuffle=True):
    dataset = MovieLensDataset(data)
    return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

## Model.py

In [None]:
import torch
import torch.nn as nn

class MultiHeadSelfAttention(nn.Module):
    def __init__(self, embed_dim, num_heads):
        super(MultiHeadSelfAttention, self).__init__()
        
        self.multihead_attn = nn.MultiheadAttention(embed_dim, num_heads)

    def forward(self, x):
        attn_output, _ = self.multihead_attn(x, x, x)
        return attn_output

class CrossAttentionLayer(nn.Module):
    def __init__(self, embed_dim, num_heads):
        super(CrossAttentionLayer, self).__init__()
        self.multihead_attn = nn.MultiheadAttention(embed_dim, num_heads)

    def forward(self, query, key, value):
        attn_output, _ = self.multihead_attn(query, key, value)
        return attn_output

class NCFModelWithMetadata(nn.Module):
    def __init__(self, num_users, num_items, num_gender_features, num_age_features, num_occupation_features, num_genre_features, embed_dim, num_heads):
        super(NCFModelWithMetadata, self).__init__()
        # Embeddings for user and item IDs
        self.user_embedding = nn.Embedding(num_users, embed_dim)
        self.item_embedding = nn.Embedding(num_items, embed_dim)
        
        # Embeddings for user metadata (gender, age, occupation)
        self.gender_embedding = nn.Embedding(num_gender_features, embed_dim)
        self.age_embedding = nn.Embedding(num_age_features, embed_dim)
        self.occupation_embedding = nn.Embedding(num_occupation_features, embed_dim)
        
        # Embedding for item metadata (genre)
        self.genre_embedding = nn.Embedding(num_genre_features, embed_dim)

        # Self-attention for user and item embeddings
        self.user_self_attention = MultiHeadSelfAttention(embed_dim, num_heads)
        self.item_self_attention = MultiHeadSelfAttention(embed_dim, num_heads)

        # Cross-attention for interaction between user and item embeddings
        self.cross_attention = CrossAttentionLayer(embed_dim, num_heads)

        # Fully connected layers for final prediction
        self.fc_layers = nn.Sequential(
            nn.Linear(num_items, 128),  # Input size matches interaction_scores shape (batch_size, num_items)
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, num_items)  # If outputting scores for num_items
        )
    def forward(self, user, item, gender, age, occupation, genre):
        # Clamp input indices to be within the valid range
        user = torch.clamp(user, 0, self.user_embedding.num_embeddings - 1)
        item = torch.clamp(item, 0, self.item_embedding.num_embeddings - 1)
        gender = torch.clamp(gender, 0, self.gender_embedding.num_embeddings - 1)
        age = torch.clamp(age, 0, self.age_embedding.num_embeddings - 1)
        occupation = torch.clamp(occupation, 0, self.occupation_embedding.num_embeddings - 1)
        genre = torch.clamp(genre, 0, self.genre_embedding.num_embeddings - 1)
    
        # Embeddings for user and item IDs
        user_emb = self.user_embedding(user)  # Shape: (batch_size, embed_dim)
        item_emb = self.item_embedding(item)  # Shape: (batch_size, embed_dim)
    
        # Embeddings for user metadata
        gender_emb = self.gender_embedding(gender)  # Shape: (batch_size, embed_dim)
        age_emb = self.age_embedding(age)  # Shape: (batch_size, embed_dim)
        occupation_emb = self.occupation_embedding(occupation)  # Shape: (batch_size, embed_dim)
    
        # Embedding for item metadata
        genre_emb = self.genre_embedding(genre)  # Shape: (batch_size, embed_dim)
    
        # Combine user and item embeddings with their respective metadata
        user_combined_emb = user_emb + gender_emb + age_emb + occupation_emb  # Shape: (batch_size, embed_dim)
        item_combined_emb = item_emb + genre_emb  # Shape: (batch_size, embed_dim)
    
        # Apply self-attention to combined embeddings
        user_self_attn = self.user_self_attention(user_combined_emb.unsqueeze(0))  # Shape: (1, batch_size, embed_dim)
        item_self_attn = self.item_self_attention(item_combined_emb.unsqueeze(0))  # Shape: (1, batch_size, embed_dim)
    
        # Cross-attention between user and item embeddings
        cross_attn_user = self.cross_attention(user_self_attn, item_self_attn, item_self_attn)  # Shape: (1, batch_size, embed_dim)
        cross_attn_item = self.cross_attention(item_self_attn, user_self_attn, user_self_attn)  # Shape: (1, batch_size, embed_dim)
    
        # Prepare for interaction with all items
        all_items_emb = self.item_embedding.weight  # Shape: (num_items, embed_dim)
    
        # Calculate interaction scores for all items for each user
        # User representations repeated for all items
        user_rep = cross_attn_user.squeeze(0).unsqueeze(1)  # Shape: (batch_size, 1, embed_dim)
    
        # Expand user_rep to match the shape of all_items_emb for element-wise multiplication
        user_rep_expanded = user_rep.expand(-1, all_items_emb.size(0), -1)  # Shape: (batch_size, num_items, embed_dim)
    
        # Expand all_items_emb to have a batch dimension
        all_items_emb_expanded = all_items_emb.unsqueeze(0).expand(user_rep_expanded.size(0), -1, -1)  # Shape: (batch_size, num_items, embed_dim)
    
        # Compute interaction scores using element-wise multiplication followed by summing over the last dimension
        interaction_scores = (user_rep_expanded * all_items_emb_expanded).sum(dim=-1)  # Shape: (batch_size, num_items)
    
        # Feed-forward layers for final output
        output = self.fc_layers(interaction_scores)  # Output shape: (batch_size, num_items)
    
        return output  # Return scores for all items for each user

In [12]:
"""
#feed_forward ver-2
def forward(self, user, item, gender, age, occupation, genre):
    # Clamp input indices to be within the valid range
    user = torch.clamp(user, 0, self.user_embedding.num_embeddings - 1)
    item = torch.clamp(item, 0, self.item_embedding.num_embeddings - 1)
    gender = torch.clamp(gender, 0, self.gender_embedding.num_embeddings - 1)
    age = torch.clamp(age, 0, self.age_embedding.num_embeddings - 1)
    occupation = torch.clamp(occupation, 0, self.occupation_embedding.num_embeddings - 1)
    genre = torch.clamp(genre, 0, self.genre_embedding.num_embeddings - 1)

    # Embeddings for user and item IDs
    # Shape: (batch_size, embed_dim)
    user_emb = self.user_embedding(user)
    item_emb = self.item_embedding(item)

    # Embeddings for user metadata
    # Shape: (batch_size, embed_dim)
    gender_emb = self.gender_embedding(gender)
    age_emb = self.age_embedding(age)
    occupation_emb = self.occupation_embedding(occupation)

    # Embedding for item metadata
    # Shape: (batch_size, embed_dim)
    genre_emb = self.genre_embedding(genre)

    # Combine user and item embeddings with their respective metadata
    # Adding metadata embeddings element-wise
    # Shape: (batch_size, embed_dim)
    user_combined_emb = user_emb + gender_emb + age_emb + occupation_emb
    item_combined_emb = item_emb + genre_emb

    # Apply self-attention to combined embeddings
    # Input for self-attention needs to be (seq_len, batch_size, embed_dim)
    # Reshape to (1, batch_size, embed_dim) for sequence length of 1
    user_self_attn = self.user_self_attention(user_combined_emb.unsqueeze(0))
    item_self_attn = self.item_self_attention(item_combined_emb.unsqueeze(0))

    # Cross-attention between user and item embeddings
    # Query, key, value shapes: (1, batch_size, embed_dim)
    cross_attn_user = self.cross_attention(user_self_attn, item_self_attn, item_self_attn)
    cross_attn_item = self.cross_attention(item_self_attn, user_self_attn, user_self_attn)

    # Concatenate the attended representations along the embedding dimension
    # Shape after squeeze: (batch_size, embed_dim)
    # Concatenate along the last dimension: (batch_size, embed_dim * 2)
    combined = torch.cat([cross_attn_user.squeeze(0), cross_attn_item.squeeze(0)], dim=-1)

    # Feed-forward layers for prediction
    # Shape: (batch_size, 1)
    output = self.fc_layers(combined)

    return output
""""

## utils.py 

In [57]:
import numpy as np

def hit_ratio(ranklist, gtItem):
    """Calculates Hit Ratio (HR)."""
    return 1 if gtItem in ranklist else 0

def ndcg(ranklist, gtItem):
    """Calculates Normalized Discounted Cumulative Gain (NDCG)."""
    if gtItem in ranklist:
        index = ranklist.index(gtItem)
        return np.log(2) / np.log(index + 2)
    return 0

def mean_reciprocal_rank(ranklist, gtItem):
    """Calculates Mean Reciprocal Rank (MRR)."""
    if gtItem in ranklist:
        index = ranklist.index(gtItem)
        return 1 / (index + 1)
    return 0

def precision_at_k(ranklist, gtItem, k):
    """Calculates Precision@k."""
    ranklist = ranklist[:k]  # Truncate to top-k items
    return 1 if gtItem in ranklist else 0

def evaluate_metrics(ranklist, gtItem, k):
    """
    Evaluate multiple metrics: Hit Ratio, NDCG, MRR, Precision@k.
    
    Args:
        ranklist: List of recommended item indices, ordered by relevance.
        gtItem: Ground truth item index.
        k: The number of top-k items for Precision@k.
    
    Returns:
        Dictionary with HR, NDCG, MRR, and Precision@k.
    """
    hr = hit_ratio(ranklist, gtItem)
    ndcg_score = ndcg(ranklist, gtItem)
    mrr_score = mean_reciprocal_rank(ranklist, gtItem)
    precision_k = precision_at_k(ranklist, gtItem, k)

    return {
        'HR': hr,
        'NDCG': ndcg_score,
        'MRR': mrr_score,
        'Precision@k': precision_k
    }

##  train.py

In [82]:

device = torch.device("cpu")

In [83]:
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts
from ranger21 import Ranger21 as Ranger
#from utils import evaluate_metrics

def train_model(model, train_loader, test_loader, num_epochs, switch_epoch, initial_lr, k):
    model.to(device)
    optimizer = optim.AdamW(model.parameters(), lr=initial_lr)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0

        for batch in train_loader:
            user, item, gender, age, occupation, genre, rating = batch
            user = user.to(device)
            item = item.to(device)
            gender = gender.to(device)
            age = age.to(device)
            occupation = occupation.to(device)
            genre = genre.to(device)
            rating = rating.to(device)

            output = model(user, item, gender, age, occupation, genre).squeeze()
            print(f"Output shape: {output.shape}")
            print(f"Item shape: {item.shape}")
            print(f"Rating shape: {rating.shape}")
            actual_item_scores = output.gather(1, item.view(-1, 1)).squeeze()  # Shape: [128]
            loss = F.mse_loss(actual_item_scores, rating.float())
            # Calculate loss
            #loss = F.mse_loss(output, rating)
            total_loss += loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # Switch optimizer at specified epoch
        if epoch == switch_epoch:
            optimizer = optim.Ranger(model.parameters(), lr=initial_lr)

        scheduler.step()
        
        # Evaluate every few epochs
        #if (epoch + 1) % 5 == 0 or epoch == num_epochs - 1:
        eval_results = evaluate_model(model, test_loader, k)
        print(f"Epoch {epoch + 1}: Loss = {total_loss / len(train_loader):.4f}, "
                  f"HR = {eval_results['HR']:.4f}, NDCG = {eval_results['NDCG']:.4f}, "
                  f"MRR = {eval_results['MRR']:.4f}, Precision@{k} = {eval_results['Precision@k']:.4f}")

def evaluate_model(model, test_loader, k):
    model.eval()
    all_hr, all_ndcg, all_mrr, all_precision_k = [], [], [], []

    with torch.no_grad():
        for batch in test_loader:
            # Move all batch elements to device
            user, item, gender, age, occupation, genre, rating = [x.to(device) for x in batch]
            
            # Pass the batch through the model
            output = model(user, item, gender, age, occupation, genre).squeeze()

            # Get top k predictions for each user in the batch
            _, indices = torch.topk(output, k)  # indices shape should be [batch_size, k]
            predicted_items = indices.cpu().numpy()  # Shape: [batch_size, k]
            print(f"Shape of predicted_items: {predicted_items.shape}")
            print(f"Shape of indices: {indices.shape}")
            actual_items = item.cpu().numpy()  # Shape: [batch_size]
            
            # Loop through each user in the batch
            for i in range(len(user)):  # Iterate through the batch
                actual_item = actual_items[i]  # Actual item for user i
                predicted = predicted_items[i]  # Top-k predicted items for user i
                
                print(f"actual_item: {actual_item}, type: {type(actual_item)}")
                print(f"predicted: {predicted}, type: {type(predicted)}, predicted shape: {predicted.shape}")
                
                # Calculate metrics for this user
                hr = calculate_hit_rate(actual_item, predicted)
                ndcg = calculate_ndcg(actual_item, predicted)
                mrr = calculate_mrr(actual_item, predicted)
                precision_k = calculate_precision_at_k(actual_item, predicted, k)

                # Append metrics
                all_hr.append(hr)
                all_ndcg.append(ndcg)
                all_mrr.append(mrr)
                all_precision_k.append(precision_k)

    # Calculate average metrics across the batch
    avg_hr = sum(all_hr) / len(all_hr)
    avg_ndcg = sum(all_ndcg) / len(all_ndcg)
    avg_mrr = sum(all_mrr) / len(all_mrr)
    avg_precision_k = sum(all_precision_k) / len(all_precision_k)

    return {'HR': avg_hr, 'NDCG': avg_ndcg, 'MRR': avg_mrr, 'Precision@k': avg_precision_k}

# Metrics calculations for individual user-item pairs
def calculate_hit_rate(actual, predicted):
    return 1 if actual in predicted else 0

def calculate_ndcg(actual, predicted):
    if actual in predicted:
        idx = list(predicted).index(actual)
        return 1.0 / (torch.log2(torch.tensor(idx + 2).float()))
    return 0.0

def calculate_mrr(actual, predicted):
    if actual in predicted:
        idx = list(predicted).index(actual)
        return 1.0 / (idx + 1)
    return 0.0

def calculate_precision_at_k(actual, predicted, k):
    return int(actual in predicted[:k]) / k


## Main.py

In [84]:
import torch
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
#import os
#os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

def main():
    data_dir = '/kaggle/input/movielens-100k-small-dataset'
    data, num_users, num_items, num_gender_features, num_age_features, num_occupation_features, num_genre_features = load_data(data_dir)
    
    # Split the data into train and test sets
    train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)
    
    train_loader = get_dataloaders(train_data, batch_size=128)
    test_loader = get_dataloaders(test_data, batch_size=128)
    train_items = set(train_data['movie_id'].values)
    test_items = set(test_data['movie_id'].values)
    print("Unique items in training set:", len(train_items))
    print("Unique items in test set:", len(test_items))
    print("Items in test set not in training set:", len(test_items - train_items))
    # Initialize the model
    embed_dim = 128  # Adjust as necessary
    num_heads = 4   # Adjust as necessary
    model = NCFModelWithMetadata(num_users, num_items, num_gender_features, num_age_features, num_occupation_features, num_genre_features, embed_dim, num_heads)

    # Train the model
    num_epochs = 50
    switch_epoch = 30  # Change optimizer after this epoch
    initial_lr = 1e-3
    k = 10  # For Precision@k

    train_model(model, train_loader, test_loader, num_epochs, switch_epoch, initial_lr, k)

if __name__ == '__main__':
    main()

Output shape: torch.Size([128, 3883])Item shape: torch.Size([128])Rating shape: torch.Size([128])

RuntimeError: index 3952 is out of bounds for dimension 1 with size 3883

## Just Back up functions

In [None]:


# Set device
#device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = torch.device("cpu")

def evaluate_model(model, test_loader, k):
    model.eval()
    metrics = {'HR': [], 'NDCG': [], 'MRR': [], 'Precision@k': []}

    with torch.no_grad():
        for batch in test_loader:
            user, item, gtItem = batch
            scores = model(user, item)
            _, ranklist = torch.topk(scores, k=k)

            metric_results = evaluate_metrics(ranklist.tolist(), gtItem.item(), k)
            for key in metrics:
                metrics[key].append(metric_results[key])

    # Compute average values of each metric
    averaged_metrics = {key: np.mean(metrics[key]) for key in metrics}
    return averaged_metrics


def train_model(model, train_loader, test_loader, num_epochs, switch_epoch, initial_lr, k):
    model.to(device)
    optimizer = optim.AdamW(model.parameters(), lr=initial_lr)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0

        for batch in train_loader:
            user, item, gender, age, occupation, genre, rating = batch
            user = user.to(device)
            item = item.to(device)
            gender = gender.to(device)
            age = age.to(device)
            occupation = occupation.to(device)
            genre = genre.to(device)
            rating = rating.to(device)

            output = model(user, item, gender, age, occupation, genre).squeeze()

            # Calculate loss
            loss = F.mse_loss(output, rating)
            total_loss += loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
        # Switch optimizer at specified epoch
        if epoch == switch_epoch:
            optimizer = optim.Ranger(model.parameters(), lr=initial_lr)

        scheduler.step()
        
        # Evaluate every few epochs
        #if (epoch + 1) % 5 == 0 or epoch == num_epochs - 1:
        eval_results = evaluate_model(model, test_loader, k)
        print(f"Epoch {epoch + 1}: Loss = {total_loss / len(train_loader):.4f}, "
                  f"HR = {eval_results['HR']:.4f}, NDCG = {eval_results['NDCG']:.4f}, "
                  f"MRR = {eval_results['MRR']:.4f}, Precision@{k} = {eval_results['Precision@k']:.4f}")

def forward(self, user, item, gender, age, occupation, genre):
    # User embedding
    user_emb = self.user_embedding(user)  # Shape: [batch_size, user_embedding_size]
    # Item embedding
    item_emb = self.item_embedding(item)  # Shape: [batch_size, item_embedding_size]

    # Pass through multi-head and cross-attention layers
    cross_attn_user = self.cross_attention_user(user_emb, genre)  # Adjust output shape if needed
    cross_attn_item = self.cross_attention_item(item_emb, genre)  # Adjust output shape if needed

    # Concatenate the attention outputs
    combined = torch.cat([cross_attn_user, cross_attn_item], dim=-1)  # Check resulting shape

    # Ensure combined shape matches the input for fc_layers
    expected_input_size = self.fc_layers[0].in_features
    if combined.size(1) != expected_input_size:
        # Adjust combined dimensions to match expected input size
        combined = F.linear(combined, torch.randn(expected_input_size, combined.size(1), device=combined.device))

    output = self.fc_layers(combined)
    return output

#feed_forward ver-2 ( working version)
def forward(self, user, item, gender, age, occupation, genre):
        # Embeddings for user and item IDs
        user_emb = torch.clamp(user, 0, self.user_embedding.num_embeddings - 1)
        item_emb = torch.clamp(item, 0, self.item_embedding.num_embeddings - 1)
        print("user_emb_shape1;", user_emb.shape)
        # Embeddings for user metadata
        gender_emb = self.gender_embedding(gender).unsqueeze(0)  # Shape: (1, batch_size, embed_dim)
        age_emb = self.age_embedding(age).unsqueeze(0)  # Shape: (1, batch_size, embed_dim)
        occupation_emb = self.occupation_embedding(occupation).unsqueeze(0)  # Shape: (1, batch_size, embed_dim)

        # Embedding for item metadata
        genre_emb = self.genre_embedding(genre).unsqueeze(0)  # Shape: (1, batch_size, embed_dim)
        print("genere_emb:",genre_emb.shape) 
        # Combine user and item embeddings with their respective metadata
        user_combined_emb = user_emb + gender_emb + age_emb + occupation_emb
        item_combined_emb = item_emb + genre_emb

        # Apply self-attention to combined embeddings
        user_self_attn = self.user_self_attention(user_combined_emb)
        item_self_attn = self.item_self_attention(item_combined_emb)

        # Cross-attention between user and item embeddings
        cross_attn_user = self.cross_attention(user_self_attn, item_self_attn, item_self_attn)
        cross_attn_item = self.cross_attention(item_self_attn, user_self_attn, user_self_attn)

        # Concatenate the attended representations
        combined = torch.cat([cross_attn_user.squeeze(0), cross_attn_item.squeeze(0)], dim=-1)
        print("User embedding shape:", user_emb.shape)
        print("Item embedding shape:", item_emb.shape)
        print("Cross attention user shape:", cross_attn_user.shape)
        print("Cross attention item shape:", cross_attn_item.shape)
        print("Combined shape:", combined.shape)


        combined = combined.view(combined.size(0), -1)
        # Feed-forward layers
        output = self.fc_layers(combined)

        return output

In [None]:
"""
def evaluate_model(model, test_loader, k):
    model.eval()
    all_hr, all_ndcg, all_mrr, all_precision_k = [], [], [], []

    with torch.no_grad():
        for batch in test_loader:
            # Assuming batch contains user, item, and metadata like gender, age, etc.
            user, item, gender, age, occupation, genre, rating = batch
            
            # Pass the batch through the model
            output = model(
                user.to(device), 
                item.to(device), 
                gender.to(device), 
                age.to(device), 
                occupation.to(device), 
                genre.to(device)
            ).squeeze()

            # Get top k predictions for each user in the batch
            _, indices = torch.topk(output, k)
            predicted_items = indices.cpu().numpy()  # Predicted items for the batch
            actual_items = item.cpu().numpy()  # Actual ground truth items for the batch
            print(f"predicted_items: {predicted}, type: {type(predicted)}")
            # Loop through each user in the batch
            for i in range(len(user)):
                actual_item = actual_items[i]  # Actual item for user i
                predicted = predicted_items[i]  # Top-k predicted items for user i
                print(f"actual_item: {actual_item}, type: {type(actual_item)}")
                print(f"predicted: {predicted}, type: {type(predicted)}")      
                # Calculate metrics for this user
                hr = calculate_hit_rate(actual_item, predicted)
                ndcg = calculate_ndcg(actual_item, predicted)
                mrr = calculate_mrr(actual_item, predicted)
                precision_k = calculate_precision_at_k(actual_item, predicted, k)

                # Append metrics
                all_hr.append(hr)
                all_ndcg.append(ndcg)
                all_mrr.append(mrr)
                all_precision_k.append(precision_k)

    # Calculate average metrics across the batch
    avg_hr = sum(all_hr) / len(all_hr)
    avg_ndcg = sum(all_ndcg) / len(all_ndcg)
    avg_mrr = sum(all_mrr) / len(all_mrr)
    avg_precision_k = sum(all_precision_k) / len(all_precision_k)

    return {'HR': avg_hr, 'NDCG': avg_ndcg, 'MRR': avg_mrr, 'Precision@k': avg_precision_k}
"""

In [76]:
import torch

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda