# Neural Collaborative Filtering

### Pre-requisites

In [1]:
import warnings
warnings.filterwarnings(action='ignore')

### Import necessary libraries

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

### Load the data

In [3]:
# Read the merged ratings table from its project-relative CSV location.
df = pd.read_csv(filepath_or_buffer='../Datasets/merged_moviecine_tmdb.csv')

# Preview five randomly drawn rows; the draw is unseeded, so the rows shown
# differ from run to run.
df.sample(n=5)

Unnamed: 0,user_id,movie_id,rating,timestamp,title,release_date,genre_names,original_language_full
6223104,121320,39183,3.0,1213382637,Once in a Lifetime: The Extraordinary Story of...,2006-05-19,['Documentary'],English
4881140,95124,2028,3.5,1118283949,Say Anything...,1989-04-14,"['Romance', 'Comedy', 'Drama']",English
1131281,22190,125,1.5,1076158250,Railway Station,1980-01-01,['Documentary'],Polish
6063445,118091,43560,3.0,1499056620,Vacanze in America,1984-12-21,['Comedy'],Italian
6583977,128323,6957,0.5,1128933989,The 40 Year Old Virgin,2005-08-11,"['Comedy', 'Romance']",English


### Encoding the data

In [4]:
# One encoder per id space; both are kept in scope so the fitted mappings
# stay available after this cell.
user_enc, movie_enc = LabelEncoder(), LabelEncoder()

# Fit each encoder on its raw id column and store the resulting integer
# codes in new 'user' / 'movie' columns next to the original ids.
df['user'], df['movie'] = (
    user_enc.fit_transform(df['user_id']),
    movie_enc.fit_transform(df['movie_id']),
)

In [5]:
# Count the distinct encoded ids; these counts are later passed to the model
# as the sizes of its user and item embedding tables.
num_users, num_movies = df['user'].nunique(), df['movie'].nunique()

### Holdout

In [6]:
# Random 80/20 holdout over the three modelling columns; the fixed
# random_state makes the split repeatable.
train_df, test_df = train_test_split(
    df.loc[:, ['user', 'movie', 'rating']],
    random_state=42,
    test_size=0.2,
)

### PyTorch Dataset

In [7]:
class RatingsDataset(Dataset):
    """Dataset yielding ``(user, movie, rating)`` tensor triples.

    The three columns are converted to tensors once, at construction time,
    so that item access is a plain tensor index.

    Args:
        df: Table providing ``'user'``, ``'movie'`` and ``'rating'`` columns.
            ``'user'`` and ``'movie'`` are stored as ``torch.long``,
            ``'rating'`` as ``torch.float32``.
    """

    def __init__(self, df):
        def column_tensor(name, dtype):
            # torch.tensor copies the column's underlying array.
            return torch.tensor(df[name].values, dtype=dtype)

        self.users = column_tensor('user', torch.long)
        self.movies = column_tensor('movie', torch.long)
        self.ratings = column_tensor('rating', torch.float32)

    def __len__(self):
        """Return the number of stored rating rows."""
        return self.users.size(0)

    def __getitem__(self, idx):
        """Return the ``(user, movie, rating)`` entries at position ``idx``."""
        user, movie, rating = self.users[idx], self.movies[idx], self.ratings[idx]
        return user, movie, rating

# Wrap both splits as tensor datasets.
train_dataset, test_dataset = RatingsDataset(train_df), RatingsDataset(test_df)

# Mini-batches of 512 rows; only the training loader shuffles, the test
# loader keeps the rows in their stored order.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=512)
test_loader = DataLoader(dataset=test_dataset, batch_size=512)

### Model Building

In [8]:
class NCF(nn.Module):
    """Neural collaborative filtering model for rating regression.

    A user embedding and an item embedding are concatenated and passed
    through a small MLP (``2 * embedding_dim -> 128 -> 64 -> 1``) that emits
    one predicted rating per (user, item) pair.

    Args:
        num_users: Number of rows in the user embedding table.
        num_items: Number of rows in the item embedding table.
        embedding_dim: Width of each embedding vector (default 50).
    """

    def __init__(self, num_users, num_items, embedding_dim=50):
        super().__init__()
        self.user_embed = nn.Embedding(num_users, embedding_dim)
        self.item_embed = nn.Embedding(num_items, embedding_dim)

        self.fc_layers = nn.Sequential(
            nn.Linear(embedding_dim * 2, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )

    def forward(self, user, item):
        """Predict a rating for every (user, item) index pair.

        Args:
            user: Long tensor of user indices.
            item: Long tensor of item indices, same shape as ``user``.

        Returns:
            Tensor of predictions with the same shape as ``user``.
        """
        user_emb = self.user_embed(user)
        item_emb = self.item_embed(item)
        x = torch.cat([user_emb, item_emb], dim=-1)
        out = self.fc_layers(x)
        # Drop only the trailing size-1 feature axis. A bare squeeze() would
        # also drop the batch axis for a batch of one, leaving a 0-dim output
        # that no longer matches the (1,)-shaped target in the loss.
        return out.squeeze(-1)

### Model Training

In [None]:
# Seed PyTorch's global RNG before the model and the first shuffled pass are
# created, so weight initialisation and batch order start from the same state
# on every fresh run.
torch.manual_seed(42)

# Prefer the GPU when one is available; otherwise train on the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = NCF(num_users, num_movies).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 5
for epoch in range(epochs):
    model.train()
    total_loss = 0.0
    num_samples = 0
    for users, movies, ratings in train_loader:
        users, movies, ratings = users.to(device), movies.to(device), ratings.to(device)

        optimizer.zero_grad()
        outputs = model(users, movies)
        loss = criterion(outputs, ratings)
        loss.backward()
        optimizer.step()

        # criterion returns the batch mean; scale it back to a sum so the
        # (usually shorter) final batch is weighted by its true size.
        total_loss += loss.item() * ratings.size(0)
        num_samples += ratings.size(0)

    # Per-sample mean squared error over the whole epoch.
    print(f"Epoch {epoch+1}/{epochs}, Training Loss: {total_loss / num_samples:.4f}")

### Model Evaluation

In [None]:
# Evaluate on the held-out split with the model in eval mode and gradient
# tracking disabled.
model.eval()
with torch.no_grad():
    total_mse = 0.0
    num_samples = 0
    for users, movies, ratings in test_loader:
        users, movies, ratings = users.to(device), movies.to(device), ratings.to(device)
        outputs = model(users, movies)
        mse = criterion(outputs, ratings)
        # criterion yields the mean over the batch; weight it by the batch
        # size so the reported figure is the true per-sample MSE instead of
        # a mean of batch means skewed by the short final batch.
        total_mse += mse.item() * ratings.size(0)
        num_samples += ratings.size(0)
    print(f"Test MSE: {total_mse / num_samples:.4f}")