In [None]:
import torch

DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as optim
from torch.nn import Module
from torch.utils.data import Dataset, DataLoader
from scipy.sparse import coo_matrix
from scipy.sparse import vstack
from scipy import sparse
import numpy as np
import pandas as pd
from torch.optim import Adam
from torch.nn import MSELoss
import time

np.random.seed(2)

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Data Processing

In [None]:
class RatingsDataset(Dataset):
    """Ratings Dataset"""

    def __init__(self):
        """
        Args:
            csv_file (string): Path to the csv file with ratings.
        """
        self.csv = pd.read_csv('drive/My Drive/Research/ratings.csv')

        self.user_ids = list(self.csv.userId-1)
        self.movie_ids = list(self.csv.movieId-1)
        self.ratings = list(self.csv.rating)

        # Count the numbers of users and movies
        self.userNums = np.max(self.user_ids)+1
        self.movieNums = np.max(self.movie_ids)+1

    def __len__(self):
        return len(self.csv)

    def __getitem__(self, idx):
        user_idx = self.user_ids[idx]
        movie_idx = self.movie_ids[idx]
        rating = self.ratings[idx]

        concatenated = torch.tensor([user_idx, movie_idx], dtype=torch.float)

        return {
            'user_id': user_idx,
            'movie_id': movie_idx,
            'concatenated': concatenated,
            'rating': rating
        }

    def get_user_number(self):
        return self.userNums

    def get_movie_number(self):
        return self.movieNums

In [None]:
dataset = RatingsDataset()

train_size = int(0.6 * len(dataset))
val_size = int(0.2 * len(dataset))
test_size = len(dataset) - train_size - val_size

train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, val_size, test_size])

## Model Implementation - Neural Collaborative Filtering

In [None]:
class NCF(Module):
    def __init__(self, num_users, num_items, first_layer=128):
        super(NCF, self).__init__()

        self.mlp = nn.Sequential(
            nn.Linear(2, first_layer),  # Input dimension is 2 since concatenated user and item indices
            nn.Dropout(0.25),
            nn.ReLU(inplace=True),
            nn.Linear(first_layer, first_layer//2),
            nn.Dropout(0.25),
            nn.ReLU(inplace=True),
            nn.Linear(first_layer//2, first_layer//4),
            nn.Dropout(0.25),
            nn.ReLU(inplace=True),
            nn.Linear(first_layer//4, 1)
        )

    def forward(self, concatenated): # Pass the concatenated data to the forward function
        return self.mlp(concatenated).squeeze()

## Define Training Helper Functions

In [None]:
# Define the training and evaluation functions
def train(model, dataloader, optimizer, criterion):
    model.train()
    total_loss = 0.0

    for batch in dataloader:
        # Pass the concatenated user-item pair as the input
        concatenated = batch['concatenated']
        # user_idx = batch['user'].to(torch.long)
        # movie_idx = batch['movie'].to(torch.long)
        # concatenated = torch.stack([user_idx, movie_idx], dim=1).to(torch.float)
        target = batch['rating'].to(torch.float)

        optimizer.zero_grad()
        output = model(concatenated)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    return total_loss / len(dataloader)

def evaluate(model, dataloader, criterion):
    model.eval()
    total_loss = 0.0

    with torch.no_grad():
        for batch in dataloader:
            concatenated = batch['concatenated']
            # user_idx = batch['user'].to(torch.long)
            # movie_idx = batch['movie'].to(torch.long)
            # concatenated = torch.stack([user_idx, movie_idx], dim=1).to(torch.float)
            target = batch['rating']

            output = model(concatenated)
            loss = criterion(output, target)

            total_loss += loss.item()

    return total_loss / len(dataloader)

## Training

In [None]:
# Create your dataset and dataloaders
dataset = RatingsDataset()
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

# Instantiate your model
model = NCF(dataset.get_user_number(), dataset.get_movie_number(), first_layer=128)

# Define loss function and optimizer
criterion = nn.L1Loss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    train_loss = train(model, train_dataloader, optimizer, criterion)
    test_loss = evaluate(model, test_dataloader, criterion)
    print(f"Epoch {epoch+1}/{num_epochs}: Train Loss = {train_loss:.4f}, Test Loss = {test_loss:.4f}")



Epoch 1/10: Train Loss = 8.0251, Test Loss = 1.4038
Epoch 2/10: Train Loss = 1.1740, Test Loss = 0.9097
Epoch 3/10: Train Loss = 0.8745, Test Loss = 0.8434
Epoch 4/10: Train Loss = 0.8413, Test Loss = 0.8438
Epoch 5/10: Train Loss = 0.8354, Test Loss = 0.8404
Epoch 6/10: Train Loss = 0.8305, Test Loss = 0.8404
Epoch 7/10: Train Loss = 0.8303, Test Loss = 0.8404
Epoch 8/10: Train Loss = 0.8306, Test Loss = 0.8404
Epoch 9/10: Train Loss = 0.8303, Test Loss = 0.8404
Epoch 10/10: Train Loss = 0.8334, Test Loss = 0.8404


## Produce Recommendations for a Test User

In [None]:
# Choose a test user ID from the MovieLens dataset
test_user_id = 125  # Replace with the actual test user ID

# Get the movies the test user has interacted with
user_interacted_items = set(item['movie_id'] for item in dataset if item['user_id'] == test_user_id)

# Generate movie recommendations for the test user
recommendations = []
for item_id in range(dataset.get_movie_number()):
    # Predict ratings for unseem movies by the test user
    if item_id not in user_interacted_items:
        concatenated = torch.tensor([[test_user_id, item_id]], dtype=torch.float)
        predicted_rating = model(concatenated).item()
        recommendations.append((item_id, predicted_rating))

# Sort recommendations by predicted rating in descending order
recommendations.sort(key=lambda x: x[1], reverse=True)

# Select the top-N recommended items
top_n_recommendations = recommendations[:500]  # Replace N with the desired number of recommendations

# Print the top-N recommended items
for item_id, predicted_rating in top_n_recommendations:
    print(f"Item ID: {item_id}, Predicted Rating: {predicted_rating:.2f}")

Item ID: 2, Predicted Rating: 4.00
Item ID: 3, Predicted Rating: 4.00
Item ID: 4, Predicted Rating: 4.00
Item ID: 5, Predicted Rating: 4.00
Item ID: 6, Predicted Rating: 4.00
Item ID: 7, Predicted Rating: 4.00
Item ID: 8, Predicted Rating: 4.00
Item ID: 10, Predicted Rating: 4.00
Item ID: 11, Predicted Rating: 4.00
Item ID: 12, Predicted Rating: 4.00
Item ID: 13, Predicted Rating: 4.00
Item ID: 14, Predicted Rating: 4.00
Item ID: 15, Predicted Rating: 4.00
Item ID: 16, Predicted Rating: 4.00
Item ID: 17, Predicted Rating: 4.00
Item ID: 18, Predicted Rating: 4.00
Item ID: 19, Predicted Rating: 4.00
Item ID: 20, Predicted Rating: 4.00
Item ID: 21, Predicted Rating: 4.00
Item ID: 22, Predicted Rating: 4.00
Item ID: 23, Predicted Rating: 4.00
Item ID: 24, Predicted Rating: 4.00
Item ID: 25, Predicted Rating: 4.00
Item ID: 26, Predicted Rating: 4.00
Item ID: 27, Predicted Rating: 4.00
Item ID: 28, Predicted Rating: 4.00
Item ID: 29, Predicted Rating: 4.00
Item ID: 30, Predicted Rating: 4.00