In [None]:
# based on https://www.geeksforgeeks.org/deep-learning/neural-collaborative-filtering/

In [15]:
# imports
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import json
from tqdm import tqdm

In [6]:
# Load the train / test edge CSVs (paths are relative to the working directory).
train_df, test_df = (
  pd.read_csv(csv_name) for csv_name in ('train_edges.csv', 'test_edges.csv')
)

In [7]:
# Give both frames the same 'user_id' / 'rating' column names that the later cells use.
train_df, test_df = (
  frame.rename(columns={'User-ID': 'user_id', 'Book-Rating': 'rating'})
  for frame in (train_df, test_df)
)

In [8]:
# Distinct users and book titles that occur in the training edges.
train_users = list(train_df['user_id'].unique())
books = list(train_df['join_title'].unique())
num_train_users, num_books = len(train_users), len(books)

# Map every user / book to a contiguous 0-based position; these positions are
# what the embedding tables are indexed with.
train_user_to_idx = dict(zip(train_users, range(num_train_users)))
book_to_idx = dict(zip(books, range(num_books)))

In [9]:
# Encode each training row as (user index, book index) plus its rating, which
# is the value the model is trained to predict.
ratings = torch.tensor(train_df['rating'].values, dtype=torch.float32)
book_indices = torch.tensor(
  [book_to_idx[title] for title in train_df['join_title']],
  dtype=torch.long,
)
user_indices = torch.tensor(
  [train_user_to_idx[raw_user] for raw_user in train_df['user_id']],
  dtype=torch.long,
)

In [10]:
class NCF(nn.Module):
  """Neural collaborative filtering scorer.

  Looks up one embedding per user index and one per item index, concatenates
  the two vectors, and feeds them through a 128 -> 64 -> 1 fully connected
  stack with ReLU activations and dropout (p=0.2) after the first hidden layer.
  """

  def __init__(self, num_users, num_items, embedding_dim=16):
    """
    num_users: number of rows in the user embedding table
    num_items: number of rows in the item embedding table
    embedding_dim: width of each user / item embedding vector
    """
    super().__init__()
    self.user_embedding = nn.Embedding(num_embeddings=num_users, embedding_dim=embedding_dim)
    self.item_embedding = nn.Embedding(num_embeddings=num_items, embedding_dim=embedding_dim)
    # The first dense layer consumes the concatenated [user | item] vector.
    concat_width = 2 * embedding_dim
    self.fc1 = nn.Linear(in_features=concat_width, out_features=128)
    self.fc2 = nn.Linear(in_features=128, out_features=64)
    self.fc3 = nn.Linear(in_features=64, out_features=1)
    self.relu = nn.ReLU()
    self.dropout = nn.Dropout(p=0.2)

  def forward(self, user, item):
    """Return one raw score per (user, item) index pair.

    The result keeps a trailing dimension of size 1 (the output of fc3); no
    activation is applied to it.
    """
    pair_features = torch.cat(
      (self.user_embedding(user), self.item_embedding(item)),
      dim=-1,
    )
    hidden = self.dropout(self.relu(self.fc1(pair_features)))
    hidden = self.relu(self.fc2(hidden))
    return self.fc3(hidden)

In [11]:
# the neural collaborative filtering model, loss, opt
# Seed PyTorch's global RNG before building the model so the random
# embedding / linear-layer initialisation (and the dropout masks drawn during
# training) start from the same state on every Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

model = NCF(num_train_users, num_books)
# Mean squared error between the predicted and the observed rating.
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.005, weight_decay=1e-5)

In [13]:
# training loop
# Full-batch training: every epoch scores all training interactions in a single
# forward pass and takes exactly one optimizer step.
epochs = 50
model.train()
for epoch in range(epochs):
  predictions = model(user_indices, book_indices).squeeze()
  loss = criterion(predictions, ratings)

  # Clear the gradients left over from the previous step before accumulating new ones.
  optimizer.zero_grad()
  loss.backward()
  optimizer.step()

  # Report the loss every 10th epoch.
  if not (epoch + 1) % 10:
    print(f"Epoch {epoch+1}, Loss: {loss.item():.4f}")

Epoch 10, Loss: 18.5886
Epoch 20, Loss: 16.8587
Epoch 30, Loss: 15.7660
Epoch 40, Loss: 14.6671
Epoch 50, Loss: 13.2590


In [20]:
def generate_recommendations(model, user_id, train_df, train_user_to_idx, book_to_idx, books, num_books):
  """
  Score every book the user has not rated in the train set and return the
  highest-scoring ones.

  model: the NCF model; called as model(user_tensor, book_tensor) and put into eval mode
  user_id: the user id (must be a key of train_user_to_idx)
  train_df: the original train dataset ('user_id' and 'join_title' columns are read),
            used to exclude the books the user already rated
  train_user_to_idx: map user_id to idx
  book_to_idx: map book to idx
  books: all book titles to consider as candidates (each must be a key of book_to_idx)
  num_books: number of books to recommend

  returns: list of at most num_books titles, highest predicted rating first
           (empty if the user has already rated every book or num_books <= 0)
  raises: KeyError if user_id is not in train_user_to_idx
  """
  model.eval()

  # exclude the user's book from train set
  user_rows = train_df[train_df['user_id'] == user_id]
  user_rated_books = set(user_rows['join_title'].unique())
  candidate_books = [book for book in books if book not in user_rated_books]

  user_idx = train_user_to_idx[user_id]
  user_tensor = torch.tensor([user_idx] * len(candidate_books), dtype=torch.long)
  book_tensor = torch.tensor([book_to_idx[b] for b in candidate_books], dtype=torch.long)

  with torch.no_grad():
    # reshape(-1) always yields a 1-D score vector; a bare squeeze() would
    # collapse a single-candidate result to a 0-d array.
    predicted_ratings = model(user_tensor, book_tensor).reshape(-1).cpu().numpy()

  # want top k books indices -- sized by the num_books argument, not by a
  # notebook-level global; a negative request is treated as "none".
  top_n = max(num_books, 0)
  top_k_idx = predicted_ratings.argsort()[::-1][:top_n]
  return [candidate_books[i] for i in top_k_idx]

In [21]:
# Generate recommendations for all users in test set
k = 50

model.eval()
recommendations_dict = {}
cold_start_users = []

eval_users = list(test_df['user_id'].unique())
for user_id in tqdm(eval_users):
  # A test user that never appears in the train set has no entry in
  # train_user_to_idx (and no embedding row), so nothing can be scored for
  # them. Record an empty list instead of aborting the whole run with KeyError.
  if user_id not in train_user_to_idx:
    cold_start_users.append(user_id)
    recommendations_dict[str(user_id)] = []
    continue
  recommendations = generate_recommendations(model, user_id, train_df, train_user_to_idx, book_to_idx, books, k)
  # JSON object keys must be strings, so the user id is stringified here.
  recommendations_dict[str(user_id)] = recommendations

num_scored_users = len(recommendations_dict) - len(cold_start_users)
print(f"Generated {k} recommendations for {num_scored_users} users")
if cold_start_users:
  print(f"{len(cold_start_users)} test users are not in the train set and received an empty list")

  8%|▊         | 765/9055 [00:02<00:21, 378.77it/s]


KeyboardInterrupt: 

In [18]:
# Persist the per-user recommendation lists as indented JSON.
file_path = "neural_collaborative_filtering_recommendations_k_50.json"
serialized = json.dumps(recommendations_dict, indent=4)
with open(file_path, mode="w") as json_file:
  json_file.write(serialized)

print(f"Recommendations saved to {file_path}")


Recommendations saved to neural_collaborative_filtering_recommendations_k_50.json
