In [1]:
import pandas as pd
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset

df = pd.read_csv('RatingsNew.csv')

user_ids = df['user_id'].unique()
user_to_idx = {user_id: idx for idx, user_id in enumerate(user_ids)}
isbn_ids = df['book_id'].unique()
isbn_to_idx = {isbn_id: idx for idx, isbn_id in enumerate(isbn_ids)}

df['user_idx'] = df['user_id'].apply(lambda x: user_to_idx[x])
df['isbn_idx'] = df['book_id'].apply(lambda x: isbn_to_idx[x])

train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

train_df, val_df = train_test_split(train_df, test_size=0.1, random_state=42)

import torch.nn as nn

class MatrixFactorization(nn.Module):
    def __init__(self, num_users, num_items, embedding_size=100):
        super(MatrixFactorization, self).__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_size)
        self.item_embedding = nn.Embedding(num_items, embedding_size)
        self.user_embedding.weight.data.uniform_(0, 0.05)
        self.item_embedding.weight.data.uniform_(0, 0.05)

    def forward(self, user_indices, item_indices):
        user_embedding = self.user_embedding(user_indices)
        item_embedding = self.item_embedding(item_indices)
        return (user_embedding * item_embedding).sum(1)

class BookRatingDataset(Dataset):
    def __init__(self, df):
        self.users = torch.tensor(df.user_idx.values, dtype=torch.long)
        self.items = torch.tensor(df.isbn_idx.values, dtype=torch.long)
        self.ratings = torch.tensor(df['rating'].values, dtype=torch.float32)

    def __len__(self):
        return len(self.users)

    def __getitem__(self, idx):
        return self.users[idx], self.items[idx], self.ratings[idx]

train_dataset = BookRatingDataset(train_df)
val_dataset = BookRatingDataset(val_df)
test_dataset = BookRatingDataset(test_df)

train_loader = DataLoader(train_dataset, batch_size=512, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=512, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=512, shuffle=False)

def evaluate_model(model, test_loader):
    model.eval()
    losses = []
    with torch.no_grad():
        for users, items, real_ratings in test_loader:
            predictions = model(users, items)
            loss = criterion(predictions, real_ratings).item()
            losses.append(loss)
    mean_loss = np.mean(losses)
    rmse = np.sqrt(mean_loss)
    return rmse

num_users, num_items = len(user_ids), len(isbn_ids)
model = MatrixFactorization(num_users, num_items)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

patience = 2
best_val_loss = float('inf')
epochs_without_improvement = 0

num_epochs = 50
for epoch in range(num_epochs):
    model.train()
    for users, items, ratings in train_loader:
        optimizer.zero_grad()
        predictions = model(users, items)
        loss = criterion(predictions, ratings)
        loss.backward()
        optimizer.step()

    val_rmse = evaluate_model(model, val_loader)
    print(f'Epoch {epoch+1}, Validation RMSE: {val_rmse}')

    if val_rmse < best_val_loss:
        best_val_loss = val_rmse
        epochs_without_improvement = 0
        torch.save(model.state_dict(), 'best_model.pth')
    else:
        epochs_without_improvement += 1
        if epochs_without_improvement >= patience:
            print("Early stopping triggered")
            break

model.load_state_dict(torch.load('best_model.pth'))

Epoch 1, Validation RMSE: 0.9288272773071933
Epoch 2, Validation RMSE: 0.9221959436462063
Epoch 3, Validation RMSE: 0.8993747004217882
Epoch 4, Validation RMSE: 0.8707395446951249
Epoch 5, Validation RMSE: 0.8597830763505917
Epoch 6, Validation RMSE: 0.8732253203949419
Epoch 7, Validation RMSE: 0.901339889089948
Early stopping triggered


<All keys matched successfully>

In [9]:
import torch
import numpy as np

def recommend_books(model, book_ids, num_items, n_recommendations=5):
    model.eval()
    book_ids_tensor = torch.tensor(book_ids, dtype=torch.long)
    book_embeddings = model.item_embedding(book_ids_tensor)
    pseudo_user_profile = book_embeddings.mean(dim=0)
    all_books = torch.tensor(range(num_items), dtype=torch.long)
    all_book_embeddings = model.item_embedding(all_books)
    similarities = (all_book_embeddings @ pseudo_user_profile) / (all_book_embeddings.norm(dim=1) * pseudo_user_profile.norm() + 1e-8)
    _, recommended_indices = torch.topk(similarities, n_recommendations + len(book_ids))
    recommended_books = [idx.item() for idx in recommended_indices if idx.item() not in book_ids][:n_recommendations]
    return recommended_books

book_ids = [13, 48, 196]
num_items = len(df.book_id.unique())
n_recommendations = 5
model.eval()
recommended_books = recommend_books(model, book_ids, num_items, n_recommendations)
print("Recommended Books:", recommended_books)

Recommended Books: [626, 178, 6400, 2419, 725]
