In [40]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load your datasets
books_df = pd.read_csv('./data/Books.csv')
ratings_df = pd.read_csv('./data/Ratings.csv')
users_df = pd.read_csv('./data/Users.csv')

# Assuming ratings_df has columns ['user_id', 'book_id', 'rating']
# Ensure user_id and book_id in ratings_df are encoded to numeric values
user_encoder = LabelEncoder()
book_encoder = LabelEncoder()

ratings_df['User-ID'] = user_encoder.fit_transform(ratings_df['User-ID'])
ratings_df['ISBN'] = book_encoder.fit_transform(ratings_df['ISBN'])

# Split data
train_data, test_data = train_test_split(ratings_df, test_size=0.2, random_state=42)


  books_df = pd.read_csv('./data/Books.csv')


In [41]:
test_data.head()

Unnamed: 0,User-ID,ISBN,Book-Rating
178554,14710,66197,0
533905,48732,207194,8
1091374,98946,65482,0
1036247,93459,128975,0
309523,28004,47683,0


In [42]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class NCF(nn.Module):
    def __init__(self, num_users, num_books, embed_size, layers=[32, 16, 8]):
        super(NCF, self).__init__()
        """
        num_users: Number of unique users
        num_books: Number of unique books
        embed_size: Embedding size
        layers: MLP layers sizes
        """
        self.user_embedding = nn.Embedding(num_embeddings=num_users, embedding_dim=embed_size)
        self.book_embedding = nn.Embedding(num_embeddings=num_books, embedding_dim=embed_size)
        
        # MLP layers
        self.MLP_layers = nn.ModuleList()
        for in_size, out_size in zip(layers[:-1], layers[1:]):
            self.MLP_layers.append(nn.Linear(in_size, out_size))
        
        # Final layer
        self.output_layer = nn.Linear(layers[-1] + embed_size, 1)
        
        self.relu = nn.ReLU()
        
    def forward(self, user_indices, book_indices):
        # Embeddings
        user_embedding = self.user_embedding(user_indices)
        book_embedding = self.book_embedding(book_indices)
        # Concatenate the embeddings to feed into the MLP
        vector = torch.cat([user_embedding, book_embedding], dim=-1)
        # Pass through MLP layers
        for layer in self.MLP_layers:
            vector = layer(vector)
            vector = self.relu(vector)
        
        # Concatenate the output of the GMF part and the MLP part
        concat = torch.cat([user_embedding * book_embedding, vector], dim=-1)
        
        # Final prediction layer
        prediction = self.output_layer(concat)
        return prediction.squeeze()


In [43]:
from torch.utils.data import DataLoader, Dataset
import numpy as np

class RatingsDataset(Dataset):
    def __init__(self, df):
        self.users = torch.tensor(df['User-ID'].values, dtype=torch.long)
        self.books = torch.tensor(df['ISBN'].values, dtype=torch.long)
        self.ratings = torch.tensor(df['Book-Rating'].values, dtype=torch.float32)
        
    def __len__(self):
        return len(self.ratings)
    
    def __getitem__(self, idx):
        return self.users[idx], self.books[idx], self.ratings[idx]

# Prepare DataLoader
train_dataset = RatingsDataset(train_data)
test_dataset = RatingsDataset(test_data)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = NCF(num_users=len(user_encoder.classes_), num_books=len(book_encoder.classes_), embed_size=16).to(device)

# Loss and optimizer
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 5
for epoch in range(num_epochs):
    model.train()
    for user_indices, book_indices, ratings in train_loader:
        user_indices, book_indices, ratings = user_indices.to(device), book_indices.to(device), ratings.to(device)
        
        # Forward pass
        outputs = model(user_indices, book_indices)
        loss = criterion(outputs, ratings)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [1/5], Loss: 13.9665
Epoch [2/5], Loss: 13.4029
Epoch [3/5], Loss: 16.2462
Epoch [4/5], Loss: 10.1746
Epoch [5/5], Loss: 13.4950


In [44]:
model.eval()  # Set the model to evaluation mode
test_loss = 0
with torch.no_grad():  # No gradients needed
    for user_indices, book_indices, ratings in test_loader:
        user_indices, book_indices, ratings = user_indices.to(device), book_indices.to(device), ratings.to(device)
        
        # Forward pass
        outputs = model(user_indices, book_indices)
        loss = criterion(outputs, ratings)
        
        test_loss += loss.item()

# Average loss
test_loss /= len(test_loader)
print(f'Test MSE: {test_loss}')

Test MSE: 11.815114720768575


In [47]:
def recommend_books(model, user_id_encoded, book_ids_encoded, num_recommendations=5):
    model.eval()
    user_indices = torch.tensor([user_id_encoded] * len(book_ids_encoded), dtype=torch.long).to(device)
    book_indices = torch.tensor(book_ids_encoded, dtype=torch.long).to(device)
    
    with torch.no_grad():
        predictions = model(user_indices, book_indices).cpu().numpy()
    
    # Get the top N recommendations; argsort sorts in ascending order, so use [-num_recommendations:]
    recommended_indices = np.argsort(predictions)[-num_recommendations:]
    
    # Decode book IDs if necessary
    recommended_books = [book_encoder.inverse_transform([idx])[0] for idx in recommended_indices]
    
    return recommended_books

# Example usage
user_id_encoded = user_encoder.transform([809])[0]  # Replace 'user_id_example' with actual user ID
book_ids_encoded = list(range(len(book_encoder.classes_)))  # All books
recommended_books = recommend_books(model, user_id_encoded, book_ids_encoded, num_recommendations=5)
print("Recommended Books:", recommended_books)


Recommended Books: ['0586092269', '0886777399', '0020303750', '0099296810', '0307204030']
