In [1]:
import torch
import torch_geometric
from torch_geometric.data import Data
import pandas as pd
import numpy as np
from torch_geometric.nn import GCNConv
from torch_geometric.nn import SAGEConv
from torch.nn import Linear
import torch.nn.functional as F

class BookRecommenderGNN(torch.nn.Module):
    def __init__(self, num_features, hidden_channels):
        super().__init__()
        self.conv1 = SAGEConv(num_features, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, hidden_channels)
        self.linear = Linear(hidden_channels, 1)
    
    def forward(self, x, edge_index):
        # First Graph Conv layer
        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, p=0.2, training=self.training)
        
        # Second Graph Conv layer
        x = self.conv2(x, edge_index)
        x = F.relu(x)
        
        # Final prediction layer
        x = self.linear(x)
        return x

def prepare_data(books_df, interactions_df, min_interactions=10):
    """
    Prepare the Goodreads dataset for GNN training
    
    Parameters:
    books_df: DataFrame with columns ['book_id', 'title', 'authors', 'average_rating', 'ratings_count', ...]
    interactions_df: DataFrame with columns ['user_id', 'book_id', 'rating']
    min_interactions: Minimum number of interactions for users/books to be included
    """
    # Filter users and books with minimum interactions
    user_counts = interactions_df['user_id'].value_counts()
    book_counts = interactions_df['book_id'].value_counts()
    
    valid_users = user_counts[user_counts >= min_interactions].index
    valid_books = book_counts[book_counts >= min_interactions].index
    
    filtered_interactions = interactions_df[
        interactions_df['user_id'].isin(valid_users) & 
        interactions_df['book_id'].isin(valid_books)
    ]
    
    # Create user and book mappings
    unique_users = filtered_interactions['user_id'].unique()
    unique_books = filtered_interactions['book_id'].unique()
    
    user_mapping = {uid: idx for idx, uid in enumerate(unique_users)}
    book_mapping = {bid: idx + len(user_mapping) for idx, bid in enumerate(unique_books)}
    
    # Create edge index
    user_nodes = [user_mapping[uid] for uid in filtered_interactions['user_id']]
    book_nodes = [book_mapping[bid] for bid in filtered_interactions['book_id']]
    
    edge_index = torch.tensor([
        user_nodes + book_nodes,  # Source nodes
        book_nodes + user_nodes   # Target nodes
    ], dtype=torch.long)
    
    # Create node features
    num_nodes = len(user_mapping) + len(book_mapping)
    num_features = 32  # You can adjust this
    
    # Initialize random features (in practice, you'd use real features)
    node_features = torch.randn(num_nodes, num_features)
    
    # Create target ratings
    edge_weights = torch.tensor(
        filtered_interactions['rating'].tolist() * 2,  # Duplicate for bidirectional edges
        dtype=torch.float
    )
    
    return Data(
        x=node_features,
        edge_index=edge_index,
        edge_attr=edge_weights
    ), user_mapping, book_mapping

def train_model(model, data, epochs=100):
    """Train the GNN model"""
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    model.train()
    
    for epoch in range(epochs):
        optimizer.zero_grad()
        out = model(data.x, data.edge_index)
        loss = F.mse_loss(out[data.edge_index[0]], data.edge_attr)
        loss.backward()
        optimizer.step()
        
        if (epoch + 1) % 10 == 0:
            print(f'Epoch {epoch+1:03d}, Loss: {loss:.4f}')
    
    return model

def get_recommendations(model, data, user_idx, book_mapping, top_k=5):
    """Get book recommendations for a user"""
    model.eval()
    with torch.no_grad():
        # Get embeddings for all nodes
        embeddings = model.conv2(
            model.conv1(data.x, data.edge_index),
            data.edge_index
        )
        
        # Get user embedding
        user_embedding = embeddings[user_idx]
        
        # Get book embeddings
        book_indices = torch.tensor(list(book_mapping.values()))
        book_embeddings = embeddings[book_indices]
        
        # Calculate similarity
        similarity = F.cosine_similarity(
            user_embedding.unsqueeze(0),
            book_embeddings
        )
        
        # Get top-k recommendations
        top_k_indices = similarity.argsort(descending=True)[:top_k]
        
        return [
            list(book_mapping.keys())[idx]
            for idx in top_k_indices
        ]

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def main():
    # Sample dataset preparation
    books_path = "C:/Users/Aravind/PROJECTS/GNN/data/books.csv"  # Replace with actual path
    interactions_path = "C:/Users/Aravind/PROJECTS/GNN/data/interactions.csv"  # Replace with actual path

    # Load datasets
    books_df = pd.read_csv(books_path)
    interactions_df = pd.read_csv(interactions_path)

    # Prepare the data for GNN
    data, user_mapping, book_mapping = prepare_data(books_df, interactions_df, min_interactions=5)

    # Initialize the GNN model
    num_features = data.x.size(1)
    hidden_channels = 64
    model = BookRecommenderGNN(num_features, hidden_channels)

    # Train the model
    print("Training the model...")
    trained_model = train_model(model, data, epochs=50)

    # Get recommendations for a user
    print("\nGenerating recommendations...")
    user_id = list(user_mapping.keys())[0]  # Choose a sample user from the mapping
    user_idx = user_mapping[user_id]        # Map to internal user index
    recommendations = get_recommendations(trained_model, data, user_idx, book_mapping, top_k=5)

    print(f"Top 5 recommendations for user {user_id}:")
    for book_id in recommendations:
        book_title = books_df[books_df['book_id'] == book_id]['title'].values[0]
        print(f"- {book_title} (Book ID: {book_id})")

if __name__ == "__main__":
    main()


Training the model...
Epoch 010, Loss: 3.2669
Epoch 020, Loss: 2.3063
Epoch 030, Loss: 2.3651
Epoch 040, Loss: 2.2722
Epoch 050, Loss: 2.2864

Generating recommendations...
Top 5 recommendations for user 49:
- Book Title 47 (Book ID: 47)
- Book Title 81 (Book ID: 81)
- Book Title 87 (Book ID: 87)
- Book Title 7 (Book ID: 7)
- Book Title 48 (Book ID: 48)


  loss = F.mse_loss(out[data.edge_index[0]], data.edge_attr)
