# GNN-based Recommendation System

This notebook implements a Graph Neural Network (GNN) based recommender system using the MovieLens 100K dataset. The system models user-movie interactions as a bipartite graph and uses message passing with graph convolutions to capture complex relationships.

## Target Performance
- **RMSE**: ≤ 0.90
- **MAE**: ≤ 0.72
- **Precision@K**: ≥ 0.85 (checked at K = 10)

## 1. Setup and Imports

In [None]:
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

# Set style for plots.
# The seaborn style sheets were renamed to 'seaborn-v0_8*' in matplotlib 3.6;
# older releases only know the un-suffixed 'seaborn' name. Pick whichever is
# installed so the setup cell does not fail on either side of that rename.
for _style_name in ('seaborn-v0_8', 'seaborn'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
sns.set_palette("husl")

# Set random seeds for reproducibility (one constant so both libraries agree)
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Check device: prefer CUDA when PyTorch reports it as available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Import custom modules
from data_loader import MovieLensDataLoader
from gnn_model import GNNRecommender, CollaborativeFilteringLoss
from trainer import RecommenderTrainer
from evaluator import RecommenderEvaluator

## 2. Data Loading and Exploration

In [None]:
# Load the ratings table through the project data loader
print("Loading MovieLens 100K dataset...")
data_loader = MovieLensDataLoader()
ratings_df = data_loader.load_ratings()

print("Dataset loaded successfully!")

# Headline counts: users/items are read from the loader's fitted encoders
n_ratings = len(ratings_df)
n_distinct_users = len(data_loader.user_encoder.classes_)
n_distinct_items = len(data_loader.item_encoder.classes_)
# Percentage of the user x item grid that has no rating
sparsity_pct = (1 - n_ratings / (n_distinct_users * n_distinct_items)) * 100

print(f"Total ratings: {n_ratings:,}")
print(f"Number of users: {n_distinct_users:,}")
print(f"Number of items: {n_distinct_items:,}")
print(f"Sparsity: {sparsity_pct:.2f}%")

In [None]:
# Print summary statistics followed by a peek at the first rows
for section_heading, section_table in (
    ("\n=== Dataset Statistics ===", ratings_df.describe()),
    ("\n=== First 5 rows ===", ratings_df.head()),
):
    print(section_heading)
    print(section_table)

In [None]:
# Visualize data distribution
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Rating distribution.
# Unit-width bins centred on the star values 1..5 so every bar sits on its
# tick; bins=5 would place edges 0.8 apart between 1 and 5 and shift the bars.
rating_bin_edges = np.arange(0.5, 6.0, 1.0)
axes[0, 0].hist(ratings_df['rating'], bins=rating_bin_edges, alpha=0.7, color='skyblue', edgecolor='black')
axes[0, 0].set_xticks(range(1, 6))
axes[0, 0].set_title('Rating Distribution')
axes[0, 0].set_xlabel('Rating')
axes[0, 0].set_ylabel('Frequency')

# User activity distribution
user_counts = ratings_df['user_id'].value_counts()
axes[0, 1].hist(user_counts, bins=50, alpha=0.7, color='lightgreen', edgecolor='black')
axes[0, 1].set_title('User Activity Distribution')
axes[0, 1].set_xlabel('Number of Ratings per User')
axes[0, 1].set_ylabel('Number of Users')

# Item popularity distribution
item_counts = ratings_df['item_id'].value_counts()
axes[1, 0].hist(item_counts, bins=50, alpha=0.7, color='salmon', edgecolor='black')
axes[1, 0].set_title('Item Popularity Distribution')
axes[1, 0].set_xlabel('Number of Ratings per Item')
axes[1, 0].set_ylabel('Number of Items')

# Rating vs timestamp.
# Sampling without replacement raises if more rows are requested than exist,
# so cap the sample at the frame length. A fixed random_state makes the plot
# identical on every re-run of this cell and leaves the global NumPy RNG alone.
scatter_sample_size = min(5000, len(ratings_df))
sample_df = ratings_df.sample(n=scatter_sample_size, random_state=42)
axes[1, 1].scatter(sample_df['timestamp'], sample_df['rating'], alpha=0.5, color='purple')
axes[1, 1].set_title('Ratings over Time (Sample)')
axes[1, 1].set_xlabel('Timestamp')
axes[1, 1].set_ylabel('Rating')

plt.tight_layout()
plt.show()

## 3. Graph Construction and Analysis

In [None]:
# Build the user-item graph from the ratings table
print("Creating bipartite graph...")
graph_data, num_users, num_items = data_loader.create_bipartite_graph(ratings_df)

print("Graph created successfully!")

# Read the sizes once, then report them
total_nodes, feature_width = graph_data.x.shape[0], graph_data.x.shape[1]
total_edges = graph_data.edge_index.shape[1]

print(f"Number of nodes: {total_nodes:,}")
print(f"Number of edges: {total_edges:,}")
print(f"Node feature dimensions: {feature_width}")
print(f"User nodes: {num_users:,}")
print(f"Item nodes: {num_items:,}")

In [None]:
# Analyze graph properties
# Copy the tensors to host memory first: Tensor.numpy() only works on CPU
# tensors, and the graph may already live on the GPU if this cell is re-run
# after training.
edge_weights = graph_data.edge_attr.detach().cpu()
source_nodes = graph_data.edge_index[0].detach().cpu()
is_user_source = source_nodes < num_users

# Degrees are counted from the source row of edge_index only.
# minlength reserves one slot per node, so nodes that never appear as a source
# are reported with degree 0 instead of being dropped from the tail.
# NOTE(review): item degrees are only non-zero if the graph stores item->user
# edges as well as user->item edges -- confirm against create_bipartite_graph.
user_degrees = torch.bincount(source_nodes[is_user_source], minlength=int(num_users))
item_degrees = torch.bincount(source_nodes[~is_user_source] - num_users, minlength=int(num_items))

fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Edge weight distribution
axes[0, 0].hist(edge_weights.numpy(), bins=50, alpha=0.7, color='blue', edgecolor='black')
axes[0, 0].set_title('Edge Weight Distribution')
axes[0, 0].set_xlabel('Edge Weight (Rating)')
axes[0, 0].set_ylabel('Frequency')

# User degree distribution
axes[0, 1].hist(user_degrees.numpy(), bins=50, alpha=0.7, color='green', edgecolor='black')
axes[0, 1].set_title('User Degree Distribution')
axes[0, 1].set_xlabel('Degree')
axes[0, 1].set_ylabel('Number of Users')

# Item degree distribution
axes[1, 0].hist(item_degrees.numpy(), bins=50, alpha=0.7, color='red', edgecolor='black')
axes[1, 0].set_title('Item Degree Distribution')
axes[1, 0].set_xlabel('Degree')
axes[1, 0].set_ylabel('Number of Items')

# Node feature correlation heatmap (the first num_users rows of x are taken as user nodes)
user_features = graph_data.x[:num_users].detach().cpu().numpy()
corr_matrix = np.corrcoef(user_features.T)
im = axes[1, 1].imshow(corr_matrix, cmap='coolwarm', vmin=-1, vmax=1)
axes[1, 1].set_title('User Feature Correlation')
plt.colorbar(im, ax=axes[1, 1])

plt.tight_layout()
plt.show()

## 4. Data Splitting

In [ ]:
# Split data into train/validation/test sets
def prepare_data_splits(ratings_df, test_size=0.2, val_size=0.1, random_state=42):
    """Randomly partition the ratings into train, validation and test frames.

    Args:
        ratings_df: Table of ratings to split.
        test_size: Fraction of the full table held out for testing.
        val_size: Fraction of the full table held out for validation.
        random_state: Seed passed to both underlying splits.

    Returns:
        Tuple ``(train_df, val_df, test_df)``.
    """
    # Stage 1: peel the test set off the full table.
    remainder_df, test_df = train_test_split(
        ratings_df, test_size=test_size, random_state=random_state
    )
    # Stage 2: val_size is expressed relative to the full table, so rescale it
    # to a fraction of what is left after removing the test rows.
    val_share_of_remainder = val_size / (1 - test_size)
    train_df, val_df = train_test_split(
        remainder_df, test_size=val_share_of_remainder, random_state=random_state
    )
    return train_df, val_df, test_df

train_df, val_df, test_df = prepare_data_splits(ratings_df, test_size=0.2, val_size=0.1)

print("Data split completed:")
print(f"Training set: {len(train_df):,} ratings ({len(train_df)/len(ratings_df)*100:.1f}%)")
print(f"Validation set: {len(val_df):,} ratings ({len(val_df)/len(ratings_df)*100:.1f}%)")
print(f"Test set: {len(test_df):,} ratings ({len(test_df)/len(ratings_df)*100:.1f}%)")

# Visualize rating distribution across splits
fig, axes = plt.subplots(1, 3, figsize=(15, 5))

# All three panels share one y-limit, taken from the tallest training bar, so
# the splits can be compared by eye. It does not depend on the loop variable,
# so compute it once up front rather than once per subplot.
shared_y_max = max(int((train_df['rating'] == r).sum()) for r in range(1, 6)) * 1.1

# Unit-width bins centred on the star values 1..5 keep each bar on its tick.
rating_bin_edges = np.arange(0.5, 6.0, 1.0)

for i, (df, name) in enumerate([(train_df, 'Train'), (val_df, 'Validation'), (test_df, 'Test')]):
    axes[i].hist(df['rating'], bins=rating_bin_edges, alpha=0.7, edgecolor='black')
    axes[i].set_xticks(range(1, 6))
    axes[i].set_title(f'{name} Set Rating Distribution')
    axes[i].set_xlabel('Rating')
    axes[i].set_ylabel('Frequency')
    axes[i].set_ylim(0, shared_y_max)

plt.tight_layout()
plt.show()

## 5. Model Architecture and Training

In [ ]:
# Build the recommender; the feature width is read from the graph's node features
model_config = {
    'num_users': num_users,
    'num_items': num_items,
    'feature_dim': graph_data.x.shape[1],
    'embedding_dim': 64,
    'hidden_dim': 128,
    'num_layers': 3,
}
model = GNNRecommender(**model_config)

print("Model Architecture:")
print(model)

# Tally parameter counts in a single pass over the parameters
total_param_count = 0
trainable_param_count = 0
for parameter in model.parameters():
    total_param_count += parameter.numel()
    if parameter.requires_grad:
        trainable_param_count += parameter.numel()

print(f"\nTotal parameters: {total_param_count:,}")
print(f"Trainable parameters: {trainable_param_count:,}")

### Enhanced Training with Detailed Monitoring

In [ ]:
# Custom training function with detailed monitoring
import time
from torch.utils.data import DataLoader, TensorDataset
from tqdm.notebook import tqdm

def train_with_monitoring(model, data, train_df, val_df, epochs=50, lr=0.001, batch_size=1024,
                          patience=15, checkpoint_path='best_model_notebook.pth'):
    """Train ``model`` with per-epoch monitoring, LR scheduling and early stopping.

    Every epoch makes one shuffled mini-batch pass over ``train_df`` (Adam,
    gradient norm clipped to 1.0) and then scores ``val_df``. Whenever the
    validation loss improves, the model's state dict is written to
    ``checkpoint_path``; the best checkpoint written during this call is loaded
    back into ``model`` before returning.

    Relies on the notebook-level ``device`` for tensor and model placement.

    Args:
        model: Module called as ``model(data, users, items)``. It must expose
            ``user_embedding`` and ``item_embedding``; their outputs for the
            batch are passed to the loss alongside the predictions.
        data: Graph object handed to every forward call; moved with ``data.to(device)``.
        train_df: DataFrame with ``user_id``, ``item_id`` and ``rating`` columns.
            The ids are fed straight into the embedding lookups.
        val_df: DataFrame with the same columns, used for validation.
        epochs: Maximum number of epochs.
        lr: Initial Adam learning rate (halved by ``ReduceLROnPlateau`` after
            10 epochs without validation-loss improvement).
        batch_size: Mini-batch size for both loaders.
        patience: Epochs without validation-loss improvement before stopping early.
        checkpoint_path: File the best state dict is saved to.

    Returns:
        dict mapping ``'train_loss'``, ``'val_loss'``, ``'val_rmse'``,
        ``'val_mae'``, ``'learning_rate'``, ``'epoch_time'`` and
        ``'gradient_norm'`` to lists with one Python float per completed epoch.

    Raises:
        ValueError: If ``train_df`` or ``val_df`` has no rows.
    """
    # An empty split would otherwise surface later as a ZeroDivisionError when
    # averaging over zero batches; fail early with a clear message instead.
    if len(train_df) == 0 or len(val_df) == 0:
        raise ValueError("train_df and val_df must each contain at least one rating")

    def _build_loader(frame, shuffle):
        """Wrap a ratings frame as a DataLoader of (user, item, rating) batches."""
        dataset = TensorDataset(
            torch.LongTensor(frame['user_id'].values),
            torch.LongTensor(frame['item_id'].values),
            torch.FloatTensor(frame['rating'].values),
        )
        return DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

    # Prepare data loaders
    train_loader = _build_loader(train_df, shuffle=True)
    val_loader = _build_loader(val_df, shuffle=False)

    # Setup training
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=10, factor=0.5)
    criterion = CollaborativeFilteringLoss(lambda_reg=0.01)

    model = model.to(device)
    data = data.to(device)

    # Tracking metrics
    metrics = {
        'train_loss': [],
        'val_loss': [],
        'val_rmse': [],
        'val_mae': [],
        'learning_rate': [],
        'epoch_time': [],
        'gradient_norm': []
    }

    best_val_loss = float('inf')
    patience_counter = 0
    # Tracks whether THIS call wrote a checkpoint, so a stale file left behind
    # by an earlier run is never loaded by mistake.
    checkpoint_saved = False

    print("Starting training with detailed monitoring...")

    for epoch in range(epochs):
        epoch_start_time = time.time()

        # Training phase
        model.train()
        total_train_loss = 0.0
        total_grad_norm = 0.0

        train_pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{epochs} [Train]')
        for batch_users, batch_items, batch_ratings in train_pbar:
            batch_users = batch_users.to(device)
            batch_items = batch_items.to(device)
            batch_ratings = batch_ratings.to(device)

            optimizer.zero_grad()

            predictions = model(data, batch_users, batch_items)
            # Embeddings of the batch are passed to the loss in addition to the predictions
            user_emb = model.user_embedding(batch_users)
            item_emb = model.item_embedding(batch_items)

            loss = criterion(predictions, batch_ratings, user_emb, item_emb)
            loss.backward()

            # clip_grad_norm_ returns the total norm measured BEFORE clipping,
            # which is the value tracked here.
            grad_norm = torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            total_grad_norm += float(grad_norm)

            optimizer.step()
            total_train_loss += loss.item()

            train_pbar.set_postfix({'loss': f'{loss.item():.4f}'})

        avg_train_loss = total_train_loss / len(train_loader)
        avg_grad_norm = total_grad_norm / len(train_loader)

        # Validation phase
        model.eval()
        total_val_loss = 0.0
        all_predictions = []
        all_targets = []

        with torch.no_grad():
            val_pbar = tqdm(val_loader, desc=f'Epoch {epoch+1}/{epochs} [Val]')
            for batch_users, batch_items, batch_ratings in val_pbar:
                batch_users = batch_users.to(device)
                batch_items = batch_items.to(device)
                batch_ratings = batch_ratings.to(device)

                predictions = model(data, batch_users, batch_items)
                user_emb = model.user_embedding(batch_users)
                item_emb = model.item_embedding(batch_items)

                loss = criterion(predictions, batch_ratings, user_emb, item_emb)
                total_val_loss += loss.item()

                all_predictions.extend(predictions.cpu().numpy())
                all_targets.extend(batch_ratings.cpu().numpy())

                val_pbar.set_postfix({'loss': f'{loss.item():.4f}'})

        avg_val_loss = total_val_loss / len(val_loader)
        val_rmse = float(np.sqrt(mean_squared_error(all_targets, all_predictions)))
        val_mae = float(mean_absolute_error(all_targets, all_predictions))

        # Update learning rate
        scheduler.step(avg_val_loss)
        current_lr = float(optimizer.param_groups[0]['lr'])

        # Record metrics (all stored as plain Python floats)
        epoch_time = time.time() - epoch_start_time
        metrics['train_loss'].append(float(avg_train_loss))
        metrics['val_loss'].append(float(avg_val_loss))
        metrics['val_rmse'].append(val_rmse)
        metrics['val_mae'].append(val_mae)
        metrics['learning_rate'].append(current_lr)
        metrics['epoch_time'].append(float(epoch_time))
        metrics['gradient_norm'].append(float(avg_grad_norm))

        # Early stopping check (a NaN loss compares False and counts as "no improvement")
        if avg_val_loss < best_val_loss:
            best_val_loss = avg_val_loss
            patience_counter = 0
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_saved = True
        else:
            patience_counter += 1

        # Print progress
        if epoch % 5 == 0 or epoch == epochs - 1:
            print(f'Epoch {epoch+1:03d} | Train Loss: {avg_train_loss:.4f} | '
                  f'Val Loss: {avg_val_loss:.4f} | Val RMSE: {val_rmse:.4f} | '
                  f'Val MAE: {val_mae:.4f} | LR: {current_lr:.6f} | Time: {epoch_time:.1f}s')

        if patience_counter >= patience:
            print(f"Early stopping at epoch {epoch+1}")
            break

    # Load best model. Only restore a checkpoint written by this call; with
    # epochs <= 0 or a validation loss that never improved there is none.
    # map_location keeps the load working when the checkpoint device differs
    # from the current one.
    if checkpoint_saved:
        model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    else:
        print("Warning: validation loss never improved; keeping the current model weights.")
    print("Training completed!")

    return metrics

In [None]:
# Run training; hyperparameters are gathered in one place for easy tuning
training_config = {
    'epochs': 50,
    'lr': 0.001,
    'batch_size': 1024,
}
training_metrics = train_with_monitoring(model, graph_data, train_df, val_df, **training_config)

## 6. Training Analysis and Visualization

In [None]:
# Six-panel overview of the recorded training history
fig, axes = plt.subplots(2, 3, figsize=(18, 12))

epochs_range = range(1, len(training_metrics['train_loss']) + 1)

# Panel 1: train vs validation loss on shared axes
loss_ax = axes[0, 0]
loss_ax.plot(epochs_range, training_metrics['train_loss'], label='Train Loss', color='blue')
loss_ax.plot(epochs_range, training_metrics['val_loss'], label='Validation Loss', color='red')
loss_ax.set(xlabel='Epoch', ylabel='Loss', title='Training and Validation Loss')
loss_ax.legend()
loss_ax.grid(True, alpha=0.3)

# Panels 2-3: validation error curves, each with its target drawn as a dashed line
for panel_ax, history_key, curve_color, target_value, metric_label in (
    (axes[0, 1], 'val_rmse', 'green', 0.90, 'RMSE'),
    (axes[0, 2], 'val_mae', 'orange', 0.72, 'MAE'),
):
    panel_ax.plot(epochs_range, training_metrics[history_key], color=curve_color, linewidth=2)
    panel_ax.axhline(y=target_value, color='red', linestyle='--',
                     label=f'Target {metric_label} ({target_value:.2f})')
    panel_ax.set(xlabel='Epoch', ylabel=metric_label, title=f'Validation {metric_label} Progression')
    panel_ax.legend()
    panel_ax.grid(True, alpha=0.3)

# Panels 4-6: optimisation diagnostics (the learning rate is shown on a log axis)
for panel_ax, history_key, curve_color, y_label, panel_title, log_y in (
    (axes[1, 0], 'learning_rate', 'purple', 'Learning Rate', 'Learning Rate Schedule', True),
    (axes[1, 1], 'gradient_norm', 'brown', 'Gradient Norm', 'Gradient Norm Progression', False),
    (axes[1, 2], 'epoch_time', 'teal', 'Time (seconds)', 'Training Time per Epoch', False),
):
    panel_ax.plot(epochs_range, training_metrics[history_key], color=curve_color, linewidth=2)
    panel_ax.set(xlabel='Epoch', ylabel=y_label, title=panel_title)
    if log_y:
        panel_ax.set_yscale('log')
    panel_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# Values from the last completed epoch (not necessarily the best one)
print("\n=== Final Training Metrics ===")
for caption, history_key in (
    ("Final Train Loss", 'train_loss'),
    ("Final Validation Loss", 'val_loss'),
    ("Final Validation RMSE", 'val_rmse'),
    ("Final Validation MAE", 'val_mae'),
):
    print(f"{caption}: {training_metrics[history_key][-1]:.4f}")
print(f"Total Training Time: {sum(training_metrics['epoch_time']):.1f} seconds")

## 7. Comprehensive Model Evaluation

In [None]:
# Score the trained model on the held-out test split
evaluator = RecommenderEvaluator(model, device=device)

eval_k_values = [5, 10, 20]

print("Performing comprehensive evaluation on test set...")
test_results = evaluator.comprehensive_evaluation(graph_data, test_df, k_values=eval_k_values)

# Report: rating-prediction errors first, then ranking metrics per cutoff
section_rule = "=" * 50
print("\n" + section_rule)
print("FINAL MODEL PERFORMANCE")
print(section_rule)
print(f"RMSE: {test_results['rmse']:.4f} (Target: ≤ 0.90)")
print(f"MAE: {test_results['mae']:.4f} (Target: ≤ 0.72)")
print("\nRanking Metrics:")
for k in eval_k_values:
    for ranking_label, result_key in (
        ("Precision", 'precision_at_k'),
        ("Recall", 'recall_at_k'),
        ("NDCG", 'ndcg_at_k'),
    ):
        print(f"{ranking_label}@{k}: {test_results[result_key][k]:.4f}")
    print("-" * 30)

# Compare against the notebook's stated targets (lower is better for the
# error metrics, higher is better for precision)
target_met = {
    'RMSE': test_results['rmse'] <= 0.90,
    'MAE': test_results['mae'] <= 0.72,
    'Precision@10': test_results['precision_at_k'][10] >= 0.85
}

print("\n=== TARGET ACHIEVEMENT ===")
for metric, achieved in target_met.items():
    if achieved:
        status = "✅ ACHIEVED"
    else:
        status = "❌ NOT ACHIEVED"
    print(f"{metric}: {status}")

all_targets_met = all(target_met.values())
overall_verdict = '🎉 ALL TARGETS MET!' if all_targets_met else '⚠️ SOME TARGETS NOT MET'
print(f"\nOverall: {overall_verdict}")

### Detailed Performance Analysis

In [None]:
# Get detailed predictions for analysis
rmse, mae, predictions, targets = evaluator.evaluate_ratings(graph_data, test_df)

# Normalise both to flat NumPy arrays. The boolean masks and element-wise
# subtraction below need array semantics, and an (N, 1) column would silently
# broadcast against an (N,) vector into an (N, N) matrix.
predictions = np.asarray(predictions).ravel()
targets = np.asarray(targets).ravel()
assert predictions.shape == targets.shape, "predictions and targets must align one-to-one"

# Per-rating error metrics, computed once and reused by the bar chart and the
# printed breakdown further down. Ratings with no test samples are skipped.
rating_stats = {}
for rating in range(1, 6):
    mask = (targets == rating)
    sample_count = mask.sum()
    if sample_count > 0:
        rating_stats[rating] = {
            'mae': mean_absolute_error(targets[mask], predictions[mask]),
            'rmse': np.sqrt(mean_squared_error(targets[mask], predictions[mask])),
            'count': sample_count,
        }
rating_performance = {rating: stats['rmse'] for rating, stats in rating_stats.items()}

# Create performance analysis plots
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# Prediction vs Actual scatter plot (the dashed diagonal marks perfect predictions)
axes[0, 0].scatter(targets, predictions, alpha=0.5, s=10)
axes[0, 0].plot([1, 5], [1, 5], 'r--', linewidth=2)
axes[0, 0].set_xlabel('Actual Rating')
axes[0, 0].set_ylabel('Predicted Rating')
axes[0, 0].set_title('Predicted vs Actual Ratings')
axes[0, 0].grid(True, alpha=0.3)

# Residuals plot (positive residual = over-prediction)
residuals = predictions - targets
axes[0, 1].scatter(targets, residuals, alpha=0.5, s=10)
axes[0, 1].axhline(y=0, color='r', linestyle='--')
axes[0, 1].set_xlabel('Actual Rating')
axes[0, 1].set_ylabel('Residuals')
axes[0, 1].set_title('Residuals Plot')
axes[0, 1].grid(True, alpha=0.3)

# Error distribution
axes[1, 0].hist(residuals, bins=50, alpha=0.7, edgecolor='black')
axes[1, 0].axvline(x=0, color='r', linestyle='--')
axes[1, 0].set_xlabel('Prediction Error')
axes[1, 0].set_ylabel('Frequency')
axes[1, 0].set_title('Error Distribution')

# Performance by rating value
ratings = list(rating_performance.keys())
rmses = list(rating_performance.values())
axes[1, 1].bar(ratings, rmses, alpha=0.7, color='skyblue', edgecolor='black')
axes[1, 1].set_xlabel('Rating Value')
axes[1, 1].set_ylabel('RMSE')
axes[1, 1].set_title('RMSE by Rating Value')
axes[1, 1].set_xticks(ratings)

plt.tight_layout()
plt.show()

# Print detailed statistics
print("\n=== Detailed Performance Statistics ===")
print(f"Mean Absolute Error: {mae:.4f}")
print(f"Root Mean Square Error: {rmse:.4f}")
print(f"Mean Prediction: {np.mean(predictions):.4f}")
print(f"Mean Actual: {np.mean(targets):.4f}")
print(f"Prediction Std: {np.std(predictions):.4f}")
print(f"Actual Std: {np.std(targets):.4f}")
print(f"Correlation: {np.corrcoef(predictions, targets)[0, 1]:.4f}")

# Error analysis by rating
print("\n=== Error Analysis by Rating ===")
for rating, stats in rating_stats.items():
    print(f"Rating {rating}: MAE={stats['mae']:.4f}, RMSE={stats['rmse']:.4f}, Count={stats['count']}")

## 8. Model Interpretation and Analysis

In [None]:
# Inspect the embedding tables for the first (up to) 100 user and item indices.
# NOTE(review): these are read from model.user_embedding / model.item_embedding
# directly, not from a forward pass through the graph layers -- confirm that is
# the representation you want to inspect.
model.eval()
with torch.no_grad():
    sample_user_idx = torch.arange(min(100, num_users)).to(device)
    sample_item_idx = torch.arange(min(100, num_items)).to(device)

    user_embeddings = model.user_embedding(sample_user_idx).cpu().numpy()
    item_embeddings = model.item_embedding(sample_item_idx).cpu().numpy()

fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Top row: histogram of all embedding values, users then items
for hist_ax, embedding_block, bar_color, hist_title in (
    (axes[0, 0], user_embeddings, 'blue', 'User Embedding Distribution'),
    (axes[0, 1], item_embeddings, 'green', 'Item Embedding Distribution'),
):
    hist_ax.hist(embedding_block.flatten(), bins=50, alpha=0.7, color=bar_color, edgecolor='black')
    hist_ax.set(title=hist_title, xlabel='Embedding Value', ylabel='Frequency')

# Bottom row: pairwise correlation between the first 20 embedding vectors
user_sim = np.corrcoef(user_embeddings[:20])
item_sim = np.corrcoef(item_embeddings[:20])
for heat_ax, similarity, heat_title in (
    (axes[1, 0], user_sim, 'User Embedding Similarity (Sample)'),
    (axes[1, 1], item_sim, 'Item Embedding Similarity (Sample)'),
):
    heat_image = heat_ax.imshow(similarity, cmap='coolwarm', vmin=-1, vmax=1)
    heat_ax.set_title(heat_title)
    plt.colorbar(heat_image, ax=heat_ax)

plt.tight_layout()
plt.show()

print("\n=== Embedding Analysis ===")
for side_label, embedding_block in (("User", user_embeddings), ("Item", item_embeddings)):
    print(f"{side_label} embedding mean: {np.mean(embedding_block):.4f}")
    print(f"{side_label} embedding std: {np.std(embedding_block):.4f}")

### Recommendation Quality Analysis

In [None]:
# Analyze recommendation quality for specific users
def analyze_user_recommendations(user_id, top_k=10):
    """Compare predicted and actual ratings for one user's test-set items.

    Reads the notebook-level ``test_df``, ``model``, ``graph_data`` and
    ``device``. Prints a per-user summary plus the ``top_k`` test items ranked
    by predicted rating.

    Args:
        user_id: Value matched against ``test_df['user_id']`` and passed to the
            model as the user index.
        top_k: Number of highest-predicted rows to print.

    Returns:
        DataFrame with columns ``item_id``, ``actual_rating``,
        ``predicted_rating`` and ``error`` (predicted minus actual), sorted by
        ``predicted_rating`` descending; ``None`` if the user has no test ratings.
    """
    # Only this user's held-out (test) ratings are considered
    user_ratings = test_df[test_df['user_id'] == user_id].copy()

    if len(user_ratings) == 0:
        print(f"No test ratings found for user {user_id}")
        return None

    item_ids = user_ratings['item_id'].values
    actual_ratings = user_ratings['rating'].values

    # One (user, item) pair per test rating; the user id is repeated to match
    user_tensor = torch.LongTensor([int(user_id)] * len(user_ratings)).to(device)
    item_tensor = torch.LongTensor(item_ids).to(device)

    model.eval()
    with torch.no_grad():
        # Send the graph to the same device as the index tensors so the
        # forward pass never mixes CPU and GPU tensors.
        predictions = model(graph_data.to(device), user_tensor, item_tensor)
        # Flatten so an (N, 1) model output still yields one value per row
        predictions = predictions.cpu().numpy().reshape(-1)

    # Create comparison dataframe
    comparison = pd.DataFrame({
        'item_id': item_ids,
        'actual_rating': actual_ratings,
        'predicted_rating': predictions,
        'error': predictions - actual_ratings
    })

    comparison = comparison.sort_values('predicted_rating', ascending=False)

    print(f"\n=== User {user_id} Recommendation Analysis ===")
    print(f"Total items rated: {len(comparison)}")
    print(f"Average actual rating: {comparison['actual_rating'].mean():.2f}")
    print(f"Average predicted rating: {comparison['predicted_rating'].mean():.2f}")
    print(f"RMSE for this user: {np.sqrt(np.mean(comparison['error']**2)):.4f}")

    print(f"\nTop {top_k} Recommendations:")
    print(comparison.head(top_k).to_string(index=False, float_format='%.3f'))

    return comparison

# Analyze a few random users.
# Drawing without replacement raises if more users are requested than exist,
# so cap the request at the number of distinct test users.
test_user_ids = test_df['user_id'].unique()
sample_users = np.random.choice(test_user_ids, size=min(3, len(test_user_ids)), replace=False)
for user_id in sample_users:
    analyze_user_recommendations(user_id, top_k=5)

## 9. Ranking Metrics Visualization

In [None]:
# Evaluate the ranking metrics over a sweep of cutoffs K
k_values = [1, 2, 3, 5, 10, 15, 20]

# One metric at a time, in the order precision -> recall -> NDCG
precision_values = []
for k in k_values:
    precision_values.append(evaluator.precision_at_k(graph_data, test_df, k=k))
recall_values = []
for k in k_values:
    recall_values.append(evaluator.recall_at_k(graph_data, test_df, k=k))
ndcg_values = []
for k in k_values:
    ndcg_values.append(evaluator.ndcg_at_k(graph_data, test_df, k=k))

fig, axes = plt.subplots(2, 2, figsize=(15, 12))

# Precision@K, with the target drawn as a dashed reference line
precision_ax = axes[0, 0]
precision_ax.plot(k_values, precision_values, marker='o', linewidth=2, markersize=8)
precision_ax.axhline(y=0.85, color='red', linestyle='--', label='Target (0.85)')
precision_ax.set(xlabel='K', ylabel='Precision@K', title='Precision@K Performance')
precision_ax.grid(True, alpha=0.3)
precision_ax.legend()

# Recall@K and NDCG@K share the same single-curve layout
for curve_ax, curve_values, curve_marker, curve_color, curve_name in (
    (axes[0, 1], recall_values, 's', 'green', 'Recall@K'),
    (axes[1, 0], ndcg_values, '^', 'orange', 'NDCG@K'),
):
    curve_ax.plot(k_values, curve_values, marker=curve_marker, linewidth=2, markersize=8, color=curve_color)
    curve_ax.set(xlabel='K', ylabel=curve_name, title=f'{curve_name} Performance')
    curve_ax.grid(True, alpha=0.3)

# All three metrics overlaid on one panel
combined_ax = axes[1, 1]
for curve_values, curve_marker, curve_name in (
    (precision_values, 'o', 'Precision@K'),
    (recall_values, 's', 'Recall@K'),
    (ndcg_values, '^', 'NDCG@K'),
):
    combined_ax.plot(k_values, curve_values, marker=curve_marker, label=curve_name, linewidth=2)
combined_ax.set(xlabel='K', ylabel='Metric Value', title='All Ranking Metrics')
combined_ax.grid(True, alpha=0.3)
combined_ax.legend()

plt.tight_layout()
plt.show()

# Same numbers as a table
print("\n=== Detailed Ranking Metrics ===")
ranking_df = pd.DataFrame({
    'K': k_values,
    'Precision@K': precision_values,
    'Recall@K': recall_values,
    'NDCG@K': ndcg_values
})
print(ranking_df.to_string(index=False, float_format='%.4f'))

## 10. Summary and Conclusions

In [None]:
# Create final summary visualization
fig, axes = plt.subplots(1, 2, figsize=(15, 6))

# Target vs Achieved comparison.
# Deliberately distinct names: `targets`, `metrics` and `achieved` are already
# used earlier in the notebook (the test-set target array, among others), and
# rebinding them here would leave stale, mismatched state behind for any cell
# that is re-run afterwards.
metric_names = ['RMSE', 'MAE', 'Precision@10']
target_values = [0.90, 0.72, 0.85]
achieved_values = [test_results['rmse'], test_results['mae'], test_results['precision_at_k'][10]]

x_positions = np.arange(len(metric_names))
bar_width = 0.35

# Paired bars: target on the left of each tick, achieved value on the right
bars1 = axes[0].bar(x_positions - bar_width/2, target_values, bar_width, label='Target', alpha=0.7, color='lightcoral')
bars2 = axes[0].bar(x_positions + bar_width/2, achieved_values, bar_width, label='Achieved', alpha=0.7, color='lightblue')

axes[0].set_xlabel('Metrics')
axes[0].set_ylabel('Value')
axes[0].set_title('Target vs Achieved Performance')
axes[0].set_xticks(x_positions)
axes[0].set_xticklabels(metric_names)
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# Add value labels on bars
def add_value_labels(ax, bars):
    """Annotate every bar with its height, formatted to three decimals.

    Args:
        ax: Axes-like object whose ``text`` method draws the label.
        bars: Iterable of bar patches exposing ``get_x``, ``get_width`` and
            ``get_height``.
    """
    label_gap = 0.01  # vertical offset (data units) between bar top and label
    for rect in bars:
        bar_height = rect.get_height()
        x_center = rect.get_x() + rect.get_width()/2.
        ax.text(x_center, bar_height + label_gap,
                f'{bar_height:.3f}', ha='center', va='bottom')

# Label both bar groups of the comparison chart
for bar_group in (bars1, bars2):
    add_value_labels(axes[0], bar_group)

# Model performance summary, assembled as (name, value) rows
summary_rows = [
    ('RMSE', test_results['rmse']),
    ('MAE', test_results['mae']),
    ('Precision@5', test_results['precision_at_k'][5]),
    ('Precision@10', test_results['precision_at_k'][10]),
    ('Precision@20', test_results['precision_at_k'][20]),
    ('NDCG@10', test_results['ndcg_at_k'][10]),
]
performance_summary = {
    'Metric': [row_name for row_name, _ in summary_rows],
    'Value': [row_value for _, row_value in summary_rows],
}

# Render the summary as a table on the right-hand axes (axis frame hidden)
table_ax = axes[1]
table_ax.axis('tight')
table_ax.axis('off')
score_cells = [[f"{v:.4f}"] for v in performance_summary['Value']]
table = table_ax.table(cellText=score_cells,
                       rowLabels=performance_summary['Metric'],
                       colLabels=['Score'],
                       cellLoc='center',
                       loc='center')
table.auto_set_font_size(False)
table.set_fontsize(12)
table.scale(1.2, 1.5)
table_ax.set_title('Final Model Performance Summary', pad=20)

plt.tight_layout()
plt.show()

# Text report
report_rule = "=" * 80
print("\n" + report_rule)
print("FINAL SUMMARY - GNN-BASED RECOMMENDATION SYSTEM")
print(report_rule)

# Pass/fail against the notebook targets
final_rmse = test_results['rmse']
final_mae = test_results['mae']
final_p10 = test_results['precision_at_k'][10]
rmse_mark = '✅' if final_rmse <= 0.90 else '❌'
mae_mark = '✅' if final_mae <= 0.72 else '❌'
p10_mark = '✅' if final_p10 >= 0.85 else '❌'

print("\n🎯 TARGET ACHIEVEMENTS:")
print(f"   RMSE: {final_rmse:.4f} (Target: ≤ 0.90) {rmse_mark}")
print(f"   MAE: {final_mae:.4f} (Target: ≤ 0.72) {mae_mark}")
print(f"   Precision@10: {final_p10:.4f} (Target: ≥ 0.85) {p10_mark}")

print("\n📊 ADDITIONAL METRICS:")
print(f"   Precision@5: {test_results['precision_at_k'][5]:.4f}")
print(f"   Precision@20: {test_results['precision_at_k'][20]:.4f}")
print(f"   Recall@10: {test_results['recall_at_k'][10]:.4f}")
print(f"   NDCG@10: {test_results['ndcg_at_k'][10]:.4f}")

# The architecture figures below are literals and must be kept in sync with
# the values used when the model was constructed
total_param_count = sum(p.numel() for p in model.parameters())
print("\n🔧 MODEL ARCHITECTURE:")
print(f"   Total Parameters: {total_param_count:,}")
print("   Embedding Dimension: 64")
print("   Hidden Dimension: 128")
print("   GNN Layers: 3")
print(f"   Device: {device}")

print("\n📈 TRAINING STATISTICS:")
print(f"   Final Training Loss: {training_metrics['train_loss'][-1]:.4f}")
print(f"   Final Validation Loss: {training_metrics['val_loss'][-1]:.4f}")
print(f"   Total Training Time: {sum(training_metrics['epoch_time']):.1f} seconds")
print(f"   Epochs Completed: {len(training_metrics['train_loss'])}")

performance_grade = 'EXCELLENT' if all_targets_met else 'GOOD'
print(f"\n🎉 SYSTEM PERFORMANCE: {performance_grade}")
print(report_rule)