# Matrix Factorization for Book Recommendations

This notebook trains a neural collaborative-filtering model — a generalized matrix factorization (GMF) branch combined with an MLP branch — on user ratings, then uses the learned book embeddings to recommend similar books.

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import csr_matrix
import matplotlib.pyplot as plt
import wandb
from tqdm import tqdm
import warnings

# Silence every Python warning for the rest of the session.
# NOTE(review): this hides all warning categories, not only noisy ones.
warnings.filterwarnings("ignore")

# Directory (relative to the notebook) that holds the zipped CSV inputs.
DATA_DIR = "../data"

# Initialize wandb
# `run` is the handle of the Weights & Biases run started here; later cells
# send their metrics and figures through `wandb.log`.
run = wandb.init(
    project="book-recommendation-kaggle",
    group="matrix-factorization",
    job_type="train",
    save_code=True,
)

## Data Loading and Preprocessing

In [None]:
# Load data
# Reads the three zipped CSV files from DATA_DIR into DataFrames:
# `books` (book metadata), `ratings` (user/book ratings) and `users`.
print("Loading data...")
books = pd.read_csv(f"{DATA_DIR}/Books.csv.zip", compression="zip")
ratings = pd.read_csv(f"{DATA_DIR}/Ratings.csv.zip", compression="zip")
users = pd.read_csv(f"{DATA_DIR}/Users.csv.zip", compression="zip")

# Row counts as a quick sanity check of the load.
print(f"Books: {len(books):,}")
print(f"Ratings: {len(ratings):,}")
print(f"Users: {len(users):,}")

In [None]:
# Filter for users and books with at least `min_interactions` interactions
print("\n=== Filtering for Active Users and Popular Books ===")

# Count interactions per user and book
user_counts = ratings.groupby("User-ID").size()
book_counts = ratings.groupby("ISBN").size()

# NOTE(review): every User-ID / ISBN present in `ratings` has a count >= 1, so
# with this value the filter below keeps all rows. Raise it (e.g. to 5) to
# actually drop sparse users and books.
min_interactions = 1

# Get active users and popular books
active_users = user_counts[user_counts >= min_interactions].index
popular_books = book_counts[book_counts >= min_interactions].index

print(
    f"Active users (≥{min_interactions} ratings): {len(active_users):,} / {len(user_counts):,}"
)
print(
    f"Popular books (≥{min_interactions} ratings): {len(popular_books):,} / {len(book_counts):,}"
)

# Filter ratings
# `.copy()` so the index columns added in a later cell do not write into a
# view of `ratings`.
filtered_ratings = ratings[
    (ratings["User-ID"].isin(active_users)) & (ratings["ISBN"].isin(popular_books))
].copy()

print(
    f"Filtered ratings: {len(filtered_ratings):,} / {len(ratings):,} ({len(filtered_ratings) / len(ratings) * 100:.1f}%)"
)

In [None]:
# Create compact user and book indices
# Raw User-ID / ISBN values are remapped to contiguous 0-based integers so
# they can be used directly as embedding-table row indices.
print("\n=== Creating Compact Indices ===")

# Create mappings
# Sorting makes the id -> index assignment deterministic for a given dataset.
unique_users = sorted(filtered_ratings["User-ID"].unique())
unique_books = sorted(filtered_ratings["ISBN"].unique())

user_to_idx = {user_id: idx for idx, user_id in enumerate(unique_users)}
book_to_idx = {book_id: idx for idx, book_id in enumerate(unique_books)}
# Reverse lookups (index -> original id).
idx_to_user = {idx: user_id for user_id, idx in user_to_idx.items()}
idx_to_book = {idx: book_id for book_id, idx in book_to_idx.items()}

# Map to indices
filtered_ratings["user_idx"] = filtered_ratings["User-ID"].map(user_to_idx)
filtered_ratings["book_idx"] = filtered_ratings["ISBN"].map(book_to_idx)

n_users = len(unique_users)
n_books = len(unique_books)

print(f"Matrix dimensions: {n_users:,} users × {n_books:,} books")
# Sparsity = share of (user, book) cells that have no rating row.
print(f"Sparsity: {(1 - len(filtered_ratings) / (n_users * n_books)) * 100:.2f}%")

In [None]:
# Preview the first rows, including the new `user_idx` / `book_idx` columns.
filtered_ratings.head()

In [None]:
# Create book metadata mapping for recommendations
print("\n=== Creating Book Metadata Mapping ===")

# Filter books to only those in our filtered dataset
# ISBNs that were rated but are missing from `books` get no metadata row, so
# the index -> title/author dicts below may not cover every book index.
book_metadata = books[books["ISBN"].isin(unique_books)].copy()
book_metadata["book_idx"] = book_metadata["ISBN"].map(book_to_idx)

# Create title to index mapping
# A dict keeps one value per key: if several rows share a title, only one
# book index survives in `title_to_idx`.
title_to_idx = book_metadata.set_index("Book-Title")["book_idx"].to_dict()
idx_to_title = book_metadata.set_index("book_idx")["Book-Title"].to_dict()
idx_to_author = book_metadata.set_index("book_idx")["Book-Author"].to_dict()

print(f"Book metadata for {len(book_metadata):,} books")
print("\nSample books:")
# `.head()` already limits the sample to five rows; the `break` is a second guard.
for i, (title, author) in enumerate(
    zip(book_metadata["Book-Title"].head(), book_metadata["Book-Author"].head())
):
    print(f"  {title} by {author}")
    if i >= 4:
        break

## Rating Matrix Construction

In [None]:
# Create feedback matrix
print("\n=== Creating Rating Matrix ===")

# Create sparse matrix
# Parallel arrays: entry i is the rating `rating_values[i]` given by user
# `user_indices[i]` to book `book_indices[i]`.
user_indices = filtered_ratings["user_idx"].values
book_indices = filtered_ratings["book_idx"].values
rating_values = filtered_ratings["Book-Rating"].values

# Create sparse rating matrix
# Rows are users, columns are books (shape = n_users x n_books).
rating_matrix_sparse = csr_matrix(
    (rating_values, (user_indices, book_indices)), shape=(n_users, n_books)
)

print(f"Sparse rating matrix: {rating_matrix_sparse.shape}")
print(f"Non-zero entries: {rating_matrix_sparse.nnz:,}")
# Only the value array is counted here, not the CSR index arrays.
print(f"Memory usage: {rating_matrix_sparse.data.nbytes / 1024**2:.1f} MB")

# Analyze feedback distribution
print(f"\nRating distribution:")
print(filtered_ratings["Book-Rating"].value_counts().sort_index())

In [None]:
class GMF(nn.Module):
    """Generalized matrix factorization branch.

    User and item embeddings are multiplied element-wise and the product is
    passed through a small two-layer head; a sigmoid squashes the single
    output into (0, 1).
    """

    def __init__(self, num_users, num_items, embedding_size=128, dropout=0.1):
        super().__init__()
        head_width = embedding_size // 4

        self.user_embedding = nn.Embedding(num_users, embedding_size)
        self.item_embedding = nn.Embedding(num_items, embedding_size)

        # Layer order is kept so state-dict keys (sequential.0 / sequential.3)
        # stay compatible with existing checkpoints.
        head_layers = [
            nn.Linear(embedding_size, head_width),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(head_width, 1),
        ]
        self.sequential = nn.Sequential(*head_layers)

    def forward(self, user_ids, item_ids):
        """Return one score in (0, 1) per (user, item) pair as a 1-D tensor."""
        interaction = self.user_embedding(user_ids) * self.item_embedding(item_ids)
        logits = self.sequential(interaction)
        return torch.sigmoid(logits).view(-1)

In [None]:
class MLP(nn.Module):
    """MLP branch: concatenated user/item embeddings fed through dense layers.

    Args:
        num_users: Number of rows in the user embedding table.
        num_items: Number of rows in the item embedding table.
        embedding_size: Width of each embedding vector.
        hidden_layers: Sizes of the hidden Linear layers, in order. May be
            empty, in which case the concatenated embeddings feed the output
            layer directly.
        dropout: Dropout probability applied after every hidden ReLU.
    """

    def __init__(
        self,
        num_users,
        num_items,
        embedding_size=128,
        hidden_layers=(64, 32),  # immutable default: not shared between instances
        dropout=0.1,
    ):
        super(MLP, self).__init__()
        self.user_embedding = nn.Embedding(num_users, embedding_size)
        self.item_embedding = nn.Embedding(num_items, embedding_size)
        mlp_layers = []
        input_size = embedding_size * 2  # user and item vectors are concatenated
        for hidden_size in hidden_layers:
            mlp_layers.append(nn.Linear(input_size, hidden_size))
            mlp_layers.append(nn.ReLU())
            mlp_layers.append(nn.Dropout(dropout))
            input_size = hidden_size
        # `input_size` is the width of the last layer built above (or the
        # concatenated width when there are no hidden layers), so this also
        # works for an empty `hidden_layers`.
        mlp_layers.append(nn.Linear(input_size, 1))
        self.sequential = nn.Sequential(*mlp_layers)

    def forward(self, user_ids, item_ids):
        """Return one score in (0, 1) per (user, item) pair as a 1-D tensor."""
        user_embed = self.user_embedding(user_ids)
        item_embed = self.item_embedding(item_ids)
        concat_embed = torch.cat((user_embed, item_embed), dim=1)
        output = self.sequential(concat_embed)
        output = torch.sigmoid(output)
        return output.view(-1)

## Neural Collaborative Filtering Model

In [None]:
class NeuralCollaborativeFiltering(nn.Module):
    """Ensemble of a GMF branch and an MLP branch.

    The prediction is the plain average of the two branch outputs. Both
    branches are expected to expose `user_embedding` and `item_embedding`
    (`nn.Embedding`) attributes, which the embedding accessors below read.

    Args:
        gmf: Branch module called as `gmf(user_ids, item_ids)`.
        mlp: Branch module called as `mlp(user_ids, item_ids)`.
    """

    def __init__(
        self,
        gmf,
        mlp,
    ):
        super(NeuralCollaborativeFiltering, self).__init__()
        self.gmf = gmf
        self.mlp = mlp

    def forward(self, user_ids, item_ids):
        """Return the mean of the GMF and MLP scores for each (user, item) pair."""
        gmf_output = self.gmf(user_ids, item_ids)
        mlp_output = self.mlp(user_ids, item_ids)
        combined_output = (gmf_output + mlp_output) / 2
        return combined_output

    def get_book_embeddings(self):
        """Return one vector per book: GMF and MLP item embeddings side by side.

        The result is detached, so it can be converted with `.cpu().numpy()`
        whether or not the caller is inside `torch.no_grad()`.
        """
        return torch.cat(
            (self.gmf.item_embedding.weight, self.mlp.item_embedding.weight), dim=1
        ).detach()

    def get_user_embeddings(self):
        """Return one vector per user: GMF and MLP user embeddings side by side."""
        return torch.cat(
            (self.gmf.user_embedding.weight, self.mlp.user_embedding.weight), dim=1
        ).detach()

## Training Setup

In [None]:
# Prepare training data
print("\n=== Preparing Training Data ===")

# Convert to PyTorch tensors
user_ids = torch.LongTensor(filtered_ratings["user_idx"].values)
book_ids = torch.LongTensor(filtered_ratings["book_idx"].values)
# Ratings are divided by 10 so the targets are comparable to the models'
# sigmoid outputs (assumes "Book-Rating" is on a 0-10 scale — TODO confirm).
target_tensor = torch.FloatTensor(filtered_ratings["Book-Rating"].values) / 10.0

# Create train/validation split (80/20)
n_total = len(user_ids)
n_train = int(0.8 * n_total)

# Random shuffle
# A dedicated, seeded generator makes the split identical on every
# Restart & Run All without touching the global torch RNG state.
SEED = 42
split_generator = torch.Generator().manual_seed(SEED)
indices = torch.randperm(n_total, generator=split_generator)
train_indices = indices[:n_train]
val_indices = indices[n_train:]

# Split data
train_users = user_ids[train_indices]
train_books = book_ids[train_indices]
train_ratings = target_tensor[train_indices]

val_users = user_ids[val_indices]
val_books = book_ids[val_indices]
val_ratings = target_tensor[val_indices]

print(f"Training samples: {len(train_users):,}")
print(f"Validation samples: {len(val_users):,}")

# Model parameters
n_factors = 64
learning_rate = 0.001
weight_decay = 1e-4
batch_size = 8192
n_epochs = 50

print(f"\nModel configuration:")
print(f"  Factors: {n_factors}")
print(f"  Learning rate: {learning_rate}")
print(f"  Batch size: {batch_size:,}")
print(f"  Epochs: {n_epochs}")

In [None]:
# Initialize model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Pass `n_factors` explicitly: without it both branches fall back to their
# default embedding size (128) and silently disagree with the configuration
# that is printed and saved as the embedding dimension.
gmf = GMF(n_users, n_books, embedding_size=n_factors).to(device)
mlp = MLP(
    n_users, n_books, embedding_size=n_factors, hidden_layers=[128, 64, 32]
).to(device)
gmf_optimizer = optim.AdamW(
    gmf.parameters(), lr=learning_rate, weight_decay=weight_decay
)
mlp_optimizer = optim.AdamW(
    mlp.parameters(), lr=learning_rate, weight_decay=weight_decay
)
# NOTE(review): these schedulers only take effect if something calls `.step()`
# on them; the pre-training helpers are not given a scheduler.
gmf_scheduler = optim.lr_scheduler.StepLR(gmf_optimizer, step_size=15, gamma=0.5)
mlp_scheduler = optim.lr_scheduler.StepLR(mlp_optimizer, step_size=15, gamma=0.5)
criterion = nn.MSELoss()

print(f"GMF model parameters: {sum(p.numel() for p in gmf.parameters()):,}")
print(f"MLP model parameters: {sum(p.numel() for p in mlp.parameters()):,}")

# Move data to device
# The whole split lives on the device, so the training loops can slice
# batches without per-batch host-to-device copies.
train_users = train_users.to(device)
train_books = train_books.to(device)
train_ratings = train_ratings.to(device)

val_users = val_users.to(device)
val_books = val_books.to(device)
val_ratings = val_ratings.to(device)

## Training Loop

In [None]:
def train_gmf(model, criterion, optimizer, batch_size=64, num_epochs=5):
    """Pre-train the GMF branch with mini-batch gradient descent.

    Batches are consecutive slices of the notebook-level `train_users`,
    `train_books` and `train_ratings` tensors.

    Args:
        model: Module called as `model(user_ids, book_ids)`.
        criterion: Loss called as `criterion(predictions, targets)`.
        optimizer: Optimizer over `model`'s parameters.
        batch_size: Number of samples per gradient step.
        num_epochs: Number of full passes over the training tensors.
    """
    n_samples = len(train_users)

    for epoch in range(num_epochs):
        # Make sure dropout is active even if the model was last used in eval mode.
        model.train()
        total_loss = 0.0
        n_batches = 0

        for start in range(0, n_samples, batch_size):
            # Get batch
            batch_users = train_users[start : start + batch_size]
            batch_books = train_books[start : start + batch_size]
            batch_targets = train_ratings[start : start + batch_size]

            optimizer.zero_grad()
            predictions = model(batch_users, batch_books)
            loss = criterion(predictions, batch_targets)
            loss.backward()
            optimizer.step()

            # `start` is a sample offset, so this fires only on batches whose
            # offset is a multiple of 1000 (every 125th batch at batch_size=64).
            if start % 1000 == 0:
                diff = torch.abs(predictions - batch_targets).sum().item()
                print(
                    f"Batch [{start + 1}/{n_samples}], Loss: {loss.item()}, Avg. Diff: {(diff / len(batch_targets))}"
                )

            total_loss += loss.item()
            n_batches += 1

        # `loss` is already a per-batch mean, so average over batches rather
        # than over samples.
        print(
            f"GMF Epoch [{epoch + 1}/{num_epochs}], Loss: {total_loss / max(n_batches, 1)}"
        )

In [None]:
def train_mlp(model, criterion, optimizer, batch_size=64, num_epochs=5):
    """Pre-train the MLP branch with mini-batch gradient descent.

    Batches are consecutive slices of the notebook-level `train_users`,
    `train_books` and `train_ratings` tensors.

    Args:
        model: Module called as `model(user_ids, book_ids)`.
        criterion: Loss called as `criterion(predictions, targets)`.
        optimizer: Optimizer over `model`'s parameters.
        batch_size: Number of samples per gradient step.
        num_epochs: Number of full passes over the training tensors.
    """
    n_samples = len(train_users)

    for epoch in range(num_epochs):
        # Make sure dropout is active even if the model was last used in eval mode.
        model.train()
        total_loss = 0.0
        n_batches = 0

        for start in range(0, n_samples, batch_size):
            # Get batch
            batch_users = train_users[start : start + batch_size]
            batch_books = train_books[start : start + batch_size]
            batch_targets = train_ratings[start : start + batch_size]

            optimizer.zero_grad()
            predictions = model(batch_users, batch_books)
            loss = criterion(predictions, batch_targets)
            loss.backward()
            optimizer.step()

            # `start` is a sample offset, so this fires only on batches whose
            # offset is a multiple of 1000 (every 125th batch at batch_size=64).
            if start % 1000 == 0:
                diff = torch.abs(predictions - batch_targets).sum().item()
                print(
                    f"Batch [{start + 1}/{n_samples}], Loss: {loss.item()}, Avg. Diff: {(diff / len(batch_targets))}"
                )

            total_loss += loss.item()
            n_batches += 1

        # `loss` is already a per-batch mean, so average over batches rather
        # than over samples.
        print(
            f"MLP Epoch [{epoch + 1}/{num_epochs}], Loss: {total_loss / max(n_batches, 1)}"
        )

In [None]:
# Pre-train each branch on its own before the joint training loop.
# `num_epochs` is now passed explicitly; previously it was assigned but unused.
# NOTE(review): these calls use the helpers' default batch_size (64), not the
# notebook-level `batch_size` — confirm that is intended.
num_epochs = 5
print("Training GMF...")
train_gmf(gmf, criterion, gmf_optimizer, num_epochs=num_epochs)

print("Training MLP...")
train_mlp(mlp, criterion, mlp_optimizer, num_epochs=num_epochs)

In [None]:
def evaluate_model(model, users, books, ratings, batch_size=8192, criterion=criterion):
    """Return the mean per-batch loss of `model` over the given tensors.

    The model is switched to eval mode and gradients are disabled. Returns 0
    when `users` is empty. Each batch contributes equally to the mean, even if
    the last batch is smaller.
    """
    model.eval()
    batch_losses = []

    with torch.no_grad():
        for start in range(0, len(users), batch_size):
            stop = start + batch_size
            preds = model(users[start:stop], books[start:stop])
            batch_losses.append(criterion(preds, ratings[start:stop]).item())

    if not batch_losses:
        return 0
    return sum(batch_losses) / len(batch_losses)


# Training loop
print("\n=== Starting Training ===")

# Wrap the two branch models; the optimizer below therefore updates the
# parameters of both `gmf` and `mlp`.
model = NeuralCollaborativeFiltering(gmf, mlp)
criterion = nn.MSELoss()
optimizer = optim.AdamW(
    model.parameters(), lr=learning_rate, weight_decay=weight_decay
)
# Halve the learning rate every 15 epochs.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.5)
train_losses = []
val_losses = []
best_val_loss = float("inf")
patience_counter = 0
# Stop after this many consecutive epochs without a validation improvement.
patience = 10

for epoch in range(n_epochs):
    # `evaluate_model` leaves the model in eval mode, so re-enable train mode
    # at the start of every epoch.
    model.train()
    epoch_loss = 0
    n_batches = 0

    # Training batches
    # Batches are consecutive slices in a fixed order (no per-epoch reshuffle).
    for i in range(0, len(train_users), batch_size):
        # Get batch
        batch_users = train_users[i : i + batch_size]
        batch_books = train_books[i : i + batch_size]
        batch_targets = train_ratings[i : i + batch_size]

        # Forward pass
        optimizer.zero_grad()
        predictions = model(batch_users, batch_books)
        loss = criterion(predictions, batch_targets)

        # Backward pass
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        n_batches += 1

    # Calculate losses
    train_loss = epoch_loss / n_batches
    val_loss = evaluate_model(model, val_users, val_books, val_ratings, batch_size)

    train_losses.append(train_loss)
    val_losses.append(val_loss)

    # Learning rate scheduling
    # Stepped before logging, so the logged LR is the one for the next epoch.
    scheduler.step()

    # Log to wandb
    wandb.log(
        {
            "epoch": epoch + 1,
            "train_loss": train_loss,
            "val_loss": val_loss,
            "learning_rate": optimizer.param_groups[0]["lr"],
        }
    )

    # Print progress
    # Every one of the first five epochs, then every fifth epoch.
    if (epoch + 1) % 5 == 0 or epoch < 5:
        print(
            f"Epoch {epoch + 1:2d}: Train Loss = {train_loss:.4f}, Val Loss = {val_loss:.4f}, LR = {optimizer.param_groups[0]['lr']:.2e}"
        )

    # Early stopping
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        patience_counter = 0
        # Save best model
        torch.save(model.state_dict(), "best_matrix_factorization.pth")
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f"\nEarly stopping at epoch {epoch + 1}")
            break

print(f"\nTraining completed! Best validation loss: {best_val_loss:.4f}")

## Visualize Training Progress

In [None]:
# Plot training progress
# Keep a handle on the figure so it can be logged explicitly.
fig = plt.figure(figsize=(12, 4))

plt.subplot(1, 2, 1)
plt.plot(train_losses, label="Train Loss", alpha=0.7)
plt.plot(val_losses, label="Validation Loss", alpha=0.7)
plt.xlabel("Epoch")
plt.ylabel("MSE Loss")
plt.title("Training Progress")
plt.legend()
plt.grid(True, alpha=0.3)

plt.subplot(1, 2, 2)
# Drop the first five epochs so the later, flatter part of the curves is visible.
plt.plot(train_losses[5:], label="Train Loss (after epoch 5)", alpha=0.7)
plt.plot(val_losses[5:], label="Validation Loss (after epoch 5)", alpha=0.7)
plt.xlabel("Epoch")
plt.ylabel("MSE Loss")
plt.title("Training Progress (Zoomed)")
plt.legend()
plt.grid(True, alpha=0.3)

plt.tight_layout()

# Log plot to wandb
# Done before `plt.show()`: with the notebook inline backend, showing a figure
# closes it, and `wandb.Image(plt)` afterwards would capture a fresh, empty one.
wandb.log({"training_progress": wandb.Image(fig)})

plt.show()

## Book Similarity Recommendation System

In [None]:
# Load best model
# Restores the checkpoint written by the training loop whenever validation
# loss improved.
model.load_state_dict(torch.load("best_matrix_factorization.pth"))
model.eval()

# Extract book embeddings
# Requires `model` to provide a `get_book_embeddings()` method.
print("\n=== Creating Book Similarity Index ===")
with torch.no_grad():
    book_embeddings = model.get_book_embeddings().cpu().numpy()

print(f"Book embeddings shape: {book_embeddings.shape}")

# Compute cosine similarity matrix
# NOTE(review): this is a dense all-pairs matrix, so its size grows with the
# square of the number of books — check the memory figure printed below.
print("Computing book similarity matrix...")
book_similarity_matrix = cosine_similarity(book_embeddings)
print(f"Similarity matrix shape: {book_similarity_matrix.shape}")
print(f"Memory usage: {book_similarity_matrix.nbytes / 1024**2:.1f} MB")

In [None]:
def recommend_similar_books(book_title, k=10):
    """
    Recommend k most similar books to the given book title

    Looks the title up in the notebook-level `title_to_idx` mapping and ranks
    all other books by their row in `book_similarity_matrix`.

    Args:
        book_title (str): Exact title of the book
        k (int): Number of recommendations to return

    Returns:
        list: List of tuples (title, author, similarity_score), best first.
        Empty when the title is unknown; in that case up to five titles
        containing `book_title` (case-insensitive) are printed as suggestions.
    """
    # Check if book exists
    if book_title not in title_to_idx:
        # Try partial matching
        needle = book_title.lower()
        matching_titles = [title for title in title_to_idx if needle in title.lower()]
        if matching_titles:
            print(f"Book '{book_title}' not found. Did you mean one of these?")
            for i, title in enumerate(matching_titles[:5]):
                print(f"  {i + 1}. {title}")
        else:
            print(f"Book '{book_title}' not found in the dataset.")
        return []

    # Get book index
    book_idx = title_to_idx[book_title]

    # Get similarity scores for this book
    similarities = book_similarity_matrix[book_idx]

    # Rank every book by descending similarity.
    ranked_indices = np.argsort(similarities)[::-1]

    # Build recommendations
    recommendations = []
    for idx in ranked_indices:
        if len(recommendations) >= k:
            break
        idx = int(idx)
        # Exclude the query book by index instead of assuming it is ranked
        # first: ties or rounding can place another book ahead of it.
        if idx == book_idx:
            continue
        # Books without metadata are skipped and do not consume one of the k
        # slots, so the caller still receives k results when enough exist.
        if idx not in idx_to_title:
            continue
        recommendations.append(
            (
                idx_to_title[idx],
                idx_to_author.get(idx, "Unknown Author"),
                similarities[idx],
            )
        )

    return recommendations


def display_recommendations(book_title, recommendations, k=10):
    """
    Pretty-print up to k recommendations for `book_title`.

    Prints nothing when `recommendations` is empty. The header includes the
    query book's author when the title is present in `title_to_idx`.
    """
    if not recommendations:
        return

    # Header line for the query book
    title_is_known = book_title in title_to_idx
    if not title_is_known:
        print(f"\n📚 Books similar to: '{book_title}'")
    else:
        query_author = idx_to_author.get(title_to_idx[book_title], "Unknown Author")
        print(f"\n📚 Books similar to: '{book_title}' by {query_author}")

    print("=" * 80)

    # One numbered entry per recommendation, followed by a blank line
    for rank, entry in enumerate(recommendations[:k], start=1):
        title, author, similarity = entry
        print(f"{rank:2d}. {title}")
        print(f"    by {author}")
        print(f"    Similarity: {similarity:.3f}")
        print()


def search_books(query, max_results=10):
    """
    Return up to `max_results` (title, author) pairs whose title contains
    `query`, compared case-insensitively, in `title_to_idx` iteration order.
    """
    needle = query.lower()
    hits = [
        (title, idx_to_author.get(idx, "Unknown Author"))
        for title, idx in title_to_idx.items()
        if needle in title.lower()
    ]
    return hits[:max_results]


# Banner listing the helper functions defined in this cell.
print(
    "✅ Recommendation system ready!",
    "\nAvailable functions:",
    "  - recommend_similar_books(book_title, k=10)",
    "  - display_recommendations(book_title, recommendations)",
    "  - search_books(query)",
    sep="\n",
)

## Example Recommendations

In [None]:
# Example 1: Search for Harry Potter books
# `harry_potter_books` is reused by the next example cell.
print("=== Searching for Harry Potter books ===")
harry_potter_books = search_books("harry potter")
for i, (title, author) in enumerate(harry_potter_books[:5], 1):
    print(f"{i}. {title} by {author}")

In [None]:
# Example 2: Get recommendations for a Harry Potter book
# Skipped entirely when the search in the previous example found nothing.
if harry_potter_books:
    sample_book = harry_potter_books[0][0]  # Take the first Harry Potter book
    recommendations = recommend_similar_books(sample_book, k=10)
    display_recommendations(sample_book, recommendations)

In [None]:
# Example 3: Try different genres
# These are looked up as exact titles first; titles missing from the dataset
# fall through to the partial search below.
test_books = [
    "The Lord of the Rings",
    "To Kill a Mockingbird",
    "1984",
    "Pride and Prejudice",
    "The Great Gatsby",
]

for book_title in test_books:
    print(f"\n{'=' * 60}")
    print(f"Testing recommendations for: {book_title}")

    # First try exact match
    # An empty list means the title was not found (suggestions, if any, have
    # already been printed by `recommend_similar_books`).
    recommendations = recommend_similar_books(book_title, k=5)

    if recommendations:
        display_recommendations(book_title, recommendations, k=5)
    else:
        # Try partial search
        matches = search_books(book_title, max_results=3)
        if matches:
            print(f"\nFound similar titles:")
            for title, author in matches:
                print(f"  - {title} by {author}")
        else:
            print(f"No books found matching '{book_title}'")

## Interactive Recommendation Function

In [None]:
def interactive_book_recommendations():
    """
    Run a small read-eval loop on stdin for book recommendations.

    Commands:
        quit            -> leave the loop
        search <query>  -> list up to 10 titles containing <query>
        anything else   -> treated as a book title; show up to 8 similar books
    """
    print("\n🔍 Interactive Book Recommendation System")
    print("Type 'quit' to exit, 'search <query>' to search for books")
    print("=" * 60)

    while True:
        user_input = input("\nEnter a book title (or command): ").strip()
        command = user_input.lower()

        if command == "quit":
            print("Goodbye!")
            return

        if not command.startswith("search "):
            # Plain input: try to get recommendations for that title
            recommendations = recommend_similar_books(user_input, k=8)
            display_recommendations(user_input, recommendations, k=8)
            continue

        # "search <query>": strip the 7-character 'search ' prefix
        query = user_input[len("search ") :]
        matches = search_books(query, max_results=10)
        if not matches:
            print(f"No books found matching '{query}'")
            continue

        print(f"\nFound {len(matches)} books matching '{query}':")
        for position, (title, author) in enumerate(matches, start=1):
            print(f"  {position:2d}. {title} by {author}")


# Uncomment to run interactive session
# interactive_book_recommendations()

## Model Analysis and Statistics

In [None]:
# Analyze the learned embeddings
print("\n=== Model Analysis ===")

with torch.no_grad():
    book_embeddings = model.get_book_embeddings().cpu().numpy()
    user_embeddings = model.get_user_embeddings().cpu().numpy()

print(f"Book embedding statistics:")
print(f"  Mean: {book_embeddings.mean():.4f}")
print(f"  Std: {book_embeddings.std():.4f}")
print(f"  Min: {book_embeddings.min():.4f}")
print(f"  Max: {book_embeddings.max():.4f}")

print(f"\nUser embedding statistics:")
print(f"  Mean: {user_embeddings.mean():.4f}")
print(f"  Std: {user_embeddings.std():.4f}")
print(f"  Min: {user_embeddings.min():.4f}")
print(f"  Max: {user_embeddings.max():.4f}")

# Analyze similarity distribution
print(f"\nSimilarity matrix statistics:")
# Each unordered pair (i < j) exactly once: strict upper triangle, row-major.
pair_rows, pair_cols = np.triu_indices(len(book_similarity_matrix), k=1)
pair_similarities = book_similarity_matrix[pair_rows, pair_cols]
non_zero_similarities = pair_similarities[pair_similarities != 0]

print(f"  Mean similarity: {non_zero_similarities.mean():.4f}")
print(f"  Std similarity: {non_zero_similarities.std():.4f}")
print(f"  Min similarity: {non_zero_similarities.min():.4f}")
print(f"  Max similarity: {non_zero_similarities.max():.4f}")

# Top pairs by similarity. A stable sort on the negated values keeps tied
# pairs in (i, j) order. Only the top 100 are materialised as Python tuples;
# nothing below needs more.
TOP_N_PAIRS = 100
top_order = np.argsort(-pair_similarities, kind="stable")[:TOP_N_PAIRS]
similarity_pairs = [
    (int(pair_rows[pos]), int(pair_cols[pos]), pair_similarities[pos])
    for pos in top_order
]

print(f"\nTop 10 most similar book pairs:")
for i, (book1_idx, book2_idx, similarity) in enumerate(similarity_pairs[:10], 1):
    # Pairs involving a book without metadata are skipped (their rank number
    # is skipped with them).
    if book1_idx not in idx_to_title or book2_idx not in idx_to_title:
        continue
    title1 = idx_to_title[book1_idx]
    title2 = idx_to_title[book2_idx]
    author1 = idx_to_author.get(book1_idx, "Unknown")
    author2 = idx_to_author.get(book2_idx, "Unknown")
    print(f"{i:2d}. {similarity:.3f}: '{title1}' by {author1}")
    print(f"     ↔ '{title2}' by {author2}")

# Plot similarity distribution
fig = plt.figure(figsize=(10, 4))

plt.subplot(1, 2, 1)
plt.hist(non_zero_similarities, bins=50, alpha=0.7, edgecolor="black")
plt.xlabel("Cosine Similarity")
plt.ylabel("Frequency")
plt.title("Distribution of Book Similarities")
plt.grid(True, alpha=0.3)

plt.subplot(1, 2, 2)
# Plot top similarities
top_similarities = [pair[2] for pair in similarity_pairs]
# The x range follows the actual number of pairs, so datasets with fewer than
# 100 pairs still plot.
plt.plot(range(1, len(top_similarities) + 1), top_similarities, "o-", alpha=0.7)
plt.xlabel("Rank")
plt.ylabel("Similarity Score")
plt.title("Top 100 Book Similarities")
plt.grid(True, alpha=0.3)

plt.tight_layout()

# Log before `plt.show()`: with the notebook inline backend, showing a figure
# closes it, and `wandb.Image(plt)` afterwards would capture an empty one.
wandb.log({"similarity_analysis": wandb.Image(fig)})

plt.show()

## Save Model and Components

In [None]:
# Save everything needed for deployment
print("\n=== Saving Model Components ===")

import pickle

# Save model state
# One checkpoint bundling the weights with the notebook-level model and
# training settings.
torch.save(
    {
        "model_state_dict": model.state_dict(),
        "model_config": {
            "n_users": n_users,
            "n_books": n_books,
            "n_factors": n_factors,
            "dropout": 0.1,
        },
        "training_config": {
            "learning_rate": learning_rate,
            "weight_decay": weight_decay,
            "batch_size": batch_size,
            "n_epochs": n_epochs,
        },
    },
    "matrix_factorization_complete.pth",
)

# Save embeddings and similarity matrix
np.save("book_embeddings.npy", book_embeddings)
np.save("book_similarity_matrix.npy", book_similarity_matrix)

# Save mappings
# Lookup tables between raw ids/titles and the compact indices used by the model.
mappings = {
    "title_to_idx": title_to_idx,
    "idx_to_title": idx_to_title,
    "idx_to_author": idx_to_author,
    "book_to_idx": book_to_idx,
    "idx_to_book": idx_to_book,
    "user_to_idx": user_to_idx,
    "idx_to_user": idx_to_user,
}

# NOTE(review): pickle files should only be loaded from trusted sources.
with open("book_mappings.pkl", "wb") as f:
    pickle.dump(mappings, f)

print("✅ Saved:")
print("  - matrix_factorization_complete.pth")
print("  - book_embeddings.npy")
print("  - book_similarity_matrix.npy")
print("  - book_mappings.pkl")

print(f"\n📊 Final Model Summary:")
print(f"  Users: {n_users:,}")
print(f"  Books: {n_books:,}")
print(f"  Ratings: {len(filtered_ratings):,}")
print(f"  Embedding dimensions: {n_factors}")
print(f"  Final validation loss: {best_val_loss:.4f}")
print(f"  Model parameters: {sum(p.numel() for p in model.parameters()):,}")

## Recommendation Evaluation Metrics

Now let's evaluate the recommendation system using standard ranking metrics: Precision@K, Recall@K, and mAP (mean Average Precision).

In [None]:
def precision_at_k(recommended_items, relevant_items, k):
    """
    Precision@K for one user: the share of the top-k recommendations that are
    relevant.

    Args:
        recommended_items (list): Ranked recommended item indices
        relevant_items (set): Relevant/positive item indices for the user
        k (int): Cut-off rank

    Returns:
        float: hits / min(k, number of recommendations); 0.0 when k is 0 or
        nothing was recommended
    """
    if k == 0 or not len(recommended_items):
        return 0.0

    top_k = recommended_items[:k]
    hit_count = len(set(top_k) & set(relevant_items))
    # Shorter-than-k lists are scored against their actual length.
    denominator = min(k, len(recommended_items))

    return hit_count / denominator


def recall_at_k(recommended_items, relevant_items, k):
    """
    Recall@K for one user: the share of relevant items that appear in the
    top-k recommendations.

    Args:
        recommended_items (list): Ranked recommended item indices
        relevant_items (set): Relevant/positive item indices for the user
        k (int): Cut-off rank

    Returns:
        float: hits / number of relevant items; 0.0 when there are no relevant
        items
    """
    total_relevant = len(relevant_items)
    if total_relevant == 0:
        return 0.0

    top_k = set(recommended_items[:k])
    hit_count = len(top_k & set(relevant_items))

    return hit_count / total_relevant


def average_precision(recommended_items, relevant_items):
    """
    Average Precision for one user.

    Walks the ranked list and, at every position holding a relevant item, adds
    the precision at that position; the sum is divided by the number of
    relevant items.

    Args:
        recommended_items (list): Ranked recommended item indices
        relevant_items (set): Relevant/positive item indices for the user

    Returns:
        float: Average Precision; 0.0 when there are no relevant items
    """
    total_relevant = len(relevant_items)
    if total_relevant == 0:
        return 0.0

    precision_sum = 0.0
    hits_so_far = 0

    for rank, item in enumerate(recommended_items, start=1):
        if item not in relevant_items:
            continue
        hits_so_far += 1
        precision_sum += hits_so_far / rank

    return precision_sum / total_relevant


def get_user_recommendations(model, user_idx, user_items, n_recommendations=100):
    """
    Get top N recommendations for a user

    Every book is scored as
    ``user_emb . book_emb + user_bias + book_bias + global_bias``; books are
    ranked by descending score and the ones in ``user_items`` are skipped.

    Ranking uses the raw scores rather than their sigmoid. Sigmoid is
    monotonic, so it cannot improve the ordering, but in float32 it saturates
    to exactly 1.0 for large scores and would turn distinct scores into ties.

    Args:
        model: Trained matrix factorization model. Must expose
            ``user_embedding``, ``user_bias``, ``book_bias``, ``global_bias``
            and ``get_book_embeddings()``.
        user_idx (int): User index
        user_items (set): Set of items the user has already interacted with
        n_recommendations (int): Number of recommendations to generate

    Returns:
        list: Up to ``n_recommendations`` book indices (Python ints), best
            first, excluding already seen items. Empty when
            ``n_recommendations <= 0``.
    """
    model.eval()

    if n_recommendations <= 0:
        return []

    with torch.no_grad():
        # Build the index tensor on the model's own device instead of relying
        # on a notebook-level `device` variable being defined and in sync.
        model_device = model.book_bias.weight.device
        user_tensor = torch.tensor([user_idx], dtype=torch.long, device=model_device)

        # Get user embedding
        user_emb = model.user_embedding(user_tensor)

        # Get all book embeddings
        book_embs = model.get_book_embeddings()

        # Compute scores for all books. reshape(-1) (rather than squeeze())
        # keeps a 1-D vector even when the catalogue holds a single book.
        scores = torch.mm(user_emb, book_embs.t()).reshape(-1)

        # Add biases
        user_bias = model.user_bias(user_tensor).reshape(-1)
        book_biases = model.book_bias.weight.reshape(-1)
        scores = scores + user_bias + book_biases + model.global_bias

        scores_np = scores.reshape(-1).cpu().numpy()

    # Sort descending. A stable sort on the negated scores breaks exact ties
    # by the lower book index, so repeated runs return the same list.
    ranked_book_indices = np.argsort(-scores_np, kind="stable").tolist()

    # Filter out items the user has already seen
    recommended_items = []
    for book_idx in ranked_book_indices:
        if book_idx in user_items:
            continue
        recommended_items.append(book_idx)
        if len(recommended_items) >= n_recommendations:
            break

    return recommended_items


# Cell-completion marker: reaching this line means the metric helpers and
# get_user_recommendations above were defined without error in this kernel.
print("✅ Evaluation functions defined!")

## Prepare Test Data for Evaluation

In [None]:
# Create test set for evaluation
print("\n=== Preparing Test Data for Evaluation ===")

# We'll use the validation split as our test set for evaluation
test_users_tensor = val_users
test_books_tensor = val_books
test_ratings_tensor = val_ratings

# The three tensors are parallel: position i describes one interaction.
assert len(test_users_tensor) == len(test_books_tensor) == len(test_ratings_tensor)

# Convert validation data to user-item format for evaluation
test_user_items = {}  # user_idx -> set of positive book indices
test_user_all_items = {}  # user_idx -> set of all book indices (for filtering)

# Group by user. Each tensor is converted to a Python list once up front;
# indexing the tensor and calling .item() per element does the same
# conversion one value at a time and is much slower on large splits.
for user_idx, book_idx, rating in zip(
    test_users_tensor.reshape(-1).tolist(),
    test_books_tensor.reshape(-1).tolist(),
    test_ratings_tensor.reshape(-1).tolist(),
):
    # Every test user gets an entry, even if none of their items is positive,
    # so "Total test users" below counts all users in the split.
    user_positives = test_user_items.setdefault(user_idx, set())
    test_user_all_items.setdefault(user_idx, set()).add(book_idx)
    if rating == 1:  # Positive feedback (rating >= 7.0)
        user_positives.add(book_idx)

# Also need training data to filter out seen items during recommendation
train_user_items = {}  # user_idx -> set of book indices seen during training

assert len(train_users) == len(train_books)
for user_idx, book_idx in zip(
    train_users.reshape(-1).tolist(), train_books.reshape(-1).tolist()
):
    train_user_items.setdefault(user_idx, set()).add(book_idx)

# Filter test users that have at least 1 positive item and appear in training
valid_test_users = [
    user_idx
    for user_idx, positives in test_user_items.items()
    if len(positives) > 0 and user_idx in train_user_items
]

print(f"Total test users: {len(test_user_items)}")
print(f"Valid test users (with positive items & in training): {len(valid_test_users)}")

# Sample a subset for faster evaluation
max_test_users = 1000  # Adjust this based on computational resources
if len(valid_test_users) > max_test_users:
    np.random.seed(42)  # fixed seed so the same users are sampled on every run
    valid_test_users = np.random.choice(
        valid_test_users, max_test_users, replace=False
    ).tolist()
    print(f"Sampled {max_test_users} users for evaluation")

print(f"Users for evaluation: {len(valid_test_users)}")

# Show statistics
positive_counts = [len(test_user_items[user_idx]) for user_idx in valid_test_users]
if positive_counts:
    print(f"Average positive items per test user: {np.mean(positive_counts):.2f}")
    print(f"Min positive items: {np.min(positive_counts)}")
    print(f"Max positive items: {np.max(positive_counts)}")
else:
    # np.min / np.max raise on an empty sequence; report the situation instead.
    print("No valid test users found - ranking metrics cannot be computed.")

## Run Evaluation

In [None]:
# Evaluate the model
print("\n=== Running Evaluation ===")

# Load the best model. map_location remaps the stored tensors onto the current
# device, so a checkpoint written on a GPU also loads in a CPU-only session.
model.load_state_dict(
    torch.load("best_matrix_factorization.pth", map_location=device)
)
model.eval()

# Evaluation parameters
k_values = [5, 10, 20, 50]
n_recommendations = 100  # Generate top-100 to calculate metrics at different k values

# Store results: one list per metric, holding one value per evaluated user in
# the same order as valid_test_users.
results = {f"precision@{k}": [] for k in k_values}
results.update({f"recall@{k}": [] for k in k_values})
results["mAP"] = []

print(f"Evaluating {len(valid_test_users)} users...")

# Evaluate each user
for user_idx in tqdm(valid_test_users, desc="Evaluating users"):
    # Get user's training items (to filter out)
    user_train_items = train_user_items.get(user_idx, set())

    # Get user's positive test items
    user_positive_items = test_user_items[user_idx]

    # Generate recommendations
    recommended_items = get_user_recommendations(
        model, user_idx, user_train_items, n_recommendations
    )

    # Calculate metrics for different k values
    for k in k_values:
        prec_k = precision_at_k(recommended_items, user_positive_items, k)
        recall_k = recall_at_k(recommended_items, user_positive_items, k)

        results[f"precision@{k}"].append(prec_k)
        results[f"recall@{k}"].append(recall_k)

    # Calculate mAP (using all n_recommendations recommendations)
    map_score = average_precision(recommended_items, user_positive_items)
    results["mAP"].append(map_score)

# Calculate average metrics (mean over users)
avg_results = {metric: np.mean(values) for metric, values in results.items()}

print("\n" + "=" * 60)
print("EVALUATION RESULTS")
print("=" * 60)

for k in k_values:
    print(f"Precision@{k:2d}: {avg_results[f'precision@{k}']:.4f}")

print()
for k in k_values:
    print(f"Recall@{k:2d}:    {avg_results[f'recall@{k}']:.4f}")

print(f"\nmAP:          {avg_results['mAP']:.4f}")

# Log to wandb
wandb.log(avg_results)

print(f"\nEvaluation completed on {len(valid_test_users)} users")
print(
    f"Average positive items per user: {np.mean([len(test_user_items[u]) for u in valid_test_users]):.2f}"
)

In [None]:
# Analyze results in more detail
print("\n=== Detailed Analysis ===")

# Create a detailed results dataframe: one row per evaluated user. The
# per-user metric lists in `results` were filled in valid_test_users order, so
# position i in every list belongs to the i-th user.
detailed_results = []
for i, user_idx in enumerate(valid_test_users):
    user_result = {"user_idx": user_idx}
    user_result["n_positive"] = len(test_user_items[user_idx])

    for k in k_values:
        user_result[f"precision@{k}"] = results[f"precision@{k}"][i]
        user_result[f"recall@{k}"] = results[f"recall@{k}"][i]

    user_result["mAP"] = results["mAP"][i]
    detailed_results.append(user_result)

results_df = pd.DataFrame(detailed_results)

# Show statistics by number of positive items
print("Results by number of positive items:")
print("-" * 50)

for n_pos in sorted(results_df["n_positive"].unique()):
    if n_pos <= 20:  # Focus on users with reasonable number of positives
        subset = results_df[results_df["n_positive"] == n_pos]
        if len(subset) >= 5:  # Only show if we have enough samples
            print(f"Users with {n_pos:2d} positive items ({len(subset):3d} users):")
            print(
                f"  Precision@10: {subset['precision@10'].mean():.4f} ± {subset['precision@10'].std():.4f}"
            )
            print(
                f"  Recall@10:    {subset['recall@10'].mean():.4f} ± {subset['recall@10'].std():.4f}"
            )
            print(
                f"  mAP:          {subset['mAP'].mean():.4f} ± {subset['mAP'].std():.4f}"
            )
            print()

# Visualize results
fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Plots 1 and 2: Precision@K and Recall@K for different K values. The two
# panels differ only in the metric, so they share one drawing loop.
bar_positions = range(len(k_values))
for ax, metric_key, metric_label in (
    (axes[0, 0], "precision", "Precision@K"),
    (axes[0, 1], "recall", "Recall@K"),
):
    ax.bar(bar_positions, [avg_results[f"{metric_key}@{k}"] for k in k_values])
    ax.set_xlabel("K")
    ax.set_ylabel(metric_label)
    ax.set_title(f"{metric_label} for Different K Values")
    ax.set_xticks(bar_positions)
    ax.set_xticklabels([f"{k}" for k in k_values])
    ax.grid(True, alpha=0.3)

# Plot 3: Distribution of mAP scores
axes[1, 0].hist(results["mAP"], bins=30, alpha=0.7, edgecolor="black")
axes[1, 0].set_xlabel("mAP Score")
axes[1, 0].set_ylabel("Number of Users")
axes[1, 0].set_title("Distribution of mAP Scores")
axes[1, 0].axvline(
    avg_results["mAP"],
    color="red",
    linestyle="--",
    label=f"Mean: {avg_results['mAP']:.4f}",
)
axes[1, 0].legend()
axes[1, 0].grid(True, alpha=0.3)

# Plot 4: Precision@10 vs number of positive items
# Group users by number of positive items for better visualization
pos_item_groups = (
    results_df.groupby("n_positive")["precision@10"]
    .agg(["mean", "count"])
    .reset_index()
)
pos_item_groups = pos_item_groups[
    pos_item_groups["count"] >= 5
]  # Only groups with 5+ users

axes[1, 1].scatter(
    pos_item_groups["n_positive"],
    pos_item_groups["mean"],
    s=pos_item_groups["count"] * 5,
    alpha=0.6,
)
axes[1, 1].set_xlabel("Number of Positive Items")
axes[1, 1].set_ylabel("Average Precision@10")
axes[1, 1].set_title(
    "Precision@10 vs Number of Positive Items\n(Bubble size = number of users)"
)
axes[1, 1].grid(True, alpha=0.3)

plt.tight_layout()

# Log the visualization before plt.show(): depending on the matplotlib
# backend, show() may close the figure, so it is captured while still open.
wandb.log({"evaluation_results": wandb.Image(fig)})

plt.show()

# Show top and bottom performers.
# Rows are read with itertuples() rather than iterrows(): iterrows() returns
# each row as a single Series, which upcasts the integer user_idx/n_positive
# columns to float alongside the float metrics, and the ":5d" format below
# then raises ValueError on a float.
top_users = results_df.nlargest(10, "mAP")
bottom_users = results_df.nsmallest(10, "mAP")

for heading, ranked_users in (("Top", top_users), ("Bottom", bottom_users)):
    print(f"\n{heading} 10 users by mAP:")
    for ranked_user_idx, user_map, user_n_positive in ranked_users[
        ["user_idx", "mAP", "n_positive"]
    ].itertuples(index=False, name=None):
        print(
            f"User {int(ranked_user_idx):5d}: mAP={user_map:.4f}, {int(user_n_positive)} positive items"
        )

In [None]:
# Final summary
print("\n" + "=" * 80)
print("FINAL EVALUATION SUMMARY")
print("=" * 80)

# Mean number of positive test items per evaluated user; computed once and
# reused for the printout and the wandb summary below.
avg_positive_per_user = float(
    np.mean([len(test_user_items[u]) for u in valid_test_users])
)

print("\n📊 MODEL PERFORMANCE:")
print(f"   Matrix Factorization with {n_factors} factors")
print(f"   Trained on {len(train_users):,} interactions")
print(f"   Evaluated on {len(valid_test_users)} users")
print(f"   Average {avg_positive_per_user:.1f} positive items per test user")

print("\n📈 RANKING METRICS:")
for k in k_values:
    print(f"   Precision@{k:2d}: {avg_results[f'precision@{k}']:.4f}")
    print(f"   Recall@{k:2d}:    {avg_results[f'recall@{k}']:.4f}")

print(f"   mAP:          {avg_results['mAP']:.4f}")

print("\n🔍 KEY INSIGHTS:")
# Both statistics are taken over the strict upper triangle (k=1), i.e. over
# pairs of two different books. Taking the max over the whole matrix would
# also include the diagonal, which the mean already excludes.
# NOTE(review): assumes book_similarity_matrix is a symmetric book-by-book
# matrix whose diagonal holds each book's similarity with itself - confirm
# against the cell that builds it.
pairwise_similarities = book_similarity_matrix[
    np.triu_indices_from(book_similarity_matrix, k=1)
]
max_similarity = pairwise_similarities.max()
mean_similarity = np.mean(pairwise_similarities)

print(f"   • Maximum book similarity: {max_similarity:.4f}")
print(f"   • Average book similarity: {mean_similarity:.4f}")
print(f"   • Training validation loss: {best_val_loss:.4f}")

if avg_results["precision@10"] < 0.05:
    print("\n⚠️  PERFORMANCE NOTES:")
    print("   • Low precision suggests the model may need improvement")
    print(
        "   • Consider: higher embedding dimensions, more data, or different loss functions"
    )
    print(
        f"   • The similarity scores (max {max_similarity:.3f}) indicate embeddings may benefit from"
    )
    print("     different initialization or regularization strategies")

print("\n💡 RECOMMENDATIONS FOR IMPROVEMENT:")
print(f"   • Try increasing embedding dimensions (current: {n_factors})")
print("   • Experiment with different loss functions (BPR, WARP)")
print("   • Add negative sampling for better implicit feedback learning")
print("   • Consider user/item side information if available")
print("   • Use learning rate scheduling or different optimizers")

# Save detailed results
results_df.to_csv("recommendation_evaluation_results.csv", index=False)
print("\n💾 Detailed results saved to: recommendation_evaluation_results.csv")

# Create summary for wandb
wandb_summary = {
    "final_precision@5": avg_results["precision@5"],
    "final_precision@10": avg_results["precision@10"],
    "final_recall@10": avg_results["recall@10"],
    "final_mAP": avg_results["mAP"],
    "max_similarity": float(max_similarity),
    "mean_similarity": float(mean_similarity),
    "n_test_users": len(valid_test_users),
    "avg_positive_per_user": avg_positive_per_user,
}

wandb.log(wandb_summary)

In [None]:
# Finish wandb run: closes the run object created by wandb.init() in the first
# code cell. Keep this as the last cell - the wandb.log calls above must run
# before the run is finished.
run.finish()