# In [None]:
import numpy as np
import pandas as pd
import pymc as pm
import matplotlib.pyplot as plt

# Keep only the columns the model needs. The explicit copy makes `df` an
# independent frame, so adding the index columns below does not write into a
# slice of the original frame (which triggers pandas' SettingWithCopyWarning).
df = df[['User-ID', 'ISBN', 'Book-Rating']].copy()

# Encode User-ID and ISBN as contiguous integer codes (used as array indices in PyMC)
df['User-Index'] = df['User-ID'].astype("category").cat.codes
df['Book-Index'] = df['ISBN'].astype("category").cat.codes

# Get number of unique users and books
num_users = df['User-Index'].nunique()
num_books = df['Book-Index'].nunique()

# Convert to numpy arrays for modeling (one entry per observed rating)
user_ids = df['User-Index'].values
book_ids = df['Book-Index'].values
ratings = df['Book-Rating'].values  # Use raw ratings since we're using Poisson

print("Number of unique users:", num_users)
print("Number of unique books:", num_books)

# Number of latent dimensions for the user/book factor vectors
latent_dim = 10

# Rating counts per user and per book (Series keyed by the integer index),
# used to scale the bias priors according to how much data each entity has.
# `count()` ignores missing ratings, so a group can come out as 0; clamp to 1
# so that 1 / sqrt(count) stays finite.
user_rating_counts = df.groupby('User-Index')['Book-Rating'].count().clip(lower=1)
book_rating_counts = df.groupby('Book-Index')['Book-Rating'].count().clip(lower=1)

# Bayesian Probabilistic Matrix Factorization Model with Gamma-Poisson
with pm.Model() as model:
    # Global intercept. It is added inside exp() below, so it acts on the
    # log-rate scale; the Gamma prior keeps it positive.
    mu = pm.Gamma("mu", alpha=2, beta=0.5)

    # Per-entity prior scales for the biases: exactly one value per user / per
    # book, ordered by integer index so that position i belongs to index i.
    # (Looking the counts up per observation would yield one value per rating
    # row, which does not match shape=num_users / shape=num_books.)
    user_bias_sigma = 1 / np.sqrt(user_rating_counts.sort_index().to_numpy(dtype=float))
    book_bias_sigma = 1 / np.sqrt(book_rating_counts.sort_index().to_numpy(dtype=float))

    # User and book bias priors adjusted for sparsity
    user_bias = pm.Normal("user_bias", mu=0, sigma=user_bias_sigma, shape=num_users)
    book_bias = pm.Normal("book_bias", mu=0, sigma=book_bias_sigma, shape=num_books)

    # Hierarchical priors for user and book latent factors
    sigma_u = pm.HalfCauchy("sigma_u", beta=1)
    sigma_b = pm.HalfCauchy("sigma_b", beta=1)

    user_factors = pm.Normal("user_factors", mu=0, sigma=sigma_u, shape=(num_users, latent_dim))
    book_factors = pm.Normal("book_factors", mu=0, sigma=sigma_b, shape=(num_books, latent_dim))

    # Poisson rate for every observed (user, book) pair. The plain integer
    # arrays `user_ids` / `book_ids` are used for indexing instead of pandas
    # Series so the tensor indexing does not depend on pandas objects.
    lambda_rating = pm.math.exp(
        mu +
        user_bias[user_ids] +
        book_bias[book_ids] +
        (user_factors[user_ids] * book_factors[book_ids]).sum(axis=1)
    )

    # Poisson likelihood for discrete ratings
    ratings_obs = pm.Poisson("ratings_obs", mu=lambda_rating, observed=ratings)

    # Use No-U-Turn Sampler (NUTS) for MCMC sampling
    trace = pm.sample(
        draws=1000,
        tune=1000,
        step=pm.NUTS(),
        cores=2,
        random_seed=42,
        return_inferencedata=True
    )

# Posterior Predictive Sampling
with model:
    posterior_predictive = pm.sample_posterior_predictive(trace, var_names=["ratings_obs"], random_seed=42)

# The result is an InferenceData object: the simulated ratings live in its
# `posterior_predictive` group with leading (chain, draw) axes. Flatten those
# two axes so that each row of `sampled_ratings` is one posterior draw and
# each column is one observed (user, book) pair.
ppc_draws = posterior_predictive.posterior_predictive["ratings_obs"].values
sampled_ratings = ppc_draws.reshape(-1, ppc_draws.shape[-1])

# Print an example subset of predicted ratings
print("\nExample of Predicted Ratings (posterior predictive mean):")
print(sampled_ratings.mean(axis=0)[:5])

# ---- Bayes General Multi-Step Lookahead Recommendation ---- #

def _posterior_draws(trace, name):
    """Return the draws of variable *name* as an array with one leading sample axis."""
    posterior = getattr(trace, "posterior", None)
    if posterior is None:
        # Mapping-style trace: already indexed as (sample, ...).
        return np.asarray(trace[name])
    # InferenceData-style trace: variables sit in the `posterior` group with
    # leading (chain, draw) axes; merge them into a single sample axis.
    values = np.asarray(posterior[name])
    return values.reshape((-1,) + values.shape[2:])


def bayes_general_recommendation(user_index, book_indices, trace, top_k=5, exploration_factor=0.5, regret_threshold=0.8, max_regret=2.0):
    """
    Rank candidate books for one user by posterior mean rate plus an uncertainty bonus.

    For every posterior draw the Poisson rate of each candidate book is
    exp(mu + user_bias + book_bias + <user_factors, book_factors>). Each book is
    scored as mean(rate) + exploration_factor * var(rate) across draws, and the
    top_k highest-scoring books are returned.

    user_index: Integer index of the user (position along the user axis of the trace).
    book_indices: Sequence of integer book indices available for recommendation.
    trace: Posterior samples, either an object exposing a `posterior` group whose
        variables have leading (chain, draw) axes, or a mapping from variable
        name to an array with a single leading sample axis. Must contain
        "mu", "user_bias", "book_bias", "user_factors" and "book_factors".
    top_k: Number of books to recommend (default=5).
    exploration_factor: Weight of the posterior variance bonus relative to the
        posterior mean.
    regret_threshold: Accepted for interface compatibility; currently has no
        effect on the ranking.
    max_regret: Accepted for interface compatibility; currently has no effect
        on the ranking.

    Returns a list of at most top_k elements of book_indices, best first. An
    empty candidate pool or a non-positive top_k yields an empty list.
    """
    # NOTE(review): earlier revisions computed a clipped regret and a
    # `should_explore` mask from regret_threshold / max_regret but never fed
    # them into the score. The ranking rule is kept as it was; confirm how the
    # regret gate is meant to enter the score before wiring it in.
    if len(book_indices) == 0 or top_k <= 0:
        return []

    mu_draws = _posterior_draws(trace, "mu")
    user_bias_draws = _posterior_draws(trace, "user_bias")[:, user_index]
    book_bias_draws = _posterior_draws(trace, "book_bias")[:, book_indices]
    user_factor_draws = _posterior_draws(trace, "user_factors")[:, user_index, :]
    book_factor_draws = _posterior_draws(trace, "book_factors")[:, book_indices, :]

    # Per-draw dot product between the user's factors and each book's factors;
    # einsum avoids materialising the (sample, book, latent) product.
    interaction = np.einsum("sd,sbd->sb", user_factor_draws, book_factor_draws)

    # Predicted rate for every (draw, book) pair, computed once and reused for
    # both the mean and the variance.
    rates = np.exp(mu_draws[:, None] + user_bias_draws[:, None] + book_bias_draws + interaction)

    expected_rewards = rates.mean(axis=0)
    rating_uncertainty = rates.var(axis=0)

    # Exploration-adjusted score: books with uncertain rates get a bonus.
    exploration_score = expected_rewards + exploration_factor * rating_uncertainty

    # Highest score first.
    ranked_books = np.argsort(-exploration_score)

    return [book_indices[i] for i in ranked_books[:top_k]]

# Demo: score every encoded book for a single user and show the five best.
# `user_id_example` is passed as `user_index`, i.e. it selects a position along
# the user axis of the posterior samples - replace it with the index of a real user.
user_id_example = 42
book_pool = np.arange(num_books)  # candidate pool: all book indices

recommended_books = bayes_general_recommendation(
    user_index=user_id_example,
    book_indices=book_pool,
    trace=trace,
    top_k=5,
)
print("\nTop-5 Recommended Books for User", user_id_example, ":", recommended_books)
