In [None]:
#import pytensor
#print(pytensor.config.cxx)

#set up g++ and openBLAS

#import pytensor
#print(dir(pytensor.config))

#import pytensor
#pytensor.config.blas__ldflags = '-LC:\\OpenBLAS\\lib -lopenblas'
#print(pytensor.config.blas__ldflags)

#import pytensor
#print("BLAS flags:", pytensor.config.blas__ldflags)
# print("Computation Mode:", pytensor.config.mode)


"C:\mingw64\bin\g++.EXE"
-LC:\OpenBLAS\lib -lopenblas
BLAS flags: -LC:\OpenBLAS\lib -lopenblas
Computation Mode: Mode


# USE ADVI (only 1000 rows for testing):

In [23]:
import numpy as np
import pandas as pd
import pymc as pm
import matplotlib.pyplot as plt

# Load dataset
df = pd.read_excel("book_ratings.xlsx")

# Select relevant columns
df = df[['User-ID', 'ISBN', 'Book-Rating']]

# **Downsample to 1000 random rows for testing**
df = df.sample(n=1000, random_state=42).reset_index(drop=True)

# Encode User-ID and ISBN as categorical for indexing
df['User-Index'] = df['User-ID'].astype("category").cat.codes
df['Book-Index'] = df['ISBN'].astype("category").cat.codes

# **Remap indices to contiguous range** (Fixes the IndexError)
df['User-Index'] = df['User-Index'].astype("category").cat.codes
df['Book-Index'] = df['Book-Index'].astype("category").cat.codes

# Get updated number of unique users and books
num_users = df['User-Index'].nunique()
num_books = df['Book-Index'].nunique()

# Convert to numpy arrays for modeling
user_ids = df['User-Index'].values
book_ids = df['Book-Index'].values
ratings = df['Book-Rating'].values  # Using raw ratings for Poisson

print("Number of unique users:", num_users)
print("Number of unique books:", num_books)

# Set latent dimension 
latent_dim = 5

# Compute rating counts per user and book
user_rating_counts = df.groupby('User-Index')['Book-Rating'].count()
book_rating_counts = df.groupby('Book-Index')['Book-Rating'].count()

# Avoid division by zero
user_rating_counts[user_rating_counts == 0] = 1
book_rating_counts[book_rating_counts == 0] = 1

# Convert to NumPy arrays
user_ids_np = df['User-Index'].to_numpy()
book_ids_np = df['Book-Index'].to_numpy()

# Bayesian Probabilistic Matrix Factorization Model with Gamma-Poisson
with pm.Model() as model:
    # Prior for global mean rating
    mu = pm.Gamma("mu", alpha=2, beta=0.5)
    
    # User and book bias priors
    user_bias = pm.Normal("user_bias", mu=0, sigma=1 / np.sqrt(user_rating_counts + 1), shape=num_users)
    book_bias = pm.Normal("book_bias", mu=0, sigma=1 / np.sqrt(book_rating_counts + 1), shape=num_books)

    # Hierarchical priors for latent factors
    sigma_u = pm.HalfCauchy("sigma_u", beta=1)
    sigma_b = pm.HalfCauchy("sigma_b", beta=1)
    
    user_factors = pm.Normal("user_factors", mu=0, sigma=sigma_u, shape=(num_users, latent_dim))
    book_factors = pm.Normal("book_factors", mu=0, sigma=sigma_b, shape=(num_books, latent_dim))

    # Expected rating using Poisson lambda
    lambda_rating = pm.math.exp(
        mu +
        user_bias[user_ids_np] +
        book_bias[book_ids_np] +
        (user_factors[user_ids_np] * book_factors[book_ids_np]).sum(axis=1)
    )

    # Poisson likelihood
    ratings_obs = pm.Poisson("ratings_obs", mu=lambda_rating, observed=ratings)
    
    # Use ADVI for fast variational inference instead of NUTS
    print("Running Variational Inference (ADVI)...")
    approx = pm.fit(n=50000, method="advi")
    trace = approx.sample(draws=2000)

# **Extract posterior values manually since PyMC won't sample `ratings_obs`**
with model:
    print("\nManually Generating Predictions Using Posterior Samples...")
    
    # Extract posterior values
    mu_post = trace.posterior["mu"].mean().item()
    user_bias_post = trace.posterior["user_bias"].mean(dim=("chain", "draw")).values
    book_bias_post = trace.posterior["book_bias"].mean(dim=("chain", "draw")).values
    user_factors_post = trace.posterior["user_factors"].mean(dim=("chain", "draw")).values
    book_factors_post = trace.posterior["book_factors"].mean(dim=("chain", "draw")).values

    # Compute expected ratings
    predicted_ratings = np.exp(
        mu_post + 
        user_bias_post[user_ids_np] + 
        book_bias_post[book_ids_np] +
        (user_factors_post[user_ids_np] * book_factors_post[book_ids_np]).sum(axis=1)
    )

    print("\nExample of Predicted Ratings (posterior predictive mean):")
    print(predicted_ratings[:5])

# ---- Bayes General Multi-Step Lookahead Recommendation ---- #

def bayes_general_recommendation(user_index, book_indices, trace, top_k=5, exploration_factor=0.5, regret_threshold=0.8, max_regret=2.0):
    """
    Multi-step lookahead Bayesian regret minimization for recommending 5 books.
    """
    mu_samples = trace.posterior["mu"].values
    user_bias_samples = trace.posterior["user_bias"].values[:, :, user_index]
    book_bias_samples = trace.posterior["book_bias"].values[:, :, book_indices]
    user_factors_samples = trace.posterior["user_factors"].values[:, :, user_index, :]
    book_factors_samples = trace.posterior["book_factors"].values[:, :, book_indices, :]

    num_samples = mu_samples.shape[1]  # Number of posterior samples
    
    # Compute expected rewards using posterior sampling
    expected_rewards = np.mean(
        np.exp(mu_samples[:, :, None] + user_bias_samples[:, :, None] + book_bias_samples +
               np.sum(user_factors_samples[:, :, None, :] * book_factors_samples, axis=-1)), axis=1
    )

    # Compute variance (uncertainty measure)
    rating_uncertainty = np.var(
        np.exp(mu_samples[:, :, None] + user_bias_samples[:, :, None] + book_bias_samples +
               np.sum(user_factors_samples[:, :, None, :] * book_factors_samples, axis=-1)), axis=1
    )
    
    # Compute Bayesian regret
    best_expected_reward = np.max(expected_rewards, axis=1)
    regrets = best_expected_reward[:, None] - expected_rewards

    # Cap regret to prevent extreme exploration
    regrets = np.clip(regrets, 0, max_regret)

    # Apply regret threshold
    should_explore = regrets > regret_threshold

    # Compute future learning potential
    expected_future_gain = exploration_factor * rating_uncertainty

    # Compute exploration-adjusted score
    exploration_score = expected_rewards + expected_future_gain

    # Rank books
    ranked_books = np.argsort(-exploration_score, axis=1)  # Sort in descending order

    # Select top-k books for recommendation
    selected_books = [book_indices[i] for i in ranked_books[0, :top_k]]

    return selected_books

# Example usage: Recommend 5 books for a user
user_id_example = 42  # Replace with an actual user ID
book_pool = np.arange(num_books)  # Assuming all books are available

recommended_books = bayes_general_recommendation(user_id_example, book_pool, trace, top_k=5)
print("\nTop-5 Recommended Books for User", user_id_example, ":", recommended_books)


Number of unique users: 838
Number of unique books: 977
Running Variational Inference (ADVI)...


Output()

Finished [100%]: Average Loss = 3,234.5



Manually Generating Predictions Using Posterior Samples...

Example of Predicted Ratings (posterior predictive mean):
[4.98980722 5.02430613 4.28409701 0.45939204 0.63903444]

Top-5 Recommended Books for User 42 : [872, 134, 3, 171, 755]
