In [1]:
import numpy as np
import pymc as pm
import matplotlib.pyplot as plt

# Generate synthetic data
np.random.seed(42)
num_users = 10
num_items = 15
latent_dim = 2

# True latent factors
true_user_factors = np.random.normal(0, 1, (num_users, latent_dim))
true_item_factors = np.random.normal(0, 1, (num_items, latent_dim))

# Generate ratings
ratings = np.dot(true_user_factors, true_item_factors.T) + np.random.normal(0, 0.5, (num_users, num_items))

# Mask some ratings as unobserved
mask = np.random.rand(num_users, num_items) < 0.8
observed_ratings = np.where(mask, ratings, np.nan)


In [7]:
import pymc as pm
import numpy as np

# Observed ratings with NaNs
observed_ratings_masked = np.ma.masked_invalid(observed_ratings)

# Bayesian Model
with pm.Model() as model:
    # Priors for user and item factors
    user_factors = pm.Normal("user_factors", mu=0, sigma=1, shape=(num_users, latent_dim))
    item_factors = pm.Normal("item_factors", mu=0, sigma=1, shape=(num_items, latent_dim))
    
    # Noise variance
    sigma = pm.HalfNormal("sigma", sigma=1)
    
    # Predicted ratings
    predicted_ratings = pm.Deterministic("predicted_ratings", pm.math.dot(user_factors, item_factors.T))
    
    # Likelihood for observed ratings
    observed_data = pm.Normal("observed_data", mu=predicted_ratings, sigma=sigma, observed=observed_ratings_masked)
    
    # Inference
    trace = pm.sample(2000, return_inferencedata=True)


Initializing NUTS using jitter+adapt_diag...
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [user_factors, item_factors, sigma, observed_data_unobserved]


Sampling 4 chains for 1_000 tune and 2_000 draw iterations (4_000 + 8_000 draws total) took 9 seconds.
The rhat statistic is larger than 1.01 for some parameters. This indicates problems during sampling. See https://arxiv.org/abs/1903.08008 for details
The effective sample size per chain is smaller than 100 for some parameters.  A higher number is needed for reliable rhat and ess computation. See https://arxiv.org/abs/1903.08008 for details


In [10]:
with model:
    posterior_predictive = pm.sample_posterior_predictive(trace, var_names=["predicted_ratings"])

# Extract predicted ratings
predicted_ratings_mean = posterior_predictive["predicted_ratings"].mean(axis=0)

# Fill missing values
filled_ratings = observed_ratings.copy()
filled_ratings[np.isnan(observed_ratings)] = predicted_ratings_mean[np.isnan(observed_ratings)]

print("Filled Ratings Matrix:")
print(filled_ratings)


Sampling: []


KeyError: 'predicted_ratings'

In [11]:
def recommend_items(filled_ratings, user_id, top_n=3):
    user_ratings = filled_ratings[user_id]
    recommended_items = np.argsort(-user_ratings)[:top_n]
    return recommended_items

# Recommend top 3 items for user 0
recommendations = recommend_items(filled_ratings, user_id=0)
print(f"Recommended items for User 0: {recommendations}")


NameError: name 'filled_ratings' is not defined