In [None]:
import implicit
from scipy.sparse import coo_matrix
import numpy as np
import json
from tqdm import tqdm

In [None]:
root = "datasets3/"
train_interactions = pd.read_csv(root+"seen_interactions.csv")
test_interactions = pd.read_csv(root+"hidden_interactions.csv")

In [None]:
# Train BPR model
print("\nTraining BPR model...")
model_bpr = implicit.bpr.BayesianPersonalizedRanking(
    factors=100,
    iterations=50,
    learning_rate=0.01,
    regularization=0.01,
    random_state=42,
    verify_negative_samples=True
)

# BPR expects item-user matrix (transposed)
model_bpr.fit(user_item_matrix.T, show_progress=True)


In [10]:
import implicit
from scipy.sparse import coo_matrix
import numpy as np
import json
from tqdm import tqdm

# Prepare data for BPR

root = "datasets3/"
train_interactions = pd.read_csv(root+"seen_interactions.csv")
test_interactions = pd.read_csv(root+"hidden_interactions.csv")

# Use ALL training data, but weight high ratings more
# This ensures all users are in the model

print(f"Total training interactions: {len(train_interactions)}")

# Create user and item mappings using ALL training data
user_ids = train_interactions['u'].unique()
item_ids = train_interactions['i'].unique()

user_to_idx = {uid: idx for idx, uid in enumerate(user_ids)}
item_to_idx = {iid: idx for idx, iid in enumerate(item_ids)}
idx_to_item = {idx: iid for iid, idx in item_to_idx.items()}
idx_to_user = {idx: uid for uid, idx in user_to_idx.items()}

print(f"Number of users: {len(user_ids)}")
print(f"Number of items: {len(item_ids)}")

# Check for test users not in training
test_users = test_interactions['u'].unique()
train_users = set(user_ids)
users_not_in_train = [u for u in test_users if u not in train_users]
print(f"Test users not in training: {len(users_not_in_train)} out of {len(test_users)}")

# Create user-item interaction matrix with weighted ratings
# Give more weight to higher ratings
user_indices = train_interactions['u'].map(user_to_idx).values
item_indices = train_interactions['i'].map(item_to_idx).values

# Weight strategy: convert ratings to confidence scores
# 5 -> 2.0, 4 -> 1.0, 3 -> 0.5, 2 -> 0.1, 1 -> 0.0 (or exclude)
def rating_to_weight(rating):
    if rating >= 5:
        return 2.0
    elif rating >= 4:
        return 1.0
    elif rating >= 3:
        return 0.5
    elif rating >= 2:
        return 0.1
    else:
        return 0.0

weights = train_interactions['rating'].apply(rating_to_weight).values

# Filter out zero weights
non_zero_mask = weights > 0
user_indices = user_indices[non_zero_mask]
item_indices = item_indices[non_zero_mask]
weights = weights[non_zero_mask]

user_item_matrix = coo_matrix(
    (weights, (user_indices, item_indices)),
    shape=(len(user_ids), len(item_ids))
).tocsr()

print(f"Sparse matrix shape: {user_item_matrix.shape}")
print(f"Number of interactions: {user_item_matrix.nnz}")
print(f"Sparsity: {1 - (user_item_matrix.nnz / (user_item_matrix.shape[0] * user_item_matrix.shape[1])):.4f}")


print("BPR model trained!")

# Build set of items each user has seen
users_seen_train = train_interactions.groupby('u')['i'].agg(set).to_dict()

# Popularity fallback
K = 30
recipe_rating_sum = train_interactions.groupby('i')['rating'].sum()
recipe_rating_count = train_interactions.groupby('i')['rating'].count()
recipe_avg_rating = recipe_rating_sum / recipe_rating_count
recipe_popularity = recipe_avg_rating * recipe_rating_count
top_popular_recipes = list(recipe_popularity.sort_values(ascending=False).index[:K])

# Generate recommendations for test users
recommendations_bpr = {}

for user_id in tqdm(test_users, desc="BPR recommendations"):
    # Check if user exists in training data
    if user_id not in user_to_idx:
        # Unknown user: use popularity fallback
        recommendations_bpr[int(user_id)] = [int(x) for x in top_popular_recipes[:K]]
        continue
    
    user_idx = user_to_idx[user_id]
    
    # Verify the user_idx is valid for the matrix
    if user_idx >= user_item_matrix.shape[0]:
        print(f"Warning: user_idx {user_idx} >= matrix rows {user_item_matrix.shape[0]} for user_id {user_id}")
        recommendations_bpr[int(user_id)] = [int(x) for x in top_popular_recipes[:K]]
        continue
    
    user_seen = users_seen_train.get(user_id, set())
    
    try:
        # Get the user's row from the matrix
        user_row = user_item_matrix[user_idx]
        
        # Verify dimensions before calling recommend
        if user_row.shape[1] != len(item_ids):
            print(f"Warning: user_row shape {user_row.shape} doesn't match items {len(item_ids)}")
            recommendations_bpr[int(user_id)] = [int(x) for x in top_popular_recipes[:K]]
            continue
        
        # Get recommendations from BPR
        recommended_items, scores = model_bpr.recommend(
            user_idx, 
            user_row,
            N=K*3,  # Get more than needed to account for filtering
            filter_already_liked_items=True
        )
        
        # Convert back to original item IDs and filter already seen items
        recs = []
        for item_idx in recommended_items:
            if item_idx in idx_to_item:
                item_id = idx_to_item[item_idx]
                if item_id not in user_seen:
                    recs.append(int(item_id))
                if len(recs) >= K:
                    break
        
        # If we don't have enough recommendations, fill with popular items
        if len(recs) < K:
            for pop_item in top_popular_recipes:
                if pop_item not in user_seen and pop_item not in recs:
                    recs.append(int(pop_item))
                if len(recs) >= K:
                    break
        
        recommendations_bpr[int(user_id)] = recs[:K]
    
    except Exception as e:
        # If there's any error, use popularity fallback
        print(f"Error for user {user_id} (idx={user_idx}): {e}")
        recommendations_bpr[int(user_id)] = [int(x) for x in top_popular_recipes[:K]]

# Save recommendations
with open("bpr_recommendations.json", "w") as f:
    json.dump(recommendations_bpr, f, indent=4)

print(f"\nSaved BPR recommendations to 'bpr_recommendations.json'")
print(f"Total users with recommendations: {len(recommendations_bpr)}")

Total training interactions: 95000
Number of users: 16077
Number of items: 54028
Test users not in training: 363 out of 2834
Sparse matrix shape: (16077, 54028)
Number of interactions: 92121
Sparsity: 0.9999

Training BPR model...




  0%|          | 0/50 [00:00<?, ?it/s]

BPR model trained!


BPR recommendations:   5%|██▊                                                           | 128/2834 [00:00<00:03, 729.70it/s]

Error for user 2566 (idx=1093): index 16541 is out of bounds for axis 1 with size 16077
Error for user 519 (idx=4060): index 33200 is out of bounds for axis 1 with size 16077
Error for user 330 (idx=731): index 17640 is out of bounds for axis 1 with size 16077
Error for user 143 (idx=203): index 16410 is out of bounds for axis 1 with size 16077
Error for user 533 (idx=2404): index 16210 is out of bounds for axis 1 with size 16077
Error for user 6431 (idx=13233): index 39198 is out of bounds for axis 1 with size 16077
Error for user 711 (idx=1468): index 16783 is out of bounds for axis 1 with size 16077
Error for user 27 (idx=1523): index 16769 is out of bounds for axis 1 with size 16077
Error for user 462 (idx=26): index 16180 is out of bounds for axis 1 with size 16077
Error for user 2778 (idx=633): index 17406 is out of bounds for axis 1 with size 16077
Error for user 723 (idx=1072): index 19086 is out of bounds for axis 1 with size 16077
Error for user 1440 (idx=276): index 18299 is

BPR recommendations:   8%|█████▏                                                        | 238/2834 [00:00<00:02, 896.98it/s]

Error for user 4509 (idx=4274): index 22343 is out of bounds for axis 1 with size 16077
Error for user 14820 (idx=6112): index 18091 is out of bounds for axis 1 with size 16077
Error for user 2606 (idx=442): index 18821 is out of bounds for axis 1 with size 16077
Error for user 16231 (idx=13498): index 43989 is out of bounds for axis 1 with size 16077
Error for user 373 (idx=666): index 16720 is out of bounds for axis 1 with size 16077
Error for user 4880 (idx=3698): index 24734 is out of bounds for axis 1 with size 16077
Error for user 594 (idx=263): index 17755 is out of bounds for axis 1 with size 16077
Error for user 13251 (idx=13641): index 41117 is out of bounds for axis 1 with size 16077
Error for user 189 (idx=84): index 16867 is out of bounds for axis 1 with size 16077
Error for user 15563 (idx=15965): index 43127 is out of bounds for axis 1 with size 16077
Error for user 4413 (idx=3725): index 29877 is out of bounds for axis 1 with size 16077
Error for user 2664 (idx=10999): 

BPR recommendations:  12%|███████▏                                                      | 328/2834 [00:00<00:04, 508.03it/s]

Error for user 4131 (idx=3542): index 20314 is out of bounds for axis 1 with size 16077
Error for user 1523 (idx=1650): index 16204 is out of bounds for axis 1 with size 16077
Error for user 3133 (idx=2310): index 16171 is out of bounds for axis 1 with size 16077
Error for user 7025 (idx=4871): index 17184 is out of bounds for axis 1 with size 16077
Error for user 3117 (idx=12508): index 19620 is out of bounds for axis 1 with size 16077
Error for user 6944 (idx=4698): index 16221 is out of bounds for axis 1 with size 16077
Error for user 847 (idx=4453): index 17453 is out of bounds for axis 1 with size 16077
Error for user 2203 (idx=1535): index 16394 is out of bounds for axis 1 with size 16077
Error for user 3345 (idx=4593): index 31587 is out of bounds for axis 1 with size 16077
Error for user 2270 (idx=898): index 17539 is out of bounds for axis 1 with size 16077
Error for user 10468 (idx=9137): index 22348 is out of bounds for axis 1 with size 16077
Error for user 829 (idx=1326): i

BPR recommendations:  30%|██████████████████▌                                          | 862/2834 [00:00<00:01, 1451.17it/s]

Error for user 41 (idx=2528): index 16100 is out of bounds for axis 1 with size 16077
Error for user 2641 (idx=1301): index 27938 is out of bounds for axis 1 with size 16077
Error for user 1956 (idx=6350): index 52390 is out of bounds for axis 1 with size 16077
Error for user 13172 (idx=3723): index 24008 is out of bounds for axis 1 with size 16077
Error for user 25 (idx=61): index 16714 is out of bounds for axis 1 with size 16077
Error for user 22877 (idx=11197): index 30210 is out of bounds for axis 1 with size 16077
Error for user 3703 (idx=4225): index 17106 is out of bounds for axis 1 with size 16077
Error for user 7109 (idx=12489): index 17011 is out of bounds for axis 1 with size 16077
Error for user 542 (idx=2701): index 16781 is out of bounds for axis 1 with size 16077
Error for user 524 (idx=4012): index 16288 is out of bounds for axis 1 with size 16077
Error for user 295 (idx=67): index 16650 is out of bounds for axis 1 with size 16077
Error for user 7359 (idx=1657): index 3

BPR recommendations:  46%|███████████████████████████▎                                | 1290/2834 [00:01<00:00, 1568.03it/s]

Error for user 744 (idx=325): index 16743 is out of bounds for axis 1 with size 16077
Error for user 151 (idx=973): index 17896 is out of bounds for axis 1 with size 16077
Error for user 764 (idx=3936): index 16810 is out of bounds for axis 1 with size 16077
Error for user 4111 (idx=1593): index 16959 is out of bounds for axis 1 with size 16077
Error for user 967 (idx=2303): index 16446 is out of bounds for axis 1 with size 16077
Error for user 61 (idx=1319): index 17172 is out of bounds for axis 1 with size 16077
Error for user 89 (idx=6): index 16383 is out of bounds for axis 1 with size 16077
Error for user 10947 (idx=8034): index 18439 is out of bounds for axis 1 with size 16077
Error for user 20067 (idx=9079): index 22140 is out of bounds for axis 1 with size 16077
Error for user 3039 (idx=1846): index 43162 is out of bounds for axis 1 with size 16077
Error for user 1075 (idx=6087): index 18182 is out of bounds for axis 1 with size 16077
Error for user 1298 (idx=9050): index 18650

BPR recommendations:  63%|█████████████████████████████████████▊                      | 1784/2834 [00:01<00:00, 1983.90it/s]

Error for user 5176 (idx=2861): index 30571 is out of bounds for axis 1 with size 16077
Error for user 4123 (idx=4440): index 19903 is out of bounds for axis 1 with size 16077
Error for user 4177 (idx=5751): index 17513 is out of bounds for axis 1 with size 16077
Error for user 2457 (idx=7324): index 16185 is out of bounds for axis 1 with size 16077
Error for user 13345 (idx=10276): index 25453 is out of bounds for axis 1 with size 16077
Error for user 4055 (idx=1070): index 17397 is out of bounds for axis 1 with size 16077
Error for user 8899 (idx=5564): index 50210 is out of bounds for axis 1 with size 16077
Error for user 315 (idx=772): index 17650 is out of bounds for axis 1 with size 16077
Error for user 512 (idx=2983): index 19767 is out of bounds for axis 1 with size 16077
Error for user 1347 (idx=2202): index 17538 is out of bounds for axis 1 with size 16077
Error for user 1564 (idx=848): index 26270 is out of bounds for axis 1 with size 16077
Error for user 569 (idx=5487): ind

BPR recommendations:  83%|█████████████████████████████████████████████████▋          | 2347/2834 [00:01<00:00, 2260.51it/s]

Error for user 4852 (idx=1473): index 17013 is out of bounds for axis 1 with size 16077
Error for user 3097 (idx=2856): index 25637 is out of bounds for axis 1 with size 16077
Error for user 166 (idx=3202): index 23494 is out of bounds for axis 1 with size 16077
Error for user 9734 (idx=10655): index 34115 is out of bounds for axis 1 with size 16077
Error for user 843 (idx=747): index 42729 is out of bounds for axis 1 with size 16077
Error for user 7852 (idx=11925): index 33013 is out of bounds for axis 1 with size 16077
Error for user 4029 (idx=5334): index 17592 is out of bounds for axis 1 with size 16077
Error for user 4895 (idx=10254): index 24331 is out of bounds for axis 1 with size 16077
Error for user 888 (idx=659): index 19421 is out of bounds for axis 1 with size 16077
Error for user 3874 (idx=12138): index 33975 is out of bounds for axis 1 with size 16077
Error for user 7366 (idx=1940): index 19424 is out of bounds for axis 1 with size 16077
Error for user 7422 (idx=5235): i

BPR recommendations:  91%|██████████████████████████████████████████████████████▊     | 2589/2834 [00:01<00:00, 2191.20it/s]

Error for user 322 (idx=266): index 16826 is out of bounds for axis 1 with size 16077
Error for user 968 (idx=1819): index 22274 is out of bounds for axis 1 with size 16077
Error for user 3072 (idx=2392): index 17198 is out of bounds for axis 1 with size 16077
Error for user 13885 (idx=8333): index 19538 is out of bounds for axis 1 with size 16077
Error for user 4387 (idx=543): index 17537 is out of bounds for axis 1 with size 16077
Error for user 1381 (idx=227): index 17761 is out of bounds for axis 1 with size 16077
Error for user 9852 (idx=7536): index 16865 is out of bounds for axis 1 with size 16077
Error for user 15190 (idx=13053): index 39067 is out of bounds for axis 1 with size 16077
Error for user 13558 (idx=2808): index 16617 is out of bounds for axis 1 with size 16077
Error for user 9462 (idx=12537): index 39212 is out of bounds for axis 1 with size 16077
Error for user 4466 (idx=883): index 23176 is out of bounds for axis 1 with size 16077
Error for user 3333 (idx=4858): i

BPR recommendations: 100%|████████████████████████████████████████████████████████████| 2834/2834 [00:02<00:00, 1387.26it/s]

Error for user 1602 (idx=3127): index 16163 is out of bounds for axis 1 with size 16077
Error for user 2059 (idx=2264): index 21995 is out of bounds for axis 1 with size 16077
Error for user 2516 (idx=1142): index 18922 is out of bounds for axis 1 with size 16077
Error for user 3952 (idx=2275): index 21061 is out of bounds for axis 1 with size 16077
Error for user 15344 (idx=15402): index 23709 is out of bounds for axis 1 with size 16077
Error for user 3293 (idx=12055): index 24724 is out of bounds for axis 1 with size 16077
Error for user 14592 (idx=8506): index 20151 is out of bounds for axis 1 with size 16077
Error for user 15508 (idx=10282): index 29966 is out of bounds for axis 1 with size 16077
Error for user 4860 (idx=4795): index 34167 is out of bounds for axis 1 with size 16077
Error for user 1192 (idx=1848): index 17212 is out of bounds for axis 1 with size 16077
Error for user 293 (idx=294): index 17194 is out of bounds for axis 1 with size 16077
Error for user 3791 (idx=738




In [3]:
# Install implicit library first: pip install implicit

import implicit
from scipy.sparse import coo_matrix
import numpy as np
import json
from tqdm import tqdm
import pandas as pd

In [9]:
import implicit
from scipy.sparse import coo_matrix
import numpy as np
import json
from tqdm import tqdm

# Prepare data for BPR

root = "datasets3/"
train_interactions = pd.read_csv(root+"seen_interactions.csv")
test_interactions = pd.read_csv(root+"hidden_interactions.csv")

# Use ALL training data, but weight high ratings more
# This ensures all users are in the model

print(f"Total training interactions: {len(train_interactions)}")

# Create user and item mappings using ALL training data
user_ids = train_interactions['u'].unique()
item_ids = train_interactions['i'].unique()

user_to_idx = {uid: idx for idx, uid in enumerate(user_ids)}
item_to_idx = {iid: idx for idx, iid in enumerate(item_ids)}
idx_to_item = {idx: iid for iid, idx in item_to_idx.items()}
idx_to_user = {idx: uid for uid, idx in user_to_idx.items()}

print(f"Number of users: {len(user_ids)}")
print(f"Number of items: {len(item_ids)}")

# Create user-item interaction matrix with weighted ratings
# Give more weight to higher ratings
user_indices = train_interactions['u'].map(user_to_idx).values
item_indices = train_interactions['i'].map(item_to_idx).values

# Weight strategy: convert ratings to confidence scores
# 5 -> 2.0, 4 -> 1.0, 3 -> 0.5, 2 -> 0.1, 1 -> 0.0 (or exclude)
def rating_to_weight(rating):
    if rating >= 5:
        return 2.0
    elif rating >= 4:
        return 1.0
    elif rating >= 3:
        return 0.5
    elif rating >= 2:
        return 0.1
    else:
        return 0.0

weights = train_interactions['rating'].apply(rating_to_weight).values

# Filter out zero weights
non_zero_mask = weights > 0
user_indices = user_indices[non_zero_mask]
item_indices = item_indices[non_zero_mask]
weights = weights[non_zero_mask]

user_item_matrix = coo_matrix(
    (weights, (user_indices, item_indices)),
    shape=(len(user_ids), len(item_ids))
).tocsr()

print(f"Sparse matrix shape: {user_item_matrix.shape}")
print(f"Number of interactions: {user_item_matrix.nnz}")
print(f"Sparsity: {1 - (user_item_matrix.nnz / (user_item_matrix.shape[0] * user_item_matrix.shape[1])):.4f}")

# Train BPR model
print("\nTraining BPR model...")
model_bpr = implicit.bpr.BayesianPersonalizedRanking(
    factors=100,
    iterations=50,
    learning_rate=0.01,
    regularization=0.01,
    random_state=42,
    verify_negative_samples=True
)

# BPR expects item-user matrix (transposed)
model_bpr.fit(user_item_matrix.T, show_progress=True)

print("BPR model trained!")

# Build set of items each user has seen
users_seen_train = train_interactions.groupby('u')['i'].agg(set).to_dict()

# Popularity fallback
recipe_rating_sum = train_interactions.groupby('i')['rating'].sum()
recipe_rating_count = train_interactions.groupby('i')['rating'].count()
recipe_avg_rating = recipe_rating_sum / recipe_rating_count
recipe_popularity = recipe_avg_rating * recipe_rating_count
top_popular_recipes = list(recipe_popularity.sort_values(ascending=False).index[:K])

# Generate recommendations for test users
K = 30
test_users = test_interactions['u'].unique()

recommendations_bpr = {}

for user_id in tqdm(test_users, desc="BPR recommendations"):
    if user_id not in user_to_idx:
        # Unknown user: use popularity fallback
        recommendations_bpr[int(user_id)] = [int(x) for x in top_popular_recipes[:K]]
        continue
    
    user_idx = user_to_idx[user_id]
    user_seen = users_seen_train.get(user_id, set())
    
    try:
        # Get recommendations from BPR
        # Note: recommend() expects user index and user's row from the matrix
        recommended_items, scores = model_bpr.recommend(
            user_idx, 
            user_item_matrix[user_idx],
            N=K*3,  # Get more than needed to account for filtering
            filter_already_liked_items=True
        )
        
        # Convert back to original item IDs and filter already seen items
        recs = []
        for item_idx in recommended_items:
            if item_idx in idx_to_item:
                item_id = idx_to_item[item_idx]
                if item_id not in user_seen:
                    recs.append(int(item_id))
                if len(recs) >= K:
                    break
        
        # If we don't have enough recommendations, fill with popular items
        if len(recs) < K:
            for pop_item in top_popular_recipes:
                if pop_item not in user_seen and pop_item not in recs:
                    recs.append(int(pop_item))
                if len(recs) >= K:
                    break
        
        recommendations_bpr[int(user_id)] = recs[:K]
    
    except Exception as e:
        # If there's any error, use popularity fallback
        print(f"Error for user {user_id}: {e}")
        recommendations_bpr[int(user_id)] = [int(x) for x in top_popular_recipes[:K]]

# Save recommendations
with open("bpr_recommendations.json", "w") as f:
    json.dump(recommendations_bpr, f, indent=4)

print(f"\nSaved BPR recommendations to 'bpr_recommendations.json'")
print(f"Total users with recommendations: {len(recommendations_bpr)}")

Total training interactions: 95000
Number of users: 16077
Number of items: 54028
Sparse matrix shape: (16077, 54028)
Number of interactions: 92121
Sparsity: 0.9999

Training BPR model...




  0%|          | 0/50 [00:00<?, ?it/s]

BPR model trained!


BPR recommendations:   0%|                                                                         | 0/2834 [00:00<?, ?it/s]

Error for user 2566: index 16541 is out of bounds for axis 1 with size 16077
Error for user 519: index 33200 is out of bounds for axis 1 with size 16077
Error for user 330: index 17640 is out of bounds for axis 1 with size 16077
Error for user 143: index 16410 is out of bounds for axis 1 with size 16077
Error for user 533: index 16210 is out of bounds for axis 1 with size 16077
Error for user 6431: index 39198 is out of bounds for axis 1 with size 16077
Error for user 711: index 16783 is out of bounds for axis 1 with size 16077
Error for user 27: index 16769 is out of bounds for axis 1 with size 16077
Error for user 462: index 16180 is out of bounds for axis 1 with size 16077
Error for user 2778: index 17406 is out of bounds for axis 1 with size 16077
Error for user 723: index 19086 is out of bounds for axis 1 with size 16077
Error for user 1440: index 18299 is out of bounds for axis 1 with size 16077
Error for user 1786: index 18365 is out of bounds for axis 1 with size 16077
Error fo

BPR recommendations:  15%|█████████                                                    | 419/2834 [00:00<00:01, 2209.02it/s]

Error for user 1470: index 17735 is out of bounds for axis 1 with size 16077
Error for user 541: index 17498 is out of bounds for axis 1 with size 16077
Error for user 461: index 16703 is out of bounds for axis 1 with size 16077
Error for user 6610: index 23889 is out of bounds for axis 1 with size 16077
Error for user 172: index 16188 is out of bounds for axis 1 with size 16077
Error for user 2074: index 16656 is out of bounds for axis 1 with size 16077
Error for user 2813: index 17374 is out of bounds for axis 1 with size 16077
Error for user 16990: index 45914 is out of bounds for axis 1 with size 16077
Error for user 3394: index 17697 is out of bounds for axis 1 with size 16077
Error for user 28: index 17371 is out of bounds for axis 1 with size 16077
Error for user 184: index 16515 is out of bounds for axis 1 with size 16077
Error for user 6620: index 18136 is out of bounds for axis 1 with size 16077
Error for user 13524: index 24281 is out of bounds for axis 1 with size 16077
Err

BPR recommendations:  23%|█████████████▊                                               | 640/2834 [00:00<00:01, 2064.64it/s]

Error for user 2451: index 22792 is out of bounds for axis 1 with size 16077
Error for user 3150: index 18453 is out of bounds for axis 1 with size 16077
Error for user 5102: index 52661 is out of bounds for axis 1 with size 16077
Error for user 4196: index 20695 is out of bounds for axis 1 with size 16077
Error for user 11658: index 47968 is out of bounds for axis 1 with size 16077
Error for user 506: index 17948 is out of bounds for axis 1 with size 16077
Error for user 16091: index 45630 is out of bounds for axis 1 with size 16077
Error for user 5721: index 25983 is out of bounds for axis 1 with size 16077
Error for user 1136: index 32356 is out of bounds for axis 1 with size 16077
Error for user 1678: index 23101 is out of bounds for axis 1 with size 16077
Error for user 2096: index 21856 is out of bounds for axis 1 with size 16077
Error for user 796: index 19006 is out of bounds for axis 1 with size 16077
Error for user 1614: index 16208 is out of bounds for axis 1 with size 16077

BPR recommendations:  33%|███████████████████▉                                         | 928/2834 [00:00<00:00, 2366.80it/s]

Error for user 18283: index 32931 is out of bounds for axis 1 with size 16077
Error for user 7614: index 20557 is out of bounds for axis 1 with size 16077
Error for user 342: index 17474 is out of bounds for axis 1 with size 16077
Error for user 719: index 16367 is out of bounds for axis 1 with size 16077
Error for user 604: index 18079 is out of bounds for axis 1 with size 16077
Error for user 4389: index 19630 is out of bounds for axis 1 with size 16077
Error for user 1185: index 19393 is out of bounds for axis 1 with size 16077
Error for user 6185: index 26357 is out of bounds for axis 1 with size 16077
Error for user 19059: index 44568 is out of bounds for axis 1 with size 16077
Error for user 1411: index 16542 is out of bounds for axis 1 with size 16077
Error for user 1106: index 16915 is out of bounds for axis 1 with size 16077
Error for user 653: index 27022 is out of bounds for axis 1 with size 16077
Error for user 8702: index 53268 is out of bounds for axis 1 with size 16077
E

BPR recommendations:  42%|█████████████████████████▎                                  | 1196/2834 [00:00<00:00, 2475.48it/s]

Error for user 10669: index 20129 is out of bounds for axis 1 with size 16077
Error for user 1041: index 19223 is out of bounds for axis 1 with size 16077
Error for user 5611: index 18761 is out of bounds for axis 1 with size 16077
Error for user 1940: index 26682 is out of bounds for axis 1 with size 16077
Error for user 2808: index 21583 is out of bounds for axis 1 with size 16077
Error for user 3624: index 16281 is out of bounds for axis 1 with size 16077
Error for user 426: index 16366 is out of bounds for axis 1 with size 16077
Error for user 9261: index 23500 is out of bounds for axis 1 with size 16077
Error for user 14761: index 53889 is out of bounds for axis 1 with size 16077
Error for user 5252: index 17676 is out of bounds for axis 1 with size 16077
Error for user 503: index 16512 is out of bounds for axis 1 with size 16077
Error for user 2985: index 17882 is out of bounds for axis 1 with size 16077
Error for user 3655: index 19065 is out of bounds for axis 1 with size 16077

BPR recommendations:  51%|██████████████████████████████▌                             | 1445/2834 [00:00<00:00, 2283.97it/s]

Error for user 4981: index 18269 is out of bounds for axis 1 with size 16077
Error for user 68: index 24477 is out of bounds for axis 1 with size 16077
Error for user 7581: index 27680 is out of bounds for axis 1 with size 16077
Error for user 3272: index 18423 is out of bounds for axis 1 with size 16077
Error for user 8788: index 29959 is out of bounds for axis 1 with size 16077
Error for user 5245: index 19138 is out of bounds for axis 1 with size 16077
Error for user 3705: index 18470 is out of bounds for axis 1 with size 16077
Error for user 3795: index 20793 is out of bounds for axis 1 with size 16077
Error for user 8444: index 21220 is out of bounds for axis 1 with size 16077
Error for user 5176: index 30571 is out of bounds for axis 1 with size 16077
Error for user 4123: index 19903 is out of bounds for axis 1 with size 16077
Error for user 4177: index 17513 is out of bounds for axis 1 with size 16077
Error for user 2457: index 16185 is out of bounds for axis 1 with size 16077
E

BPR recommendations:  59%|███████████████████████████████████▌                        | 1677/2834 [00:00<00:00, 2019.79it/s]

Error for user 15720: index 39287 is out of bounds for axis 1 with size 16077
Error for user 2749: index 16153 is out of bounds for axis 1 with size 16077
Error for user 2123: index 22605 is out of bounds for axis 1 with size 16077
Error for user 4346: index 19612 is out of bounds for axis 1 with size 16077
Error for user 1619: index 17500 is out of bounds for axis 1 with size 16077
Error for user 2145: index 20060 is out of bounds for axis 1 with size 16077
Error for user 5074: index 19225 is out of bounds for axis 1 with size 16077
Error for user 543: index 16268 is out of bounds for axis 1 with size 16077
Error for user 18907: index 18204 is out of bounds for axis 1 with size 16077
Error for user 842: index 21293 is out of bounds for axis 1 with size 16077
Error for user 10316: index 28937 is out of bounds for axis 1 with size 16077
Error for user 2015: index 22052 is out of bounds for axis 1 with size 16077
Error for user 5971: index 17763 is out of bounds for axis 1 with size 1607

BPR recommendations:  67%|███████████████████████████████████████▉                    | 1886/2834 [00:00<00:00, 1788.62it/s]

Error for user 1455: index 21011 is out of bounds for axis 1 with size 16077
Error for user 7603: index 21751 is out of bounds for axis 1 with size 16077
Error for user 633: index 16280 is out of bounds for axis 1 with size 16077
Error for user 630: index 16202 is out of bounds for axis 1 with size 16077
Error for user 67: index 17584 is out of bounds for axis 1 with size 16077
Error for user 15977: index 21028 is out of bounds for axis 1 with size 16077
Error for user 13520: index 22371 is out of bounds for axis 1 with size 16077
Error for user 11346: index 30155 is out of bounds for axis 1 with size 16077
Error for user 13861: index 19189 is out of bounds for axis 1 with size 16077
Error for user 8339: index 47121 is out of bounds for axis 1 with size 16077
Error for user 8501: index 46001 is out of bounds for axis 1 with size 16077
Error for user 117: index 19161 is out of bounds for axis 1 with size 16077
Error for user 4118: index 30923 is out of bounds for axis 1 with size 16077


BPR recommendations:  78%|██████████████████████████████████████████████▌             | 2200/2834 [00:01<00:00, 2130.72it/s]

Error for user 6837: index 52253 is out of bounds for axis 1 with size 16077
Error for user 10467: index 16124 is out of bounds for axis 1 with size 16077
Error for user 1782: index 16339 is out of bounds for axis 1 with size 16077
Error for user 8937: index 17506 is out of bounds for axis 1 with size 16077
Error for user 3530: index 19917 is out of bounds for axis 1 with size 16077
Error for user 5882: index 16993 is out of bounds for axis 1 with size 16077
Error for user 3812: index 16518 is out of bounds for axis 1 with size 16077
Error for user 10280: index 18967 is out of bounds for axis 1 with size 16077
Error for user 6772: index 24348 is out of bounds for axis 1 with size 16077
Error for user 14359: index 37548 is out of bounds for axis 1 with size 16077
Error for user 3043: index 16197 is out of bounds for axis 1 with size 16077
Error for user 4260: index 27327 is out of bounds for axis 1 with size 16077
Error for user 2411: index 16168 is out of bounds for axis 1 with size 16

BPR recommendations:  86%|███████████████████████████████████████████████████▎        | 2426/2834 [00:01<00:00, 2075.18it/s]

Error for user 1030: index 16943 is out of bounds for axis 1 with size 16077
Error for user 13044: index 21029 is out of bounds for axis 1 with size 16077
Error for user 10802: index 46221 is out of bounds for axis 1 with size 16077
Error for user 3865: index 24019 is out of bounds for axis 1 with size 16077
Error for user 6683: index 37888 is out of bounds for axis 1 with size 16077
Error for user 2897: index 19152 is out of bounds for axis 1 with size 16077
Error for user 179: index 16113 is out of bounds for axis 1 with size 16077
Error for user 708: index 17269 is out of bounds for axis 1 with size 16077
Error for user 718: index 16493 is out of bounds for axis 1 with size 16077
Error for user 4900: index 18176 is out of bounds for axis 1 with size 16077
Error for user 15521: index 21846 is out of bounds for axis 1 with size 16077
Error for user 1379: index 16518 is out of bounds for axis 1 with size 16077
Error for user 5285: index 17508 is out of bounds for axis 1 with size 16077

BPR recommendations:  93%|███████████████████████████████████████████████████████▉    | 2642/2834 [00:01<00:00, 2079.51it/s]

Error for user 29: index 16209 is out of bounds for axis 1 with size 16077
Error for user 358: index 16246 is out of bounds for axis 1 with size 16077
Error for user 5630: index 25507 is out of bounds for axis 1 with size 16077
Error for user 1927: index 17698 is out of bounds for axis 1 with size 16077
Error for user 9989: index 19389 is out of bounds for axis 1 with size 16077
Error for user 3349: index 24929 is out of bounds for axis 1 with size 16077
Error for user 19355: index 27084 is out of bounds for axis 1 with size 16077
Error for user 646: index 19036 is out of bounds for axis 1 with size 16077
Error for user 6722: index 20122 is out of bounds for axis 1 with size 16077
Error for user 4949: index 16854 is out of bounds for axis 1 with size 16077
Error for user 11701: index 25943 is out of bounds for axis 1 with size 16077
Error for user 8622: index 23168 is out of bounds for axis 1 with size 16077
Error for user 659: index 27025 is out of bounds for axis 1 with size 16077
Er

BPR recommendations: 100%|████████████████████████████████████████████████████████████| 2834/2834 [00:01<00:00, 2084.12it/s]

Error for user 2682: index 21758 is out of bounds for axis 1 with size 16077
Error for user 8085: index 27516 is out of bounds for axis 1 with size 16077
Error for user 7073: index 16389 is out of bounds for axis 1 with size 16077
Error for user 21766: index 51212 is out of bounds for axis 1 with size 16077
Error for user 8791: index 16122 is out of bounds for axis 1 with size 16077
Error for user 2235: index 29780 is out of bounds for axis 1 with size 16077
Error for user 15635: index 30701 is out of bounds for axis 1 with size 16077
Error for user 13094: index 53043 is out of bounds for axis 1 with size 16077
Error for user 9564: index 25140 is out of bounds for axis 1 with size 16077
Error for user 710: index 16789 is out of bounds for axis 1 with size 16077
Error for user 12259: index 25838 is out of bounds for axis 1 with size 16077
Error for user 1204: index 31711 is out of bounds for axis 1 with size 16077
Error for user 9129: index 25692 is out of bounds for axis 1 with size 16





Saved BPR recommendations to 'bpr_recommendations.json'
Total users with recommendations: 2834


In [5]:
# Prepare data for BPR
# BPR works best with implicit feedback, so we'll treat ratings >= 4 as positive interactions
high_ratings = train_interactions[train_interactions['rating'] >= 4].copy()

print(f"Total training interactions: {len(train_interactions)}")
print(f"High-rating interactions (>=4): {len(high_ratings)}")

# Create user and item mappings
user_ids = high_ratings['u'].unique()
item_ids = high_ratings['i'].unique()

user_to_idx = {uid: idx for idx, uid in enumerate(user_ids)}
item_to_idx = {iid: idx for idx, iid in enumerate(item_ids)}
idx_to_item = {idx: iid for iid, idx in item_to_idx.items()}
idx_to_user = {idx: uid for uid, idx in user_to_idx.items()}

print(f"Number of users: {len(user_ids)}")
print(f"Number of items: {len(item_ids)}")

# Create user-item interaction matrix
user_indices = high_ratings['u'].map(user_to_idx).values
item_indices = high_ratings['i'].map(item_to_idx).values

# Use binary weights (1 for all positive interactions)
# Or use actual ratings as confidence
weights = np.ones(len(user_indices))  # Binary
# weights = high_ratings['rating'].values  # Or use ratings as confidence

user_item_matrix = coo_matrix(
    (weights, (user_indices, item_indices)),
    shape=(len(user_ids), len(item_ids))
).tocsr()

print(f"Sparse matrix shape: {user_item_matrix.shape}")
print(f"Sparsity: {1 - (user_item_matrix.nnz / (user_item_matrix.shape[0] * user_item_matrix.shape[1])):.4f}")

# Train BPR model
print("\nTraining BPR model...")
model_bpr = implicit.bpr.BayesianPersonalizedRanking(
    factors=100,
    iterations=50,
    learning_rate=0.01,
    regularization=0.01,
    random_state=42
)

# BPR expects item-user matrix (transposed)
model_bpr.fit(user_item_matrix.T)

print("BPR model trained!")



Total training interactions: 95000
High-rating interactions (>=4): 87630
Number of users: 15209
Number of items: 50552
Sparse matrix shape: (15209, 50552)
Sparsity: 0.9999

Training BPR model...




  0%|          | 0/50 [00:00<?, ?it/s]

BPR model trained!


In [7]:
K = 30

In [8]:
# Build set of items each user has seen
users_seen_train = train_interactions.groupby('u')['i'].agg(set).to_dict()

# Popularity fallback
recipe_rating_sum = train_interactions.groupby('i')['rating'].sum()
recipe_rating_count = train_interactions.groupby('i')['rating'].count()
recipe_avg_rating = recipe_rating_sum / recipe_rating_count
recipe_popularity = recipe_avg_rating * recipe_rating_count
top_popular_recipes = list(recipe_popularity.sort_values(ascending=False).index[:K])

# Generate recommendations for test users

test_users = test_interactions['u'].unique()

recommendations_bpr = {}

for user_id in tqdm(test_users, desc="BPR recommendations"):
    if user_id not in user_to_idx:
        # Unknown user: use popularity fallback
        recommendations_bpr[int(user_id)] = [int(x) for x in top_popular_recipes[:K]]
        continue
    
    user_idx = user_to_idx[user_id]
    user_seen = users_seen_train.get(user_id, set())
    
    # Get recommendations from BPR
    # Note: recommend() expects user index and user's row from the matrix
    recommended_items, scores = model_bpr.recommend(
        user_idx, 
        user_item_matrix[user_idx],
        N=K*3,  # Get more than needed to account for filtering
        filter_already_liked_items=True
    )
    
    # Convert back to original item IDs and filter already seen items
    recs = []
    for item_idx in recommended_items:
        if item_idx in idx_to_item:
            item_id = idx_to_item[item_idx]
            if item_id not in user_seen:
                recs.append(int(item_id))
            if len(recs) >= K:
                break
    
    # If we don't have enough recommendations, fill with popular items
    if len(recs) < K:
        for pop_item in top_popular_recipes:
            if pop_item not in user_seen and pop_item not in recs:
                recs.append(int(pop_item))
            if len(recs) >= K:
                break
    
    recommendations_bpr[int(user_id)] = recs[:K]

# Save recommendations
with open("bpr_recommendations.json", "w") as f:
    json.dump(recommendations_bpr, f, indent=4)

print(f"\nSaved BPR recommendations to 'bpr_recommendations.json'")
print(f"Total users with recommendations: {len(recommendations_bpr)}")

BPR recommendations:   0%|                                                                         | 0/2834 [00:00<?, ?it/s]


IndexError: index 15393 is out of bounds for axis 1 with size 15209