In [1]:
import random
import loader 
import numpy as np
import pandas as pd
import json
from tqdm import tqdm

from surprise import Dataset, Reader, SVD
from surprise.model_selection import train_test_split
from collections import defaultdict
from surprise.model_selection import GridSearchCV

In [2]:
root = "datasets3/"
train_interactions = pd.read_csv(root+"seen_interactions.csv")
test_interactions = pd.read_csv(root+"hidden_interactions.csv")

In [4]:
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(train_interactions[['u', 'i', 'rating']], reader)

trainset = data.build_full_trainset()
svd = SVD(n_factors=150, n_epochs=30, lr_all=0.007, reg_all=0.02)

# Train the model
svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7017dfa26bd0>

In [5]:
import numpy as np
import json
from tqdm import tqdm
from collections import defaultdict

K = 30

# Build a set of recipes each user has already seen in the training data
users_seen_train = train_interactions.groupby('u')['i'].agg(set).to_dict()

# Compute overall recipe popularity for fallback
recipe_rating_sum = train_interactions.groupby('i')['rating'].sum()
recipe_rating_count = train_interactions.groupby('i')['rating'].count()
recipe_avg_rating = recipe_rating_sum / recipe_rating_count
recipe_popularity = recipe_avg_rating * recipe_rating_count
top_popular_recipes = list(recipe_popularity.sort_values(ascending=False).index[:K])

# List of all unique items (recipes) in the training data
all_items = train_interactions['i'].unique()

# Only users in the test interactions
test_users = test_interactions['u'].unique()

# Access latent factors and biases
trainset = svd.trainset
P = svd.pu       # user factors
Q = svd.qi       # item factors
bu = svd.bu      # user biases
bi = svd.bi      # item biases
global_mean = trainset.global_mean

recommendations_svd = {}

for user_id in tqdm(test_users, desc="SVD predictions for test users"):
    if user_id not in trainset._raw2inner_id_users:
        # Unknown user: fallback to top popular recipes
        recommendations_svd[int(user_id)] = [int(x) for x in top_popular_recipes]
        continue

    user_inner_id = trainset.to_inner_uid(user_id)
    user_seen = users_seen_train.get(user_id, set())
    
    # Get user vector
    user_vector = P[user_inner_id]
    user_bias = bu[user_inner_id]
    
    predictions = []
    
    for item_id in all_items:
        # Skip items the user has already seen
        if item_id in user_seen:
            continue
            
        # Get item inner id
        if item_id not in trainset._raw2inner_id_items:
            continue
            
        item_inner_id = trainset.to_inner_iid(item_id)
        item_vector = Q[item_inner_id]
        item_bias = bi[item_inner_id]
        
        # Compute predicted rating
        pred = global_mean + user_bias + item_bias + np.dot(user_vector, item_vector)
        
        predictions.append((item_id, pred))
    
    # Sort by predicted rating (descending) and take top K
    predictions.sort(key=lambda x: x[1], reverse=True)
    top_k_items = [int(item_id) for item_id, _ in predictions[:K]]
    
    recommendations_svd[int(user_id)] = top_k_items

# Save recommendations to JSON
with open("svd_recommendations.json", "w") as f:
    json.dump(recommendations_svd, f, indent=4)

print("Saved SVD recommendations to 'svd_recommendations.json'")

SVD predictions for test users: 100%|███████████████████████████████████████████████████| 2834/2834 [04:19<00:00, 10.94it/s]


Saved SVD recommendations to 'svd_recommendations.json'
