In [1]:
import random
import loader 
import pandas as pd
from collections import defaultdict
import json

In [2]:
# Folder containing the interaction CSVs. The trailing slash matters because
# the file paths below are built by plain string concatenation.
root = "datasets3/"
train_interactions = pd.read_csv(f"{root}seen_interactions.csv")
test_interactions = pd.read_csv(f"{root}hidden_interactions.csv")

In [3]:
# Drop the raw id and date columns from both splits.
# errors="ignore" keeps this cell idempotent: the frames are reassigned to the
# same names, so a second run would otherwise raise KeyError on the
# already-removed columns.
_unused_columns = ["user_id", "recipe_id", "date"]
train_interactions = train_interactions.drop(columns=_unused_columns, errors="ignore")
test_interactions = test_interactions.drop(columns=_unused_columns, errors="ignore")

In [4]:
# Display the column labels of the training interactions frame.
train_interactions.columns

Index(['rating', 'u', 'i'], dtype='object')

In [5]:
# Collect, for every user, the set of recipes they interacted with in the
# training data (no rating threshold is applied):
# users_reviews[u] = {i, ...}
users_reviews = train_interactions.groupby('u')['i'].agg(set).to_dict()

# Compute sum and count of ratings per recipe
recipe_stats = train_interactions.groupby('i')['rating'].agg(['sum', 'count'])

# Build the lookups column-wise instead of looping over iterrows():
# iterrows() returns each row as a single-dtype Series, so an integer count
# sitting next to a float sum would come back as a float. The defaultdict
# wrappers are kept so missing-key behavior is unchanged for later cells.
recipe_rating_sum = defaultdict(float, recipe_stats['sum'].to_dict())
recipe_rating_count = defaultdict(int, recipe_stats['count'].to_dict())



In [6]:
# Cutoff for the top-K lists: caps each user's recommendation list and sets
# how many of the most popular recipe ids are printed.
K = 30

In [7]:
# Popularity = average rating * number of ratings
# (algebraically this equals the recipe's rating sum, up to float rounding).
recipe_avg_rating = {
    rid: total / recipe_rating_count[rid]
    for rid, total in recipe_rating_sum.items()
}

recipe_popularity = {
    rid: avg * recipe_rating_count[rid]
    for rid, avg in recipe_avg_rating.items()
}

# Order (recipe, popularity) pairs from most to least popular. The sort is
# stable, so recipes with equal popularity keep their dict insertion order.
popular_recipes_sorted = list(recipe_popularity.items())
popular_recipes_sorted.sort(key=lambda pair: pair[1], reverse=True)
popular_recipe_ids = [pair[0] for pair in popular_recipes_sorted]

print("Top ", str(K), " most popular recipes (IDs):", popular_recipe_ids[:K])

Top  30  most popular recipes (IDs): [134610, 117899, 135961, 99787, 52334, 147374, 101819, 147180, 89113, 37047, 56425, 73956, 55772, 15173, 127080, 28552, 139822, 50909, 37359, 75244, 125637, 19297, 118496, 19812, 32114, 89924, 79996, 106975, 149428, 138882]


In [8]:
# Popularity baseline: for every user in the test split, walk the globally
# ranked recipe list and keep the first K recipes that the user has not
# already interacted with in the training data.
recommendations = {}

test_users = test_interactions['u'].unique()

for user_id in test_users:
    # Look the user's history up once per user instead of once per candidate
    # recipe; users with no training interactions get an empty set.
    seen = users_reviews.get(user_id, set())
    recs = []
    for rid in popular_recipe_ids:
        if rid in seen:
            continue  # skip already seen recipes
        recs.append(int(rid))  # plain int so the value is JSON-serializable
        if len(recs) >= K:
            break
    recommendations[int(user_id)] = recs

# Persist the recommendations; json writes the integer user ids as string keys.
with open("baseline_recommendations.json", "w") as f:
    json.dump(recommendations, f, indent=4)