# Dependencies

In [1]:
import os
import pandas as pd
import numpy as np

# Part 1: Simple recommendations

In [2]:
# The CSV location is defined in the project-local `config` module.
from config import raw_interactions_path

# Load the raw interactions table and preview its first two rows.
raw_interactions = pd.read_csv(raw_interactions_path)
raw_interactions.head(2)

Unnamed: 0,user_id,recipe_id,date,rating,review
0,38094,40893,2003-02-17,4,Great with a salad. Cooked on top of stove for...
1,1293707,40893,2011-12-21,5,"So simple, so delicious! Great for chilly fall..."


In [3]:
def get_review_count(df):
    """Summarise the ratings each recipe received.

    Args:
        df: DataFrame with at least the columns 'recipe_id' and 'rating'.

    Returns:
        A new DataFrame with one row per distinct recipe_id (in order of
        first appearance in ``df``) and three columns:
          * 'recipe_id'
          * 'ratings': dict mapping every rating value seen in ``df`` to the
            number of times this recipe received it (0 if never),
            e.g. {0: 0, 1: 0, 2: 0, 3: 0, 4: 1, 5: 1}
          * 'ratings_count': number of non-null ratings for the recipe, e.g. 2
    """
    rating_by_recipe = df.groupby('recipe_id')['rating']

    # {recipe_id: {rating_value: count, ...}, ...}; unstack fills missing
    # (recipe, rating) combinations with 0 so every dict has the same keys.
    per_rating_counts = (
        rating_by_recipe
        .value_counts()
        .unstack(fill_value=0)
        .to_dict(orient='index')
    )

    # {recipe_id: total number of ratings, ...}
    total_counts = rating_by_recipe.count().to_dict()

    summary = pd.DataFrame({'recipe_id': df['recipe_id'].unique()})
    return summary.assign(
        ratings=summary['recipe_id'].map(per_rating_counts),
        ratings_count=summary['recipe_id'].map(total_counts),
    )

In [4]:
# Build the per-recipe rating summary (rating histogram + total count)
# and preview its first two rows.
ratings_df = get_review_count(raw_interactions)
ratings_df.head(2)

Unnamed: 0,recipe_id,ratings,ratings_count
0,40893,"{0: 0, 1: 0, 2: 0, 3: 0, 4: 1, 5: 1}",2
1,44394,"{0: 0, 1: 0, 2: 0, 3: 0, 4: 1, 5: 0}",1


In [5]:
# Compute the weighted average of ratings
def weighted_average(ratings_dict):
    """Return the count-weighted mean rating of a ``{rating: count}`` mapping.

    Args:
        ratings_dict: Mapping from a rating value (a number, or a string that
            parses as a number) to the number of times that rating was given.

    Returns:
        ``sum(rating * count) / sum(count)`` as a float, or 0.0 when the
        counts sum to zero (which includes the empty mapping).
    """
    total_count = sum(ratings_dict.values())
    if total_count == 0:
        # No reviews at all: avoid dividing by zero and fall back to 0.
        return 0.0

    # float() rather than int(): int() would silently truncate fractional
    # ratings (4.5 -> 4) and raise ValueError on strings such as '4.5'.
    weighted_sum = sum(float(rating) * count for rating, count in ratings_dict.items())
    return weighted_sum / total_count

def compute_popularity_score(df):
    """Add a 'popularity' column holding each row's weighted-average rating.

    The score is ``weighted_average`` applied to the row's 'ratings' dict
    ({rating: count, ...}).

    Note: ``df`` is modified in place (the 'popularity' column is written onto
    the frame that was passed in); the same object is returned for convenience.

    Args:
        df: DataFrame with a 'ratings' column of {rating: count} dicts.

    Returns:
        The same DataFrame, now carrying a 'popularity' column.
    """
    popularity_scores = df['ratings'].apply(weighted_average)
    df['popularity'] = popularity_scores
    return df

In [6]:
# Score every recipe. This writes a 'popularity' column onto ratings_df in
# place; the returned frame is displayed as the cell output.
compute_popularity_score(ratings_df)

Unnamed: 0,recipe_id,ratings,ratings_count,popularity
0,40893,"{0: 0, 1: 0, 2: 0, 3: 0, 4: 1, 5: 1}",2,4.500000
1,44394,"{0: 0, 1: 0, 2: 0, 3: 0, 4: 1, 5: 0}",1,4.000000
2,85009,"{0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 2}",2,5.000000
3,120345,"{0: 1, 1: 0, 2: 1, 3: 0, 4: 1, 5: 0}",3,2.000000
4,134728,"{0: 1, 1: 0, 2: 0, 3: 0, 4: 1, 5: 5}",7,4.142857
...,...,...,...,...
231632,470995,"{0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 1}",1,5.000000
231633,241491,"{0: 0, 1: 0, 2: 0, 3: 0, 4: 1, 5: 1}",2,4.500000
231634,257796,"{0: 0, 1: 0, 2: 0, 3: 0, 4: 1, 5: 0}",1,4.000000
231635,72730,"{0: 1, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0}",1,0.000000
