# Dependencies

In [1]:
import pandas as pd
import numpy as np

# Part 1: Simple recommendations

## Popularity-based recommendation

In [2]:
from config import raw_interactions_path

# Load the raw interactions table from the CSV path supplied by the config module
raw_interactions = pd.read_csv(raw_interactions_path)
# Preview the first two rows (the rendered output shows user_id, recipe_id, date, rating, review)
raw_interactions.head(2)

Unnamed: 0,user_id,recipe_id,date,rating,review
0,38094,40893,2003-02-17,4,Great with a salad. Cooked on top of stove for...
1,1293707,40893,2011-12-21,5,"So simple, so delicious! Great for chilly fall..."


In [3]:
def get_review_count(df):
    """Summarise the reviews of each recipe into one row per recipe.

    Parameters
    ----------
    df : pandas.DataFrame
        Interaction table with at least the columns 'recipe_id' and 'rating'.

    Returns
    -------
    pandas.DataFrame
        Columns, in order:
        - 'recipe_id': the distinct recipe ids, in order of first appearance in `df`;
        - 'ratings': a dict {rating_value: count} covering every rating value that
          occurs anywhere in `df` (0 for values the recipe never received),
          e.g. {0: 1, 2: 6, 3: 0, 4: 3, 5: 0};
        - 'ratings_count': number of non-null ratings for the recipe, e.g. 10.
    """
    ratings_by_recipe = df.groupby('recipe_id')['rating']

    # Histogram table: one row per recipe, one column per rating value, zero-filled
    histogram_table = ratings_by_recipe.value_counts().unstack(fill_value=0)
    # {recipe_id: {rating_value: count, ...}, ...}
    histogram_lookup = histogram_table.to_dict(orient = 'index')

    # {recipe_id: number_of_ratings, ...}
    total_lookup = ratings_by_recipe.count().to_dict()

    # One row per distinct recipe, with both lookups attached by recipe id
    summary = pd.DataFrame({'recipe_id': df['recipe_id'].unique()})
    return summary.assign(
        ratings=lambda frame: frame['recipe_id'].map(histogram_lookup),
        ratings_count=lambda frame: frame['recipe_id'].map(total_lookup),
    )


# Aggregate the interactions into one row per recipe (rating histogram dict + total rating count)
ratings_df = get_review_count(raw_interactions)
# Preview the first two aggregated rows
ratings_df.head(2)

Unnamed: 0,recipe_id,ratings,ratings_count
0,40893,"{0: 0, 1: 0, 2: 0, 3: 0, 4: 1, 5: 1}",2
1,44394,"{0: 0, 1: 0, 2: 0, 3: 0, 4: 1, 5: 0}",1


In [4]:
# Compute the global average rating
def compute_global_average_rating(df):
    """Return the mean rating over every review of every recipe in `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'ratings' column whose entries are dicts mapping a rating
        value (an int, or anything `int()` accepts) to the number of times that
        rating was given.

    Returns
    -------
    float or int
        sum(rating * count) / sum(count) over all recipes, or 0 when `df`
        holds no ratings at all.
    """
    total_count = 0          # total number of ratings for all recipes
    total_weighted_sum = 0   # sum of rating * count for all recipes

    # Read only from the `df` argument: the previous version took the count from
    # the notebook-level `ratings_df`, so the result was wrong (or a NameError)
    # whenever a different frame was passed in.
    for rating_dict in df['ratings']:
        for rating, count in rating_dict.items():
            total_count += count
            total_weighted_sum += int(rating) * count

    # Guard against an empty frame / all-zero histograms
    return total_weighted_sum / total_count if total_count != 0 else 0


# Dataset-wide mean rating; kept as a notebook-level name because weighted_rating() reads it directly
global_average_rating = compute_global_average_rating(ratings_df)

In [5]:
# Compute the weighted average of ratings
def weighted_rating(ratings_dict, min_number_ratings=5, global_average=None):
    """Return the popularity score of one recipe as a shrunken average rating.

    The score is ``v / (v + m) * R + m / (v + m) * C`` where ``v`` is the number
    of ratings, ``R`` the recipe's own average rating, ``m`` the minimum number
    of ratings and ``C`` the global average rating.

    Parameters
    ----------
    ratings_dict : dict
        Maps a rating value (an int, or anything `int()` accepts) to its count.
    min_number_ratings : int, optional
        Minimum number of ratings required to be listed in the chart (``m``).
        Defaults to 5, the value that used to be hard-coded.
    global_average : float, optional
        Global average rating (``C``). When omitted, the notebook-level
        `global_average_rating` is used, as before.

    Returns
    -------
    float
        The weighted rating. A recipe with no ratings gets `global_average`
        (the value of the formula at v = 0) instead of raising ZeroDivisionError.
    """
    if global_average is None:
        # Backward-compatible fallback for callers such as Series.apply(weighted_rating)
        global_average = global_average_rating

    # total number of ratings
    number_ratings = sum(ratings_dict.values())
    if number_ratings == 0:
        # No votes: the recipe's own average is undefined, so only the prior remains
        return global_average

    # the average rating
    average_rating = sum(int(rating) * count for rating, count in ratings_dict.items()) / number_ratings

    # compute the weighted rating (popularity score)
    denominator = number_ratings + min_number_ratings
    score = (number_ratings / denominator) * average_rating + (min_number_ratings / denominator) * global_average
    return score

def compute_popularity_score(df):
    """Attach a 'popularity' column to `df` and return the same frame.

    Each entry of df['ratings'] (a {rating: count} dict) is passed to
    `weighted_rating`, a weighted average similar to that used by IMDB.
    The column is written onto the frame that was passed in, so the caller's
    object is modified as well as returned.
    """
    # One popularity score per recipe row, e.g. 4.92 for a recipe rated mostly 5
    popularity_scores = df['ratings'].apply(weighted_rating)
    df['popularity'] = popularity_scores
    return df

In [6]:
# Score every recipe, then order the table from most to least popular
ratings_df = compute_popularity_score(ratings_df).sort_values(by='popularity', ascending=False)

# Show the ten highest-scoring recipes
ratings_df.head(10)

Unnamed: 0,recipe_id,ratings,ratings_count,popularity
39572,486261,"{0: 0, 1: 0, 2: 0, 3: 0, 4: 2, 5: 215}",217,4.977726
40002,55309,"{0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 52}",52,4.948335
7176,24768,"{0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 37}",37,4.929883
56166,121941,"{0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 36}",36,4.928173
221730,166669,"{0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 36}",36,4.928173
164386,63621,"{0: 0, 1: 0, 2: 0, 3: 1, 4: 1, 5: 72}",74,4.924748
199988,62754,"{0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 32}",32,4.920408
19449,42038,"{0: 0, 1: 0, 2: 0, 3: 0, 4: 2, 5: 55}",57,4.920243
178184,269136,"{0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 31}",31,4.918197
213836,199171,"{0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 31}",31,4.918197


In [7]:
from config import raw_recipes_path

# Get the top 10 most popular recipes (ratings_df is already sorted by popularity, descending)
top_10 = ratings_df.head(10)

# Lookup tables for the top 10, keyed by recipe id:
# recipe id -> popularity score, and recipe id -> rating histogram
top_10_popularity_dict = top_10.set_index('recipe_id')['popularity'].to_dict()
top_10_ratings_dict = top_10.set_index('recipe_id')['ratings'].to_dict()

# Load the raw recipes dataset to get recipe information (name, ingredients, ...)
raw_recipes = pd.read_csv(raw_recipes_path)

# Select id and name of the top 10 recipes in a single .loc step, then attach the
# popularity score and the ratings with .assign. This builds a new frame rather
# than assigning columns onto the result of chained indexing
# (raw_recipes[mask][[...]]), which can trigger SettingWithCopyWarning.
top_10_popular_recipes = (
    raw_recipes
    .loc[raw_recipes['id'].isin(top_10['recipe_id']), ["id", "name"]]
    .assign(
        popularity=lambda recipes: recipes["id"].map(top_10_popularity_dict),
        ratings=lambda recipes: recipes["id"].map(top_10_ratings_dict),
    )
    .sort_values(by='popularity', ascending=False)
)

# Display the top 10 most popular recipes
top_10_popular_recipes

Unnamed: 0,id,name,popularity,ratings
134684,486261,mexican stack up rsc,4.977726,"{0: 0, 1: 0, 2: 0, 3: 0, 4: 2, 5: 215}"
35255,55309,caprese salad tomatoes italian marinated toma...,4.948335,"{0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 52}"
21941,24768,berry cream cheese coffee cake,4.929883,"{0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 37}"
118005,166669,kittencal s caesar tortellini salad,4.928173,"{0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 36}"
118290,121941,kittencal s soft white baguette style bread,4.928173,"{0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 36}"
129662,63621,mango salsa 1,4.924748,"{0: 0, 1: 0, 2: 0, 3: 1, 4: 1, 5: 72}"
124551,62754,linda s special potato salad,4.920408,"{0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 32}"
207459,42038,syrup for blueberry pancakes,4.920243,"{0: 0, 1: 0, 2: 0, 3: 0, 4: 2, 5: 55}"
29819,199171,broiled cinnamon toast,4.918197,"{0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 31}"
74262,269136,earth bread,4.918197,"{0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 31}"


## Recipe-steps-based recommendation

In [8]:
# Get steps of recipes
# Keep only the recipe id and its preparation steps
# NOTE(review): judging by the preview, 'steps' looks like a stringified Python list - confirm before parsing
steps_df = raw_recipes[["id", "steps"]]
# Preview the first two rows
steps_df.head(2)

Unnamed: 0,id,steps
0,137739,"['make a choice and proceed with recipe', 'dep..."
1,31490,"['preheat oven to 425 degrees f', 'press dough..."
