In [1]:
import pandas as pd
import numpy as np
import scipy as sp
import DataLoader
from collections import defaultdict
from UserMeanEstimator import UserMeanEstimator
from RecipeMeanEstimator import RecipeMeanEstimator

In [2]:
ratings = pd.read_csv("data/all_recipe_clean.csv")
dataLoader = DataLoader.DataLoader(ratings)
# user_holdout, recipe_holdout, holdout = dataLoader.get_holdout_data()
# holdout_X = [t[:2] for t in holdout]
# holdout_Y = np.array([t[2] for t in holdout])

In [76]:
class RecipeRecommender:
    """
    
    distance: Function that takes in list of recipe_ids selected so far
        and target recipe_id and computes the distance between them. Higher
        distance scores are considered better for diversity.
    """
    def __init__(self, 
                 dataLoader, 
                 estimator, 
                 diversity_weight=1.0, 
                 distance = 'cosine'):
        self.estimator = estimator
        self.dataLoader = dataLoader
        self.recipe_ids = frozenset(dataLoader.get_recipe_ids())
        self.diversity_weight = diversity_weight
        self.diversity = self.get_diversity_calculation(distance)
        self.sim_cache = {}
        
    def get_diversity_calculation(self, distance):
        """
        Returns the function used to calculate how item new_item would affect the diversity
        of recipe set current_items. Higher diversity score is better
        """
        
        def average_cosine_similarity(current_items, new_item_id):
            sim = 0.0
            for recipe_id in current_items:
                sim += self.compute_cosine_similiarity(recipe_id, new_item_id)
            return -sim / len(current_items)
    
        return average_cosine_similarity
    
    def compute_cosine_similiarity(self, r1, r2):
        key = tuple(sorted([r1, r2]))
        if key not in self.sim_cache:
            ratings1 = self.dataLoader.get_recipe_ratings(r1, defaultdict(set))
            ratings2 = self.dataLoader.get_recipe_ratings(r2, defaultdict(set))
            prod = 0
            for k in ratings1:
                if k in ratings2:
                    prod += ratings1[k] * ratings2[k]
            self.sim_cache[key] = prod / (len(ratings1) * len(ratings2))
        return self.sim_cache[key]

    def get_recommendations(self, user_id, n_recs=100):
        preds = self.predict_unrated_recipes(user_id)
        return self.select_diverse_recipes(preds, n_recs)
        
    def predict_unrated_recipes(self, user_id):
        """
        return a sorted list with tuples of recipe id and the user's predicted rating
        for all unrated recipes
        """
        user_rated_recipes = frozenset(self.dataLoader.get_user_ratings(user_id, {}))
        unrated_recipes = self.recipe_ids - user_rated_recipes
        X = [(user_id, recipe_id) for recipe_id in unrated_recipes]
        predictions = estimator.predict(X)
        res = []
        for i, info in enumerate(X):
            _, recipe_id = info
            res.append((recipe_id, predictions[i]))
        return sorted(res,key=lambda x: -x[1])
    
    def select_diverse_recipes(self, ratings, n_recs):
        """
        ratings: list of (recipe_id, rating) tuples, sorted in descending
            order by rating
        """
        recs = []
        original_ratings = {id: rating for id, rating in ratings}
        for _ in range(n_recs):
            target_recipe_id, _ = ratings.pop(0)
            recs.append(target_recipe_id)
            new_ratings = []
            for i, item in enumerate(ratings):
                recipe_id, _ = item
                predicted_rating = original_ratings[recipe_id]
                revised_score = predicted_rating + self.diversity_weight * self.diversity(recs, recipe_id)
                if revised_score == predicted_rating:
                    new_ratings = new_ratings + ratings[i:]
                    break
                new_ratings.append((recipe_id, revised_score))
            ratings = sorted(new_ratings, key=lambda x: -x[1])
        return recs
            

In [77]:
# Step 1: For a given user, run the estimator an all unrated recipes

estimator = RecipeMeanEstimator(dataLoader, defaultdict(set))
recommender = RecipeRecommender(dataLoader, estimator)
preds = recommender.predict_unrated_recipes(36964)
    

In [71]:
# Step 2: Using the estimates, construct the set that balances diversity and predicted utility
preds_old = recommender.select_diverse_recipes(preds, 100)
# preds_old

[(0, 5.0), (1, 5.0), (2, 5.0), (3, 5.0), (10, 5.0), (13, 5.0), (16, 5.0), (17, 5.0), (19, 5.0), (20, 5.0)]
[(1, 5.0), (2, 5.0), (3, 5.0), (10, 5.0), (13, 5.0), (16, 5.0), (17, 5.0), (19, 5.0), (20, 5.0), (33, 5.0)]
[(2, 5.0), (3, 5.0), (10, 5.0), (13, 5.0), (16, 5.0), (17, 5.0), (19, 5.0), (20, 5.0), (33, 5.0), (42, 5.0)]
[(3, 5.0), (10, 5.0), (13, 5.0), (16, 5.0), (19, 5.0), (20, 5.0), (33, 5.0), (42, 5.0), (45, 5.0), (48, 5.0)]
[(10, 5.0), (13, 5.0), (16, 5.0), (19, 5.0), (20, 5.0), (33, 5.0), (42, 5.0), (45, 5.0), (48, 5.0), (50, 5.0)]
[(13, 5.0), (16, 5.0), (19, 5.0), (20, 5.0), (33, 5.0), (42, 5.0), (45, 5.0), (50, 5.0), (52, 5.0), (55, 5.0)]
[(16, 5.0), (19, 5.0), (20, 5.0), (33, 5.0), (42, 5.0), (45, 5.0), (50, 5.0), (52, 5.0), (55, 5.0), (56, 5.0)]
[(19, 5.0), (20, 5.0), (33, 5.0), (42, 5.0), (45, 5.0), (50, 5.0), (55, 5.0), (56, 5.0), (59, 5.0), (63, 5.0)]
[(20, 5.0), (33, 5.0), (42, 5.0), (45, 5.0), (50, 5.0), (55, 5.0), (56, 5.0), (59, 5.0), (63, 5.0), (81, 5.0)]
[(33, 5.0),

[(1066, 5.0), (1073, 5.0), (1074, 5.0), (1099, 5.0), (1106, 5.0), (1115, 5.0), (1159, 5.0), (1162, 5.0), (1216, 5.0), (1275, 5.0)]
[(1073, 5.0), (1074, 5.0), (1099, 5.0), (1106, 5.0), (1115, 5.0), (1159, 5.0), (1162, 5.0), (1216, 5.0), (1275, 5.0), (1289, 5.0)]
[(1074, 5.0), (1099, 5.0), (1106, 5.0), (1115, 5.0), (1159, 5.0), (1162, 5.0), (1216, 5.0), (1275, 5.0), (1289, 5.0), (1326, 5.0)]
[(1099, 5.0), (1106, 5.0), (1115, 5.0), (1159, 5.0), (1162, 5.0), (1216, 5.0), (1275, 5.0), (1289, 5.0), (1326, 5.0), (1329, 5.0)]
[(1106, 5.0), (1115, 5.0), (1159, 5.0), (1162, 5.0), (1216, 5.0), (1275, 5.0), (1289, 5.0), (1326, 5.0), (1329, 5.0), (1348, 5.0)]
[(1115, 5.0), (1159, 5.0), (1162, 5.0), (1216, 5.0), (1275, 5.0), (1289, 5.0), (1326, 5.0), (1329, 5.0), (1348, 5.0), (1376, 5.0)]
[(1159, 5.0), (1162, 5.0), (1216, 5.0), (1275, 5.0), (1289, 5.0), (1326, 5.0), (1329, 5.0), (1348, 5.0), (1376, 5.0), (1439, 5.0)]
[(1162, 5.0), (1216, 5.0), (1275, 5.0), (1289, 5.0), (1326, 5.0), (1329, 5.0), (134

[0,
 1,
 2,
 3,
 10,
 13,
 16,
 19,
 20,
 33,
 42,
 45,
 55,
 56,
 59,
 63,
 81,
 99,
 101,
 133,
 134,
 156,
 161,
 176,
 180,
 197,
 200,
 213,
 226,
 241,
 271,
 286,
 305,
 310,
 320,
 332,
 334,
 359,
 367,
 377,
 385,
 389,
 403,
 454,
 477,
 482,
 533,
 536,
 586,
 612,
 629,
 633,
 668,
 691,
 693,
 745,
 755,
 759,
 767,
 879,
 883,
 938,
 967,
 976,
 979,
 986,
 990,
 999,
 1001,
 1066,
 1073,
 1074,
 1099,
 1106,
 1115,
 1159,
 1162,
 1216,
 1275,
 1326,
 1329,
 1348,
 1376,
 1439,
 1448,
 1480,
 1492,
 1497,
 1520,
 1616,
 1630,
 1637,
 1658,
 1672,
 1687,
 1705,
 1754,
 1783,
 1798,
 1883]

In [82]:
recommender.get_recommendations(36964)

[0,
 1,
 2,
 3,
 10,
 13,
 16,
 19,
 20,
 33,
 42,
 45,
 55,
 56,
 59,
 63,
 81,
 99,
 101,
 133,
 134,
 156,
 161,
 176,
 180,
 197,
 200,
 213,
 226,
 241,
 271,
 286,
 305,
 310,
 320,
 332,
 334,
 359,
 367,
 377,
 385,
 389,
 403,
 454,
 477,
 482,
 533,
 536,
 586,
 612,
 629,
 633,
 668,
 691,
 693,
 745,
 755,
 759,
 767,
 879,
 883,
 938,
 967,
 976,
 979,
 986,
 990,
 999,
 1001,
 1066,
 1073,
 1074,
 1099,
 1106,
 1115,
 1159,
 1162,
 1216,
 1275,
 1326,
 1329,
 1348,
 1376,
 1439,
 1448,
 1480,
 1492,
 1497,
 1520,
 1616,
 1630,
 1637,
 1658,
 1672,
 1687,
 1705,
 1754,
 1783,
 1798,
 1883]

In [80]:
np.all(np.isclose(preds_new, preds_old))

True