In [4]:
import matplotlib.pyplot as plt
from scipy import sparse
import numpy as np
import pickle
from collections import Counter
# Print NumPy floating-point values with 4 digits of precision.
np.set_printoptions(precision=4)

In [5]:
#load everything
# Column boundary used by the helpers in this notebook: nonzero columns of X
# below this index are decoded as ingredients, the ones from it upward as tags.
num_ing = 8023

# Decoders are indexed by integer ids and return display names.
# NOTE(security): pickle.load can execute arbitrary code while unpickling --
# only load files that were produced by a trusted pipeline.
# `with` guarantees each file handle is closed once its pickle has been read.
with open('data-cleaned/recipe_decoder.pkl', 'rb') as fh:
    recipe_decoder = pickle.load(fh)
with open('data-cleaned/ingredient_decoder.pkl', 'rb') as fh:
    ingr_decoder = pickle.load(fh)
with open('data-cleaned/tag_decoder.pkl', 'rb') as fh:
    tag_decoder = pickle.load(fh)

# Sparse matrices saved with scipy.sparse.save_npz.
# The *hat variants are loaded from the "*_tfidf" files -- presumably TF-IDF
# weighted versions of the same matrices (TODO confirm against the cleaning step).
X     = sparse.load_npz("data-cleaned/recipes.npz")
Xhat  = sparse.load_npz("data-cleaned/recipes_tfidf.npz")
U     = sparse.load_npz("data-cleaned/user_train.npz")
Uhat  = sparse.load_npz("data-cleaned/user_train_tfidf.npz")
Utest = sparse.load_npz("data-cleaned/user_test.npz")
def intersection(lst1, lst2):
    """Return the distinct elements present in both iterables, as a list.

    Duplicates are collapsed and the order of the result is unspecified
    (it follows set iteration order). Elements must be hashable.
    """
    first = set(lst1)
    second = set(lst2)
    shared = first.intersection(second)
    return list(shared)

In [7]:
def recipe_info(i):
    """Print the name, calorie level, ingredients and tags of recipe ``i``.

    Row ``i`` of the global matrix ``X`` is read as follows: the last column
    is the calorie level; among the remaining columns, nonzero indices below
    ``num_ing`` are decoded with ``ingr_decoder`` and nonzero indices from
    ``num_ing`` upward are decoded with ``tag_decoder`` (tag id = column -
    ``num_ing``).

    Parameters
    ----------
    i : int
        Row index into ``X`` and key into ``recipe_decoder``.

    Returns
    -------
    None
        The information is printed, not returned.
    """
    #extract info
    calories = X[i, -1]
    idx = X[i, :-1].nonzero()[1]
    ingredients = [ingr_decoder[j] for j in idx[idx < num_ing]]
    # Use >= rather than >: with the ``- num_ing`` offset, column ``num_ing``
    # is tag 0, and a strict comparison silently dropped it from the output.
    # (Assumes the tag columns start right after the ingredient columns, as
    # the offset implies.)
    tags = [tag_decoder[j] for j in idx[idx >= num_ing] - num_ing]
    name = recipe_decoder[i]

    #print
    print(f"Recipe: {name}")
    print(f"Calorie Level: {calories}")
    print(f"Ingredients: {ingredients}")
    print(f"Tags: {tags}")

In [8]:
def _recipe_features(i):
    """Return ``(name, calories, ingredients, tags)`` decoded from row ``i`` of ``X``."""
    cols = X[i, :-1].nonzero()[1]  # last column (calories) is excluded here
    ingredients = [ingr_decoder[k] for k in cols[cols < num_ing]]
    # Use >= rather than >: with the ``- num_ing`` offset, column ``num_ing``
    # is tag 0, and a strict comparison silently dropped it.
    tags = [tag_decoder[k] for k in cols[cols >= num_ing] - num_ing]
    return recipe_decoder[i], X[i, -1], ingredients, tags


def recipe_comp(i, j):
    """Print a side-by-side comparison of recipes ``i`` and ``j``.

    Prints both names, the difference of their calorie levels (``i`` minus
    ``j``), the ingredients they share and the tags they share. The order of
    the shared lists is unspecified because they come from a set intersection.

    Parameters
    ----------
    i, j : int
        Row indices into ``X`` and keys into ``recipe_decoder``.

    Returns
    -------
    None
        The comparison is printed, not returned.
    """
    #extract info
    namei, cali, ingi, tagsi = _recipe_features(i)
    namej, calj, ingj, tagsj = _recipe_features(j)

    #print
    print(f"Recipe: {namei}, {namej}")
    print(f"Difference Calorie Level: {cali-calj}")
    print(f"Shared Ingredients: {intersection(ingi, ingj)}")
    print(f"Tags: {intersection(tagsi, tagsj)}")

In [9]:
def recipe_score(i, j):
    """Similarity score between recipes, read from the global matrix ``X``.

    The score of a pair is the number of columns (excluding the last one)
    that are nonzero in both rows, plus 1 if the two rows have the same value
    in the last (calorie) column.

    Parameters
    ----------
    i, j : int or sequence of int
        Row indices into ``X``.

        * scalar, scalar     -> one score.
        * scalar, sequence   -> the scalar recipe scored against every
          element of the sequence (either argument order; the score is
          symmetric).
        * sequence, sequence -> element-wise scores of ``zip(i, j)``. If the
          lengths differ, pairing stops at the shorter one; the result has
          the shape of ``i`` and any unpaired entries stay 0.

    Returns
    -------
    int or numpy.ndarray
        A plain ``int`` for two scalars, otherwise an array created with
        ``np.zeros_like`` on the sequence argument (on ``i`` when both are
        sequences).
    """
    def _features(r):
        # Nonzero columns of row r (calorie column excluded) as a set, plus
        # the calorie value from the last column.
        return set(X[r, :-1].nonzero()[1]), X[r, -1]

    def _score(a, b):
        # Calories are compared separately from the other columns, so they
        # are never double counted.
        return len(a[0] & b[0]) + int(a[1] == b[1])

    i_scalar, j_scalar = np.isscalar(i), np.isscalar(j)

    #just do the single one if they're scalars
    if i_scalar and j_scalar:
        return _score(_features(i), _features(j))

    if i_scalar or j_scalar:
        # Exactly one argument is a scalar. Previously only "scalar i,
        # sequence j" was handled; the mirrored call fell through to
        # zip(i, j) and raised TypeError. The score is symmetric, so both
        # orders take the same path.
        single, many = (i, j) if i_scalar else (j, i)
        fixed = _features(single)  # loop-invariant: computed once
        score = np.zeros_like(many)
        for num, k in enumerate(many):
            score[num] = _score(fixed, _features(k))
        return score

    score = np.zeros_like(i)
    #iterate through all values
    for num, (k, l) in enumerate(zip(i, j)):
        score[num] = _score(_features(k), _features(l))

    return score

In [14]:
def recommendation_score(liked_recipes, recommended_recipes, n=5):
    """Mean, over the recommendations, of each one's best match among the liked recipes.

    For every recommended recipe the highest ``recipe_score`` against any
    liked recipe is taken (floored at 0), and those maxima are averaged.

    Parameters
    ----------
    liked_recipes : iterable of int
        Recipe indices the user liked. If empty, the result is 0.0.
    recommended_recipes : sequence of int
        Recipe indices that were recommended. If empty, the result is 0.0.
    n : int, optional
        Currently unused; kept so existing calls keep working.

    Returns
    -------
    float
        The mean of the per-recommendation maximum scores.
    """
    # No recommendations means nothing to average. Return 0.0 rather than the
    # NaN (and RuntimeWarning) that the mean of an empty array would produce,
    # consistent with the result for an empty ``liked_recipes``.
    if len(recommended_recipes) == 0:
        return 0.0

    max_scores = np.zeros(len(recommended_recipes))
    #find which liked recipe each recommendation is most similar to
    for l in liked_recipes:
        score = recipe_score(l, recommended_recipes)
        max_scores = np.maximum(score, max_scores)

    return max_scores.mean()