In [18]:
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import ShuffleSplit, KFold
import matplotlib.pyplot as plt
from scipy import sparse
from itertools import product
from time import time
import seaborn

from baseUtils import *

# Load the pre-cleaned sparse matrices in one pass. Going by the file names:
# X / Xhat are the raw and TF-IDF recipe matrices, U / Uhat the raw and TF-IDF
# training-user matrices, and Utest the held-out user matrix.
X, Xhat, U, Uhat, Utest = map(
    sparse.load_npz,
    (
        "data-cleaned/recipes.npz",
        "data-cleaned/recipes_tfidf.npz",
        "data-cleaned/user_train.npz",
        "data-cleaned/user_train_tfidf.npz",
        "data-cleaned/user_test.npz",
    ),
)

In [20]:
# --- Ground truth for the test users ---------------------------------------
# The trailing column of Utest is read on its own and is not treated as a
# recipe (NOTE(review): presumably a user id -- confirm against the cleaning
# step that wrote user_test.npz).
user_test = Utest[:,-1].toarray().flatten().astype('int')
user_test_idx = np.arange(0, Utest.shape[0])
# NOTE(review): if U also carries a trailing non-recipe column like Utest does,
# that column ends up in all_recipes and in the popularity ranking below -- confirm.
all_recipes = np.arange(0, U.shape[1])

# y[i] holds the column indices of the 1 or 2 recipes recorded for test user i;
# an unused second slot keeps the sentinel -1.
y = np.full((user_test.shape[0], 2), -1, dtype='int')

# Drop the trailing column *before* looking for non-zeros. The previous
# `nonzero()[1][:-1]` relied on that column always being non-zero (and stored
# last); a row with a 0 there silently lost a real recipe instead.
Utest_recipes = Utest[:, :-1]
for i in range(len(y)):
    liked = Utest_recipes[i].nonzero()[1]
    if len(liked) not in (1, 2):
        # Also covers the zero-recipe case, which the old message misreported.
        raise ValueError(
            f"Test user {i} has {len(liked)} reviewed recipes; expected 1 or 2"
        )
    y[i, :len(liked)] = liked

# --- Popularity baseline ---------------------------------------------------
# Recommend the same N_RECOMMENDATIONS recipes to everyone: the columns of U
# with the largest column sums (argpartition, so the top-N are not ordered).
N_RECOMMENDATIONS = 5
recipes = np.array(np.sum(U, axis=0)).flatten()
rrecipes = np.argpartition(recipes, -N_RECOMMENDATIONS)[-N_RECOMMENDATIONS:]
# One identical row per test user. np.tile always yields a 2-D
# (n_users, N_RECOMMENDATIONS) array, including for 0 or 1 users, and avoids
# the quadratic copy of growing the array with vstack in a loop.
recommended_recipes = np.tile(rrecipes, (len(user_test_idx), 1))

In [19]:

# Score the popularity baseline: every row of `recommended_recipes` is the same
# set of top recipes, and it is compared against each test user's recipes in `y`.
# NOTE(review): recommend_scoring is not defined in this notebook -- presumably it
# comes from `from baseUtils import *`; confirm there what sc='int' and sc='com' mean.
# NOTE(review): this cell's recorded execution count (19) is lower than that of the
# cell above it (20), which defines y and recommended_recipes -- restart the kernel
# and run top to bottom before trusting the printed averages.
scores = recommend_scoring(y, recommended_recipes, sc='int')
print(f"Average Score is {scores.mean()} (using int)")

scores = recommend_scoring(y, recommended_recipes, sc='com')
print(f"Average Score is {scores.mean()} (using com)")

Average Score is 9.58416306180805 (using int)
Average Score is -42.51562431263289 (using com)


In [21]:
# Random baseline: give every test user 5 recipes drawn uniformly from
# all_recipes and score them with the same two modes as the popularity baseline.
# A seeded Generator keeps the reported averages reproducible across
# Restart & Run All; the unseeded global np.random state did not.
rng = np.random.default_rng(0)
# NOTE(review): the draw is with replacement (as before), so a user's 5 picks
# can occasionally contain the same recipe twice.
random_recipes = rng.choice(all_recipes, size=(y.shape[0], 5))
# Original variable name kept so any later cell that reads `random` still works;
# prefer `random_recipes`, which does not shadow the stdlib module name.
random = random_recipes

scores = recommend_scoring(y, random_recipes, sc='int')
print(f"Average Score is {scores.mean()} (using int)")

scores = recommend_scoring(y, random_recipes, sc='com')
print(f"Average Score is {scores.mean()} (using com)")

Average Score is 7.66666177872278 (using int)
Average Score is -39.12075665371361 (using com)
