In [4]:
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import ShuffleSplit, KFold, train_test_split
import matplotlib.pyplot as plt
from scipy import sparse
from itertools import product
from time import time
import seaborn
%run Helpers.ipynb

In [52]:
def recipe_comp(i, j):
    #extract info
    cali = X[i, -1]
    idxi = X[i, :-1].nonzero()[1]
    ingi = [ingr_decoder[k] for k in idxi[idxi < num_ing]]
    tagsi = [tag_decoder[k] for k in idxi[idxi > num_ing] - num_ing]
    namei = recipe_decoder[i]
    
    calj = X[j, -1]
    idxj = X[j, :-1].nonzero()[1]
    ingj = [ingr_decoder[k] for k in idxj[idxj < num_ing]]
    tagsj = [tag_decoder[k] for k in idxj[idxj > num_ing] - num_ing]
    namej = recipe_decoder[j]
    
    #print
    print(f"Recipe: {namei}, {namej}")
    print(f"Difference Calorie Level: {cali, calj}")
    print(f"Shared Ingredients: {len(intersection(ingi, ingj))}")
    print(f"Tags: {len(intersection(tagsi, tagsj))}")

In [54]:
def recipe_score(i, j):
    #set things up 
    score = np.zeros_like(i)
    
    #just do the single one if they're scalars
    if np.isscalar(i):
        xi = X[i, :-1].nonzero()[1]
        xj = X[j, :-1].nonzero()[1]
        #add all identical tags and if calories are the same (don't double count calories)
        return len(intersection(xi, xj)) + int(X[i,-1]==X[j,-1])
        
    #otherwise iterate through all values
    for num, (k, l) in enumerate(zip(i, j)):
        xk = X[k, :-1].nonzero()[1]
        xl = X[l, :-1].nonzero()[1]
        #add all identical tags and if calories are the same (don't double count calories)
        score[num] = len(intersection(xk, xl)) + int(X[k,-1]==X[l,-1])
        
    return score

In [7]:
rs = ShuffleSplit(1, .2)

for train, test in rs.split(Xhat):
    #make pca
    pca = TruncatedSVD(n_components=30).fit(Xhat[train])
    #make and train neighbors
    nn  = NearestNeighbors(metric='cosine', algorithm='brute').fit(pca.transform(Xhat[train]))
    start = time()
    neighbors = nn.kneighbors(pca.transform(Xhat[test]), 1)

In [55]:
i = 15
print(recipe_score(test[i], neighbors[1][i][0]))
recipe_comp(test[i], neighbors[1][i][0])

4
Recipe: pork chop or chicken  n  potato bake, double chocolate waffles
Difference Calorie Level: (2.0, 0.0)
Shared Ingredients: 1
Tags: 3
