# Scripts for learning 

In [158]:
dimensions_ingredients = 702
dimensions_nutrition_facts = 22
number_of_clusters=100

In [201]:
# Some Imports
from sklearn.cluster import KMeans, AgglomerativeClustering
from sklearn.decomposition import TruncatedSVD
import sklearn.metrics as sm
import numpy as np
import json
import pickle

In [3]:
# Define Inputs
X = None
Y = None

# Load Recipes
filename="recipes.json"
with open(filename) as json_data:
    recipes = json.load(json_data)
    Y = list(recipes.keys())
    X = np.zeros([len(Y), dimensions_ingredients+dimensions_nutrition_facts])
    index = 0
    for label, recipe in recipes.items():
        for ingredient in recipe["ingredients"]:
            X[index, ingredient[0]] = ingredient[1]
        for fact in recipe["nutrition"]:
            X[ index, dimensions_ingredients + fact[0] ] = fact[1]
        index+=1
print("File "+filename+" was successfully read")

File recipes.json was successfully read


In [4]:
# See one example
print(Y[0])
print(X[0,:])

Mustard and lemon sausages with carrot mash
[ 0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.29414018  0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.    

In [6]:
# Train K-Means
# model = KMeans(n_clusters=number_of_clusters)
# model.fit(X)

In [7]:
# Show the labels
# model.labels_

In [9]:
# Show one of the groups
groups = []
for group in range(0, number_of_clusters):
    groups.append([])
#for index in range(0,len(Y)):
    # groups[model.labels_[index]].append(Y[index])

# Now go with nearest neighbors

In [5]:
X_ingredients = X[:,0:dimensions_ingredients]

from sklearn.neighbors import NearestNeighbors
nbrs = NearestNeighbors(n_neighbors=10, algorithm='ball_tree').fit(X_ingredients)

In [6]:
# Find the nearest neighbor to a given one
def _neighbors_of(X):
    X_=np.mean(X, axis=0)
    X_ = X_.reshape(1,-1)
    neighbors=[]
    distances, indices = nbrs.kneighbors(X_)
    for n_index in indices[0]:
        neighbors.append(Y[int(n_index)])
    return neighbors
    
def neighbors_of(index):
    X=X_ingredients[index:index+1,:]
    return _neighbors_of(X)
    
neighbors_of(100)

['Cauliflower Risotto With Spicy Pangrattato',
 'Eat for Eight Bucks: Split Pea Soup and Simplest Slaw',
 'Stir-Fried Chicken with Chinese Cabbage',
 'Crunchy Chinese Chicken Salad',
 'Three Cheese Risotto Recipe',
 'Three cheese risotto',
 'Cook the Book: Rice Cooker Mushroom Risotto',
 'Wild Mushroom Risotto Recipe',
 'Sweet And Spicy Chicken With Soba Salad',
 'Asian Chicken & Rice']

In [19]:
# Or find the nearest to a group of those
def neighbors_of_list(elements, cluster=False):
    X=X_ingredients[elements,:]
    if cluster:
        print("Not implemented")
    else:
        return _neighbors_of(X)
        
neighbors_of_list([0, 268, 3930, 100])

['Vietnamese Peanut Sauce',
 "Fast Breads' Crusty Artisanal Bread",
 'Menudo',
 'Tomato Lemongrass Soup',
 'Dinner Tonight: Tunisian Chickpea Soup',
 'California Rolls',
 'Cook the Book: Dashi and Japanese Chicken Stock',
 'Hearty Vietnamese Beef Noodle Soup (Pho Bo)',
 'Earl Grey Tea Mooncake with Egg Yolk and Pine Nuts',
 'Cherry & Thai Basil Soda']

# Singular value decomposition


In [114]:
svd = TruncatedSVD(n_components=100, n_iter=9, random_state=42)
X_svd = svd.fit_transform(X)

# Clustering 

In [149]:
# Train K-Means
number_of_clusters=120
model = KMeans(n_clusters=number_of_clusters)
model.fit(X_svd[100:])

KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,
    n_clusters=120, n_init=10, n_jobs=1, precompute_distances='auto',
    random_state=None, tol=0.0001, verbose=0)

In [152]:
preds = model.predict(X_svd[-100:])

In [162]:
def printCluster(preds,Y,n):
    for i in range(len(preds)):
        if preds[i]==n:
            print(Y[i])

In [155]:
preds

array([ 81,  73,  99,   1,   9,  88,  81,  99,  88,  78, 104,  88,  78,
        79,  78,  88,  88,  88,  64,  30,  13,  88,  93,  78,  52,  56,
        88,  93, 102,  89,  70, 100,  58,  33,  88,   0, 119,  88, 106,
        78,  88,  30,  78,  88,  83,  17,  13,  70,  98, 106,  58,  30,
        88,  78,  88,  93, 116, 104,  58,  78,  99,  78,  99,  88,  78,
        88,  20,   1, 104,  89,  99,  17,  89,  78,  13,  17,  56,  79,
        65,  88,  78,  99, 102,  89,  93,  78,  58,  89,  40,  99,  99,
        61,  99,  58, 111,  78,  99,  89,  88,  78], dtype=int32)

In [177]:
#Hierachical clustering
aggModel = AgglomerativeClustering(n_clusters=100, linkage='ward')
preds = aggModel.fit_predict(X_svd)

In [209]:
preds[88]

19

In [210]:
printCluster(preds,Y,19)

Veggie Burger Recipe
Chicken Soup Noodles
Fish Steamed With Ham, Mushrooms, Ginger, Garlic & Scallions
Shellfish Paella
Pad Thai
Turkey salad
Risotto "Japanese Style" with Edamame, Bacon and Toasted Pine Nuts
Tofu And Soba Noodles With Lemon Ginger Dressing
Baked Plaice
Roasted Venison
Sour Cherry Strudel
Pan-Seared Cod with Basil Sauce
Steamed Sea Bass with Shredded Pork
Beef Brisket
Quinoa Salad With Black Beans, Corn, And Tomatoes
Cook the Book: Shepherd's Pie
Baked Cod with Tomatoes and Onions
Beef bourguignon
Beef and Pork Ragu
Sweet and Spicy Rubbed Ham
Chicken and Chorizo Paella
Chicken Arroz Caldo (Chicken Rice Porridge)
Pasta with favas, tomatoes and sausage
Beef Potstickers
Cheeseburgers
Grilled Tofu and Soba Noodles
Homemade Brie Burgers
Irish Beef And Stout Stew
Duck Burger Recipe
Pancakes With Buttery Bourbon Maple Syrup
Oven-Baked Halibut Steaks
Korean Barbecue Burgers
Borscht and Beef Pasta
Classic Patty Melt Recipe
Vietnamese Lemongrass Green Beans and Tofu
Red-Cooked S

In [179]:
[(i,y) for i,y in enumerate(Y[:100])]

[(0, 'Mustard and lemon sausages with carrot mash'),
 (1, 'Cranberry-Port Sauce'),
 (2, 'Fennel and cured salmon sandwiches'),
 (3, 'Chilli Cranberry Sauce'),
 (4, 'Winter Vegetables In Puff Pastry Shells'),
 (5, 'Salmon With Fennel And Lemon Risotto'),
 (6, 'Butterscotch Fruit Fondue'),
 (7, 'Chinese Braised Pork With Double Spring Onions'),
 (8, 'Chocolate Roulade'),
 (9, 'Smoked-Ham Rolls'),
 (10, 'Butter cupcakes'),
 (11, 'Pickled Chinese Long Beans'),
 (12, 'Veggie Burger Recipe'),
 (13, 'Italian stuffed cabbage'),
 (14, 'Warm Seafood Sandwiches'),
 (15, 'Sour Cream Coffee Cake With Chocolate Cinnamon Swirl'),
 (16, 'Miso-Infused Cream Cheese Spread'),
 (17, 'Skirt Steak With Paprika Butter'),
 (18, 'Gluten Free Porridge'),
 (19, 'Paella Rodriguez'),
 (20, 'Chicken Soup Noodles'),
 (21, 'Pickled Mustard Greens'),
 (22, 'Portobello Mushrooms Stuffed with Spinach, Parmesan and Fennel'),
 (23, 'Basil Tofu And Vegetable Sandwich'),
 (24, 'A Salad of Black Kale, Kabocha Squash, Cheddar

In [204]:
modelFile=('aggClusterModel.pkl')
pickle.dump(aggModel, open(modelFile,'wb'))