In [1]:
import pandas as pd
import numpy as np
from numpy import linalg as la
from numpy import *

In [2]:
# Define three similarity measures to try: Euclidean, Pearson, and cosine.
def euclidSim(inA,inB):
    '''Return a similarity score derived from the Euclidean distance of two arrays.

    The norm of ``inA - inB`` is mapped to 1 / (1 + distance), so identical
    inputs score 1.0 and the score decreases towards 0 as the distance grows.
    '''
    distance = la.norm(inA - inB)
    return 1.0 / (1.0 + distance)

def pearsSim(inA,inB):
    '''Return the Pearson correlation of two arrays, rescaled from [-1, 1] to [0, 1].

    Inputs with fewer than three elements are reported as perfectly
    similar (1.0) without computing a correlation.
    '''
    if len(inA) >= 3:
        correlation = np.corrcoef(inA, inB, rowvar=False)[0, 1]
        return 0.5 + 0.5 * correlation
    return 1.0

def cosSim(inA,inB):
    '''Return the cosine similarity of two vectors, rescaled from [-1, 1] to [0, 1].

    Parameters
    ----------
    inA, inB : array-like
        Vectors of equal length. 1-D arrays, pandas Series and column/row
        matrices are accepted; both are flattened before use.

    Returns
    -------
    float
        0.5 + 0.5 * cos(angle). If either vector has zero norm the angle is
        undefined and the neutral value 0.5 is returned instead of NaN.
    '''
    # Flatten to 1-D float arrays first. The previous ``float(inA.T * inB)``
    # only produced a dot product for np.matrix inputs; for 1-D arrays ``*``
    # is element-wise and float() fails on the multi-element result.
    vecA = np.asarray(inA, dtype=float).ravel()
    vecB = np.asarray(inB, dtype=float).ravel()
    num = float(np.dot(vecA, vecB))
    denom = la.norm(vecA) * la.norm(vecB)
    if denom == 0:
        # An all-zero vector has no direction; avoid dividing by zero.
        return 0.5
    return 0.5 + 0.5 * (num / denom)

In [3]:
def check_ingredients(data):
    '''Prompt until the user enters at least two ingredients that all exist in ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose column labels are the known ingredient names.

    Returns
    -------
    list of str
        The whitespace-separated ingredients from the first accepted entry.
    '''
    while True:
        ingredients = input('Please enter at least 2 ingredients separated by a space: ').split()
        if len(ingredients) < 2:
            print('Please enter at least 2 ingredients.')
            continue
        # Validate each attempt from scratch. A running counter shared across
        # attempts would keep hits from earlier failed attempts and make a
        # later, fully valid entry fail the comparison.
        missing = [x for x in ingredients if x not in data.columns]
        if not missing:
            break
        for x in missing:
            print('Sorry, {} is not in our ingredient list!'.format(x))
        print('You have selected an ingredient that is not in our list. Please try a different ingredient or a broader category, such as "fish" instead of "cod"')
    print('\n')
    return ingredients

In [4]:
def create_query(data, dataN, ingredients):
    '''Build a 0/1 query vector marking the rows of ``data`` named in ``ingredients``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose row labels (index) are matched against ``ingredients``.
        It is only read; it is not modified.
    dataN : sized
        Only its length is used: it sets the length of the returned vector.
    ingredients : iterable of str
        Row labels to switch on in the query.

    Returns
    -------
    numpy.ndarray
        Float vector of length ``len(dataN)`` with 1.0 at the position of
        every row of ``data`` whose label is in ``ingredients`` and 0.0
        elsewhere. No matches yields an all-zero vector.
    '''
    query = np.zeros(len(dataN))
    # Look labels up directly on the index. This avoids the in-place
    # reset_index() on the caller's frame and the dependency on the reset
    # column being called 'index'.
    positions = np.flatnonzero(data.index.isin(list(ingredients)))
    query[positions] = 1.0
    return query

In [5]:
def sims(data, dataN, query, category, metric=euclidSim):
    '''Score ``query`` against every entry of ``dataN`` that belongs to ``category``.

    For each position ``i`` in ``range(len(data))`` the entry ``dataN[i]`` is
    kept only if its ``category`` field equals 1. Kept entries are scored
    with ``metric(query, dataN[i])``.

    Returns a list of ``(i, similarity)`` tuples in ascending ``i`` order.
    '''
    matches = []
    for position in range(len(data)):
        candidate = dataN[position]
        if not (candidate[category] == 1):
            continue  # not tagged with the requested category
        matches.append((position, metric(query, candidate)))
    return matches

In [6]:
def choice():
    '''Ask for a meal category until one of the supported values is entered.

    Returns the accepted category string exactly as typed.
    '''
    valid = ('breakfast', 'lunch', 'dinner', 'appetizer', 'dessert')
    while True:
        answer = str(input('Are you looking for a breakfast, lunch, dinner, appetizer, or dessert?'))
        if answer in valid:
            return answer
        print('Please select a category from the list!')
    

In [7]:
def get_recipe(ratings, recipes):
    '''Ask which listed recipe to show, then print its ingredients and directions.

    Parameters
    ----------
    ratings : list of tuple
        ``(recipe_label, rating)`` pairs in the order they were displayed;
        the user's 1-based answer selects an entry of this list.
    recipes : pandas.DataFrame
        Must provide 'title', 'ingredients' and 'directions' columns, where
        the latter two hold sequences of strings.

    Raises
    ------
    IndexError
        If ``ratings`` is empty (there is nothing to select).
    '''
    if len(ratings) == 0:
        raise IndexError('no recipes to choose from')
    while True:
        answer = input('Please enter one of the numbers above to see the ingredients and recipe.')
        # SECURITY: this used to be eval(input(...)), which executes any
        # expression the user types. int() accepts only a plain integer.
        try:
            selection = int(answer) - 1
        except ValueError:
            selection = -1
        # Reject 0 and negatives too: a negative index would silently wrap
        # around and show a recipe from the end of the list.
        if 0 <= selection < len(ratings):
            break
        print('Please enter a number between 1 and {}.'.format(len(ratings)))
    num = ratings[selection][0]
    title = recipes['title'][num]
    print('\n')
    print('The ingredients for {} are: \n'.format(title))
    print('\n'.join(recipes['ingredients'][num]))
    print('\n')
    print('The directions for {} are: \n'.format(title))
    print('\n'.join(recipes['directions'][num]))

In [8]:
#kNN function
def possible_recipes(data, dataN, recipes, k=10):
    '''Interactive recommender: ask for ingredients and a category, then suggest recipes.

    Steps:
      1. Prompt for ingredients (validated against the columns of ``data``)
         and for a meal category.
      2. Score every recipe in that category against the ingredient query
         and keep the ``k`` most similar.
      3. Re-order those ``k`` by their 'rating' in ``recipes``, list them,
         and let the user inspect recipes until one is accepted.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per recipe, one column per ingredient/category flag.
    dataN : pandas.DataFrame
        Passed to ``create_query`` and ``sims``; the caller in this notebook
        supplies the transpose of ``data``.
    recipes : pandas.DataFrame
        Provides 'title', 'rating', 'ingredients' and 'directions'.
    k : int, optional
        Number of nearest recipes to present (default 10).

    Returns
    -------
    None
    '''
    ingredients = check_ingredients(data)
    # create_query matches ingredient names against row labels, so it gets
    # the transposed frame (ingredients as rows).
    dataT = data.T
    query = create_query(dataT, dataN, ingredients)
    category = choice()
    result = sims(data, dataN, query, category)
    result.sort(reverse=True, key=lambda tup: tup[1])

    # Keep the k most similar recipes, then rank those by their rating.
    ratings = [(idx, recipes['rating'][idx]) for idx, _ in result[:k]]
    ratings.sort(reverse=True, key=lambda tup: tup[1])

    if not ratings:
        # Nothing matched the category; there is no list to choose from.
        print('Sorry, no {} recipes were found.'.format(category))
        return

    while True:
        print('\n')
        print('The top recommended {} recipes are:'.format(category))
        for position, (idx, _) in enumerate(ratings, start=1):
            print('{}. {}'.format(position, recipes['title'][idx]))

        get_recipe(ratings, recipes)

        # Re-ask only the Y/N question on a bad answer. Previously an invalid
        # answer sent the user back to the recipe list instead.
        while True:
            res = input('Is this recipe ok? (Y/N)').strip().upper()
            if res in ('Y', 'N'):
                break
            print('Please enter "Y" or "N"!')
        if res == 'Y':
            print('Hope everything turns out delicious!')
            return

In [9]:
# Load epi_clean.csv, using its first column as the row index.
epi_clean = pd.read_csv('epi_clean.csv', index_col=0)

In [10]:
# Work on a copy so epi_clean keeps all of its columns; some of them are read from it again later.
epi_nums = epi_clean.copy()

In [11]:
# Columns excluded from the matrix that is used for the similarity search.
drops = ['title', 'rating', 'calories', 'carbs', 'protein', 'fat', 'sodium']
# Rebuild epi_nums from epi_clean instead of dropping in place, so that
# re-running this cell gives the same result rather than raising KeyError
# on columns that are already gone.
epi_nums = epi_clean.drop(labels=drops, axis=1)

In [12]:
# Transpose of epi_nums (rows and columns swapped); passed to possible_recipes as dataN.
epi_numsT = epi_nums.T

In [13]:
# Use the standard library's ast.literal_eval rather than the copy shipped
# with the third-party, end-of-life typed_ast package.
from ast import literal_eval
# The 'ingredients' and 'directions' cells are parsed with literal_eval so
# each one is loaded as a Python object instead of a raw string.
recipes_clean = pd.read_csv('recipes_clean.csv', converters={'ingredients':literal_eval, 'directions':literal_eval})
recipes_clean.head()

Unnamed: 0,ingredients,directions
0,"[6 long parsley sprigs, divided, 1 3/4 cups re...",[Chop enough parsley leaves to measure 1 table...
1,"[Nonstick vegetable oil spray, 3 cups all-purp...",[Preheat oven to 350å¡F. Coat cake pans with n...
2,"[1 cup water, 2/3 cup buttermilk, 1/3 cup heav...",[Butter and sugar six 2/3-to 3/4-cup ramekins....
3,"[1/3 cup chopped fresh chives, 1/4 cup Champag...",[Puree first 5 ingredients in blender until sm...
4,"[6 hard-cooked eggs, diced (2 cups), 3/4 cup s...","[Gently combine the eggs, cucumbers, shallots,..."


In [14]:
# Copy the columns listed in ``drops`` from epi_clean into recipes_clean,
# placing them at the front in the same order as they appear in ``drops``.
for position, x in enumerate(drops):
    recipes_clean.insert(loc=position, column=x, value=epi_clean[x])
recipes_clean.head()

Unnamed: 0,title,rating,calories,carbs,protein,fat,sodium,ingredients,directions
0,Ham Persillade with Mustard Potato Salad and M...,0.75,0.024962,0.024289,0.01685,0.018402,0.012846,"[6 long parsley sprigs, divided, 1 3/4 cups re...",[Chop enough parsley leaves to measure 1 table...
1,Banana-Chocolate Chip Cake With Peanut Butter ...,0.875,0.031762,0.049271,0.008791,0.021544,0.003325,"[Nonstick vegetable oil spray, 3 cups all-purp...",[Preheat oven to 350å¡F. Coat cake pans with n...
2,Sweet Buttermilk Spoon Breads,0.375,0.006054,0.014573,0.00293,0.002244,0.001212,"[1 cup water, 2/3 cup buttermilk, 1/3 cup heav...",[Butter and sugar six 2/3-to 3/4-cup ramekins....
3,"Tuna, Asparagus, and New Potato Salad with Chi...",1.0,0.017457,0.014573,0.007326,0.014811,0.002901,"[1/3 cup chopped fresh chives, 1/4 cup Champag...",[Puree first 5 ingredients in blender until sm...
4,Cucumber-Basil Egg Salad,0.75,0.008915,0.001388,0.004396,0.008977,0.001894,"[6 hard-cooked eggs, diced (2 cups), 3/4 cup s...","[Gently combine the eggs, cucumbers, shallots,..."


In [15]:
# Preview the first rows of recipes_clean again (same call as at the end of the previous cell).
recipes_clean.head()

Unnamed: 0,title,rating,calories,carbs,protein,fat,sodium,ingredients,directions
0,Ham Persillade with Mustard Potato Salad and M...,0.75,0.024962,0.024289,0.01685,0.018402,0.012846,"[6 long parsley sprigs, divided, 1 3/4 cups re...",[Chop enough parsley leaves to measure 1 table...
1,Banana-Chocolate Chip Cake With Peanut Butter ...,0.875,0.031762,0.049271,0.008791,0.021544,0.003325,"[Nonstick vegetable oil spray, 3 cups all-purp...",[Preheat oven to 350å¡F. Coat cake pans with n...
2,Sweet Buttermilk Spoon Breads,0.375,0.006054,0.014573,0.00293,0.002244,0.001212,"[1 cup water, 2/3 cup buttermilk, 1/3 cup heav...",[Butter and sugar six 2/3-to 3/4-cup ramekins....
3,"Tuna, Asparagus, and New Potato Salad with Chi...",1.0,0.017457,0.014573,0.007326,0.014811,0.002901,"[1/3 cup chopped fresh chives, 1/4 cup Champag...",[Puree first 5 ingredients in blender until sm...
4,Cucumber-Basil Egg Salad,0.75,0.008915,0.001388,0.004396,0.008977,0.001894,"[6 hard-cooked eggs, diced (2 cups), 3/4 cup s...","[Gently combine the eggs, cucumbers, shallots,..."


In [16]:
# Start the interactive recommender: it prompts for ingredients, a meal category and a recipe number.
possible_recipes(epi_nums, epi_numsT, recipes_clean)

Please enter at least 2 ingredients separated by a space: milk almond


Are you looking for a breakfast, lunch, dinner, appetizer, or dessert?breakfast


The top recommended breakfast recipes are:
1. Breakfast Bowl With Quinoa and Berries 
2. Frozen Sunrise Margaritas 
3. Fluffy Cathead Biscuits With Honey Butter 
4. Smoked Gouda Grits 
5. Almond-Raisin Granola 
6. Pumpkin Muffins 
7. Baked French Toast 
8. D.I.Y. Ricotta 
9. 5-Grain Porridge with Bee Pollen, Apples, and Coconut 
10. Gluten-Free Banana-Almond Pancakes with Date Caramel 
Please enter one of the numbers above to see the ingredients and recipe.1


The ingredients for Breakfast Bowl With Quinoa and Berries  are: 

4 cups mixed berries (raspberries, strawberries, blueberries, blackberries)
2 tablespoons hemp hearts (available in the natural section of most supermarkets in a variety of brands)
20 whole almonds, toasted and chopped
1/4 cup cooked quinoa


The directions for Breakfast Bowl With Quinoa and Berries  are: 

Divid