In [1]:
import json
import pandas as pd 
import numpy as np
import ast
import random
from fancyimpute import SoftImpute, BiScaler

Using TensorFlow backend.


## 1. Read Offline Scraped Data and Calculate Prices

In [2]:
data = pd.read_csv('nutrition_info.csv')
temp = [ast.literal_eval(i) for i in data['ingredients']]
data['ingredients'] = temp
data = data.drop(columns=['Unnamed: 0','personal_rating'])

In [3]:
# find all the major ingredients
key_words = list(['pound', 'lb', 'pounds', 'kg', 'cup', 'cups', 'ounces', 'package', None])
major_ingredients = [[]for i in range(len(data.ingredients))]
for ing in range(len(data.ingredients)):
    for i in data.ingredients[ing]:
        if any(key_word in i for key_word in key_words):
            major_ingredients[ing].append(i)

In [4]:
def get_prices(data,file):
    # Read JSON price File
    with open(file, 'r') as myfile:
        temp=myfile.read()
    price = json.loads(temp)
    # Compute prices of each recipie with corresponding ingredients and prices
    arr = []
    for i in major_ingredients:
        total = 0
        for ingredient in i:
            try:
                if type(price[ingredient[2]])==float:
                    total += float(price[ingredient[2]])
                else:
                    total += float(price[ingredient[2]].replace('$',''))
            except:
                pass
        arr.append(total*1.2)
    return arr

In [5]:
amazon = get_prices(data,'prices_amazon.json')
walmart = get_prices(data,'prices_walmart.json')
wholefoods = get_prices(data,'prices_wholefoods.json')
data['amazon'] = amazon
data['walmart'] = walmart
data['wholefoods'] = wholefoods
data

Unnamed: 0,title,rating,cook_time,calories,sodium,fat,protein,carbs,fiber,ingredients,links,amazon,walmart,wholefoods
0,Quinoa Tabbouleh,4.5,60,310.0,610.0,16.0,8.0,37.0,6.0,"[[1, cup, quinoa], [1, None, lemon], [4, clove...",https://www.yummly.com/recipe/Quinoa-Tabbouleh...,19.848,10.548,26.100
1,Thai Salmon,4.0,21,80.0,370.0,3.5,6.0,5.0,1.0,"[[6, ounces, salmon fillets], [1, pinch, salt]...",https://www.yummly.com/recipe/Thai-Salmon-2408300,15.432,8.472,25.524
2,Coconut Curry Carrot and Sweet Potato Soup,4.0,35,250.0,600.0,18.0,3.0,22.0,6.0,"[[1, tablespoon, coconut oil], [1, None, yello...",https://www.yummly.com/recipe/Coconut-Curry-Ca...,35.580,22.416,50.820
3,Korean Vegetarian Beef,4.5,20,840.0,920.0,9.0,15.0,171.0,3.0,"[[1, package, veggie ground round], [1/2, cup,...",https://www.yummly.com/recipe/Korean-Vegetaria...,18.828,12.096,20.856
4,"Braised Chickpeas with Tomato, Spinach, and Feta",4.5,20,440.0,1830.0,23.0,15.0,48.0,10.0,"[[3, tablespoons, extra virgin olive oil], [5,...",https://www.yummly.com/recipe/Braised-Chickpea...,32.148,8.016,30.552
5,Tuscan Kale With Garlic Tomatoes,4.5,40,130.0,10.0,11.0,1.0,7.0,1.0,"[[1, None, Tuscan kale], [4, tablespoons, extr...",https://www.yummly.com/recipe/Tuscan-Kale-With...,4.188,2.724,2.388
6,5-Ingredient Cheese Naan Pizzas,4.5,17,80.0,150.0,3.0,4.0,9.0,0.0,"[[4, pieces, naan], [2, teaspoons, olive oil],...",https://www.yummly.com/recipe/5-Ingredient-Che...,8.616,3.744,16.776
7,Oven Baked Pork Chop,4.5,180,290.0,910.0,17.0,20.0,15.0,0.0,"[[None, None, pork chops], [1, tablespoon, soy...",https://www.yummly.com/recipe/Oven-Baked-Pork-...,6.288,6.408,10.788
8,White Chocolate-Raspberry Blender Muffins,4.0,25,200.0,170.0,5.0,6.0,33.0,5.0,"[[2, cups, old-fashioned oats], [2, None, eggs...",https://www.yummly.com/recipe/White-Chocolate-...,29.040,18.276,22.608
9,Perfect Air Fryer Salmon,4.5,40,310.0,460.0,20.0,24.0,10.0,5.0,"[[2, None, salmon fillets], [2, teaspoons, avo...",https://www.yummly.com/recipe/Perfect-Air-Frye...,26.616,9.168,34.752


## 2. Recommender System

In [6]:
# Read in Recipies and URL
link = data[['title','links']]
# Read in User Rating Data
char_matrix = pd.read_csv('final_rating_data.csv')
char_matrix = char_matrix.drop(columns=['Unnamed: 0'])
char_matrix.head()

Unnamed: 0,10 Minute Healthy Crispy Chicken and avocado Wraps,10-Minute Blackened Salmon,15-Minute Baked Salmon with Lemon,20 Minute Honey Garlic Shrimp,20-Minute Lemon Pesto Penne,3-Ingredient Healthy Banana Pancakes,30-Minute Vegetarian Meatballs,4-Ingredient BBQ Baked Salmon,5 Ingredient Peanut Butter Energy Bites,5-Ingredient Cheese Naan Pizzas,...,Vegetable Ramen Pad Thai,Vegetarian Chilli,Vegetarian Chimichangas,Vegetarian Enchilada,Vegetarian Gumbo,Weight Loss Wonder Soup,Weight Watchers Taco Casserole,White Chocolate-Raspberry Blender Muffins,Zesty Lime Shrimp and Avocado Salad,Zesty Quinoa with Broccoli and Cashews
0,,5.0,,,,,5.0,,,,...,,,,,,3.0,,,,
1,,,,,,,5.0,,,,...,,,,,,,,,,
2,,,5.0,,,,,,,,...,4.0,,,,,,,,,
3,,,,4.5,,,,5.0,4.0,,...,,,,5.0,,,,,,
4,,,,,,,,4.0,4.0,,...,,,4.0,,,,,,,1.0


In [7]:
# User input ratings
def user_input(data,link):
    # USER 1: 
    print('==================================================================')
    print('User1 Inputs:')
    print('==================================================================')
    # Generate Random recipies to rate
    dic = {}
    for x in range(10):
        index = random.randint(0,len(data.columns))
        recipie = data.columns[index]
        print(recipie,' : ',link.links[link['title']==recipie].values[0])
        rating = input('Enter Rating (1-5 stars): ')
        dic[recipie] = float(rating)
    # Unpack Rating and concat into main dataframe
    user = data.iloc[0]
    user[:] = np.nan
    for key,value in dic.items():
        user[key] = value
    temp = pd.DataFrame([user], index = ['user1'])
    df1 = pd.concat([temp, data])
    
    # USER 2:
    print('==================================================================')
    print('User2 Inputs:')
    print('==================================================================')
    # Generate Random recipies to rate
    dic = {}
    for x in range(10):
        index = random.randint(0,len(data.columns))
        recipie = data.columns[index]
        print(recipie,' : ',link.links[link['title']==recipie].values[0])
        rating = input('Enter Rating (1-5 stars): ')
        dic[recipie] = float(rating)
    # Unpack Rating and concat into main dataframe
    user = data.iloc[0]
    user[:] = np.nan
    for key,value in dic.items():
        user[key] = value
    temp = pd.DataFrame([user], index = ['user2'])
    final_df = pd.concat([temp, df1])
    return final_df

In [8]:
# Obtain User 1 & 2 Ratings
df = user_input(char_matrix,link)

User1 Inputs:
Honey Dijon Garlic Chicken Breasts  :  https://www.yummly.com/recipe/Honey-Dijon-Garlic-Chicken-Breasts-2208217
Enter Rating (1-5 stars): 5
BBQ Chicken Naan Bread Pizza  :  https://www.yummly.com/recipe/BBQ-Chicken-Naan-Bread-Pizza-2127278
Enter Rating (1-5 stars): 4
Crunchy Detox Salad  :  https://www.yummly.com/recipe/Crunchy-Detox-Salad-2497438
Enter Rating (1-5 stars): 5
Black Bean-Quinoa Buddha Bowl  :  https://www.yummly.com/recipe/Black-Bean-Quinoa-Buddha-Bowl-2227717
Enter Rating (1-5 stars): 3
Easy 5-Ingredient Chicken Enchiladas  :  https://www.yummly.com/recipe/Easy-5-Ingredient-Chicken-Enchiladas-9073068
Enter Rating (1-5 stars): 5
Hamburger Casserole  :  https://www.yummly.com/recipe/Hamburger-Casserole-2286133
Enter Rating (1-5 stars): 4
Sauteed Ground Beef and Kale  :  https://www.yummly.com/recipe/Sauteed-Ground-Beef-and-Kale-1167062
Enter Rating (1-5 stars): 3
No-bake Raspberry Cheesecake Pots  :  https://www.yummly.com/recipe/No-bake-Raspberry-Cheesecake

In [9]:
# ask user input for their allergies
allergies = list()
def user_input_allergies():
    # USER 1: 
    print('==================================================================')
    print('User1 Inputs:')
    print('==================================================================')
    a = input('Enter Your Allergies : ')
    element = a.split(',')
    for i in element:
        allergies.append(i)
        
    # USER 2:
    print('==================================================================')
    print('User2 Inputs:')
    print('==================================================================')
    a = input('Enter Your Allergies : ')
    element = a.split(',')
    for i in element:
        allergies.append(i)
    
    return allergies

In [10]:
# Obtain User 1 & 2 Allergies 
allergies = user_input_allergies()

User1 Inputs:
Enter Your Allergies : mango,bean
User2 Inputs:
Enter Your Allergies : apple,beef


In [11]:
allergies

['mango', 'bean', 'apple', 'beef']

In [12]:
ingredient = []
rating_index = []
for i in char_matrix.columns:
    temp = data[data['title']==i]
    ingredient.append(temp['ingredients'].values[0]) 
all_ingredients = [[]for i in range(len(ingredient))]
# find all the ingredients without unit
for rec in range(len(ingredient)):
    for ing in range(len(ingredient[rec])):
        all_ingredients[rec].append(ingredient[rec][ing][2].split())
# find the index of recipes which contain allergic ingredient
for ing in range(len(all_ingredients)):
    for i in all_ingredients[ing]:
        if any(key_word in i for key_word in allergies):
            rating_index.append(ing)

In [13]:
# Soft Impute missing data
df_filled = pd.DataFrame(SoftImpute().fit_transform(df.as_matrix()))

  


[SoftImpute] Max Singular Value of X_init = 145.715079
[SoftImpute] Iter 1: observed MAE=0.366528 rank=106
[SoftImpute] Iter 2: observed MAE=0.366451 rank=106
[SoftImpute] Iter 3: observed MAE=0.366269 rank=106
[SoftImpute] Iter 4: observed MAE=0.365634 rank=105
[SoftImpute] Iter 5: observed MAE=0.364935 rank=105
[SoftImpute] Iter 6: observed MAE=0.363909 rank=104
[SoftImpute] Iter 7: observed MAE=0.361957 rank=102
[SoftImpute] Iter 8: observed MAE=0.358827 rank=98
[SoftImpute] Iter 9: observed MAE=0.355816 rank=96
[SoftImpute] Iter 10: observed MAE=0.352760 rank=93
[SoftImpute] Iter 11: observed MAE=0.349824 rank=91
[SoftImpute] Iter 12: observed MAE=0.347356 rank=89
[SoftImpute] Iter 13: observed MAE=0.344817 rank=88
[SoftImpute] Iter 14: observed MAE=0.342946 rank=86
[SoftImpute] Iter 15: observed MAE=0.340568 rank=84
[SoftImpute] Iter 16: observed MAE=0.338937 rank=83
[SoftImpute] Iter 17: observed MAE=0.336169 rank=80
[SoftImpute] Iter 18: observed MAE=0.333806 rank=78
[SoftImpute

In [14]:
# 0 = User 2; 1 = User 1
for i in range(2):
    for j in rating_index:  
        df_filled[i][j] = 0
df_filled.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,255,256,257,258,259,260,261,262,263,264
0,0.768631,1.610823,1.656344,1.442383,1.417865,1.45418,1.057119,1.577259,1.523188,1.877372,...,2.159748,1.281746,1.129862,1.716503,1.488387,1.529942,1.665698,0.769901,1.733123,1.06838
1,0.782586,2.180052,1.849801,1.739778,1.944354,1.133391,0.987717,1.678764,2.10619,1.305072,...,1.046478,0.735836,0.912388,1.271222,1.279218,1.483695,1.119659,0.170888,1.633511,1.011952
2,0.782586,2.180052,1.849801,1.739778,1.944354,1.133391,0.987717,1.678764,2.10619,1.305072,...,1.046478,0.735836,0.912388,1.271222,1.279218,1.483695,1.119659,0.170888,1.633511,1.011952
3,3.230636,4.785963,4.795564,4.335903,4.55292,3.489766,5.0,4.740481,4.655663,4.484879,...,3.867616,3.745121,4.087371,4.211912,3.27431,4.139742,4.444457,1.388556,4.231919,2.165153
4,3.067287,4.614206,5.0,3.996984,4.042057,3.36522,3.142084,4.151709,4.72361,4.721836,...,4.0,3.723016,4.182312,2.722269,3.301418,3.573475,3.752346,1.882407,4.592154,3.536112


## 3. Optimization

In [15]:
#rating for user 0 and user 1
rating = df_filled[0:2].rename(index={0:1,1:0}).values.tolist()

In [16]:
# Set up input arrays for each category
calories = []
sodium = []
fat = []
protein = []
carbs = []
fiber = []
cook_time = []
amazon_price = []
wholefoods_price = []
walmart_price = []
K = 265
for i in char_matrix.columns:
    temp = data[data['title']==i]
    calories.append(temp['calories'].values[0])
    sodium.append(temp['sodium'].values[0])
    fat.append(temp['fat'].values[0])
    protein.append(temp['protein'].values[0])
    carbs.append(temp['carbs'].values[0])
    fiber.append(temp['fiber'].values[0])
    cook_time.append(temp['cook_time'].values[0])
    amazon_price.append(temp['amazon'].values[0])
    wholefoods_price.append(temp['wholefoods'].values[0])
    walmart_price.append(temp['walmart'].values[0])

In [28]:
# ask user inpput for their available time
K = 265
T = [[]for i in range(2)]
def user_input_time():
        # USER 1: 
    print('==================================================================')
    print('User1 Inputs:')
    print('==================================================================')
    for x in range(5):
        time = input('Enter Available time of day' + str(x+1) +' (minutes) : ')
        T[0].append(time)
    
    # USER 2:
    print('==================================================================')
    print('User2 Inputs:')
    print('==================================================================')
    for x in range(5):
        time = input('Enter Available time of day' + str(x+1) + ' (minutes) : ')
        T[1].append(time)
    # convert all the user input into Integer    
    for i in range(2):
        for j in range(5):
            T[i][j] = int(T[i][j])
    return T
T = user_input_time()

User1 Inputs:
Enter Available time of day1 (minutes) : 89
Enter Available time of day2 (minutes) : 78
Enter Available time of day3 (minutes) : 90
Enter Available time of day4 (minutes) : 78
Enter Available time of day5 (minutes) : 89
User2 Inputs:
Enter Available time of day1 (minutes) : 123
Enter Available time of day2 (minutes) : 78
Enter Available time of day3 (minutes) : 98
Enter Available time of day4 (minutes) : 78
Enter Available time of day5 (minutes) : 56


In [29]:
T

[[89, 78, 90, 78, 89], [123, 78, 98, 78, 56]]

In [30]:
# ask user input for their nutrition intake and budget
height = list()
weight = list()
budget = list()
def user_input_nutrition():
    # USER 1: 
    print('==================================================================')
    print('User1 Inputs:')
    print('==================================================================')
    h = input('Enter Your Height (cm) : ')
    height.append(h)
    w = input('Enter Your Weight (kg) : ')
    weight.append(w)
    b = input('Enter Your budget : ')
    budget.append(b)
        
    # USER 2:
    print('==================================================================')
    print('User2 Inputs:')
    print('==================================================================')
    h = input('Enter Your Height (cm) : ')
    height.append(h)
    w = input('Enter Your Weight (kg) : ')
    weight.append(w)
    b = input('Enter Your budget : ')
    budget.append(b)
    
    return height,weight,budget
height,weight,budget = user_input_nutrition()

User1 Inputs:
Enter Your Height (cm) : 175
Enter Your Weight (kg) : 65
Enter Your budget : 75
User2 Inputs:
Enter Your Height (cm) : 165
Enter Your Weight (kg) : 55
Enter Your budget : 75


In [31]:
# calculate the nutrition bound as well as the rating according to the user input
for i in range(2):
    height[i] = int(height[i])
    weight[i] = int(weight[i])
ratio = 0.4*(sum((height[i]/175*weight[i]/70) for i in range(2)))

# lower(1) and upper(2) bound of nutrient
cal_bound = [2000, 2500]
pro_bound = [30 ,50]
fat_bound = [65 , 80]
sod_bound = [1600 ,2400]
car_bound = [5, 100]
fib_bound = [0, 10]
for i in range(2):
    cal_bound[i] *= ratio
    pro_bound[i] *= ratio
    fat_bound[i] *= ratio
    sod_bound[i] *= ratio
    car_bound[i] *= ratio
    fib_bound[i] *= ratio

#Budget
B = sum(int(budget[i])for i in range(2))

#rating for user 0 and user 1
rating = df_filled.iloc[0:2].values.tolist()

# parameter for objective
alpha = 0.1

In [32]:
# Create a model instance
from docplex.mp.model import Model
MnM = Model(name='Meal Planning for the New Millennium')

In [33]:
# x_{itk} are binary variable
x = {(i,t,k): MnM.binary_var(name='x_{0}_{1}_{2}'.format(i,t,k)) for i in range(2) for t in range(5) for k in range(265)}

In [34]:
# time inequality
w = MnM.continuous_var(name='w', lb=0)

In [39]:
# five meals constraints, one meal per day
for t in range(5):
    MnM.add_constraint(MnM.sum(x[i,t,k] for i in range(2) for k in range(265)) == 1, ctname = 'subject to five_meals_total')


# nutrition lower bound
MnM.add_constraint(MnM.sum(calories[k]*x[i,t,k] for i in range(2) for t in range(5) for k in range(K)) >= cal_bound[0])
MnM.add_constraint(MnM.sum(protein[k]*x[i,t,k] for i in range(2) for t in range(5) for k in range(K)) >= pro_bound[0])
MnM.add_constraint(MnM.sum(fat[k]*x[i,t,k] for i in range(2) for t in range(5) for k in range(K)) >= fat_bound[0])
MnM.add_constraint(MnM.sum(sodium[k]*x[i,t,k] for i in range(2) for t in range(5) for k in range(K)) >= sod_bound[0])
MnM.add_constraint(MnM.sum(carbs[k]*x[i,t,k] for i in range(2) for t in range(5) for k in range(K)) >= car_bound[0])
MnM.add_constraint(MnM.sum(fiber[k]*x[i,t,k] for i in range(2) for t in range(5) for k in range(K)) >= fib_bound[0])

# nutrition upper bound
MnM.add_constraint(MnM.sum(calories[k]*x[i,t,k] for i in range(2) for t in range(5) for k in range(K)) <= cal_bound[1] )
MnM.add_constraint(MnM.sum(protein[k]*x[i,t,k] for i in range(2) for t in range(5) for k in range(K)) <= pro_bound[1] )
MnM.add_constraint(MnM.sum(fat[k]*x[i,t,k] for i in range(2) for t in range(5) for k in range(K)) <= fat_bound[1] )
MnM.add_constraint(MnM.sum(sodium[k]*x[i,t,k] for i in range(2) for t in range(5) for k in range(K)) <= sod_bound[1] )
MnM.add_constraint(MnM.sum(carbs[k]*x[i,t,k] for i in range(2) for t in range(5) for k in range(K)) >= car_bound[1])
MnM.add_constraint(MnM.sum(fiber[k]*x[i,t,k] for i in range(2) for t in range(5) for k in range(K)) >= fib_bound[1])

#budget constraint
MnM.add_constraint(MnM.sum(amazon_price[k]*x[i,t,k] for i in range(2) for t in range(5) for k in range(K)) <= B  )

        
# schedule time inequalities
MnM.add_constraint(MnM.sum(cook_time[k]* (x[0,t,k] - x[1,t,k]) for t in range(5) for k in range(K)) <= w)
MnM.add_constraint(MnM.sum(cook_time[k]* (x[0,t,k] - x[1,t,k]) for t in range(5) for k in range(K)) >= -w)

# meal diversity
for k in range(K):
    MnM.add_constraint(MnM.sum(x[i,t,k] for i in range(2) for t in range(5)) <= 1)

# schedule date constraint
for i in range(2):
    for t in range(5):
        MnM.add_constraint(MnM.sum(x[i,t,k]*cook_time[k] for k in range(265)) <= T[i][t])

In [40]:
# Decine the objective function
MnM.maximize(MnM.sum((rating[0][k]+rating[1][k])*x[i,t,k] for i in range(2) for t in range(5) for k in range(K)) - alpha*w)

In [41]:
#Solve the problem
MnM.print_information()

Model: Meal Planning for the New Millennium
 - number of variables: 2651
   - binary=2650, integer=0, continuous=1
 - number of constraints: 175
   - linear=175
 - parameters: defaults
 - problem type is: MILP


In [42]:
MnMs= MnM.solve(log_output=True)
assert MnMs
MnM.print_solution()

CPLEX Error  1016: Community Edition. Problem size limits exceeded. Purchase at http://ibm.biz/error1016.


DOcplexLimitsExceeded: **** Promotional version. Problem size limits exceeded, CPLEX code=1016