# 1. Import all the necessary library

In [5]:
from bs4 import BeautifulSoup as bs
from urllib.request import urlopen as url 
import pickle
import json
import multiprocessing
import pandas as pd 
import numpy as np
import re

# 2. Define EP_Recipe class to store all the data.

In [6]:
class EP_Recipe():
    title = None
    rating = None
    personal_rating = None
    calories = None
    carbohydrates = None
    fat = None
    protein = None
    saturateFat = None
    sodium = None
    polyunsaturatedFat = None
    fiber = None
    monounsaturatedFat = None
    cholesterol = None 
    activetime = None
    time = None
    total_ingredients = []
    servings = None

    def get_title(self, page):
        recipename = page.find('h1', {'itemprop': 'name'}).text
        # dislike = [,'Slow-Cooker','Spice','Spicy','Spiced','Lamb']
        if 'Squash' in str(recipename):
            return None
        if 'Spice' in str(recipename):
            return None
        if 'Slow-Cooked' in str(recipename):
            return None
        if 'Slow-Cooker' in str(recipename):
            return None
        if 'Spicy' in str(recipename):
            return None
        if 'Spiced' in str(recipename):
            return None
        if 'Lamb' in str(recipename):
            return None
        if 'Salad' in str(recipename):
            return None
        if 'Burger' in str(recipename):
            return None
        if 'Pie' in str(recipename):
            return None
        if 'Cake' in str(recipename):
            return None
        if 'Soup' in str(recipename):
            return None
        else:
            return recipename
        #return page.find('h1', {'itemprop': 'name'}).text

    def get_rating(self, page):
        try:
            return float(page.find_all('span', {'class': 'rating'})[-1].text.split('/')[0]) + 1
        except:
            return None

    def build_recipie(self, page):
        #super(EP_Recipe, self).build_recipie(page)
        self.title = self.get_title(page)
        self.rating = self.get_rating(page)
        self.personal_rating = self.get_personal_rating(page)
        self.calories = self.get_calories(page)
        self.carbohydrates = self.get_carbohydrates(page)
        self.fat = self.get_fat(page)
        self.protein = self.get_protein(page)
        self.saturateFat = self.get_saturatedfat(page)
        self.sodium = self.get_sodium(page)
        self.polyunsaturatedFat = self.get_polyunsaturated(page)
        self.fiber = self.get_fiber(page)
        self.monounsaturatedFat = self.get_monounsaturated(page)
        self.cholesterol = self.get_cholesterol(page)
        self.activetime = self.get_activetime(page)
        self.time = self.get_time(page)
        self.total_ingredients = self.get_ingredients(page)
        self.servings = self.get_serving(page)

    def get_calories(self,page):
        try:
            return float(page.find('span',{'class':'nutri-data','itemprop':'calories'}).text)
        except:
            return None

    def get_carbohydrates(self,page):  
        try:
            return float(page.find('span', {'class': 'nutri-data', 'itemprop': 'carbohydrateContent'}).text.split(' ')[0])
        except:
            return None
        
    def get_fat(self, page):
        try:
            return float(page.find('span', {'class': 'nutri-data', 'itemprop': 'fatContent'}).text.split(' ')[0])
        except:
            return None

    def get_protein(self, page):
        try:
            return float(page.find('span', {'class': 'nutri-data', 'itemprop': 'proteinContent'}).text.split(' ')[0])
        except:
            return None
        
    def get_saturatedfat(self, page):
        try:
            return float(page.find('span', {'class': 'nutri-data', 'itemprop': 'saturatedFatContent'}).text.split(' ')[0])
        except:
            return None
        
        
    def get_sodium(self,page):
        try:
            return float(page.find('span',{'class':'nutri-data','itemprop':'sodiumContent'}).text.split(' ')[0])
        except:
            return None

    def get_polyunsaturated(self, page):
        try:
            find_poly = page.findAll('span', {'class': 'nutri-data'})
            poly = find_poly[6].text.split(' ')[0]
            return float(poly)     
        except:
            return None

    def get_fiber(self, page):
        try:
            return float(page.find('span', {'class': 'nutri-data', 'itemprop': 'fiberContent'}).text.split(' ')[0])
        except:
            return None

    def get_monounsaturated(self, page):
        try:
            find_mono = page.findAll('span', {'class': 'nutri-data'})
            mono = find_mono[8].text.split(' ')[0]            
            return float(mono)
        except:
            return None    

    
    def get_cholesterol(self,page):
        try:
            return float(page.find('span',{'class':"nutri-data",'itemprop':'cholesterolContent'}).text.split(' ')[0])
        except:
            return None
    
    def get_personal_rating(self, page):
        try: 
            p_ratings = page.findAll('img', {'class': 'fork-rating'})
            p_persons = page.find_all('span', {'class': 'credit'})
            p_r = []            
            for i in range(len(p_ratings)):
                temp = [p_persons[i].text.split('/')[0], int(p_ratings[i]['src'].split('/')[-1].split('_')[0]) + 1]
                p_r.append(temp)
            return p_r
        except:
            return None
        
    def get_activetime(self, page):
        try:
            find_activetime = page.find('dd', {'class':"active-time"}).text.split(' ')
            if len(find_activetime) <= 2:
                if 'hour' in find_activetime:
                    hr = float(find_activetime[0])
                    time = hr*60
                elif 'hr' in find_activetime:
                    time = float(find_activetime[0])*60
                elif 'hours' in find_activetime:
                    time = float(find_activetime[0])*60
                else:
                    time = float(find_activetime[0])
            else:
                time = (float(find_activetime[0])+float(find_activetime[1]))*60
            
            return time
        except:
            return None
    
    # try to check the time
    def get_time(self, page):
        try:
            find_time = page.find('dd', {'class':"active-time"}).text.split(' ')
            return find_time
        except:
            return None
    
    def get_ingredients(self, page):
        try:
            ing = []
            ingredients = page.findAll('li', {'class':'ingredient', 'itemprop': 'ingredients'})
            for i in range(len(ingredients)):
                
                if 'tablespoons' not in str(ingredients[i]):
                    if 'tablespoon' not in str(ingredients[i]):
                        if 'teaspoons' not in str(ingredients[i]):
                            if 'teaspoon' not in str(ingredients[i]):
                                if 'tsp.' not in str(ingredients[i]):
                                    if 'Tsp' not in str(ingredients[i]):
                                        if 'Tbsp.' not in str(ingredients[i]):
                                            pattern  = re.compile(r'.*\d+')
                                            igs = re.findall(pattern,str(ingredients[i]))
                                            if len(igs) >0:
                                                   ing.append(ingredients[i].text)
                #else:
                    #pattern = re.compile(r'[(]about\s(\d*\s.*)[)]')
                    #ingre = re.findall(pattern,ingredients[i])
                    #ing.append(ingre[0].text)
                    
            return ing
        except:
            return None

    def get_serving(self, page):
        try:
            numofp= page.find('span', {'class': 'per-serving'}).text.split('(')[1]
            return float(numofp.split(' ')[0])
        except:
            return None


    def __init__(self, page):
        print('attempting to build from: '+page)
        try:
            self.build_recipie(bs(url(page), 'html.parser'))
        except Exception as x:
            print('Could not build from %s, %s'%(page,x))
            



# 3. Find all the addresses for recipes and store them in ep_urls

In [7]:
all_url = ['https://www.epicurious.com/recipes-menus/what-to-cook-this-weekend-february-22-24-gallery',
          'https://www.epicurious.com/recipes-menus/what-to-cook-this-weekend-february-8-10-gallery',
          'https://www.epicurious.com/ingredients/acorn-delicata-kabocha-spaghetti-squash-winter-recipes-gallery',
           'https://www.epicurious.com/recipes-menus/easy-dinner-recipes-for-cook90-gallery',
          'https://www.epicurious.com/recipes-menus/our-favorite-cook90-lunches-gallery',
         'https://www.epicurious.com/recipes-menus/make-ahead-weeknight-dinners-stew-soup-freezer-casserole-quick-easy-recipes-gallery',
         'https://www.epicurious.com/recipes-menus/what-to-cook-this-weekend-january-11-13-gallery',
         'https://www.epicurious.com/recipes-menus/back-to-school-dinner-recipes-gallery'
          ]

ep_urls = set()
for i in all_url:        
    initializer = url(i)
    res = bs(initializer.read(),"html5lib")
    for div in res.findAll('div', {'class': 'gallery-slide-caption__dek-container'}):
            ep_urls.update([div.find('a')['href']]) 

# 4. Scrape the website of recipes and generate the data. Store the data as recipes_data.json.

In [8]:
p = multiprocessing.Pool(4)
output = p.map(EP_Recipe,ep_urls)
pickle.dump(output,open('epi_recipes.final','wb'))

data = pickle.load(open('epi_recipes.final','rb'))
ar = []
for i in data:
    ar.append(i.__dict__)
pickle.dump(ar,open('epi_recipe_dict_form.dict','wb'))

with open('recipes_data.json', 'w') as fp:
    json.dump(ar, fp)

attempting to build from: https://www.epicurious.com/recipes/food/views/butternut-squash-coconut-and-ginger-muffins
attempting to build from: https://www.epicurious.com/recipes/food/views/sheet-pan-skirt-steak-with-balsamic-vinaigrette-broccolini-and-white-beans-56390002
attempting to build from: https://www.epicurious.com/recipes/food/views/cast-iron-pizza-with-fennel-and-sausage
attempting to build from: https://www.epicurious.com/recipes/food/views/quick-sesame-chicken-with-broccoli
attempting to build from: https://www.epicurious.com/recipes/food/views/twice-roasted-squash-with-parmesan-butter-and-grains
attempting to build from: https://www.epicurious.com/recipes/food/views/pasta-with-15-minute-meat-sauce-56390063
attempting to build from: https://www.epicurious.com/recipes/food/views/easy-chicken-tortilla-soup-with-bean-and-cheese-nachos
attempting to build from: https://www.epicurious.com/recipes/food/views/butternut-squash-kale-and-crunchy-pepitas-taco-51249020
attempting to bu

attempting to build from: https://www.epicurious.com/recipes/food/views/stuffed-sweet-potatoes-with-curried-chickpeas-and-mushrooms
attempting to build from: https://www.epicurious.com/recipes/food/views/chicken-soup-with-charred-cabbage
attempting to build from: https://www.epicurious.com/recipes/food/views/sausage-and-greens-sheet-pan-dinner
attempting to build from: https://www.epicurious.com/recipes/food/views/curried-chickpea-and-lentil-dal
attempting to build from: https://www.epicurious.com/recipes/food/views/autumn-kale-salad-gefilte-manifesto-recipe
attempting to build from: https://www.epicurious.com/recipes/food/views/chicken-coconut-curry-in-a-hurry
attempting to build from: https://www.epicurious.com/recipes/food/views/cracked-farro-and-broccoli-salad
attempting to build from: https://www.epicurious.com/recipes/food/views/chicken-meatballs-with-ginger-and-miso
attempting to build from: https://www.epicurious.com/recipes/food/views/trout-toast-with-soft-scrambled-eggs
attem

attempting to build from: https://www.epicurious.com/recipes/food/views/baked-feta-and-greens-with-lemony-yogurt
attempting to build from: https://www.epicurious.com/expert-advice/sunday-stash-1-batch-of-sweet-potatoes-9-family-meals-article
Could not build from https://www.epicurious.com/expert-advice/sunday-stash-1-batch-of-sweet-potatoes-9-family-meals-article, 'NoneType' object has no attribute 'text'
attempting to build from: https://www.epicurious.com/recipes/food/views/big-batch-roasted-kale
attempting to build from: https://www.epicurious.com/recipes/food/views/chickpea-crepe-tacos-with-eggplant-and-lamb
attempting to build from: https://www.epicurious.com/recipes/food/views/hummus-dinner-bowls-with-spiced-ground-beef-and-tomatoes
attempting to build from: https://www.epicurious.com/recipes/food/views/sunday-stash-sweet-potatoes
attempting to build from: https://www.epicurious.com/recipes/food/views/chicory-escarole-radicchio-salad-with-honey-mustard-vinaigrette
attempting to b

attempting to build from: https://www.epicurious.com/recipes/food/views/winter-italian-chopped-salad
attempting to build from: https://www.epicurious.com/recipes/food/views/butternut-squash-steaks-with-brown-buttersage-sauce
attempting to build from: https://www.epicurious.com/recipes/food/views/diner-style-patty-melt
attempting to build from: https://www.epicurious.com/recipes/food/views/honey-roasted-vegetable-salad
attempting to build from: https://www.epicurious.com/recipes/food/views/pantry-pasta-puttanesca
attempting to build from: https://www.epicurious.com/recipes/food/views/spiced-kabocha-squash-pie-with-pumpkin-seed-crumble-51257280
attempting to build from: https://www.epicurious.com/recipes/food/views/shredded-chicken-salad-with-creamy-miso-dressing
attempting to build from: https://www.epicurious.com/recipes/food/views/pork-tenderloin-with-turmeric-squash-and-collard-greens-salad
attempting to build from: https://www.epicurious.com/recipes/food/views/stuffed-sweet-potatoes

attempting to build from: https://www.epicurious.com/recipes/food/views/kale-salad-with-roasted-butternut-squash-pomegranate-and-pumpkin-seeds
attempting to build from: https://www.epicurious.com/recipes/food/views/roasted-beet-tzatziki-salad
attempting to build from: https://www.epicurious.com/recipes/food/views/honeynut-squash-with-radicchio-and-miso
attempting to build from: https://www.epicurious.com/recipes/food/views/easy-fried-rice-with-chicken-and-broccolini
attempting to build from: https://www.epicurious.com/recipes/food/views/korean-fried-chicken
attempting to build from: https://www.epicurious.com/recipes/food/views/skillet-chicken-and-zucchini-enchiladas-with-tomatillo-sauce
attempting to build from: https://www.epicurious.com/recipes/food/views/steak-and-eggs-with-saucy-beans
attempting to build from: https://www.epicurious.com/recipes/food/views/curried-yellow-split-pea-soup-with-spiced-coconut
attempting to build from: https://www.epicurious.com/recipes/food/views/spice

attempting to build from: https://www.epicurious.com/recipes/food/views/low-country-boil-with-shrimp-corn-and-sausage
attempting to build from: https://www.epicurious.com/recipes/food/views/tuna-and-artichoke-cooler-pressed-sandwiches-238871


# 5. Read the data

In [9]:
df = pd.read_json('recipes_data.json')
df

Unnamed: 0,activetime,calories,carbohydrates,cholesterol,fat,fiber,monounsaturatedFat,personal_rating,polyunsaturatedFat,protein,rating,saturateFat,servings,sodium,time,title,total_ingredients
0,15.0,820.0,37.0,109.0,56.0,9.0,35.0,"[[bamohr from Omaha, NE , 4], [dstephenson5 fr...",5.0,45.0,4.5,13.0,4.0,977.0,"[15, minutes]",Sheet-Pan Skirt Steak With Balsamic Vinaigrett...,"[4 garlic cloves, divided, 1/2 cup balsamic vi..."
1,,507.0,70.0,41.0,24.0,7.0,10.0,"[[yazbeansaysmeow from Washington, DC , 2]]",2.0,6.0,2.0,11.0,6.0,1156.0,,,"[1 (3–4-lb.) kabocha, buttercup, or kuri squas..."
2,,749.0,39.0,163.0,51.0,10.0,25.0,"[[gumboboy from Rochester NY , 3], [carienmool...",8.0,36.0,4.0,12.0,4.0,905.0,,Charred Chicken with Sweet Potatoes and Oranges,"[4 skin-on, bone-in chicken thighs, 4 garlic c..."
3,15.0,416.0,43.0,45.0,18.0,3.0,6.0,"[[gzwisern from Nashville,TN , 5], [glamourbeb...",2.0,19.0,5.0,8.0,4.0,1188.0,"[15, minutes]",French Bread Pizzas with Mozzarella and Pepper...,"[1 (12–14"") soft loaf French or Italian bread,..."
4,,194.0,22.0,34.0,11.0,1.0,4.0,"[[rstarner3877 from Columbia, SC , 5], [mirand...",1.0,2.0,5.0,6.0,12.0,110.0,"[1, hour,, 10, minutes]",Orange Sweet Rolls,"[1/2 cup (1 stick) unsalted butter, softened, ..."
5,,1184.0,71.0,79.0,92.0,16.0,54.0,"[[laurieandMarls , 5], [msneller from Wisconsi...",10.0,28.0,5.0,23.0,4.0,1806.0,,,"[1 small kabocha or acorn squash (2–3 lb.), 8 ..."
6,12.0,410.0,64.0,41.0,12.0,5.0,3.0,,1.0,11.0,3.5,7.0,6.0,417.0,"[12, minutes]",Pasta with Smoky Pumpkin Cream Sauce,"[1 pound penne or other pasta, 1 (15-ounce) ca..."
7,35.0,98.0,15.0,18.0,4.0,0.0,1.0,"[[weescotishlassie from Stamford, CT , 5], [Al...",0.0,1.0,4.5,3.0,24.0,56.0,"[35, minutes]",Classic Snickerdoodle Cookies,"[1 1/2 cups all-purpose flour, 1/2 cup (1 stic..."
8,,1010.0,59.0,275.0,85.0,1.0,14.0,"[[tochefs from Toronto, ON , 5], [A Cook from ...",3.0,12.0,4.5,41.0,6.0,273.0,,,"[2 cups heavy whipping cream, 1/3 cup (packed)..."
9,,763.0,47.0,411.0,53.0,15.0,19.0,"[[sitagaki , 5], [joannearno from Winnipeg,Can...",17.0,28.0,4.5,13.0,6.0,847.0,,,"[12 large eggs, 1 1/2 cups chopped tender herb..."


# 6. Data Processing: Delete the row with NaN data and only consider the recipes with at least ten reviews.

In [10]:
df=df.dropna(axis = 0)
df

Unnamed: 0,activetime,calories,carbohydrates,cholesterol,fat,fiber,monounsaturatedFat,personal_rating,polyunsaturatedFat,protein,rating,saturateFat,servings,sodium,time,title,total_ingredients
0,15.0,820.0,37.0,109.0,56.0,9.0,35.0,"[[bamohr from Omaha, NE , 4], [dstephenson5 fr...",5.0,45.0,4.5,13.0,4.0,977.0,"[15, minutes]",Sheet-Pan Skirt Steak With Balsamic Vinaigrett...,"[4 garlic cloves, divided, 1/2 cup balsamic vi..."
3,15.0,416.0,43.0,45.0,18.0,3.0,6.0,"[[gzwisern from Nashville,TN , 5], [glamourbeb...",2.0,19.0,5.0,8.0,4.0,1188.0,"[15, minutes]",French Bread Pizzas with Mozzarella and Pepper...,"[1 (12–14"") soft loaf French or Italian bread,..."
7,35.0,98.0,15.0,18.0,4.0,0.0,1.0,"[[weescotishlassie from Stamford, CT , 5], [Al...",0.0,1.0,4.5,3.0,24.0,56.0,"[35, minutes]",Classic Snickerdoodle Cookies,"[1 1/2 cups all-purpose flour, 1/2 cup (1 stic..."
13,25.0,662.0,39.0,116.0,44.0,5.0,12.0,"[[pumpkinsee from Hong Kong , 5], [laah1 from ...",3.0,32.0,4.0,23.0,4.0,756.0,"[25, minutes]",Grilled Cheese Tacos,"[2 ears of corn, husked, 1 small red onion, cu..."
17,50.0,820.0,73.0,184.0,47.0,4.0,15.0,"[[dlnordel from Gardena, CA , 5], [sarahmlee f...",3.0,28.0,4.5,26.0,4.0,713.0,"[50, minutes]",Beef and Potato Pasties,"[2 1/2 cups all-purpose flour, plus more for s..."
18,22.0,564.0,10.0,166.0,35.0,2.0,8.0,"[[Francus from Charlotte, NC , 5], [cindy6766 ...",3.0,55.0,4.5,21.0,4.0,852.0,"[22, minutes]",Coconut Chicken Curry in a Hurry,"[2 pounds boneless, skinless chicken breasts o..."
26,20.0,470.0,15.0,84.0,34.0,4.0,16.0,"[[hasmig4u from Ottawa, Canada , 5], [Annegioi...",3.0,28.0,4.5,12.0,4.0,989.0,"[20, minutes]",Sheet-Pan Chicken with Tomatoes and Mozzarella,"[3 pints grape tomatoes, halved, 1/4 cup extra..."
28,10.0,349.0,22.0,232.0,22.0,1.0,6.0,"[[jhageman from Chicago, IL , 5], [websherpa.c...",2.0,15.0,5.0,12.0,2.0,377.0,"[10, minutes]",Savory Dutch Baby for Two,"[1/4 cup all-purpose flour, 2 large eggs, 1/2 ..."
29,25.0,420.0,27.0,99.0,20.0,8.0,7.0,"[[splaine , 5], [lovestoputter , 4], [juls14 f...",3.0,35.0,4.5,8.0,6.0,843.0,"[25, minutes]",Cheesy Chicken Enchilada Skillet,"[1/2 onion, chopped, 2 garlic cloves, finely c..."
30,10.0,737.0,16.0,193.0,50.0,5.0,21.0,"[[502forestdrive4876 from Washington DC , 5], ...",10.0,52.0,5.0,14.0,4.0,708.0,"[10, minutes]","Braised Rotisserie Chicken with Bacon, Tomatoe...","[4 ounces bacon (about 4 strips), sliced cross..."


In [7]:
df.groupby(df['title']).size()

title
"Antipasto" Pasta with Sausage, Artichoke Hearts, and Sun-Dried Tomatoes         1
"Nextover" Chicken Tacos with Quick Refried Beans                                1
"Nextover" Chile-Orange Pork Stir-Fry                                            1
10-Minute Chicken Flatbreads with Hummus and Yogurt                              1
10-Minute Sausage Skillet with Cherry Tomatoes and Broccolini                    1
10-Minute Shrimp with Green Beans and Creamy Lemon-Dill Dip                      1
3-Ingredient Cacio e Pepe (Pasta With Cheese and Pepper)                         1
3-Ingredient Creamy Pumpkin Pasta                                                1
BBQ Pork Chops with Herb-Butter Corn and Sweet Potatoes                          1
Baked Feta and Greens with Lemony Yogurt                                         1
Baked Mustard-Crusted Salmon with Asparagus and Tarragon                         1
Basil-Cashew-Lime Vermicelli Bowls with Pork and Green Beans                     

In [8]:
# df = df.dropna(axis=0) #delete row with NaN 
df.shape

(94, 17)

In [100]:
df['activetime'].value_counts()

22.0    12
30.0    10
40.0     5
25.0     5
15.0     5
45.0     4
35.0     3
10.0     3
60.0     2
20.0     2
50.0     2
8.0      1
Name: activetime, dtype: int64

In [11]:
df = df[df['personal_rating'].map(len) > 10] ##delete the recipes with less than 10 reviews
df.shape[0]

54

In [9]:
df

Unnamed: 0,activetime,calories,carbohydrates,cholesterol,fat,fiber,monounsaturatedFat,personal_rating,polyunsaturatedFat,protein,rating,saturateFat,servings,sodium,time,title,total_ingredients
1,30.0,526.0,38.0,79.0,23.0,11.0,10.0,"[[alessandro368 from San Diego, CA , 3], [darz...",9.0,43.0,4.5,3.0,4.0,715.0,"[30, minutes]",Broiled Cod with Fennel and Orange,"[1/4 cup mayonnaise, 2 garlic cloves, finely g..."
2,22.0,813.0,46.0,215.0,47.0,6.0,22.0,"[[bobroeder from Penn Valley, CA. , 5], [flag...",7.0,51.0,5.0,14.0,4.0,1503.0,"[22, minutes]",Quick Baked Chicken Parmesan,"[2 large eggs, 1 1/2 cups breadcrumbs or panko..."
4,15.0,549.0,17.0,108.0,36.0,3.0,7.0,"[[cbradbury from Silverton, OR , 4], [egarson...",4.0,42.0,4.5,22.0,4.0,760.0,"[15, minutes]","Easy Green Curry with Chicken, Bell Pepper, an...","[1/4 cup green curry paste, 1 medium onion, sl..."
13,30.0,1141.0,89.0,18.0,77.0,25.0,45.0,"[[jaime_j from Chicago , 5], [dedi from Long I...",17.0,34.0,5.0,13.0,2.0,2287.0,"[30, minutes]",Herb-Crusted Cauliflower Steaks with Beans and...,"[1 large head of cauliflower (about 2 pounds),..."
17,22.0,1033.0,95.0,92.0,53.0,7.0,21.0,"[[gsconnor225 from Thousand Oaks, CA , 5], [ra...",7.0,44.0,5.0,20.0,4.0,3495.0,"[22, minutes]","Quick Pork Ramen with Carrots, Zucchini, and B...",[1 ounce dried mushrooms (preferably shiitake ...
18,50.0,820.0,73.0,184.0,47.0,4.0,15.0,"[[dlnordel from Gardena, CA , 5], [sarahmlee f...",3.0,28.0,4.5,26.0,4.0,713.0,"[50, minutes]",Beef and Potato Pasties,"[2 1/2 cups all-purpose flour, plus more for s..."
20,50.0,560.0,21.0,131.0,37.0,1.0,18.0,"[[catsare4me from New Jersey , 5], [amerolla f...",9.0,35.0,5.0,7.0,12.0,482.0,"[50, minutes]",Make-Ahead Crispy Chicken Cutlets,"[2 large egg yolks, 1/2 cup mayonnaise, 4 lb. ..."
36,35.0,733.0,92.0,11.0,34.0,30.0,21.0,"[[sheri2 from New Jersey , 5], [lnhalverson f...",5.0,23.0,4.5,5.0,2.0,1509.0,"[35, minutes]",Stuffed Sweet Potatoes with Beans and Guacamole,[2 medium sweet potatoes (about 9 ounces each)...
38,22.0,441.0,34.0,112.0,18.0,8.0,8.0,"[[freefield from Grass Valley, CA , 5], [lynne...",2.0,39.0,4.0,5.0,4.0,1134.0,"[22, minutes]",Quick Chicken Tikka Masala,"[1 (2 1/2"") piece ginger, peeled, 4 garlic clo..."
49,40.0,612.0,71.0,37.0,27.0,8.0,14.0,"[[mllnmchine from NYC , 5], [AbigaelSarah , 5]...",5.0,25.0,4.5,6.0,4.0,765.0,"[40, minutes]",Basil-Cashew-Lime Vermicelli Bowls with Pork a...,"[1 cup salted, roasted cashews, divided, 2 lar..."


# 7. Scratch the price of gredients and manipulate data to get the quantities, Calculate price of each recipe

In [73]:
df
df.ix[:,[15,16]].to_csv('revised_recipes.csv', header=True, index=False)

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  


In [86]:
df.ix[:,[12]].to_csv('serving_numbers.csv', header=True, index=False)

.ix is deprecated. Please use
.loc for label based indexing or
.iloc for positional indexing

See the documentation here:
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated
  """Entry point for launching an IPython kernel.


In [12]:
p_r = pd.DataFrame(columns=['title', 'user', 'rating', 'loc'])

# 8.0 Combine and average customer ratings by 54 states 

In [13]:
us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Los Angeles': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Chicago':'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Boston':'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'NYC': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY',
}

In [14]:
states = ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA", 
"HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD", 
"MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ", 
"NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC", 
"SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"]

In [15]:
#if location not in states, label it with other

count = 0
for i in range(df.shape[0]):
    for j in df.iloc[i,7]:
        location = j[0].split("from ")[-1].rstrip().replace(".","")
        location = location.split(",")[-1].lstrip()
        if location in us_state_abbrev.keys():
            location = us_state_abbrev[location]
        location = location.split(" ")[-1]
        location = location.upper()
        if location not in states:
            location = "OTHER"
        p_r.loc[count] = [df.iloc[i,15],j[0],j[1],location]     
        count += 1
p_r

Unnamed: 0,title,user,rating,loc
0,Sheet-Pan Skirt Steak With Balsamic Vinaigrett...,"bamohr from Omaha, NE",4,NE
1,Sheet-Pan Skirt Steak With Balsamic Vinaigrett...,"dstephenson5 from Washington, DC",5,DC
2,Sheet-Pan Skirt Steak With Balsamic Vinaigrett...,"ladimcbeth from Los Angeles, CA",5,CA
3,Sheet-Pan Skirt Steak With Balsamic Vinaigrett...,"wilkcj04 from Raleigh, NC",5,NC
4,Sheet-Pan Skirt Steak With Balsamic Vinaigrett...,emsmandms from NYC,5,NY
5,Sheet-Pan Skirt Steak With Balsamic Vinaigrett...,"kundwild from Walpole, ma",5,MA
6,Sheet-Pan Skirt Steak With Balsamic Vinaigrett...,cathy2e_toohey from CA,5,CA
7,Sheet-Pan Skirt Steak With Balsamic Vinaigrett...,"amychasenh from Alton, NH",5,NH
8,Sheet-Pan Skirt Steak With Balsamic Vinaigrett...,vshapi from Los angeles,5,OTHER
9,Sheet-Pan Skirt Steak With Balsamic Vinaigrett...,"Habanero84 from Sonora, CA",3,CA


In [102]:
p_r['loc'].value_counts().describe()

count     49.000000
mean      22.734694
std       55.237282
min        1.000000
25%        3.000000
50%        9.000000
75%       20.000000
max      365.000000
Name: loc, dtype: float64

In [17]:
p_r.groupby(p_r['loc']).size()

loc
AK         1
AL         4
AZ        13
CA       141
CO        26
CT        12
DC        14
DE         1
FL        22
GA         7
HI         3
IA         3
ID         3
IL        44
IN        12
KS         1
KY         3
LA         7
MA        49
MD        21
ME         5
MI        16
MN        14
MO         8
MS         1
MT         2
NC        24
ND         1
NE         1
NH         6
NJ        15
NM         3
NV         7
NY        83
OH        14
OK         2
OR        22
OTHER    365
PA        19
RI         2
SC         9
TN         9
TX        33
UT         5
VA        20
VT         3
WA        28
WI         9
WY         1
dtype: int64

In [96]:
# user_reviews2 = p_r['user'].value_counts()[p_r['user'].value_counts() > 0].index
# trun_recipes_user_review = p_r[p_r['user'].isin(user_reviews2)]
# trun_recipes_user_review

Unnamed: 0,title,user,rating,loc
0,"Dill-Crusted Pork Tenderloin with Farro, Pea, ...",loriltx from Houston,5,OTHER
1,"Dill-Crusted Pork Tenderloin with Farro, Pea, ...",loriltx from Houston,5,OTHER
2,"Dill-Crusted Pork Tenderloin with Farro, Pea, ...","icountbyfives from Sweetwater, TN",5,TN
3,"Dill-Crusted Pork Tenderloin with Farro, Pea, ...","mmccbf from Cincinnati, OH",5,OH
4,"Dill-Crusted Pork Tenderloin with Farro, Pea, ...","edinan from Dallas, TX",5,TX
5,"Dill-Crusted Pork Tenderloin with Farro, Pea, ...","haneye from Seattle, WA",5,WA
6,"Dill-Crusted Pork Tenderloin with Farro, Pea, ...","rahappygirl from Houston, TX",4,TX
7,"Dill-Crusted Pork Tenderloin with Farro, Pea, ...","jansan1 from Orange County, CA",5,CA
8,"Dill-Crusted Pork Tenderloin with Farro, Pea, ...","sensorydeb from Asheville, NC",5,NC
9,"Dill-Crusted Pork Tenderloin with Farro, Pea, ...",seattletaste from Seattle,5,OTHER


# 9. Construct the final data set for recipes-user-rating.

In [16]:
p_r_2 = p_r[['title','rating','loc']]
p_r_2['rating'] = p_r_2['rating'].astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [17]:
p_r_3 = pd.pivot_table(p_r_2,index = ['loc'],columns=['title'],aggfunc=np.mean)
p_r_3

Unnamed: 0_level_0,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating
title,10-Minute Sausage Skillet with Cherry Tomatoes and Broccolini,Baked Feta and Greens with Lemony Yogurt,Baked Mustard-Crusted Salmon with Asparagus and Tarragon,Basil-Cashew-Lime Vermicelli Bowls with Pork and Green Beans,Beef and Potato Pasties,"Braised Rotisserie Chicken with Bacon, Tomatoes, and Kale",Broiled Cod with Fennel and Orange,Cauliflower Carbonara,Cauliflower-Crust Pizza with Tomatoes and Mozzarella,Cheesy Chicken Enchilada Skillet,...,"Sheet-Pan Skirt Steak With Balsamic Vinaigrette, Broccolini, and White Beans",Shrimp and Grits Fritters,Shrimp with Herby White Beans and Tomatoes,Skillet Chicken and Zucchini Enchiladas with Tomatillo Sauce,Slow Cooker Pork Shoulder with Zesty Basil Sauce,Spanish Frittata with Herby Yogurt and Greens,Stuffed Sweet Potatoes with Beans and Guacamole,Stuffed Sweet Potatoes with Curried Chickpeas and Mushrooms,Thai Turkey Meatballs with Coconut Broth and Noodles,Tuna and Artichoke Cooler-Pressed Sandwiches
loc,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
AK,,,3.0,,,,,,,,...,,,,,,,,,,
AL,,,,,,5.0,5.0,,,,...,,1.0,,,,,,,,
AZ,,,,5.0,5.0,,,,,,...,,,,,,,,,,
CA,4.8,5.0,5.0,,4.5,5.0,3.8,5.0,4.5,5.0,...,4.333333,3.0,4.5,5.0,5.0,4.5,,,5.0,4.5
CO,5.0,,,5.0,5.0,,,,5.0,5.0,...,5.0,4.5,5.0,5.0,3.0,,5.0,,,
CT,5.0,5.0,,,,,,,,,...,,,,5.0,,,5.0,,2.0,
DC,,,,,,5.0,,,,,...,5.0,,,,5.0,,,,,
DE,,,,,,,,,,,...,,,,,,,,,,
FL,,,5.0,,,5.0,5.0,,4.0,,...,5.0,,,,5.0,5.0,,,,
GA,,,,,,5.0,,,,3.5,...,,,,,,,,,,


In [88]:
p_r_3.to_csv('recipes_users_ratings.csv', header=False, index=False)
p_r_3

Unnamed: 0_level_0,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating,rating
title,10-Minute Sausage Skillet with Cherry Tomatoes and Broccolini,Baked Feta and Greens with Lemony Yogurt,Baked Mustard-Crusted Salmon with Asparagus and Tarragon,Basil-Cashew-Lime Vermicelli Bowls with Pork and Green Beans,Beef and Potato Pasties,"Braised Rotisserie Chicken with Bacon, Tomatoes, and Kale",Broiled Cod with Fennel and Orange,Cauliflower Carbonara,Cauliflower-Crust Pizza with Tomatoes and Mozzarella,Cheesy Chicken Enchilada Skillet,...,"Sheet-Pan Skirt Steak With Balsamic Vinaigrette, Broccolini, and White Beans",Shrimp and Grits Fritters,Shrimp with Herby White Beans and Tomatoes,Skillet Chicken and Zucchini Enchiladas with Tomatillo Sauce,Slow Cooker Pork Shoulder with Zesty Basil Sauce,Spanish Frittata with Herby Yogurt and Greens,Stuffed Sweet Potatoes with Beans and Guacamole,Stuffed Sweet Potatoes with Curried Chickpeas and Mushrooms,Thai Turkey Meatballs with Coconut Broth and Noodles,Tuna and Artichoke Cooler-Pressed Sandwiches
loc,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
AK,,,3.0,,,,,,,,...,,,,,,,,,,
AL,,,,,,5.0,5.0,,,,...,,1.0,,,,,,,,
AZ,,,,5.0,5.0,,,,,,...,,,,,,,,,,
CA,4.8,5.0,5.0,,4.5,5.0,3.8,5.0,4.5,5.0,...,4.333333,3.0,4.5,5.0,5.0,4.5,,,5.0,4.5
CO,5.0,,,5.0,5.0,,,,5.0,5.0,...,5.0,4.5,5.0,5.0,3.0,,5.0,,,
CT,5.0,5.0,,,,,,,,,...,,,,5.0,,,5.0,,2.0,
DC,,,,,,5.0,,,,,...,5.0,,,,5.0,,,,,
DE,,,,,,,,,,,...,,,,,,,,,,
FL,,,5.0,,,5.0,5.0,,4.0,,...,5.0,,,,5.0,5.0,,,,
GA,,,,,,5.0,,,,3.5,...,,,,,,,,,,


# Collaborative Filtering in R

# 10. Construct model and solve objective 

# Download the library

In [83]:
import sys
try:
    import docplex.mp
except:
    if hasattr(sys, 'real_prefix'):
        #we are in a virtual env.
        !pip install docplex
    else:
        !pip install --user docplex

# create a model instance

In [93]:
from docplex.mp.model import Model

MnM = Model(name='Meal Planning for the 2 roomates')

# Define the variables

In [94]:
# x, y, z are binary variable
x = {(i): MnM.binary_var(name='x_{0}'.format(i)) for i in range(df.shape[0])}
y = {(i,j): MnM.binary_var(name='y_{0}_{1}'.format(i,j)) for i in range(df.shape[0]) for j in range(5)}
z = {(i,j): MnM.binary_var(name='z_{0}_{1}'.format(i,j)) for i in range(df.shape[0]) for j in range(5)}

# only use x
# idx = [(i, t, k) for i in range(2) for j in range(5) for k in range(df.shape[0])]
# x = {(i,t,k): MnM.binary_var(name='x_{0}_{1}_{2}'.format(i,t,k)) 
#      for i in range(2) for t in range(5) for k in range(df.shape[0])}

In [95]:
# for limit the size of nutrient, cooking time, expence
# ？？ why there need a fplus and range for 5
# fplus = {(i): MnM.continuous_var(name='fplus_{0}'.format(i), lb=0) for i in range(5)}
# fminus = {(i): MnM.continuous_var(name='fminus_{0}'.format(i), lb=0) for i in range(5)}
# eplus = MnM.continuous_var(name='eplus', lb=0)
# eminus = MnM.continuous_var(name='eminus', lb=0)

w = MnM.continuous_var(name='w', lb=0)

# Define the parameters

In [26]:
#read personal rating
import csv

with open('personal-rating-after-matrix-completion.csv', 'r') as f:
    reader = csv.reader(f)
    your_list = list(reader)

In [27]:
a = your_list[1:]
a = a[0]
rating = list(map(float, a))
del rating[0]
rating

[1.69,
 1.4,
 2.52,
 1.26,
 0.81,
 0.87,
 1.26,
 0.87,
 0.91,
 2.03,
 0.99,
 1.51,
 0.89,
 2.24,
 1.23,
 1.17,
 1.79,
 0.87,
 0.85,
 1.29,
 1.0,
 1.49,
 1.81,
 1.28,
 1.72,
 2.07,
 2.18,
 1.88,
 0.95,
 1.04,
 0.99,
 1.36,
 2.12,
 2.68,
 1.91,
 3.46,
 0.82,
 1.35,
 1.19,
 1.52,
 1.67,
 0.95,
 1.11,
 2.15,
 2.33,
 2.64,
 1.99,
 1.72,
 2.48,
 2.65,
 2.0,
 1.02,
 0.85,
 3.17]

In [28]:
# import csv
# with open('price.csv', 'r') as price_file:
#     reader = csv.reader(price_file)
#     price_list = list(reader)
# file_errors_location = '\~533-project\\price.xlsx'
# price_list= pd.read_excel(file_errors_location)
# price_list

from pandas import read_excel
my_sheet = 'Sheet1'
file_name = 'price.xlsx' 
price_list= read_excel(file_name, sheet_name = my_sheet)
price_list

Unnamed: 0,title,ground chicken(pound),egg(#),breadcrumbs (cup),water(cup),onion(#),green(oz),feta(lb),chickpeas(oz),yogurt(cup),...,asparagus (lb),squash(lb),cucumber(#),Unnamed: 63,Unnamed: 64,total_price,servings,Unnamed: 67,pirce/2 people,Unnamed: 69
0,Chicken Meatballs with Ginger and Miso,0.75,1,0.25,4.0,0.0,0.0,0.0,0,0.0,...,0,0,0,,,5.77,20,,2.308,# 4 meatball/person
1,Baked Feta and Greens with Lemony Yogurt,0.0,0,0.0,0.0,0.0,5.0,0.5,15,0.5,...,0,0,0,,,8.34,4,,4.17,
2,10-Minute Sausage Skillet with Cherry Tomatoes...,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,...,0,0,0,,,14.71,4,,7.355,
3,"Braised Rotisserie Chicken with Bacon, Tomatoe...",0.0,0,0.0,0.0,0.0,2.5,0.0,0,0.0,...,0,0,0,,,15.72,4,,7.86,
4,Thai Turkey Meatballs with Coconut Broth and N...,1.0,1,0.5,0.0,0.0,0.0,0.0,0,0.0,...,0,0,0,,,9.93,4,,4.965,
5,Tuna and Artichoke Cooler-Pressed Sandwiches,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,...,0,0,0,,,3.23,4,,1.615,
6,Homemade Instant Mac and Cheese,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,...,0,0,0,,,3.74,6,,1.246667,
7,"Pasta with 15-Minute Ham, Pea, and Cream Sauce",0.0,0,0.0,0.0,0.0,0.0,0.0,0,0.0,...,0,0,0,,,3.47,6,,1.156667,
8,Quick Baked Chicken Parmesan,0.4,2,1.5,3.0,0.5,0.0,0.0,0,0.0,...,0,0,0,,,6.63,4,,3.315,
9,Oven Risotto with Crispy Roasted Mushrooms,0.0,0,0.0,0.0,1.0,0.3,0.0,0,0.0,...,0,0,0,,,7.316,4,,3.658,


In [92]:
# define coefficient of contraints and rhs: price, expense, nutrients,  time

# nutrients = ['calories','carbohydrates','cholesterol','fat','fiber','monounsaturatedFat','polyunsaturatedFat',
#             'protein','protein','saturateFat','sodium']
# for i in nutrients:
#     i = df[i].tolist()

#Define nutrient parameter
calories = df['calories'].tolist()
carbohydrates = df['carbohydrates'].tolist()
cholesterol = df['cholesterol'].tolist()
fat = df['fat'].tolist()
fiber = df['fiber'].tolist()
protein = df['protein'].tolist()
saturateFat = df['saturateFat'].tolist()
sodium = df['sodium'].tolist()
# monounsaturatedFat = df['monounsaturatedFat'].tolist()
# polyunsaturatedFat = df['polyunsaturatedFat'].tolist()

#Define the cost parameter
price = price_list['pirce/2 people'].tolist()
# price = [9.38, 11.97 , 7.26 ,4.97 , 11.75 , 7.99 , 5.01,14.87 ,10.28 , 4.38, 9.38 ,  
# 8.14 ,  5.32, 11.37,10.23, 7.64 , 9.45 , 6.76, 4.27 ,12.87 ,3.99, 9.38, 11.97 , 7.26 ,4.97 , 11.75 , 7.99 , 5.01,14.87 ,10.28 , 4.38, 9.38 ,  8.14 ,  5.32, 11.37,10.23, 7.64 , 9.45 , 6.76, 4.27 ,12.87 ,3.99,9.38, 11.97 , 7.26 ,4.97 , 11.75 , 7.99 , 5.01,14.87 ,10.28 , 4.38, 9.38 ,  8.14 ,  5.32, 11.37,10.23, 7.64 , 9.45 , 6.76, 4.27 ,12.87 ,3.99,9.38, 11.97 , 7.26 ,4.97 , 11.75 , 7.99 , 5.01,14.87 ,10.28 , 4.38, 9.38 ,  8.14 ,  5.32, 11.37,10.23, 7.64 , 9.45 , 6.76, 4.27 ,12.87 ,3.99,9.38, 11.97 , 7.26 ,4.97 , 11.75 , 7.99 , 5.01,14.87 ,10.28 , 4.38, 9.38 ,  8.14 ,  5.32, 11.37,10.23, 7.64 , 9.45 , 6.76, 4.27 ,12.87 ,3.99,9.38, 11.97 , 7.26 ,4.97 , 11.75 , 7.99 , 5.01,14.87 ,10.28 , 4.38, 9.38 ,  8.14 ,  5.32, 11.37,10.23, 7.64 , 9.45 , 6.76, 4.27 ,12.87 ,3.99,9.38, 11.97 , 7.26 ,4.97 , 11.75 , 7.99 , 5.01,14.87 ,10.28 , 4.38, 9.38 ,  8.14 ,  5.32, 11.37,10.23, 7.64 , 9.45 , 6.76, 4.27 ,12.87 ,3.99,9.38, 11.97 , 7.26 ,4.97 , 11.75 , 7.99 , 5.01,14.87 ,10.28 , 4.38, 9.38 ,  
# 8.14 ,  5.32, 11.37,10.23, 7.64 , 9.45 , 6.76, 4.27 ,12.87 ,3.99, 9.38, 11.97 , 7.26 ,4.97 , 11.75 , 7.99 , 5.01,14.87 ,10.28 , 4.38, 9.38 ,  8.14 ,  5.32, 11.37,10.23, 7.64 , 9.45 , 6.76, 4.27 ,12.87 ,3.99,9.38, 11.97 , 7.26 ,4.97 , 11.75 , 7.99 , 5.01,14.87 ,10.28 , 4.38, 9.38 ,  8.14 ,  5.32, 11.37,10.23, 7.64 , 9.45 , 6.76, 4.27 ,12.87 ,3.99,9.38, 11.97 , 7.26 ,4.97 , 11.75 , 7.99 , 5.01,14.87 ,10.28 , 4.38, 9.38 ,  8.14 ,  5.32, 11.37,10.23, 7.64 , 9.45 , 6.76, 4.27 ,12.87 ,3.99,9.38, 11.97 , 7.26 ,4.97 , 11.75 , 7.99 , 5.01,14.87 ,10.28 , 4.38, 9.38 ,  8.14 ,  5.32, 11.37,10.23, 7.64 , 9.45 , 6.76, 4.27 ,12.87 ,3.99,9.38, 11.97 , 7.26 ,4.97 , 11.75 , 7.99 , 5.01,14.87 ,10.28 , 4.38, 9.38 ,  8.14 ,  5.32, 11.37,10.23, 7.64 , 9.45 , 6.76, 4.27 ,12.87 ,3.99,9.38, 11.97 , 7.26 ,4.97 , 11.75 , 7.99 , 5.01,14.87 ,10.28 , 4.38, 9.38 ,  8.14 ,  5.32, 11.37,10.23, 7.64 , 9.45 , 6.76, 4.27 ,12.87 ,3.99,
#          5.32, 11.37,10.23, 7.64]

#Define the time parameter
time = df['activetime'].tolist()

# time constraint right hand side
remaintime_y =[70,50,100,100,120]
remaintime_z=[50,80,50,80,80]

   
# nutrients constraints right hand side
#lower(1) and upper(2) bound of nutrient
#Daily Nutritional Goals for Age-Sex Groups 
#Based on Dietary Reference Intakes and Dietary 
#We choose Female 19-30 group as criterion
#colories = 2000kcal
#carbohydrates = 130g,45-65%colories
#cholesterol less than 500
#fat = 20-35%colories
#fiber =28g
#protein =46g, 10-35%colories
#saturateFat < 10%colories
#sodium = 2,300mg

c1_bound = [1300,2500]    #colories
c2_bound = [60,180]       #carbohydrates 
c3_bound = 500            #cholesterol upper bound 
f1_bound = [84,240]        #fat 
f2_bound = [18,35]        #fiber
p1_bound = [20,200]       #protein
s1_bound = 25             #saturateFat, upper bound 
s2_bound = 2300       #2300    #sodium ?? upper or lower bound? 
p2_bound = 30             #expense per week 


# penalty constraints 
###parameter for objective  ??? question
# beta = [1 ,1 , 1 , 1]
# gamma = 0.1
alpha = 0.1

In [59]:
# df = df.dropna()
# df.isnull().sum()
df.shape

(54, 17)

# Define the constraints

In [96]:
# five meals constrain
MnM.add_constraint(MnM.sum(x) - 5 == 0, ctname = 'subject to five_meals_total')
MnM.add_constraint(MnM.sum(y) <= 5)
MnM.add_constraint(MnM.sum(z) <= 5)

# assignment constraints
for i in range(54):
    MnM.add_constraint(MnM.sum(y[i,j] + z[i,j] for j in range(5)) == x[i])
    MnM.add_constraint(x[i] <= 1)

# schedule date constraint, each day only has 1 meal 
for j in range(5):
    MnM.add_constraint(MnM.sum(y[i,j] for i in range(54)) <= 1)
    MnM.add_constraint(MnM.sum(z[i,j] for i in range(54)) <= 1)
    MnM.add_constraint(MnM.sum(z[i,j]+y[i,j] for i in range(54)) <= 1)

# number of cooking constraint
MnM.add_constraint(MnM.sum(y) + MnM.sum(z) - MnM.sum(x) == 0)

# not repeat constraint, only one person do 1 meal per day
for i in range(df.shape[0]):
    MnM.add_constraint(MnM.sum(y[i,j] + z[i,j] for j in range(5)) <= 1)

# nutrition lower bound   
# here not each recipe nutrients limitation
# all five days nutrientions limitations.
for i in range(df.shape[0]):  #? question about how to formulate 
    MnM.add_indicator(x[i], 3*calories[i]*x[i] >= c1_bound[0], active_value =1)
    MnM.add_indicator(x[i], 3*calories[i]*x[i] <= c1_bound[1], active_value =1)
    MnM.add_indicator(x[i], carbohydrates[i]*x[i] >= c2_bound[0]/3, active_value =1)
    MnM.add_indicator(x[i], carbohydrates[i]*x[i] <= c2_bound[1]/3, active_value =1)
    MnM.add_indicator(x[i], 3*fat[i]*x[i] >= f1_bound[0], active_value =1)
    MnM.add_indicator(x[i], 3*fat[i]*x[i] <= f1_bound[1], active_value =1)
#     MnM.add_indicator(x[i], fiber[i]*x[i] >= f2_bound[0]/3, active_value =1)
#     MnM.add_indicator(x[i], fiber[i]*x[i] <= f2_bound[1]/3, active_value =1)
    MnM.add_indicator(x[i], 3*protein[i]*x[i] >= p1_bound[0], active_value =1)
    MnM.add_indicator(x[i], 3*protein[i]*x[i] <= p1_bound[1], active_value =1)
    MnM.add_indicator(x[i], cholesterol[i]*x[i] <= c3_bound, active_value =1)
    MnM.add_indicator(x[i], saturateFat[i]*x[i] <= s1_bound, active_value =1)
    MnM.add_indicator(x[i], sodium[i]*x[i] <= s2_bound, active_value =1)

# Expense constraint
MnM.add_constraint(MnM.sum(price[i]*x[i] for i in range(54)) <= p2_bound)

# schedule time penalty constraint
MnM.add_constraint(MnM.sum(time[i]* (y[i,j] - z[i,j]) for i in range(54) for j in range(5)) <= w)
MnM.add_constraint(MnM.sum(time[i]* (z[i,j] - y[i,j]) for i in range(54) for j in range(5)) <= w)

# cooking time constraint,
for j in range(5):
    MnM.add_constraint(MnM.sum(time[i]*y[i,j] for i in range(54)) <= remaintime_y[j])
    MnM.add_constraint(MnM.sum(time[i]*z[i,j] for i in range(54)) <= remaintime_z[j])   



# Define the objective function

In [97]:
MnM.maximize(MnM.sum(rating[i]*x[i] for i in range(df.shape[0])) - alpha*w)

In [70]:
# MnM.maximize(MnM.sum(rating[i]*x[i] for i in range(54)))

# Solve the problem

In [98]:
MnM.print_information()

Model: Meal Planning for the 2 roomates
 - number of variables: 595
   - binary=594, integer=0, continuous=1
 - number of constraints: 788
   - linear=194, indicator=594
 - parameters: defaults


In [99]:
MnMs= MnM.solve(log_output=True)
# assert MnMs
MnM.print_solution()

CPXPARAM_Read_DataCheck                          1
Tried aggregator 1 time.
MIP Presolve eliminated 299 rows and 421 columns.
MIP Presolve modified 500 coefficients.
Reduced MIP has 26 rows, 174 columns, and 824 nonzeros.
Reduced MIP has 173 binaries, 0 generals, 0 SOSs, and 0 indicators.
Presolve time = 0.01 sec. (2.14 ticks)
Found incumbent of value -152.140000 after 0.01 sec. (3.11 ticks)
Probing time = 0.00 sec. (0.62 ticks)
Tried aggregator 1 time.
Reduced MIP has 26 rows, 174 columns, and 824 nonzeros.
Reduced MIP has 173 binaries, 1 generals, 0 SOSs, and 0 indicators.
Presolve time = 0.01 sec. (0.57 ticks)
Probing time = 0.00 sec. (0.62 ticks)
Clique table members: 21.
MIP emphasis: balance optimality and feasibility.
MIP search method: dynamic search.
Parallel mode: deterministic, using up to 4 threads.
Root relaxation solution time = 0.00 sec. (0.26 ticks)

        Nodes                                         Cuts/
   Node  Left     Objective  IInf  Best Integer    Best Bound