In [25]:
import pandas as pd

In [26]:
df = pd.read_csv("data/nutrition.csv")

In [27]:
df = df.drop(columns="Unnamed: 0")

In [28]:
df.sample(1)

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
6636,"Rice, cooked without salt, unenriched, regular...",100 g,130,0.3g,0.1g,0,1.00 mg,2.1 mg,3.00 mcg,0.00 mcg,...,0.28 g,0.077 g,0.088 g,0.076 g,0.00 mg,0.0 g,0.41 g,0.00 mg,0.00 mg,68.44 g


In [29]:
df.columns

Index(['name', 'serving_size', 'calories', 'total_fat', 'saturated_fat',
       'cholesterol', 'sodium', 'choline', 'folate', 'folic_acid', 'niacin',
       'pantothenic_acid', 'riboflavin', 'thiamin', 'vitamin_a',
       'vitamin_a_rae', 'carotene_alpha', 'carotene_beta',
       'cryptoxanthin_beta', 'lutein_zeaxanthin', 'lucopene', 'vitamin_b12',
       'vitamin_b6', 'vitamin_c', 'vitamin_d', 'vitamin_e', 'tocopherol_alpha',
       'vitamin_k', 'calcium', 'copper', 'irom', 'magnesium', 'manganese',
       'phosphorous', 'potassium', 'selenium', 'zink', 'protein', 'alanine',
       'arginine', 'aspartic_acid', 'cystine', 'glutamic_acid', 'glycine',
       'histidine', 'hydroxyproline', 'isoleucine', 'leucine', 'lysine',
       'methionine', 'phenylalanine', 'proline', 'serine', 'threonine',
       'tryptophan', 'tyrosine', 'valine', 'carbohydrate', 'fiber', 'sugars',
       'fructose', 'galactose', 'glucose', 'lactose', 'maltose', 'sucrose',
       'fat', 'saturated_fatty_acids', 'mon

In [30]:
df.describe()

Unnamed: 0,calories,lucopene
count,8789.0,8789.0
mean,226.283878,0.0
std,169.862001,0.0
min,0.0,0.0
25%,91.0,0.0
50%,191.0,0.0
75%,337.0,0.0
max,902.0,0.0


Lucopene looks like an unecessary variable

In [31]:
df['lucopene'].unique()

array([0])

Since the only value is 0 we remove the data

In [32]:
df = df.drop('lucopene', axis=1)

The serving size are all the same

In [33]:
df['serving_size'].unique()

array(['100 g'], dtype=object)

In [34]:
df['serving_size'] = 100

## Transforming g to numerical values

In [35]:
import re

In [36]:
def transform(rows):
    return float(re.search("[0-9.]*", rows)[0])


In [37]:
df['carbohydrate'] = df["carbohydrate"].apply(transform)
df['protein'] = df["protein"].apply(transform)
df['total_fat'] = df["total_fat"].apply(transform)

In [38]:
df.describe()

Unnamed: 0,serving_size,calories,total_fat,protein,carbohydrate
count,8789.0,8789.0,8789.0,8789.0,8789.0
mean,100.0,226.283878,10.556855,11.345616,22.121915
std,0.0,169.862001,15.818247,10.530602,27.266261
min,100.0,0.0,0.0,0.0,0.0
25%,100.0,91.0,1.0,2.38,0.05
50%,100.0,191.0,5.1,8.02,9.34
75%,100.0,337.0,14.0,19.88,34.91
max,100.0,902.0,100.0,88.32,100.0


In [39]:
macros_df = df[['name', 'calories', 'protein', 'carbohydrate', 'total_fat']]

## Optimization

1. Minimize calories
2. half of the calories must be carbohydrates, 1g per kg for protein and the rest fat.

In [40]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd 
from pulp import * 
import seaborn as sns

In [41]:
print(macros_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8789 entries, 0 to 8788
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   name          8789 non-null   object 
 1   calories      8789 non-null   int64  
 2   protein       8789 non-null   float64
 3   carbohydrate  8789 non-null   float64
 4   total_fat     8789 non-null   float64
dtypes: float64(3), int64(1), object(1)
memory usage: 343.4+ KB
None


In [42]:
week_days = ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']
split_values = np.linspace(0,len(macros_df),8).astype(int)
split_values[-1] = split_values[-1]-1
def random_dataset():
    frac_data = macros_df.sample(frac=1).reset_index().drop('index',axis=1)
    day_data = []
    for s in range(len(split_values)-1):
        day_data.append(frac_data.loc[split_values[s]:split_values[s+1]])
    return dict(zip(week_days,day_data))

In [43]:
random_dataset()['Monday']

Unnamed: 0,name,calories,protein,carbohydrate,total_fat
0,"Chicken, not reheated, frozen, precooked, dark...",260,12.43,16.32,16.0
1,"Crackers, whole grain, cheese",412,9.62,57.29,16.0
2,"Cookies, chocolate-covered, sugar wafer",526,3.45,65.99,28.0
3,"Fish, raw, carp",127,17.83,0.00,5.6
4,"Beans, unprepared, all styles, frozen, green, ...",39,1.79,7.54,0.2
...,...,...,...,...,...
1251,"PIZZA HUT 14"" Pepperoni Pizza, THIN 'N CRISPY ...",333,14.13,32.66,16.0
1252,"Pork, broiled, cooked, boneless, separable lea...",169,26.14,0.89,6.7
1253,"Peppers, freeze-dried, green, sweet",314,17.90,68.70,3.0
1254,"Crustaceans, home recipe, crab cakes, blue, crab",155,20.21,0.48,7.5


In [44]:
def build_nutritional_values(kg,calories):
    protein_calories = kg*4
    res_calories = calories-protein_calories
    carb_calories = calories/2.
    fat_calories = calories-carb_calories-protein_calories
    res = {'Protein Calories':protein_calories,'Carbohydrates Calories':carb_calories,'Fat Calories':fat_calories}
    return res

In [45]:
def extract_gram(table):
    protein_grams = table['Protein Calories']/4.
    carbs_grams = table['Carbohydrates Calories']/4.
    fat_grams = table['Fat Calories']/9.
    res = {'Protein Grams':protein_grams, 'Carbohydrates Grams':carbs_grams,'Fat Grams':fat_grams}
    return res

In [48]:
print(build_nutritional_values(70,2000))

{'Protein Calories': 280, 'Carbohydrates Calories': 1000.0, 'Fat Calories': 720.0}


In [47]:
print(extract_gram(build_nutritional_values(70,2000)))

{'Protein Grams': 70.0, 'Carbohydrates Grams': 250.0, 'Fat Grams': 80.0}


In [49]:
days_data = random_dataset()
def model(day,kg,calories):
    G = extract_gram(build_nutritional_values(kg,calories))
    E = G['Carbohydrates Grams']
    F = G['Fat Grams']
    P = G['Protein Grams']
    day_data = days_data[day]
    day_data = day_data[day_data.calories!=0]
    food = day_data.name.tolist()
    c  = day_data.calories.tolist()
    x  = pulp.LpVariable.dicts( "x", indices = food, lowBound=0, upBound=1.5, cat='Continuous', indexStart=[] )
    e = day_data.carbohydrate.tolist()
    f = day_data.total_fat.tolist()
    p = day_data.protein.tolist()
    prob  = pulp.LpProblem( "Diet", LpMinimize )
    prob += pulp.lpSum( [x[food[i]]*c[i] for i in range(len(food))]  )
    prob += pulp.lpSum( [x[food[i]]*e[i] for i in range(len(x)) ] )>=E
    prob += pulp.lpSum( [x[food[i]]*f[i] for i in range(len(x)) ] )>=F
    prob += pulp.lpSum( [x[food[i]]*p[i] for i in range(len(x)) ] )>=P
    prob.solve()
    variables = []
    values = []
    for v in prob.variables():
        variable = v.name
        value = v.varValue
        variables.append(variable)
        values.append(value)
    values = np.array(values).round(2).astype(float)
    sol = pd.DataFrame(np.array([food,values]).T, columns = ['Food','Quantity'])
    sol['Quantity'] = sol.Quantity.astype(float)
    return sol


In [50]:
sol_monday = model('Monday',70,1500)

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/eclipse/.local/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/8f90078f7f87456cb0c17c52af005a43-pulp.mps timeMode elapsed branch printingOptions all solution /tmp/8f90078f7f87456cb0c17c52af005a43-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 8 COLUMNS
At line 4580 RHS
At line 4584 BOUNDS
At line 5834 ENDATA
Problem MODEL has 3 rows, 1249 columns and 3322 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Presolve 3 (0) rows, 1226 (-23) columns and 3273 (-49) elements
0  Obj 0 Primal inf 3.2563824 (3)
7  Obj 1045.8653
Optimal - objective value 1045.8653
After Postsolve, objective 1045.8653, infeasibilities - dual 0 (0), primal 0 (0)
Optimal objective 1045.865294 - 7 iterations time 0.002, Presolve 0.00
Option for printingOptions changed from normal to all
Total time (CPU seconds):       0.00   (Wal

In [51]:
sol_monday = sol_monday[sol_monday['Quantity']!=0.0]
sol_monday.Quantity = sol_monday.Quantity*100
sol_monday = sol_monday.rename(columns={'Quantity':'Quantity (g)'})
sol_monday

Unnamed: 0,Food,Quantity (g)
20,"Beef, braised, cooked, all grades, trimmed to ...",150.0
74,"Beans, rinsed in tap water, drained solids, ca...",125.0
100,"KASHI Black Bean Mango, unprepared, frozen",150.0
575,"Potatoes, with salt, flesh and skin, baked",88.0
732,"Beans, no salt added, canned, baked",150.0
936,"Infant formula, 20 calories ready-to-feed Low ...",72.0
1132,"Cake, prepared from recipe without frosting, c...",150.0
1227,"Lamb, raw, choice, trimmed to 1/4"" fat, separa...",150.0


In [52]:
def model(prob,day,kg,calories):
    G = extract_gram(build_nutritional_values(kg,calories))
    E = G['Carbohydrates Grams']
    F = G['Fat Grams']
    P = G['Protein Grams']
    day_data = days_data[day]
    day_data = day_data[day_data.calories!=0]
    food = day_data.name.tolist()
    c  = day_data.calories.tolist()
    x  = pulp.LpVariable.dicts( "x", indices = food, lowBound=0, upBound=1.5, cat='Continuous', indexStart=[] )
    e = day_data.carbohydrate.tolist()
    f = day_data.total_fat.tolist()
    p = day_data.protein.tolist()
#    prob  = pulp.LpProblem( "Diet", LpMinimize )
    prob += pulp.lpSum( [x[food[i]]*c[i] for i in range(len(food))]  )
    prob += pulp.lpSum( [x[food[i]]*e[i] for i in range(len(x)) ] )>=E
    prob += pulp.lpSum( [x[food[i]]*f[i] for i in range(len(x)) ] )>=F
    prob += pulp.lpSum( [x[food[i]]*p[i] for i in range(len(x)) ] )>=P
    prob.solve()
    variables = []
    values = []
    for v in prob.variables():
        variable = v.name
        value = v.varValue
        variables.append(variable)
        values.append(value)
    values = np.array(values).round(2).astype(float)
    sol = pd.DataFrame(np.array([food,values]).T, columns = ['Food','Quantity'])
    sol['Quantity'] = sol.Quantity.astype(float)
    sol = sol[sol['Quantity']!=0.0]
    sol.Quantity = sol.Quantity*100
    sol = sol.rename(columns={'Quantity':'Quantity (g)'})
    return sol
def total_model(kg,calories):
    result = []
    for day in week_days:
        prob  = pulp.LpProblem( "Diet", LpMinimize )
        print('Building a model for day %s \n'%(day))
        result.append(model(prob,day,kg,calories))
    return dict(zip(week_days,result))

In [53]:
diet = total_model(70,3000)

Building a model for day Monday 

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/eclipse/.local/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/2d08879d64504cc49c947db344eac104-pulp.mps timeMode elapsed branch printingOptions all solution /tmp/2d08879d64504cc49c947db344eac104-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 8 COLUMNS
At line 4580 RHS
At line 4584 BOUNDS
At line 5834 ENDATA
Problem MODEL has 3 rows, 1249 columns and 3322 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Presolve 3 (0) rows, 1226 (-23) columns and 3273 (-49) elements
0  Obj 0 Primal inf 5.9913384 (3)
9  Obj 2224.0194
Optimal - objective value 2224.0194
After Postsolve, objective 2224.0194, infeasibilities - dual 0 (0), primal 0 (0)
Optimal objective 2224.019367 - 9 iterations time 0.002, Presolve 0.00
Option for printingOptions changed from normal to all
Total tim

In [54]:
diet

{'Monday':                                                    Food  Quantity (g)
 20    Beef, braised, cooked, all grades, trimmed to ...         150.0
 74    Beans, rinsed in tap water, drained solids, ca...          19.0
 100          KASHI Black Bean Mango, unprepared, frozen         150.0
 389   Puddings, sodium, potassium, with added oil, r...         150.0
 575          Potatoes, with salt, flesh and skin, baked         150.0
 732                 Beans, no salt added, canned, baked         150.0
 733   Yogurt, fortified with vitamin D, sweetened wi...         150.0
 756               Babyfood, strained, banana no tapioca          16.0
 815                  Pancakes, dry mix, special dietary         139.0
 1132  Cake, prepared from recipe without frosting, c...         150.0
 1227  Lamb, raw, choice, trimmed to 1/4" fat, separa...         150.0,
 'Tuesday':                                                   Food  Quantity (g)
 363  Cereals ready-to-eat, STRAWBERRY FIELDS, KASHI... 