In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("data/nutrition.csv")

In [3]:
df = df.drop(columns="Unnamed: 0")

In [4]:
df.sample(1)

Unnamed: 0,name,serving_size,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
4566,"USDA Commodity Chicken, drained, meat only, ca...",100 g,162,5.7g,1.6g,83mg,271.00 mg,73.8 mg,6.00 mcg,0.00 mcg,...,5.72 g,1.572 g,2.047 g,1.313 g,83.00 mg,0.0 g,0.89 g,0.00 mg,0.00 mg,67.29 g


In [5]:
df.columns

Index(['name', 'serving_size', 'calories', 'total_fat', 'saturated_fat',
       'cholesterol', 'sodium', 'choline', 'folate', 'folic_acid', 'niacin',
       'pantothenic_acid', 'riboflavin', 'thiamin', 'vitamin_a',
       'vitamin_a_rae', 'carotene_alpha', 'carotene_beta',
       'cryptoxanthin_beta', 'lutein_zeaxanthin', 'lucopene', 'vitamin_b12',
       'vitamin_b6', 'vitamin_c', 'vitamin_d', 'vitamin_e', 'tocopherol_alpha',
       'vitamin_k', 'calcium', 'copper', 'irom', 'magnesium', 'manganese',
       'phosphorous', 'potassium', 'selenium', 'zink', 'protein', 'alanine',
       'arginine', 'aspartic_acid', 'cystine', 'glutamic_acid', 'glycine',
       'histidine', 'hydroxyproline', 'isoleucine', 'leucine', 'lysine',
       'methionine', 'phenylalanine', 'proline', 'serine', 'threonine',
       'tryptophan', 'tyrosine', 'valine', 'carbohydrate', 'fiber', 'sugars',
       'fructose', 'galactose', 'glucose', 'lactose', 'maltose', 'sucrose',
       'fat', 'saturated_fatty_acids', 'mon

In [6]:
df.describe()

Unnamed: 0,calories,lucopene
count,8789.0,8789.0
mean,226.283878,0.0
std,169.862001,0.0
min,0.0,0.0
25%,91.0,0.0
50%,191.0,0.0
75%,337.0,0.0
max,902.0,0.0


Lucopene looks like an unecessary variable

In [7]:
df['lucopene'].unique()

array([0])

Since the only value is 0 we remove the data

In [8]:
df = df.drop('lucopene', axis=1)

The serving size are all the same

In [9]:
df['serving_size'].unique()

array(['100 g'], dtype=object)

In [10]:
df['serving_size'] = 100

## Transforming g to numerical values

In [11]:
import re

In [12]:
def transform(rows):
    return float(re.search("[0-9.]*", rows)[0])


In [13]:
df['carbohydrate'] = df["carbohydrate"].apply(transform)
df['protein'] = df["protein"].apply(transform)
df['total_fat'] = df["total_fat"].apply(transform)

In [14]:
df.describe()

Unnamed: 0,serving_size,calories,total_fat,protein,carbohydrate
count,8789.0,8789.0,8789.0,8789.0,8789.0
mean,100.0,226.283878,10.556855,11.345616,22.121915
std,0.0,169.862001,15.818247,10.530602,27.266261
min,100.0,0.0,0.0,0.0,0.0
25%,100.0,91.0,1.0,2.38,0.05
50%,100.0,191.0,5.1,8.02,9.34
75%,100.0,337.0,14.0,19.88,34.91
max,100.0,902.0,100.0,88.32,100.0


In [15]:
macros_df = df[['name', 'calories', 'protein', 'carbohydrate', 'total_fat']]

## Optimization

1. Minimize calories
2. half of the calories must be carbohydrates, 1g per kg for protein and the rest fat.

In [16]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd 
from pulp import * 
import seaborn as sns

In [17]:
print(macros_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8789 entries, 0 to 8788
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   name          8789 non-null   object 
 1   calories      8789 non-null   int64  
 2   protein       8789 non-null   float64
 3   carbohydrate  8789 non-null   float64
 4   total_fat     8789 non-null   float64
dtypes: float64(3), int64(1), object(1)
memory usage: 343.4+ KB
None


In [31]:
macros_df.to_csv('macro_nutrients.csv')

In [18]:
week_days = ['Monday','Tuesday','Wednesday','Thursday','Friday','Saturday','Sunday']
split_values = np.linspace(0,len(macros_df),8).astype(int)
split_values[-1] = split_values[-1]-1
def random_dataset():
    frac_data = macros_df.sample(frac=1).reset_index().drop('index',axis=1)
    day_data = []
    for s in range(len(split_values)-1):
        day_data.append(frac_data.loc[split_values[s]:split_values[s+1]])
    return dict(zip(week_days,day_data))

In [19]:
random_dataset()['Monday']

Unnamed: 0,name,calories,protein,carbohydrate,total_fat
0,"Blueberries, drained, light syrup, canned",88,1.04,22.66,0.4
1,"Beef, raw, Aust. marble score 4/5, separable l...",234,20.35,0.00,17.0
2,"Pork, roasted, heated, separable lean only, bo...",139,26.49,0.68,3.7
3,"Gravy, dry, au jus",313,9.20,47.49,9.6
4,"Soymilk (All flavors), enhanced",45,2.94,3.45,2.0
...,...,...,...,...,...
1251,"Soup, prepared with equal volume milk, canned,...",55,2.51,3.99,3.2
1252,"Lamb, fast roasted, cooked, separable lean and...",157,28.96,0.00,4.6
1253,"Pork, unheated, separable lean and fat, blade ...",269,16.47,0.00,22.0
1254,"Soup, with water, low sodium, tomato",30,0.79,6.57,0.3


In [20]:
def build_nutritional_values(kg,calories):
    protein_calories = kg*4
    res_calories = calories-protein_calories
    carb_calories = calories/2.
    fat_calories = calories-carb_calories-protein_calories
    res = {'Protein Calories':protein_calories,'Carbohydrates Calories':carb_calories,'Fat Calories':fat_calories}
    return res

In [21]:
def extract_gram(table):
    protein_grams = table['Protein Calories']/4.
    carbs_grams = table['Carbohydrates Calories']/4.
    fat_grams = table['Fat Calories']/9.
    res = {'Protein Grams':protein_grams, 'Carbohydrates Grams':carbs_grams,'Fat Grams':fat_grams}
    return res

In [22]:
print(build_nutritional_values(70,2000))

{'Protein Calories': 280, 'Carbohydrates Calories': 1000.0, 'Fat Calories': 720.0}


In [23]:
print(extract_gram(build_nutritional_values(70,2000)))

{'Protein Grams': 70.0, 'Carbohydrates Grams': 250.0, 'Fat Grams': 80.0}


In [24]:
days_data = random_dataset()
def model(day,kg,calories):
    G = extract_gram(build_nutritional_values(kg,calories))
    E = G['Carbohydrates Grams']
    F = G['Fat Grams']
    P = G['Protein Grams']
    day_data = days_data[day]
    day_data = day_data[day_data.calories!=0]
    food = day_data.name.tolist()
    c  = day_data.calories.tolist()
    x  = pulp.LpVariable.dicts( "x", indices = food, lowBound=0, upBound=1.5, cat='Continuous', indexStart=[] )
    e = day_data.carbohydrate.tolist()
    f = day_data.total_fat.tolist()
    p = day_data.protein.tolist()
    prob  = pulp.LpProblem( "Diet", LpMinimize )
    prob += pulp.lpSum( [x[food[i]]*c[i] for i in range(len(food))]  )
    prob += pulp.lpSum( [x[food[i]]*e[i] for i in range(len(x)) ] )>=E
    prob += pulp.lpSum( [x[food[i]]*f[i] for i in range(len(x)) ] )>=F
    prob += pulp.lpSum( [x[food[i]]*p[i] for i in range(len(x)) ] )>=P
    prob.solve()
    variables = []
    values = []
    for v in prob.variables():
        variable = v.name
        value = v.varValue
        variables.append(variable)
        values.append(value)
    values = np.array(values).round(2).astype(float)
    sol = pd.DataFrame(np.array([food,values]).T, columns = ['Food','Quantity'])
    sol['Quantity'] = sol.Quantity.astype(float)
    return sol


In [25]:
sol_monday = model('Monday',70,1500)

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/eclipse/.local/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/11d6ff6506ea48929f6a9c8533406772-pulp.mps timeMode elapsed branch printingOptions all solution /tmp/11d6ff6506ea48929f6a9c8533406772-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 8 COLUMNS
At line 4603 RHS
At line 4607 BOUNDS
At line 5858 ENDATA
Problem MODEL has 3 rows, 1250 columns and 3344 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Presolve 3 (0) rows, 1228 (-22) columns and 3298 (-46) elements
0  Obj 0 Primal inf 3.4271622 (3)
10  Obj 1112.0513
Optimal - objective value 1112.0513
After Postsolve, objective 1112.0513, infeasibilities - dual 0 (0), primal 0 (0)
Optimal objective 1112.051338 - 10 iterations time 0.002, Presolve 0.00
Option for printingOptions changed from normal to all
Total time (CPU seconds):       0.00   (W

In [26]:
sol_monday = sol_monday[sol_monday['Quantity']!=0.0]
sol_monday.Quantity = sol_monday.Quantity*100
sol_monday = sol_monday.rename(columns={'Quantity':'Quantity (g)'})
sol_monday

Unnamed: 0,Food,Quantity (g)
70,"Chicken, simmered, cooked, all classes, heart",150.0
71,"Lamb, roasted, cooked, trimmed to 1/8"" fat, se...",150.0
359,"WORTHINGTON Choplets, unprepared, canned",150.0
609,"Fish, dry heat, cooked, fresh, yellowfin, tuna",62.0
1137,"Beverages, with vitamin E added, reduced sugar...",144.0
1139,"Beef, raw, choice, trimmed to 1/8"" fat, separa...",150.0
1165,"Cereals, cinnamon, weight control, Instant Oat...",33.0


In [27]:
def model(prob,day,kg,calories):
    G = extract_gram(build_nutritional_values(kg,calories))
    E = G['Carbohydrates Grams']
    F = G['Fat Grams']
    P = G['Protein Grams']
    day_data = days_data[day]
    day_data = day_data[day_data.calories!=0]
    food = day_data.name.tolist()
    c  = day_data.calories.tolist()
    x  = pulp.LpVariable.dicts( "x", indices = food, lowBound=0, upBound=1.5, cat='Continuous', indexStart=[] )
    e = day_data.carbohydrate.tolist()
    f = day_data.total_fat.tolist()
    p = day_data.protein.tolist()
#    prob  = pulp.LpProblem( "Diet", LpMinimize )
    prob += pulp.lpSum( [x[food[i]]*c[i] for i in range(len(food))]  )
    prob += pulp.lpSum( [x[food[i]]*e[i] for i in range(len(x)) ] )>=E
    prob += pulp.lpSum( [x[food[i]]*f[i] for i in range(len(x)) ] )>=F
    prob += pulp.lpSum( [x[food[i]]*p[i] for i in range(len(x)) ] )>=P
    prob.solve()
    variables = []
    values = []
    for v in prob.variables():
        variable = v.name
        value = v.varValue
        variables.append(variable)
        values.append(value)
    values = np.array(values).round(2).astype(float)
    sol = pd.DataFrame(np.array([food,values]).T, columns = ['Food','Quantity'])
    sol['Quantity'] = sol.Quantity.astype(float)
    sol = sol[sol['Quantity']!=0.0]
    sol.Quantity = sol.Quantity*100
    sol = sol.rename(columns={'Quantity':'Quantity (g)'})
    return sol
def total_model(kg,calories):
    result = []
    for day in week_days:
        prob  = pulp.LpProblem( "Diet", LpMinimize )
        print('Building a model for day %s \n'%(day))
        result.append(model(prob,day,kg,calories))
    return dict(zip(week_days,result))

In [28]:
diet = total_model(70,3000)

Building a model for day Monday 

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/eclipse/.local/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/f7a25d1548a04e708f8f6b3f62964c97-pulp.mps timeMode elapsed branch printingOptions all solution /tmp/f7a25d1548a04e708f8f6b3f62964c97-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 8 COLUMNS
At line 4603 RHS
At line 4607 BOUNDS
At line 5858 ENDATA
Problem MODEL has 3 rows, 1250 columns and 3344 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Presolve 3 (0) rows, 1228 (-22) columns and 3298 (-46) elements
0  Obj 0 Primal inf 6.2694959 (3)
6  Obj 2353.6678
Optimal - objective value 2353.6678
After Postsolve, objective 2353.6678, infeasibilities - dual 0 (0), primal 0 (0)
Optimal objective 2353.667828 - 6 iterations time 0.002, Presolve 0.00
Option for printingOptions changed from normal to all
Total tim

In [29]:
diet

{'Monday':                                                    Food  Quantity (g)
 235   Beef, braised, cooked, all grades, trimmed to ...         150.0
 298   Pie crust, chilled, vanilla wafer, prepared fr...         122.0
 359            WORTHINGTON Choplets, unprepared, canned         150.0
 409              Ice creams, soft-serve, light, vanilla          49.0
 756   Cereals ready-to-eat, SPECIAL K Protein Plus, ...         150.0
 761   Soy sauce, made from hydrolyzed vegetable prot...         150.0
 839              Frozen novelties, fruit and juice bars          79.0
 913   Soup, prepared with equal volume water, canned...         150.0
 1137  Beverages, with vitamin E added, reduced sugar...         150.0
 1138            Cheesefurter, beef, pork, cheese smokie         150.0
 1139  Beef, raw, choice, trimmed to 1/8" fat, separa...         150.0
 1170  Lima beans, without salt, boiled, cooked, matu...         150.0,
 'Tuesday':                                                    Foo