In [100]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import cvxpy as cp
import json
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [60]:
#definitions
amino_acids = ['alanine', 
       'arginine', 'aspartic_acid', 'cystine', 'glutamic_acid', 'glycine',
       'histidine', 'hydroxyproline', 'isoleucine', 'leucine', 'lysine',
       'methionine', 'phenylalanine', 'proline', 'serine', 'threonine',
       'tryptophan', 'tyrosine', 'valine',]
essential = ['histidine', 'isoleucine', 'leucine', 'lysine', 
                         'methionine', 'phenylalanine', 'threonine', 'tryptophan', 'valine']
minerals = ['calcium','copper','irom','magnesium','manganese','phosphorous','potassium','zink']
vitamins = []
summary = ['protein','carbohydrate','total_fat','water']

In [3]:
df = pd.read_csv('../data/nutrition_clean.csv')
names = pd.read_csv('../data/food_names.csv')
names = names['name']
name_to_idx = {v: k for k, v in names.to_dict().items()}
columns_to_idx = {v: i for i, v in enumerate(df.columns)}

with open('../data/units.json') as json_file:
    units = json.load(json_file)

In [4]:
df.head()

Unnamed: 0,calories,total_fat,saturated_fat,cholesterol,sodium,choline,folate,folic_acid,niacin,pantothenic_acid,...,fat,saturated_fatty_acids,monounsaturated_fatty_acids,polyunsaturated_fatty_acids,fatty_acids_total_trans,alcohol,ash,caffeine,theobromine,water
0,381,0.1,0.0,0.0,9.0,0.4,0.0,0.0,0.0,0.0,...,0.05,0.009,0.016,0.025,0.0,0.0,0.09,0.0,0.0,8.32
1,691,72.0,6.2,0.0,0.0,40.5,22.0,0.0,1.167,0.863,...,71.97,6.18,40.801,21.614,0.0,0.0,1.49,0.0,0.0,3.52
2,25,0.2,0.0,0.0,2.0,6.9,22.0,0.0,0.649,0.281,...,0.18,0.034,0.016,0.076,0.0,0.0,0.66,0.0,0.0,92.3
3,367,2.4,0.4,0.0,12.0,13.1,0.0,0.0,3.363,0.942,...,2.38,0.449,0.589,1.071,0.0,0.0,2.37,0.0,0.0,8.82
4,144,2.0,1.2,1.0,46.0,7.7,4.0,0.0,0.063,0.224,...,2.0,1.16,0.53,0.08,1.0,0.0,0.4,0.0,0.0,66.1


## Constraints

In [65]:
lower = {'calories': 1750,
 'total_fat': 50,
 'saturated_fat': 20,
 'cholesterol': 100,
 'sodium': 1500,
 'choline': 550,
 'folate': 400,
 'folic_acid': 400,
 'niacin': 16,
 'pantothenic_acid': 5,
 'riboflavin': 1.3,
 'thiamin': 1.2,
 'vitamin_a': 3000,
 'vitamin_a_rae': ' mcg',
 'carotene_alpha': ' mcg',
 'carotene_beta': ' mcg',
 'cryptoxanthin_beta': ' mcg',
 'lutein_zeaxanthin': ' mcg',
 'vitamin_b12': 2.4,
 'vitamin_b6': 1.3,
 'vitamin_c': 90,
 'vitamin_d': 600,
 'vitamin_e': 15,
 'tocopherol_alpha': ' mg',
 'vitamin_k': 120,
 'calcium': 1300,
 'copper': 0.9,
 'irom': 8,
 'magnesium': 400,
 'manganese': 2.3,
 'phosphorous': 700,
 'potassium': 4700,
 'selenium': 55,
 'zink': 11,
 'protein': 56,
 'alanine': ' g',
 'arginine': ' g',
 'aspartic_acid': ' g',
 'cystine': 0.287,
 'glutamic_acid': ' g',
 'glycine': ' g',
 'histidine': 0.7,
 'hydroxyproline': 'g',
 'isoleucine': 1.4,
 'leucine': 2.73,
 'lysine': 2.1,
 'methionine': 0.728,
 'phenylalanine': 0.875,
 'proline': ' g',
 'serine': ' g',
 'threonine': 1.05,
 'tryptophan': 0.28,
 'tyrosine': 0.875,
 'valine': 1.82,
 'carbohydrate': 20, #130
 'fiber': 38,
 'sugars': 'g',
 'fructose': 'g',
 'galactose': 'g',
 'glucose': 'g',
 'lactose': 'g',
 'maltose': 'g',
 'sucrose': 'g',
 'fat': 'g', #78
 'saturated_fatty_acids': ' g',
 'monounsaturated_fatty_acids': ' g',
 'polyunsaturated_fatty_acids': 17,
 'fatty_acids_total_trans': 'mg',
 'alcohol': 'g',
 'ash': 'g',
 'caffeine': 'g',
 'theobromine': 'g',
 'water': 100} #500

upper = {'calories': 2500,
 'total_fat': 120,
 'saturated_fat': 40,
 'cholesterol': 300,
 'sodium': 3000,
 'choline': 2500,
 'folate': 900,
 'folic_acid': 900,
 'niacin': 30,
 'pantothenic_acid': 100,
 'riboflavin': 500,
 'thiamin': 200,
 'vitamin_a': 8000,
 'vitamin_a_rae': ' mcg',
 'carotene_alpha': ' mcg',
 'carotene_beta': ' mcg',
 'cryptoxanthin_beta': ' mcg',
 'lutein_zeaxanthin': ' mcg',
 'vitamin_b12': 8,
 'vitamin_b6': 4,
 'vitamin_c': 1000,
 'vitamin_d': 5000,
 'vitamin_e': 500,
 'tocopherol_alpha': ' mg',
 'vitamin_k': 500,
 'calcium': 2000,
 'copper': 7,
 'irom': 35,
 'magnesium': 500,
 'manganese': 9,
 'phosphorous': 3500,
 'potassium': 7000,
 'selenium': 300,
 'zink': 35,
 'protein': 120,
 'alanine': ' g',
 'arginine': ' g',
 'aspartic_acid': ' g',
 'cystine': ' g',
 'glutamic_acid': ' g',
 'glycine': ' g',
 'histidine': ' g',
 'hydroxyproline': 'g',
 'isoleucine': ' g',
 'leucine': ' g',
 'lysine': ' g',
 'methionine': ' g',
 'phenylalanine': ' g',
 'proline': ' g',
 'serine': ' g',
 'threonine': ' g',
 'tryptophan': ' g',
 'tyrosine': ' g',
 'valine': ' g',
 'carbohydrate': 100, #300
 'fiber': 50,
 'sugars': ' g',
 'fructose': ' g',
 'galactose': 'g',
 'glucose': ' g',
 'lactose': ' g',
 'maltose': ' g',
 'sucrose': ' g',
 'fat': 'g', #120
 'saturated_fatty_acids': ' g',
 'monounsaturated_fatty_acids': ' g',
 'polyunsaturated_fatty_acids': 30,
 'fatty_acids_total_trans': 0.1,
 'alcohol': ' g',
 'ash': ' g',
 'caffeine': ' mg',
 'theobromine': ' mg',
 'water': 2000}

In [66]:
constraints = {k_up: (v_low, v_up) for (k_up, v_up), (_, v_low) in zip(upper.items(), lower.items()) 
 if not (isinstance(v_low, str) and isinstance(v_up, str))}

In [67]:
constraints

{'calories': (1750, 2500),
 'total_fat': (50, 120),
 'saturated_fat': (20, 40),
 'cholesterol': (100, 300),
 'sodium': (1500, 3000),
 'choline': (550, 2500),
 'folate': (400, 900),
 'folic_acid': (400, 900),
 'niacin': (16, 30),
 'pantothenic_acid': (5, 100),
 'riboflavin': (1.3, 500),
 'thiamin': (1.2, 200),
 'vitamin_a': (3000, 8000),
 'vitamin_b12': (2.4, 8),
 'vitamin_b6': (1.3, 4),
 'vitamin_c': (90, 1000),
 'vitamin_d': (600, 5000),
 'vitamin_e': (15, 500),
 'vitamin_k': (120, 500),
 'calcium': (1300, 2000),
 'copper': (0.9, 7),
 'irom': (8, 35),
 'magnesium': (400, 500),
 'manganese': (2.3, 9),
 'phosphorous': (700, 3500),
 'potassium': (4700, 7000),
 'selenium': (55, 300),
 'zink': (11, 35),
 'protein': (56, 120),
 'cystine': (0.287, ' g'),
 'histidine': (0.7, ' g'),
 'isoleucine': (1.4, ' g'),
 'leucine': (2.73, ' g'),
 'lysine': (2.1, ' g'),
 'methionine': (0.728, ' g'),
 'phenylalanine': (0.875, ' g'),
 'threonine': (1.05, ' g'),
 'tryptophan': (0.28, ' g'),
 'tyrosine': (0.

Minimize $|\mathbf{x}|_1$

subject to $b_1 \leq A\mathbf{x} \leq b_2$

where $\mathbf{x} \in \mathcal{R}_+$

$\mathbf{x}: 1\times 8763$

$\mathbf{x}$ is the vector of all food weights in units of 100g

In [9]:
def get_constraint_vectors(constraints):
    lower = []
    upper = []
    lower_idx = []
    upper_idx = []
    for k,v in constraints.items():
        if not isinstance(v[0], str):
            lower_idx.append(k)
            lower.append(v[0])
        if not isinstance(v[1], str):
            upper_idx.append(k)
            upper.append(v[1])
    return lower, upper, lower_idx, upper_idx

In [10]:
l, u, l_idxs, u_idxs = get_constraint_vectors(constraints)

In [11]:
#should be roughly 70 x 8700, i.e. A.T.
Al = df[l_idxs].values.T
Au = df[u_idxs].values.T

In [12]:
# Least calorie solution (may need to control for beverages by placing constraint 
# or regularization on x, e.g. weight constraint)
n = len(df)


# Define and solve the CVXPY problem.
x = cp.Variable(n)
prob = cp.Problem(cp.Minimize(cp.norm1(x)),
                 [Au @ x <= u, Al @ x >= l,])
prob.solve()

# Print result.
print('Total weight: ',prob.value * 100, 'g')

Total weight:  392.2556865541974 g


In [13]:
u_dual = prob.constraints[0].dual_value
tol = 1e-6
u_lim = pd.DataFrame([(nutrient, val) for nutrient, val in zip(np.array(u_idxs)[u_dual > tol], u_dual[u_dual > tol])])
print(u_lim)

                             0         1
0                  vitamin_b12  0.000469
1                    manganese  0.000048
2                          fat  0.005107
3  polyunsaturated_fatty_acids  0.000003


In [14]:
l_dual = prob.constraints[1].dual_value
tol = 1e-6
l_lim = pd.DataFrame([(nutrient, val) for nutrient, val in zip(np.array(l_idxs)[l_dual > tol], l_dual[l_dual > tol])])
print(l_lim)

             0         1
0     calories  0.001673
1       sodium  0.000017
2      choline  0.000009
3   folic_acid  0.000036
4   vitamin_b6  0.014707
5    vitamin_c  0.000052
6    vitamin_e  0.000209
7    vitamin_k  0.000019
8      calcium  0.000065
9    magnesium  0.000070
10   potassium  0.000033
11     protein  0.002433
12     leucine  0.003679
13       fiber  0.003654
14       water  0.008806


In [15]:
result = x.value
idxs = np.array([i for i in range(len(result))])
tol = 1e-6
final_foods = pd.DataFrame([(names[food], food, val) for food, val in zip(idxs[result > tol], result[result > tol])])

In [16]:
final_foods.sort_values(by=2,ascending=False)

Unnamed: 0,0,1,2
19,"Alcoholic beverage, all (gin, rum, vodka, whis...",6145,1.201813
18,"Cocoa, processed with alkali, hi-fat or breakf...",5791,0.498978
10,"Oil, low linolenic, soy, industrial",2612,0.355754
12,"Gums, seed gums (includes locust bean, guar)",2829,0.241518
3,"Egg, dried, yolk",442,0.209543
6,"Pork, raw, salt pork, cured",959,0.204994
7,"Cheese, shredded, parmesan",1016,0.1939
5,"Smelt, dried (Alaska Native)",694,0.190103
13,"Protein supplement, powder, Muscle Milk Light,...",4950,0.160668
8,"Leavening agents, cream of tartar",1643,0.159591


In [106]:
low_lim_sum = []
low_lim_ess = []
low_lim_min = []
for nutrient in summary:
    low, upp = constraints[nutrient]
    low_lim_sum.append(low)
for nutrient in essential:
    low, upp = constraints[nutrient]
    low_lim_ess.append(low)
for nutrient in minerals:
    low, upp = constraints[nutrient]
    low_lim_min.append(low)

In [117]:
idx = 132
print(df[summary].iloc[idx].sum())

fig = make_subplots(rows=1, cols=3, specs=[[{'type':'polar'},{'type':'polar'},{'type':'polar'}]])

fig.add_trace(go.Scatterpolar(
      r=low_lim_sum + [low_lim_sum[0]],theta=summary + [summary[0]],
      name='lower_lim'
), row=1, col=1)

fig.add_trace(go.Scatterpolar(
      r=df[summary+[summary[0]]].iloc[idx],
      theta=summary+ [summary[0]],
      fill='toself',
      name=names[idx]
), row=1, col=1)

fig.add_trace(go.Scatterpolar(
      r=low_lim_ess + [low_lim_ess[0]],theta=essential + [essential[0]],
      name='lower_lim'
), row=1, col=2)

fig.add_trace(go.Scatterpolar(
      r=df[essential+[essential[0]]].iloc[idx],
      theta=essential+ [essential[0]],
      fill='toself',
      name=names[idx]
), row=1, col=2)

fig.add_trace(go.Scatterpolar(
      r=low_lim_min + [low_lim_min[0]],theta=minerals + [minerals[0]],
      name='lower_lim'
), row=1, col=3)

fig.add_trace(go.Scatterpolar(
      r=df[minerals+[minerals[0]]].iloc[idx],
      theta=minerals+ [minerals[0]],
      fill='toself',
      name=names[idx]
), row=1, col=3)



fig.update_layout(
    height=600, width=1800,
    title=names[idx],
  polar=dict(
    radialaxis=dict(
      visible=True,
#       range=[0, 100]
    )),
  showlegend=False
)

fig.show()

86.1


In [62]:
idx = 5791
print(df[minerals].iloc[idx].sum())
fig = px.line_polar(pd.DataFrame(dict(r=df[minerals].iloc[idx],theta=minerals)), 
                    r='r', theta='theta', line_close=True, title=names[idx], range_r=(0,5000))
fig.update_traces(fill='toself')

3889.81


In [41]:
df[['polyunsaturated_fatty_acids','monounsaturated_fatty_acids','saturated_fatty_acids']].iloc[idx]

polyunsaturated_fatty_acids     0.769
monounsaturated_fatty_acids     7.380
saturated_fatty_acids          12.640
Name: 5791, dtype: float64

In [126]:
df[(df['protein'] > 0) * (df[amino_acids].sum(axis=1) == 0)][['protein']+amino_acids]

Unnamed: 0,protein,alanine,arginine,aspartic_acid,cystine,glutamic_acid,glycine,histidine,hydroxyproline,isoleucine,leucine,lysine,methionine,phenylalanine,proline,serine,threonine,tryptophan,tyrosine,valine
4,1.10,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,23.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12,0.11,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14,19.40,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
17,0.91,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8764,0.14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8767,1.78,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8770,1.48,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8771,3.20,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [127]:
df[(df['protein'] == 0) * (df[amino_acids].sum(axis=1) > 0)][['protein']+amino_acids]

Unnamed: 0,protein,alanine,arginine,aspartic_acid,cystine,glutamic_acid,glycine,histidine,hydroxyproline,isoleucine,leucine,lysine,methionine,phenylalanine,proline,serine,threonine,tryptophan,tyrosine,valine
615,0.0,0.0,0.0,0.002,0.0,0.002,0.0,0.0,0.0,0.0,0.001,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1221,0.0,0.072,0.088,0.155,0.019,0.234,0.07,0.027,0.0,0.06,0.094,0.078,0.016,0.075,0.067,0.067,0.062,0.023,0.058,0.093
1474,0.0,0.003,0.002,0.003,0.001,0.007,0.001,0.001,0.0,0.001,0.005,0.001,0.001,0.002,0.003,0.002,0.001,0.0,0.001,0.002
2186,0.0,0.036,0.064,0.105,0.014,0.18,0.026,0.031,0.0,0.007,0.017,0.019,0.029,0.017,0.029,0.041,0.024,0.0,0.016,0.024
4438,0.0,0.013,0.007,0.003,0.0,0.017,0.002,0.001,0.0,0.001,0.002,0.002,0.0,0.002,0.002,0.002,0.002,0.0,0.0,0.002
4621,0.0,0.003,0.002,0.003,0.001,0.007,0.001,0.001,0.0,0.001,0.005,0.001,0.001,0.002,0.003,0.002,0.001,0.0,0.001,0.002


In [129]:
complete = df[(df['protein'] > 0) * (df[amino_acids].sum(axis=1) > 0)][['protein']+amino_acids]

In [170]:
amino_median = []
for idx in range(len(amino_acids)):
    ratio = complete[[amino_acids[idx]]].sum(axis=1) / complete['protein']
    amino_median.append(ratio.median())
amino_median = np.array(amino_median)

In [181]:
fix = (df[(df['protein'] > 0) * (df[amino_acids].sum(axis=1) == 0)][['protein']].values * amino_median)
mask = (df['protein'] > 0) * (df[amino_acids].sum(axis=1) == 0)
for idx in range(len(amino_acids)):
    df[amino_acids[idx]][mask] = fix[:,0idx]



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

