In [1]:
import pandas as pd
from pulp import LpMinimize, LpMaximize, LpProblem, LpVariable

In [2]:
cnap = pd.read_csv('core_nutrient_amounts_prices_v3.csv', index_col=0)
cnap = cnap.rename(columns={'name': 'nutrient_name'})
constraints = pd.read_csv('nutrient_constraints_csv.csv').set_index('nutrient_nbr').drop(columns=['id', 'rank']).rename(columns={'name': 'nutrient_name'})
constraints = constraints.loc[constraints.target.notna() | constraints.ll.notna() | constraints.ul.notna()]    
n_values = cnap.pivot(index='nutrient_nbr', columns='food', values='nutrient_unit_per_dollar')
cc = constraints.join(n_values, how='left')
first_food_idx = cc.columns.get_loc('Almonds')
cc.iloc[:, first_food_idx:] = cc.iloc[:, first_food_idx:].fillna(0)

In [15]:
prob = LpProblem("Minimize_Cost", LpMinimize)
decision_variables = []
first_food_idx = cc.columns.get_loc('Almonds')
for food in cc.columns[first_food_idx:]:
    decision_variables.append(LpVariable(f"{food}", lowBound=0))
prob += sum([decision_variable for decision_variable in decision_variables])
for i, row in cc.iterrows():
    constraint = 0
    for j, food in enumerate(cc.columns[first_food_idx:]):
        constraint += decision_variables[j] * row[food]
    if pd.notna(row.target):
        print(row.nutrient_name, row.target)
        prob += constraint == row.target
    elif pd.notna(row.ll):
        prob += constraint >= row.ll
    elif pd.notna(row.ul):
        prob += constraint <= row.ul

Energy 3500.0


In [16]:
prob.solve()
for v in prob.variables():
    if v.varValue != 0:
        print(v.name, "=", v.varValue)

Beef_Liver = 0.03887292
Black_Beans = 0.22242298
Broccoli = 0.11559462
Cabbage = 0.05263944
Carrots = 0.064621147
Eggs = 0.040607867
Pink_Salmon = 0.03263155
Salmon = 1.4626999
Sunflower_seeds = 0.028170131
Tomatoes = 0.48588785
Wheat_flour = 0.8012629
Whole_Milk = 0.43843323


In [17]:
fs = pd.DataFrame([(v.name, v.varValue) for v in prob.variables() if v.varValue != 0], columns=['food', 'spend']).iloc[:-1,:]
cn = cc.set_index('nutrient_name').iloc[:, 4:]
# Rank the foods for each nutrient
food_ranks = cn.rank(axis=1, method='min', ascending=False)
final_foods = cnap.loc[cnap.food.isin([v.name for v in prob.variables() if v.varValue != 0])].drop_duplicates('food')

In [18]:
ffs = final_foods.merge(fs, on='food')

In [19]:
ffs.spend / ffs.price_per_100_g

0    2.843804
1    0.378215
2    0.318358
3    0.304842
4    0.829336
5    0.122796
dtype: float64

In [20]:
ffs.spend.sum() * 365

811.0485507599999

In [21]:
14 * 100 / 453.592

3.086474188257289

In [10]:
# TODO: Add fish oil/sardines to lower salmon cost, check which constraints limit optimization

In [22]:
food_ranks['Brown rice'].sort_values()

nutrient_name
Caffeine                           1.0
Sodium, Na                         2.0
Thiamin                            5.0
Niacin                             5.0
Magnesium, Mg                      5.0
Phosphorus, P                      6.0
Lycopene                           6.0
Vitamin B-6                        7.0
Carbohydrate, by difference        8.0
Energy                             8.0
Riboflavin                         8.0
Zinc, Zn                           8.0
Selenium, Se                      10.0
PUFA 20:5 n-3 (EPA)               11.0
Vitamin D (D2 + D3)               12.0
Fiber, total dietary              12.0
PUFA 22:6 n-3 (DHA)               12.0
Iron, Fe                          15.0
Choline, total                    15.0
Copper, Cu                        15.0
PUFA 22:5 n-3 (DPA)               15.0
Protein                           16.0
Potassium, K                      18.0
PUFA 18:2                         18.0
Retinol                           22.0
Vitamin B-1