# Justus von Liebig Final Code 
### Quinn Dahl, Hannah Thompson, Adrianna Ngo, Julia Goswick, Alex Yuan, Nick Everest

In [1]:
# Install the packages listed in requirements.txt, plus scipy, via shell commands.
!pip install -r requirements.txt
!pip install scipy

Collecting pint>=0.18
  Using cached Pint-0.18-py2.py3-none-any.whl (209 kB)
Collecting eep153_tools
  Using cached eep153_tools-0.11-py2.py3-none-any.whl (4.4 kB)
Processing /home/jovyan/.cache/pip/wheels/20/7e/30/7d702acd6a1e89911301cd9dbf9cb9870ca80c0e64bc2cde23/gnupg-2.3.1-py3-none-any.whl
Installing collected packages: pint, eep153-tools, gnupg
  Attempting uninstall: pint
    Found existing installation: Pint 0.17
    Uninstalling Pint-0.17:
      Successfully uninstalled Pint-0.17
Successfully installed eep153-tools-0.11 gnupg-2.3.1 pint-0.18


## [A] Description of Population of Interest

As students at UC Berkeley, our group was interested in pursuing a topic that could help our peers. We took into consideration the grocery stores where we most commonly shop and that are accessible in terms of distance (by bus or on foot). Additionally, we were interested in making our project inclusive of different diets.

That being said, our project is a comparison of minimum cost diets for students shopping exclusively at specific stores in Berkeley, ideally adjustable based on diet type and personal preferences. We looked into stores such as Safeway, Berkeley Bowl, Whole Foods, and Trader Joe’s. 

We hope to find minimum cost meals and provide recipes for vegan, vegetarian, pescatarian, and omnivorous students. Hopefully our project can help next time you go grocery shopping!   


## [A] Function for Dietary Reference Intakes

In [2]:
import os

# API key handed to the fooddatacentral nutrient lookups later in the notebook.
# Prefer the FDC_API_KEY environment variable so the credential does not have to
# live in the notebook; the literal is only a fallback so existing runs keep working.
# NOTE(review): this key is stored in plain text -- consider rotating it and
# removing the fallback.
apikey = os.environ.get("FDC_API_KEY", "tJl01ofHyVljUR9Zpj444bDQ07REEpVXRdx1kpRl")

In [3]:
from scipy.optimize import linprog as lp
import numpy as np
import warnings

def solve_subsistence_problem(FoodNutrients,Prices,diet_min,diet_max,max_weight=None,tol=1e-6):
    """Solve Stigler's Subsistence Cost Problem.

    Finds the cheapest non-negative quantities of foods whose nutrient content
    is at least ``diet_min`` and at most ``diet_max``.

    Inputs:
       - FoodNutrients : A pd.DataFrame with rows corresponding to nutrients,
                         columns to foods (foods are matched against the index
                         of Prices, nutrients against the DRI indexes).
       - Prices : A pd.Series of prices for different foods.  Values may carry
                  units (anything with a ``.magnitude`` attribute) or be plain numbers.
       - diet_min : A pd.Series of DRIs, with index corresponding to rows of FoodNutrients,
                    describing minimum intakes.
       - diet_max : A pd.Series of DRIs, with index corresponding to rows of FoodNutrients,
                    describing maximum intakes.
       - max_weight : Maximum weight (in hectograms) allowed for diet.
       - tol : Solution values smaller than this in absolute value treated as zeros.

    Returns:
       The scipy ``linprog`` result with three extra attributes: ``A`` and ``b``
       (the constraint system in "A x >= b" form, maximum constraints negated)
       and ``diet`` (a pd.Series of quantities indexed by food; all NaN when the
       solver does not succeed, in which case a warning is issued).
    """
    # Strip units when present; plain numeric prices pass through unchanged.
    p = Prices.apply(lambda x: getattr(x, 'magnitude', x)).astype(float).dropna()

    # Compile list that we have both prices and nutritional info for; drop if either missing
    use = p.index.intersection(FoodNutrients.columns)
    p = p[use]

    # Drop nutritional information for foods we don't know the price of,
    # and replace missing nutrients with zeros.
    Aall = FoodNutrients[p.index].fillna(0)

    # A missing DRI value means "no requirement"; it must not reach the solver as NaN.
    diet_min = diet_min.dropna()
    diet_max = diet_max.dropna()

    # Drop rows of A that we don't have constraints for.
    Amin = Aall.loc[Aall.index.intersection(diet_min.index)]
    Amax = Aall.loc[Aall.index.intersection(diet_max.index)]

    # Everything is expressed as "A x >= b"; maximum constraints are negated to fit.
    A = pd.concat([Amin,
                   -Amax])

    # Build b from exactly the rows kept in A so the two stay aligned row by row.
    # A nutrient may appear twice (once as a minimum, once as a maximum), so the
    # alignment is positional rather than by (possibly duplicated) label.
    b = pd.concat([diet_min[Amin.index],
                   -diet_max[Amax.index]]).astype(float) # Note sign change for max constraints

    # Make sure the food order of p and A is consistent
    A = A.reindex(p.index,axis=1)

    if max_weight is not None:
        # Add up weights of foods consumed: -sum(x) >= -max_weight.
        # Rows are appended with concat because label-based enlargement is
        # unreliable when the index already contains duplicate labels.
        weight_row = pd.DataFrame(-1.0, index=['Hectograms'], columns=A.columns)
        A = pd.concat([A, weight_row])
        b = pd.concat([b, pd.Series([-float(max_weight)], index=['Hectograms'])])

    # Now solve problem!  (Note that the linear program solver we'll use assumes
    # "less-than-or-equal" constraints.  We can switch back and forth by
    # multiplying $A$ and $b$ by $-1$.)
    # 'highs' replaces the 'interior-point' method, which current SciPy no longer provides.
    result = lp(p.to_numpy(), -A.to_numpy(), -b.to_numpy(), method='highs')

    result.A = A
    result.b = b

    if result.success:
        diet = pd.Series(result.x,index=p.index)
        diet[diet.abs() < tol] = 0.0  # honour the documented tolerance
    else: # No feasible solution?
        warnings.warn(result.message)
        # result.x may be None on failure, so build the NaN diet directly.
        diet = pd.Series(np.nan,index=p.index)

    result.diet = diet

    return result

## [A] Data on prices for different foods

In [4]:
# Every store's price list is a named sheet of the same Google Sheets workbook.
_PRICE_WORKBOOK_URL = "https://docs.google.com/spreadsheets/d/17XCCM7-_Vk2erOKQmZsOZ2fYSsPUGzbGyTYZd_02bVk/edit#gid=0"

# Each variable is a one-element list holding a (workbook URL, sheet name) pair.
SHEETs = [(_PRICE_WORKBOOK_URL, "Berkeley Bowl")]
SHEETs2 = [(_PRICE_WORKBOOK_URL, "Safeway")]
SHEETs3 = [(_PRICE_WORKBOOK_URL, "Whole Foods")]
SHEETs4 = [(_PRICE_WORKBOOK_URL, "Trader Joe's")]

In [5]:
import pandas as pd
from eep153_tools.sheets import read_sheets

# Download each distinct workbook only once and pick the store sheets out of it.
# Re-reading the same URL for every store multiplies the number of Sheets API
# requests, which is what triggers the "Read requests per minute" quota error.
_workbooks = {}
for _url, _sheet_name in (SHEETs[0], SHEETs2[0], SHEETs3[0], SHEETs4[0]):
    if _url not in _workbooks:
        _workbooks[_url] = read_sheets(_url)

# One DataFrame per store, looked up by sheet name.
df_bb = _workbooks[SHEETs[0][0]][SHEETs[0][1]]
df_sw = _workbooks[SHEETs2[0][0]][SHEETs2[0][1]]
df_wf = _workbooks[SHEETs3[0][0]][SHEETs3[0][1]]
df_tj = _workbooks[SHEETs4[0][0]][SHEETs4[0][1]]

Key available for students@eep153.iam.gserviceaccount.com.


APIError: {'code': 429, 'message': "Quota exceeded for quota metric 'Read requests' and limit 'Read requests per minute per user' of service 'sheets.googleapis.com' for consumer 'project_number:456913459995'.", 'status': 'RESOURCE_EXHAUSTED', 'details': [{'@type': 'type.googleapis.com/google.rpc.ErrorInfo', 'reason': 'RATE_LIMIT_EXCEEDED', 'domain': 'googleapis.com', 'metadata': {'quota_metric': 'sheets.googleapis.com/read_requests', 'service': 'sheets.googleapis.com', 'consumer': 'projects/456913459995', 'quota_limit': 'ReadRequestsPerMinutePerUser'}}]}

In [None]:
# do NOT continuously run next cell

In [None]:
##do not rerun this cell while making changes to other cells, just run once. It takes like 5 minutes to run
import fooddatacentral as fdc
import warnings

# Map each Berkeley Bowl food name to its nutrient quantities.
D = {}
# Walk the Food and FDC columns in lockstep so every food is paired with the
# code from its own row, independent of how the frame happens to be indexed.
for food, FDC in zip(df_bb.Food, df_bb.FDC):
    try:
        D[food] = fdc.nutrients(apikey,FDC).Quantity
    except AttributeError:
        # Placeholder order matches the message: code first, then food name.
        warnings.warn("Couldn't find FDC Code %s for food %s." % (FDC,food))

# Columns are foods; rows are whatever nutrient labels the lookups returned.
FoodNutrients = pd.DataFrame(D,dtype=float)

In [None]:
def change_df_col_to_int(df, col):
    """Convert a '$'-prefixed text column of ``df`` to floats, in place.

    Despite the name, the column ends up with float (not integer) values.

    Inputs:
       - df : pd.DataFrame holding the column; it is modified in place.
       - col : label of the column to convert.

    Returns the same ``df`` object.  Calling the function again on an already
    numeric column is a no-op rather than an error, so the cell can be re-run.
    """
    values = df[col]
    if not pd.api.types.is_numeric_dtype(values):
        # regex=False: '$' must be removed literally, not read as the
        # end-of-string anchor.
        values = values.str.replace('$', '', regex=False)
    df[col] = values.astype(float)
    return df

In [None]:
# Convert each store's 'Price' column from '$'-prefixed text to floats; the
# frames are modified in place.  The value returned by the last call (df_tj)
# is what the cell displays.
change_df_col_to_int(df_bb, 'Price')
change_df_col_to_int(df_sw, 'Price')
change_df_col_to_int(df_wf, 'Price')
change_df_col_to_int(df_tj, 'Price')

In [None]:
# Convert food quantities to FDC units
def find_unit_prices(df):
    """Return the lowest observed price per FDC unit for each food in ``df``.

    Inputs:
       - df : pd.DataFrame with 'Food', 'Price', 'Quantity' and 'Units' columns.
              It is left untouched; the work happens on a copy.

    Returns a pd.Series named 'FDC Price', indexed by 'Food' in order of first
    appearance.  Rows missing any of the four required columns are ignored.
    """
    required = ['Food', 'Price', 'Quantity', 'Units']

    # Drop rows that cannot be priced.  Only the columns used below are checked,
    # so a blank in an unrelated column does not discard a food.
    priced = df.dropna(subset=required, how='any').copy()

    if priced.empty:
        return pd.Series([], index=pd.Index([], name='Food'), name='FDC Price', dtype=float)

    # Convert food quantities to FDC units
    priced['FDC Quantity'] = priced[['Quantity','Units']].apply(
        lambda row: fdc.units(row['Quantity'], row['Units']), axis=1)

    priced['FDC Price'] = priced['Price']/priced['FDC Quantity']

    # To use minimum price observed
    Prices = priced.groupby('Food',sort=False)['FDC Price'].min()
    return Prices

## [A] Nutritional Content 

In [None]:
# Lowest observed unit price of every food, one Series per store.
prices_bb, prices_sw, prices_wf, prices_tj = (
    find_unit_prices(store_df) for store_df in (df_bb, df_sw, df_wf, df_tj)
)

In [None]:
from eep153_tools.sheets import read_sheets

DRI_url = "https://docs.google.com/spreadsheets/d/1y95IsQ4HKspPW3HHDtH7QMtlDA66IUsCHJLutVL-MMc/"

# Workbook of dietary reference intakes, keyed by sheet name.
DRIs = read_sheets(DRI_url)

# Minimum and maximum intake tables, each indexed by its 'Nutrition' column.
diet_min, diet_max = (
    DRIs[sheet_name].set_index('Nutrition')
    for sheet_name in ('diet_minimums', 'diet_maximums')
)

In [None]:
# Minimum-cost diet at Berkeley Bowl for the 'M 19-30' group.
# `group` and `tol` stay module-level because later cells read them.
group, tol = 'M 19-30', 1e-6

result = solve_subsistence_problem(
    FoodNutrients, prices_bb, diet_min[group], diet_max[group], tol=tol
)
diet = result.diet  # quantities indexed by food

print("Cost of diet at Berkeley Bowl for %s is $%4.2f per day.\n" % (group,result.fun))

# Show only foods bought in quantities above the numerical tolerance.
print("\nDiet (in 100s of grams or milliliters):")
print(diet[diet >= tol], end="\n\n")

# Absolute values undo the sign flip the solver applies to maximum constraints.
tab = pd.DataFrame({
    "Outcome": np.abs(result.A).dot(diet),
    "Recommendation": np.abs(result.b),
})
print("\nWith the following nutritional outcomes of interest:")
print(tab, end="\n\n")

# A nutrient is constraining when its outcome (nearly) equals its recommendation.
print("\nConstraining nutrients are:")
excess = tab.diff(axis=1).iloc[:,1]
print(excess.loc[np.abs(excess) < tol*100].index.tolist())

In [None]:
# Minimum-cost diet at Berkeley Bowl for the 'F 19-30' group.
# `group` and `tol` stay module-level because later cells read them.
group, tol = 'F 19-30', 1e-6

result = solve_subsistence_problem(
    FoodNutrients, prices_bb, diet_min[group], diet_max[group], tol=tol
)
diet = result.diet  # quantities indexed by food

print("Cost of diet at Berkeley Bowl for %s is $%4.2f per day.\n" % (group,result.fun))

# Show only foods bought in quantities above the numerical tolerance.
print("\nDiet (in 100s of grams or milliliters):")
print(diet[diet >= tol], end="\n\n")

# Absolute values undo the sign flip the solver applies to maximum constraints.
tab = pd.DataFrame({
    "Outcome": np.abs(result.A).dot(diet),
    "Recommendation": np.abs(result.b),
})
print("\nWith the following nutritional outcomes of interest:")
print(tab, end="\n\n")

# A nutrient is constraining when its outcome (nearly) equals its recommendation.
print("\nConstraining nutrients are:")
excess = tab.diff(axis=1).iloc[:,1]
print(excess.loc[np.abs(excess) < tol*100].index.tolist())

In [None]:
# Get df_bb in terms of diet restrictions
def diet_df(df, col):
    """Return the rows of ``df`` whose ``col`` value equals 1.

    Inputs:
       - df : pd.DataFrame of foods.
       - col : label of a 1/0 flag column, e.g. 'Vegan (1/0)'.

    The result is an independent copy, so adding columns to it later neither
    touches ``df`` nor triggers pandas' chained-assignment warning.
    """
    return df[df[col] == 1].copy()

# Creating vegan, vegetarian, and pescetarian dfs for bb
# Berkeley Bowl rows flagged 1 in each diet column.
vegan_df_bb, veg_df_bb, pesc_df_bb = (
    diet_df(df_bb, flag_col)
    for flag_col in ('Vegan (1/0)', 'Vegetarian (1/0)', 'Pescetarian (1/0)')
)

In [None]:
# find prices for diet restricted dfs
# Lowest unit prices within each diet-restricted Berkeley Bowl subset.
vegan_prices_bb, veg_prices_bb, pesc_prices_bb = (
    find_unit_prices(subset) for subset in (vegan_df_bb, veg_df_bb, pesc_df_bb)
)

In [None]:
# find minimum cost meal with diet restrictions using solve_subsistence_problem()

def diet_restricted_subsistence(prices, type, sex, store):
    """Solve and print the minimum-cost diet for one diet type, sex and store.

    Inputs:
       - prices : pd.Series of unit prices for the foods allowed in this diet.
       - type : diet label used in the printed report (e.g. 'vegan').
       - sex : 'M' or 'F'; combined with ' 19-30' to select the DRI column.
       - store : store name used in the printed report.

    Reads the module-level FoodNutrients, diet_min and diet_max.

    Returns the result object from solve_subsistence_problem, so callers that
    assign the return value receive the solution instead of None.
    """
    group = sex+' 19-30'
    tol = 1e-6

    result = solve_subsistence_problem(FoodNutrients,prices,diet_min[group],diet_max[group],tol=tol)

    # Only the last literal carries format placeholders, so '%' applies to it alone.
    print("Cost of a "+type+" diet at "+store+" for %s is $%4.2f per day.\n" % (group,result.fun))

    # Put back into nice series
    diet = result.diet

    print("\nDiet (in 100s of grams or milliliters):")
    print(diet[diet >= tol])  # Drop items with quantities less than precision of calculation.
    print()

    # Absolute values undo the sign flip the solver applies to maximum constraints.
    tab = pd.DataFrame({"Outcome":np.abs(result.A).dot(diet),"Recommendation":np.abs(result.b)})
    print("\nWith the following nutritional outcomes of interest:")
    print(tab)
    print()

    # A nutrient is constraining when its outcome (nearly) equals its recommendation.
    print("\nConstraining nutrients are:")
    excess = tab.diff(axis=1).iloc[:,1]
    print(excess.loc[np.abs(excess) < tol*100].index.tolist())

    return result

In [None]:
# Print the minimum-cost vegan diet report for the 'M 19-30' group at Berkeley Bowl.
vegan_bb_m = diet_restricted_subsistence(vegan_prices_bb, 'vegan', 'M', 'Berkeley Bowl')

In [None]:
# Print the minimum-cost vegan diet report for the 'F 19-30' group at Berkeley Bowl.
vegan_bb_f = diet_restricted_subsistence(vegan_prices_bb, 'vegan', 'F', 'Berkeley Bowl')

In [None]:
# Print the minimum-cost vegetarian diet report for the 'M 19-30' group at Berkeley Bowl.
veg_bb_m = diet_restricted_subsistence(veg_prices_bb, 'vegetarian', 'M', 'Berkeley Bowl')

In [None]:
# Print the minimum-cost vegetarian diet report for the 'F 19-30' group at Berkeley Bowl.
veg_bb_f = diet_restricted_subsistence(veg_prices_bb, 'vegetarian', 'F', 'Berkeley Bowl')

In [None]:
# Print the minimum-cost pescetarian diet report for the 'M 19-30' group at Berkeley Bowl.
pesc_bb_m = diet_restricted_subsistence(pesc_prices_bb, 'pescetarian', 'M', 'Berkeley Bowl')

In [None]:
# Print the minimum-cost pescetarian diet report for the 'F 19-30' group at Berkeley Bowl.
pesc_bb_f = diet_restricted_subsistence(pesc_prices_bb, 'pescetarian', 'F', 'Berkeley Bowl')

In [None]:
# Minimum-cost diet at Safeway for the 'M 19-30' group.
# `group` and `tol` stay module-level because later cells read them.
group, tol = 'M 19-30', 1e-6

result = solve_subsistence_problem(
    FoodNutrients, prices_sw, diet_min[group], diet_max[group], tol=tol
)
diet = result.diet  # quantities indexed by food

print("Cost of diet at Safeway for %s is $%4.2f per day.\n" % (group,result.fun))

# Show only foods bought in quantities above the numerical tolerance.
print("\nDiet (in 100s of grams or milliliters):")
print(diet[diet >= tol], end="\n\n")

# Absolute values undo the sign flip the solver applies to maximum constraints.
tab = pd.DataFrame({
    "Outcome": np.abs(result.A).dot(diet),
    "Recommendation": np.abs(result.b),
})
print("\nWith the following nutritional outcomes of interest:")
print(tab, end="\n\n")

# A nutrient is constraining when its outcome (nearly) equals its recommendation.
print("\nConstraining nutrients are:")
excess = tab.diff(axis=1).iloc[:,1]
print(excess.loc[np.abs(excess) < tol*100].index.tolist())

In [None]:
# Minimum-cost diet at Safeway for the 'F 19-30' group.
# `group` and `tol` stay module-level because later cells read them.
group, tol = 'F 19-30', 1e-6

result = solve_subsistence_problem(
    FoodNutrients, prices_sw, diet_min[group], diet_max[group], tol=tol
)
diet = result.diet  # quantities indexed by food

print("Cost of diet at Safeway for %s is $%4.2f per day.\n" % (group,result.fun))

# Show only foods bought in quantities above the numerical tolerance.
print("\nDiet (in 100s of grams or milliliters):")
print(diet[diet >= tol], end="\n\n")

# Absolute values undo the sign flip the solver applies to maximum constraints.
tab = pd.DataFrame({
    "Outcome": np.abs(result.A).dot(diet),
    "Recommendation": np.abs(result.b),
})
print("\nWith the following nutritional outcomes of interest:")
print(tab, end="\n\n")

# A nutrient is constraining when its outcome (nearly) equals its recommendation.
print("\nConstraining nutrients are:")
excess = tab.diff(axis=1).iloc[:,1]
print(excess.loc[np.abs(excess) < tol*100].index.tolist())

In [None]:
# Creating vegan, vegetarian, and pescetarian dfs for sw
# Safeway rows flagged 1 in each diet column.
vegan_df_sw, veg_df_sw, pesc_df_sw = (
    diet_df(df_sw, flag_col)
    for flag_col in ('Vegan (1/0)', 'Vegetarian (1/0)', 'Pescetarian (1/0)')
)
# Lowest unit prices within each diet-restricted subset.
vegan_prices_sw, veg_prices_sw, pesc_prices_sw = (
    find_unit_prices(subset) for subset in (vegan_df_sw, veg_df_sw, pesc_df_sw)
)

In [None]:
# Print the minimum-cost vegan diet report for the 'M 19-30' group at Safeway.
vegan_sw_m = diet_restricted_subsistence(vegan_prices_sw, 'vegan', 'M', 'Safeway')

In [None]:
# Print the minimum-cost vegan diet report for the 'F 19-30' group at Safeway.
vegan_sw_f = diet_restricted_subsistence(vegan_prices_sw, 'vegan', 'F', 'Safeway')

In [None]:
# Print the minimum-cost vegetarian diet report for the 'M 19-30' group at Safeway.
veg_sw_m = diet_restricted_subsistence(veg_prices_sw, 'vegetarian', 'M', 'Safeway')

In [None]:
# Print the minimum-cost vegetarian diet report for the 'F 19-30' group at Safeway.
veg_sw_f = diet_restricted_subsistence(veg_prices_sw, 'vegetarian', 'F', 'Safeway')

In [None]:
# Print the minimum-cost pescetarian diet report for the 'M 19-30' group at Safeway.
pesc_sw_m = diet_restricted_subsistence(pesc_prices_sw, 'pescetarian', 'M', 'Safeway')

In [None]:
# Print the minimum-cost pescetarian diet report for the 'F 19-30' group at Safeway.
pesc_sw_f = diet_restricted_subsistence(pesc_prices_sw, 'pescetarian', 'F', 'Safeway')

In [None]:
# Minimum-cost diet at Whole Foods for the 'M 19-30' group.
# `group` and `tol` stay module-level because later cells read them.
group, tol = 'M 19-30', 1e-6

result = solve_subsistence_problem(
    FoodNutrients, prices_wf, diet_min[group], diet_max[group], tol=tol
)
diet = result.diet  # quantities indexed by food

print("Cost of diet at Whole Foods for %s is $%4.2f per day.\n" % (group,result.fun))

# Show only foods bought in quantities above the numerical tolerance.
print("\nDiet (in 100s of grams or milliliters):")
print(diet[diet >= tol], end="\n\n")

# Absolute values undo the sign flip the solver applies to maximum constraints.
tab = pd.DataFrame({
    "Outcome": np.abs(result.A).dot(diet),
    "Recommendation": np.abs(result.b),
})
print("\nWith the following nutritional outcomes of interest:")
print(tab, end="\n\n")

# A nutrient is constraining when its outcome (nearly) equals its recommendation.
print("\nConstraining nutrients are:")
excess = tab.diff(axis=1).iloc[:,1]
print(excess.loc[np.abs(excess) < tol*100].index.tolist())

In [None]:
# Minimum-cost diet at Whole Foods for the 'F 19-30' group.
# `group` and `tol` stay module-level because later cells read them.
group, tol = 'F 19-30', 1e-6

result = solve_subsistence_problem(
    FoodNutrients, prices_wf, diet_min[group], diet_max[group], tol=tol
)
diet = result.diet  # quantities indexed by food

print("Cost of diet at Whole Foods for %s is $%4.2f per day.\n" % (group,result.fun))

# Show only foods bought in quantities above the numerical tolerance.
print("\nDiet (in 100s of grams or milliliters):")
print(diet[diet >= tol], end="\n\n")

# Absolute values undo the sign flip the solver applies to maximum constraints.
tab = pd.DataFrame({
    "Outcome": np.abs(result.A).dot(diet),
    "Recommendation": np.abs(result.b),
})
print("\nWith the following nutritional outcomes of interest:")
print(tab, end="\n\n")

# A nutrient is constraining when its outcome (nearly) equals its recommendation.
print("\nConstraining nutrients are:")
excess = tab.diff(axis=1).iloc[:,1]
print(excess.loc[np.abs(excess) < tol*100].index.tolist())

In [None]:
# Creating vegan, vegetarian, and pescetarian dfs for wf
# Whole Foods rows flagged 1 in each diet column.
vegan_df_wf, veg_df_wf, pesc_df_wf = (
    diet_df(df_wf, flag_col)
    for flag_col in ('Vegan (1/0)', 'Vegetarian (1/0)', 'Pescetarian (1/0)')
)
# Lowest unit prices within each diet-restricted subset.
vegan_prices_wf, veg_prices_wf, pesc_prices_wf = (
    find_unit_prices(subset) for subset in (vegan_df_wf, veg_df_wf, pesc_df_wf)
)

In [None]:
# Print the minimum-cost vegan diet report for the 'M 19-30' group at Whole Foods.
# Stored under a Whole Foods name so the Safeway variable is not overwritten.
vegan_wf_m = diet_restricted_subsistence(vegan_prices_wf, 'vegan', 'M', 'Whole Foods')

In [None]:
# Print the minimum-cost vegan diet report for the 'F 19-30' group at Whole Foods.
# Stored under a Whole Foods name so the Safeway variable is not overwritten.
vegan_wf_f = diet_restricted_subsistence(vegan_prices_wf, 'vegan', 'F', 'Whole Foods')

In [None]:
# Print the minimum-cost vegetarian diet report for the 'M 19-30' group at Whole Foods.
# Stored under a Whole Foods name so the Safeway variable is not overwritten.
veg_wf_m = diet_restricted_subsistence(veg_prices_wf, 'vegetarian', 'M', 'Whole Foods')

In [None]:
# Print the minimum-cost vegetarian diet report for the 'F 19-30' group at Whole Foods.
# Stored under a Whole Foods name so the Safeway variable is not overwritten.
veg_wf_f = diet_restricted_subsistence(veg_prices_wf, 'vegetarian', 'F', 'Whole Foods')

In [None]:
# Print the minimum-cost pescetarian diet report for the 'M 19-30' group at Whole Foods.
# Stored under a Whole Foods name so the Safeway variable is not overwritten.
pesc_wf_m = diet_restricted_subsistence(pesc_prices_wf, 'pescetarian', 'M', 'Whole Foods')

In [None]:
# Print the minimum-cost pescetarian diet report for the 'F 19-30' group at Whole Foods.
# Stored under a Whole Foods name so the Safeway variable is not overwritten.
pesc_wf_f = diet_restricted_subsistence(pesc_prices_wf, 'pescetarian', 'F', 'Whole Foods')

In [None]:
# Minimum-cost diet at Trader Joe's for the 'M 19-30' group.
# `group` and `tol` stay module-level because later cells read them.
group, tol = 'M 19-30', 1e-6

result = solve_subsistence_problem(
    FoodNutrients, prices_tj, diet_min[group], diet_max[group], tol=tol
)
diet = result.diet  # quantities indexed by food

print("Cost of diet at Trader Joe's for %s is $%4.2f per day.\n" % (group,result.fun))

# Show only foods bought in quantities above the numerical tolerance.
print("\nDiet (in 100s of grams or milliliters):")
print(diet[diet >= tol], end="\n\n")

# Absolute values undo the sign flip the solver applies to maximum constraints.
tab = pd.DataFrame({
    "Outcome": np.abs(result.A).dot(diet),
    "Recommendation": np.abs(result.b),
})
print("\nWith the following nutritional outcomes of interest:")
print(tab, end="\n\n")

# A nutrient is constraining when its outcome (nearly) equals its recommendation.
print("\nConstraining nutrients are:")
excess = tab.diff(axis=1).iloc[:,1]
print(excess.loc[np.abs(excess) < tol*100].index.tolist())

In [None]:
# Minimum-cost diet at Trader Joe's for the 'F 19-30' group.
# `group` and `tol` stay module-level because later cells read them.
group, tol = 'F 19-30', 1e-6

result = solve_subsistence_problem(
    FoodNutrients, prices_tj, diet_min[group], diet_max[group], tol=tol
)
diet = result.diet  # quantities indexed by food

print("Cost of diet at Trader Joe's for %s is $%4.2f per day.\n" % (group,result.fun))

# Show only foods bought in quantities above the numerical tolerance.
print("\nDiet (in 100s of grams or milliliters):")
print(diet[diet >= tol], end="\n\n")

# Absolute values undo the sign flip the solver applies to maximum constraints.
tab = pd.DataFrame({
    "Outcome": np.abs(result.A).dot(diet),
    "Recommendation": np.abs(result.b),
})
print("\nWith the following nutritional outcomes of interest:")
print(tab, end="\n\n")

# A nutrient is constraining when its outcome (nearly) equals its recommendation.
print("\nConstraining nutrients are:")
excess = tab.diff(axis=1).iloc[:,1]
print(excess.loc[np.abs(excess) < tol*100].index.tolist())

In [None]:
# Creating vegan, vegetarian, and pescetarian dfs for tj
# Trader Joe's rows flagged 1 in each diet column.
vegan_df_tj, veg_df_tj, pesc_df_tj = (
    diet_df(df_tj, flag_col)
    for flag_col in ('Vegan (1/0)', 'Vegetarian (1/0)', 'Pescetarian (1/0)')
)
# Lowest unit prices within each diet-restricted subset.
vegan_prices_tj, veg_prices_tj, pesc_prices_tj = (
    find_unit_prices(subset) for subset in (vegan_df_tj, veg_df_tj, pesc_df_tj)
)

In [None]:
# Print the minimum-cost vegan diet report for the 'M 19-30' group at Trader Joe's.
vegan_tj_m = diet_restricted_subsistence(vegan_prices_tj, 'vegan', 'M', "Trader Joe's")

In [None]:
# Print the minimum-cost vegan diet report for the 'F 19-30' group at Trader Joe's.
vegan_tj_f = diet_restricted_subsistence(vegan_prices_tj, 'vegan', 'F', "Trader Joe's")

In [None]:
# Print the minimum-cost vegetarian diet report for the 'M 19-30' group at Trader Joe's.
veg_tj_m = diet_restricted_subsistence(veg_prices_tj, 'vegetarian', 'M', "Trader Joe's")

In [None]:
# Print the minimum-cost vegetarian diet report for the 'F 19-30' group at Trader Joe's.
veg_tj_f = diet_restricted_subsistence(veg_prices_tj, 'vegetarian', 'F', "Trader Joe's")

In [None]:
# Print the minimum-cost pescetarian diet report for the 'M 19-30' group at Trader Joe's.
pesc_tj_m = diet_restricted_subsistence(pesc_prices_tj, 'pescetarian', 'M', "Trader Joe's")

In [None]:
# Print the minimum-cost pescetarian diet report for the 'F 19-30' group at Trader Joe's.
pesc_tj_f = diet_restricted_subsistence(pesc_prices_tj, 'pescetarian', 'F', "Trader Joe's")

## Survey Data 
### Guide for identifying our Central Research question

In [None]:
# Install the packages needed to read the survey sheet, via shell commands.
!pip install eep153_tools
!pip install python_gnupg

from eep153_tools.sheets import decrypt_credentials
#decrypt_credentials('../students.json.gpg')

In [None]:
import matplotlib.pyplot as plt
from  scipy.optimize import linprog as lp
import numpy as np
import warnings
import pandas as pd

from eep153_tools.sheets import read_sheets
# One-element list holding the (workbook URL, sheet name) pair for the survey responses.
SHEET = [("https://docs.google.com/spreadsheets/d/1aI1wYbJ1LY-en_B9wt1kHJ0CB9Hh6QibausCN5l6FlA/edit?usp=sharing","Form Responses 1")]

In [None]:
# Read the survey workbook and keep the sheet named in SHEET.
df_survey = read_sheets(SHEET[0][0])[SHEET[0][1]]

In [None]:
# Preview only the first rows instead of rendering every survey response.
df_survey.head()

In [None]:
def change_df_col_to_int(df, col):
    """Convert a '$'-prefixed text column of ``df`` to floats, in place.

    Despite the name, the column ends up with float (not integer) values.
    NOTE: a function with this name is also defined earlier in the notebook;
    this later definition replaces it once the cell runs.

    Inputs:
       - df : pd.DataFrame holding the column; it is modified in place.
       - col : label of the column to convert.

    Returns the same ``df`` object.  Calling the function again on an already
    numeric column is a no-op rather than an error, so the cell can be re-run.
    """
    values = df[col]
    if not pd.api.types.is_numeric_dtype(values):
        # regex=False: '$' must be removed literally, not read as the
        # end-of-string anchor.
        values = values.str.replace('$', '', regex=False)
    df[col] = values.astype(float)
    return df

In [None]:
# Count how many responses gave each answer to the shopping-frequency question.
df_survey['How many times per week do you go grocery shopping?'].value_counts()

In [None]:
# Answer -> number of respondents (hard-coded; presumably copied from the
# value_counts() output of the preceding cell -- TODO confirm).
GroceryShopping_dict = {'Once a Week':77, 'Twice a Week':5, 'Three times a Week':1, 'Four Times a Week': 2}

# Split the tally into parallel lists of bar labels and bar heights.
frequency_of_shopping, values = map(list, zip(*GroceryShopping_dict.items()))

fig_GroceryShopping_Freq = plt.figure(figsize = (10, 5))

# Bar plot
plt.bar(frequency_of_shopping, values, color ='green', width = 0.5)
plt.xlabel("Grocery Shop Trips Per Week")
plt.ylabel("Number of Respondents")
plt.title("How many times per week do you go grocery shopping?")
plt.show()

In [None]:
# Count how many responses gave each answer to the spending-per-trip question.
df_survey['How much do you spend on an average grocery-shopping trip?'].value_counts()

In [None]:
# Spending bracket -> number of respondents (hard-coded; presumably copied from
# the value_counts() output of the preceding cell -- TODO confirm).
GrocerySpending_dict = {'$ 0 - 20':3, '$ 20 - 40 ':33, '$ 40 - 60':25, '$ 60 - 80':12, '$ 80 - 100':7, '$ 100 +':5}

# Split the tally into parallel lists of bar labels and bar heights.
spending_per_shopping, values = map(list, zip(*GrocerySpending_dict.items()))

fig_GrocerySpending_Per = plt.figure(figsize = (10, 5))

# Bar plot, one colour per spending bracket
plt.bar(
    spending_per_shopping,
    values,
    color = ['green','blue', 'red', 'purple', 'orange', 'brown'],
    width = 0.5,
)

plt.xlabel("Approx Grocery Spending Per Trip")
plt.ylabel("Number of Respondents")
plt.title("How much do you spend on an average grocery-shopping trip?")
plt.show()

In [None]:
# Count how many responses named each store as the one they shop at most.
df_survey['Where do you grocery shop the most in Berkeley?'].value_counts()

In [None]:
# Percentage of the 86 answers (presumably one per respondent) naming each store.
_STORE_TOTAL = 86
tj_percent, safeway_percent, bb_percent, wf_percent = (
    (answers/_STORE_TOTAL)*100 for answers in (59, 14, 11, 2)
)

In [None]:
# Pie chart, where the slices will be ordered and plotted counter-clockwise:
# Pie chart; slices are laid out counter-clockwise in the order given here.
labels = ('Trader Joes', 'Safeway', 'Berkeley Bowl', 'Whole Foods')
sizes = [tj_percent, safeway_percent, bb_percent, wf_percent]
# Pull only the first slice (Trader Joes) away from the centre.
explode = (0.1, 0, 0, 0)

fig1, ax1 = plt.subplots()
ax1.pie(
    sizes,
    explode=explode,
    labels=labels,
    autopct='%1.1f%%',
    shadow=True,
    startangle=90,
)
# Equal aspect ratio ensures that pie is drawn as a circle.
ax1.axis('equal')
ax1.set_title("Where do you grocery shop the most in Berkeley?")
plt.show()

In [None]:
# Count how many responses gave each answer to the preferred-diet question.
df_survey['What is your preferred diet?'].value_counts()

In [None]:
# Percentage of the 86 answers (presumably one per respondent) for each diet.
_DIET_TOTAL = 86
om_percent, veget_percent, vegan_percent, pesc_percent = (
    (answers/_DIET_TOTAL)*100 for answers in (66, 14, 3, 3)
)

In [None]:
# Pie chart of preferred diets; slices are laid out counter-clockwise.
labels_diet = 'Omnivorous', 'Vegetarian', 'Vegan', 'Pescatarian'
sizes_diet = [om_percent, veget_percent, vegan_percent, pesc_percent]
explode_diet = (0.1, 0, 0, 0)  # only "explode" the 1st slice (Omnivorous)

fig1, ax1 = plt.subplots()
# Plot the diet shares (sizes_diet), not the store shares from the earlier
# chart, so each slice matches its diet label.
ax1.pie(sizes_diet, explode=explode_diet, labels=labels_diet, autopct='%1.1f%%',
        shadow=True, startangle=90)
ax1.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle.
plt.title("What is your preferred diet?")
plt.show()

## [C]Sensitivity Analysis
As prices change, we should expect the minimum cost diet to also change. The code below creates a graph which changes prices away from the 'base' case one food at a time, and plots changes in total diet cost. We see that increases in price have a relatively lower impact (increase) on total diet cost compared to a decrease in price. For example, if there were discounts one week, it would most likely have a pretty large impact on the cost of the total diet. We find that total diet cost is very sensitive to the price of cheddar cheese, while it is sensitive to price increases for sliced turkey and to price decreases for jalapenos.

In [None]:
import cufflinks as cf
cf.go_offline()

#Berkeley Bowl

# NOTE: `group` and `tol` are not assigned in this cell; the values left by the
# most recently run cell that set them are used.

# Multipliers applied to one food's price at a time (1.0 is the base case).
scale = [.5,.6,.7,.8,.9,1.,1.1,1.2,1.3,1.4,1.5]

# Diet cost at unchanged prices, used as the reference.
cost0 = solve_subsistence_problem(FoodNutrients,prices_bb,diet_min[group],diet_max[group],tol=tol).fun

Price_response={}
for s in scale:
    cost = {}
    for food,p in prices_bb.items():
        my_p = prices_bb.copy()
        # Assign by food label: an integer key on a label-indexed Series is
        # not reliably treated as a position.
        my_p[food] = p*s
        result = solve_subsistence_problem(FoodNutrients,my_p,diet_min[group],diet_max[group],tol=tol)
        cost[food] = np.log(result.fun/cost0)
    Price_response[np.log(s)] = cost

# Rows: change in log price; columns: the food whose price was changed.
Price_response = pd.DataFrame(Price_response).T
Price_response.iplot(xTitle='change in log price',yTitle='change in log cost')

###  Trader Joe's
We find that at Trader Joe's, total diet cost is most sensitive to changes in the prices of romaine lettuce and peanut butter, for both increases and decreases.

In [None]:
#trader joes

# NOTE: `group` and `tol` are not assigned in this cell; the values left by the
# most recently run cell that set them are used.

# Multipliers applied to one food's price at a time (1.0 is the base case).
scale = [.5,.6,.7,.8,.9,1.,1.1,1.2,1.3,1.4,1.5]

# Diet cost at unchanged prices, used as the reference.
cost0 = solve_subsistence_problem(FoodNutrients,prices_tj,diet_min[group],diet_max[group],tol=tol).fun

Price_response={}
for s in scale:
    cost = {}
    for food,p in prices_tj.items():
        my_p = prices_tj.copy()
        # Assign by food label: an integer key on a label-indexed Series is
        # not reliably treated as a position.
        my_p[food] = p*s
        result = solve_subsistence_problem(FoodNutrients,my_p,diet_min[group],diet_max[group],tol=tol)
        cost[food] = np.log(result.fun/cost0)
    Price_response[np.log(s)] = cost

# Rows: change in log price; columns: the food whose price was changed.
Price_response = pd.DataFrame(Price_response).T
Price_response.iplot(xTitle='change in log price',yTitle='change in log cost')

### Whole Foods
We find that at Whole Foods, Almond milk is the most sensitive to price changes across the board, while bananas and peanut butter are specifically sensitive to decreases, and sliced turkey and chickpeas are more sensitive to price increases.

In [None]:
#whole foods

# NOTE: `group` and `tol` are not assigned in this cell; the values left by the
# most recently run cell that set them are used.

# Multipliers applied to one food's price at a time (1.0 is the base case).
scale = [.5,.6,.7,.8,.9,1.,1.1,1.2,1.3,1.4,1.5]

# Diet cost at unchanged prices, used as the reference.
cost0 = solve_subsistence_problem(FoodNutrients,prices_wf,diet_min[group],diet_max[group],tol=tol).fun

Price_response={}
for s in scale:
    cost = {}
    for food,p in prices_wf.items():
        my_p = prices_wf.copy()
        # Assign by food label: an integer key on a label-indexed Series is
        # not reliably treated as a position.
        my_p[food] = p*s
        result = solve_subsistence_problem(FoodNutrients,my_p,diet_min[group],diet_max[group],tol=tol)
        cost[food] = np.log(result.fun/cost0)
    Price_response[np.log(s)] = cost

# Rows: change in log price; columns: the food whose price was changed.
Price_response = pd.DataFrame(Price_response).T
Price_response.iplot(xTitle='change in log price',yTitle='change in log cost')

### Safeway
We find that at Safeway, Peanut butter is sensitive to price across the board, while romaine lettuce is sensitive to increases and sliced turkey is sensitive to decreases.

In [None]:
#safeway

# NOTE: `group` and `tol` are not assigned in this cell; the values left by the
# most recently run cell that set them are used.

# Multipliers applied to one food's price at a time (1.0 is the base case).
scale = [.5,.6,.7,.8,.9,1.,1.1,1.2,1.3,1.4,1.5]

# Diet cost at unchanged prices, used as the reference.
cost0 = solve_subsistence_problem(FoodNutrients,prices_sw,diet_min[group],diet_max[group],tol=tol).fun

Price_response={}
for s in scale:
    cost = {}
    for food,p in prices_sw.items():
        my_p = prices_sw.copy()
        # Assign by food label: an integer key on a label-indexed Series is
        # not reliably treated as a position.
        my_p[food] = p*s
        result = solve_subsistence_problem(FoodNutrients,my_p,diet_min[group],diet_max[group],tol=tol)
        cost[food] = np.log(result.fun/cost0)
    Price_response[np.log(s)] = cost

# Rows: change in log price; columns: the food whose price was changed.
Price_response = pd.DataFrame(Price_response).T
Price_response.iplot(xTitle='change in log price',yTitle='change in log cost')

In [None]:
cf.go_offline()

ReferenceGood = 'Sliced Turkey'

# Multipliers applied to the reference good's Berkeley Bowl price.
scale = [0.5,0.75,0.9,1.,1.1,1.2,1.3,1.4,1.5,2,4]

# NOTE: `group` and `tol` are not assigned in this cell; the values left by the
# most recently run cell that set them are used.
# (The baseline cost was previously solved for here but never used, so that
# extra solve has been dropped.)

my_p = prices_bb.copy()

diet = {}
for s in scale:
    my_p[ReferenceGood] = prices_bb[ReferenceGood]*s
    result = solve_subsistence_problem(FoodNutrients,my_p,diet_min[group],diet_max[group],tol=tol)
    # Key each solution by the bare price number: cufflinks chokes on an index
    # whose entries carry units.
    price = my_p[ReferenceGood]
    diet[getattr(price, 'magnitude', price)] = result.diet

# Rows: price of the reference good; columns: foods.
Diet_response = pd.DataFrame(diet).T
Diet_response.index.name = '%s Price' % ReferenceGood

# Just look at goods consumed in quantities greater than error tolerance
Diet_response.loc[:,(Diet_response>tol).sum()>0].iplot(xTitle='%s Price' % ReferenceGood,yTitle='Hectograms')

In [None]:
cf.go_offline()

ReferenceGood = 'Cheddar cheese'

# Multipliers applied to the reference good's Berkeley Bowl price.
scale = [0.5,0.75,0.9,1.,1.1,1.2,1.3,1.4,1.5,2,4]

# NOTE: `group` and `tol` are not assigned in this cell; the values left by the
# most recently run cell that set them are used.
# (The baseline cost was previously solved for here but never used, so that
# extra solve has been dropped.)

my_p = prices_bb.copy()

diet = {}
for s in scale:
    my_p[ReferenceGood] = prices_bb[ReferenceGood]*s
    result = solve_subsistence_problem(FoodNutrients,my_p,diet_min[group],diet_max[group],tol=tol)
    # Key each solution by the bare price number: cufflinks chokes on an index
    # whose entries carry units.
    price = my_p[ReferenceGood]
    diet[getattr(price, 'magnitude', price)] = result.diet

# Rows: price of the reference good; columns: foods.
Diet_response = pd.DataFrame(diet).T
Diet_response.index.name = '%s Price' % ReferenceGood

# Just look at goods consumed in quantities greater than error tolerance
Diet_response.loc[:,(Diet_response>tol).sum()>0].iplot(xTitle='%s Price' % ReferenceGood,yTitle='Hectograms')

In [None]:
cf.go_offline()

# NOTE(review): spelled 'Jalepenos' here; this must match the food name used in
# the price sheet exactly, so it is left as is -- confirm against the sheet.
ReferenceGood = 'Jalepenos'

# Multipliers applied to the reference good's Berkeley Bowl price.
scale = [0.5,0.75,0.9,1.,1.1,1.2,1.3,1.4,1.5,2,4]

# NOTE: `group` and `tol` are not assigned in this cell; the values left by the
# most recently run cell that set them are used.
# (The baseline cost was previously solved for here but never used, so that
# extra solve has been dropped.)

my_p = prices_bb.copy()

diet = {}
for s in scale:
    my_p[ReferenceGood] = prices_bb[ReferenceGood]*s
    result = solve_subsistence_problem(FoodNutrients,my_p,diet_min[group],diet_max[group],tol=tol)
    # Key each solution by the bare price number: cufflinks chokes on an index
    # whose entries carry units.
    price = my_p[ReferenceGood]
    diet[getattr(price, 'magnitude', price)] = result.diet

# Rows: price of the reference good; columns: foods.
Diet_response = pd.DataFrame(diet).T
Diet_response.index.name = '%s Price' % ReferenceGood

# Just look at goods consumed in quantities greater than error tolerance
Diet_response.loc[:,(Diet_response>tol).sum()>0].iplot(xTitle='%s Price' % ReferenceGood,yTitle='Hectograms')