In [58]:
import pandas as pd
import numpy as np

In [98]:
eat_in = pd.read_csv('data/eat_in.csv').dropna(how='all')
eat_out = pd.read_csv('data/eat_out.csv').dropna(how='all')
profiles = pd.read_csv('data/profiles.csv').dropna(how='all')

In [99]:
# Convert to metric
profiles['weight'] = profiles['weight'] * 0.453592
profiles['height'] = profiles['height'] * 2.54
profiles['preferences'] = profiles['preferences'].apply(lambda x : x.lower().split('_'))
profiles.index.name = 'id'

In [100]:
def fill_missing_profile(series):
    if str(series['age']) == 'nan':
        series['age'] = 30
    
    if str(series['sex']) == 'nan':
        if str(series['weight']) == 'nan':
            series['weight'] = 81.5
        if str(series['height']) == 'nan':
            series['height'] = 169.7
        if str(series['age']) == 'nan':
            series['age'] = 30
    elif str(series['sex']) == 'M':
        if str(series['weight']) == 'nan':
            series['weight'] = 88.3
        if str(series['height']) == 'nan':
            series['height'] = 176.4
        if str(series['age']) == 'nan':
            series['age'] = 30
    elif str(series['sex']) == 'F':
        if str(series['weight']) == 'nan':
            series['weight'] = 74.7
        if str(series['height']) == 'nan':
            series['height'] = 162.9
        if str(series['age']) == 'nan':
            series['age'] = 30
            
    return series

In [101]:
def get_bmr(series):
    
    if series['sex'] == 'M':
        bmr = (10 * series['weight']) + (6.25 * series['height']) - (5 * series['age']) + 5
    elif series['sex'] == 'F':
        bmr = (10 * series['weight']) + (6.25 * series['height']) - (5 * series['age']) - 161
    else:
        bmr = (10 * series['weight']) + (6.25 * series['height']) - (5 * series['age']) - 78
        
    if series['activity level'] == 'little to no':
        bmr *= 1.2
    elif series['activity level'] == 'light':
        bmr *= 1.375
    elif series['activity level'] == 'moderate':
        bmr *= 1.55
    elif series['activity level'] == 'heavy':
        bmr *= 1.725
    elif series['activity level'] == 'very heavy':
        bmr *= 1.9
    else: 
        bmr *= 1.3
        
    return round(bmr,2)

In [102]:
def get_daily_macros(series,bmr,grams=True):
    protein = round(2.2 * series['weight'],2)
    protein_cal = round(4 * protein,2)
    
    fat = round(0.4 * 2.2 * series['weight'],2)
    fat_cal = round(9 * fat,2)
    
    carb_cal = round(bmr - protein_cal - fat_cal,2)
    carb = round(carb_cal / 4,2)
    
    if grams:
        return protein, fat, carb
    else:
        return protein_cal, fat_cal, carb_cal

In [103]:
def get_nutrition(df):
    columns = ['calories','protein','fat','carb']
    for col in columns:
        df[col] = np.nan
    
    for index,row in df.iterrows():
        test_profile = fill_missing_profile(row)
        
        bmr = get_bmr(test_profile)
        protein, fat, carb = get_daily_macros(test_profile,bmr,grams=True)
        
        df.loc[index,'calories'] = bmr
        df.loc[index,'protein'] = protein
        df.loc[index,'fat'] = fat
        df.loc[index,'carb'] = carb
        
    return df

In [104]:
get_nutrition(profiles)

Unnamed: 0_level_0,name_first,name_last,telephone number,time,location,sex,height,weight,activity level,age,preferences,calories,protein,fat,carb
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,Josiah,Carberry,(401) 863-1000,11:02,"222 Richmond St, Providence, RI 02903",M,172.72,68.0388,light,21.0,"[mediterranean, mexican, meat, sandwiches]",2282.35,149.69,59.87,286.19
1,Josie,Carberry,(401) 276-966,5:35,"94 Meeting St, Providence, RI 02906",,,,,,"[thai, japanese, vegetarian]",2141.91,179.3,71.72,194.81


In [105]:
average_meal_df = pd.DataFrame(index=['breakfast','lunch','dinner'],columns=['calories','protein','fat','carb'])

average_meal_df.loc['breakfast','calories'] = 700
average_meal_df.loc['breakfast','protein'] = 60
average_meal_df.loc['breakfast','fat'] = 24
average_meal_df.loc['breakfast','carb'] = 65

average_meal_df.loc['lunch','calories'] = 700
average_meal_df.loc['lunch','protein'] = 60
average_meal_df.loc['lunch','fat'] = 24
average_meal_df.loc['lunch','carb'] = 65

average_meal_df.loc['dinner','calories'] = 700
average_meal_df.loc['dinner','protein'] = 60
average_meal_df.loc['dinner','fat'] = 24
average_meal_df.loc['dinner','carb'] = 65

In [106]:
eat_out.columns = [x.lower() for x in eat_out.columns]

In [107]:
eat_out.columns

Index(['resaurant', 'location', 'foodstamps', 'delivery', 'meal title',
       'price', 'fat (g)', 'carb (g)', 'protein (g)', 'sugar (g)',
       'sodium (mg)', 'calories', 'fast food', 'mexican', 'thai', 'indian',
       'chinese', 'japanese', 'caribbean', 'mediterranean', 'vegetarian',
       'vegan', 'sandwiches', 'american', 'meat', 'seafood'],
      dtype='object')

In [108]:
preferences_lst = [
    'fast food',
    'mexican',
    'thai',
    'indian',
    'chinese',
    'japanese',
    'caribbean',
    'mediterranean',
    'vegetarian',
    'vegan',
    'sandwiches',
    'american',
    'meat',
    'seafood',
]

In [112]:
pref_dummies = pd.get_dummies(profiles.loc[0,'preferences'])
for pref in preferences_lst:
    if pref not in pref_dummies.columns:
        pref_dummies[pref] = 0

pref_one_hot = pref_dummies.max()

In [131]:
((pref_one_hot - row.astype(int)).apply(lambda x : abs(x))).sum()

  return this.join(other, how=how, return_indexers=return_indexers)


0.0

In [136]:
loss = pd.Series(index=eat_out.index)
for index, row in eat_out[preferences_lst].iterrows():
    loss[index] = ((pref_one_hot - row.astype(int)).apply(lambda x : abs(x))).sum()

In [137]:
loss

0     3.0
1     3.0
2     4.0
3     3.0
4     3.0
5     5.0
6     6.0
7     6.0
8     5.0
9     5.0
10    2.0
11    5.0
12    3.0
13    6.0
14    5.0
15    2.0
16    3.0
17    2.0
18    2.0
19    3.0
20    4.0
21    4.0
22    4.0
23    4.0
24    4.0
dtype: float64

In [None]:
def get_preferences(series):
    