<a href="https://colab.research.google.com/github/hpayettepeterson/foodprint/blob/hannah_branch2/Clustering_visualization_%26_data_to_show.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Set up drive

In [1]:
# mount drive
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# enter correct folder
%cd /content/drive/MyDrive/Colab_Notebooks/foodprint.ai

/content/drive/MyDrive/Colab_Notebooks/foodprint.ai


# Prepare data

## Import data

In [3]:
import pandas as pd

clustering_df = pd.read_csv('3D_recipe_clustering.csv') 
recipes_df = pd.read_csv('dishes_with_co2_nutrients_4.csv')
clustering_df.head()


Unnamed: 0,id,PCA1,PCA2,PCA3,co2,co2_score,recipeName
0,000095fc1d,-0.661828,-0.500178,0.030783,0.127718,low,Yogurt Parfaits
1,00051d5b9d,-0.051442,-0.167541,0.389536,0.159063,low,"Salt Free, Low Cholesterol Sugar Cookies Recipe"
2,00059b093b,0.17145,1.004125,-0.877142,0.408182,high,Honey Sriracha Chicken Wings
3,0005fc89f7,-0.871275,-0.090311,0.001686,0.370836,high,Shrimp and Caper Salad
4,0006ca31f4,-0.044486,-0.500189,-0.382812,0.081544,low,Natural Peanut Butter Chocolate Bon Bons


## Add columns

### Add numerical co2 score column

In [4]:
clustering_df['co2_score_num'] = clustering_df['co2']
clustering_df.head()

Unnamed: 0,id,PCA1,PCA2,PCA3,co2,co2_score,recipeName,co2_score_num
0,000095fc1d,-0.661828,-0.500178,0.030783,0.127718,low,Yogurt Parfaits,0.127718
1,00051d5b9d,-0.051442,-0.167541,0.389536,0.159063,low,"Salt Free, Low Cholesterol Sugar Cookies Recipe",0.159063
2,00059b093b,0.17145,1.004125,-0.877142,0.408182,high,Honey Sriracha Chicken Wings,0.408182
3,0005fc89f7,-0.871275,-0.090311,0.001686,0.370836,high,Shrimp and Caper Salad,0.370836
4,0006ca31f4,-0.044486,-0.500189,-0.382812,0.081544,low,Natural Peanut Butter Chocolate Bon Bons,0.081544


In [5]:
# transform high scores
mask = clustering_df['co2_score'].str.contains(r'high', na=True)
clustering_df.loc[mask, 'co2_score_num'] = 3
# transform med scores
mask = clustering_df['co2_score'].str.contains(r'moderate', na=True)
clustering_df.loc[mask, 'co2_score_num'] = 2
# transform low scores
mask = clustering_df['co2_score'].str.contains(r'low', na=True)
clustering_df.loc[mask, 'co2_score_num'] = 1

clustering_df['co2_score_num'].unique()

array([1., 3., 2.])

In [6]:
# code to select particular recipe

recipe_id = '000095fc1d' # this needs to be the recipe id you get from the api call

row = clustering_df.loc[clustering_df['id'] == recipe_id]

a = float(row['PCA1'])
b = float(row['PCA2'])
c = float(row['PCA3'])



### Add dietary info column (veg/nonveg)

In [7]:
clustering_df['dietary_info'] = recipes_df['dietary_info']


In [8]:
mask = clustering_df['dietary_info'].str.contains(r'non-veg', na=True)
clustering_df_nonveg = clustering_df.loc[mask]
clustering_df_nonveg

Unnamed: 0,id,PCA1,PCA2,PCA3,co2,co2_score,recipeName,co2_score_num,dietary_info
2,00059b093b,0.171450,1.004125,-0.877142,0.408182,high,Honey Sriracha Chicken Wings,3.0,non-veg
3,0005fc89f7,-0.871275,-0.090311,0.001686,0.370836,high,Shrimp and Caper Salad,3.0,non-veg
14,000cfbeccf,-0.174308,-0.367911,-0.472555,0.386642,high,Broccoli Chicken Casserole Recipe,3.0,non-veg
15,000fe7fcf5,0.330976,0.588465,-0.780570,0.176728,low,Chicken Diable,1.0,non-veg
21,00180ff35c,-0.645111,0.753625,-0.065738,1.843081,high,Potato and Hamburger Baskets #SP5,3.0,non-veg
...,...,...,...,...,...,...,...,...,...
51196,ffda308edd,-0.006003,-0.453361,-0.463585,0.202869,moderate,5 Minute Mochi,2.0,non-veg
51204,ffded4d757,1.053171,0.403966,-0.492261,0.048767,low,Roasted Sweet Potatoes with Cinnamon Pecan Crunch,1.0,non-veg
51209,ffe09ce6e1,-0.277427,-0.538117,0.218542,0.107507,low,Kumquat Ice Cream,1.0,non-veg
51220,ffefad6dcc,1.098508,0.604828,0.342928,0.204514,moderate,Pumpkin-raisin Cookies,2.0,non-veg


In [9]:
mask = clustering_df['dietary_info'].str.contains(r'vegetarian', na=True)
clustering_df_veg = clustering_df.loc[mask]

# Visualize data

### Clustering of all recipes

In [10]:
# import plotly.express as px
import plotly.graph_objects as go

# change this variable based on if you want to show all recipes, just veg, or just non-veg
df = clustering_df_nonveg

x, y, z = df['PCA1'], df['PCA2'], df['PCA3']
recipe = df['recipeName']
co2 = df['co2']

fig = go.Figure(data=[go.Scatter3d(
    x=x,
    y=y,
    z=z,
    mode='markers',
    hovertext= recipe,
    text=co2,
    hovertemplate = '%{hovertext}<br>CO2:%{text:02f}', # can't get text to properly substitute 
    marker=dict(
        size=4,
        color=df['co2_score_num'],                # set color to an array/list of desired values
        colorscale='RdYlGn',   # choose a colorscale
        opacity=0.8,
        reversescale= True
    )
)])


camera = dict(
    center=dict(x=a, y=b, z=c),
    eye=dict(x=0.0001, y=0.0001, z=0.0001)
)

fig.update_layout(scene_camera=camera)

fig.show()

#### Trying to zoom in on one point (this didn't work)

In [11]:
size_lst = []
for i in range(len(clustering_df)):
  size_lst.append(4)
clustering_df['size'] = size_lst
clustering_df.head()

Unnamed: 0,id,PCA1,PCA2,PCA3,co2,co2_score,recipeName,co2_score_num,dietary_info,size
0,000095fc1d,-0.661828,-0.500178,0.030783,0.127718,low,Yogurt Parfaits,1.0,vegetarian,4
1,00051d5b9d,-0.051442,-0.167541,0.389536,0.159063,low,"Salt Free, Low Cholesterol Sugar Cookies Recipe",1.0,vegetarian,4
2,00059b093b,0.17145,1.004125,-0.877142,0.408182,high,Honey Sriracha Chicken Wings,3.0,non-veg,4
3,0005fc89f7,-0.871275,-0.090311,0.001686,0.370836,high,Shrimp and Caper Salad,3.0,non-veg,4
4,0006ca31f4,-0.044486,-0.500189,-0.382812,0.081544,low,Natural Peanut Butter Chocolate Bon Bons,1.0,vegetarian,4


In [12]:
temp_id = '00051d5b9d'
size_lst = []
for i in range(len(clustering_df)):
  size_lst.append(6)
clustering_df['size'] = size_lst

clustering_df.loc[clustering_df['id'] == temp_id, 'size'] = 25


# code to select particular recipe

row = clustering_df.loc[clustering_df['id'] == temp_id]

a = -0.051442	#float(row['PCA1'])
b = -0.167541 #float(row['PCA2'])
c = 0.389536 #float(row['PCA3'])



clustering_df.head()

Unnamed: 0,id,PCA1,PCA2,PCA3,co2,co2_score,recipeName,co2_score_num,dietary_info,size
0,000095fc1d,-0.661828,-0.500178,0.030783,0.127718,low,Yogurt Parfaits,1.0,vegetarian,6
1,00051d5b9d,-0.051442,-0.167541,0.389536,0.159063,low,"Salt Free, Low Cholesterol Sugar Cookies Recipe",1.0,vegetarian,25
2,00059b093b,0.17145,1.004125,-0.877142,0.408182,high,Honey Sriracha Chicken Wings,3.0,non-veg,6
3,0005fc89f7,-0.871275,-0.090311,0.001686,0.370836,high,Shrimp and Caper Salad,3.0,non-veg,6
4,0006ca31f4,-0.044486,-0.500189,-0.382812,0.081544,low,Natural Peanut Butter Chocolate Bon Bons,1.0,vegetarian,6


### Plot for specific dish & neighbors

Add numerical CO2 score column

In [13]:
name = ['peanut butter', 'chocolate peanut butter', 'crunchy peanut butter', 'hot peanut butter chocolate']
distance = [0, 0.9, 0.99, 1.2]
co2 = [0.8, 0.25, 0.001, 0.5]
nutritional_value = [15, 21, 13, 5]
marker_size = [30, 42, 26, 12]

api_input_df = pd.DataFrame({'name': name, 'distance': distance, 'co2': co2, 'nutritional_value': nutritional_value, 'marker_size': marker_size})
api_input_df

Unnamed: 0,name,distance,co2,nutritional_value,marker_size
0,peanut butter,0.0,0.8,15,30
1,chocolate peanut butter,0.9,0.25,21,42
2,crunchy peanut butter,0.99,0.001,13,26
3,hot peanut butter chocolate,1.2,0.5,5,12


In [14]:
api_input_df['Dish'] = api_input_df['name']

api_input_df['Carbon Footprint'] = api_input_df['co2']


api_input_df['CO2 Output (kg per kg)'] = api_input_df['co2'].apply(lambda x: x * 10)
api_input_df.loc[api_input_df['CO2 Output (kg per kg)'] <= (3), 'Carbon Footprint'] = 'moderate'
api_input_df.loc[api_input_df['CO2 Output (kg per kg)'] <= (2), 'Carbon Footprint'] = 'low'
api_input_df.loc[api_input_df['CO2 Output (kg per kg)'] > (3), 'Carbon Footprint'] = 'high'
df_show = api_input_df.copy(deep=True)
df_show.drop(columns=['name', 'distance', 'marker_size', 'co2'], inplace=True)
df_show

Unnamed: 0,nutritional_value,Dish,Carbon Footprint,CO2 Output (kg per kg)
0,15,peanut butter,high,8.0
1,21,chocolate peanut butter,moderate,2.5
2,13,crunchy peanut butter,low,0.01
3,5,hot peanut butter chocolate,high,5.0


In [15]:
api_input_df

Unnamed: 0,name,distance,co2,nutritional_value,marker_size,Dish,Carbon Footprint,CO2 Output (kg per kg)
0,peanut butter,0.0,0.8,15,30,peanut butter,high,8.0
1,chocolate peanut butter,0.9,0.25,21,42,chocolate peanut butter,moderate,2.5
2,crunchy peanut butter,0.99,0.001,13,26,crunchy peanut butter,low,0.01
3,hot peanut butter chocolate,1.2,0.5,5,12,hot peanut butter chocolate,high,5.0


In [18]:
import plotly.express as px

fig_api = px.scatter_3d(api_input_df,
    title="",
    x='distance',
    y='nutritional_value',
    z='co2',
    labels={
        "distance": " ",
        "nutritional_value": " nutritional value ",
        "co2": "co2 output per 100g"
    },
    color_discrete_map={
                "low": "green",
                "moderate": "yellow",
                "high": "red"},
    size="marker_size",
    template="plotly",
    color='Carbon Footprint')
fig_api.update(layout_coloraxis_showscale=False)

#fig_api.update_layout(showlegend=True)
fig_api.show()


# Other interesting statistics

In [19]:
recipes_df.head()

Unnamed: 0,id,dish_name,ingredients,weight_per_ingr,url,total_dish_weight,footprints_per_ingredient,footprint_per_ingr_100gr,total_footprint,dish_footprint_per_100gr,confidence_score,dietary_info,dish_footprint_per_kilo,co2_score,km_driven_per_100gr,nutrients_per_100gr,calories_per_100gr,fat_per_100gr,protein_per_100gr,salt_per_100gr,sugar_per_100gr,proportion_of_ingredient
0,000095fc1d,Yogurt Parfaits,"['yogurt, greek, plain, nonfat', 'strawberries...","[226.796, 152.0, 30.5]",http://tastykitchen.com/recipes/breakfastbrunc...,409.296,"[0.3401940029570833, 0.13679999765008688, 0.04...","[0.08311686480128888, 0.03342324323963266, 0.0...",0.522744,0.127718,1.0,vegetarian,1.277178,low,0.494907,"{'energy': 81.12946131894766, 'fat': 2.1401392...",81.129461,2.140139,6.914437,0.055978,5.086341,"[0.5541124271920566, 0.3713693757085337, 0.074..."
1,00051d5b9d,"Salt Free, Low Cholesterol Sugar Cookies Recipe","['sugars, granulated', 'oil, corn, peanut, and...","[100.80000000000001, 168.0, 21.25, 5.166666666...",http://cookeatshare.com/recipes/salt-free-low-...,747.808333,"[0.35280001088976864, 0.5040000043809414, 0.08...","[0.04717786565939605, 0.06739694944751103, 0.0...",1.189488,0.159063,1.0,vegetarian,1.590633,low,0.61637,"{'energy': 477.09640393594606, 'fat': 23.41248...",477.096404,23.412486,7.625492,0.548621,14.298443,"[0.13479389772334713, 0.2246564962055785, 0.02..."
2,00059b093b,Honey Sriracha Chicken Wings,"['chicken, broilers or fryers, wing, meat and ...","[1360.7759999999998, 6.0, 1.15, 13.5, 42.59999...",http://tastykitchen.com/recipes/main-courses/h...,1579.409333,"[6.123491736397146, 0.009600000455975533, 0.00...","[0.3877077086453299, 0.0006078221936117596, 0....",6.44686,0.408182,1.0,non-veg,4.081817,high,1.581704,"{'energy': 208.05898280960727, 'fat': 14.29704...",208.058983,14.297046,15.383456,1.063915,3.048951,"[0.8615727229673203, 0.0037988885296359742, 0...."
3,0005fc89f7,Shrimp and Caper Salad,"['crustaceans, shrimp, raw (not previously fro...","[907.184, 75.0, 151.5, 25.799999999999997, 134...",http://allrecipes.com/recipe/shrimp-and-caper-...,1574.431917,"[4.898793727070093, 0.037500001781154424, 0.01...","[0.31114674919965113, 0.0023818115844950697, 0...",5.838564,0.370836,0.983613,non-veg,3.708362,high,1.43699,"{'energy': 194.7525956849092, 'fat': 15.980767...",194.752596,15.980767,11.946687,0.614843,0.314583,"[0.5761976687570326, 0.0476362294273019, 0.096..."
4,0006ca31f4,Natural Peanut Butter Chocolate Bon Bons,"['cocoa, dry powder, unsweetened', 'honey', 'p...","[1032.0, 4068.0, 256.0]",http://www.food.com/recipe/natural-peanut-butt...,5356.0,"[4.068000193219632, 0.29951998591423035]","[False, 0.07595220674420522, 0.005592232746718...",4.36752,0.081544,0.807319,vegetarian,0.815444,low,0.315985,"{'energy': 303.43539955190437, 'fat': 5.094846...",303.4354,5.094847,5.067961,0.019791,63.210605,"[0.19268110530246452, 0.7595220313666916, 0.04..."


In [20]:
import numpy as np

mean_co2_100gr_all = np.mean(recipes_df['dish_footprint_per_100gr'])
mean_co2_100gr_all

0.24300794959118754

## Average carbon footprints per dish type

In [21]:

mask1 = recipes_df['dietary_info'].str.contains(r'vegetarian', na=True)
mean_co2_100gr_veg = np.mean(recipes_df[mask1]['dish_footprint_per_100gr'])
mean_co2_100gr_veg

0.1765068335587245

In [22]:
mask2 = recipes_df['dietary_info'].str.contains(r'non-veg', na=True)
mean_co2_100gr_nonveg = np.mean(recipes_df[mask2]['dish_footprint_per_100gr'])
mean_co2_100gr_nonveg

0.707707048527114

## Nutritional info

In [23]:
import ast

temp_id = 'ff7e324594'
mask3 = recipes_df['id'].str.contains(f'{temp_id}', na=True)
nutrients_dict = ast.literal_eval(recipes_df[mask3].nutrients_per_100gr.item())
nutrients_dict['energy']

388.79233960175964

In [24]:
# get calories per 100gr for dish with particular ID
recipes_df.loc[recipes_df['id'] == temp_id]['calories_per_100gr'].values[0]

388.7923396017597

In [36]:
# get other nutritional info per 100gr for dish with particular ID
# fat
fat = round(recipes_df.loc[recipes_df['id'] == temp_id]['fat_per_100gr'].values[0], 2)
# protein
protein = round(recipes_df.loc[recipes_df['id'] == temp_id]['protein_per_100gr'].values[0], 2)
# salt
salt = round(recipes_df.loc[recipes_df['id'] == temp_id]['salt_per_100gr'].values[0], 2)
# sugar
sugar = round(recipes_df.loc[recipes_df['id'] == temp_id]['sugar_per_100gr'].values[0], 2)

In [37]:
nutr_df_temp = pd.DataFrame({'fat': [fat], 'protein': [protein], 'salt': [salt], 'sugar': [sugar]})
nutr_df_temp

Unnamed: 0,fat,protein,salt,sugar
0,22.95,8.5,0.29,22.42


## Footprint info for each ingredient in a dish

In [26]:
# check if footprints per ingredient for 100gr of dish has preserved missing ingredient values
for i in range(len(recipes_df)):
  recipe = recipes_df['footprint_per_ingr_100gr'][i]
  recipe_lst = ast.literal_eval(recipe)
  if False in recipe_lst:
    print(i)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
30346
30348
30358
30365
30366
30370
30379
30386
30387
30389
30391
30392
30395
30397
30410
30412
30414
30419
30422
30436
30437
30438
30442
30445
30448
30451
30452
30456
30457
30458
30461
30469
30472
30485
30488
30491
30501
30509
30513
30514
30519
30520
30523
30525
30527
30528
30537
30540
30547
30552
30560
30570
30573
30574
30576
30578
30579
30581
30583
30590
30594
30598
30599
30601
30605
30607
30608
30620
30625
30628
30629
30630
30632
30633
30635
30642
30645
30653
30658
30662
30663
30664
30666
30667
30673
30680
30686
30689
30693
30696
30700
30706
30707
30718
30727
30734
30742
30743
30751
30753
30755
30756
30757
30759
30769
30770
30774
30775
30778
30786
30788
30803
30807
30809
30810
30814
30818
30820
30821
30822
30823
30825
30829
30831
30833
30835
30838
30842
30846
30847
30848
30860
30861
30863
30867
30870
30875
30880
30885
30888
30891
30893
30904
30909
30912
30915
30921
30925
30933
30938
30942
30951
30952
30953
30956
30960

In [27]:
recipes_df.iloc[51134]

id                                                                  ff7f082cb9
dish_name                           Middle East Butter Cookies (Mamool) Recipe
ingredients                  ['wheat flour, white, all-purpose, unenriched'...
weight_per_ingr              [375.0, 125.0, 201.60000000000002, 14.8, 340.7...
url                          http://cookeatshare.com/recipes/middle-east-bu...
total_dish_weight                                                      1072.51
footprints_per_ingredient    [0.18750000890577212, 0.0625000029685907, 0.70...
footprint_per_ingr_100gr     [0.0174823145563126, 0.005827438185437532, 0.0...
total_footprint                                                        1.37314
dish_footprint_per_100gr                                               0.12803
confidence_score                                                             1
dietary_info                                                        vegetarian
dish_footprint_per_kilo                             

In [28]:
recipes_df.loc[recipes_df['id'] == id]['footprint_per_ingr_100gr'].values[0]

IndexError: ignored

In [29]:
recipes_df.columns

Index(['id', 'dish_name', 'ingredients', 'weight_per_ingr', 'url',
       'total_dish_weight', 'footprints_per_ingredient',
       'footprint_per_ingr_100gr', 'total_footprint',
       'dish_footprint_per_100gr', 'confidence_score', 'dietary_info',
       'dish_footprint_per_kilo', 'co2_score', 'km_driven_per_100gr',
       'nutrients_per_100gr', 'calories_per_100gr', 'fat_per_100gr',
       'protein_per_100gr', 'salt_per_100gr', 'sugar_per_100gr',
       'proportion_of_ingredient'],
      dtype='object')

In [30]:
temp_dish_footprint_per_100gr = recipes_df.loc[recipes_df['id'] == id]['dish_footprint_per_100gr'].values[0]
temp_dish_footprint_per_100gr

IndexError: ignored

In [31]:
# get ingredient footprints, make temp df

#st.write('The following 3 ingredients account for the highest percentages of the dish\'s carbon footprint:')

# make temp df for displaying top 3 ingredients
ingr_footprints = ast.literal_eval(recipes_df.loc[recipes_df['id'] == id]['footprint_per_ingr_100gr'].values[0])
ingr_names = ast.literal_eval(recipes_df.loc[recipes_df['id'] == id]['ingredients'].values[0])


# get percentages that each ingredient accounts for of total footprint
percentage_of_total_footprint = []
for ingredient_number, ingredient in enumerate(ingr_footprints):
  if ingredient != False:
    ingr_percent = np.round(((ingr_footprints[ingredient_number] / temp_dish_footprint_per_100gr) * 100), 1)
    percentage_of_total_footprint.append(ingr_percent)
  else:
    percentage_of_total_footprint.append('No info for this ingredient')


if len(ingr_names) == len(ingr_footprints):
  recipe_temp_df = pd.DataFrame({'Ingredient': ingr_names, 'Carbon Footprint': ingr_footprints})
  recipe_temp_df['Percentage of dish CO2 Footprint'] = percentage_of_total_footprint
else: 
  message = 'Sorry, we don\'t have ingredient information available for this dish'
recipe_temp_df.sort_values('Carbon Footprint', ascending=False, inplace=True)
recipe_temp_df.reset_index(inplace=True)
recipe_temp_df.drop(columns=['Carbon Footprint', 'index'], inplace=True)
recipe_temp_df.head(3)

IndexError: ignored

## Process ingredients for each dish

In [32]:
ingredients_for_dish = ast.literal_eval(recipes_df.loc[recipes_df['id'] == id]['ingredients'].values[0])

for ingredient in ingredients_for_dish:
  print(ingredient)


IndexError: ignored

In [None]:
recipes_df.columns

In [None]:
recipe_url = recipes_df.loc[recipes_df['id'] == id]['url'].values[0]
recipe_url

## General food stats

In [None]:
avg_cal_veg = np.mean(recipes_df[mask1]['calories_per_100gr'])
avg_cal_veg

In [None]:
avg_cal_nonveg = np.mean(recipes_df[mask2]['calories_per_100gr'])
avg_cal_nonveg

In [None]:
recipes_df.columns