In [1]:
from google.colab import drive
drive.mount('/gdrive')
%cd /gdrive

Mounted at /gdrive
/gdrive


In [2]:
!pip install pandas_bokeh

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pandas_bokeh
  Downloading pandas_bokeh-0.5.5-py2.py3-none-any.whl (29 kB)
Installing collected packages: pandas-bokeh
Successfully installed pandas-bokeh-0.5.5


In [3]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.spatial import distance

In [4]:
import pandas_bokeh
# Embedding plots in Jupyter/Colab Notebook
pandas_bokeh.output_notebook()

In [5]:
df =pd.read_csv(r'/gdrive/Shareddrives/FYP Project/Dataset/SOM_FoodData_Nov15.csv')

In [6]:
# Calculate saturated fat as a percentage of total fat
fat_column = (df["Saturated Fat (g)"]/df["Fats (g)"])*100

In [7]:
df_category = df.copy()
df_category["Saturated Fat (g)"] = fat_column

In [8]:
labels = ["low","medium", "high"]
# Cut of criteria
cut_off_values = {
    "Calories (kcal)":(100,200),
    "Protein (g)": (10, 20),
    "Fats (g)": (5,15),
    "Carbs (g)": (10, 30),
    "Fiber (g)": (1, 5),
    "Sugar (g)": (2, 12),
    "Phosphorus (mg)": (100, 300),
    "Potassium (mg)": (100, 300),
    "Sodium (mg)": (500, 1000),
    "Saturated Fat (g)": (20, 50),
    "Trans Fat (g)": (0.5, 2)
}

In [9]:
# Apply cutoff margins
for key,values in cut_off_values.items():
  cond_list = [df_category[key].between(0, values[0],"left"), df_category[key].between(values[0], values[1],"both"), df_category[key].between(values[1], np.inf,"neither")]
  df_category[key] = np.select(cond_list, ["low", "medium", "high"], 0)

In [32]:
df_category.to_csv(r'/gdrive/Shareddrives/FYP Project/Dataset/with_cutoff_values.csv')

In [10]:
df_category

Unnamed: 0.1,Unnamed: 0,Name,Group,GI,Calories (kcal),Protein (g),Fats (g),Carbs (g),Fiber (g),Sugar (g),Phosphorus (mg),Potassium (mg),Sodium (mg),Saturated Fat (g),Trans Fat (g),Category,wx,wy
0,0,"Bread, wheat, toasted",baked-products,1,high,medium,low,high,medium,medium,medium,medium,medium,medium,low,1,2.5,2.0
1,1,"Biscuits, plain or buttermilk, prepared from r...",baked-products,0,high,low,high,high,medium,medium,medium,medium,medium,medium,low,1,2.5,4.0
2,2,"Bread, french or vienna (includes sourdough)",baked-products,1,high,medium,low,high,medium,medium,medium,medium,medium,medium,low,3,1.0,3.0
3,3,"Cake, chocolate, prepared from recipe without ...",baked-products,0,high,low,high,high,medium,low,medium,medium,low,medium,low,3,0.5,2.0
4,4,"Cake, fruitcake, commercially prepared",baked-products,0,high,low,medium,high,medium,high,low,medium,low,low,low,0,6.0,7.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
545,545,"Tomato juice, canned, without salt added",vegetables,0,low,low,low,low,low,medium,low,medium,low,low,low,1,1.5,8.0
546,546,"Pimento, canned",vegetables,0,low,low,low,low,medium,medium,low,medium,low,low,low,1,1.5,8.0
547,547,"Broccoli, chinese, cooked",vegetables,0,low,low,low,low,medium,low,low,medium,low,low,low,3,2.5,8.0
548,548,"Cabbage, napa, cooked",vegetables,0,low,low,low,low,low,low,low,low,low,low,low,1,0.5,8.0


In [11]:
df_category['Calories (kcal)'].value_counts()

high      262
low       197
medium     91
Name: Calories (kcal), dtype: int64

In [12]:
# Create a copy of data set
df_std = df_category.copy()

In [13]:
df_std["Group"].unique()

array(['baked-products', 'beverages', 'diary', 'fast-foods', 'fruits',
       'grains', 'greens', 'meals-entrees-side-dishes', 'meat',
       'mashrooms', 'nuts', 'oils-and-sauces', 'seafood', 'soups',
       'spices', 'sweets', 'vegetables'], dtype=object)

In [14]:
GI_high_food = df_std[df_std["GI"]==2]

In [15]:
GI_high_food

Unnamed: 0.1,Unnamed: 0,Name,Group,GI,Calories (kcal),Protein (g),Fats (g),Carbs (g),Fiber (g),Sugar (g),Phosphorus (mg),Potassium (mg),Sodium (mg),Saturated Fat (g),Trans Fat (g),Category,wx,wy
5,5,"Cake, gingerbread, prepared from recipe",baked-products,2,high,low,high,high,low,low,low,high,low,medium,low,1,2.5,4.0
11,11,"Cookies, graham crackers, plain or honey (incl...",baked-products,2,high,low,medium,high,medium,high,medium,medium,low,low,low,2,6.5,8.0
15,15,"Crackers, saltines (includes oyster, soda, soup)",baked-products,2,high,low,medium,high,medium,low,medium,medium,medium,low,low,3,1.5,2.0
19,19,"Croutons, plain",baked-products,2,high,medium,medium,high,high,low,medium,medium,medium,medium,low,1,2.5,2.0
33,33,"Waffles, plain, prepared from recipe",baked-products,2,high,low,medium,high,low,low,medium,medium,medium,medium,low,3,1.0,1.0
34,34,"Leavening agents, baking powder, double-acting...",baked-products,2,low,low,low,medium,low,low,high,low,high,0,low,2,8.0,11.0
37,37,"English muffins, plain, unenriched, without ca...",baked-products,2,high,low,low,high,medium,low,medium,medium,low,low,low,3,1.5,4.0
47,47,"Alcoholic beverage, beer, regular, all",beverages,2,low,low,low,low,low,low,low,low,low,0,low,1,0.5,8.0
72,72,"Alcoholic beverage, liqueur, coffee with cream...",beverages,2,high,low,high,medium,low,high,low,low,low,high,low,0,8.0,5.0
119,119,"Ice cream, soft serve, chocolate",diary,2,high,low,medium,medium,low,high,medium,medium,low,high,low,3,4.5,6.0


In [16]:
random_food = GI_high_food.loc[[177]]

In [17]:
random_food

Unnamed: 0.1,Unnamed: 0,Name,Group,GI,Calories (kcal),Protein (g),Fats (g),Carbs (g),Fiber (g),Sugar (g),Phosphorus (mg),Potassium (mg),Sodium (mg),Saturated Fat (g),Trans Fat (g),Category,wx,wy
177,177,"Watermelon, raw",fruits,2,low,low,low,low,low,medium,low,medium,low,low,low,3,-0.5,8.0


In [18]:
df_std.columns

Index(['Unnamed: 0', 'Name', 'Group', 'GI', 'Calories (kcal)', 'Protein (g)',
       'Fats (g)', 'Carbs (g)', 'Fiber (g)', 'Sugar (g)', 'Phosphorus (mg)',
       'Potassium (mg)', 'Sodium (mg)', 'Saturated Fat (g)', 'Trans Fat (g)',
       'Category', 'wx', 'wy'],
      dtype='object')

In [19]:
selected_match = df_std[(df_std["GI"]==0) & (df_std["Group"]=="fruits") & (df_std["Category"]==3)]

In [20]:
distance_column = selected_match.apply(lambda x : np.sqrt((x.wx-(-0.5))**2 + (x.wy-8.0)**2), axis=1 )

In [21]:
selected_match.loc[distance_column.sort_values().index[:5],:]

Unnamed: 0.1,Unnamed: 0,Name,Group,GI,Calories (kcal),Protein (g),Fats (g),Carbs (g),Fiber (g),Sugar (g),Phosphorus (mg),Potassium (mg),Sodium (mg),Saturated Fat (g),Trans Fat (g),Category,wx,wy
201,201,"Cloudberries, raw (Alaska Native)",fruits,0,low,low,low,low,low,low,low,low,low,low,low,3,0.0,9.0
188,188,"Groundcherries, (cape-gooseberries or poha), raw",fruits,0,low,low,low,medium,low,low,low,low,low,low,low,3,0.0,9.0
180,180,"Cranberry, low bush or lingenberry, raw (Alask...",fruits,0,low,low,low,medium,low,low,low,low,low,low,low,3,0.0,9.0
152,152,"Grapefruit, raw, pink and red, all areas",fruits,0,low,low,low,medium,medium,medium,low,medium,low,low,low,3,0.5,10.0
197,197,"Soursop, raw",fruits,0,low,low,low,medium,medium,high,low,medium,low,low,low,3,1.0,11.0


In [22]:
def recommend_food(selected_food, df_std):
  # Get data from selected food
  gi=selected_food["GI"].values[0]
  cat=selected_food["Category"].values[0]
  group=selected_food["Group"].values[0]
  wx=selected_food["wx"].values[0]
  wy=selected_food["wy"].values[0]

  # Select matching food considering GI, Group, Category
  selected_match = df_std[(df_std["GI"] < gi) & (df_std["Group"]==group) & (df_std["Category"]==cat)]
  # Calculate eucleadian distance
  distance_column = selected_match.apply(lambda x : np.sqrt((x.wx- wx)**2 + (x.wy-wy)**2), axis=1 )
  # return top 5 matching foods
  return selected_match.loc[distance_column.sort_values().index[:5],:]

In [23]:
recommend_food(random_food, df_std)

Unnamed: 0.1,Unnamed: 0,Name,Group,GI,Calories (kcal),Protein (g),Fats (g),Carbs (g),Fiber (g),Sugar (g),Phosphorus (mg),Potassium (mg),Sodium (mg),Saturated Fat (g),Trans Fat (g),Category,wx,wy
201,201,"Cloudberries, raw (Alaska Native)",fruits,0,low,low,low,low,low,low,low,low,low,low,low,3,0.0,9.0
188,188,"Groundcherries, (cape-gooseberries or poha), raw",fruits,0,low,low,low,medium,low,low,low,low,low,low,low,3,0.0,9.0
180,180,"Cranberry, low bush or lingenberry, raw (Alask...",fruits,0,low,low,low,medium,low,low,low,low,low,low,low,3,0.0,9.0
152,152,"Grapefruit, raw, pink and red, all areas",fruits,0,low,low,low,medium,medium,medium,low,medium,low,low,low,3,0.5,10.0
197,197,"Soursop, raw",fruits,0,low,low,low,medium,medium,high,low,medium,low,low,low,3,1.0,11.0


In [24]:
recommended_food = recommend_food(random_food, df_std)
recommended_food

Unnamed: 0.1,Unnamed: 0,Name,Group,GI,Calories (kcal),Protein (g),Fats (g),Carbs (g),Fiber (g),Sugar (g),Phosphorus (mg),Potassium (mg),Sodium (mg),Saturated Fat (g),Trans Fat (g),Category,wx,wy
201,201,"Cloudberries, raw (Alaska Native)",fruits,0,low,low,low,low,low,low,low,low,low,low,low,3,0.0,9.0
188,188,"Groundcherries, (cape-gooseberries or poha), raw",fruits,0,low,low,low,medium,low,low,low,low,low,low,low,3,0.0,9.0
180,180,"Cranberry, low bush or lingenberry, raw (Alask...",fruits,0,low,low,low,medium,low,low,low,low,low,low,low,3,0.0,9.0
152,152,"Grapefruit, raw, pink and red, all areas",fruits,0,low,low,low,medium,medium,medium,low,medium,low,low,low,3,0.5,10.0
197,197,"Soursop, raw",fruits,0,low,low,low,medium,medium,high,low,medium,low,low,low,3,1.0,11.0


In [25]:
random_food

Unnamed: 0.1,Unnamed: 0,Name,Group,GI,Calories (kcal),Protein (g),Fats (g),Carbs (g),Fiber (g),Sugar (g),Phosphorus (mg),Potassium (mg),Sodium (mg),Saturated Fat (g),Trans Fat (g),Category,wx,wy
177,177,"Watermelon, raw",fruits,2,low,low,low,low,low,medium,low,medium,low,low,low,3,-0.5,8.0


In [26]:
def plot_difference(food1, food2):
  food1_numeric = df.loc[food1.index]
  food2_numeric = df.loc[food2.index]
  df_plot = pd.concat([food1_numeric,food2_numeric])
  df_plot.drop(["Unnamed: 0","Group", "wx", "wy", "Category"],axis=1, inplace=True)
  # df_new = df_plot.transpose().drop(["Unnamed: 0","Group", "Name", "wx", "wy"], axis=0)
  title= f"{df_plot['Name'].values[0]} vs {df_plot['Name'].values[1]}"
  df_plot.set_index("Name", inplace=True)
  df_plot.plot_bokeh.bar(ylabel="Nutrients range", xlabel="Food", title=title, legend = "top_right", figsize=(500, 200), sizing_mode="scale_width")



In [27]:
#@title
plot_difference(random_food, recommended_food.iloc[[0]])

In [28]:
#@title
plot_difference(random_food, recommended_food.iloc[[1]])

In [29]:
#@title
plot_difference(random_food, recommended_food.iloc[[2]])

In [30]:
#@title
plot_difference(random_food, recommended_food.iloc[[3]])

In [31]:
#@title
plot_difference(random_food, recommended_food.iloc[[4]])