In [27]:
import csv
import pandas as pd
from sklearn.metrics import DistanceMetric

In [28]:
# food ID -> {"protein", "fat", "carbs"} macro values (floats)
food_macro_map = {}
# food ID -> food name, used when printing the top 10 similar foods
food_name_map = {}

# Load the needed columns from the CSV into the two maps.
# newline="" is the mode the csv module asks for, so that quoted fields with
# embedded line breaks are parsed correctly on every platform.
with open("FOOD-DATA-GROUP1.csv", "r", newline="") as food_file:
    reader = csv.DictReader(food_file)
    for food in reader:
        # IDs stay as the strings read from the file; they become the
        # DataFrame index and the keys used for later lookups.
        food_id = food["ID"]
        food_name = food["food"]
        protein = float(food["Protein"])
        fat = float(food["Fat"])
        carbs = float(food["Carbohydrates"])

        # macro values for this food (one future DataFrame row)
        food_macro_map[food_id] = {
            "protein": protein,
            "fat": fat,
            "carbs": carbs
        }
        # display name for this food
        food_name_map[food_id] = food_name


# One row per food, indexed by food ID, columns protein / fat / carbs.
food_df = pd.DataFrame(food_macro_map.values(), index=food_macro_map.keys())
# Replace any NaN entries with 0 so every row is a complete numeric vector.
food_df = food_df.fillna(0)
food_df

Unnamed: 0,protein,fat,carbs
0,0.900,5.000,0.8
1,7.800,19.400,3.1
2,0.800,3.600,0.9
3,1.500,2.000,1.5
4,1.200,2.300,1.2
...,...,...,...
2390,0.500,0.095,6.7
2391,0.062,0.099,0.2
2392,0.400,0.070,0.7
2393,1.800,0.300,3.3


In [29]:
# query: pink salmon cooked
query = '412'

# Create the metric in this cell so it runs on a fresh kernel instead of
# relying on a later cell having already defined `dist`.
dist = DistanceMetric.get_metric('euclidean')

# get pairwise distance from food
# The list selector [[query]] keeps the query as a one-row DataFrame, i.e. the
# 2-D input that dist.pairwise takes as its second argument.
query_location = food_df.loc[[query]]
pairwise_dist = dist.pairwise(food_df, query_location)
# pair every food ID with its distance to the query food
query_distances = list(zip(food_df.index, pairwise_dist.flatten()))

# print top 10 most similar foods (smallest distance first; the query food
# itself is at distance 0 and therefore heads the list)
for similar_food_id, macro_distance in sorted(query_distances, key=lambda x: x[1], reverse=False)[:10]:
    print(similar_food_id, food_name_map[similar_food_id], macro_distance)


412 pink salmon cooked 0.0
1310 veal heart cooked 2.2226110770892893
1353 pork blade chops raw 3.405877273185281
1334 veal lungs cooked 4.104875150354759
1352 pork leg cap steak cooked 4.220189569201839
1233 veal shank cooked 5.200000000000003
395 wolffish raw 5.277309920783504
353 wolffish cooked 5.371219600798315
1325 lamb kidneys cooked 5.707889277132133
1326 beef lungs cooked 5.854912467321776


In [30]:
# query: pork backfat
query = '1372'
dist = DistanceMetric.get_metric('euclidean')

# Euclidean distance from every row of food_df to the query row
query_location = food_df.loc[[query]]
pairwise_dist = dist.pairwise(food_df, query_location)
query_distances = [
    (food_id, distance)
    for food_id, distance in zip(food_df.index, pairwise_dist.flatten())
]

# ten closest foods, nearest first (stable sort keeps ties in food_df order)
query_distances_ranked = list(query_distances)
query_distances_ranked.sort(key=lambda pair: pair[1])
for similar_food_id, macro_distance in query_distances_ranked[:10]:
    print(similar_food_id, food_name_map[similar_food_id], macro_distance)

1372 pork backfat 0.0
1430 duck meat raw 163.9161370945521
1390 pork belly raw 164.41374638393228
1676 menhaden fish oil 184.772102872701
1692 nutmeg butter oil 184.772102872701
1705 sheanut oil 184.772102872701
1710 babassu oil 184.772102872701
1721 tomatoseed oil 184.772102872701
1403 pork spareribs raw 211.84947486363993
1357 pork carcass raw 248.26648988536493


In [31]:
# query: string beans wellsley farms
query = '2274'
dist = DistanceMetric.get_metric('euclidean')

# measure how far each food's macro vector lies from the query food's
query_location = food_df.loc[[query]]
pairwise_dist = dist.pairwise(food_df, query_location)
query_distances = [
    (food_id, distance)
    for food_id, distance in zip(food_df.index, pairwise_dist.flatten())
]

# show the 10 nearest foods in ascending order of distance
closest_first = sorted(query_distances, key=lambda entry: entry[1])
top_matches = closest_first[:10]
for similar_food_id, macro_distance in top_matches:
    print(similar_food_id, food_name_map[similar_food_id], macro_distance)

2274 string beans wellsley farms 0.0
1032 white pepper 0.1414213562373094
2167 hungarian pepper 0.17320508075688776
578 wine light 0.19999999999999998
2367 iceberg lettuce 0.20500000000000002
2103 celery cooked 0.22271955459725573
1041 black pepper 0.22368728171266236
1075 onion powder 0.2248910847499295
2305 onion cooked 0.22617692189964914
892 mulberries 0.22891264709491255
