In [2]:
import numpy as np
import pandas as pd
import os
from gensim.models import KeyedVectors
from sklearn.metrics.pairwise import cosine_similarity

# Data and model files are resolved relative to the current working directory.
cwd = os.getcwd()
df = pd.read_csv(os.path.join(cwd, 'data/merged_dataset_large.csv'))

# Each 'Word Embedding' cell stores a vector as the text of a printed array
# (e.g. "[ 0.04 -0.29 ... ]"). Delete newlines and brackets, collapse double
# spaces, then parse the space-separated numbers. Stacking the per-row
# vectors yields one array with a row per dataset entry.
known_embeddings = np.array([
    np.fromstring(
        text.translate(str.maketrans('', '', '\n[]')).replace('  ', ' '),
        sep=' ')
    for text in df['Word Embedding']
])

# Load the saved word vectors, memory-mapped read-only.
model_path = os.path.join(cwd, 'saved_models/word2vec.wordvectors')
wv = KeyedVectors.load(model_path, mmap='r')


def recognize_item(item_name, emb=known_embeddings):
    """
    Recognize the user input and find the closest match in the full dataset.

    The query embedding is the mean of the word vectors of the
    whitespace-separated words in ``item_name``. It is compared against every
    row of ``emb`` with cosine similarity and the most similar row wins.

    :param item_name: item entered by the user (string)
    :param emb: embeddings of the full dataset, in the same row order as ``df``
                (defaults to ``known_embeddings`` from the outer scope)
    :return: row of ``df`` holding the item that is the best match
    :raises ValueError: if ``item_name`` contains no words
    :raises KeyError: propagated from the ``wv`` lookup when a word is not in
                      the word-vector vocabulary
    """
    words = item_name.split()
    if not words:
        # Previously this case failed later with an unbound loop variable.
        raise ValueError("item_name must contain at least one word")

    # Average the word vectors in float64; taking the mean directly avoids
    # hard-coding the embedding dimensionality.
    embedding = np.mean([wv[word] for word in words], axis=0, dtype=np.float64)

    # similarities has shape (1, len(emb)), so the flat argmax is the row index.
    similarities = cosine_similarity([embedding], emb)
    max_index = int(np.argmax(similarities))

    # Return the dataset row itself so callers can read item['Footprint'] etc.
    return df.iloc[max_index]
# NOTE(review): bare expression in the middle of the cell. It is not the
# cell's last expression, so it is not what the cell echoes as its output.
df


# Disabled command-line entry point, kept as a triple-quoted string instead of
# live code. Because this string literal is the final expression of the cell,
# its value is what the notebook shows as the cell's output.
'''
if __name__ == '__main__':
    print("Welcome to Green Bites!")
    print("Enter 'Q' to exit")
    user_item = input("Enter any grocery item: ")
    while user_item != "Q":
        try:
            item = recognize_item(user_item)
            print(f"{user_item} is estimated to have a footprint of about {item['Footprint']} kg of CO2 per kg.")
        except:
            print("Sorry, we couldn't recognize that food item. Try another!")
        finally:
            user_item = input("Enter another item: ")

    print("Happy shopping!")
    '''


'\nif __name__ == \'__main__\':\n    print("Welcome to Green Bites!")\n    print("Enter \'Q\' to exit")\n    user_item = input("Enter any grocery item: ")\n    while user_item != "Q":\n        try:\n            item = recognize_item(user_item)\n            print(f"{user_item} is estimated to have a footprint of about {item[\'Footprint\']} kg of CO2 per kg.")\n        except:\n            print("Sorry, we couldn\'t recognize that food item. Try another!")\n        finally:\n            user_item = input("Enter another item: ")\n\n    print("Happy shopping!")\n    '

In [3]:
# Display the loaded dataset; the table rendered below is the truncated view
# (middle rows and columns are elided with "...").
df

Unnamed: 0,FullName,Word Embedding,Energy_kcal,Protein_g,Fat_g,Carb_g,Sugar_g,Fiber_g,VitA_mcg,VitB6_mg,...,Calcium_mg,Copper_mcg,Iron_mg,Magnesium_mg,Manganese_mg,Phosphorus_mg,Selenium_mcg,Zinc_mg,Item,Footprint
0,agave cooked southwest,[ 0.04592686 -0.29528741 1.02437786 -0.344197...,135.000000,0.990000,0.29,32.000000,20.870000,10.6,6.000000,0.087000,...,460.000000,0.112000,3.550000,39.0,0.142000,9.000000,0.200000,0.250000,mixed fish frozen,1.040000
1,agave dried southwest,[ 0.1454093 -0.06162685 0.07142376 0.208371...,341.000000,1.710000,0.69,81.980000,50.700000,15.6,1.000000,0.216000,...,770.000000,0.189000,3.640000,207.0,0.337000,37.000000,0.600000,12.100000,yeast dried,3.294000
2,agave raw southwest,[-0.57195613 0.05473957 0.66499873 0.122658...,68.000000,0.520000,0.15,16.230000,2.580000,6.6,2.000000,0.055000,...,417.000000,0.138000,1.800000,55.0,0.094000,7.000000,0.400000,0.150000,yogurt plain,2.095343
3,alcoholic beverage wine,[ 0.89813073 -0.6840903 0.29133012 -0.747314...,83.133333,0.070000,0.00,2.665333,0.041333,0.0,0.000000,0.003800,...,0.533333,0.000733,0.030667,0.8,0.008800,1.533333,0.013333,0.009333,wine white,0.744100
4,alfalfa seeds sprouted,[ 0.4916201 -0.02581801 0.01801805 -0.437650...,23.000000,3.990000,0.69,2.100000,0.200000,1.9,8.000000,0.034000,...,32.000000,0.157000,0.960000,27.0,0.188000,70.000000,0.600000,0.920000,sunflower seed,1.159500
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2186,yogurt parfait lowfat,[-0.66621507 0.35822042 -0.05019585 -0.717498...,84.000000,3.360000,1.01,15.860000,11.680000,1.1,30.000000,0.233000,...,105.000000,0.054000,0.490000,17.0,0.000000,93.000000,3.900000,0.890000,yogurt flavoured,2.812144
2187,yogurt plain,[-5.17644763e-01 1.15670148e+00 2.78025508e-...,60.000000,4.816667,1.66,6.460000,6.460000,0.0,14.333333,0.044667,...,167.666667,0.012333,0.073333,16.0,0.004333,132.000000,3.033333,0.816667,yogurt plain,2.095343
2188,yogurt vanilla,[ 2.99653935 1.11646992 -3.54667389 -2.367649...,85.000000,4.930000,1.25,13.800000,13.800000,0.0,12.000000,0.045000,...,171.000000,0.013000,0.070000,16.0,0.004000,135.000000,4.900000,0.830000,vanilla,4.300000
2189,yogurt vanilla flavor,[ 1.97945546 0.97028023 -1.2717104 -1.122158...,86.000000,4.930000,1.25,13.800000,5.430000,0.0,12.000000,0.045000,...,171.000000,0.013000,0.070000,16.0,0.004000,135.000000,4.900000,0.830000,vanilla,4.300000


In [None]:
import recognize_item as r
from recommendations import recommendation

print("Welcome to Green Bites!")
print("Enter 'Q' to exit")

# Interactive loop: look up each entered item, report its estimated carbon
# footprint, and suggest lower-footprint alternatives when there are any.
# The quit check ignores case and surrounding whitespace so that "q" or "Q "
# exits instead of being looked up as a food item.
food_choice = input("Enter any grocery item: ")
while food_choice.strip().upper() != "Q":
    try:
        # Identify the item
        food_choice = food_choice.lower()
        item = r.recognize_item(food_choice)
        print("Estimated footprint: {0:.2f} kg of CO2 per kg".format(item['Footprint']))

        # Get recommendations
        user_recommendation = recommendation(item['FullName'])
        if len(user_recommendation) != 0:
            print("We recommend these alternatives: \n"
                  f"{user_recommendation}")
        else:  # There are no items in the cluster that have a lower footprint!
            print("Good choice!")

    except Exception:
        # Any lookup failure (unknown word, empty input, ...) is reported the
        # same way. Catching Exception rather than using a bare except lets
        # KeyboardInterrupt / SystemExit stop the loop instead of being swallowed.
        print("Sorry, we couldn't recognize that food item. Try another!")

    # Re-prompt after every attempt, whether or not the lookup succeeded.
    print("\n")
    food_choice = input("Enter another item: ")

print("Thank you for using Green Bites!")


Welcome to Green Bites!
Enter 'Q' to exit
Enter any grocery item: banana
Estimated footprint: 0.81 kg of CO2 per kg
We recommend these alternatives: 
                                          FullName  Footprint  Cluster_ID
946                     jams and preserves apricot   0.359143         103
136                            beverages cocoa mix   0.412620         103
497  chocolate flavor beverage mix for milk powder   0.412620         103
511                               cocoa mix powder   0.412620         103
947                          jellies reduced sugar   0.666330         103


Enter another item: orange
Estimated footprint: 0.35 kg of CO2 per kg
Good choice!


