In [1]:

import pandas as pd
import ast
import nltk
import re
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk import bigrams
from collections import Counter
import numpy as np

import io
from google.cloud import vision_v1p3beta1 as vision
from google.protobuf.json_format import MessageToDict

from gensim.models import Word2Vec
import gensim.downloader as api


In [2]:
%env GOOGLE_APPLICATION_CREDENTIALS=/Users/amandeepchabada/Desktop/AI539/recipes/evident-airline-386406-4d1a284a2b16.json

env: GOOGLE_APPLICATION_CREDENTIALS=/Users/amandeepchabada/Desktop/AI539/recipes/evident-airline-386406-4d1a284a2b16.json


### Recipies

In [3]:
lemmatizer = WordNetLemmatizer()
# Define stopwords to remove from the ingredients
stop_words = set(stopwords.words('english'))

In [4]:
column_names = ['id', 'title', 'ingredients', 'instructions', 'image_name', 'cleaned_ingredients']

# Read the CSV file and select the desired columns, skipping the first row
df = pd.read_csv('/Users/amandeepchabada/Desktop/AI539/recipes/recipes.csv')
df.columns = column_names

In [5]:
df.shape

(13501, 6)

In [6]:
df.sample(2).to_clipboard()

In [7]:
ingredients_data = pd.read_json('/Users/amandeepchabada/Desktop/AI539/recipes/ingredients.json')
ingredients_list = ingredients_data['ingredients'].tolist()
ingredients = [word.lower() for ingredients_list in ingredients_list for ingredient in ingredients_list for word in ingredient.split() if word.lower() not in stop_words]
unique_ingredients = set(ingredients)
unique_ingredients_list = list(unique_ingredients)
unique_ingredients_list_lemm = [lemmatizer.lemmatize(word) for word in unique_ingredients_list]

In [8]:
# Initialize lemmatizer

# This function will clean up your ingredients and filter based on unique ingredients
def clean_and_filter_ingredients(ingredients_str):
    # Convert string list to actual list
    ingredients_list = ast.literal_eval(ingredients_str)
    cleaned_ingredients = []
    for ingredient in ingredients_list:
        # Remove everything in parentheses
        ingredient = re.sub(r'\([^)]*\)', '', ingredient)
        # Remove numbers and special characters
        ingredient = re.sub(r'[^a-zA-Z\s]', '', ingredient)
        # Convert to lowercase
        ingredient = ingredient.lower()
        # Split into words
        words = ingredient.split()
        # Remove stopwords and lemmatize
        words = [lemmatizer.lemmatize(word) for word in words if word not in stop_words]
        cleaned_ingredients.extend(words)
    #filtered_ingredients = [ingredient for ingredient in cleaned_ingredients if ingredient in unique_ingredients_list_lemm]
    return list(set(cleaned_ingredients))



In [9]:
# Apply the clean_and_filter_ingredients function to the 'cleaned_ingredients' column
df['ingredients_list'] = df['cleaned_ingredients'].apply(clean_and_filter_ingredients)
short_df = df[['id', 'title','ingredients_list','image_name','instructions', 'cleaned_ingredients']]

In [10]:
def find_recipe_by_exact_match(ingredient_list):
    ingredient_list = [lemmatizer.lemmatize(word) for word in ingredient_list]
    print(ingredient_list)
    matching_records = short_df[short_df['ingredients_list'].apply(lambda x: set(ingredient_list) == set(x))]
    return matching_records


In [11]:
def find_recipe_by_similarity(ingredient_list, similarity_threshold=0.5):
    ingredient_list = [lemmatizer.lemmatize(word) for word in ingredient_list]
    print(ingredient_list)
    matching_records = short_df[short_df['ingredients_list'].apply(lambda x: len(set(ingredient_list).intersection(x)) / len(set(ingredient_list).union(x))) >= similarity_threshold]
    return matching_records

### Image Processing

In [12]:
files = ['img1.jpeg','img2.jpeg','img3.jpeg']

In [13]:
client = vision.ImageAnnotatorClient()

content = []
for file_path in files:

    with io.open(file_path, 'rb') as image_file:
        content.append(image_file.read())
    
data_from_images = []
for c in content:
    image = vision.Image(content=c)

    response = client.label_detection(image=image,max_results = 500)
    response = MessageToDict(response._pb)
    data_from_images.append([x['description'] for x in response['labelAnnotations']])

In [14]:
processed_img_ingredients = []
for data in data_from_images:
    processed_img_ingredients.append([ing.lower() for ing in data])
    
cleaned_processed_img_ingredients = []
for record in processed_img_ingredients:
    clean_record = []
    items = [lemmatizer.lemmatize(word) for record in record for word in record.split() if word not in stop_words]
    cleaned_processed_img_ingredients.append(items)

In [15]:
common_ingredients = []#['salt', 'pepper', 'flour', 'sugar', 'butter', 'oil', 'water', 'milk', 'eggs', 'onions', 'garlic', 'rice']
final_img_ingredients = []
for record in cleaned_processed_img_ingredients:
    final_img_ingredients.append(list(set(record + common_ingredients)))

In [29]:
def find_top_matches(df, final_img_ingredients):
    # Create a new column to store the intersection count
    df['intersection_count'] = 0

    # Iterate over each row and calculate the intersection count
    for index, row in df.iterrows():
        intersection = set(row['ingredients_list']) & set(final_img_ingredients)
        intersection_count = len(intersection)
        df.at[index, 'intersection_count'] = intersection_count

    # Sort the DataFrame by the intersection count column in descending order
    df_sorted = df.sort_values('intersection_count', ascending=False)

    # Retrieve the rows with the highest intersection count
    top_values = df_sorted['intersection_count'].unique()[:3]  # Get top 3 unique values
    top_matches = df_sorted[df_sorted['intersection_count'].isin(top_values)].head(3)

    return top_matches

In [30]:
find_top_matches(df,final_img_ingredients[0])

Unnamed: 0,id,title,ingredients,instructions,image_name,cleaned_ingredients,ingredients_list,intersection_count
9292,9292,Chicken Curry,['10 dried guajillo or New Mexico chiles (abou...,"In medium bowl, combine chiles and cold water ...",chicken-curry-350992,['10 dried guajillo or New Mexico chiles (abou...,"[kosher, cinnamon, clean, chicken, guajillo, g...",8
6375,6375,Pot Roast in Rich Gravy,"['1/4 cup matzoh cake meal', '4 tablespoons ve...",Preheat oven to 350°F with rack in middle.\nOn...,pot-roast-in-rich-gravy-394970,"['1/4 cup matzoh cake meal', '4 tablespoons ve...","[cinnamon, heavy, chuck, chicken, vegetable, r...",7
8223,8223,Vegetarian Shepherd's Pie,"['10 ounces pearl onions (about 2 1/4 cups)', ...",Blanch pearl onions in a 2-quart saucepan of b...,vegetarian-shepherds-pie-355994,"['10 ounces pearl onions (about 2 1/4 cups)', ...","[pearl, inch, heavy, flameproof, hot, cremini,...",7


In [31]:
find_top_matches(df,final_img_ingredients[1])

Unnamed: 0,id,title,ingredients,instructions,image_name,cleaned_ingredients,ingredients_list,intersection_count
7187,7187,Grapes Leaves with Bulgar and Mint,"[""1 1/2 cups whole grain quick-cooking bulgur ...","Combine bulgur, green onions, tomatoes, choppe...",grapes-leaves-with-bulgar-and-mint-364629,"['""1 1/2 cups whole grain quick-cooking bulgur...","[extravirgin, ounce, ground, store, tip, drain...",8
7696,7696,Lamb Bulgogi with Asian Pear Dipping Sauce,"['4 green onions, coarsely chopped', '3 tables...","Place green onions, sugar, chopped garlic, and...",lamb-bulgogi-with-asian-pear-dipping-sauce-359749,"['4 green onions, coarsely chopped', '3 tables...","[asian, inch, left, trim, use, lettuce, seed, ...",8
8003,8003,Sichuan Beef Noodle Soup with Pickled Mustard ...,"['5 pounds boneless beef shank', '1/4 cup vege...",Bring large pot of water to boil over high hea...,sichuan-beef-noodle-soup-with-pickled-mustard-...,"['5 pounds boneless beef shank', '1/4 cup vege...","[side, inch, choy, peppercorn, ginger, soy, ri...",8


In [32]:
find_top_matches(df,final_img_ingredients[2])

Unnamed: 0,id,title,ingredients,instructions,image_name,cleaned_ingredients,ingredients_list,intersection_count
7187,7187,Grapes Leaves with Bulgar and Mint,"[""1 1/2 cups whole grain quick-cooking bulgur ...","Combine bulgur, green onions, tomatoes, choppe...",grapes-leaves-with-bulgar-and-mint-364629,"['""1 1/2 cups whole grain quick-cooking bulgur...","[extravirgin, ounce, ground, store, tip, drain...",9
6292,6292,Fattoush,"['4 teaspoons ground sumac, soaked in 4 teaspo...","Combine sumac with soaking liquid, 3 tablespoo...",fattoush-395481,"['4 teaspoons ground sumac, soaked in 4 teaspo...","[kosher, tomato, lettuce, dried, market, lengt...",7
581,581,Pitaquiles,"['3 pitas (do not split into 2 rounds each)', ...",Preheat the oven to 425°F.\nStack the pitas an...,pitaquiles-pita-in-tomato-sauce,"['3 pitas (do not split into 2 rounds each)', ...","[kosher, feta, extravirgin, thin, ground, garn...",7
