In [1]:
import numpy as np 
import pandas as pd 
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel, cosine_similarity
from sklearn.utils import gen_batches

# Path: cleaning_data.ipynb
# Load the raw recipe table from the data directory (path is relative to this
# notebook) and preview the first rows.
recipes_csv = '../data/recipes_data.csv'
df = pd.read_csv(recipes_csv)
df.head()



Unnamed: 0,title,ingredients,directions,link,source,NER,site
0,No-Bake Nut Cookies,"[""1 c. firmly packed brown sugar"", ""1/2 c. eva...","[""In a heavy 2-quart saucepan, mix brown sugar...",www.cookbooks.com/Recipe-Details.aspx?id=44874,Gathered,"[""bite size shredded rice biscuits"", ""vanilla""...",www.cookbooks.com
1,Jewell Ball'S Chicken,"[""1 small jar chipped beef, cut up"", ""4 boned ...","[""Place chipped beef on bottom of baking dish....",www.cookbooks.com/Recipe-Details.aspx?id=699419,Gathered,"[""cream of mushroom soup"", ""beef"", ""sour cream...",www.cookbooks.com
2,Creamy Corn,"[""2 (16 oz.) pkg. frozen corn"", ""1 (8 oz.) pkg...","[""In a slow cooker, combine all ingredients. C...",www.cookbooks.com/Recipe-Details.aspx?id=10570,Gathered,"[""frozen corn"", ""pepper"", ""cream cheese"", ""gar...",www.cookbooks.com
3,Chicken Funny,"[""1 large whole chicken"", ""2 (10 1/2 oz.) cans...","[""Boil and debone chicken."", ""Put bite size pi...",www.cookbooks.com/Recipe-Details.aspx?id=897570,Gathered,"[""chicken gravy"", ""cream of mushroom soup"", ""c...",www.cookbooks.com
4,Reeses Cups(Candy),"[""1 c. peanut butter"", ""3/4 c. graham cracker ...","[""Combine first four ingredients and press in ...",www.cookbooks.com/Recipe-Details.aspx?id=659239,Gathered,"[""graham cracker crumbs"", ""powdered sugar"", ""p...",www.cookbooks.com


In [2]:
# The NER column holds list literals stored as text; remove the enclosing
# square brackets so only the quoted, comma-separated entity names remain.
ner_without_brackets = df["NER"].str.strip('[]')
df["NER"] = ner_without_brackets
df.head()

Unnamed: 0,title,ingredients,directions,link,source,NER,site
0,No-Bake Nut Cookies,"[""1 c. firmly packed brown sugar"", ""1/2 c. eva...","[""In a heavy 2-quart saucepan, mix brown sugar...",www.cookbooks.com/Recipe-Details.aspx?id=44874,Gathered,"""bite size shredded rice biscuits"", ""vanilla"",...",www.cookbooks.com
1,Jewell Ball'S Chicken,"[""1 small jar chipped beef, cut up"", ""4 boned ...","[""Place chipped beef on bottom of baking dish....",www.cookbooks.com/Recipe-Details.aspx?id=699419,Gathered,"""cream of mushroom soup"", ""beef"", ""sour cream""...",www.cookbooks.com
2,Creamy Corn,"[""2 (16 oz.) pkg. frozen corn"", ""1 (8 oz.) pkg...","[""In a slow cooker, combine all ingredients. C...",www.cookbooks.com/Recipe-Details.aspx?id=10570,Gathered,"""frozen corn"", ""pepper"", ""cream cheese"", ""garl...",www.cookbooks.com
3,Chicken Funny,"[""1 large whole chicken"", ""2 (10 1/2 oz.) cans...","[""Boil and debone chicken."", ""Put bite size pi...",www.cookbooks.com/Recipe-Details.aspx?id=897570,Gathered,"""chicken gravy"", ""cream of mushroom soup"", ""ch...",www.cookbooks.com
4,Reeses Cups(Candy),"[""1 c. peanut butter"", ""3/4 c. graham cracker ...","[""Combine first four ingredients and press in ...",www.cookbooks.com/Recipe-Details.aspx?id=659239,Gathered,"""graham cracker crumbs"", ""powdered sugar"", ""pe...",www.cookbooks.com


In [3]:
# Discard recipes that have no NER value. The explicit copy() makes the result
# an independent frame, so the column assignment below cannot raise pandas'
# SettingWithCopyWarning.
df = df.dropna(subset=["NER"]).copy()

# No fillna("") is needed here: dropna already removed every missing NER value.
# Force a plain-string column so the vectoriser only ever receives str objects.
df["NER"] = df["NER"].astype(str)
print(df["NER"].head())



0    "bite size shredded rice biscuits", "vanilla",...
1    "cream of mushroom soup", "beef", "sour cream"...
2    "frozen corn", "pepper", "cream cheese", "garl...
3    "chicken gravy", "cream of mushroom soup", "ch...
4    "graham cracker crumbs", "powdered sugar", "pe...
Name: NER, dtype: object


In [4]:
# Fit a word-level TF-IDF model (unigrams and bigrams, English stop words
# removed, terms must occur in at least two documents) on the first 1000
# NER strings only.
tf = TfidfVectorizer(
    analyzer='word',
    ngram_range=(1, 2),
    min_df=2,
    stop_words='english',
)
tfidf_matrix = tf.fit_transform(df["NER"].iloc[:1000])
tfidf_matrix.shape


(1000, 1653)

In [5]:
def cos_compare(matrix, batch_size=1000):
    """Yield row-batch blocks of the linear-kernel similarity of ``matrix`` with itself.

    The rows of ``matrix`` are split into consecutive batches and each batch is
    compared against the *whole* matrix, so the full similarity matrix never has
    to be held in memory at once.

    Note: ``linear_kernel`` is a plain dot product. It equals cosine similarity
    only when the rows are L2-normalised (the default output of
    ``TfidfVectorizer``).

    Parameters
    ----------
    matrix : array-like or sparse matrix with a ``shape`` attribute
        Feature matrix, one row per document.
    batch_size : int, default 1000
        Number of rows per yielded block; the last block may be smaller.

    Yields
    ------
    Block of similarities with one row per batch row and one column per row
    of ``matrix``.
    """
    for batch in gen_batches(matrix.shape[0], batch_size):
        yield linear_kernel(matrix[batch], matrix)


In [6]:
# Stream the similarity matrix to disk one batch at a time.
# The first batch opens the file in 'w' mode so that re-running this cell
# replaces any previous cosine_sim.csv instead of appending to it (appending
# would silently produce a matrix with duplicated rows).
rows_done = 0
for batch_no, x in enumerate(cos_compare(tfidf_matrix)):
    rows_done += x.shape[0]
    print('Processed {} rows of {}:'.format(rows_done, tfidf_matrix.shape[0]))
    pd.DataFrame(x).to_csv(
        'cosine_sim.csv',
        header=False,
        index=False,
        mode='w' if batch_no == 0 else 'a',
    )

# Read the matrix back only after every batch was written successfully; a
# failure above now propagates instead of loading a partial or stale file.
cosine_sim = pd.read_csv('cosine_sim.csv', header=None)


Processed 1000 rows of 1000:


In [7]:
# Recompute the full similarity matrix in memory (rebinds cosine_sim).
# With no second argument, linear_kernel compares the matrix with itself.
cosine_sim = linear_kernel(tfidf_matrix)

In [8]:
# Renumber the rows 0..n-1 so that a row's index equals its position in the
# similarity matrix, then keep only the rows that matrix actually covers.
# A larger, fixed cut-off would let titles map to positions outside
# cosine_sim and fail later with an IndexError.
df = df.reset_index().iloc[:cosine_sim.shape[0]]
titles = df['title']
# Lookup table: recipe title -> row position (a title may occur more than once).
indices = pd.Series(df.index, index=df['title'])

In [9]:
def get_recommendations(title):
    """Return the titles of up to 30 recipes most similar to ``title``.

    Relies on the notebook-level objects ``indices`` (title -> row position),
    ``cosine_sim`` (square similarity matrix) and ``titles`` (Series of titles
    in the same row order).

    Parameters
    ----------
    title : str
        Exact recipe title to look up. If several recipes share the title,
        the first one is used.

    Returns
    -------
    pandas.Series or None
        Titles ordered from most to least similar, never including the query
        recipe itself. ``None`` (after printing a message) when the title is
        unknown or has no row in the similarity matrix.
    """
    try:
        idx = indices[title]
    except KeyError:
        print(f"Title '{title}' not found in indices.")
        return None  # or handle the error in an appropriate way

    # A duplicated title makes the lookup return a Series of positions instead
    # of a scalar; fall back to the first matching recipe.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]
    idx = int(idx)

    sim = np.asarray(cosine_sim)
    if idx >= sim.shape[0]:
        print(f"Title '{title}' has no row in the similarity matrix.")
        return None

    # Stable sort on the negated scores: descending similarity, ties kept in
    # ascending row order.
    order = np.argsort(-sim[idx], kind='stable')
    # Remove the query recipe explicitly rather than assuming it sorts first;
    # an earlier recipe with an identical score would otherwise displace it.
    recipe_indices = order[order != idx][:30]
    return titles.iloc[recipe_indices]

# Example query: recipes most similar to the one titled 'Bread'.
get_recommendations(title='Bread')



206                   Spoon Rolls
355                     Hot Rolls
825          Cinnamon Pull-Aparts
911      Swedish Heirloom Cookies
385              Never Fail Rolls
339                         Rolls
155                Bonnie'S Bread
713             Old Fashion Punch
77             Spanish Hamburgers
474              My Caramel Rolls
389      Good 'N Easy Yeast Rolls
690         One-Rise Monkey Bread
786               Boiled Dressing
466                 Becky'S Punch
430           Scottish Shortbread
834           English Muffin Loaf
550                 Puff Pancakes
198          Fresh Strawberry Pie
795               Vanilla Pudding
908    Madge'S Refrigerator Rolls
369                     Pie Crust
971            Happy Face Cookies
288                  Clam Chowder
29                 One Hour Rolls
546                     Fudge Pie
900                  Corn Pudding
42                 Angel Biscuits
688              Warm Apple Crisp
849           Date Rolled Cookies
175           