In [2]:
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'
import numpy as np
import collections
import json
import gensim 
import ast
from gensim.models import word2vec, phrases
from gensim.parsing.preprocessing import remove_stopwords, strip_punctuation, strip_numeric,\
                    strip_non_alphanum, strip_multiple_whitespaces, strip_short
from textblob import TextBlob, Word

import re
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
%matplotlib inline


from strsimpy.cosine import Cosine

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)


In [54]:
#load the recipes dataset
filepath = "recipesall.csv"
df_recipes = pd.read_csv(filepath, encoding="ISO-8859-1")

#drop rows where cuisine, ingregients are NA
df_recipes.dropna(subset=['cuisine', 'ingredients'],inplace=True)

In [34]:
df_recipes = pd.DataFrame()
for chunk in pd.read_csv('raw-data_recipe.csv',  chunksize=1000):
    df_recipes = pd.concat([recipe, chunk], ignore_index=True)

In [55]:
#convert to lower case
df_recipes['ingredients'] = df_recipes['ingredients'].apply(lambda x: x.lower())

total_ingredients = []
all_receipes_ingredients =  []

for i in range(len(df_recipes)):
    all_ingredients = list()
    
    #split each recipe into different ingredients
    ingred = df_recipes.loc[i, "ingredients"][1:-1]
    
    for ing in (ingred.split(',')):
        ing = remove_stopwords(ing)
        ing = strip_numeric(ing)
        ing = re.sub(r'\(.*oz.\)|(®)|(.*ed)|(.*ly)|boneless|skinless|chunks|fresh|large|cook drain|green|frozen|ground','',ing).strip()
        ing = strip_short(ing,2)
        ing = strip_multiple_whitespaces(ing)
        ing = strip_punctuation(ing)
        ing = strip_non_alphanum(ing)
        
        #convert plurals to singular e.g. tomatoes --> tomato
        ing = (" ".join(TextBlob(ing).words.singularize()))
        
        all_ingredients.append(ing)
        total_ingredients.append(ing)
        
    all_receipes_ingredients.append(all_ingredients)
    
counts_ingr = collections.Counter(total_ingredients)

print('Total Ingredients (with repetition):  \t{}'.format((len(total_ingredients))))
print('Unique Ingredients : \t\t\t{}'.format((len(counts_ingr.values()))))
print('Total Receipes:  \t\t\t{}'.format((len(all_receipes_ingredients))))


Total Ingredients (with repetition):  	55047
Unique Ingredients : 			2633
Total Receipes:  			5474


In [56]:
#add cleaned ingredients back to original dataframe
df_recipes['clean_ingredients'] = pd.Series(all_receipes_ingredients)

#record the number of ingredients for each recipe
df_recipes['ingredient_count'] =  df_recipes.apply(lambda row: len(row['clean_ingredients']), axis = 1)

#convert time in seconds to minutes
#df_recipes['timeMins'] = df_recipes.totalTimeInSeconds.apply(lambda x: x/60) 

In [57]:

num_features = 300    # Word vector dimensionality                      
min_word_count = 4                        
num_workers = 4       # Number of CPUs
context = 20          # Context window size; 

downsampling = 1e-2   # threshold for configuring which 
                      # higher-frequency words are randomly downsampled
            
# Initialize and train the model 
model = word2vec.Word2Vec(all_receipes_ingredients, workers=num_workers, \
            size=num_features, min_count = min_word_count, \
            window = context,sample = downsampling, iter=20)

# If you don't plan to train the model any further, calling 
# init_sims will make the model much more memory-efficient.
model.init_sims(replace=True)

In [79]:
ingredient = 'artichoke'
sub_1 = model.wv.most_similar(ingredient, topn=50)

In [81]:
df_sub1 = pd.DataFrame(sub_1,columns = ['co_ingredient', 'similarity'])
df_sub1 = df_sub1[df_sub1['co_ingredient'].str.contains(ingredient)==False]
df_sub1['ingredient'] = ingredient

In [83]:
df_sub1.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,flatbread,0.841902,artichoke
1,orzo pastum,0.835436,artichoke
2,vine tomato,0.821621,artichoke


In [85]:
ingredient = 'arugula'
sub_2 = model.wv.most_similar(ingredient, topn=50)
df_sub2 = pd.DataFrame(sub_2,columns = ['co_ingredient', 'similarity'])
df_sub2 = df_sub2[df_sub2['co_ingredient'].str.contains(ingredient)==False]
df_sub2['ingredient'] = ingredient
df_sub2.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,chee mozzarella,0.85294,arugula
1,fetum cheese crumble,0.842118,arugula
2,mozzarella,0.834547,arugula


In [86]:
ingredient = 'asparagu'
sub_3 = model.wv.most_similar(ingredient, topn=50)
df_sub3 = pd.DataFrame(sub_3,columns = ['co_ingredient', 'similarity'])
df_sub3 = df_sub3[df_sub3['co_ingredient'].str.contains(ingredient)==False]
df_sub3['ingredient'] = ingredient
df_sub3.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,roll,0.782588,asparagu
1,spinach leaf,0.729998,asparagu
2,fat,0.702776,asparagu


In [89]:
ingredient = 'bamboo shoot'
sub_4 = model.wv.most_similar(ingredient, topn=50)
df_sub4 = pd.DataFrame(sub_4,columns = ['co_ingredient', 'similarity'])
df_sub4 = df_sub4[df_sub4['co_ingredient'].str.contains(ingredient)==False]
df_sub4['ingredient'] = ingredient
df_sub4.head(3)


Unnamed: 0,co_ingredient,similarity,ingredient
0,sugar pea,0.704665,bamboo shoot
1,starch,0.703903,bamboo shoot
2,marinade,0.697772,bamboo shoot


In [93]:
ingredient = 'bell pepper'
sub_5 = model.wv.most_similar(ingredient, topn=50)
df_sub5 = pd.DataFrame(sub_5,columns = ['co_ingredient', 'similarity'])
df_sub5 = df_sub5[df_sub5['co_ingredient'].str.contains(ingredient)==False]
df_sub5['ingredient'] = ingredient
df_sub5.head(3)


Unnamed: 0,co_ingredient,similarity,ingredient
0,shrimp,0.689243,bell pepper
1,chicken stock,0.681362,bell pepper
2,chicken broth,0.679243,bell pepper


In [94]:
ingredient = 'bok choy'
sub_6 = model.wv.most_similar(ingredient, topn=50)
df_sub6 = pd.DataFrame(sub_6,columns = ['co_ingredient', 'similarity'])
df_sub6 = df_sub6[df_sub6['co_ingredient'].str.contains(ingredient)==False]
df_sub6['ingredient'] = ingredient
df_sub6.head(3)


Unnamed: 0,co_ingredient,similarity,ingredient
0,spring roll wrapper,0.789098,bok choy
1,firm tofu,0.78797,bok choy
2,shiitake,0.787515,bok choy


In [95]:
ingredient = 'broccoli'
sub_7 = model.wv.most_similar(ingredient, topn=50)
df_sub7 = pd.DataFrame(sub_7,columns = ['co_ingredient', 'similarity'])
df_sub7 = df_sub7[df_sub7['co_ingredient'].str.contains(ingredient)==False]
df_sub7['ingredient'] = ingredient
df_sub7.head(3)


Unnamed: 0,co_ingredient,similarity,ingredient
1,shiitake,0.746021,broccoli
2,spring onion,0.738154,broccoli
3,chinese rice wine,0.736951,broccoli


In [99]:
ingredient = 'brussel sprout'
sub_8 = model.wv.most_similar(ingredient, topn=50)
df_sub8 = pd.DataFrame(sub_8,columns = ['co_ingredient', 'similarity'])
df_sub8 = df_sub8[df_sub8['co_ingredient'].str.contains(ingredient)==False]
df_sub8['ingredient'] = ingredient
df_sub8.head(3)


Unnamed: 0,co_ingredient,similarity,ingredient
0,marsala wine,0.648823,brussel sprout
1,rotini,0.648559,brussel sprout
2,sweet potato,0.623499,brussel sprout


In [102]:
ingredient = 'butternut squash'
sub_9 = model.wv.most_similar(ingredient, topn=50)
df_sub9 = pd.DataFrame(sub_9,columns = ['co_ingredient', 'similarity'])
df_sub9 = df_sub9[df_sub9['co_ingredient'].str.contains(ingredient)==False]
df_sub9['ingredient'] = ingredient
df_sub9.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,garbanzo bean,0.640767,butternut squash
1,beef chuck,0.629607,butternut squash
2,mint,0.627556,butternut squash


In [103]:
ingredient = 'cabbage'
sub_10 = model.wv.most_similar(ingredient, topn=50)
df_sub10 = pd.DataFrame(sub_10,columns = ['co_ingredient', 'similarity'])
df_sub10 = df_sub10[df_sub10['co_ingredient'].str.contains(ingredient)==False]
df_sub10['ingredient'] = ingredient
df_sub10.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,pork,0.572894,cabbage
1,potato,0.570428,cabbage
2,hungarian hot paprika,0.565818,cabbage


In [106]:
ingredient = 'carrot'
sub_11 = model.wv.most_similar(ingredient, topn=50)
df_sub11 = pd.DataFrame(sub_11,columns = ['co_ingredient', 'similarity'])
df_sub11 = df_sub11[df_sub11['co_ingredient'].str.contains(ingredient)==False]
df_sub11['ingredient'] = ingredient
df_sub11.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,,0.520545,carrot
1,tomato juice,0.504274,carrot
2,meat bone,0.478955,carrot


In [107]:
ingredient = 'cauliflower'
sub_12 = model.wv.most_similar(ingredient, topn=50)
df_sub12 = pd.DataFrame(sub_12,columns = ['co_ingredient', 'similarity'])
df_sub12 = df_sub12[df_sub12['co_ingredient'].str.contains(ingredient)==False]
df_sub12['ingredient'] = ingredient
df_sub12.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,parsnip,0.671904,cauliflower
1,chicken leg quarters,0.650876,cauliflower
2,pea,0.639548,cauliflower


In [108]:
ingredient = 'celery'
sub_13 = model.wv.most_similar(ingredient, topn=50)
df_sub13 = pd.DataFrame(sub_13,columns = ['co_ingredient', 'similarity'])
df_sub13 = df_sub13[df_sub13['co_ingredient'].str.contains(ingredient)==False]
df_sub13['ingredient'] = ingredient
df_sub13.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
1,creole seasoning,0.797729,celery
2,crawfish,0.743754,celery
3,hot sauce,0.739766,celery


In [111]:
ingredient = 'chili pepper'
sub_14 = model.wv.most_similar(ingredient, topn=50)
df_sub14 = pd.DataFrame(sub_14,columns = ['co_ingredient', 'similarity'])
df_sub14 = df_sub14[df_sub14['co_ingredient'].str.contains(ingredient)==False]
df_sub14['ingredient'] = ingredient
df_sub14.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,chile pepper,0.82727,chili pepper
1,szechwan peppercorn,0.816725,chili pepper
2,chili flake,0.807388,chili pepper


In [113]:
ingredient = 'cilantro'
sub_15 = model.wv.most_similar(ingredient, topn=50)
df_sub15 = pd.DataFrame(sub_15,columns = ['co_ingredient', 'similarity'])
df_sub15 = df_sub15[df_sub15['co_ingredient'].str.contains(ingredient)==False]
df_sub15['ingredient'] = ingredient
df_sub15.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,jalapeno chily,0.776667,cilantro
1,coconut milk,0.745152,cilantro
2,lime,0.743005,cilantro


In [116]:
ingredient = 'cucumber'
sub_16 = model.wv.most_similar(ingredient, topn=50)
df_sub16 = pd.DataFrame(sub_16,columns = ['co_ingredient', 'similarity'])
df_sub16 = df_sub16[df_sub16['co_ingredient'].str.contains(ingredient)==False]
df_sub16['ingredient'] = ingredient
df_sub16.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
1,avocado,0.8033,cucumber
3,kalamatum olife,0.772849,cucumber
4,pita,0.766695,cucumber


In [117]:
ingredient = 'daikon'
sub_17 = model.wv.most_similar(ingredient, topn=50)
df_sub17 = pd.DataFrame(sub_17,columns = ['co_ingredient', 'similarity'])
df_sub17 = df_sub17[df_sub17['co_ingredient'].str.contains(ingredient)==False]
df_sub17['ingredient'] = ingredient
df_sub17.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,coconut milk,0.737065,daikon
1,tamarind paste,0.728159,daikon
2,tofu,0.716476,daikon


In [118]:
ingredient = 'eggplant'
sub_18 = model.wv.most_similar(ingredient, topn=50)
df_sub18 = pd.DataFrame(sub_18,columns = ['co_ingredient', 'similarity'])
df_sub18 = df_sub18[df_sub18['co_ingredient'].str.contains(ingredient)==False]
df_sub18['ingredient'] = ingredient
df_sub18.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,extra lean beef,0.696832,eggplant
1,pizza dough,0.650069,eggplant
2,couscou,0.610297,eggplant


In [122]:
ingredient = 'garlic'
sub_19 = model.wv.most_similar(ingredient, topn=50)
df_sub19 = pd.DataFrame(sub_19,columns = ['co_ingredient', 'similarity'])
df_sub19 = df_sub19[df_sub19['co_ingredient'].str.contains(ingredient)==False]
df_sub19['ingredient'] = ingredient
df_sub19.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
1,cilantro sprig,0.657487,garlic
2,cod fillet,0.65387,garlic
4,miso,0.568598,garlic


In [123]:
ingredient = 'ginger'
sub_20 = model.wv.most_similar(ingredient, topn=50)
df_sub20 = pd.DataFrame(sub_20,columns = ['co_ingredient', 'similarity'])
df_sub20 = df_sub20[df_sub20['co_ingredient'].str.contains(ingredient)==False]
df_sub20['ingredient'] = ingredient
df_sub20.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
2,thai basil,0.597881,ginger
3,noodle,0.593403,ginger
4,pork butt,0.592603,ginger


In [130]:
ingredient = 'kale'
sub_21 = model.wv.most_similar(ingredient, topn=50)
df_sub21 = pd.DataFrame(sub_21,columns = ['co_ingredient', 'similarity'])
df_sub21 = df_sub21[df_sub21['co_ingredient'].str.contains(ingredient)==False]
df_sub21['ingredient'] = ingredient
df_sub21.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,pimento,0.65221,kale
1,pizza crust,0.64579,kale
2,cannellini bean,0.600373,kale


In [132]:
ingredient = 'leek'
sub_22 = model.wv.most_similar(ingredient, topn=50)
df_sub22 = pd.DataFrame(sub_22,columns = ['co_ingredient', 'similarity'])
df_sub22 = df_sub22[df_sub22['co_ingredient'].str.contains(ingredient)==False]
df_sub22['ingredient'] = ingredient
df_sub22.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,russet potato,0.788616,leek
1,pancettum,0.674562,leek
2,wine,0.663435,leek


In [133]:
ingredient = 'lettuce'
sub_23 = model.wv.most_similar(ingredient, topn=50)
df_sub23 = pd.DataFrame(sub_23,columns = ['co_ingredient', 'similarity'])
df_sub23 = df_sub23[df_sub23['co_ingredient'].str.contains(ingredient)==False]
df_sub23['ingredient'] = ingredient
df_sub23.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,rocket leaf,0.752613,lettuce
1,hothouse cucumber,0.722464,lettuce
2,spaghettus squash,0.713745,lettuce


In [138]:
ingredient = 'mushroom'
sub_24 = model.wv.most_similar(ingredient, topn=50)
df_sub24 = pd.DataFrame(sub_24,columns = ['co_ingredient', 'similarity'])
df_sub24 = df_sub24[df_sub24['co_ingredient'].str.contains(ingredient)==False]
df_sub24['ingredient'] = ingredient
df_sub24.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,chicken breast half,0.663116,mushroom
1,low sodium chicken broth,0.604494,mushroom
2,chicken broth,0.596191,mushroom


In [142]:
ingredient = 'okra'
sub_25 = model.wv.most_similar(ingredient, topn=50)
df_sub25 = pd.DataFrame(sub_25,columns = ['co_ingredient', 'similarity'])
df_sub25 = df_sub25[df_sub25['co_ingredient'].str.contains(ingredient)==False]
df_sub25['ingredient'] = ingredient
df_sub25.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,andouille sausage,0.817203,okra
1,ium shrimp,0.757658,okra
2,file powder,0.75712,okra


In [143]:
ingredient = 'onion'
sub_26 = model.wv.most_similar(ingredient, topn=50)
df_sub26 = pd.DataFrame(sub_26,columns = ['co_ingredient', 'similarity'])
df_sub26 = df_sub26[df_sub26['co_ingredient'].str.contains(ingredient)==False]
df_sub26['ingredient'] = ingredient
df_sub26.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
4,scallion,0.796708,onion
7,ham hock,0.647278,onion
9,shallot,0.578796,onion


In [144]:
ingredient = 'parsley'
sub_27 = model.wv.most_similar(ingredient, topn=50)
df_sub27 = pd.DataFrame(sub_27,columns = ['co_ingredient', 'similarity'])
df_sub27 = df_sub27[df_sub27['co_ingredient'].str.contains(ingredient)==False]
df_sub27['ingredient'] = ingredient
df_sub27.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
1,parmesan cheese,0.763097,parsley
2,thyme leaf,0.760731,parsley
3,basil,0.758283,parsley


In [145]:
ingredient = 'parsnip'
sub_28 = model.wv.most_similar(ingredient, topn=50)
df_sub28 = pd.DataFrame(sub_28,columns = ['co_ingredient', 'similarity'])
df_sub28 = df_sub28[df_sub28['co_ingredient'].str.contains(ingredient)==False]
df_sub28['ingredient'] = ingredient
df_sub28.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,veal,0.753622,parsnip
1,hungarian hot paprika,0.74755,parsnip
2,navy bean,0.720113,parsnip


In [147]:
ingredient = 'pea'
sub_29 = model.wv.most_similar(ingredient, topn=50)
df_sub29 = pd.DataFrame(sub_29,columns = ['co_ingredient', 'similarity'])
df_sub29 = df_sub29[df_sub29['co_ingredient'].str.contains(ingredient)==False]
df_sub29['ingredient'] = ingredient
df_sub29.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,chily,0.654575,pea
1,floret,0.646578,pea
2,cauliflower,0.639548,pea


In [148]:
ingredient = 'pickle'
sub_30 = model.wv.most_similar(ingredient, topn=50)
df_sub30 = pd.DataFrame(sub_30,columns = ['co_ingredient', 'similarity'])
df_sub30 = df_sub30[df_sub30['co_ingredient'].str.contains(ingredient)==False]
df_sub30['ingredient'] = ingredient
df_sub30.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,corn oil,0.777982,pickle
1,hellmann best food light mayonnai,0.772619,pickle
3,turbinado,0.75729,pickle


In [152]:
ingredient = 'potato'
sub_31 = model.wv.most_similar(ingredient, topn=50)
df_sub31 = pd.DataFrame(sub_31,columns = ['co_ingredient', 'similarity'])
df_sub31 = df_sub31[df_sub31['co_ingredient'].str.contains(ingredient)==False]
df_sub31['ingredient'] = ingredient
df_sub31.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,egg noodle,0.707073,potato
1,sauerkraut,0.66248,potato
2,hungarian paprika,0.635739,potato


In [153]:
ingredient = 'pumpkin'
sub_32 = model.wv.most_similar(ingredient, topn=50)
df_sub32 = pd.DataFrame(sub_32,columns = ['co_ingredient', 'similarity'])
df_sub32 = df_sub32[df_sub32['co_ingredient'].str.contains(ingredient)==False]
df_sub32['ingredient'] = ingredient
df_sub32.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,bird chile,0.800097,pumpkin
1,mint leaf,0.766023,pumpkin
2,thai curry paste,0.757439,pumpkin


In [155]:
ingredient = 'radish'
sub_33 = model.wv.most_similar(ingredient, topn=50)
df_sub33 = pd.DataFrame(sub_33,columns = ['co_ingredient', 'similarity'])
df_sub33 = df_sub33[df_sub33['co_ingredient'].str.contains(ingredient)==False]
df_sub33['ingredient'] = ingredient
df_sub33.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,les cucumber,0.783172,radish
1,straw mushroom,0.767039,radish
2,tamarind,0.737567,radish


In [161]:
ingredient = 'sauerkraut'
sub_34 = model.wv.most_similar(ingredient, topn=50)
df_sub34 = pd.DataFrame(sub_34,columns = ['co_ingredient', 'similarity'])
df_sub34 = df_sub34[df_sub34['co_ingredient'].str.contains(ingredient)==False]
df_sub34['ingredient'] = ingredient
df_sub34.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,egg noodle,0.703878,sauerkraut
1,tomato paste,0.671219,sauerkraut
2,wine,0.670511,sauerkraut


In [163]:
ingredient = 'shallot'
sub_35 = model.wv.most_similar(ingredient, topn=50)
df_sub35 = pd.DataFrame(sub_35,columns = ['co_ingredient', 'similarity'])
df_sub35 = df_sub35[df_sub35['co_ingredient'].str.contains(ingredient)==False]
df_sub35['ingredient'] = ingredient
df_sub35.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,pearl onion,0.710526,shallot
1,small yellow onion,0.684168,shallot
2,purple onion,0.68322,shallot


In [165]:
ingredient = 'snow pea'
sub_36 = model.wv.most_similar(ingredient, topn=50)
df_sub36 = pd.DataFrame(sub_36,columns = ['co_ingredient', 'similarity'])
df_sub36 = df_sub36[df_sub36['co_ingredient'].str.contains(ingredient)==False]
df_sub36['ingredient'] = ingredient
df_sub36.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,bok choy,0.769804,snow pea
1,broccoli floret,0.750537,snow pea
2,tamarus soy sauce,0.746684,snow pea


In [167]:
ingredient = 'spinach'
sub_37 = model.wv.most_similar(ingredient, topn=50)
df_sub37 = pd.DataFrame(sub_37,columns = ['co_ingredient', 'similarity'])
df_sub37 = df_sub37[df_sub37['co_ingredient'].str.contains(ingredient)==False]
df_sub37['ingredient'] = ingredient
df_sub37.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,fetum cheese,0.785129,spinach
1,fetum cheese crumble,0.758398,spinach
2,greek seasoning,0.713143,spinach


In [170]:
ingredient = 'sweet potato'
sub_38 = model.wv.most_similar(ingredient, topn=50)
df_sub38 = pd.DataFrame(sub_38,columns = ['co_ingredient', 'similarity'])
df_sub38 = df_sub38[df_sub38['co_ingredient'].str.contains(ingredient)==False]
df_sub38['ingredient'] = ingredient
df_sub38.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,salad dressing,0.746,sweet potato
1,corn husk,0.745769,sweet potato
2,thai curry paste,0.708056,sweet potato


In [172]:
ingredient = 'tomato'
sub_39 = model.wv.most_similar(ingredient, topn=50)
df_sub39 = pd.DataFrame(sub_39,columns = ['co_ingredient', 'similarity'])
df_sub39 = df_sub39[df_sub39['co_ingredient'].str.contains(ingredient)==False]
df_sub39['ingredient'] = ingredient
df_sub39.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
5,potato,0.536735,tomato
7,black bean,0.494946,tomato
8,salsa,0.482346,tomato


In [173]:
ingredient = 'tomato paste'
sub_40 = model.wv.most_similar(ingredient, topn=50)
df_sub40 = pd.DataFrame(sub_40,columns = ['co_ingredient', 'similarity'])
df_sub40 = df_sub40[df_sub40['co_ingredient'].str.contains(ingredient)==False]
df_sub40['ingredient'] = ingredient
df_sub40.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,bay leaf,0.769148,tomato paste
1,beef stew meat,0.727453,tomato paste
2,chuck,0.712294,tomato paste


In [174]:
ingredient = 'turnip'
sub_41 = model.wv.most_similar(ingredient, topn=50)
df_sub41 = pd.DataFrame(sub_41,columns = ['co_ingredient', 'similarity'])
df_sub41 = df_sub41[df_sub41['co_ingredient'].str.contains(ingredient)==False]
df_sub41['ingredient'] = ingredient
df_sub41.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,lard,0.685719,turnip
1,tomato soup,0.68269,turnip
2,non fat smy cream,0.612673,turnip


In [175]:
ingredient = 'water chestnut'
sub_42 = model.wv.most_similar(ingredient, topn=50)
df_sub42 = pd.DataFrame(sub_42,columns = ['co_ingredient', 'similarity'])
df_sub42 = df_sub42[df_sub42['co_ingredient'].str.contains(ingredient)==False]
df_sub42['ingredient'] = ingredient
df_sub42.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,butter lettuce,0.767086,water chestnut
1,mung bean sprout,0.763924,water chestnut
2,shiitake,0.745072,water chestnut


In [178]:
ingredient = 'zucchini'
sub_43 = model.wv.most_similar(ingredient, topn=50)
df_sub43 = pd.DataFrame(sub_43,columns = ['co_ingredient', 'similarity'])
df_sub43 = df_sub43[df_sub43['co_ingredient'].str.contains(ingredient)==False]
df_sub43['ingredient'] = ingredient
df_sub43.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,black olife,0.81051,zucchini
1,flmy tortilla,0.807458,zucchini
2,olife,0.789339,zucchini


In [180]:
ingredient = 'black bean'
sub_44 = model.wv.most_similar(ingredient, topn=50)
df_sub44 = pd.DataFrame(sub_44,columns = ['co_ingredient', 'similarity'])
df_sub44 = df_sub44[df_sub44['co_ingredient'].str.contains(ingredient)==False]
df_sub44['ingredient'] = ingredient
df_sub44.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,coconut cream,0.706912,black bean
1,brown rice,0.676955,black bean
2,jalapeno chily,0.672055,black bean


In [181]:
ingredient = 'almond'
sub_45 = model.wv.most_similar(ingredient, topn=50)
df_sub45 = pd.DataFrame(sub_45,columns = ['co_ingredient', 'similarity'])
df_sub45 = df_sub45[df_sub45['co_ingredient'].str.contains(ingredient)==False]
df_sub45['ingredient'] = ingredient
df_sub45.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,rum,0.857428,almond
2,cream tartar,0.832596,almond
3,oat,0.810358,almond


In [182]:
ingredient = 'cashew'
sub_46 = model.wv.most_similar(ingredient, topn=50)
df_sub46 = pd.DataFrame(sub_46,columns = ['co_ingredient', 'similarity'])
df_sub46 = df_sub46[df_sub46['co_ingredient'].str.contains(ingredient)==False]
df_sub46['ingredient'] = ingredient
df_sub46.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,hoisin sauce,0.765905,cashew
1,shiitake mushroom,0.744919,cashew
2,dry roast peanut,0.722101,cashew


In [184]:
ingredient = 'coconut milk'
sub_47 = model.wv.most_similar(ingredient, topn=50)
df_sub47 = pd.DataFrame(sub_47,columns = ['co_ingredient', 'similarity'])
df_sub47 = df_sub47[df_sub47['co_ingredient'].str.contains(ingredient)==False]
df_sub47['ingredient'] = ingredient
df_sub47.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,curry paste,0.799028,coconut milk
1,curry powder,0.758541,coconut milk
2,lemongras,0.754584,coconut milk


In [186]:
ingredient = 'hazelnut'
sub_48 = model.wv.most_similar(ingredient, topn=50)
df_sub48 = pd.DataFrame(sub_48,columns = ['co_ingredient', 'similarity'])
df_sub48 = df_sub48[df_sub48['co_ingredient'].str.contains(ingredient)==False]
df_sub48['ingredient'] = ingredient
df_sub48.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,gelatin,0.919828,hazelnut
1,mascarpone,0.906019,hazelnut
2,semi sweet chocolate morsel,0.897564,hazelnut


In [188]:
ingredient = 'peanut'
sub_49 = model.wv.most_similar(ingredient, topn=50)
df_sub49 = pd.DataFrame(sub_49,columns = ['co_ingredient', 'similarity'])
df_sub49 = df_sub49[df_sub49['co_ingredient'].str.contains(ingredient)==False]
df_sub49['ingredient'] = ingredient
df_sub49.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,fish sauce,0.871395,peanut
1,beansprout,0.840041,peanut
2,rice noodle,0.804187,peanut


In [189]:
ingredient = 'pecan'
sub_50 = model.wv.most_similar(ingredient, topn=50)
df_sub50 = pd.DataFrame(sub_50,columns = ['co_ingredient', 'similarity'])
df_sub50 = df_sub50[df_sub50['co_ingredient'].str.contains(ingredient)==False]
df_sub50['ingredient'] = ingredient
df_sub50.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,semi sweet chocolate morsel,0.935094,pecan
1,oat,0.920535,pecan
2,almond extract,0.916135,pecan


In [190]:
ingredient = 'pine nut'
sub_51 = model.wv.most_similar(ingredient, topn=50)
df_sub51 = pd.DataFrame(sub_51,columns = ['co_ingredient', 'similarity'])
df_sub51 = df_sub51[df_sub51['co_ingredient'].str.contains(ingredient)==False]
df_sub51['ingredient'] = ingredient
df_sub51.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,tomato oil,0.828423,pine nut
1,pizza dough,0.825608,pine nut
2,basil pesto sauce,0.81786,pine nut


In [191]:
ingredient = 'pistachio'
sub_52 = model.wv.most_similar(ingredient, topn=50)
df_sub52 = pd.DataFrame(sub_52,columns = ['co_ingredient', 'similarity'])
df_sub52 = df_sub52[df_sub52['co_ingredient'].str.contains(ingredient)==False]
df_sub52['ingredient'] = ingredient
df_sub52.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,irish whiskey,0.866205,pistachio
1,syrup,0.861056,pistachio
2,berry,0.854761,pistachio


In [192]:
ingredient = 'walnut'
sub_53 = model.wv.most_similar(ingredient, topn=50)
df_sub53 = pd.DataFrame(sub_53,columns = ['co_ingredient', 'similarity'])
df_sub53 = df_sub53[df_sub53['co_ingredient'].str.contains(ingredient)==False]
df_sub53['ingredient'] = ingredient
df_sub53.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,light corn syrup,0.919226,walnut
1,almond paste,0.89383,walnut
2,semisweet chocolate,0.877877,walnut


In [193]:
ingredient = 'bacon'
sub_54 = model.wv.most_similar(ingredient, topn=50)
df_sub54 = pd.DataFrame(sub_54,columns = ['co_ingredient', 'similarity'])
df_sub54 = df_sub54[df_sub54['co_ingredient'].str.contains(ingredient)==False]
df_sub54['ingredient'] = ingredient
df_sub54.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,hungarian paprika,0.742435,bacon
1,hungarian sweet paprika,0.718678,bacon
2,parsley leaf,0.715941,bacon


In [197]:
ingredient = 'beef roast'
sub_55 = model.wv.most_similar(ingredient, topn=50)
df_sub55 = pd.DataFrame(sub_55,columns = ['co_ingredient', 'similarity'])
df_sub55 = df_sub55[df_sub55['co_ingredient'].str.contains(ingredient)==False]
df_sub55['ingredient'] = ingredient
df_sub55.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,gluten free soy sauce,0.589109,beef roast
1,fine sea salt,0.581276,beef roast
2,madra curry powder,0.580042,beef roast


In [199]:
ingredient = 'steak'
sub_56 = model.wv.most_similar(ingredient, topn=50)
df_sub56 = pd.DataFrame(sub_56,columns = ['co_ingredient', 'similarity'])
df_sub56 = df_sub56[df_sub56['co_ingredient'].str.contains(ingredient)==False]
df_sub56['ingredient'] = ingredient
df_sub56.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,chow mein noodle,0.756638,steak
1,onion soup mix,0.7364,steak
2,thai curry paste,0.695935,steak


In [200]:
ingredient = 'beef shank'
sub_57 = model.wv.most_similar(ingredient, topn=50)
df_sub57 = pd.DataFrame(sub_57,columns = ['co_ingredient', 'similarity'])
df_sub57 = df_sub57[df_sub57['co_ingredient'].str.contains(ingredient)==False]
df_sub57['ingredient'] = ingredient
df_sub57.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,white wine vinegar,0.633426,beef shank
1,shell,0.62979,beef shank
2,spaghettus,0.620896,beef shank


In [203]:
ingredient = 'chicken'
sub_58 = model.wv.most_similar(ingredient, topn=50)
df_sub58 = pd.DataFrame(sub_58,columns = ['co_ingredient', 'similarity'])
df_sub58 = df_sub58[df_sub58['co_ingredient'].str.contains(ingredient)==False]
df_sub58['ingredient'] = ingredient
df_sub58.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
2,chily,0.629562,chicken
3,cilantro leaf,0.613302,chicken
4,oxtail,0.56721,chicken


In [204]:
ingredient = 'chicken'
sub_59 = model.wv.most_similar(ingredient, topn=50)
df_sub59 = pd.DataFrame(sub_59,columns = ['co_ingredient', 'similarity'])
df_sub59 = df_sub59[df_sub59['co_ingredient'].str.contains(ingredient)==False]
df_sub59['ingredient'] = ingredient
df_sub59.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
2,chily,0.629562,chicken
3,cilantro leaf,0.613302,chicken
4,oxtail,0.56721,chicken


In [205]:
ingredient = 'chicken breast'
sub_60 = model.wv.most_similar(ingredient, topn=50)
df_sub60 = pd.DataFrame(sub_60,columns = ['co_ingredient', 'similarity'])
df_sub60 = df_sub60[df_sub60['co_ingredient'].str.contains(ingredient)==False]
df_sub60['ingredient'] = ingredient
df_sub60.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,chicken,0.637062,chicken breast
1,shrimp,0.591867,chicken breast
2,flank steak,0.539353,chicken breast


In [206]:
ingredient = 'chicken leg'
sub_61 = model.wv.most_similar(ingredient, topn=50)
df_sub61 = pd.DataFrame(sub_61,columns = ['co_ingredient', 'similarity'])
df_sub61 = df_sub61[df_sub61['co_ingredient'].str.contains(ingredient)==False]
df_sub61['ingredient'] = ingredient
df_sub61.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,rib eye steak,0.706069,chicken leg
1,jeera,0.675337,chicken leg
2,tilapium fillet,0.665889,chicken leg


In [208]:
ingredient = 'chicken thigh'
sub_62 = model.wv.most_similar(ingredient, topn=50)
df_sub62 = pd.DataFrame(sub_62,columns = ['co_ingredient', 'similarity'])
df_sub62 = df_sub62[df_sub62['co_ingredient'].str.contains(ingredient)==False]
df_sub62['ingredient'] = ingredient
df_sub62.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,chicken,0.761636,chicken thigh
1,stewing beef,0.630492,chicken thigh
2,chicken stock,0.616266,chicken thigh


In [209]:
ingredient = 'chicken wing'
sub_63 = model.wv.most_similar(ingredient, topn=50)
df_sub63 = pd.DataFrame(sub_63,columns = ['co_ingredient', 'similarity'])
df_sub63 = df_sub63[df_sub63['co_ingredient'].str.contains(ingredient)==False]
df_sub63['ingredient'] = ingredient
df_sub63.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,baby rib,0.753581,chicken wing
1,pork loin,0.732238,chicken wing
2,apricot jam,0.661526,chicken wing


In [210]:
ingredient = 'chorizo'
sub_64 = model.wv.most_similar(ingredient, topn=50)
df_sub64 = pd.DataFrame(sub_64,columns = ['co_ingredient', 'similarity'])
df_sub64 = df_sub64[df_sub64['co_ingredient'].str.contains(ingredient)==False]
df_sub64['ingredient'] = ingredient
df_sub64.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,leg lamb,0.725503,chorizo
1,poultry seasoning,0.715331,chorizo
2,tilapium fillet,0.708023,chorizo


In [221]:
ingredient = 'ham'
sub_65 = model.wv.most_similar(ingredient, topn=50)
df_sub65 = pd.DataFrame(sub_65,columns = ['co_ingredient', 'similarity'])
df_sub65 = df_sub65[df_sub65['co_ingredient'].str.contains(ingredient)==False]
df_sub65['ingredient'] = ingredient
df_sub65.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,swiss cheese,0.8133,ham
1,fettucine,0.73383,ham
2,hot pepper sauce,0.717026,ham


In [222]:
ingredient = 'hot dog'
sub_66 = model.wv.most_similar(ingredient, topn=50)
df_sub66 = pd.DataFrame(sub_66,columns = ['co_ingredient', 'similarity'])
df_sub66 = df_sub66[df_sub66['co_ingredient'].str.contains(ingredient)==False]
df_sub66['ingredient'] = ingredient
df_sub66.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,lemon slice,0.864758,hot dog
2,japanese eggplant,0.829783,hot dog
3,pecan half,0.787091,hot dog


In [223]:
ingredient = 'lamb'
sub_67 = model.wv.most_similar(ingredient, topn=50)
df_sub67 = pd.DataFrame(sub_67,columns = ['co_ingredient', 'similarity'])
df_sub67 = df_sub67[df_sub67['co_ingredient'].str.contains(ingredient)==False]
df_sub67['ingredient'] = ingredient
df_sub67.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,coriander,0.663449,lamb
1,cardamom,0.641446,lamb
2,orange,0.64138,lamb


In [225]:
ingredient = 'lamb chop'
sub_68 = model.wv.most_similar(ingredient, topn=50)
df_sub68 = pd.DataFrame(sub_68,columns = ['co_ingredient', 'similarity'])
df_sub68 = df_sub68[df_sub68['co_ingredient'].str.contains(ingredient)==False]
df_sub68['ingredient'] = ingredient
df_sub68.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,pecan half,0.851823,lamb chop
1,raw sugar,0.836343,lamb chop
2,low fat plain yogurt,0.835442,lamb chop


In [230]:
ingredient = 'lamb shoulder'
sub_69 = model.wv.most_similar(ingredient, topn=50)
df_sub69 = pd.DataFrame(sub_69,columns = ['co_ingredient', 'similarity'])
df_sub69 = df_sub69[df_sub69['co_ingredient'].str.contains(ingredient)==False]
df_sub69['ingredient'] = ingredient
df_sub69.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,light butter,0.692712,lamb shoulder
1,les cucumber,0.685589,lamb shoulder
2,sausage link,0.669117,lamb shoulder


In [234]:
ingredient = 'oxtail'
sub_70 = model.wv.most_similar(ingredient, topn=50)
df_sub70 = pd.DataFrame(sub_70,columns = ['co_ingredient', 'similarity'])
df_sub70 = df_sub70[df_sub70['co_ingredient'].str.contains(ingredient)==False]
df_sub70['ingredient'] = ingredient
df_sub70.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,stewing beef,0.691286,oxtail
1,hungarian sweet paprika,0.670513,oxtail
2,sweet pepper,0.667079,oxtail


In [236]:
ingredient = 'pepperoni'
sub_71 = model.wv.most_similar(ingredient, topn=50)
df_sub71 = pd.DataFrame(sub_71,columns = ['co_ingredient', 'similarity'])
df_sub71 = df_sub71[df_sub71['co_ingredient'].str.contains(ingredient)==False]
df_sub71['ingredient'] = ingredient
df_sub71.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,part skim mozzarella cheese,0.895034,pepperoni
1,cracker,0.869509,pepperoni
2,marinara sauce,0.864313,pepperoni


In [238]:
ingredient = 'polish sausage'
sub_72 = model.wv.most_similar(ingredient, topn=50)
df_sub72 = pd.DataFrame(sub_72,columns = ['co_ingredient', 'similarity'])
df_sub72 = df_sub72[df_sub72['co_ingredient'].str.contains(ingredient)==False]
df_sub72['ingredient'] = ingredient
df_sub72.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,strawberry jam,0.821409,polish sausage
1,white sandwich bread,0.816632,polish sausage
2,crescent roll,0.814907,polish sausage


In [239]:
ingredient = 'pork'
sub_73 = model.wv.most_similar(ingredient, topn=50)
df_sub73 = pd.DataFrame(sub_73,columns = ['co_ingredient', 'similarity'])
df_sub73 = df_sub73[df_sub73['co_ingredient'].str.contains(ingredient)==False]
df_sub73['ingredient'] = ingredient
df_sub73.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,white pepper,0.775981,pork
1,wonton wrapper,0.740246,pork
2,black vinegar,0.687613,pork


In [240]:
ingredient = 'pork chop'
sub_74 = model.wv.most_similar(ingredient, topn=50)
df_sub74 = pd.DataFrame(sub_74,columns = ['co_ingredient', 'similarity'])
df_sub74 = df_sub74[df_sub74['co_ingredient'].str.contains(ingredient)==False]
df_sub74['ingredient'] = ingredient
df_sub74.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,pork loin,0.75643,pork chop
1,apricot preserve,0.701904,pork chop
2,pork baby rib,0.697705,pork chop


In [241]:
ingredient = 'pork loin'
sub_75 = model.wv.most_similar(ingredient, topn=50)
df_sub75 = pd.DataFrame(sub_75,columns = ['co_ingredient', 'similarity'])
df_sub75 = df_sub75[df_sub75['co_ingredient'].str.contains(ingredient)==False]
df_sub75['ingredient'] = ingredient
df_sub75.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,baby rib,0.806151,pork loin
1,apricot jam,0.783278,pork loin
2,bourbon whiskey,0.770966,pork loin


In [242]:
ingredient = 'pork rib'
sub_76 = model.wv.most_similar(ingredient, topn=50)
df_sub76 = pd.DataFrame(sub_76,columns = ['co_ingredient', 'similarity'])
df_sub76 = df_sub76[df_sub76['co_ingredient'].str.contains(ingredient)==False]
df_sub76['ingredient'] = ingredient
df_sub76.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,apricot preserve,0.70988,pork rib
1,club soda,0.66153,pork rib
2,rice wine,0.652172,pork rib


In [243]:
ingredient = 'pork roast'
sub_77 = model.wv.most_similar(ingredient, topn=50)
df_sub77 = pd.DataFrame(sub_77,columns = ['co_ingredient', 'similarity'])
df_sub77 = df_sub77[df_sub77['co_ingredient'].str.contains(ingredient)==False]
df_sub77['ingredient'] = ingredient
df_sub77.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,montreal steak seasoning,0.770638,pork roast
1,bun,0.733181,pork roast
2,country crock spread,0.67431,pork roast


In [244]:
ingredient = 'pork shoulder'
sub_76 = model.wv.most_similar(ingredient, topn=50)
df_sub76 = pd.DataFrame(sub_76,columns = ['co_ingredient', 'similarity'])
df_sub76 = df_sub76[df_sub76['co_ingredient'].str.contains(ingredient)==False]
df_sub76['ingredient'] = ingredient
df_sub76.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,pork butt,0.744961,pork shoulder
1,salmon,0.720677,pork shoulder
2,hoisin sauce,0.653758,pork shoulder


In [247]:
ingredient = 'salami'
sub_77 = model.wv.most_similar(ingredient, topn=50)
df_sub77 = pd.DataFrame(sub_77,columns = ['co_ingredient', 'similarity'])
df_sub77 = df_sub77[df_sub77['co_ingredient'].str.contains(ingredient)==False]
df_sub77['ingredient'] = ingredient
df_sub77.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,pizza crust,0.833253,salami
1,vinaigrette,0.818534,salami
2,cracker,0.814799,salami


In [248]:
ingredient = 'sausage'
sub_76 = model.wv.most_similar(ingredient, topn=50)
df_sub76 = pd.DataFrame(sub_76,columns = ['co_ingredient', 'similarity'])
df_sub76 = df_sub76[df_sub76['co_ingredient'].str.contains(ingredient)==False]
df_sub76['ingredient'] = ingredient
df_sub76.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,long grain rice,0.798375,sausage
2,creole seasoning,0.762175,sausage
3,tabasco pepper sauce,0.756981,sausage


In [251]:
ingredient = 'turkey'
sub_77 = model.wv.most_similar(ingredient, topn=50)
df_sub77 = pd.DataFrame(sub_77,columns = ['co_ingredient', 'similarity'])
df_sub77 = df_sub77[df_sub77['co_ingredient'].str.contains(ingredient)==False]
df_sub77['ingredient'] = ingredient
df_sub77.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,rosemary,0.689243,turkey
1,herbe provence,0.662844,turkey
2,can t believe it s butter spread,0.651088,turkey


In [253]:
ingredient = 'veal'
sub_78 = model.wv.most_similar(ingredient, topn=50)
df_sub78 = pd.DataFrame(sub_78,columns = ['co_ingredient', 'similarity'])
df_sub78 = df_sub78[df_sub78['co_ingredient'].str.contains(ingredient)==False]
df_sub78['ingredient'] = ingredient
df_sub78.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,parsnip,0.753622,veal
1,leg lamb,0.667189,veal
2,leek,0.661809,veal


In [259]:
ingredient = 'brown rice'
sub_79 = model.wv.most_similar(ingredient, topn=50)
df_sub79 = pd.DataFrame(sub_79,columns = ['co_ingredient', 'similarity'])
df_sub79 = df_sub79[df_sub79['co_ingredient'].str.contains(ingredient)==False]
df_sub79['ingredient'] = ingredient
df_sub79.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,jalapeno chily,0.762946,brown rice
1,white rice,0.721335,brown rice
2,yellow bell pepper,0.709467,brown rice


In [260]:
ingredient = 'corn'
sub_80 = model.wv.most_similar(ingredient, topn=50)
df_sub80 = pd.DataFrame(sub_80,columns = ['co_ingredient', 'similarity'])
df_sub80 = df_sub80[df_sub80['co_ingredient'].str.contains(ingredient)==False]
df_sub80['ingredient'] = ingredient
df_sub80.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,white wine,0.689608,corn
1,button mushroom,0.673616,corn
2,chicken tenderloin,0.671754,corn


In [262]:
ingredient = 'cornmeal'
sub_81 = model.wv.most_similar(ingredient, topn=50)
df_sub81 = pd.DataFrame(sub_81,columns = ['co_ingredient', 'similarity'])
df_sub81 = df_sub81[df_sub81['co_ingredient'].str.contains(ingredient)==False]
df_sub81['ingredient'] = ingredient
df_sub81.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,leg lamb,0.653395,cornmeal
1,dry bread crumb,0.628187,cornmeal
2,panko breadcrumb,0.62181,cornmeal


In [266]:
ingredient = 'noodle'
sub_82 = model.wv.most_similar(ingredient, topn=50)
df_sub82 = pd.DataFrame(sub_82,columns = ['co_ingredient', 'similarity'])
df_sub82 = df_sub82[df_sub82['co_ingredient'].str.contains(ingredient)==False]
df_sub82['ingredient'] = ingredient
df_sub82.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,ginger root,0.709123,noodle
1,brisket,0.661701,noodle
2,sesame paste,0.635978,noodle


In [270]:
ingredient = 'quinoa'
sub_83 = model.wv.most_similar(ingredient, topn=50)
df_sub83 = pd.DataFrame(sub_83,columns = ['co_ingredient', 'similarity'])
df_sub83 = df_sub83[df_sub83['co_ingredient'].str.contains(ingredient)==False]
df_sub83['ingredient'] = ingredient
df_sub83.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,avocado,0.740836,quinoa
1,rice vermicelli,0.723262,quinoa
2,cherry tomato,0.708246,quinoa


In [271]:
ingredient = 'rice'
sub_84 = model.wv.most_similar(ingredient, topn=50)
df_sub84 = pd.DataFrame(sub_65,columns = ['co_ingredient', 'similarity'])
df_sub84 = df_sub65[df_sub65['co_ingredient'].str.contains(ingredient)==False]
df_sub84['ingredient'] = ingredient
df_sub84.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,swiss cheese,0.8133,rice
1,fettucine,0.73383,rice
2,hot pepper sauce,0.717026,rice


In [275]:
ingredient = 'oat'
sub_85 = model.wv.most_similar(ingredient, topn=50)
df_sub85 = pd.DataFrame(sub_85,columns = ['co_ingredient', 'similarity'])
df_sub85 = df_sub85[df_sub85['co_ingredient'].str.contains(ingredient)==False]
df_sub85['ingredient'] = ingredient
df_sub85.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,pecan,0.920535,oat
1,banana,0.911007,oat
2,syrup,0.886514,oat


In [281]:
ingredient = 'salmon'
sub_86 = model.wv.most_similar(ingredient, topn=50)
df_sub86 = pd.DataFrame(sub_86,columns = ['co_ingredient', 'similarity'])
df_sub86 = df_sub86[df_sub86['co_ingredient'].str.contains(ingredient)==False]
df_sub86['ingredient'] = ingredient
df_sub86.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,pork shoulder,0.720677,salmon
1,cashew,0.684655,salmon
2,greek yogurt,0.680182,salmon


In [288]:
ingredient = 'crab'
sub_87 = model.wv.most_similar(ingredient, topn=50)
df_sub87 = pd.DataFrame(sub_87,columns = ['co_ingredient', 'similarity'])
df_sub87 = df_sub87[df_sub87['co_ingredient'].str.contains(ingredient)==False]
df_sub87['ingredient'] = ingredient
df_sub87.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,boiling potato,0.795655,crab
1,piecrust,0.759741,crab
2,light butter,0.757472,crab


In [296]:
ingredient = 'lobster'
sub_88 = model.wv.most_similar(ingredient, topn=50)
df_sub88 = pd.DataFrame(sub_88,columns = ['co_ingredient', 'similarity'])
df_sub88 = df_sub88[df_sub88['co_ingredient'].str.contains(ingredient)==False]
df_sub88['ingredient'] = ingredient
df_sub88.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,limeade concentrate,0.828706,lobster
1,white sandwich bread,0.772852,lobster
2,halibut fillet,0.756378,lobster


In [301]:
ingredient = 'mussel'
sub_89 = model.wv.most_similar(ingredient, topn=50)
df_sub89 = pd.DataFrame(sub_89,columns = ['co_ingredient', 'similarity'])
df_sub89 = df_sub89[df_sub89['co_ingredient'].str.contains(ingredient)==False]
df_sub89['ingredient'] = ingredient
df_sub89.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,cotija,0.792929,mussel
1,poultry seasoning,0.758042,mussel
2,pepperoncini,0.756665,mussel


In [303]:
ingredient = 'oyster'
sub_90 = model.wv.most_similar(ingredient, topn=50)
df_sub90 = pd.DataFrame(sub_88,columns = ['co_ingredient', 'similarity'])
df_sub90 = df_sub88[df_sub88['co_ingredient'].str.contains(ingredient)==False]
df_sub90['ingredient'] = ingredient
df_sub90.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,limeade concentrate,0.828706,oyster
1,white sandwich bread,0.772852,oyster
2,halibut fillet,0.756378,oyster


In [308]:
ingredient = 'prawn'
sub_91 = model.wv.most_similar(ingredient, topn=50)
df_sub91 = pd.DataFrame(sub_91,columns = ['co_ingredient', 'similarity'])
df_sub91 = df_sub91[df_sub91['co_ingredient'].str.contains(ingredient)==False]
df_sub91['ingredient'] = ingredient
df_sub91.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,spice,0.736491,prawn
1,chili pepper,0.63844,prawn
2,oyster mushroom,0.635791,prawn


In [313]:
ingredient = 'scallop'
sub_92 = model.wv.most_similar(ingredient, topn=50)
df_sub92 = pd.DataFrame(sub_92,columns = ['co_ingredient', 'similarity'])
df_sub92 = df_sub92[df_sub92['co_ingredient'].str.contains(ingredient)==False]
df_sub92['ingredient'] = ingredient
df_sub92.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,fettuccine pastum,0.839875,scallop
2,semolina,0.773225,scallop
3,sea salt flake,0.770953,scallop


In [315]:
ingredient = 'shrimp'
sub_93 = model.wv.most_similar(ingredient, topn=50)
df_sub93 = pd.DataFrame(sub_93,columns = ['co_ingredient', 'similarity'])
df_sub93 = df_sub93[df_sub93['co_ingredient'].str.contains(ingredient)==False]
df_sub93['ingredient'] = ingredient
df_sub93.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,low sodium chicken broth,0.721623,shrimp
2,chicken stock,0.69948,shrimp
3,bell pepper,0.689243,shrimp


In [320]:
ingredient = 'whitefish'
sub_94 = model.wv.most_similar(ingredient, topn=50)
df_sub94 = pd.DataFrame(sub_94,columns = ['co_ingredient', 'similarity'])
df_sub94 = df_sub94[df_sub94['co_ingredient'].str.contains(ingredient)==False]
df_sub94['ingredient'] = ingredient
df_sub94.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,white sandwich bread,0.822563,whitefish
1,hot dog bun,0.809492,whitefish
2,wheat tortilla,0.794086,whitefish


In [321]:
ingredient = 'almond milk'
sub_95 = model.wv.most_similar(ingredient, topn=50)
df_sub95 = pd.DataFrame(sub_95,columns = ['co_ingredient', 'similarity'])
df_sub95 = df_sub95[df_sub95['co_ingredient'].str.contains(ingredient)==False]
df_sub95['ingredient'] = ingredient
df_sub95.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,cooking spray,0.636532,almond milk
1,hazelnut,0.625983,almond milk
2,chop pork,0.607695,almond milk


In [322]:
ingredient = 'cottage cheese'
sub_96 = model.wv.most_similar(ingredient, topn=50)
df_sub96 = pd.DataFrame(sub_96,columns = ['co_ingredient', 'similarity'])
df_sub96 = df_sub96[df_sub96['co_ingredient'].str.contains(ingredient)==False]
df_sub96['ingredient'] = ingredient
df_sub96.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,self rising flmy,0.785466,cottage cheese
1,marinara sauce,0.775166,cottage cheese
2,bailey irish cream liqueur,0.769293,cottage cheese


In [323]:
ingredient = 'egg'
sub_97 = model.wv.most_similar(ingredient, topn=50)
df_sub97 = pd.DataFrame(sub_97,columns = ['co_ingredient', 'similarity'])
df_sub97 = df_sub97[df_sub97['co_ingredient'].str.contains(ingredient)==False]
df_sub97['ingredient'] = ingredient
df_sub97.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
3,dough,0.499482,egg
4,heavy whipping cream,0.491935,egg
5,corn syrup,0.491145,egg


In [324]:
ingredient = 'pepper jack'
sub_98 = model.wv.most_similar(ingredient, topn=50)
df_sub98 = pd.DataFrame(sub_98,columns = ['co_ingredient', 'similarity'])
df_sub98 = df_sub98[df_sub98['co_ingredient'].str.contains(ingredient)==False]
df_sub98['ingredient'] = ingredient
df_sub98.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,olive oil spray,0.627782,pepper jack
1,fish sauce,0.611167,pepper jack
2,ium tomato,0.60961,pepper jack


In [328]:
ingredient = 'blue cheese'
sub_99 = model.wv.most_similar(ingredient, topn=50)
df_sub99 = pd.DataFrame(sub_99,columns = ['co_ingredient', 'similarity'])
df_sub99 = df_sub99[df_sub99['co_ingredient'].str.contains(ingredient)==False]
df_sub99['ingredient'] = ingredient
df_sub99.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,basil pesto sauce,0.815667,blue cheese
1,rocket leaf,0.812497,blue cheese
2,cut italian loaf,0.810937,blue cheese


In [331]:
ingredient = 'butter'
sub_100 = model.wv.most_similar(ingredient, topn=50)
df_sub100 = pd.DataFrame(sub_100,columns = ['co_ingredient', 'similarity'])
df_sub100 = df_sub100[df_sub100['co_ingredient'].str.contains(ingredient)==False]
df_sub100['ingredient'] = ingredient
df_sub100.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,margarine,0.626465,butter
1,country crock spread,0.579418,butter
2,corn oil,0.562246,butter


In [332]:
ingredient = 'buttermilk'
sub_101 = model.wv.most_similar(ingredient, topn=50)
df_sub101 = pd.DataFrame(sub_101,columns = ['co_ingredient', 'similarity'])
df_sub101 = df_sub101[df_sub101['co_ingredient'].str.contains(ingredient)==False]
df_sub101['ingredient'] = ingredient
df_sub101.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,baking powder,0.849293,buttermilk
1,milk,0.777356,buttermilk
3,baking soda,0.766876,buttermilk


In [337]:
ingredient = 'cream'
sub_102 = model.wv.most_similar(ingredient, topn=50)
df_sub102 = pd.DataFrame(sub_102,columns = ['co_ingredient', 'similarity'])
df_sub102 = df_sub102[df_sub102['co_ingredient'].str.contains(ingredient)==False]
df_sub102['ingredient'] = ingredient
df_sub102.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,saffron,0.746835,cream
1,fenugreek leaf,0.657145,cream
2,brown cardamom,0.63787,cream


In [338]:
ingredient = 'cream cheese'
sub_103 = model.wv.most_similar(ingredient, topn=50)
df_sub103 = pd.DataFrame(sub_103,columns = ['co_ingredient', 'similarity'])
df_sub103 = df_sub103[df_sub103['co_ingredient'].str.contains(ingredient)==False]
df_sub103['ingredient'] = ingredient
df_sub103.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,french bread,0.723109,cream cheese
1,pine nut,0.680937,cream cheese
2,baguette,0.676046,cream cheese


In [343]:
ingredient = 'ghee'
sub_104 = model.wv.most_similar(ingredient, topn=50)
df_sub104 = pd.DataFrame(sub_104,columns = ['co_ingredient', 'similarity'])
df_sub104 = df_sub104[df_sub104['co_ingredient'].str.contains(ingredient)==False]
df_sub104['ingredient'] = ingredient
df_sub104.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,coriander powder,0.78571,ghee
1,chily,0.785326,ghee
2,yoghurt,0.763034,ghee


In [344]:
ingredient = 'goat cheese'
sub_105 = model.wv.most_similar(ingredient, topn=50)
df_sub105 = pd.DataFrame(sub_105,columns = ['co_ingredient', 'similarity'])
df_sub105 = df_sub105[df_sub105['co_ingredient'].str.contains(ingredient)==False]
df_sub105['ingredient'] = ingredient
df_sub105.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,drain,0.805766,goat cheese
1,chee mozzarella,0.756861,goat cheese
2,arugula,0.691963,goat cheese


In [345]:
ingredient = 'gouda'
sub_106 = model.wv.most_similar(ingredient, topn=50)
df_sub106 = pd.DataFrame(sub_106,columns = ['co_ingredient', 'similarity'])
df_sub106 = df_sub106[df_sub106['co_ingredient'].str.contains(ingredient)==False]
df_sub106['ingredient'] = ingredient
df_sub106.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,quickcooking grit,0.719623,gouda
1,hoagie roll,0.708571,gouda
2,jumbo shrimp,0.69246,gouda


In [346]:
ingredient = 'milk'
sub_107 = model.wv.most_similar(ingredient, topn=50)
df_sub107 = pd.DataFrame(sub_107,columns = ['co_ingredient', 'similarity'])
df_sub107 = df_sub107[df_sub107['co_ingredient'].str.contains(ingredient)==False]
df_sub107['ingredient'] = ingredient
df_sub107.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
1,nutmeg,0.660759,milk
2,stout,0.614117,milk
3,raisin,0.604487,milk


In [348]:
ingredient = 'mozzarella'
sub_108 = model.wv.most_similar(ingredient, topn=50)
df_sub108 = pd.DataFrame(sub_108,columns = ['co_ingredient', 'similarity'])
df_sub108 = df_sub108[df_sub108['co_ingredient'].str.contains(ingredient)==False]
df_sub108['ingredient'] = ingredient
df_sub108.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,basil pesto sauce,0.861951,mozzarella
1,prosciutto,0.85455,mozzarella
2,arugula,0.834547,mozzarella


In [349]:
ingredient = 'parmesan'
sub_109 = model.wv.most_similar(ingredient, topn=50)
df_sub109 = pd.DataFrame(sub_109,columns = ['co_ingredient', 'similarity'])
df_sub109 = df_sub109[df_sub109['co_ingredient'].str.contains(ingredient)==False]
df_sub109['ingredient'] = ingredient
df_sub109.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,pecorino romano cheese,0.777157,parmesan
1,italian bread,0.768819,parmesan
2,sage,0.749036,parmesan


In [351]:
ingredient = 'provolone cheese'
sub_110 = model.wv.most_similar(ingredient, topn=50)
df_sub110 = pd.DataFrame(sub_110,columns = ['co_ingredient', 'similarity'])
df_sub110 = df_sub110[df_sub110['co_ingredient'].str.contains(ingredient)==False]
df_sub110['ingredient'] = ingredient
df_sub110.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,prosciutto,0.857415,provolone cheese
1,italian cheese blend,0.783433,provolone cheese
2,italian bread,0.779942,provolone cheese


In [353]:
ingredient = 'swiss cheese'
sub_111 = model.wv.most_similar(ingredient, topn=50)
df_sub111 = pd.DataFrame(sub_111,columns = ['co_ingredient', 'similarity'])
df_sub111 = df_sub111[df_sub111['co_ingredient'].str.contains(ingredient)==False]
df_sub111['ingredient'] = ingredient
df_sub111.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,ham,0.8133,swiss cheese
1,dill pickle,0.758424,swiss cheese
2,bun,0.722385,swiss cheese


In [355]:
ingredient = 'yoghurt'
sub_112 = model.wv.most_similar(ingredient, topn=50)
df_sub112 = pd.DataFrame(sub_112,columns = ['co_ingredient', 'similarity'])
df_sub112 = df_sub112[df_sub112['co_ingredient'].str.contains(ingredient)==False]
df_sub112['ingredient'] = ingredient
df_sub112.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,coriander powder,0.904573,yoghurt
1,chile powder,0.883207,yoghurt
2,garam masala,0.862316,yoghurt


In [356]:
ingredient = 'chickpea'
sub_111 = model.wv.most_similar(ingredient, topn=50)
df_sub111 = pd.DataFrame(sub_111,columns = ['co_ingredient', 'similarity'])
df_sub111 = df_sub111[df_sub111['co_ingredient'].str.contains(ingredient)==False]
df_sub111['ingredient'] = ingredient
df_sub111.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,garbanzo bean,0.754156,chickpea
1,mint,0.646082,chickpea
2,hothouse cucumber,0.616879,chickpea


In [362]:
ingredient = 'kidney bean'
sub_112 = model.wv.most_similar(ingredient, topn=50)
df_sub112 = pd.DataFrame(sub_112,columns = ['co_ingredient', 'similarity'])
df_sub112 = df_sub112[df_sub112['co_ingredient'].str.contains(ingredient)==False]
df_sub112['ingredient'] = ingredient
df_sub112.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,bean,0.685592,kidney bean
1,navy bean,0.664429,kidney bean
2,andouille sausage link,0.654666,kidney bean


In [364]:
ingredient = 'lentil'
sub_113 = model.wv.most_similar(ingredient, topn=50)
df_sub113 = pd.DataFrame(sub_113,columns = ['co_ingredient', 'similarity'])
df_sub113 = df_sub113[df_sub113['co_ingredient'].str.contains(ingredient)==False]
df_sub113['ingredient'] = ingredient
df_sub113.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,bean,0.66695,lentil
1,jalapeno chily,0.65708,lentil
2,ium potato,0.648108,lentil


In [365]:
ingredient = 'miso'
sub_114 = model.wv.most_similar(ingredient, topn=50)
df_sub114 = pd.DataFrame(sub_114,columns = ['co_ingredient', 'similarity'])
df_sub114 = df_sub114[df_sub114['co_ingredient'].str.contains(ingredient)==False]
df_sub114['ingredient'] = ingredient
df_sub114.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,rice flmy,0.784599,miso
2,wonton wrapper,0.707823,miso
3,chinese noodle,0.656024,miso


In [366]:
ingredient = 'navy bean'
sub_115 = model.wv.most_similar(ingredient, topn=50)
df_sub115 = pd.DataFrame(sub_115,columns = ['co_ingredient', 'similarity'])
df_sub115 = df_sub115[df_sub115['co_ingredient'].str.contains(ingredient)==False]
df_sub115['ingredient'] = ingredient
df_sub115.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,angel hair,0.786515,navy bean
1,dill pickle,0.785239,navy bean
2,hot pepper sauce,0.775234,navy bean


In [368]:
ingredient = 'peanut butter'
sub_116 = model.wv.most_similar(ingredient, topn=50)
df_sub116 = pd.DataFrame(sub_116,columns = ['co_ingredient', 'similarity'])
df_sub116 = df_sub116[df_sub116['co_ingredient'].str.contains(ingredient)==False]
df_sub116['ingredient'] = ingredient
df_sub116.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,sriracha,0.855606,peanut butter
1,firm tofu,0.845402,peanut butter
2,kaffir lime leaf,0.780754,peanut butter


In [381]:
ingredient = 'tofu'
sub_117 = model.wv.most_similar(ingredient, topn=50)
df_sub117 = pd.DataFrame(sub_117,columns = ['co_ingredient', 'similarity'])
df_sub117 = df_sub117[df_sub117['co_ingredient'].str.contains(ingredient)==False]
df_sub117['ingredient'] = ingredient
df_sub117.head(3)

Unnamed: 0,co_ingredient,similarity,ingredient
0,spring roll wrapper,0.804264,tofu
1,shiitake,0.798967,tofu
3,wonton wrapper,0.79793,tofu


In [386]:
new_pd = pd.concat([df_sub1, df_sub2, df_sub3, df_sub4, df_sub5, df_sub6, df_sub7, df_sub8, df_sub9, df_sub10,
                   df_sub11, df_sub12, df_sub13, df_sub14, df_sub15, df_sub16, df_sub17, df_sub18, df_sub19, df_sub20,
                   df_sub21, df_sub22, df_sub23, df_sub24, df_sub25, df_sub26, df_sub27, df_sub28, df_sub29, df_sub30,
                   df_sub31, df_sub32, df_sub33, df_sub34, df_sub35, df_sub36, df_sub37, df_sub38, df_sub39, df_sub40,
                   df_sub41, df_sub42, df_sub43, df_sub44, df_sub45, df_sub46, df_sub47, df_sub48, df_sub49, df_sub50,
                   df_sub51, df_sub52, df_sub53, df_sub54, df_sub55, df_sub56, df_sub57, df_sub58, df_sub59, df_sub60,
                   df_sub61, df_sub62, df_sub63, df_sub64, df_sub65, df_sub66, df_sub67, df_sub68, df_sub69, df_sub70,
                   df_sub71, df_sub72, df_sub73, df_sub74, df_sub75, df_sub76, df_sub77, df_sub78, df_sub79, df_sub80,
                   df_sub81, df_sub82, df_sub83, df_sub84, df_sub85, df_sub86, df_sub87, df_sub88, df_sub89, df_sub90,
                   df_sub91, df_sub92, df_sub93, df_sub94, df_sub95, df_sub96, df_sub97, df_sub98, df_sub99, df_sub100,
                   df_sub101, df_sub102, df_sub103, df_sub104, df_sub105, df_sub106, df_sub107, df_sub108, df_sub109, df_sub110,
                   df_sub111, df_sub112, df_sub113, df_sub114, df_sub115, df_sub116, df_sub117])
new_pd

Unnamed: 0,co_ingredient,similarity,ingredient
0,flatbread,0.841902,artichoke
1,orzo pastum,0.835436,artichoke
2,vine tomato,0.821621,artichoke
3,italian herb,0.807645,artichoke
4,watercres,0.803054,artichoke
5,cracker,0.788926,artichoke
6,part skim mozzarella,0.784945,artichoke
7,chee mozzarella,0.779897,artichoke
8,semolina,0.779609,artichoke
9,drain,0.775215,artichoke


In [388]:
new_pd.to_csv('co_ing.csv')

In [400]:
new_pd = pd.read_csv('co_ing.csv')
new_pd.head()
new_pd1 = new_pd.drop(columns='similarity')
new_pd1.head()
new_pd2 = new_pd1.drop_duplicates()
new_pd2

Unnamed: 0,co_ingredient,ingredient
0,flatbread,artichoke
1,orzo pastum,artichoke
2,tomato,artichoke
3,italian herb,artichoke
4,watercress,artichoke
5,cracker,artichoke
6,mozzarella,artichoke
8,semolina,artichoke
9,drain,artichoke
10,fetum,artichoke


In [None]:
ingredient = 'shallot'
sub_35 = model.wv.most_similar(ingredient, topn=50)
df_sub35 = pd.DataFrame(sub_35,columns = ['co_ingredient', 'similarity'])
df_sub35 = df_sub35[df_sub35['co_ingredient'].str.contains(ingredient)==False]
df_sub35['ingredient'] = ingredient
df_sub35.head(3)

In [398]:
df_ingredients = pd.read_csv('food data per ingredient_new.csv')
df_ingredients.head()

Unnamed: 0,ID,name,Food Group,Protein (g),Calcium (mg),"Iron, Fe (mg)","Potassium, K (mg)",Magnesium (mg),"Vitamin A, IU (IU)",Vitamin C (mg),Omega 3s (mg),Omega 6s (mg),Vitamin B6 (mg),Folate (B9) (mcg),Food Folate (mcg),Folate DFE (mcg)
0,787864,artichoke,Vegetables,2.87,21.0,0.61,284.0,42.0,,7.4,38.0,104.0,0.081,88.0,88.0,88.0
1,169387,arugula,Vegetables,2.58,160.0,1.46,369.0,47.0,2373.0,15.0,170.0,130.0,0.073,97.0,97.0,97.0
2,168389,asparagu,Vegetables,2.2,24.0,2.14,202.0,14.0,756.0,5.6,10.0,40.0,0.091,52.0,52.0,52.0
3,787908,bamboo shoot,Vegetables,3.45,17.0,0.66,706.0,4.0,,4.5,27.0,151.0,0.302,7.0,7.0,7.0
4,788013,beansprout,Vegetables,4.57,31.0,0.9,200.0,32.0,,9.6,327.0,2354.0,0.072,49.0,49.0,49.0


In [401]:
sub_result = pd.merge(new_pd2, df_ingredients,how="inner",on=None,left_on='co_ingredient',right_on='name',left_index=True,
    right_index=False, sort=True, suffixes=("_1", "_2"),copy=True,indicator=False,validate=None,).drop(columns='name')
sub_result.drop_duplicates()

Unnamed: 0,co_ingredient,ingredient,ID,Food Group,Protein (g),Calcium (mg),"Iron, Fe (mg)","Potassium, K (mg)",Magnesium (mg),"Vitamin A, IU (IU)",Vitamin C (mg),Omega 3s (mg),Omega 6s (mg),Vitamin B6 (mg),Folate (B9) (mcg),Food Folate (mcg),Folate DFE (mcg)
127,almond,hazelnut,170158,Nuts and Seeds,20.96,268.0,3.73,713.0,279.0,1.0,0.0,6.0,12945.0,0.136,55.0,55.0,55.0
127,almond,pecan,170158,Nuts and Seeds,20.96,268.0,3.73,713.0,279.0,1.0,0.0,6.0,12945.0,0.136,55.0,55.0,55.0
127,almond,pistachio,170158,Nuts and Seeds,20.96,268.0,3.73,713.0,279.0,1.0,0.0,6.0,12945.0,0.136,55.0,55.0,55.0
127,almond,walnut,170158,Nuts and Seeds,20.96,268.0,3.73,713.0,279.0,1.0,0.0,6.0,12945.0,0.136,55.0,55.0,55.0
127,almond,oat,170158,Nuts and Seeds,20.96,268.0,3.73,713.0,279.0,1.0,0.0,6.0,12945.0,0.136,55.0,55.0,55.0
127,almond,buttermilk,170158,Nuts and Seeds,20.96,268.0,3.73,713.0,279.0,1.0,0.0,6.0,12945.0,0.136,55.0,55.0,55.0
127,almond,cream,170158,Nuts and Seeds,20.96,268.0,3.73,713.0,279.0,1.0,0.0,6.0,12945.0,0.136,55.0,55.0,55.0
127,almond,milk,170158,Nuts and Seeds,20.96,268.0,3.73,713.0,279.0,1.0,0.0,6.0,12945.0,0.136,55.0,55.0,55.0
348,almond milk,oxtail,781123,Dairy and Egg Products,0.59,197.0,0.35,67.0,7.0,,0.0,0.0,236.0,0.015,1.0,1.0,1.0
348,almond milk,salmon,781123,Dairy and Egg Products,0.59,197.0,0.35,67.0,7.0,,0.0,0.0,236.0,0.015,1.0,1.0,1.0


In [402]:
sub_result.to_csv('co_ingredients_nutrition.csv')