In [1]:
import pickle
from ast import literal_eval
from pathlib import Path

import pandas as pd

## Ingredient Files
Convert ingr_map.pkl into cleanIngredients.csv and rawIngredients.csv

In [2]:
# Import ingredient data
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so
# ingr_map.pkl must come from a trusted source.
# The `with` block guarantees the handle is closed even if unpickling raises.
with open('./Recipe Data/ingr_map.pkl', 'rb') as ingr_map_file:
    ingr_map_dict = pickle.load(ingr_map_file)

## convert to dataframe
ingr_map_df = pd.DataFrame(ingr_map_dict)
ingr_map_df.head()

Unnamed: 0,raw_ingr,raw_words,processed,len_proc,replaced,count,id
0,"medium heads bibb or red leaf lettuce, washed,...",13,"medium heads bibb or red leaf lettuce, washed,...",73,lettuce,4507,4308
1,mixed baby lettuces and spring greens,6,mixed baby lettuces and spring green,36,lettuce,4507,4308
2,romaine lettuce leaf,3,romaine lettuce leaf,20,lettuce,4507,4308
3,iceberg lettuce leaf,3,iceberg lettuce leaf,20,lettuce,4507,4308
4,red romaine lettuce,3,red romaine lettuce,19,lettuce,4507,4308


### cleanIngredients.csv

In [3]:
# Build the clean-ingredient lookup in one pass:
#   1. keep only the clean name and its id, collapsing repeated pairs
#   2. rename to the output column names
#   3. renumber the rows from zero
#   4. put the ID column first
cleanIngredients = (
    ingr_map_df[["replaced", "id"]]
    .drop_duplicates()
    .rename(columns={"replaced": "name", "id": "cleanIngredientID"})
    .reset_index(drop=True)
    .loc[:, ["cleanIngredientID", "name"]]
)

cleanIngredients.head()

Unnamed: 0,cleanIngredientID,name
0,4308,lettuce
1,2744,french vanilla pudding and pie filling mix
2,6843,stove top stuffing mix
3,1910,cream cheese
4,1168,cheddar


In [4]:
# Sanity check: the number of distinct names and the number of distinct ids in
# the ingredient map must each equal the row count of cleanIngredients.
# dropna=False counts a missing value as its own distinct entry.
assert ingr_map_df["replaced"].nunique(dropna=False) == len(cleanIngredients)
assert ingr_map_df["id"].nunique(dropna=False) == len(cleanIngredients)

### rawIngredients.csv

In [5]:
# Raw-ingredient table: start from the full ingredient map, drop the columns
# that describe the clean ingredient ("replaced", "count"), turn the existing
# row index into a column (it becomes rawIngredientID), and rename everything
# to the output column names.
rawIngredients = (
    ingr_map_df
    .drop(columns=["replaced", "count"])
    .reset_index()
    .rename(
        columns={
            "index": "rawIngredientID",
            "raw_ingr": "rawName",
            "raw_words": "rawNameLength",
            "processed": "processedName",
            "len_proc": "processedNameLength",
            "id": "cleanIngredientID",
        }
    )
)

rawIngredients.head()

Unnamed: 0,rawIngredientID,rawName,rawNameLength,processedName,processedNameLength,cleanIngredientID
0,0,"medium heads bibb or red leaf lettuce, washed,...",13,"medium heads bibb or red leaf lettuce, washed,...",73,4308
1,1,mixed baby lettuces and spring greens,6,mixed baby lettuces and spring green,36,4308
2,2,romaine lettuce leaf,3,romaine lettuce leaf,20,4308
3,3,iceberg lettuce leaf,3,iceberg lettuce leaf,20,4308
4,4,red romaine lettuce,3,red romaine lettuce,19,4308


## Recipe Files - pp_recipes.csv
Format data and create join table for recipe and ingredients.

In [6]:
# Load pp_recipes.csv into a DataFrame and preview the first rows.
# NOTE(review): the other inputs are read with upper-case prefixes
# ("PP_users.csv", "RAW_recipes.csv") — confirm this file's on-disk casing,
# since it must match exactly on a case-sensitive filesystem.
pp_recipes = pd.read_csv("./Recipe Data/pp_recipes.csv")
pp_recipes.head()

Unnamed: 0,id,i,name_tokens,ingredient_tokens,steps_tokens,techniques,calorie_level,ingredient_ids
0,424415,23,"[40480, 37229, 2911, 1019, 249, 6878, 6878, 28...","[[2911, 1019, 249, 6878], [1353], [6953], [153...","[40480, 40482, 21662, 481, 6878, 500, 246, 161...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...",0,"[389, 7655, 6270, 1527, 3406]"
1,146223,96900,"[40480, 18376, 7056, 246, 1531, 2032, 40481]","[[17918], [25916], [2507, 6444], [8467, 1179],...","[40480, 40482, 729, 2525, 10906, 485, 43, 8393...","[1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...",0,"[2683, 4969, 800, 5298, 840, 2499, 6632, 7022,..."
2,312329,120056,"[40480, 21044, 16954, 8294, 556, 10837, 40481]","[[5867, 24176], [1353], [6953], [1301, 11332],...","[40480, 40482, 8240, 481, 24176, 296, 1353, 66...","[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...",1,"[1257, 7655, 6270, 590, 5024, 1119, 4883, 6696..."
3,74301,168258,"[40480, 10025, 31156, 40481]","[[1270, 1645, 28447], [21601], [27952, 29471, ...","[40480, 40482, 5539, 21601, 1073, 903, 2324, 4...","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0,"[7940, 3609, 7060, 6265, 1170, 6654, 5003, 3561]"
4,76272,109030,"[40480, 17841, 252, 782, 2373, 1641, 2373, 252...","[[1430, 11434], [1430, 17027], [1615, 23, 695,...","[40480, 40482, 14046, 1430, 11434, 488, 17027,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...",0,"[3484, 6324, 7594, 243]"


In [7]:
# Work on a renamed copy of pp_recipes; the source frame is left untouched.
recipes1 = pp_recipes.rename(
    columns={
        "id": "recipeID",
        "name_tokens": "nameTokenList",
        "ingredient_tokens": "ingredientTokenList",
        "steps_tokens": "stepTokenList",
        "techniques": "techniqueList",
        "calorie_level": "calorieLevel",
        "ingredient_ids": "cleanIngredientIDList",
    }
)
recipes1.head()

Unnamed: 0,recipeID,i,nameTokenList,ingredientTokenList,stepTokenList,techniqueList,calorieLevel,cleanIngredientIDList
0,424415,23,"[40480, 37229, 2911, 1019, 249, 6878, 6878, 28...","[[2911, 1019, 249, 6878], [1353], [6953], [153...","[40480, 40482, 21662, 481, 6878, 500, 246, 161...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...",0,"[389, 7655, 6270, 1527, 3406]"
1,146223,96900,"[40480, 18376, 7056, 246, 1531, 2032, 40481]","[[17918], [25916], [2507, 6444], [8467, 1179],...","[40480, 40482, 729, 2525, 10906, 485, 43, 8393...","[1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...",0,"[2683, 4969, 800, 5298, 840, 2499, 6632, 7022,..."
2,312329,120056,"[40480, 21044, 16954, 8294, 556, 10837, 40481]","[[5867, 24176], [1353], [6953], [1301, 11332],...","[40480, 40482, 8240, 481, 24176, 296, 1353, 66...","[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, ...",1,"[1257, 7655, 6270, 590, 5024, 1119, 4883, 6696..."
3,74301,168258,"[40480, 10025, 31156, 40481]","[[1270, 1645, 28447], [21601], [27952, 29471, ...","[40480, 40482, 5539, 21601, 1073, 903, 2324, 4...","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",0,"[7940, 3609, 7060, 6265, 1170, 6654, 5003, 3561]"
4,76272,109030,"[40480, 17841, 252, 782, 2373, 1641, 2373, 252...","[[1430, 11434], [1430, 17027], [1615, 23, 695,...","[40480, 40482, 14046, 1430, 11434, 488, 17027,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...",0,"[3484, 6324, 7594, 243]"


### recipeCleanJoin.csv

In [8]:
# Parse every stringified ingredient-ID list exactly once.
parsed_ingredient_ids = recipes1["cleanIngredientIDList"].map(literal_eval)

# Spread the lists into a wide frame (one column per list position, shorter
# lists padded with missing values), keyed by recipeID. Building the frame in a
# single constructor call gives the same table as wrapping each row in its own
# pd.Series, without creating one Series object per recipe.
ingredient_ids_wide = pd.DataFrame(
    parsed_ingredient_ids.tolist(),
    index=pd.Index(recipes1["recipeID"], name="recipeID"),
)

# convert the ingredient list into separate rows: one (recipeID, cleanIngredientID)
# row per list entry; the padding rows are removed by dropna()
recipeIngredientJoin = (
    pd.melt(
        ingredient_ids_wide.reset_index(),
        id_vars=["recipeID"],
        value_name="cleanIngredientID",
    )
    .drop("variable", axis=1)
    .dropna()
)

# the missing-value padding forces a float column, so cast the IDs back to int
recipeIngredientJoin["cleanIngredientID"] = recipeIngredientJoin["cleanIngredientID"].astype(int)

recipeIngredientJoin.head()

Unnamed: 0,recipeID,cleanIngredientID
0,424415,389
1,146223,2683
2,312329,1257
3,74301,7940
4,76272,3484


## Recipe Files - raw_recipes.csv
Format data, create join table for tags, and combine with pp_recipes.csv file

In [9]:
# Load RAW_recipes.csv into a DataFrame and preview the first rows.
# List-like columns are read as plain strings here; later cells parse the
# nutrition and tags columns with literal_eval.
raw_recipes = pd.read_csv("./Recipe Data/RAW_recipes.csv")
raw_recipes.head()

Unnamed: 0,name,id,minutes,contributor_id,submitted,tags,nutrition,n_steps,steps,description,ingredients,n_ingredients
0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6
2,all in the kitchen chili,112140,130,196586,2005-02-25,"['time-to-make', 'course', 'preparation', 'mai...","[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onions', 'diced tomato...",13
3,alouette potatoes,59389,45,68585,2003-04-14,"['60-minutes-or-less', 'time-to-make', 'course...","[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",11
4,amish tomato ketchup for canning,44061,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8


In [10]:
# Renamed copy of raw_recipes using the output column names; columns that are
# not listed (name, minutes, description) keep their original names.
recipes2 = raw_recipes.rename(
    columns={
        "id": "recipeID",
        "contributor_id": "contributorID",
        "submitted": "submittedDate",
        "n_steps": "numSteps",
        "n_ingredients": "numIngredients",
        "ingredients": "cleanIngredientNameList",
        "nutrition": "nutritionList",
        "tags": "tagsList",
        "steps": "stepsList",
    }
)

recipes2.head()

Unnamed: 0,name,recipeID,minutes,contributorID,submittedDate,tagsList,nutritionList,numSteps,stepsList,description,cleanIngredientNameList,numIngredients
0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,"['winter squash', 'mexican seasoning', 'mixed ...",7
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,"['prepared pizza crust', 'sausage patty', 'egg...",6
2,all in the kitchen chili,112140,130,196586,2005-02-25,"['time-to-make', 'course', 'preparation', 'mai...","[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,"['ground beef', 'yellow onions', 'diced tomato...",13
3,alouette potatoes,59389,45,68585,2003-04-14,"['60-minutes-or-less', 'time-to-make', 'course...","[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...","['spreadable cheese with garlic and herbs', 'n...",11
4,amish tomato ketchup for canning,44061,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,"['tomato juice', 'apple cider vinegar', 'sugar...",8


### recipes.csv

In [11]:
# recipeID must identify a single row in each recipe table before they are joined
assert recipes1["recipeID"].is_unique
assert recipes2["recipeID"].is_unique

In [12]:
# Report how many rows each recipe source contributes (raw file first)
print("Records in raw_recipes.csv: ", len(recipes2))
print("Records in pp_recipes.csv: ", len(recipes1))

Records in raw_recipes.csv:  231637
Records in pp_recipes.csv:  178265


In [14]:
# join two recipe tables together; the left join keeps every row of recipes2,
# whether or not recipes1 has a matching recipeID
recipes = recipes2.merge(recipes1, how="left", on="recipeID")

# create flag to signal whether recipe data is complete or not:
# rows without a recipes1 match have a null cleanIngredientIDList after the join
recipes["isComplete"] = recipes["cleanIngredientIDList"].notna()

# separate out nutrition data
# Each nutritionList string is parsed once and the wide frame is built in a
# single constructor call. This gives the same values as wrapping every row in
# its own pd.Series, without creating one Series object per recipe.
nutrition_columns = ['calories', 'fatPDV', 'sugarPDV', 'sodiumPDV', 'proteinPDV', 'saturatedFatPDV', 'carbsPDV']
nutrition_values = pd.DataFrame(
    recipes["nutritionList"].map(literal_eval).tolist(),
    index=recipes.index,  # keep row alignment with `recipes` for the assignment
)
recipes[nutrition_columns] = nutrition_values

recipes.head()

Unnamed: 0,name,recipeID,minutes,contributorID,submittedDate,tagsList,nutritionList,numSteps,stepsList,description,...,calorieLevel,cleanIngredientIDList,isComplete,calories,fatPDV,sugarPDV,sodiumPDV,proteinPDV,saturatedFatPDV,carbsPDV
0,arriba baked winter squash mexican style,137739,55,47892,2005-09-16,"['60-minutes-or-less', 'time-to-make', 'course...","[51.5, 0.0, 13.0, 0.0, 2.0, 0.0, 4.0]",11,"['make a choice and proceed with recipe', 'dep...",autumn is my favorite time of year to cook! th...,...,0.0,"[7933, 4694, 4795, 3723, 840, 5006, 6270]",True,51.5,0.0,13.0,0.0,2.0,0.0,4.0
1,a bit different breakfast pizza,31490,30,26278,2002-06-17,"['30-minutes-or-less', 'time-to-make', 'course...","[173.4, 18.0, 0.0, 17.0, 22.0, 35.0, 1.0]",9,"['preheat oven to 425 degrees f', 'press dough...",this recipe calls for the crust to be prebaked...,...,0.0,"[5481, 6324, 2499, 4717, 6276, 1170]",True,173.4,18.0,0.0,17.0,22.0,35.0,1.0
2,all in the kitchen chili,112140,130,196586,2005-02-25,"['time-to-make', 'course', 'preparation', 'mai...","[269.8, 22.0, 32.0, 48.0, 39.0, 27.0, 5.0]",6,"['brown ground beef in large pot', 'add choppe...",this modified version of 'mom's' chili was a h...,...,,,False,269.8,22.0,32.0,48.0,39.0,27.0,5.0
3,alouette potatoes,59389,45,68585,2003-04-14,"['60-minutes-or-less', 'time-to-make', 'course...","[368.1, 17.0, 10.0, 2.0, 14.0, 8.0, 20.0]",11,['place potatoes in a large pot of lightly sal...,"this is a super easy, great tasting, make ahea...",...,1.0,"[1170, 4918, 6426, 5185, 7099, 5006, 6009, 627...",True,368.1,17.0,10.0,2.0,14.0,8.0,20.0
4,amish tomato ketchup for canning,44061,190,41706,2002-10-25,"['weeknight', 'time-to-make', 'course', 'main-...","[352.9, 1.0, 337.0, 23.0, 3.0, 0.0, 28.0]",5,['mix all ingredients& boil for 2 1 / 2 hours ...,my dh's amish mother raised him on this recipe...,...,,,False,352.9,1.0,337.0,23.0,3.0,0.0,28.0


### tags.csv

In [15]:
# extract just necessary columns, keyed by recipeID
tag_data = recipes[["recipeID", "tagsList"]].set_index("recipeID")

# Parse every stringified tag list exactly once, then spread the lists into a
# wide frame (one column per list position, shorter lists padded with missing
# values). Building it in a single constructor call avoids creating one
# pd.Series object per recipe.
parsed_tags = tag_data["tagsList"].map(literal_eval)
tags_wide = pd.DataFrame(parsed_tags.tolist(), index=tag_data.index)

# separate list of tags into rows: one (recipeID, tag) row per list entry,
# padding rows removed by dropna(), result ordered by recipeID
all_recipe_tags = (
    pd.melt(tags_wide.reset_index(), id_vars=["recipeID"], value_name="tag")
    .set_index(["recipeID"])
    .drop("variable", axis=1)
    .dropna()
    .sort_index()
)

all_recipe_tags.sample(5)

Unnamed: 0_level_0,tag
recipeID,Unnamed: 1_level_1
20226,stove-top
38268,cheese
169095,vegetarian
422559,3-steps-or-less
270860,holiday-event


In [16]:
# Give every (recipe, tag) row a count of one so the counts can be summed per tag
tag_data = all_recipe_tags.assign(numRecipes=1)

# One row per distinct tag with the number of recipe rows that carry it
tag_counts = tag_data.groupby("tag", as_index=False)["numRecipes"].sum()

# Discard blank tags, then promote the surviving row labels to the tagID column
has_text = tag_counts["tag"].str.len() > 0
tags = tag_counts[has_text].reset_index().rename(columns={"index": "tagID"})

tags

Unnamed: 0,tagID,tag,numRecipes
0,1,1-day-or-more,2138
1,2,15-minutes-or-less,43934
2,3,3-steps-or-less,44933
3,4,30-minutes-or-less,55077
4,5,4-hours-or-less,49497
...,...,...,...
546,547,wings,638
547,548,winter,7654
548,549,yams-sweet-potatoes,1508
549,550,yeast,3128


### recipeTagJoin.csv

In [17]:
# Long (recipeID, tag) table with recipeID as an ordinary column
all_recipe_tag_data = all_recipe_tags.reset_index()

# Attach each tag's tagID (inner join: rows whose tag is not present in `tags`
# are dropped), then discard the tag text so only the ID pair remains
recipeTagJoin = (
    all_recipe_tag_data
    .merge(tags[["tag", "tagID"]], how="inner", on="tag")
    .drop(columns="tag")
)

# No surrogate key column is added to this join table.
recipeTagJoin.head()

Unnamed: 0,recipeID,tagID
0,38,282
1,40,282
2,41,282
3,52,282
4,55,282


## Interaction Files

In [18]:
# Load RAW_interactions.csv into a DataFrame and preview the first rows.
RAW_interactions = pd.read_csv("./Recipe Data/RAW_interactions.csv")
RAW_interactions.head()

Unnamed: 0,user_id,recipe_id,date,rating,review
0,38094,40893,2003-02-17,4,Great with a salad. Cooked on top of stove for...
1,1293707,40893,2011-12-21,5,"So simple, so delicious! Great for chilly fall..."
2,8937,44394,2002-12-01,4,This worked very well and is EASY. I used not...
3,126440,85009,2010-02-27,5,I made the Mexican topping and took it to bunk...
4,57222,85009,2011-10-01,5,"Made the cheddar bacon topping, adding a sprin..."


### interactions.csv

In [19]:
# Turn the existing row index into a column (it becomes interactionID) and
# rename the user and recipe keys to the output column names; the source frame
# is not modified.
interactions = (
    RAW_interactions
    .reset_index()
    .rename(columns={"user_id": "userID", "recipe_id": "recipeID", "index": "interactionID"})
)

interactions.head()

Unnamed: 0,interactionID,userID,recipeID,date,rating,review
0,0,38094,40893,2003-02-17,4,Great with a salad. Cooked on top of stove for...
1,1,1293707,40893,2011-12-21,5,"So simple, so delicious! Great for chilly fall..."
2,2,8937,44394,2002-12-01,4,This worked very well and is EASY. I used not...
3,3,126440,85009,2010-02-27,5,I made the Mexican topping and took it to bunk...
4,4,57222,85009,2011-10-01,5,"Made the cheddar bacon topping, adding a sprin..."


## User Files

In [20]:
# Load PP_users.csv into a DataFrame and preview the first rows.
PP_users = pd.read_csv("./Recipe Data/PP_users.csv")
PP_users.head()

Unnamed: 0,u,techniques,items,n_items,ratings,n_ratings
0,0,"[8, 0, 0, 5, 6, 0, 0, 1, 0, 9, 1, 0, 0, 0, 1, ...","[1118, 27680, 32541, 137353, 16428, 28815, 658...",31,"[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 4.0, 4.0, ...",31
1,1,"[11, 0, 0, 2, 12, 0, 0, 0, 0, 14, 5, 0, 0, 0, ...","[122140, 77036, 156817, 76957, 68818, 155600, ...",39,"[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, ...",39
2,2,"[13, 0, 0, 7, 5, 0, 1, 2, 1, 11, 0, 1, 0, 0, 1...","[168054, 87218, 35731, 1, 20475, 9039, 124834,...",27,"[3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, ...",27
3,3,"[498, 13, 4, 218, 376, 3, 2, 33, 16, 591, 10, ...","[163193, 156352, 102888, 19914, 169438, 55772,...",1513,"[5.0, 5.0, 5.0, 5.0, 4.0, 4.0, 5.0, 5.0, 5.0, ...",1513
4,4,"[161, 1, 1, 86, 93, 0, 0, 11, 2, 141, 0, 16, 0...","[72857, 38652, 160427, 55772, 119999, 141777, ...",376,"[5.0, 5.0, 5.0, 5.0, 4.0, 4.0, 5.0, 4.0, 5.0, ...",376


### users.csv

In [21]:
# Renamed copy of PP_users using the output column names
users = PP_users.rename(
    columns={
        "u": "userID",
        "n_items": "numItems",
        "n_ratings": "numRatings",
        "items": "itemsList",
        "ratings": "ratingsList",
        "techniques": "techniquesList",
    }
)

users.head()

Unnamed: 0,userID,techniquesList,itemsList,numItems,ratingsList,numRatings
0,0,"[8, 0, 0, 5, 6, 0, 0, 1, 0, 9, 1, 0, 0, 0, 1, ...","[1118, 27680, 32541, 137353, 16428, 28815, 658...",31,"[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 4.0, 4.0, ...",31
1,1,"[11, 0, 0, 2, 12, 0, 0, 0, 0, 14, 5, 0, 0, 0, ...","[122140, 77036, 156817, 76957, 68818, 155600, ...",39,"[5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, ...",39
2,2,"[13, 0, 0, 7, 5, 0, 1, 2, 1, 11, 0, 1, 0, 0, 1...","[168054, 87218, 35731, 1, 20475, 9039, 124834,...",27,"[3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, ...",27
3,3,"[498, 13, 4, 218, 376, 3, 2, 33, 16, 591, 10, ...","[163193, 156352, 102888, 19914, 169438, 55772,...",1513,"[5.0, 5.0, 5.0, 5.0, 4.0, 4.0, 5.0, 5.0, 5.0, ...",1513
4,4,"[161, 1, 1, 86, 93, 0, 0, 11, 2, 141, 0, 16, 0...","[72857, 38652, 160427, 55772, 119999, 141777, ...",376,"[5.0, 5.0, 5.0, 5.0, 4.0, 4.0, 5.0, 4.0, 5.0, ...",376


## Save the final files

In [22]:
# Save files to csv
# Create the output folder first: to_csv does not create missing directories,
# and failing here would waste all of the work done by the cells above.
output_dir = Path("Clean Recipe Data")
output_dir.mkdir(parents=True, exist_ok=True)

# Output file name -> frame to write. recipeIngredientJoin is written as
# recipeCleanJoin.csv; the frames are written in the order listed.
output_frames = {
    "cleanIngredients.csv": cleanIngredients,
    "rawIngredients.csv": rawIngredients,
    "recipeCleanJoin.csv": recipeIngredientJoin,
    "recipes.csv": recipes,
    "recipeTagJoin.csv": recipeTagJoin,
    "tags.csv": tags,
    "interactions.csv": interactions,
    "users.csv": users,
}

# index=False: the row index is not written to any of the files
for output_name, output_frame in output_frames.items():
    output_frame.to_csv(output_dir / output_name, index=False)