In [58]:
import ast
import re

import numpy as np
import pandas as pd

### Load data frame from csv file

In [77]:
# Path of the recipe export to clean.
# NOTE(review): the name has no ".csv" extension -- confirm it matches the file on disk.
data_file = 'example_dataframe_100_recipes_0901'
data_df = pd.read_csv(data_file)
# Preview the first ten rows of the raw frame.
data_df.head(10)

Unnamed: 0,recipe_name,cuisine_SP,aisle_SP,ingredients_SP
0,White Chocolate Macadamia Banana Bread,[],"['Baking', 'Milk, Eggs, Other Dairy', 'Produce...","['sugar', 'butter', 'ripe banana', 'milk', 're..."
1,Asian Chickpea Lettuce Wraps,['Asian'],"['Canned and Jarred', 'Ethnic Foods', 'Condime...","['canned chickpeas', 'chili sauce', 'barbecue ..."
2,Salsa Verde Chicken Tamales,['Mexican'],"['Meat', 'Meat', 'Produce', 'Produce', 'Spices...","['cooked chicken', 'whole chicken', 'garlic', ..."
3,Cheddar Polenta With Bacon Wrapped Asparagus,[],"['Produce', 'Meat', 'Canned and Jarred', 'Cere...","['asparagus', 'applewood smoked bacon', 'chick..."
4,Blasian's Deviled Eggs,['American'],"['Milk, Eggs, Other Dairy', 'Condiments', 'Con...","['hard boiled egg', 'mayonnaise', 'mustard', '..."
5,Sweet and Sticky Chicken Strips,[],"['Baking', 'Milk, Eggs, Other Dairy', 'Baking'...","['golden brown sugar', 'egg', 'wheat flour', '..."
6,Moist Zucchini Pineapple Sweetbread,[],"['Baking', 'Canned and Jarred', 'Spices and Se...","['low sodium baking powder', 'pineapple with j..."
7,Classic Eggs Benedict,['American'],"['Oil, Vinegar, Salad Dressing;Spices and Seas...","['hollandaise sauce', 'canadian bacon', 'engli..."
8,"Kale & chickpea stew with cumin, smoked paprik...",[],"['Produce', 'Canned and Jarred', 'Produce', 'O...","['kale', 'canned chickpeas', 'sweet potato', '..."
9,"Vanilla Cream Cakes, Easy and Fluffy Holiday C...",[],"['Refrigerated;Frozen', 'Milk, Eggs, Other Dai...","['puff pastry dough', 'egg', 'sugar', 'sugar',..."


### Define functions required to clean dataframe

In [78]:
# Convert stringified lists (e.g. "['sugar', 'butter']") into real Python lists.

# Characters removed from every parsed item: list brackets and both quote styles.
_LIST_PUNCTUATION = str.maketrans('', '', "[]'\"")


def _parse_list_cell(cell):
    """Turn one cell holding a stringified list into a list of clean item strings."""
    if isinstance(cell, (list, tuple)):
        # Already parsed (for example when the cell is re-run): keep the items.
        return list(cell)
    if not isinstance(cell, str):
        if pd.isna(cell):
            # Missing values carry no items.
            return []
        cell = str(cell)
    try:
        parsed = ast.literal_eval(cell.strip())
    except (ValueError, SyntaxError):
        parsed = None
    if isinstance(parsed, (list, tuple)):
        # A proper list literal: commas inside a quoted item stay inside that
        # item, so 'Milk, Eggs, Other Dairy' is kept as a single entry.
        items = [str(item) for item in parsed]
    else:
        # Not a valid literal: fall back to splitting at every comma.
        items = cell.split(',')
    # Brackets and quote characters inside an item are still removed, as the
    # original implementation did; surrounding whitespace is trimmed and empty
    # items (such as the one produced by "[]") are dropped.
    cleaned = (item.translate(_LIST_PUNCTUATION).strip() for item in items)
    return [item for item in cleaned if item]


def ingredients_cleanup (data_df, column):
    """Replace each stringified list in ``column`` with a real list of strings.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Frame holding the column to parse. It is modified in place.
    column : str
        Name of the column whose cells look like ``"['a', 'b']"``.

    Returns
    -------
    pandas.DataFrame
        The same ``data_df`` object, with ``column`` holding one list per row.
    """
    parsed_cells = [_parse_list_cell(cell) for cell in data_df[column]]
    # Assign the whole column at once: storing a list through ``.loc`` on a
    # single cell depends on the column dtype and on unique index labels.
    data_df[column] = pd.Series(parsed_cells, index=data_df.index, dtype=object)
    return data_df

In [79]:
# Strip surrounding whitespace from every item of a list-valued column
def remove_leading_ws(data_df, column):
    """Strip whitespace from both ends of every item in a list-valued column.

    Despite the name, ``str.strip`` removes trailing as well as leading
    whitespace.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Frame whose ``column`` holds one list of strings per row. It is
        modified in place.
    column : str
        Name of the list-valued column to clean.

    Returns
    -------
    pandas.DataFrame
        The same ``data_df`` object with the cleaned column.
    """
    stripped_rows = [[element.strip() for element in row] for row in data_df[column]]
    # Replace the column in one assignment instead of mutating each stored list
    # through ``data_df.loc[...][i]``, which only works when ``.loc`` hands back
    # the list object itself (i.e. when the index labels are unique).
    data_df[column] = pd.Series(stripped_rows, index=data_df.index, dtype=object)
    return data_df


In [80]:
# Flatten every recipe's ingredient list into one combined list
def total_ing_list_from_df(data_df):
    """Return every item of the ``ingredients_SP`` column as one flat list.

    Rows are visited in frame order and items keep their order within a row;
    repeated ingredients are kept.
    """
    return [
        ingredient
        for recipe_ingredients in data_df.ingredients_SP
        for ingredient in recipe_ingredients
    ]

In [81]:
# Count how often each ingredient occurs, most frequent first
def sort_and_count_ingredient_list(ingredient_list):
    """Count occurrences of each ingredient and sort by count, descending.

    Parameters
    ----------
    ingredient_list : list of str
        Flat list of ingredient names; repeats are what gets counted.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``ingredient`` with a single integer ``count`` column,
        sorted from most to least frequent. An empty input gives an empty
        frame with the same index name and column.
    """
    ingredients = list(ingredient_list)
    if not ingredients:
        # Grouping an empty frame by 'ingredient' would raise KeyError because
        # the column would never be created; return the empty result directly.
        return pd.DataFrame(
            {'count': np.array([], dtype='int64')},
            index=pd.Index([], dtype=object, name='ingredient'),
        )
    ingredient_df = pd.DataFrame({'ingredient': ingredients})
    ingredient_df['count'] = 1
    ingredient_counts = ingredient_df.groupby('ingredient').agg({'count': 'count'})
    return ingredient_counts.sort_values('count', ascending=False)


In [82]:
# Replace different versions of an ingredient with one canonical name
def ingredient_replacement (data_df, replacement_dict, column='ingredients_SP'):
    """Map ingredient variants onto canonical names in a list-valued column.

    Each item is looked up once in ``replacement_dict``; items that are not
    keys are kept unchanged. Replacement is a single pass, so a replaced value
    is not looked up again even if it is itself a key.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Frame whose ``column`` holds one list of ingredient names per row. It
        is modified in place.
    replacement_dict : dict
        Maps a variant name to the name that should replace it.
    column : str, optional
        Column to rewrite. Defaults to ``'ingredients_SP'``.

    Returns
    -------
    pandas.DataFrame
        The same ``data_df`` object with the rewritten column.
    """
    replaced_rows = [
        [replacement_dict.get(element, element) for element in row]
        for row in data_df[column]
    ]
    # Assign the column in one step rather than mutating each stored list via
    # ``data_df.loc[...][i]``, which breaks when index labels are not unique.
    data_df[column] = pd.Series(replaced_rows, index=data_df.index, dtype=object)
    return data_df

### Create a new data frame with only Mexican recipes

In [83]:
# mexican_df = data_df[data_df['cuisine_SP'] == "['Mexican']"].copy()
# data_df

### For each row (recipe) remove the unwanted characters from the ingredients string, split the string at commas to return a list, save the returned list back to the dataframe.  

In [84]:
# Parse the stringified lists in both list-like columns; ingredients_cleanup
# updates data_df in place and returns the same frame.
data_df = ingredients_cleanup(data_df, 'ingredients_SP')
data_df = ingredients_cleanup(data_df, 'aisle_SP')
data_df

Unnamed: 0,recipe_name,cuisine_SP,aisle_SP,ingredients_SP
0,White Chocolate Macadamia Banana Bread,[],"[Baking, Milk, Eggs, Other Dairy, Produce,...","[sugar, butter, ripe banana, milk, reduced..."
1,Asian Chickpea Lettuce Wraps,['Asian'],"[Canned and Jarred, Ethnic Foods, Condiments...","[canned chickpeas, chili sauce, barbecue sau..."
2,Salsa Verde Chicken Tamales,['Mexican'],"[Meat, Meat, Produce, Produce, Spices and ...","[cooked chicken, whole chicken, garlic, oni..."
3,Cheddar Polenta With Bacon Wrapped Asparagus,[],"[Produce, Meat, Canned and Jarred, Cereal;B...","[asparagus, applewood smoked bacon, chicken ..."
4,Blasian's Deviled Eggs,['American'],"[Milk, Eggs, Other Dairy, Condiments, Cond...","[hard boiled egg, mayonnaise, mustard, salt..."
...,...,...,...,...
95,Authentic Mexican Wedding Cookies,['Mexican'],"[Milk, Eggs, Other Dairy, Baking, Baking, ...","[butter, powdered sugar, vanilla extract, w..."
96,Corned Beef Ribs With Brown Sugar and Mustard ...,[],"[Spices and Seasonings, Spices and Seasonings...","[bay leaves, black peppercorns, cabbage, ca..."
97,Green Monster Ice Pops,[],"[Milk, Eggs, Other Dairy, Produce, Produce...","[almond milk, avocado, baby spinach, ripe b..."
98,Orange Zest Maple Date Bars,[],"[Dried Fruits;Produce, Beverages, Cereal, P...","[dates, water, maple syrup, orange zest, s..."


### Remove leading and trailing whitespace from the items in the ingredients_SP column

In [85]:
# Only ingredients_SP is stripped here; aisle_SP stays as ingredients_cleanup left it.
data_df = remove_leading_ws(data_df, 'ingredients_SP')

In [86]:
# Spot-check the stripped ingredient lists.
data_df.head()

Unnamed: 0,recipe_name,cuisine_SP,aisle_SP,ingredients_SP
0,White Chocolate Macadamia Banana Bread,[],"[Baking, Milk, Eggs, Other Dairy, Produce,...","[sugar, butter, ripe banana, milk, reduced fat..."
1,Asian Chickpea Lettuce Wraps,['Asian'],"[Canned and Jarred, Ethnic Foods, Condiments...","[canned chickpeas, chili sauce, barbecue sauce..."
2,Salsa Verde Chicken Tamales,['Mexican'],"[Meat, Meat, Produce, Produce, Spices and ...","[cooked chicken, whole chicken, garlic, onion,..."
3,Cheddar Polenta With Bacon Wrapped Asparagus,[],"[Produce, Meat, Canned and Jarred, Cereal;B...","[asparagus, applewood smoked bacon, chicken br..."
4,Blasian's Deviled Eggs,['American'],"[Milk, Eggs, Other Dairy, Condiments, Cond...","[hard boiled egg, mayonnaise, mustard, salt, g..."


### Create a list of all ingredients from the cuisine 

In [87]:
# Flatten every recipe's ingredient list into one list (repeats kept) for counting.
composite_ingredients = total_ing_list_from_df(data_df)

### Create a dataframe from the composite ingredients list, group by ingredient name with count(), and sort descending

In [71]:
# Count ingredient occurrences before any renaming and save the table to CSV.
ing_groups = sort_and_count_ingredient_list(composite_ingredients)
ing_groups.to_csv('ingredients_to_parse_0908.csv')

### Create a dictionary that maps different names for the same ingredient onto one name. In the final product, use regex logic to make these changes

In [88]:
# Maps an ingredient variant (key) to the name that replaces it (value).
# Each key appears exactly once. Keys that used to be listed twice keep the
# position of their first entry and the value of their last entry, which is
# the value Python used anyway (the last duplicate in a dict display wins).
replacement_dict = {
    'coarse sea salt': 'salt and pepper',
    'salsa verde': 'salsa',
    'boneless skinless chicken breast': 'chicken',
    'salt': 'salt and pepper',
    'whole chicken': 'chicken',
    'white onion': 'onion',
    # NOTE(review): 'solive oil' looks like a typo for 'olive oil', which is
    # mapped further down; kept so the mapping itself is unchanged.
    'solive oil': 'oil',
    'palm oil': 'oil',
    'dried arbol chile': 'dried pepper',
    'cooking oil': 'oil',
    'cooked chicken': 'chicken',
    'yellow onion': 'onion',
    'ripe banana': 'banana',
    'fresh basil': 'basil',
    'black beans': 'beans',
    'canned kidney beans': 'beans',
    'canned pinto beans': 'beans',
    'chili beans': 'beans',
    'dried black beans': 'beans',
    'refried beans': 'beans',
    '95 percent lean ground beef': 'beef',
    'beef cubes': 'beef',
    'beef shoulder roast': 'beef',
    'flank steak': 'beef',
    'ground chuck': 'beef',
    'round tip steak': 'beef',
    'skirt steak': 'beef',
    'steak': 'beef',
    'green pepper': 'bell pepper',
    'orange pepper': 'bell pepper',
    'red pepper': 'bell pepper',
    'roasted red peppers': 'bell pepper',
    'yellow pepper': 'bell pepper',
    'canned black beans': 'beans',
    'french bread': 'bread',
    'beef broth': 'broth',
    'chicken broth': 'broth',
    'chicken stock': 'broth',
    'low sodium chicken broth': 'broth',
    'red cabbage': 'cabbage',
    'canned chipotle chile': 'canned pepper',
    'canned green chiles': 'canned pepper',
    'chipotle chiles': 'canned pepper',
    'peppers': 'fresh peppers',
    'ground cayenne pepper': 'cayenne',
    'cheddar cheese': 'cheese',
    'colby jack': 'cheese',
    'cotija cheese': 'cheese',
    'cream cheese': 'cheese',
    'extra sharp cheddar cheese': 'cheese',
    'feta cheese': 'cheese',
    'goat cheese': 'cheese',
    'grilling cheese': 'cheese',
    'low fat shredded cheddar': 'cheese',
    'mexican cheese': 'cheese',
    'monterey jack cheese': 'cheese',
    'queso fresco': 'cheese',
    'sharp cheddar cheese': 'cheese',
    'shredded cheddar cheese': 'cheese',
    'shredded cheese': 'cheese',
    'shredded mexican cheese blend': 'cheese',
    'soy cheese': 'cheese',
    'boneless skinless chicken thighs': 'chicken',
    'chicken breast': 'chicken',
    'chicken breast halves': 'chicken',
    'chicken meat': 'chicken',
    'rotisserie chicken': 'chicken',
    'shredded chicken': 'chicken',
    'condensed cream of chicken soup': 'chicken soup',
    'ancho chili powder': 'chili powder',
    'chili pepper': 'chili powder',
    'chili powder': 'chili powder',
    'chili seasoning mix': 'chili powder',
    'ground chipotle chile pepper': 'chipotle',
    'dried cilantro': 'cilantro',
    'cod fillets': 'fish',
    'ground coriander': 'coriander',
    'corn kernels': 'corn',
    'creamed corn': 'corn',
    'frozen corn': 'corn',
    'sweet corn on the cob': 'corn',
    'whole kernel corn': 'corn',
    'mexican crema': 'crema',
    'creole seasoning': 'creole season',
    'persian cucumber': 'cucumber',
    'cumin seeds': 'cumin',
    'ground cumin': 'cumin',
    'green enchilada sauce': 'enchilada sauce',
    'salmon': 'fish',
    'swordfish': 'fish',
    'tilapia': 'fish',
    'anaheim pepper': 'fresh pepper',
    'green chili pepper': 'fresh pepper',
    'jalapeno pepper': 'fresh pepper',
    'pasilla pepper': 'fresh pepper',
    'poblano pepper': 'fresh pepper',
    'serrano pepper': 'fresh pepper',
    'garlic powder': 'garlic',
    'garlic salt': 'garlic',
    'whole garlic cloves': 'garlic',
    'ginger': 'ginger',
    'guacamole': 'guacamole',
    'smoked ham hock': 'ham hock',
    'light vanilla ice cream': 'ice cream',
    'italian seasoning': 'italian season',
    'lemon juice': 'lemon',
    'lemon peel': 'lemon',
    'butter lettuce leaves': 'lettuce',
    'romaine': 'lettuce',
    'lime juice': 'lime',
    'lime peel': 'lime',
    'lime wedge': 'lime',
    'dried mango': 'mango',
    'condensed cream of mushroom soup': 'mushroom soup',
    'fresh mushrooms': 'mushrooms',
    'mustard powder': 'mustard powder',
    'coconut oil': 'oil',
    'extra virgin olive oil': 'oil',
    'grape seed oil': 'oil',
    'light olive oil': 'oil',
    'olive oil': 'oil',
    'peanut oil': 'oil',
    'rice bran oil': 'oil',
    'vegetable oil': 'oil',
    'black olives': 'olives',
    'olives': 'olives',
    'onion powder': 'onion',
    'red onion': 'onion',
    'spring onions': 'onion',
    'navel orange': 'orange',
    'orange juice': 'orange',
    'smoked paprika': 'paprika',
    'peas and carrots': 'pea',
    'pico de gallo': 'pico de gallo',
    'pistachio nuts': 'pistachio',
    'boston butt': 'pork',
    'lean pork tenderloin': 'pork',
    'pork chops': 'pork',
    'ground pork sausage': 'pork sausage',
    'yukon gold potato': 'potato',
    'cooked quinoa': 'quinoa',
    'red pepper flakes': 'red pepper flake',
    'brown rice': 'rice',
    'cooked brown rice': 'rice',
    'cooked rice': 'rice',
    'picante sauce': 'salsa',
    'kosher salt': 'salt and pepper',
    'black pepper': 'salt and pepper',
    'seasoned salt': 'salt and pepper',
    'white pepper': 'salt and pepper',
    'vegetable stock': 'broth',
    'dark brown sugar': 'sugar',
    'golden brown sugar': 'sugar',
    'dried thyme': 'thyme',
    'canned diced tomatoes': 'tomato',
    'canned tomatoes': 'tomato',
    'cherry tomato': 'tomato',
    'diced tomatoes with green chilies': 'tomato',
    'fire roasted tomatoes': 'tomato',
    'italian tomato': 'tomato',
    'plum tomato': 'tomato',
    'tomato juice': 'tomato',
    'tomato paste': 'tomato',
    'canned tomato sauce': 'tomato sauce',
    'burrito size tortilla': 'tortilla',
    'flour tortilla': 'tortilla',
    'scoops tortilla chips': 'tortilla',
    'white corn tortilla': 'tortilla',
    'wrap': 'tortilla',
    'corn tortilla chips': 'tortilla',
    'ground turkey': 'turkey',
    'apple cider vinegar': 'vinegar',
    'balsamic vinegar': 'vinegar',
    'distilled white vinegar': 'vinegar',
    'white wine vinegar': 'vinegar',
    'fat free greek yogurt': 'yogurt',
    'greek yogurt': 'yogurt',
    'low fat yogurt': 'yogurt',
    'almond meal': 'almonds',
    'slivered almonds': 'almonds',
    'applewood smoked bacon': 'bacon',
    'canadian bacon': 'bacon',
    'low sodium baking powder': 'baking powder',
    'dried barberries': 'barberries',
    'dried basil': 'basil',
    'corned beef': 'beef',
    'light beer': 'beer',
    'blueberries': 'berries',
    'buttermilk biscuits': 'biscuits',
    'crusty bread': 'bread',
    'panko': 'bread crumbs',
    'stock': 'broth',
    'clarified butter': 'butter',
    'unsalted butter': 'butter',
    'buffalo mozzarella': 'cheese',
    'cottage cheese': 'cheese',
    'gorgonzola': 'cheese',
    'mozzarella': 'cheese',
    'pecorino romano': 'cheese',
    'pepperjack cheese': 'cheese',
    'reduced fat cream cheese': 'cheese',
    'ricotta cheese': 'cheese',
    'string cheese': 'cheese',
    'boneless skin on chicken thighs': 'chicken',
    'chicken drumsticks': 'chicken',
    'cooked chicken breast': 'chicken',
    'roasting chicken': 'chicken',
    'canned chickpeas': 'chickpeas',
    'chocolate chunks': 'chocolate',
    'dark chocolate chips': 'chocolate',
    'gluten free chocolate cake mix': 'chocolate',
    'hersheys cocoa': 'chocolate',
    'instant chocolate pudding mix': 'chocolate',
    'milk chocolate': 'chocolate',
    'mini chocolate chips': 'chocolate',
    'semisweet chocolate': 'chocolate',
    'semisweet chocolate chips': 'chocolate',
    'unsweetened dutch processed cocoa powder': 'chocolate',
    'white chocolate chips': 'chocolate',
    'ground cinnamon': 'cinnamon',
    'light coconut milk': 'coconut',
    'sweetened coconut flakes': 'coconut',
    'unsweetened coconut': 'coconut',
    'coconut milk beverage': 'coconut milk',
    'whole wheat couscous': 'couscous',
    'whipping cream': 'cream',
    'daikon radish': 'daikon',
    'medjool dates': 'dates',
    'egg whites': 'egg',
    'egg yolk': 'egg',
    'hard boiled egg': 'egg',
    'dried figs': 'figs',
    'crawfish': 'fish',
    'shrimp': 'fish',
    'tuna': 'fish',
    'ground flaxseed': 'flaxseed',
    'bread flour': 'flour',
    'cake flour': 'flour',
    'white whole wheat flour': 'flour',
    'unbleached flour': 'flour',
    'whole wheat flour': 'flour',
    'scotch bonnet chili': 'fresh pepper',
    'mixed fruit': 'fruit',
    'granulated garlic': 'garlic',
    'cherry flavored gelatin': 'gelatin',
    'ginger powder': 'ginger',
    'bottle gourd': 'gourd',
    'fat free half and half': 'half and half',
    'chocolate ice cream': 'ice cream',
    'raspberry jam': 'jam',
    '1 percent milk': 'milk',
    'fat free milk': 'milk',
    'enoki mushrooms': 'mushrooms',
    'mixed mushrooms': 'mushrooms',
    'creole mustard': 'mustard',
    'rolled oats': 'oats',
    'salad oil': 'oil',
    'kalamata olives': 'olives',
    'maui onion': 'onion',
    'candied orange peel': 'orange',
    'orange zest': 'orange',
    'dried parsley': 'parsley',
    'farfalle': 'pasta',
    'fettuccine': 'pasta',
    'lasagne noodles': 'pasta',
    'rigatoni': 'pasta',
    'petite peas': 'peas',
    'snow peas': 'peas',
    'pineapple juice': 'pineapple',
    'pineapple with juice': 'pineapple',
    'prepared pizza crust': 'pizza',
    'starchy potato': 'potato',
    'filo pastry': 'puff pastry',
    'puff pastry dough': 'puff pastry',
    'canned pumpkin': 'pumpkin',
    'golden raisins': 'raisins',
    'ranch dressing': 'ranch',
    'brown rice flour': 'rice flour',
    'fresh rosemary': 'rosemary',
    'black peppercorns': 'salt and pepper',
    'coarse salt': 'salt and pepper',
    'dried pepper': 'salt and pepper',
    'seasoning': 'salt and pepper',
    'andouille sausage': 'sausage',
    'italian sausage': 'sausage',
    'light sour cream': 'sour cream',
    'lower sodium soy sauce': 'soy sauce',
    'baby spinach': 'spinach',
    'dried strawberries': 'strawberries',
    'demerara sugar': 'sugar',
    'powdered sugar': 'sugar',
    'turbinado sugar': 'sugar',
    'vanilla sugar': 'sugar',
    'corn syrup': 'syrup',
    'golden syrup': 'syrup',
    'simple syrup': 'syrup',
    'silken tofu': 'tofu',
    'oil packed sun dried tomatoes': 'tomato',
    'tomato sauce': 'tomato',
    'whole turkey': 'turkey',
    'artificial vanilla': 'vanilla',
    'vanilla bean': 'vanilla',
    'vanilla extract': 'vanilla',
    'rice vinegar': 'vinegar',
    'sparkling water': 'water',
    'canned water chestnuts': 'water chestnuts',
    'cracked wheat': 'wheat',
    'dry white wine': 'wine',
    'plain greek yogurt': 'yogurt',
    'plain yogurt': 'yogurt',
    'vanilla yogurt': 'yogurt',
}

### Use the replacement dictionary to replace the keys with the values

In [89]:
# Map ingredient variants onto their replacement names (data_df is updated in place).
data_df = ingredient_replacement(data_df, replacement_dict)
data_df

Unnamed: 0,recipe_name,cuisine_SP,aisle_SP,ingredients_SP
0,White Chocolate Macadamia Banana Bread,[],"[Baking, Milk, Eggs, Other Dairy, Produce,...","[sugar, butter, banana, milk, cheese, egg, whe..."
1,Asian Chickpea Lettuce Wraps,['Asian'],"[Canned and Jarred, Ethnic Foods, Condiments...","[chickpeas, chili sauce, barbecue sauce, soy s..."
2,Salsa Verde Chicken Tamales,['Mexican'],"[Meat, Meat, Produce, Produce, Spices and ...","[chicken, chicken, garlic, onion, cumin, salt ..."
3,Cheddar Polenta With Bacon Wrapped Asparagus,[],"[Produce, Meat, Canned and Jarred, Cereal;B...","[asparagus, bacon, broth, grits, cheese, bell ..."
4,Blasian's Deviled Eggs,['American'],"[Milk, Eggs, Other Dairy, Condiments, Cond...","[egg, mayonnaise, mustard, salt and pepper, ga..."
...,...,...,...,...
95,Authentic Mexican Wedding Cookies,['Mexican'],"[Milk, Eggs, Other Dairy, Baking, Baking, ...","[butter, sugar, vanilla, wheat flour, pecans, ..."
96,Corned Beef Ribs With Brown Sugar and Mustard ...,[],"[Spices and Seasonings, Spices and Seasonings...","[bay leaves, salt and pepper, cabbage, carrot,..."
97,Green Monster Ice Pops,[],"[Milk, Eggs, Other Dairy, Produce, Produce...","[almond milk, avocado, spinach, banana, honey,..."
98,Orange Zest Maple Date Bars,[],"[Dried Fruits;Produce, Beverages, Cereal, P...","[dates, water, maple syrup, orange, sugar, but..."


In [90]:
# type(data_df.loc[0,'ingredients_SP'])

### Create a new composite ingredient list from the data frame

In [91]:
# Rebuild the flat ingredient list now that variants have been replaced.
composite_ingredients = total_ing_list_from_df(data_df)

In [92]:
# Recount occurrences using the replaced names; the CSV export is left disabled.
ing_groups = sort_and_count_ingredient_list(composite_ingredients)
# ing_groups.to_csv('ingredients_to_parse_0908.csv')

### Create a dataframe from the composite ingredients list, group by ingredient name with count(), and sort descending

In [96]:
# Recompute the counts from the same composite list and show the 20 most frequent.
ing_groups = sort_and_count_ingredient_list(composite_ingredients)
ing_groups.head(20)

Unnamed: 0_level_0,count
ingredient,Unnamed: 1_level_1
salt and pepper,80
sugar,51
oil,39
butter,37
egg,33
wheat flour,32
garlic,32
onion,29
water,28
cheese,28


In [97]:
# Drop pantry staples and broad protein/dairy groups so they do not become features.
# errors='ignore' keeps this cell re-runnable: without it, drop() raises KeyError
# when a label is missing (already dropped, or absent from this sample).
common_ingredients = [
    "salt and pepper", 'baking powder', 'baking soda', 'egg', 'milk', 'sugar',
    'butter', 'wheat flour', 'water', 'lettuce', "oil", "tortilla", 'onion',
    'cheese', 'chicken', 'pork', 'beef', 'fish',
]
ing_groups = ing_groups.drop(index=common_ingredients, errors='ignore')
ing_groups.head(20)

Unnamed: 0_level_0,count
ingredient,Unnamed: 1_level_1
garlic,32
chocolate,20
tomato,20
vanilla,18
lemon,12
parsley,12
honey,10
vinegar,10
flour,9
cream,8


In [98]:
# Inspect the frame after ingredient replacement.
data_df

Unnamed: 0,recipe_name,cuisine_SP,aisle_SP,ingredients_SP
0,White Chocolate Macadamia Banana Bread,[],"[Baking, Milk, Eggs, Other Dairy, Produce,...","[sugar, butter, banana, milk, cheese, egg, whe..."
1,Asian Chickpea Lettuce Wraps,['Asian'],"[Canned and Jarred, Ethnic Foods, Condiments...","[chickpeas, chili sauce, barbecue sauce, soy s..."
2,Salsa Verde Chicken Tamales,['Mexican'],"[Meat, Meat, Produce, Produce, Spices and ...","[chicken, chicken, garlic, onion, cumin, salt ..."
3,Cheddar Polenta With Bacon Wrapped Asparagus,[],"[Produce, Meat, Canned and Jarred, Cereal;B...","[asparagus, bacon, broth, grits, cheese, bell ..."
4,Blasian's Deviled Eggs,['American'],"[Milk, Eggs, Other Dairy, Condiments, Cond...","[egg, mayonnaise, mustard, salt and pepper, ga..."
...,...,...,...,...
95,Authentic Mexican Wedding Cookies,['Mexican'],"[Milk, Eggs, Other Dairy, Baking, Baking, ...","[butter, sugar, vanilla, wheat flour, pecans, ..."
96,Corned Beef Ribs With Brown Sugar and Mustard ...,[],"[Spices and Seasonings, Spices and Seasonings...","[bay leaves, salt and pepper, cabbage, carrot,..."
97,Green Monster Ice Pops,[],"[Milk, Eggs, Other Dairy, Produce, Produce...","[almond milk, avocado, spinach, banana, honey,..."
98,Orange Zest Maple Date Bars,[],"[Dried Fruits;Produce, Beverages, Cereal, P...","[dates, water, maple syrup, orange, sugar, but..."


In [99]:
# List the columns present before the indicator columns are added.
data_df.columns

Index(['recipe_name', 'cuisine_SP', 'aisle_SP', 'ingredients_SP'], dtype='object')

In [56]:
# Save the frame with parsed and replaced ingredient lists.
# NOTE(review): the Mexican-only filter earlier in the notebook is commented out,
# so this file holds every cuisine despite its name -- confirm the intended name.
data_df.to_csv('Mexican_recipes_processed_to_lists.csv')

In [100]:
# Add one zero-filled indicator column for each of the 20 most frequent
# remaining ingredients. Slicing the index stops after 20 names instead of
# walking the whole index and skipping the rest.
for ingredient_name in ing_groups.index[:20]:
    data_df[ingredient_name] = 0
data_df.columns


Index(['recipe_name', 'cuisine_SP', 'aisle_SP', 'ingredients_SP', 'garlic',
       'chocolate', 'tomato', 'vanilla', 'lemon', 'parsley', 'honey',
       'vinegar', 'flour', 'cream', 'broth', 'basil', 'shallot', 'parmesan',
       'buttermilk', 'fresh pepper', 'almonds', 'chili powder', 'dry yeast',
       'orange'],
      dtype='object')

In [102]:
# Show the frame with the new indicator columns, all still 0.
data_df

Unnamed: 0,recipe_name,cuisine_SP,aisle_SP,ingredients_SP,garlic,chocolate,tomato,vanilla,lemon,parsley,...,broth,basil,shallot,parmesan,buttermilk,fresh pepper,almonds,chili powder,dry yeast,orange
0,White Chocolate Macadamia Banana Bread,[],"[Baking, Milk, Eggs, Other Dairy, Produce,...","[sugar, butter, banana, milk, cheese, egg, whe...",0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Asian Chickpea Lettuce Wraps,['Asian'],"[Canned and Jarred, Ethnic Foods, Condiments...","[chickpeas, chili sauce, barbecue sauce, soy s...",0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Salsa Verde Chicken Tamales,['Mexican'],"[Meat, Meat, Produce, Produce, Spices and ...","[chicken, chicken, garlic, onion, cumin, salt ...",0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Cheddar Polenta With Bacon Wrapped Asparagus,[],"[Produce, Meat, Canned and Jarred, Cereal;B...","[asparagus, bacon, broth, grits, cheese, bell ...",0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Blasian's Deviled Eggs,['American'],"[Milk, Eggs, Other Dairy, Condiments, Cond...","[egg, mayonnaise, mustard, salt and pepper, ga...",0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Authentic Mexican Wedding Cookies,['Mexican'],"[Milk, Eggs, Other Dairy, Baking, Baking, ...","[butter, sugar, vanilla, wheat flour, pecans, ...",0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
96,Corned Beef Ribs With Brown Sugar and Mustard ...,[],"[Spices and Seasonings, Spices and Seasonings...","[bay leaves, salt and pepper, cabbage, carrot,...",0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
97,Green Monster Ice Pops,[],"[Milk, Eggs, Other Dairy, Produce, Produce...","[almond milk, avocado, spinach, banana, honey,...",0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
98,Orange Zest Maple Date Bars,[],"[Dried Fruits;Produce, Beverages, Cereal, P...","[dates, water, maple syrup, orange, sugar, but...",0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [103]:
# Set a column to 1 for every recipe whose ingredient list contains that
# column's name; all other cells keep their current value (0 for the indicator
# columns created earlier).
# One boolean mask per column replaces the row-by-row, cell-by-cell .loc writes.
for col_name in data_df.columns:
    has_ingredient = data_df['ingredients_SP'].map(
        lambda ingredients, name=col_name: name in ingredients
    ).astype(bool)
    # Write only where there is a match, so columns that never match
    # (e.g. recipe_name) are left untouched.
    if has_ingredient.any():
        data_df.loc[has_ingredient, col_name] = 1
data_df

Unnamed: 0,recipe_name,cuisine_SP,aisle_SP,ingredients_SP,garlic,chocolate,tomato,vanilla,lemon,parsley,...,broth,basil,shallot,parmesan,buttermilk,fresh pepper,almonds,chili powder,dry yeast,orange
0,White Chocolate Macadamia Banana Bread,[],"[Baking, Milk, Eggs, Other Dairy, Produce,...","[sugar, butter, banana, milk, cheese, egg, whe...",0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Asian Chickpea Lettuce Wraps,['Asian'],"[Canned and Jarred, Ethnic Foods, Condiments...","[chickpeas, chili sauce, barbecue sauce, soy s...",0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,0
2,Salsa Verde Chicken Tamales,['Mexican'],"[Meat, Meat, Produce, Produce, Spices and ...","[chicken, chicken, garlic, onion, cumin, salt ...",1,0,0,0,0,0,...,1,0,0,0,0,0,0,1,0,0
3,Cheddar Polenta With Bacon Wrapped Asparagus,[],"[Produce, Meat, Canned and Jarred, Cereal;B...","[asparagus, bacon, broth, grits, cheese, bell ...",0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
4,Blasian's Deviled Eggs,['American'],"[Milk, Eggs, Other Dairy, Condiments, Cond...","[egg, mayonnaise, mustard, salt and pepper, ga...",1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,Authentic Mexican Wedding Cookies,['Mexican'],"[Milk, Eggs, Other Dairy, Baking, Baking, ...","[butter, sugar, vanilla, wheat flour, pecans, ...",0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
96,Corned Beef Ribs With Brown Sugar and Mustard ...,[],"[Spices and Seasonings, Spices and Seasonings...","[bay leaves, salt and pepper, cabbage, carrot,...",1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
97,Green Monster Ice Pops,[],"[Milk, Eggs, Other Dairy, Produce, Produce...","[almond milk, avocado, spinach, banana, honey,...",0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
98,Orange Zest Maple Date Bars,[],"[Dried Fruits;Produce, Beverages, Cereal, P...","[dates, water, maple syrup, orange, sugar, but...",0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [104]:
# Keep only the cuisine label and the indicator columns for machine learning.
# errors='ignore' lets the cell be re-run after the columns are already gone;
# without it a second run raises KeyError.
data_df = data_df.drop(columns=["recipe_name", "aisle_SP", "ingredients_SP"], errors='ignore')
data_df

Unnamed: 0,cuisine_SP,garlic,chocolate,tomato,vanilla,lemon,parsley,honey,vinegar,flour,...,broth,basil,shallot,parmesan,buttermilk,fresh pepper,almonds,chili powder,dry yeast,orange
0,[],0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,['Asian'],0,0,0,0,0,0,0,1,0,...,0,1,0,0,0,0,0,0,0,0
2,['Mexican'],1,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,1,0,0
3,[],0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0
4,['American'],1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,['Mexican'],0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
96,[],1,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
97,[],0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
98,[],0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,1


In [105]:
# Write the reduced frame (cuisine label plus indicator columns) to CSV.
data_export_file = '100_random_cleaned_0908.csv'
data_df.to_csv(data_export_file)