In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
%matplotlib inline

# Loading Data

In [2]:
df = pd.read_pickle('../data/nytc_data.pkl')
display(df.head(5))
df.shape

Unnamed: 0,url,recipe_name,nutrition,category,cuisine,ingredient,instruction,raw_schema
0,https://cooking.nytimes.com/recipes/1024397-cr...,Crispy Potato Tacos,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, tacos, appetizer, main course",mexican,"[Sea salt, 1 1/2 pounds potatoes (any variety)...",[Bring a large saucepan of salted water to a b...,"{'@context': 'http://schema.org', '@type': 'Re..."
1,https://cooking.nytimes.com/recipes/1024394-ca...,Cashew Celery,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, easy, quick, weeknight, vegetables, ma...",,"[2 teaspoons cornstarch, 1/4 cup vegetable sto...","[Combine the cornstarch, stock, rice wine and ...","{'@context': 'http://schema.org', '@type': 'Re..."
2,https://cooking.nytimes.com/recipes/1024372-ba...,Basil-Butter Pasta,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, easy, weeknight, pastas, main course",,"[Salt, 3 cups packed basil leaves (about 80 gr...",[Bring a large pot of well-salted water to a b...,"{'@context': 'http://schema.org', '@type': 'Re..."
3,https://cooking.nytimes.com/recipes/1024334-tu...,Turmeric Potato Salad,"{'@context': 'http://schema.org', '@type': 'Nu...",side dish,,"[2 pounds small, yellow-fleshed potatoes, Salt...","[In a medium pot over high heat, boil skin-on ...","{'@context': 'http://schema.org', '@type': 'Re..."
4,https://cooking.nytimes.com/recipes/1024222-st...,Street Corn Pudding,"{'@context': 'http://schema.org', '@type': 'Nu...","brunch, dinner, lunch, custards and puddings, ...",southern,"[Nonstick cooking spray, 1 large or 2 medium j...",[Heat oven to 350 degrees. Coat an 8-inch squa...,"{'@context': 'http://schema.org', '@type': 'Re..."


(22830, 8)

# Data Cleaning

Dropping empty and NaN entries

In [3]:
nan_count = df.isna().sum()
nan_count

url               0
recipe_name      87
nutrition      2746
category        140
cuisine         140
ingredient      140
instruction     403
raw_schema       87
dtype: int64

In [4]:
df = df.replace(r'^\s*$', np.nan, regex=True)
df

Unnamed: 0,url,recipe_name,nutrition,category,cuisine,ingredient,instruction,raw_schema
0,https://cooking.nytimes.com/recipes/1024397-cr...,Crispy Potato Tacos,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, tacos, appetizer, main course",mexican,"[Sea salt, 1 1/2 pounds potatoes (any variety)...",[Bring a large saucepan of salted water to a b...,"{'@context': 'http://schema.org', '@type': 'Re..."
1,https://cooking.nytimes.com/recipes/1024394-ca...,Cashew Celery,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, easy, quick, weeknight, vegetables, ma...",,"[2 teaspoons cornstarch, 1/4 cup vegetable sto...","[Combine the cornstarch, stock, rice wine and ...","{'@context': 'http://schema.org', '@type': 'Re..."
2,https://cooking.nytimes.com/recipes/1024372-ba...,Basil-Butter Pasta,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, easy, weeknight, pastas, main course",,"[Salt, 3 cups packed basil leaves (about 80 gr...",[Bring a large pot of well-salted water to a b...,"{'@context': 'http://schema.org', '@type': 'Re..."
3,https://cooking.nytimes.com/recipes/1024334-tu...,Turmeric Potato Salad,"{'@context': 'http://schema.org', '@type': 'Nu...",side dish,,"[2 pounds small, yellow-fleshed potatoes, Salt...","[In a medium pot over high heat, boil skin-on ...","{'@context': 'http://schema.org', '@type': 'Re..."
4,https://cooking.nytimes.com/recipes/1024222-st...,Street Corn Pudding,"{'@context': 'http://schema.org', '@type': 'Nu...","brunch, dinner, lunch, custards and puddings, ...",southern,"[Nonstick cooking spray, 1 large or 2 medium j...",[Heat oven to 350 degrees. Coat an 8-inch squa...,"{'@context': 'http://schema.org', '@type': 'Re..."
...,...,...,...,...,...,...,...,...
22825,https://cooking.nytimes.com/recipes/907-white-...,White Fruitcake,,dessert,,"[1 pound butter at room temperature, 2 cups su...","[Preheat oven to 325 degrees., Cream butter an...","{'@context': 'http://schema.org', '@type': 'Re..."
22826,https://cooking.nytimes.com/recipes/867-tomato...,Tomato sauce,"{'@context': 'http://schema.org', '@type': 'Nu...",sauces and gravies,italian,"[3 tablespoons butter, 2 tablespoons finely ch...",[Heat one tablespoon of the butter in a casser...,"{'@context': 'http://schema.org', '@type': 'Re..."
22827,https://cooking.nytimes.com/recipes/866-goat-c...,Goat cheese filling for ravioli,,,italian,"[1 1/2 cups firmly packed chopped goat cheese,...","[Put the goat cheese, ricotta, salt, pepper, c...","{'@context': 'http://schema.org', '@type': 'Re..."
22828,https://cooking.nytimes.com/recipes/865-lobste...,Lobster stuffing for ravioli,"{'@context': 'http://schema.org', '@type': 'Nu...",,,"[2 1 1/2-pound live lobsters, 1 tablespoon cor...",[Cut the spinal cord of each lobster by insert...,"{'@context': 'http://schema.org', '@type': 'Re..."


In [5]:
nan_count = df.isna().sum()
nan_count

url                0
recipe_name       87
nutrition       2746
category         820
cuisine        13017
ingredient       140
instruction      403
raw_schema        87
dtype: int64

We have enough data to drop entries which do not have `cuisine`

In [6]:
df = df.dropna(subset=['recipe_name', 'category', 'instruction', 'ingredient', 'cuisine', 'nutrition'])
print(df.isna().sum())


url            0
recipe_name    0
nutrition      0
category       0
cuisine        0
ingredient     0
instruction    0
raw_schema     0
dtype: int64


In [11]:
df = df.reset_index(drop=True)
df

Unnamed: 0,url,recipe_name,nutrition,category,cuisine,ingredient,instruction,raw_schema
0,https://cooking.nytimes.com/recipes/1024397-cr...,Crispy Potato Tacos,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, tacos, appetizer, main course",mexican,"[Sea salt, 1 1/2 pounds potatoes (any variety)...",[Bring a large saucepan of salted water to a b...,"{'@context': 'http://schema.org', '@type': 'Re..."
1,https://cooking.nytimes.com/recipes/1024222-st...,Street Corn Pudding,"{'@context': 'http://schema.org', '@type': 'Nu...","brunch, dinner, lunch, custards and puddings, ...",southern,"[Nonstick cooking spray, 1 large or 2 medium j...",[Heat oven to 350 degrees. Coat an 8-inch squa...,"{'@context': 'http://schema.org', '@type': 'Re..."
2,https://cooking.nytimes.com/recipes/1024129-go...,Gorditas de Maíz,"{'@context': 'http://schema.org', '@type': 'Nu...","project, side dish",mexican,[1 3/4 pounds/794 grams fresh fine-grind corn ...,"[If using fresh masa, knead the masa, 1/2 teas...","{'@context': 'http://schema.org', '@type': 'Re..."
3,https://cooking.nytimes.com/recipes/1024128-to...,Tortillas de Maíz,"{'@context': 'http://schema.org', '@type': 'Nu...","project, side dish",mexican,[1 pound/453 grams fresh fine-grind corn masa ...,"[Set out a 1-gallon zip-top freezer bag, a pla...","{'@context': 'http://schema.org', '@type': 'Re..."
4,https://cooking.nytimes.com/recipes/1024130-te...,Tetelas de Frijol Negro (Black Bean Masa Dumpl...,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, lunch, dumplings, project, side dish",mexican,[1 pound/453 grams fresh fine-grind corn masa ...,[Set out a blender; a 1-gallon zip-top freezer...,"{'@context': 'http://schema.org', '@type': 'Re..."
...,...,...,...,...,...,...,...,...
8548,https://cooking.nytimes.com/recipes/2362-pork-...,Pork Chops Provencal,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, main course",french,"[2 tablespoons olive oil, or as desired, 3 pou...",[Heat oil in a large skillet (12 or 15 inches)...,"{'@context': 'http://schema.org', '@type': 'Re..."
8549,https://cooking.nytimes.com/recipes/2322-spina...,Spinach Linguine With Tomato Sauce,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, pastas, main course",italian,"[1/2 pound fresh or dried green linguine, Salt...","[Bring to the boil 2 quarts water, or enough t...","{'@context': 'http://schema.org', '@type': 'Re..."
8550,https://cooking.nytimes.com/recipes/2283-lobst...,Lobster and Olive Pasta Salad,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, pastas, main course",italian,[3/4 pound mixed fresh yellow and green fettuc...,[Cook fettuccine in boiling water about 30 sec...,"{'@context': 'http://schema.org', '@type': 'Re..."
8551,https://cooking.nytimes.com/recipes/2282-bread...,Breaded Sweetbreads,"{'@context': 'http://schema.org', '@type': 'Nu...","project, appetizer",french,"[1 pair sweetbreads, about 1 pound, 1 egg, 2 t...",[Put the sweetbreads in a mixing bowl and add ...,"{'@context': 'http://schema.org', '@type': 'Re..."


In [12]:
df.to_pickle('../data/nytc_filtered.pkl')
df

Unnamed: 0,url,recipe_name,nutrition,category,cuisine,ingredient,instruction,raw_schema
0,https://cooking.nytimes.com/recipes/1024397-cr...,Crispy Potato Tacos,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, tacos, appetizer, main course",mexican,"[Sea salt, 1 1/2 pounds potatoes (any variety)...",[Bring a large saucepan of salted water to a b...,"{'@context': 'http://schema.org', '@type': 'Re..."
1,https://cooking.nytimes.com/recipes/1024222-st...,Street Corn Pudding,"{'@context': 'http://schema.org', '@type': 'Nu...","brunch, dinner, lunch, custards and puddings, ...",southern,"[Nonstick cooking spray, 1 large or 2 medium j...",[Heat oven to 350 degrees. Coat an 8-inch squa...,"{'@context': 'http://schema.org', '@type': 'Re..."
2,https://cooking.nytimes.com/recipes/1024129-go...,Gorditas de Maíz,"{'@context': 'http://schema.org', '@type': 'Nu...","project, side dish",mexican,[1 3/4 pounds/794 grams fresh fine-grind corn ...,"[If using fresh masa, knead the masa, 1/2 teas...","{'@context': 'http://schema.org', '@type': 'Re..."
3,https://cooking.nytimes.com/recipes/1024128-to...,Tortillas de Maíz,"{'@context': 'http://schema.org', '@type': 'Nu...","project, side dish",mexican,[1 pound/453 grams fresh fine-grind corn masa ...,"[Set out a 1-gallon zip-top freezer bag, a pla...","{'@context': 'http://schema.org', '@type': 'Re..."
4,https://cooking.nytimes.com/recipes/1024130-te...,Tetelas de Frijol Negro (Black Bean Masa Dumpl...,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, lunch, dumplings, project, side dish",mexican,[1 pound/453 grams fresh fine-grind corn masa ...,[Set out a blender; a 1-gallon zip-top freezer...,"{'@context': 'http://schema.org', '@type': 'Re..."
...,...,...,...,...,...,...,...,...
8548,https://cooking.nytimes.com/recipes/2362-pork-...,Pork Chops Provencal,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, main course",french,"[2 tablespoons olive oil, or as desired, 3 pou...",[Heat oil in a large skillet (12 or 15 inches)...,"{'@context': 'http://schema.org', '@type': 'Re..."
8549,https://cooking.nytimes.com/recipes/2322-spina...,Spinach Linguine With Tomato Sauce,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, pastas, main course",italian,"[1/2 pound fresh or dried green linguine, Salt...","[Bring to the boil 2 quarts water, or enough t...","{'@context': 'http://schema.org', '@type': 'Re..."
8550,https://cooking.nytimes.com/recipes/2283-lobst...,Lobster and Olive Pasta Salad,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, pastas, main course",italian,[3/4 pound mixed fresh yellow and green fettuc...,[Cook fettuccine in boiling water about 30 sec...,"{'@context': 'http://schema.org', '@type': 'Re..."
8551,https://cooking.nytimes.com/recipes/2282-bread...,Breaded Sweetbreads,"{'@context': 'http://schema.org', '@type': 'Nu...","project, appetizer",french,"[1 pair sweetbreads, about 1 pound, 1 egg, 2 t...",[Put the sweetbreads in a mixing bowl and add ...,"{'@context': 'http://schema.org', '@type': 'Re..."


In [10]:
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import wordnet
# nltk.download('stopwords')
# nltk.download('wordnet')
# nltk.download('averaged_perceptron_tagger')
# nltk.download('words')
# nltk.download('omw-1.4')

In [11]:
features = recipe_filtered[['recipe_name', 'nutrition', 'category', 'cuisine', 'ingredient', 'instruction']]
features

Unnamed: 0,recipe_name,nutrition,category,cuisine,ingredient,instruction
0,Crispy Potato Tacos,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, tacos, appetizer, main course",mexican,"[Sea salt, 1 1/2 pounds potatoes (any variety)...",[Bring a large saucepan of salted water to a b...
1,Street Corn Pudding,"{'@context': 'http://schema.org', '@type': 'Nu...","brunch, dinner, lunch, custards and puddings, ...",southern,"[Nonstick cooking spray, 1 large or 2 medium j...",[Heat oven to 350 degrees. Coat an 8-inch squa...
2,Gorditas de Maíz,"{'@context': 'http://schema.org', '@type': 'Nu...","project, side dish",mexican,[1 3/4 pounds/794 grams fresh fine-grind corn ...,"[If using fresh masa, knead the masa, 1/2 teas..."
3,Tortillas de Maíz,"{'@context': 'http://schema.org', '@type': 'Nu...","project, side dish",mexican,[1 pound/453 grams fresh fine-grind corn masa ...,"[Set out a 1-gallon zip-top freezer bag, a pla..."
4,Tetelas de Frijol Negro (Black Bean Masa Dumpl...,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, lunch, dumplings, project, side dish",mexican,[1 pound/453 grams fresh fine-grind corn masa ...,[Set out a blender; a 1-gallon zip-top freezer...
...,...,...,...,...,...,...
8548,Pork Chops Provencal,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, main course",french,"[2 tablespoons olive oil, or as desired, 3 pou...",[Heat oil in a large skillet (12 or 15 inches)...
8549,Spinach Linguine With Tomato Sauce,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, pastas, main course",italian,"[1/2 pound fresh or dried green linguine, Salt...","[Bring to the boil 2 quarts water, or enough t..."
8550,Lobster and Olive Pasta Salad,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, pastas, main course",italian,[3/4 pound mixed fresh yellow and green fettuc...,[Cook fettuccine in boiling water about 30 sec...
8551,Breaded Sweetbreads,"{'@context': 'http://schema.org', '@type': 'Nu...","project, appetizer",french,"[1 pair sweetbreads, about 1 pound, 1 egg, 2 t...",[Put the sweetbreads in a mixing bowl and add ...


In [12]:
display(features.sample(1)['nutrition'].tolist())

[{'@context': 'http://schema.org',
  '@type': 'NutritionInformation',
  'calories': 240,
  'unsaturatedFatContent': '4 grams',
  'carbohydrateContent': '37 grams',
  'cholesterolContent': None,
  'fatContent': '9 grams',
  'fiberContent': '4 grams',
  'proteinContent': '2 grams',
  'saturatedFatContent': '5 grams',
  'sodiumContent': '6 milligrams',
  'sugarContent': '22 grams',
  'transFatContent': '0 grams'}]

In [13]:
from re import sub

def process_nutrition_string(raw_string):
    return sub("[^0-9]", "", str(raw_string))

def extract_nutritions(df):
    df['calories'] = process_nutrition_string(df['nutrition']['calories'])
    df['carbohydrates'] = process_nutrition_string(df['nutrition']['carbohydrateContent'])
    df['cholesterol'] = process_nutrition_string(df['nutrition']['cholesterolContent'])
    df['fiber'] = process_nutrition_string(df['nutrition']['fiberContent'])
    df['protein'] = process_nutrition_string(df['nutrition']['proteinContent'])
    df['total_fat'] = process_nutrition_string(df['nutrition']['fatContent'])
    df['unsaturated_fat'] = process_nutrition_string(df['nutrition']['unsaturatedFatContent'])
    df['saturated_fat'] = process_nutrition_string(df['nutrition']['saturatedFatContent'])
    df['trans_fat'] = process_nutrition_string(df['nutrition']['transFatContent'])
    df['sugar'] = process_nutrition_string(df['nutrition']['sugarContent'])
    df['sodium'] = process_nutrition_string(df['nutrition']['sodiumContent'])
    return df

df = df.apply(extract_nutritions, axis=1)


In [14]:
df.drop(['nutrition'], axis=1)

Unnamed: 0,url,recipe_name,category,cuisine,ingredient,instruction,raw_schema,calories,carbohydrates,cholesterol,fiber,protein,total_fat,unsaturated_fat,saturated_fat,trans_fat,sugar,sodium
0,https://cooking.nytimes.com/recipes/1024397-cr...,Crispy Potato Tacos,"dinner, tacos, appetizer, main course",mexican,"[Sea salt, 1 1/2 pounds potatoes (any variety)...",[Bring a large saucepan of salted water to a b...,"{'@context': 'http://schema.org', '@type': 'Re...",656,86,,12,22,27,14,10,0,8,1201
1,https://cooking.nytimes.com/recipes/1024222-st...,Street Corn Pudding,"brunch, dinner, lunch, custards and puddings, ...",southern,"[Nonstick cooking spray, 1 large or 2 medium j...",[Heat oven to 350 degrees. Coat an 8-inch squa...,"{'@context': 'http://schema.org', '@type': 'Re...",316,30,,3,5,20,9,10,1,7,495
2,https://cooking.nytimes.com/recipes/1024129-go...,Gorditas de Maíz,"project, side dish",mexican,[1 3/4 pounds/794 grams fresh fine-grind corn ...,"[If using fresh masa, knead the masa, 1/2 teas...","{'@context': 'http://schema.org', '@type': 'Re...",51,11,,1,2,1,1,0,0,4,137
3,https://cooking.nytimes.com/recipes/1024128-to...,Tortillas de Maíz,"project, side dish",mexican,[1 pound/453 grams fresh fine-grind corn masa ...,"[Set out a 1-gallon zip-top freezer bag, a pla...","{'@context': 'http://schema.org', '@type': 'Re...",24,5,,1,1,0,0,0,0,2,66
4,https://cooking.nytimes.com/recipes/1024130-te...,Tetelas de Frijol Negro (Black Bean Masa Dumpl...,"dinner, lunch, dumplings, project, side dish",mexican,[1 pound/453 grams fresh fine-grind corn masa ...,[Set out a blender; a 1-gallon zip-top freezer...,"{'@context': 'http://schema.org', '@type': 'Re...",112,14,,4,4,5,3,2,0,2,198
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8548,https://cooking.nytimes.com/recipes/2362-pork-...,Pork Chops Provencal,"dinner, main course",french,"[2 tablespoons olive oil, or as desired, 3 pou...",[Heat oil in a large skillet (12 or 15 inches)...,"{'@context': 'http://schema.org', '@type': 'Re...",541,17,,4,54,27,16,8,0,8,1408
8549,https://cooking.nytimes.com/recipes/2322-spina...,Spinach Linguine With Tomato Sauce,"dinner, pastas, main course",italian,"[1/2 pound fresh or dried green linguine, Salt...","[Bring to the boil 2 quarts water, or enough t...","{'@context': 'http://schema.org', '@type': 'Re...",318,47,,3,9,11,7,3,0,5,424
8550,https://cooking.nytimes.com/recipes/2283-lobst...,Lobster and Olive Pasta Salad,"dinner, pastas, main course",italian,[3/4 pound mixed fresh yellow and green fettuc...,[Cook fettuccine in boiling water about 30 sec...,"{'@context': 'http://schema.org', '@type': 'Re...",519,48,,5,22,27,22,4,0,3,565
8551,https://cooking.nytimes.com/recipes/2282-bread...,Breaded Sweetbreads,"project, appetizer",french,"[1 pair sweetbreads, about 1 pound, 1 egg, 2 t...",[Put the sweetbreads in a mixing bowl and add ...,"{'@context': 'http://schema.org', '@type': 'Re...",544,20,,1,18,43,23,16,1,1,433


In [15]:
from ingredient_parser import parse_ingredient, parse_multiple_ingredients

In [17]:
random_sample = recipe_filtered.sample(1)
ingredients = random_sample['ingredient'].values[0]
ingredients

['3/4 pound mixed fresh yellow and green fettuccine',
 '1 jar (2 ounces drained weight) pimento-stuffed olives, chopped',
 '1/2 cup Greek olives (calamata), drained and pitted',
 '1 large clove garlic, minced',
 '1/4 cup minced fresh parsley',
 '1/2 cup homemade mayonnaise (made with part olive oil)',
 '1/2 cup plain yogurt',
 '3 tablespoons white wine vinegar',
 '1 1/2 tablespoons good quality olive oil',
 '1 pound cooked lobster meat, cut into bite-size pieces',
 '1 ripe avocado']

In [18]:
parsed_ingredients = parse_multiple_ingredients(ingredients)
display(parsed_ingredients)
parsed_ingredients = [i['name'] for i in parsed_ingredients if i['name'] != '']
display(parsed_ingredients)

[{'sentence': '3/4 pound mixed fresh yellow and green fettuccine',
  'quantity': '0.75',
  'unit': 'pounds',
  'name': 'fresh yellow and green fettuccine',
  'comment': 'mixed',
  'other': ''},
 {'sentence': '1 jar (2 ounces drained weight) pimento-stuffed olives, chopped',
  'quantity': '1',
  'unit': 'jar',
  'name': 'pimento-stuffed olives',
  'comment': '(2 ounces drained weight), chopped',
  'other': ''},
 {'sentence': '1/2 cup Greek olives (calamata), drained and pitted',
  'quantity': '0.5',
  'unit': 'cups',
  'name': 'Greek olives',
  'comment': '(calamata), drained and pitted',
  'other': ''},
 {'sentence': '1 large clove garlic, minced',
  'quantity': '1',
  'unit': 'large clove',
  'name': 'garlic',
  'comment': 'minced',
  'other': ''},
 {'sentence': '1/4 cup minced fresh parsley',
  'quantity': '0.25',
  'unit': 'cups',
  'name': 'fresh parsley',
  'comment': 'minced',
  'other': ''},
 {'sentence': '1/2 cup homemade mayonnaise (made with part olive oil)',
  'quantity': '0

['fresh yellow and green fettuccine',
 'pimento-stuffed olives',
 'Greek olives',
 'garlic',
 'fresh parsley',
 'mayonnaise',
 'yogurt',
 'white wine vinegar',
 'olive oil',
 'lobster meat',
 'avocado']

In [19]:
', '.join(parsed_ingredients).lower()

'fresh yellow and green fettuccine, pimento-stuffed olives, greek olives, garlic, fresh parsley, mayonnaise, yogurt, white wine vinegar, olive oil, lobster meat, avocado'

In [17]:
def ingredient_parser(ingredients):
    parsed_ingredients = parse_multiple_ingredients(ingredients)
    parsed_ingredients = [i['name'] for i in parsed_ingredients if i['name'] != '']
    return ', '.join(parsed_ingredients).lower()

In [21]:
print(ingredient_parser(ingredients))

fresh yellow and green fettuccine, pimento-stuffed olives, greek olives, garlic, fresh parsley, mayonnaise, yogurt, white wine vinegar, olive oil, lobster meat, avocado


In [18]:
df['ingredient_parsed'] = df['ingredient'].apply(lambda x: ingredient_parser(x))

In [19]:
df

Unnamed: 0,url,recipe_name,nutrition,category,cuisine,ingredient,instruction,raw_schema,calories,carbohydrates,cholesterol,fiber,protein,total_fat,unsaturated_fat,saturated_fat,trans_fat,sugar,sodium,ingredient_parsed
0,https://cooking.nytimes.com/recipes/1024397-cr...,Crispy Potato Tacos,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, tacos, appetizer, main course",mexican,"[Sea salt, 1 1/2 pounds potatoes (any variety)...",[Bring a large saucepan of salted water to a b...,"{'@context': 'http://schema.org', '@type': 'Re...",656,86,,12,22,27,14,10,0,8,1201,"sea salt, potatoes, cheddar, handful of cilant..."
1,https://cooking.nytimes.com/recipes/1024222-st...,Street Corn Pudding,"{'@context': 'http://schema.org', '@type': 'Nu...","brunch, dinner, lunch, custards and puddings, ...",southern,"[Nonstick cooking spray, 1 large or 2 medium j...",[Heat oven to 350 degrees. Coat an 8-inch squa...,"{'@context': 'http://schema.org', '@type': 'Re...",316,30,,3,5,20,9,10,1,7,495,"nonstick cooking spray, creamed corn, kernel c..."
2,https://cooking.nytimes.com/recipes/1024129-go...,Gorditas de Maíz,"{'@context': 'http://schema.org', '@type': 'Nu...","project, side dish",mexican,[1 3/4 pounds/794 grams fresh fine-grind corn ...,"[If using fresh masa, knead the masa, 1/2 teas...","{'@context': 'http://schema.org', '@type': 'Re...",51,11,,1,2,1,1,0,0,4,137,"fresh fine-grind corn masa masa harina, sea salt"
3,https://cooking.nytimes.com/recipes/1024128-to...,Tortillas de Maíz,"{'@context': 'http://schema.org', '@type': 'Nu...","project, side dish",mexican,[1 pound/453 grams fresh fine-grind corn masa ...,"[Set out a 1-gallon zip-top freezer bag, a pla...","{'@context': 'http://schema.org', '@type': 'Re...",24,5,,1,1,0,0,0,0,2,66,"fresh fine-grind corn masa masa harina, sea salt"
4,https://cooking.nytimes.com/recipes/1024130-te...,Tetelas de Frijol Negro (Black Bean Masa Dumpl...,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, lunch, dumplings, project, side dish",mexican,[1 pound/453 grams fresh fine-grind corn masa ...,[Set out a blender; a 1-gallon zip-top freezer...,"{'@context': 'http://schema.org', '@type': 'Re...",112,14,,4,4,5,3,2,0,2,198,"fresh fine-grind corn masa masa harina, sea sa..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8548,https://cooking.nytimes.com/recipes/2362-pork-...,Pork Chops Provencal,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, main course",french,"[2 tablespoons olive oil, or as desired, 3 pou...",[Heat oil in a large skillet (12 or 15 inches)...,"{'@context': 'http://schema.org', '@type': 'Re...",541,17,,4,54,27,16,8,0,8,1408,"olive oil, pork chops, mushrooms, dry white wi..."
8549,https://cooking.nytimes.com/recipes/2322-spina...,Spinach Linguine With Tomato Sauce,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, pastas, main course",italian,"[1/2 pound fresh or dried green linguine, Salt...","[Bring to the boil 2 quarts water, or enough t...","{'@context': 'http://schema.org', '@type': 'Re...",318,47,,3,9,11,7,3,0,5,424,"fresh green linguine, salt, tomatoes, olive oi..."
8550,https://cooking.nytimes.com/recipes/2283-lobst...,Lobster and Olive Pasta Salad,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, pastas, main course",italian,[3/4 pound mixed fresh yellow and green fettuc...,[Cook fettuccine in boiling water about 30 sec...,"{'@context': 'http://schema.org', '@type': 'Re...",519,48,,5,22,27,22,4,0,3,565,"fresh yellow and green fettuccine, pimento-stu..."
8551,https://cooking.nytimes.com/recipes/2282-bread...,Breaded Sweetbreads,"{'@context': 'http://schema.org', '@type': 'Nu...","project, appetizer",french,"[1 pair sweetbreads, about 1 pound, 1 egg, 2 t...",[Put the sweetbreads in a mixing bowl and add ...,"{'@context': 'http://schema.org', '@type': 'Re...",544,20,,1,18,43,23,16,1,1,433,"sweetbreads, egg, water, oil, salt, freshly gr..."


In [None]:
df.to_pickle('../data/nytc_training.pkl')

In [26]:
feature_set.to_pickle('../data/nytc_features.pkl')

In [46]:
feature_set[feature_set['ingredient_parsed'].str.contains(r'\d')]['ingredient_parsed'].to_list()

['fresh fine-grind corn masa masa harina, sea salt, lard, white onion, garlic, chiles de árbol, fresh avocado leaves, black beans and their liquid a href " https cooking.nytimes.com recipes 1024131-frijoles-de-olla " frijoles de olla their liquid), crema',
 'black tea bags, span a href " https cooking.nytimes.com recipes 1024366-lemonade " span lemonade span a, lemon, ice',
 'kosher salt, honey, dark brown sugar, prague powder 1, allspice berries, fresh bay leaves, fresh lemon, black peppercorns, fresh skin-on',
 '290 grams blanched almonds, 130 grams granulated sugar, egg whites, almond extract, confectioners sugar, amarena cherries unblanched almonds',
 'ground chicken, scallion greens, ginger, white miso, potato starch, sea salt, a href " https cooking.nytimes.com recipes 3213-dashi " dashi a, mirin, white tamari, white miso, mushrooms, tofu, spinach, white sesame seeds, ground yuzu shichimi togarashi',
 'unsalted butter, 45 grams sweetened coconut, almond flour, 67 grams granulated