In [15]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
%matplotlib inline

# Loading Data

In [16]:
# Read the pickled recipe table, preview its first rows and report its size.
# NOTE(review): unpickling can execute arbitrary code, so only load this
# file if it comes from a trusted source.
df = pd.read_pickle('../data/nytc_data.pkl')
display(df.head())  # head() returns the first 5 rows by default
df.shape

Unnamed: 0,url,recipe_name,nutrition,category,cuisine,ingredient,instruction
0,https://cooking.nytimes.com/recipes/1024397-cr...,Crispy Potato Tacos,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, tacos, appetizer, main course",mexican,"[Sea salt, 1 1/2 pounds potatoes (any variety)...",[Bring a large saucepan of salted water to a b...
1,https://cooking.nytimes.com/recipes/1024394-ca...,Cashew Celery,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, easy, quick, weeknight, vegetables, ma...",,"[2 teaspoons cornstarch, 1/4 cup vegetable sto...","[Combine the cornstarch, stock, rice wine and ..."
2,https://cooking.nytimes.com/recipes/1024372-ba...,Basil-Butter Pasta,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, easy, weeknight, pastas, main course",,"[Salt, 3 cups packed basil leaves (about 80 gr...",[Bring a large pot of well-salted water to a b...
3,https://cooking.nytimes.com/recipes/1024334-tu...,Turmeric Potato Salad,"{'@context': 'http://schema.org', '@type': 'Nu...",side dish,,"[2 pounds small, yellow-fleshed potatoes, Salt...","[In a medium pot over high heat, boil skin-on ..."
4,https://cooking.nytimes.com/recipes/1024222-st...,Street Corn Pudding,"{'@context': 'http://schema.org', '@type': 'Nu...","brunch, dinner, lunch, custards and puddings, ...",southern,"[Nonstick cooking spray, 1 large or 2 medium j...",[Heat oven to 350 degrees. Coat an 8-inch squa...


(22830, 7)

# Data Cleaning

Dropping empty and NaN entries

In [17]:
# Per-column count of missing values in the freshly loaded frame.
# Only real NaN/None values are flagged here; blank strings are not counted.
nan_count = df.isnull().sum()
nan_count

url               0
recipe_name      68
nutrition      2728
category        121
cuisine         121
ingredient      121
instruction     384
dtype: int64

In [18]:
# Turn cells that are empty or whitespace-only strings into NaN so that
# isna() picks them up as missing values.
df = df.replace(to_replace=r'^\s*$', value=np.nan, regex=True)
df

Unnamed: 0,url,recipe_name,nutrition,category,cuisine,ingredient,instruction
0,https://cooking.nytimes.com/recipes/1024397-cr...,Crispy Potato Tacos,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, tacos, appetizer, main course",mexican,"[Sea salt, 1 1/2 pounds potatoes (any variety)...",[Bring a large saucepan of salted water to a b...
1,https://cooking.nytimes.com/recipes/1024394-ca...,Cashew Celery,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, easy, quick, weeknight, vegetables, ma...",,"[2 teaspoons cornstarch, 1/4 cup vegetable sto...","[Combine the cornstarch, stock, rice wine and ..."
2,https://cooking.nytimes.com/recipes/1024372-ba...,Basil-Butter Pasta,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, easy, weeknight, pastas, main course",,"[Salt, 3 cups packed basil leaves (about 80 gr...",[Bring a large pot of well-salted water to a b...
3,https://cooking.nytimes.com/recipes/1024334-tu...,Turmeric Potato Salad,"{'@context': 'http://schema.org', '@type': 'Nu...",side dish,,"[2 pounds small, yellow-fleshed potatoes, Salt...","[In a medium pot over high heat, boil skin-on ..."
4,https://cooking.nytimes.com/recipes/1024222-st...,Street Corn Pudding,"{'@context': 'http://schema.org', '@type': 'Nu...","brunch, dinner, lunch, custards and puddings, ...",southern,"[Nonstick cooking spray, 1 large or 2 medium j...",[Heat oven to 350 degrees. Coat an 8-inch squa...
...,...,...,...,...,...,...,...
22825,https://cooking.nytimes.com/recipes/907-white-...,White Fruitcake,,dessert,,"[1 pound butter at room temperature, 2 cups su...","[Preheat oven to 325 degrees., Cream butter an..."
22826,https://cooking.nytimes.com/recipes/867-tomato...,Tomato sauce,"{'@context': 'http://schema.org', '@type': 'Nu...",sauces and gravies,italian,"[3 tablespoons butter, 2 tablespoons finely ch...",[Heat one tablespoon of the butter in a casser...
22827,https://cooking.nytimes.com/recipes/866-goat-c...,Goat cheese filling for ravioli,,,italian,"[1 1/2 cups firmly packed chopped goat cheese,...","[Put the goat cheese, ricotta, salt, pepper, c..."
22828,https://cooking.nytimes.com/recipes/865-lobste...,Lobster stuffing for ravioli,"{'@context': 'http://schema.org', '@type': 'Nu...",,,"[2 1 1/2-pound live lobsters, 1 tablespoon cor...",[Cut the spinal cord of each lobster by insert...


In [19]:
# Recount the missing values per column now that blank strings are NaN.
nan_count = df.isnull().sum()
nan_count

url                0
recipe_name       68
nutrition       2728
category         802
cuisine        13011
ingredient       121
instruction      384
dtype: int64

We have enough data to drop entries that do not have a `cuisine`, along with the few that are missing `recipe_name`, `category`, `ingredient` or `instruction`.

In [22]:
# Keep only the rows that have a value in every column listed below;
# `nutrition` is not in the list, so it may still contain NaN afterwards.
df = df.dropna(
    axis=0,
    how='any',
    subset=[
        'recipe_name',
        'category',
        'instruction',
        'ingredient',
        'cuisine',
    ],
)
print(df.isnull().sum())


url              0
recipe_name      0
nutrition      989
category         0
cuisine          0
ingredient       0
instruction      0
dtype: int64


In [23]:
# Display the frame that remains after rows missing a required field were dropped.
df

Unnamed: 0,url,recipe_name,nutrition,category,cuisine,ingredient,instruction
0,https://cooking.nytimes.com/recipes/1024397-cr...,Crispy Potato Tacos,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, tacos, appetizer, main course",mexican,"[Sea salt, 1 1/2 pounds potatoes (any variety)...",[Bring a large saucepan of salted water to a b...
4,https://cooking.nytimes.com/recipes/1024222-st...,Street Corn Pudding,"{'@context': 'http://schema.org', '@type': 'Nu...","brunch, dinner, lunch, custards and puddings, ...",southern,"[Nonstick cooking spray, 1 large or 2 medium j...",[Heat oven to 350 degrees. Coat an 8-inch squa...
5,https://cooking.nytimes.com/recipes/1024129-go...,Gorditas de Maíz,"{'@context': 'http://schema.org', '@type': 'Nu...","project, side dish",mexican,[1 3/4 pounds/794 grams fresh fine-grind corn ...,"[If using fresh masa, knead the masa, 1/2 teas..."
17,https://cooking.nytimes.com/recipes/1024128-to...,Tortillas de Maíz,"{'@context': 'http://schema.org', '@type': 'Nu...","project, side dish",mexican,[1 pound/453 grams fresh fine-grind corn masa ...,"[Set out a 1-gallon zip-top freezer bag, a pla..."
18,https://cooking.nytimes.com/recipes/1024130-te...,Tetelas de Frijol Negro (Black Bean Masa Dumpl...,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, lunch, dumplings, project, side dish",mexican,[1 pound/453 grams fresh fine-grind corn masa ...,[Set out a blender; a 1-gallon zip-top freezer...
...,...,...,...,...,...,...,...
22819,https://cooking.nytimes.com/recipes/2362-pork-...,Pork Chops Provencal,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, main course",french,"[2 tablespoons olive oil, or as desired, 3 pou...",[Heat oil in a large skillet (12 or 15 inches)...
22821,https://cooking.nytimes.com/recipes/2322-spina...,Spinach Linguine With Tomato Sauce,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, pastas, main course",italian,"[1/2 pound fresh or dried green linguine, Salt...","[Bring to the boil 2 quarts water, or enough t..."
22823,https://cooking.nytimes.com/recipes/2283-lobst...,Lobster and Olive Pasta Salad,"{'@context': 'http://schema.org', '@type': 'Nu...","dinner, pastas, main course",italian,[3/4 pound mixed fresh yellow and green fettuc...,[Cook fettuccine in boiling water about 30 sec...
22824,https://cooking.nytimes.com/recipes/2282-bread...,Breaded Sweetbreads,"{'@context': 'http://schema.org', '@type': 'Nu...","project, appetizer",french,"[1 pair sweetbreads, about 1 pound, 1 egg, 2 t...",[Put the sweetbreads in a mixing bowl and add ...
