# Data Cleaning

___

### Table Of Contents

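1. Import Libraries
2. Expand maximum range of rows
3. Import data
4. Extract useful JSON data into new DataFrames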
5. 
6. 
7. 
8. 
9. 
10. 

___

### 1. Import Libraries

In [1]:
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
%matplotlib inline
import seaborn as sns
import numpy as np
import pandas as pd
import requests
import json
import sklearn
from scipy import stats
from scipy.stats import norm
from sklearn.utils import resample
import pickle
import statsmodels.api as sm
from statsmodels.formula.api import ols
import scipy.stats as stats
from wordcloud import WordCloud
import random
from collections import Counter

___

### 2. Expand maximum range of rows

The Spoonacular API returns a lot of data.<br>
Expand the maximum number of rows and columns, and the display width, that Pandas will show so the DataFrames can be inspected in full.<br>
Use <b>pd.set_option()</b>

In [2]:
# Raise every pandas display limit we care about to the same generous ceiling.
for option_name in ('display.max_rows', 'display.max_columns', 'display.width'):
    pd.set_option(option_name, 1000)

___

### 3. Import data

In [3]:
#"results" needs to be split out some more into separate columns
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
top_1000_recipes_path = "/Users/alexandercheng/Desktop/code/projects/recipe-strategy/Data/top_1000_recipes.json"
df_1 = pd.read_json(top_1000_recipes_path)
df_1.head()

Unnamed: 0,results,baseUri,offset,number,totalResults,processingTimeMs
0,"[{'id': 592479, 'title': 'Kale and Quinoa Sala...",https://spoonacular.com/recipeImages/,0,100,313922,713
1,"[{'id': 474396, 'title': 'Mixed Bag” Kale Sala...",https://spoonacular.com/recipeImages/,100,100,313922,833
2,"[{'id': 553553, 'title': 'Broccoli quinoa cass...",https://spoonacular.com/recipeImages/,200,100,313922,671
3,"[{'id': 548686, 'title': 'Sauteed Kale – 0 Poi...",https://spoonacular.com/recipeImages/,300,100,313922,682
4,"[{'id': 622992, 'title': 'Moro's Warm Squash &...",https://spoonacular.com/recipeImages/,400,100,313922,580


In [147]:
# Read the detailed recipe information (top_1000_recipes_info) into plain Python objects.
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
info_path = "/Users/alexandercheng/Desktop/code/projects/recipe-strategy/Data/top_1000_recipes_info.json"
with open(info_path) as f:
    info = json.load(f)

In [148]:
# Peek at the first recipe of the first batch to see which fields the payload carries.
info[0][0]

{'vegetarian': True,
 'vegan': True,
 'glutenFree': True,
 'dairyFree': True,
 'veryHealthy': True,
 'cheap': False,
 'veryPopular': True,
 'sustainable': False,
 'weightWatcherSmartPoints': 6,
 'gaps': 'no',
 'lowFodmap': False,
 'ketogenic': False,
 'whole30': False,
 'preparationMinutes': 25,
 'cookingMinutes': 25,
 'sourceUrl': 'http://blog.fatfreevegan.com/2013/06/kale-and-quinoa-salad-with-black-beans.html',
 'spoonacularSourceUrl': 'https://spoonacular.com/kale-and-quinoa-salad-with-black-beans-592479',
 'aggregateLikes': 50078,
 'spoonacularScore': 100.0,
 'healthScore': 100.0,
 'creditsText': 'Fat Free Vegan',
 'sourceName': 'Fat Free Vegan',
 'pricePerServing': 101.23,
 'extendedIngredients': [{'id': 1022009,
   'aisle': 'Ethnic Foods;Spices and Seasonings',
   'image': 'chili-powder.jpg',
   'consitency': 'solid',
   'name': 'ancho chile powder',
   'original': '1 teaspoon ancho chile powder (or other pure chile powder, not a blend)',
   'originalString': '1 teaspoon ancho c

In [122]:
#"results" needs to be split out some more into separate columns
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
top_1000_metascore_path = "/Users/alexandercheng/Desktop/code/projects/recipe-strategy/Data/top_1000_recipes_metascore.json"
df_3 = pd.read_json(top_1000_metascore_path)
df_3.head()

Unnamed: 0,results,baseUri,offset,number,totalResults,processingTimeMs
0,"[{'id': 592479, 'title': 'Kale and Quinoa Sala...",https://spoonacular.com/recipeImages/,0,100,313938,707
1,"[{'id': 474396, 'title': 'Mixed Bag” Kale Sala...",https://spoonacular.com/recipeImages/,100,100,313938,697
2,"[{'id': 553553, 'title': 'Broccoli quinoa cass...",https://spoonacular.com/recipeImages/,200,100,313938,729
3,"[{'id': 548686, 'title': 'Sauteed Kale – 0 Poi...",https://spoonacular.com/recipeImages/,300,100,313938,710
4,"[{'id': 622992, 'title': 'Moro's Warm Squash &...",https://spoonacular.com/recipeImages/,400,100,313938,708


In [150]:
# Read the detailed metascore recipe information (top_1000_recipes_metascore_info).
# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
info_m_path = "/Users/alexandercheng/Desktop/code/projects/recipe-strategy/Data/top_1000_recipes_metascore_info.json"
with open(info_m_path) as g:
    info_m = json.load(g)

In [151]:
# Peek at the first recipe of the first batch in the metascore payload.
info_m[0][0]

{'vegetarian': True,
 'vegan': True,
 'glutenFree': True,
 'dairyFree': True,
 'veryHealthy': True,
 'cheap': False,
 'veryPopular': True,
 'sustainable': False,
 'weightWatcherSmartPoints': 6,
 'gaps': 'no',
 'lowFodmap': False,
 'ketogenic': False,
 'whole30': False,
 'preparationMinutes': 25,
 'cookingMinutes': 25,
 'sourceUrl': 'http://blog.fatfreevegan.com/2013/06/kale-and-quinoa-salad-with-black-beans.html',
 'spoonacularSourceUrl': 'https://spoonacular.com/kale-and-quinoa-salad-with-black-beans-592479',
 'aggregateLikes': 50078,
 'spoonacularScore': 100.0,
 'healthScore': 100.0,
 'creditsText': 'Fat Free Vegan',
 'sourceName': 'Fat Free Vegan',
 'pricePerServing': 101.23,
 'extendedIngredients': [{'id': 1022009,
   'aisle': 'Ethnic Foods;Spices and Seasonings',
   'image': 'chili-powder.jpg',
   'consitency': 'solid',
   'name': 'ancho chile powder',
   'original': '1 teaspoon ancho chile powder (or other pure chile powder, not a blend)',
   'originalString': '1 teaspoon ancho c

___

### 4. Extract useful JSON data into new DataFrames

In [9]:
# All of the data we want lives in the "results" column; the remaining columns
# (baseUri, offset, number, totalResults, processingTimeMs) describe the API call.
# We need to extract "results" and build a new DataFrame from it.
df_1

Unnamed: 0,results,baseUri,offset,number,totalResults,processingTimeMs
0,"[{'id': 592479, 'title': 'Kale and Quinoa Sala...",https://spoonacular.com/recipeImages/,0,100,313922,713
1,"[{'id': 474396, 'title': 'Mixed Bag” Kale Sala...",https://spoonacular.com/recipeImages/,100,100,313922,833
2,"[{'id': 553553, 'title': 'Broccoli quinoa cass...",https://spoonacular.com/recipeImages/,200,100,313922,671
3,"[{'id': 548686, 'title': 'Sauteed Kale – 0 Poi...",https://spoonacular.com/recipeImages/,300,100,313922,682
4,"[{'id': 622992, 'title': 'Moro's Warm Squash &...",https://spoonacular.com/recipeImages/,400,100,313922,580
5,"[{'id': 860004, 'title': 'Pan-Roasted Salmon W...",https://spoonacular.com/recipeImages/,500,100,313922,688
6,"[{'id': 584078, 'title': 'Berry Spinach Salad ...",https://spoonacular.com/recipeImages/,600,100,313922,709
7,"[{'id': 735809, 'title': 'Homemade Vegan Calzo...",https://spoonacular.com/recipeImages/,700,100,313922,704
8,"[{'id': 610300, 'title': 'Greek Style Kale Eda...",https://spoonacular.com/recipeImages/,800,100,313922,721
9,"[{'id': 250480, 'title': 'Vegan Twice Baked Po...",https://spoonacular.com/recipeImages/,900,100,313922,709


In [104]:
# Flatten every value of every result dict from all 10 calls of top_1000_recipes
# into one long list (values keep the order in which each dict stores them).
numbers_1 = list(range(0, 10))    # one index per API call (row of df_1)
numbers_2 = list(range(0, 100))   # one index per recipe within a call
all_recipes = [
    value
    for call_idx in numbers_1
    for recipe_idx in numbers_2
    for value in df_1['results'][call_idx][recipe_idx].values()
]

In [116]:
# The first four flattened values together make up one recipe.
all_recipes[:4]

[592479,
 'Kale and Quinoa Salad with Black Beans',
 'https://spoonacular.com/recipeImages/592479-312x231.jpg',
 'jpg']

In [117]:
# Layout of the flattened list: index 0, 4, 8, ... hold ids;
# index 1, 5, 9, ... hold titles.
print(all_recipes[0], all_recipes[1], sep='\n')

592479
Kale and Quinoa Salad with Black Beans


In [118]:
# Positions of the ids in the flattened list: 0, 4, 8, ..., 3996 (1000 positions).
iterate_4_ids = [4 * recipe_number for recipe_number in range(1000)]

# Each title sits directly after its id: 1, 5, 9, ..., 3997.
iterate_4_titles = [id_position + 1 for id_position in iterate_4_ids]

In [119]:
# Pick the recipe ids out of the flattened list, one per precomputed position.
ids = [all_recipes[position] for position in iterate_4_ids]

# Pick the recipe titles the same way, using the title positions.
titles = [all_recipes[position] for position in iterate_4_titles]

In [121]:
# Pair each recipe id with its title in a two-column DataFrame.
df_1a = pd.DataFrame(dict(id=ids, title=titles))
df_1a

Unnamed: 0,id,title
0,592479,Kale and Quinoa Salad with Black Beans
1,547775,Creamy Avocado Pasta
2,818941,"Avocado Toast with Eggs, Spinach, and Tomatoes"
3,495111,Citrus Sesame Kale
4,689502,Melt In Your Mouth Kale Salad
5,837136,Kale Pineapple Smoothie
6,582897,Mexican Salad with Lime Dressing
7,777037,Weekly Meal Plan #17
8,801710,Matcha Green Tea and Pineapple Smoothie
9,695486,Green Smoothie


In [130]:
#export this to .CSV for safe keeping
#df_1a.to_csv('top_1000_recipes.csv')

In [123]:
# All of the data we want lives in the "results" column; the remaining columns
# (baseUri, offset, number, totalResults, processingTimeMs) describe the API call.
# We need to extract "results" and build a new DataFrame from it.
df_3

Unnamed: 0,results,baseUri,offset,number,totalResults,processingTimeMs
0,"[{'id': 592479, 'title': 'Kale and Quinoa Sala...",https://spoonacular.com/recipeImages/,0,100,313938,707
1,"[{'id': 474396, 'title': 'Mixed Bag” Kale Sala...",https://spoonacular.com/recipeImages/,100,100,313938,697
2,"[{'id': 553553, 'title': 'Broccoli quinoa cass...",https://spoonacular.com/recipeImages/,200,100,313938,729
3,"[{'id': 548686, 'title': 'Sauteed Kale – 0 Poi...",https://spoonacular.com/recipeImages/,300,100,313938,710
4,"[{'id': 622992, 'title': 'Moro's Warm Squash &...",https://spoonacular.com/recipeImages/,400,100,313938,708
5,"[{'id': 860004, 'title': 'Pan-Roasted Salmon W...",https://spoonacular.com/recipeImages/,500,100,313938,681
6,"[{'id': 584078, 'title': 'Berry Spinach Salad ...",https://spoonacular.com/recipeImages/,600,100,313938,692
7,"[{'id': 735809, 'title': 'Homemade Vegan Calzo...",https://spoonacular.com/recipeImages/,700,100,313938,677
8,"[{'id': 610300, 'title': 'Greek Style Kale Eda...",https://spoonacular.com/recipeImages/,800,100,313938,626
9,"[{'id': 250480, 'title': 'Vegan Twice Baked Po...",https://spoonacular.com/recipeImages/,900,100,313938,725


In [124]:
# Flatten every value of every result dict from all 10 calls of
# top_1000_metascore_recipes into one long list (dict value order is kept).
numbers_1 = list(range(0, 10))    # one index per API call (row of df_3)
numbers_2 = list(range(0, 100))   # one index per recipe within a call
all_recipes_metascore = [
    value
    for call_idx in numbers_1
    for recipe_idx in numbers_2
    for value in df_3['results'][call_idx][recipe_idx].values()
]

In [125]:
# The first four flattened values together make up one recipe.
all_recipes_metascore[:4]

[592479,
 'Kale and Quinoa Salad with Black Beans',
 'https://spoonacular.com/recipeImages/592479-312x231.jpg',
 'jpg']

In [126]:
# Layout of the flattened list: index 0, 4, 8, ... hold ids;
# index 1, 5, 9, ... hold titles.
print(all_recipes_metascore[0], all_recipes_metascore[1], sep='\n')

592479
Kale and Quinoa Salad with Black Beans


In [127]:
# Positions of the ids in the flattened list: 0, 4, 8, ..., 3996 (1000 positions).
iterate_4_ids = [4 * recipe_number for recipe_number in range(1000)]

# Each title sits directly after its id: 1, 5, 9, ..., 3997.
iterate_4_titles = [id_position + 1 for id_position in iterate_4_ids]

In [128]:
# Pick the recipe ids out of the flattened list, one per precomputed position.
ids_metascore = [all_recipes_metascore[position] for position in iterate_4_ids]

# Pick the recipe titles the same way, using the title positions.
titles_metascore = [all_recipes_metascore[position] for position in iterate_4_titles]

In [129]:
# Pair each recipe id with its title in a two-column DataFrame.
df_3a = pd.DataFrame(dict(id=ids_metascore, title=titles_metascore))
df_3a

Unnamed: 0,id,title
0,592479,Kale and Quinoa Salad with Black Beans
1,547775,Creamy Avocado Pasta
2,818941,"Avocado Toast with Eggs, Spinach, and Tomatoes"
3,495111,Citrus Sesame Kale
4,689502,Melt In Your Mouth Kale Salad
5,837136,Kale Pineapple Smoothie
6,582897,Mexican Salad with Lime Dressing
7,777037,Weekly Meal Plan #17
8,801710,Matcha Green Tea and Pineapple Smoothie
9,695486,Green Smoothie


In [131]:
#export this to .CSV for safe keeping
#df_3a.to_csv('top_1000_recipes_metascore.csv')

In [152]:
# Look at one full recipe record again before pulling individual fields out of it.
info[0][0]

{'vegetarian': True,
 'vegan': True,
 'glutenFree': True,
 'dairyFree': True,
 'veryHealthy': True,
 'cheap': False,
 'veryPopular': True,
 'sustainable': False,
 'weightWatcherSmartPoints': 6,
 'gaps': 'no',
 'lowFodmap': False,
 'ketogenic': False,
 'whole30': False,
 'preparationMinutes': 25,
 'cookingMinutes': 25,
 'sourceUrl': 'http://blog.fatfreevegan.com/2013/06/kale-and-quinoa-salad-with-black-beans.html',
 'spoonacularSourceUrl': 'https://spoonacular.com/kale-and-quinoa-salad-with-black-beans-592479',
 'aggregateLikes': 50078,
 'spoonacularScore': 100.0,
 'healthScore': 100.0,
 'creditsText': 'Fat Free Vegan',
 'sourceName': 'Fat Free Vegan',
 'pricePerServing': 101.23,
 'extendedIngredients': [{'id': 1022009,
   'aisle': 'Ethnic Foods;Spices and Seasonings',
   'image': 'chili-powder.jpg',
   'consitency': 'solid',
   'name': 'ancho chile powder',
   'original': '1 teaspoon ancho chile powder (or other pure chile powder, not a blend)',
   'originalString': '1 teaspoon ancho c

In [196]:
# This is the information we need to get out of each recipe.
# Show the first eight nutrient entries of the first recipe, title then amount.
# For this recipe they are, in order: calories, fat, saturated fat, carbs,
# sugar, cholesterol, sodium, protein.
first_recipe_nutrients = info[0][0]['nutrition']['nutrients']
for position in range(8):
    display(first_recipe_nutrients[position]['title'])
    display(first_recipe_nutrients[position]['amount'])

'Calories'

242.6

'Fat'

7.38

'Saturated Fat'

1.03

'Carbohydrates'

37.19

'Sugar'

1.8

'Cholesterol'

0.0

'Sodium'

250.66

'Protein'

9.79

In [228]:
# Index helpers: 10 batches, 100 recipes per batch, 8 nutrients per recipe.
numbers_1 = [*range(10)]
numbers_2 = [*range(100)]
numbers_3 = [*range(8)]

In [234]:
# One independent, empty list per nutrient we are going to collect.
(calories, fat, saturated_fat, carbs,
 sugar, cholesterol, sodium, protein) = ([] for _ in range(8))

In [235]:
# Collect eight nutrient amounts for every recipe in `info`
# (rows follow the numbers_1 x numbers_2 order, i.e. batch by batch).
#
# Nutrients are looked up by their 'title' instead of by list position:
# positional indexing silently stores the wrong nutrient whenever a recipe's
# list is ordered differently or carries extra entries.
# NOTE(review): the 0.0 cholesterol / 0.45 protein shown for an egg dish in
# info_df.head() looks like such a misalignment -- confirm against the raw data.
#
# The lists are rebuilt from scratch, so re-running this cell does not
# append a second copy of every value.
nutrition_rows = [
    # reversed() so that, should a title repeat, its first occurrence wins
    {entry['title']: entry['amount']
     for entry in reversed(info[x][y]['nutrition']['nutrients'])}
    for x in numbers_1
    for y in numbers_2
]
missing = float('nan')  # stands in for a nutrient a recipe does not report

calories = [row.get('Calories', missing) for row in nutrition_rows]
fat = [row.get('Fat', missing) for row in nutrition_rows]
saturated_fat = [row.get('Saturated Fat', missing) for row in nutrition_rows]
carbs = [row.get('Carbohydrates', missing) for row in nutrition_rows]
sugar = [row.get('Sugar', missing) for row in nutrition_rows]
cholesterol = [row.get('Cholesterol', missing) for row in nutrition_rows]
sodium = [row.get('Sodium', missing) for row in nutrition_rows]
protein = [row.get('Protein', missing) for row in nutrition_rows]

In [236]:
# One independent, empty list per top-level recipe field we are going to collect.
(ids, titles, aggregateLikes, spoonacularScore, healthScore,
 pricePerServing, readyInMinutes, servings, weightWatcherSmartPoints,
 veryHealthy, vegetarian, vegan, glutenFree, dairyFree,
 whole30) = ([] for _ in range(15))

In [238]:
# Recipe field in `info` -> list that collects it, filled in this order.
field_buckets = {
    'id': ids,
    'title': titles,
    'aggregateLikes': aggregateLikes,
    'spoonacularScore': spoonacularScore,
    'healthScore': healthScore,
    'pricePerServing': pricePerServing,
    'readyInMinutes': readyInMinutes,
    'servings': servings,
    'weightWatcherSmartPoints': weightWatcherSmartPoints,
    'veryHealthy': veryHealthy,
    'vegetarian': vegetarian,
    'vegan': vegan,
    'glutenFree': glutenFree,
    'dairyFree': dairyFree,
    'whole30': whole30,
}

# Append each field's value for every recipe, batch by batch, to its list.
for field_name, bucket in field_buckets.items():
    bucket.extend(
        info[batch][slot][field_name]
        for batch in numbers_1
        for slot in numbers_2
    )

In [243]:
# Column name -> collected values; the order here is the column order of info_df.
column_values = {
    'ids': ids,
    'titles': titles,
    'aggregateLikes': aggregateLikes,
    'spoonacularScore': spoonacularScore,
    'healthScore': healthScore,
    'pricePerServing': pricePerServing,
    'readyInMinutes': readyInMinutes,
    'servings': servings,
    'weightWatcherSmartPoints': weightWatcherSmartPoints,
    'veryHealthy': veryHealthy,
    'vegetarian': vegetarian,
    'vegan': vegan,
    'glutenFree': glutenFree,
    'dairyFree': dairyFree,
    'whole30': whole30,
    'calories': calories,
    'fat': fat,
    'saturated_fat': saturated_fat,
    'carbs': carbs,
    'sugar': sugar,
    'cholesterol': cholesterol,
    'sodium': sodium,
    'protein': protein,
}

# zip() turns the parallel lists into one tuple per recipe (row-wise records).
info_df = pd.DataFrame(
    list(zip(*column_values.values())),
    columns=list(column_values),
)

In [250]:
# Sanity check: we expect one row per recipe, i.e. 1000 rows.
print(info_df.shape[0])
info_df.head()

1000


Unnamed: 0,ids,titles,aggregateLikes,spoonacularScore,healthScore,pricePerServing,readyInMinutes,servings,weightWatcherSmartPoints,veryHealthy,vegetarian,vegan,glutenFree,dairyFree,whole30,calories,fat,saturated_fat,carbs,sugar,cholesterol,sodium,protein
0,592479,Kale and Quinoa Salad with Black Beans,50078,100.0,100.0,101.23,50,6,6,True,True,True,True,True,False,242.6,7.38,1.03,37.19,1.8,0.0,250.66,9.79
1,547775,Creamy Avocado Pasta,67131,100.0,100.0,171.39,15,2,13,True,True,True,False,True,False,484.43,16.17,2.38,78.08,3.95,0.0,599.51,15.36
2,818941,"Avocado Toast with Eggs, Spinach, and Tomatoes",142071,100.0,95.0,160.46,10,1,7,True,True,False,False,True,False,277.24,15.9,2.36,22.41,2.9,0.0,446.21,0.45
3,495111,Citrus Sesame Kale,7119,100.0,100.0,51.22,15,4,1,True,True,True,True,True,False,72.07,3.56,0.47,8.58,0.41,0.0,158.35,4.14
4,689502,Melt In Your Mouth Kale Salad,231494,100.0,96.0,279.17,10,2,11,True,False,False,True,False,False,441.93,33.36,4.33,32.12,20.69,6.8,381.72,12.02


In [248]:
# Spot-check that the DataFrame rows follow the order of the .json calls:
# the first five titles of the first batch should match info_df.head().
# Looks like the data is in order! Yay!

for position in range(5):
    display(info[0][position]['title'])

'Kale and Quinoa Salad with Black Beans'

'Creamy Avocado Pasta'

'Avocado Toast with Eggs, Spinach, and Tomatoes'

'Citrus Sesame Kale'

'Melt In Your Mouth Kale Salad'

In [251]:
#Let's save out this data as a .csv file for safe keeping
#info_df.to_csv('top_1000_recipes_info.csv')

___

#### Now do the same for the top_1000_recipes_metascore_info JSON file

In [252]:
# Index helpers: 10 batches, 100 recipes per batch, 8 nutrients per recipe.
numbers_1 = [*range(10)]
numbers_2 = [*range(100)]
numbers_3 = [*range(8)]

In [253]:
# Start over with one independent, empty list per nutrient for the metascore data.
(calories, fat, saturated_fat, carbs,
 sugar, cholesterol, sodium, protein) = ([] for _ in range(8))

In [254]:
# Collect eight nutrient amounts for every recipe in `info_m`
# (rows follow the numbers_1 x numbers_2 order, i.e. batch by batch).
#
# Nutrients are looked up by their 'title' instead of by list position:
# positional indexing silently stores the wrong nutrient whenever a recipe's
# list is ordered differently or carries extra entries.
# NOTE(review): assumes info_m uses the same nutrient titles that `info`
# displays ('Calories', 'Fat', ...) -- confirm against the raw data.
#
# The lists are rebuilt from scratch, so re-running this cell does not
# append a second copy of every value.
nutrition_rows = [
    # reversed() so that, should a title repeat, its first occurrence wins
    {entry['title']: entry['amount']
     for entry in reversed(info_m[x][y]['nutrition']['nutrients'])}
    for x in numbers_1
    for y in numbers_2
]
missing = float('nan')  # stands in for a nutrient a recipe does not report

calories = [row.get('Calories', missing) for row in nutrition_rows]
fat = [row.get('Fat', missing) for row in nutrition_rows]
saturated_fat = [row.get('Saturated Fat', missing) for row in nutrition_rows]
carbs = [row.get('Carbohydrates', missing) for row in nutrition_rows]
sugar = [row.get('Sugar', missing) for row in nutrition_rows]
cholesterol = [row.get('Cholesterol', missing) for row in nutrition_rows]
sodium = [row.get('Sodium', missing) for row in nutrition_rows]
protein = [row.get('Protein', missing) for row in nutrition_rows]

In [255]:
# Start over with one independent, empty list per top-level recipe field.
(ids, titles, aggregateLikes, spoonacularScore, healthScore,
 pricePerServing, readyInMinutes, servings, weightWatcherSmartPoints,
 veryHealthy, vegetarian, vegan, glutenFree, dairyFree,
 whole30) = ([] for _ in range(15))

In [256]:
# Recipe field in `info_m` -> list that collects it, filled in this order.
field_buckets = {
    'id': ids,
    'title': titles,
    'aggregateLikes': aggregateLikes,
    'spoonacularScore': spoonacularScore,
    'healthScore': healthScore,
    'pricePerServing': pricePerServing,
    'readyInMinutes': readyInMinutes,
    'servings': servings,
    'weightWatcherSmartPoints': weightWatcherSmartPoints,
    'veryHealthy': veryHealthy,
    'vegetarian': vegetarian,
    'vegan': vegan,
    'glutenFree': glutenFree,
    'dairyFree': dairyFree,
    'whole30': whole30,
}

# Append each field's value for every recipe, batch by batch, to its list.
for field_name, bucket in field_buckets.items():
    bucket.extend(
        info_m[batch][slot][field_name]
        for batch in numbers_1
        for slot in numbers_2
    )

In [257]:
# Column name -> collected values; the order here is the column order of info_m_df.
column_values = {
    'ids': ids,
    'titles': titles,
    'aggregateLikes': aggregateLikes,
    'spoonacularScore': spoonacularScore,
    'healthScore': healthScore,
    'pricePerServing': pricePerServing,
    'readyInMinutes': readyInMinutes,
    'servings': servings,
    'weightWatcherSmartPoints': weightWatcherSmartPoints,
    'veryHealthy': veryHealthy,
    'vegetarian': vegetarian,
    'vegan': vegan,
    'glutenFree': glutenFree,
    'dairyFree': dairyFree,
    'whole30': whole30,
    'calories': calories,
    'fat': fat,
    'saturated_fat': saturated_fat,
    'carbs': carbs,
    'sugar': sugar,
    'cholesterol': cholesterol,
    'sodium': sodium,
    'protein': protein,
}

# zip() turns the parallel lists into one tuple per recipe (row-wise records).
info_m_df = pd.DataFrame(
    list(zip(*column_values.values())),
    columns=list(column_values),
)

In [258]:
# Sanity check: we expect one row per recipe, i.e. 1000 rows.
# Measure the metascore frame built just above (info_m_df); the previous
# version printed len(info_df), so this check could never catch a problem here.
print(len(info_m_df))
info_m_df.head()

1000


Unnamed: 0,ids,titles,aggregateLikes,spoonacularScore,healthScore,pricePerServing,readyInMinutes,servings,weightWatcherSmartPoints,veryHealthy,vegetarian,vegan,glutenFree,dairyFree,whole30,calories,fat,saturated_fat,carbs,sugar,cholesterol,sodium,protein
0,592479,Kale and Quinoa Salad with Black Beans,50078,100.0,100.0,101.23,50,6,6,True,True,True,True,True,False,242.6,7.38,1.03,37.19,1.8,0.0,250.66,9.79
1,547775,Creamy Avocado Pasta,67131,100.0,100.0,171.39,15,2,13,True,True,True,False,True,False,484.43,16.17,2.38,78.08,3.95,0.0,599.51,15.36
2,818941,"Avocado Toast with Eggs, Spinach, and Tomatoes",142071,100.0,95.0,160.46,10,1,7,True,True,False,False,True,False,277.24,15.9,2.36,22.41,2.9,0.0,446.21,0.45
3,495111,Citrus Sesame Kale,7119,100.0,100.0,51.22,15,4,1,True,True,True,True,True,False,72.07,3.56,0.47,8.58,0.41,0.0,158.35,4.14
4,689502,Melt In Your Mouth Kale Salad,231494,100.0,96.0,279.17,10,2,11,True,False,False,True,False,False,441.93,33.36,4.33,32.12,20.69,6.8,381.72,12.02


In [259]:
# Spot-check that the DataFrame rows follow the order of the .json calls:
# the first five titles of the first batch should match info_m_df.head().
# Looks like the data is in order! Yay!

for position in range(5):
    display(info_m[0][position]['title'])

'Kale and Quinoa Salad with Black Beans'

'Creamy Avocado Pasta'

'Avocado Toast with Eggs, Spinach, and Tomatoes'

'Citrus Sesame Kale'

'Melt In Your Mouth Kale Salad'

In [260]:
#Let's save out this data as a .csv file for safe keeping
#info_m_df.to_csv('top_1000_recipes_metascore_info.csv')