Relevant searches to extract data structures that can be of use down the line.

In [1]:
import pandas as pd
import json
import sqlite3
import numpy as np
from datetime import datetime

In [2]:
conn = sqlite3.connect('../data/recsys.db')
c = conn.cursor()

# Nutrient Data

1. Load the nutrient data and pivot (long => wide).
2. Then join this with the original recipe data. This can be saved out and sent.

In [21]:
df = pd.read_sql_query('''SELECT * FROM nutrients;''', conn)
df.head(30)

Unnamed: 0,nutrient_id,recipe_id,complete,name,value,percent_daily_value,display_value,value_unit
0,1,222388,0,Niacin Eqivalents,9.319291,72.0,9,mg
1,2,222388,1,Sgars,0.093559,0.0,0.1,g
2,3,222388,1,Sodim,2017.13,81.0,2017,mg
3,4,222388,1,Carbohydrates,1.797819,0.0,1.8,g
4,5,222388,0,Vitamin B6,0.23298,15.0,< 1,mg
5,6,222388,1,Calories,308.1481,15.0,308,kcal
6,7,222388,0,Thiamin,0.394774,39.0,< 1,mg
7,8,222388,1,Fat,23.58587,36.0,23.6,g
8,9,222388,0,Folate,2.109131,1.0,2,mcg
9,10,222388,1,Calories from Fat,212.2728,,212,kcal


In [6]:
df_val = df.pivot(index='recipe_id', columns='name', values='value')
df_val.head()

name,Calcim,Calories,Calories from Fat,Carbohydrates,Cholesterol,Dietary Fiber,Fat,Folate,Iron,Magnesim,Niacin Eqivalents,Potassim,Protein,Satrated Fat,Sgars,Sodim,Thiamin,Vitamin A - IU,Vitamin B6,Vitamin C
recipe_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
6663,20.95548,120.7066,74.32346,9.370335,1.466667,0.329367,8.258162,16.15505,0.55931,4.213947,1.305051,18.26903,2.300013,2.25785,0.190441,78.67492,0.082433,7.767053,0.007098,0.024194
6664,210.9034,313.8263,138.8422,35.49886,85.73899,2.394961,15.42691,46.72293,1.77818,36.90105,2.932469,196.9331,9.789059,8.499446,7.16969,623.7267,0.212935,876.4019,0.144638,4.720046
6665,6.107055,104.0672,26.76795,16.62239,10.33333,0.597292,2.974216,45.38445,1.002785,5.176659,1.801886,33.46304,2.499005,0.551574,1.95101,163.3568,0.161123,124.1246,0.018682,0.007361
6666,18.13379,303.33,121.6247,43.18983,31.0,1.555075,13.51386,39.37596,1.242923,15.29333,2.180518,129.8199,3.694001,1.792503,26.38888,171.3883,0.190914,3246.537,0.077493,3.324017
6667,46.99566,197.9225,32.90274,34.94212,9.495556,1.218333,3.65586,76.35577,2.018598,13.39478,2.921615,105.104,5.619544,2.114157,2.975532,97.72266,0.350947,142.9118,0.038195,0.069111


In [13]:
# Only get those recipes with some sort-of nutrient info
df_recipes = pd.read_sql_query('''SELECT * FROM recipes WHERE id in (SELECT DISTINCT(recipe_id) FROM nutrients);''', conn)
df_recipes.head()

Unnamed: 0,id,name,ave_rating,image_url,review_nums,ingredients,directions
0,222388,Homemade Bacon,5.0,https://images.media-allrecipes.com/userphotos...,3,"pork belly, smoked paprika, kosher salt, groun...",'Prep\n5 m\nCook\n2 h 45 m\nReady In\n11 h 50 ...
1,240488,"Pork Loin, Apples, and Sauerkraut",4.764706,https://images.media-allrecipes.com/userphotos...,29,"sauerkraut drained, Granny Smith apples sliced...",'Prep\n15 m\nCook\n2 h 30 m\nReady In\n2 h 45 ...
2,218939,Foolproof Rosemary Chicken Wings,4.571429,https://images.media-allrecipes.com/userphotos...,12,"chicken wings, sprigs rosemary, head garlic, o...","""Prep\n20 m\nCook\n40 m\nReady In\n1 h\nPrehea..."
3,87211,Chicken Pesto Paninis,4.625,https://images.media-allrecipes.com/userphotos...,163,"focaccia bread quartered, prepared basil pesto...",'Prep\n15 m\nCook\n5 m\nReady In\n20 m\nPrehea...
4,245714,Potato Bacon Pizza,4.5,https://images.media-allrecipes.com/userphotos...,2,"red potatoes, strips bacon, Sauce:, heavy whip...",'Prep\n20 m\nCook\n45 m\nReady In\n1 h 10 m\nP...


In [7]:
pd.merge(df, df_recipes, )

name,Calcim,Calories,Calories from Fat,Carbohydrates,Cholesterol,Dietary Fiber,Fat,Folate,Iron,Magnesim,Niacin Eqivalents,Potassim,Protein,Satrated Fat,Sgars,Sodim,Thiamin,Vitamin A - IU,Vitamin B6,Vitamin C
recipe_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
262646,84.40975,113.2842,18.68494,24.81477,0.0,0.274675,2.076105,41.05075,1.164613,41.04175,3.748584,369.089,2.434993,0.403351,0.75,1044.986,0.126048,425.367,0.344493,7.18865
262797,68.69238,307.3315,118.2982,22.00819,64.6,2.953104,13.14425,58.90721,2.351311,54.36844,16.62486,547.9546,26.05278,2.2035,14.87805,782.9474,0.189611,1690.515,0.668266,56.74846
262815,25.25903,235.1897,133.6973,4.055043,52.0725,0.271827,14.85525,14.45597,1.576513,20.14212,7.895879,302.278,20.45703,4.529202,0.516253,115.6808,0.119869,108.4522,0.423013,0.953285
262850,70.08701,551.5228,313.0581,6.597518,114.1225,1.474026,34.78424,15.42799,3.593588,39.756,11.67496,516.5079,31.45104,12.49591,0.829528,165.0731,0.140095,677.9331,0.68744,4.074295
262956,161.1804,734.925,408.3376,54.67565,132.0595,4.300613,45.37084,204.8631,4.990212,64.6734,8.357469,550.9448,18.20098,22.93011,5.167469,613.4282,0.773093,2344.36,0.324647,9.982


In [18]:
df2 = pd.merge(df_recipes, df_val, left_on='id', right_on='recipe_id')
df2.head()

Unnamed: 0,id,name,ave_rating,image_url,review_nums,ingredients,directions,Calcim,Calories,Calories from Fat,...,Niacin Eqivalents,Potassim,Protein,Satrated Fat,Sgars,Sodim,Thiamin,Vitamin A - IU,Vitamin B6,Vitamin C
0,222388,Homemade Bacon,5.0,https://images.media-allrecipes.com/userphotos...,3,"pork belly, smoked paprika, kosher salt, groun...",'Prep\n5 m\nCook\n2 h 45 m\nReady In\n11 h 50 ...,11.18365,308.1481,212.2728,...,9.319291,347.2267,21.00254,7.736815,0.093559,2017.13,0.394774,474.2073,0.23298,0.776127
1,240488,"Pork Loin, Apples, and Sauerkraut",4.764706,https://images.media-allrecipes.com/userphotos...,29,"sauerkraut drained, Granny Smith apples sliced...",'Prep\n15 m\nCook\n2 h 30 m\nReady In\n2 h 45 ...,135.4538,371.7219,105.0769,...,15.6016,1088.923,36.39878,3.646474,19.84146,2606.764,0.842312,73.17785,1.328631,52.76848
2,218939,Foolproof Rosemary Chicken Wings,4.571429,https://images.media-allrecipes.com/userphotos...,12,"chicken wings, sprigs rosemary, head garlic, o...","""Prep\n20 m\nCook\n40 m\nReady In\n1 h\nPrehea...",60.08832,335.1655,212.6267,...,10.06679,249.1827,23.91265,5.683611,0.23613,762.805,0.06496,359.364,0.5538,5.307448
3,87211,Chicken Pesto Paninis,4.625,https://images.media-allrecipes.com/userphotos...,163,"focaccia bread quartered, prepared basil pesto...",'Prep\n15 m\nCook\n5 m\nReady In\n20 m\nPrehea...,528.4617,640.5617,264.3085,...,13.22826,348.8067,32.37537,10.91876,1.97421,1075.527,0.03088,604.7537,0.273496,18.01502
4,245714,Potato Bacon Pizza,4.5,https://images.media-allrecipes.com/userphotos...,2,"red potatoes, strips bacon, Sauce:, heavy whip...",'Prep\n20 m\nCook\n45 m\nReady In\n1 h 10 m\nP...,132.2265,162.6685,68.40259,...,3.013383,106.8477,7.059566,3.975159,2.452803,189.8088,0.165943,168.3245,0.055718,0.905797


In [19]:
df2.shape

(48632, 27)

In [20]:
df2.to_csv('../data/14_recipes_with_nutrients.csv')