## ICA 1
Complete the `extract_nutrition()` below such that:

```python
# get / extract a data frame of recipes (only name and href)
str_query = 'dog food'
html_str = get_search_recipe(str_query)
df_recipe = extract_recipes(html_str)

for row_idx in range(df_recipe.shape[0]):
    # get / extract nutrition info for a particular recipe
    recipe_url = df_recipe.loc[row_idx, 'href']
    nutr_dict = extract_nutrition(recipe_url)
    
    # add each new nutrition feature to the dataframe
    for nutr_feat, nutr_val in nutr_dict.items():
        df_recipe.loc[row_idx, nutr_feat] = nutr_val

```

generates the `df_recipe`:

|   |                                              href |                                       name | calories | protein | carbohydrates |  fat | cholesterol | sodium |
|--:|--------------------------------------------------:|-------------------------------------------:|---------:|--------:|--------------:|-----:|------------:|-------:|
| 0 | https://www.allrecipes.com/recipe/140286/homem... |                          Homemade Dog Food |    440.0 |    23.1 |          64.1 |  9.8 |        71.7 |  118.3 |
| 1 | https://www.allrecipes.com/recipe/265867/grain... |               Grain-Free Homemade Dog Food |     95.0 |     7.8 |           8.8 |  3.4 |        69.3 |   56.8 |
| 2 | https://www.allrecipes.com/recipe/275424/homem... |             Homemade Dog Food with Chicken |    386.0 |    33.3 |          11.4 | 22.4 |       113.1 |  369.0 |
| 3 | https://www.allrecipes.com/recipe/275185/homem... | Homemade Dog Food with Meat and Vegetables |    938.0 |    60.2 |          78.0 | 43.8 |       146.9 |  795.6 |
| 4 | https://www.allrecipes.com/recipe/286349/homem... |       Homemade Grain-Free Organic Dog Food |    275.0 |     9.4 |          21.0 | 17.9 |        27.2 |  131.4 |

In [3]:
import requests
import pandas as pd
from bs4 import BeautifulSoup


def get_search_recipe(str_query):
    """ gets html of from allrecipes.com to search query
    
    Args:
        str_query (str): search string
        
    Returns:
        html_str (str): html response from allreceipes.com
    """
    
    url = f'https://www.allrecipes.com/search/results/?search={str_query}'
    html_str = requests.get(url).text
    
    return html_str

def extract_recipes(html_str):
    """ builds list of recipe names from allrecipies html
    
    Args:
        html_str (str): html response from allrecipes.com, see crawl_recipes()
        
    Returns:
        df_recipe (pd.DataFrame): dataframe of recipes
    """
    # build soup object from text
    soup = BeautifulSoup(html_str)
    
    
    df_recipe = pd.DataFrame()
    for recipe in soup.find_all(class_='card__recipe'):
        # extract / store recipe
        recipe_name = recipe.find_all(class_='card__title')[0].text.strip()
        
        # search within this recipe for a title link
        a = recipe.find_all('a', class_='card__titleLink')[0]
        recipe_href = a.attrs['href']
        
        
        # bundle as a dictionary (easy to pass to pandas series later)
        recipe_dict = {'name': recipe_name,
                      'href': recipe_href}
        df_recipe = df_recipe.append(pd.Series(recipe_dict), ignore_index=True)
        
    return df_recipe

def extract_nutrition(url):
    """ returns a dictionary of nutrition info 
    
    Args:
        url (str): location of all recipes "recipe"
        
    Returns:
        nutrition_dict (dict): keys are molecule types ('fat'), 
            vals are str of quantity ('24 g')
    """
         
    # get soup from url
    html = requests.get(url).text
    soup = BeautifulSoup(html)
    
    # extract nutrition info
    str_nutrit = soup.find_all(class_='recipeNutritionSectionBlock')[0].text
    
    # discard uneeded str
    str_nutrit = str_nutrit.replace('Per Serving:', '')
    str_nutrit = str_nutrit.replace('Full Nutrition:', '')
    
    # split
    nutr_dict = dict()
    for str_nutr_feat in str_nutrit.split(';'):
        str_nutr_feat = str_nutr_feat.strip()
        
        for str_rm in ['mg.', 'mg', 'g']:
            if str_nutr_feat.endswith(str_rm):
                str_nutr_feat = str_nutr_feat[:len(str_rm)]
        
        # split into name of feature / value
        nutr_feat, nutr_val = str_nutr_feat.split(' ')
        
        # swaps
        if nutr_val == 'calories': 
            nutr_feat, nutr_val = nutr_val, nutr_feat
            
        nutr_dict[nutr_feat] = float(nutr_val)

        # store
        nutr_dict[nutr_feat] = nutr_val
        
    return nutrition_dict

In [4]:
# get / extract a data frame of recipes (only name and href)
str_query = 'dog food'
html_str = get_search_recipe(str_query)
df_recipe = extract_recipes(html_str)

for row_idx in range(df_recipe.shape[0]):
    # get / extract nutrition info for a particular recipe
    recipe_url = df_recipe.loc[row_idx, 'href']
    nutr_dict = extract_nutrition(recipe_url)
    
    # add each new nutrition feature to the dataframe
    for nutr_feat, nutr_val in nutr_dict.items():
        df_recipe.loc[row_idx, nutr_feat] = nutr_val

ValueError: not enough values to unpack (expected 2, got 1)

In [5]:
df_recipe.head()

Unnamed: 0,name,href
0,Homemade Dog Food,https://www.allrecipes.com/recipe/140286/homem...
1,Grain-Free Homemade Dog Food,https://www.allrecipes.com/recipe/265867/grain...
2,Homemade Dog Food with Chicken,https://www.allrecipes.com/recipe/275424/homem...
3,Homemade Dog Food with Meat and Vegetables,https://www.allrecipes.com/recipe/275185/homem...
4,Homemade Grain-Free Organic Dog Food,https://www.allrecipes.com/recipe/286349/homem...


In [6]:
def get_df_recipe(str_query, recipe_limit=None):
    """ searches for recipes and returns list, with nutrition info
    
    (this function was made from script given in ica1 example above)
    
    Args:
        str_query (str): search string
        recipe_limit (int): if passed, limits recipe (helpful
            to speed up nutrition scraping for teaching!)
        
    Returns:
        df_recipe (pd.DataFrame): dataframe, each row is recipe.
            includes columns href, name, calories, protein, fat, 
            cholesterol, sodium
    """

    # get / extract a data frame of recipes (only name and href)
    html_str = get_search_recipe(str_query)
    df_recipe = extract_recipes(html_str)
    
    if recipe_limit is not None:
        # discard all but first few recipes
        df_recipe = df_recipe.iloc[:recipe_limit, :]

    for row_idx in range(df_recipe.shape[0]):
        # get / extract nutrition info for a particular recipe
        recipe_url = df_recipe.loc[row_idx, 'href']
        nutr_dict = extract_nutrition(recipe_url)
        
        # add each new nutrition feature to the dataframe
        for nutr_feat, nutr_val in nutr_dict.items():
            df_recipe.loc[row_idx, nutr_feat] = nutr_val
            
    return df_recipe

In [7]:
query_list = ['pickles', 'maple syrup', 'truffles', 'peanut butter', 'ice cream']

df_recipe = pd.DataFrame()
for str_query in query_list:
    # get a few recipes of the given search
    df_recipe_query = get_df_recipe(str_query, recipe_limit=5)
    
    # record the query used to search for these recipes & aggregate
    df_recipe_query['query'] = str_query
    df_recipe = df_recipe.append(df_recipe_query)

ValueError: not enough values to unpack (expected 2, got 1)

In [None]:
df_recipe

In [None]:
import plotly.express as px
px.scatter(data_frame=df_recipe, x='calories', y='fat', color='query', hover_data=['name'])