In [749]:
import pandas as pd
from recipe_scrapers import scrape_me
import parse_ingredient
import altair as alt
import numpy as np
import os
from functools import reduce

In [314]:
urls = ['https://www.allrecipes.com/recipe/279938/egg-salad-with-celery/', 'https://www.allrecipes.com/recipe/158968/spinach-and-feta-turkey-burgers/']

In [315]:
def compile_recipe_info(urls):
    dfs = []
    for url in urls:
        scraper = scrape_me(url)
        ingredient_info = parse_ingredients(scraper.ingredients())

        #Add other info
        ingredient_info["recipe"] = scraper.title()
        ingredient_info["cook_time"] = scraper.total_time()
        ingredient_info["link"] = url

        dfs.append(ingredient_info)

    return pd.concat(dfs)

In [309]:
def parse_ingredients(ingredients):
    data = []
    for ingredient in ingredients:
        print(ingredient)
        info = (ingredient, np.NaN, np.NaN, np.NaN)
        try:
            info = parse_ingredient.parse(ingredient)
            info = info.as_dict()
        except Exception as e:
            pass
            data.append(info)
            continue
        
        #puts original ingredient if low confidence
        if info["confidence"] <= .05:
            data.append((ingredient, np.NaN, np.NaN, np.NaN))
            continue
        
        #puts all information together
        data.append((info["product"], info["quantity"], info["unit"], info["usda_info"]["category"]))
        
    return pd.DataFrame(data, columns=["product", "quantity", "unit", "category"])


In [755]:
def merge_shopping_list(list_):
    quantity = list_.groupby("product").quantity.sum()
    units = list_.groupby("product").unit.unique()
    categories = list_.groupby("product").category.first()
    recipes = list_.groupby("product").recipe.unique()

    #union multiple recipes
    recipes = recipes.str.join(" + ")

    df_merged = reduce(lambda left, right: pd.merge(left, right, left_index=True, right_index=True), [quantity, units, categories, recipes])

    #prioritze items part of more recipes
    s = df_merged.recipe.str.len().sort_values(ascending=False).index
    df_merged = df_merged.reindex(s)
    
    return df_merged.sort_values(["category"])

In [646]:
list_ = compile_recipe_info(urls)

6 large eggs
None
2 stalks celery, finely chopped
None
½ cup mayonnaise (such as Hellman's®)
None
¼ cup finely chopped onion
None
¼ teaspoon mustard powder
None
⅛ teaspoon salt
None
2 eggs, beaten
None
2 cloves garlic, minced
None
4 ounces feta cheese
None
1 (10 ounce) box frozen chopped spinach, thawed and squeezed dry
None
2 pounds ground turkey
None


In [756]:
final_shopping_list = merge_shopping_list(list_)

In [757]:
final_shopping_list

Unnamed: 0_level_0,quantity,unit,category,recipe
product,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
eggs,8.0,[None],Dairy and Egg Products,Egg Salad with Celery + Spinach and Feta Turke...
feta cheese,4.0,[ounce],Dairy and Egg Products,Spinach and Feta Turkey Burgers
mayonnaise,0.5,[cup],Fats and Oils,Egg Salad with Celery
turkey,2.0,[pound],Poultry Products,Spinach and Feta Turkey Burgers
salt,0.125,[teaspoon],Spices and Herbs,Egg Salad with Celery
mustard powder,0.25,[teaspoon],Spices and Herbs,Egg Salad with Celery
onion,0.25,[cup],Vegetables and Vegetable Products,Egg Salad with Celery
celery,2.0,[stalk],Vegetables and Vegetable Products,Egg Salad with Celery
garlic,2.0,[clove],,Spinach and Feta Turkey Burgers
"1 (10 ounce) box frozen chopped spinach, thawed and squeezed dry",0.0,[nan],,Spinach and Feta Turkey Burgers


In [848]:
def get_interactive_shopping_list(final_shopping_list):
    selection = alt.selection(fields=["recipe"], type="single", bind="legend")

    ranked_text = alt.Chart(final_shopping_list.reset_index()).mark_text().encode(
        y=alt.Y('row_number:O',axis=None),
        color="recipe:N",
        opacity=alt.condition(selection, alt.value(1), alt.value(0.02))
    ).add_selection(selection).transform_window(
        row_number='row_number()'
    ).transform_window(
        rank='rank(row_number)'
    ).properties(width=150)

    # Data Tables
    category = ranked_text.encode(text='category:N').properties(title='category')
    quantity = ranked_text.encode(text='quantity:N').properties(title='quantity')
    unit = ranked_text.encode(text='unit:N').properties(title='unit')
    item = ranked_text.encode(text='product:N').properties(title='item')

    chart = alt.hconcat(category, quantity, unit, item) # Combine data tables
    
    #set font sizes
    chart = chart.configure_legend(padding=20, labelFontSize=5, fillColor='#EEEEEE', cornerRadius=10, rowPadding=10)
    chart = chart.configure_text(fontSize=12)

    return chart


In [850]:
chart = get_interactive_shopping_list(final_shopping_list)

In [851]:
chart.save("index.html")