In [1]:
import pandas as pd
from marmiton import Marmiton, RecipeNotFound
import re

In [7]:
def receipt_ingredients(plate_name, veggie): # veggie option is currently useless - add feature for veggie 
    """
    Marmiton has it own library for webscrapping
    
    search options return most popular recipes according to your keyword
    the code only looks at the top items from the first marmitton page
    what do we collect?
        recipe name, url, ingrdient name, quantity, unit
    """   
    query_options = {
      "aqt": plate_name,  # Query keywords - separated by a white space
      "veg": veggie,  # Vegetarien only : 0 -> False, 1 -> True (optional)
    }
    query_result = Marmiton.search(query_options)    

    list_plate_name = []
    list_ingredient_id = []
    list_ingredient_name = []
    list_personnes = []
    list_url = []
    
    #pick up the first link
    for recipe_index in range(len(query_result)):
        recipe = query_result[recipe_index]
        main_recipe_url = recipe['url']

        # set up list
        plate_name = [recipe["name"]]#*len(detailed_recipe['ingredients'])

        ingredient_id = []
        ingredient_name = []
        receipts_per = []

        try:
            detailed_recipe = Marmiton.get(main_recipe_url)  # Get the details of the first returned recipe (most relevant in our case)
        except RecipeNotFound as e:
            print(f"No recipe found for '{query_options['aqt']}'")
            import sys
            sys.exit(0) 

        # the lenght of receipt and url must be equal to shape of ingredient
        number_ingredient = len(detailed_recipe['ingredients'])
        personnes = [detailed_recipe['recipe_quantity']] * number_ingredient
        plate_name = plate_name * number_ingredient
        url = [main_recipe_url] *number_ingredient

        # Add all information to the below empty lists
        for index, ingredient in zip( range(len(detailed_recipe['ingredients'])), detailed_recipe['ingredients']):  # List of ingredients
            ingredient_id.append(index+1)
            ingredient_name.extend([ingredient])  
        
        list_plate_name.extend(plate_name)
        list_ingredient_id.extend(ingredient_id)
        list_ingredient_name.extend(ingredient_name)
        list_personnes.extend(personnes)
        list_url.extend(url)
    
    new_recipes = list(zip(list_plate_name, list_ingredient_id, list_ingredient_name, list_personnes, list_url ))
    
    df= pd.DataFrame(new_recipes,
                     columns=['Recipe', 'Index Ingredient', 'Ingredient','Personne', 'URL'])
    return   df

In [3]:
units = ['l', 'cl', 'ml', 'g', 'càc', 'càs', 'kg', 'pinc.', 'boîte de conserve', 'bouquet', 'brins', 'cuillères ', 'tranches', 'pavés ','c.à.s', 'c.à.c' ] 

def dataframe_manipulation(df):
    """
    update the dataframe
        a) transform column person into int --> remove the string personne
        b) extract quantity from ingredient
            --> how should we manage null values
        c) determine the unit measurement for each ingredient
            --> how should we manage null values
            --> continue to populate  the units list --> think about setting up a table for measurement
    """    
    dff = df.copy()
    dff['Personne'] = dff['Personne'].astype(str).str.extract('(\d+)').fillna(0).astype(int)
    dff['Quantity'] = dff['Ingredient'].astype(str).str.extract('(\d+)').fillna(0).astype(int)
    dff['Ingredient'] = dff['Ingredient'].astype(str).str.replace('\d+', '')

    pat = '|'.join(r"\b{}\b".format(x) for x in units)

    dff['Unit'] = dff['Ingredient'].str.extract('('+ pat + ')', expand=False, flags=re.I).fillna(0)
    dff['Ingredient'] = dff['Ingredient'].astype(str).str.replace(pat, '')
    
    return dff

In [4]:
def new_recipes(name):
    """
    merge the new recipe(s) to the table
    
    next step --> how to avoid duplicates 
    """
    old_df = pd.read_csv('Carbon_diet.csv') 
    new_dff = receipt_ingredients(name, 1)
    new_df = dataframe_manipulation(new_dff)
    old_df = pd.concat([old_df, new_df], axis = 0)
    old_df.to_csv('Carbon_diet.csv', index=False)
    return old_df

In [37]:
def recipe_table():
    """
    Set up a table with all the recipe names
    URL is the primary key( we should generate random VARCHAR as primary for 1) recipes 2) ingredients
    """
    ingredient_table = pd.read_csv('Carbon_diet.csv') 
    recipe_table = df[['Receipt', 'URL']][df['Index Ingredient']==1]
    recipe_table.to_csv('all_recipes.csv', index=False)   
    return recipe_table

Food list

In [5]:
french_and_italian_plate_names = ["Coq au Vin", "Bouillabaisse", "Ratatouille", "Bœuf Bourguignon", "Tarte Tatin", "Crème Brûlée", "Cassoulet", "Salade Niçoise", "Quiche Lorraine", "Poulet Basquaise", "Spaghetti Carbonara", "Lasagne au Four", "Pizza Margherita", "Risotto à la Milanaise", "Osso Bucco à la Milanaise", "Tiramisu", "Paella", "Gazpacho", "Churros", "Tapas", "Caldo Verde", "Bacalhau à Gomes de Sá", "Francesinha", "Codfish à Bras", "Piri-Piri Chicken", "Bitterballen", "Hutspot", "Stamppot", "Kibbeling", "Gouda Cheese", "Rösti", "Fondue", "Raclette",
 "Pizza Margherita", "Hamburger", "Sushi", "Peking Duck", "Pad Thai", "Fish and Chips", "Tacos", "Paella", "Chicken Tikka Masala", "Biryani", "Falafel", "Pho", "Goulash", "Currywurst", "Wiener Schnitzel", "Ramen", "Croissant", "Baklava", "Moussaka", "Ceviche", "Tostones", "Pupusas", "Empanadas", "Churrasco", "Feijoada", "Chimichurri", "Pierogi", "Sauerbraten", "Sarma", "Dolma", "Meze", "Kebab", "Lobster Thermidor", "Beef Wellington", "Shepherd's Pie", "Bangers and Mash", "Full English Breakfast", "Roast Beef and Yorkshire Pudding", "Fish Pie", "Apple Pie", "Cheesecake", "Pavlova", "Tiramisu", "Crème Brûlée", "Custard Tart", "Banoffee Pie", "Mochi", "Cannoli", "Tortellini", "Lasagna", "Spaghetti Carbonara", "Pesto", "Gnocchi", "Risotto", "Peking Duck", "Hot Pot", "Mapo Tofu", "Kung Pao Chicken", "Char Siu", "Dim Sum", "Baozi", "Katsu Curry", "Tonkatsu", "Okonomiyaki", "Soba Noodles", "Yakisoba", "Sukiyaki", "Shabu Shabu", "Bibimbap", "Kimchi", "Bulgogi", "Galbi", "Jjajangmyeon", "Ramyeon", "Samgyeopsal", "Gimbap", "Tteokbokki", "Moules Frites", "Steak Tartare", "Bouillabaisse", "Coq au Vin", "Ratatouille", "Choucroute Garnie", "Bœuf Bourguignon", "Pot-au-Feu", "Cassoulet", "Salade Niçoise", "Quiche Lorraine", "Tarte Tatin", "Crepe Suzette", "Fondue", "Raclette", "Schnitzel", "Sauerbraten", "Schweinshaxe", "Black Forest Cake", "Sacher Torte", "Apfelstrudel", "Kaiserschmarrn", "Goulash", "Halászlé", "Chicken Paprikash", "Stroganoff"]


In [6]:
for recipe in french_and_italian_plate_names:
     new_recipes(recipe)


In [38]:
recipe_table()

Unnamed: 0,Receipt,URL
0,Coq au vin maison,/recettes/recette_coq-au-vin-maison_25755.aspx
15,coq au vin de la mère Michèle,/recettes/recette_coq-au-vin-de-la-mere-michel...
32,"Coq fermier au vin, purée écrasée à la fourchette",/recettes/recette_coq-fermier-au-vin-puree-ecr...
52,Coq au vin rosé et ses petits champignons,/recettes/recette_coq-au-vin-rose-et-ses-petit...
62,Coq au vin,/recettes/recette_coq-au-vin_20442.aspx
...,...,...
10743,Porc Stroganoff,/recettes/recette_porc-stroganoff_19366.aspx
10759,Boeuf Stroganoff rapide et bon,/recettes/recette_boeuf-stroganoff-rapide-et-b...
10767,Porc Stroganoff du père Landreau,/recettes/recette_porc-stroganoff-du-pere-land...
10774,Boeuf Strogonoff,/recettes/recette_boeuf-strogonoff_219693.aspx
