In [5]:
import pandas as pd
import numpy as np
import requests
import time
from tqdm import tqdm

## Data Loading

In [8]:
ingredients_csv_path = "../data/chefkoch22k/231110_Chefkoch22k_Ingredience.csv"

ingredients_df = pd.read_csv(ingredients_csv_path, sep=";", encoding="utf-8")

print(ingredients_df.head(2))

      RecipID        1       2        3      4           5               6  \
0  6971800010  Cachaça  Zucker  Limette    Eis         NaN             NaN   
1  7971878818    Wodka     Gin      Rum  Likör  Ananassaft  Grapefruitsaft   

              7          8              9  ...   36   37   38   39   40   41  \
0           NaN        NaN            NaN  ...  NaN  NaN  NaN  NaN  NaN  NaN   
1  Zitronensaft  Grenadine  Guaranapulver  ...  NaN  NaN  NaN  NaN  NaN  NaN   

    42   43   44   45  
0  NaN  NaN  NaN  NaN  
1  NaN  NaN  NaN  NaN  

[2 rows x 46 columns]


  ingredients_df = pd.read_csv(ingredients_csv_path, sep=";", encoding="utf-8")


## Enrichment: add column with recipe title

In [6]:
chefkoch_api_base_url = "https://api.chefkoch.de/v2/recipes/"

# adding a recipe id to the base url like this: https://api.chefkoch.de/v2/recipes/22641005721453
# will return the recipe with the given id:
# {
#     "id": "22641005721453",
#     "type": 0,
#     "title": "Forelle Müllerin",
#     ...
# }

# we can use this to get the recipe title for each recipe id in the ingredients_df
# and add it to the ingredients_df

In [7]:
import concurrent.futures
from tqdm import tqdm

# Function to fetch recipe title
def fetch_recipe_title(recipe_id):
    delay = 1
    while True:
        try:
            url = chefkoch_api_base_url + str(recipe_id)
            response = requests.get(url)
            if response.status_code == 429:
                print(f"Rate limit reached. Sleeping for {delay} seconds...")
                time.sleep(delay)
                delay *= 2
                continue
            recipe_title = response.json()["title"]
            return recipe_title
        except Exception as e:
            print(f"An error occurred: {e}")
            return None

recipe_id_column = ingredients_df["RecipID"]

with concurrent.futures.ThreadPoolExecutor() as executor:
    for i in tqdm(range(0, len(recipe_id_column), 100)):
        recipe_titles = list(executor.map(fetch_recipe_title, recipe_id_column[i:i+100]))
        ingredients_df.loc[i:i+99, "RecipeTitle"] = recipe_titles
        ingredients_df.to_csv("data/chefkoch22k/ingredients_with_titles.csv", index=False)

print(ingredients_df.head())

  9%|▉         | 20/222 [05:57<1:23:14, 24.72s/it]

An error occurred: 'title'


 67%|██████▋   | 148/222 [53:08<13:26, 10.90s/it] 

An error occurred: 'title'


 95%|█████████▌| 212/222 [1:05:36<02:07, 12.74s/it]

An error occurred: 'title'
An error occurred: 'title'
An error occurred: 'title'
An error occurred: 'title'


 96%|█████████▌| 213/222 [1:05:49<01:56, 12.98s/it]

An error occurred: 'title'
An error occurred: 'title'


 96%|█████████▋| 214/222 [1:06:03<01:44, 13.04s/it]

An error occurred: 'title'
An error occurred: 'title'
An error occurred: 'title'
An error occurred: 'title'
An error occurred: 'title'


 97%|█████████▋| 215/222 [1:06:16<01:31, 13.06s/it]

An error occurred: 'title'


 98%|█████████▊| 217/222 [1:06:40<01:02, 12.45s/it]

An error occurred: 'title'


 98%|█████████▊| 218/222 [1:06:51<00:48, 12.05s/it]

An error occurred: 'title'


 99%|█████████▊| 219/222 [1:07:02<00:35, 11.73s/it]

An error occurred: 'title'


 99%|█████████▉| 220/222 [1:07:15<00:24, 12.23s/it]

An error occurred: 'title'
An error occurred: 'title'
An error occurred: 'title'
An error occurred: 'title'
An error occurred: 'title'


100%|█████████▉| 221/222 [1:07:40<00:16, 16.10s/it]

An error occurred: 'title'
An error occurred: 'title'
An error occurred: 'title'
An error occurred: 'title'


100%|██████████| 222/222 [1:07:50<00:00, 18.34s/it]

       RecipID              1            2              3        4  \
0   6971800010        Cachaça       Zucker        Limette      Eis   
1   7971878818          Wodka          Gin            Rum    Likör   
2  23973004603     Petersilie  Chilischote  Knoblauchzehe  Garnele   
3  43974876764  Schweinefilet        Möhre       Sellerie    Lauch   
4  48974889052      Kaninchen       Butter   Fleischbrühe  Zwiebel   

            5               6                 7              8              9  \
0         NaN             NaN               NaN            NaN            NaN   
1  Ananassaft  Grapefruitsaft      Zitronensaft      Grenadine  Guaranapulver   
2    Olivenöl             NaN               NaN            NaN            NaN   
3    Brokkoli     Mostbröckli  Käse Appenzeller  Sauerhalbrahm       Weißwein   
4       Möhre          Tomate             Speck           Salz        Pfeffer   

   ...   37   38   39   40   41   42   43   44   45  \
0  ...  NaN  NaN  NaN  NaN  NaN  NaN 




## 