In [47]:
import warnings
warnings.filterwarnings('ignore')
from pathlib import Path
import pandas as pd
import json
import numpy as np

# Directory used to locate the input file. HERE is a directory in both
# branches so that paths can be built from it the same way in either mode.
try:
    # __file__ is defined when this code runs as a plain script.
    HERE = Path(__file__).resolve().parent
except NameError:
    # __file__ is undefined (e.g. in a notebook kernel): use the working directory.
    HERE = Path.cwd()

# dtype=str loads every column as text; numeric fields are converted where they are used.
data = pd.read_csv(HERE / "retry1.csv", dtype=str)
data.head(10)

Unnamed: 0.1,Unnamed: 0,ingredient,amount,ingr_annotation,amount_annotation,norm_value,norm_unit,nutrition,ingr_identified
0,15,bio hähnchenschlegel,4 stk.,"{""anzahl"": 4, ""einheit"": ""stk.""}","{""zutat"": ""H\u00e4hnchenschlegel"", ""eigenschaf...",4.0,stück,,hähnchenschlegel
1,31,schokoriegel,1 stk.,"{""anzahl"": 1, ""einheit"": ""stk.""}","{""zutat"": ""Schokoriegel""}",1.0,stück,,schoko
2,32,amarettini mandelkeksezerstoßen,2 esslöffel,"{""anzahl"": 2, ""einheit"": ""essl\u00f6ffel""}","{""zutat"": ""Amarettini"", ""eigenschaft"": ""zersto...",0.03,liter,,mandelkekse
3,33,deko schirmchen,2 stk.,"{""anzahl"": 2, ""einheit"": ""stk.""}","{""zutat"": ""Schirmchen"", ""eigenschaft"": ""Deko""}",2.0,stück,,schirmchen
4,40,chilisoße sambal oelek,0.5 tl,"{""volumen"": 0.5, ""einheit"": ""tl""}","{""zutat"": ""Chiliso\u00dfe"", ""eigenschaft"": ""Sa...",0.0025,liter,,sambal\nchilisoße
5,46,orangenabrieb,1 teelöffel,"{""volumen"": ""1"", ""einheit"": ""teel\u00f6ffel""}","{""zutat"": ""Teel\u00f6ffel"", ""eigenschaft"": ""or...",0.005,liter,,orangen
6,52,datteltomaten,4 stk.,"{""anzahl"": 4, ""einheit"": ""stk.""}","{""zutat"": ""Datteltomate"", ""eigenschaft"": ""ganz""}",4.0,stück,,tomaten
7,65,riesling trocken,100 ml,"{""volumen"": 100, ""einheit"": ""ml""}","{""zutat"": ""Riesling"", ""eigenschaft"": ""trocken""}",0.1,liter,,riesling
8,103,schinkenwürfel,250 g,"{""gewicht"": 250, ""einheit"": ""g""}","{""zutat"": ""Schinkenw\u00fcrfel""}",250.0,g,,schinken
9,127,asiatische fischsoße,1 el,"{""volumen"": ""1"", ""einheit"": ""el""}","{""zutat"": ""Fischso\u00dfe"", ""eigenschaft"": ""as...",0.015,liter,,fischsoße


In [48]:
from loguru import logger

# Re-running this cell must not stack another file sink, otherwise every
# message is written to API_Errors.log once per run of the cell.
try:
    logger.remove(API_LOG_HANDLER_ID)
except (NameError, ValueError):
    # NameError: first run in this kernel, nothing registered yet.
    # ValueError: the remembered handler id is no longer active.
    pass

# Keep the handler id so the next run of this cell can remove the sink again.
API_LOG_HANDLER_ID = logger.add("API_Errors.log", level="INFO", format="{time}:{level}:{message}")

3

In [49]:
import requests

# Endpoint that get_nut() sends its POST request to.
url = "https://smarthome.uni-regensburg.de/naehrwertrechner/api/1.0/recipe_info_optifast"

def get_nut(prompt: str, timeout: float = 30.0) -> dict | None:
    """Request nutrition information for one recipe line from the web API.

    Args:
        prompt: Recipe text, sent as the ``recipe`` field of the JSON payload.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body when the request succeeds, otherwise ``None``
        (error status code, network failure, timeout, or a body that is not
        valid JSON). The function never raises for these cases, so one bad
        request cannot abort a whole batch of lookups.
    """
    payload = {"recipe": prompt}
    headers = {"content-type": "application/json"}

    try:
        # NOTE(review): verify=False disables TLS certificate verification.
        # It is kept so the existing behaviour does not change; confirm
        # whether the server's certificate can be verified and drop it if so.
        response = requests.post(
            url,
            json=payload,
            headers=headers,
            verify=False,
            timeout=timeout,  # without a timeout the call can block forever
        )
    except requests.RequestException:
        # Connection errors, timeouts, etc. are reported as "no result".
        return None

    if not response.ok:
        return None

    try:
        return response.json()
    except ValueError:
        # The body was not valid JSON.
        return None

In [52]:
import json

def is_invalid(res: dict) -> bool:
    """Tell whether an API response is unusable.

    A response is unusable when it is empty/``None``, is not a dict, has no
    (or an empty) ``"detailed_info"`` entry, does not have the nested
    ``detailed_info[0][0]["erkannteZutat"]`` field, or when that field equals
    ``"Nicht erkannt"``.

    Args:
        res: Decoded API response, or ``None`` when the request failed.

    Returns:
        ``True`` if the response is unusable, ``False`` otherwise. Malformed
        responses yield ``True`` instead of raising. This function does not
        log anything; reporting is left to the caller.
    """
    if not res or not isinstance(res, dict):
        return True

    detailed_info = res.get("detailed_info")
    if not detailed_info:
        return True

    try:
        recognised = detailed_info[0][0]["erkannteZutat"]
    except (IndexError, KeyError, TypeError):
        # Unexpected nesting or missing field: treat as unusable.
        return True

    return recognised == "Nicht erkannt"

def calc_nut(x: pd.Series) -> str | float:
    """Look up the nutrition data for one ingredient row.

    Builds the prompt ``"<amount> <unit> <ingredient>"`` from the row's
    ``norm_value``, ``norm_unit`` and ``ingr_identified`` fields and passes it
    to ``get_nut``.

    Args:
        x: One row; must provide the three fields named above.

    Returns:
        The API response serialised as a JSON string, or ``np.nan`` when the
        row cannot be turned into a prompt, when ``is_invalid`` rejects the
        response, or when the response cannot be serialised. Every failure is
        reported through ``logger``.
    """
    unit, ingr = x["norm_unit"], x["ingr_identified"]

    try:
        amount = float(x["norm_value"])
    except (TypeError, ValueError):
        # Non-numeric or missing amount: handled by the finiteness check below.
        amount = float("nan")

    # Do not send prompts such as "nan liter ..." to the API.
    if not np.isfinite(amount) or pd.isna(unit) or pd.isna(ingr):
        logger.error(
            "unusable row: norm_value={!r}, norm_unit={!r}, ingr_identified={!r}",
            x["norm_value"], unit, ingr,
        )
        return np.nan

    # Whole numbers are written without a trailing ".0" ("4 stück", not "4.0 stück").
    if amount.is_integer():
        amount = int(amount)

    prompt = f"{amount} {unit} {ingr}"

    res = get_nut(prompt)

    if is_invalid(res):
        logger.error(prompt)
        return np.nan

    try:
        return json.dumps(res)
    except (TypeError, ValueError):
        # json.dumps raises these for values it cannot encode.
        logger.error("could not serialise API response for prompt {!r}", prompt)
        return np.nan

In [53]:
# axis=1 hands each row to calc_nut; the returned JSON string (or NaN for a
# failed lookup) is stored in the "nutrition" column.
data["nutrition"] = data.apply(calc_nut, axis=1)

[32m2025-11-10 00:52:00.112[0m | [31m[1mERROR   [0m | [36m__main__[0m:[36mcalc_nut[0m:[36m30[0m - [31m[1m4 stück hähnchenschlegel[0m
[32m2025-11-10 00:52:01.342[0m | [31m[1mERROR   [0m | [36m__main__[0m:[36mcalc_nut[0m:[36m30[0m - [31m[1m0.03 liter mandelkekse[0m
[32m2025-11-10 00:52:01.754[0m | [31m[1mERROR   [0m | [36m__main__[0m:[36mcalc_nut[0m:[36m30[0m - [31m[1m2 stück schirmchen[0m
[32m2025-11-10 00:52:04.008[0m | [31m[1mERROR   [0m | [36m__main__[0m:[36mcalc_nut[0m:[36m30[0m - [31m[1m0.1 liter riesling[0m
[32m2025-11-10 00:52:05.033[0m | [31m[1mERROR   [0m | [36m__main__[0m:[36mcalc_nut[0m:[36m30[0m - [31m[1m0.015 liter fischsoße[0m
[32m2025-11-10 00:52:05.643[0m | [31m[1mERROR   [0m | [36m__main__[0m:[36mcalc_nut[0m:[36m30[0m - [31m[1m0.25 liter brunch[0m
[32m2025-11-10 00:52:05.960[0m | [31m[1mERROR   [0m | [36m__main__[0m:[36mcalc_nut[0m:[36m30[0m - [31m[1m50 g reibekäse[0m
[32m

In [54]:
# Number of rows whose "nutrition" value is missing (NaN marks a failed lookup).
data["nutrition"].isnull().sum()

np.int64(52)

In [55]:
# Keep only the rows whose "nutrition" value is missing, i.e. the failed lookups.
filtered = data.loc[data["nutrition"].isna()]

# Write those rows to disk (without the index column) and summarise them.
filtered.to_csv("failed_extracts.csv", index=False)
filtered.info()

<class 'pandas.core.frame.DataFrame'>
Index: 52 entries, 0 to 72
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Unnamed: 0         52 non-null     object
 1   ingredient         52 non-null     object
 2   amount             52 non-null     object
 3   ingr_annotation    52 non-null     object
 4   amount_annotation  52 non-null     object
 5   norm_value         52 non-null     object
 6   norm_unit          52 non-null     object
 7   nutrition          0 non-null      object
 8   ingr_identified    52 non-null     object
dtypes: object(9)
memory usage: 4.1+ KB


In [None]:
data.