In [None]:
import warnings
warnings.filterwarnings('ignore')
from pathlib import Path
import pandas as pd
import json
import numpy as np

# Directory this code lives in. `__file__` is not defined inside a Jupyter
# kernel, so fall back to the current working directory there. `.parent` makes
# both branches yield a directory (previously the script branch yielded the
# path of the file itself).
try:
    HERE = Path(__file__).resolve().parent
except NameError:
    HERE = Path.cwd()

# dtype=str reads every column as text; numeric parsing happens later, per row.
data = pd.read_csv("normalized_using_prompt2.csv", dtype=str)
data.head(10)

In [None]:
from loguru import logger

# Register the file sink that records failed lookups. Re-running this cell must
# not stack a second identical sink (every message would then be written
# twice), so drop the sink registered by a previous run first.
try:
    logger.remove(_api_error_sink_id)
except (NameError, ValueError):
    # NameError: first run in this kernel; ValueError: sink already removed.
    pass
_api_error_sink_id = logger.add("API_Errors.log", level = "INFO", format = "{time}:{level}:{message}")

In [None]:
import requests

url = "https://smarthome.uni-regensburg.de/naehrwertrechner/api/1.0/recipe_info_optifast"

def get_nut(prompt: str) -> dict | None:
    payload = { "recipe": prompt }
    headers = {"content-type": "application/json"}
    response = requests.post(url, json = payload, headers = headers, verify=False)

    if response.ok:
        return response.json()
    else:
        return None

In [None]:
import json

def is_invalid(res: dict) -> bool:
    """
        check if the response from the API is valid or not.
        If the output is "Nicht Erkannt" then this function would returns True
        And log the error in a seperate file when an error occurs at the API
    """
    if not res or not res["detailed_info"] or res["detailed_info"][0][0]["erkannteZutat"] == "Nicht erkannt":
        return True
    return False

def calc_nut(x: any) -> str:
    """
        calculate the nutrition value using ingredient name and quantity.
        If the output is "Nicht Erkannt" then this function would returns np.NaN
        And log the error in a seperate file when an error occurs at the API
    """
    amount, unit  = float(x["norm_value"]), x["norm_unit"]

    if amount.is_integer(): amount = int(amount)

    ingr = x["ingr_identified"]

    prompt = f"{amount} {unit} {ingr}"

    res = get_nut(prompt)

    if is_invalid(res):
        logger.error(prompt)
        return np.nan

    try:
        return json.dumps(res)
    except Exception as e:
        return np.nan

In [None]:
# One API lookup per row; each cell holds calc_nut's result for that row.
data['nutrition'] = data.apply(calc_nut, axis=1)

In [None]:
# Count the rows whose 'nutrition' value is missing (NaN)
data["nutrition"].isna().sum()

In [None]:
# Keep only the rows whose 'nutrition' value is missing (NaN)
filtered = data.loc[data['nutrition'].isna()]

# Write those rows to CSV without the index column
# (iterative process to get a smaller file)
filtered.to_csv("final_failed_extracts.csv", index=False)
filtered.info()

In [None]:
# Display the frame, now including the 'nutrition' column
data