In [4]:
!pip install pandas numpy loguru requests




[notice] A new release of pip is available: 23.2.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [5]:
import warnings
warnings.filterwarnings('ignore')
from pathlib import Path
import pandas as pd
import json
import numpy as np

# HERE is the directory holding this notebook/script. `__file__` is undefined
# inside a notebook kernel, so fall back to the current working directory
# there. Taking `.parent` of the resolved file path makes HERE a directory in
# both branches, so `HERE.parent` below means the same thing either way.
try:
    HERE = Path(__file__).resolve().parent
except NameError:
    HERE = Path.cwd()

# The input table is read from one level above HERE; every column is loaded
# as a string (dtype=str), including the numeric-looking ones.
data = pd.read_csv(HERE.parent / "gemma_annotation_normalized.csv", dtype=str)

In [6]:
from loguru import logger

# Register the file sink exactly once. Re-running this cell would otherwise
# stack another sink on the same file and duplicate every log line.
try:
    logger.remove(_api_log_sink_id)
except (NameError, ValueError):
    # NameError: first run in this kernel; ValueError: sink already removed.
    pass
_api_log_sink_id = logger.add("API_Errors.log", level = "INFO", format = "{time}:{level}:{message}")

1

In [7]:
import requests

# Endpoint that receives a free-text recipe line in the `recipe` JSON field.
url = "https://smarthome.uni-regensburg.de/naehrwertrechner/api/1.0/recipe_info_optifast"

def get_nut(prompt: str, timeout: float = 30.0) -> dict | None:
    """
    POST ``prompt`` to the nutrition API and return the decoded JSON reply.

    Args:
        prompt: Text sent as the ``recipe`` field of the JSON payload.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller indefinitely.

    Returns:
        The parsed JSON body when the request succeeds, otherwise ``None``
        (network failure, timeout, error status, or a body that is not valid
        JSON). Every failure is written to the loguru ``logger``.
    """
    payload = {"recipe": prompt}
    headers = {"content-type": "application/json"}

    try:
        # NOTE(review): verify=False disables TLS certificate verification and
        # the resulting warning is hidden by the notebook's blanket warnings
        # filter. Kept as-is because the server's certificate setup is not
        # visible from here — confirm whether verification can be enabled.
        response = requests.post(
            url, json=payload, headers=headers, verify=False, timeout=timeout
        )
    except requests.RequestException as exc:
        logger.error("Request failed for prompt {!r}: {}", prompt, exc)
        return None

    if not response.ok:
        logger.error("API returned status {} for prompt {!r}", response.status_code, prompt)
        return None

    try:
        return response.json()
    except ValueError as exc:
        # JSON decode errors from requests are ValueError subclasses.
        logger.error("Response for prompt {!r} is not valid JSON: {}", prompt, exc)
        return None

In [8]:
import json

def calc_nut(amount: int, unit: str, ingr: str) -> str | None:
    """
    Look up the nutrition values for one ingredient line via ``get_nut``.

    The prompt is ``"<amount> <ingr>"`` for piece counts and
    ``"<amount><unit> <ingr>"`` for every other unit.

    Args:
        amount: Quantity; interpolated verbatim into the prompt.
        unit: Normalised unit. Piece-count spellings ("stk", "stk.", "stück",
            "stueck"; case-insensitive) are omitted from the prompt.
        ingr: Ingredient name.

    Returns:
        The API response serialised as a JSON string, or ``None`` when the
        lookup fails: no response, a response without the expected
        ``detailed_info[0][0]["erkannteZutat"]`` entry, an ingredient reported
        as "Nicht erkannt", or a response that cannot be serialised. Each
        failure is written to the loguru ``logger``.
    """
    # The normalised data spells the piece unit "stück", so comparing against
    # "stk" alone never matched; accept the known spellings instead.
    piece_units = {"stk", "stk.", "stück", "stueck"}
    is_piece_count = str(unit).strip().lower() in piece_units
    prompt = f"{amount} {ingr}" if is_piece_count else f"{amount}{unit} {ingr}"

    res = get_nut(prompt)
    if not res:
        logger.warning("No API result for prompt {!r}", prompt)
        return None

    # Guard the lookup: an unexpected response shape must not abort the
    # caller's whole row-wise run.
    try:
        recognised = res["detailed_info"][0][0]["erkannteZutat"]
    except (KeyError, IndexError, TypeError):
        logger.error("Unexpected response structure for prompt {!r}", prompt)
        return None

    if recognised == "Nicht erkannt":
        logger.info("Ingredient not recognised for prompt {!r}", prompt)
        return None

    try:
        return json.dumps(res)
    except (TypeError, ValueError) as exc:
        logger.error("Could not serialise response for prompt {!r}: {}", prompt, exc)
        return None
# This logic is incomplete and needs to be fixed as mentioned in the call

In [9]:
def _nutrition_for_row(row):
    """Run the nutrition lookup for a single row of ``data``."""
    return calc_nut(row["norm_value"], row["norm_unit"], row["ingredient"])

# One lookup per row; the result of each call becomes the new column's value.
data['nutrition'] = data.apply(_nutrition_for_row, axis = 1)

In [10]:
# Persist only the rows that actually received a nutrition value; rows whose
# lookup returned None are dropped here. index=False keeps the row index out
# of the file, so re-reading it does not add another "Unnamed: 0" column.
data.dropna(subset=["nutrition"]).to_csv("nutrition_data.csv", index=False)

In [11]:
# Summarise columns, dtypes and non-null counts; the non-null count of
# `nutrition` shows how many rows received a value from the lookup.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 910 entries, 0 to 909
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Unnamed: 0         910 non-null    object
 1   ingredient         910 non-null    object
 2   amount             910 non-null    object
 3   ingr_annotation    910 non-null    object
 4   amount_annotation  910 non-null    object
 5   norm_value         910 non-null    object
 6   norm_unit          910 non-null    object
 7   nutrition          796 non-null    object
dtypes: object(8)
memory usage: 57.0+ KB


In [12]:
# Keep only the rows whose lookup produced no nutrition value
is_missing = data['nutrition'].isna()
filtered = data.loc[is_missing]

# Export those rows (without the row index) and show them as the cell output
filtered.to_csv("failed_extracts.csv", index=False)
filtered


Unnamed: 0.1,Unnamed: 0,ingredient,amount,ingr_annotation,amount_annotation,norm_value,norm_unit,nutrition
5,5,passierte tomaten,300 g,"{""gewicht"": 300, ""einheit"": ""g""}","{""zutat"": ""Tomate"", ""eigenschaft"": ""passiert""}",300.0,g,
15,15,bio hähnchenschlegel,4 stk.,"{""anzahl"": 4, ""einheit"": ""stk.""}","{""zutat"": ""H\u00e4hnchenschlegel"", ""eigenschaf...",4.0,stück,
20,20,snackkarotten,80 g,"{""gewicht"": 80, ""einheit"": ""g""}","{""zutat"": ""Karotte"", ""eigenschaft"": ""Snack-""}",80.0,g,
22,22,weißwein trocken,100 ml,"{""volumen"": 100, ""einheit"": ""ml""}","{""zutat"": ""Wein"", ""eigenschaft"": ""trocken""}",0.1,liter,
31,31,schokoriegel,1 stk.,"{""anzahl"": 1, ""einheit"": ""stk.""}","{""zutat"": ""Schokoriegel""}",1.0,stück,
...,...,...,...,...,...,...,...,...
891,891,küchentuch,1 stk.,"{""anzahl"": 1, ""einheit"": ""stk.""}","{""zutat"": ""K\u00fcchentuch"", ""eigenschaft"": ""S...",1.0,stück,
896,896,sellerieknolle gegart,50 g,"{""gewicht"": 50, ""einheit"": ""g""}","{""zutat"": ""Sellerieknolle"", ""eigenschaft"": ""ge...",50.0,g,
898,898,spitzpaprika rot,30 g,"{""gewicht"": 30, ""einheit"": ""g""}","{""zutat"": ""Spitzpaprika"", ""eigenschaft"": ""rot""}",30.0,g,
902,902,norderneyer seeluftschinken,6 stück,"{""anzahl"": 6, ""einheit"": ""st\u00fcck""}","{""zutat"": ""Schinken"", ""eigenschaft"": ""seeluft""}",6.0,stück,
