In [None]:
import json
import time
from pathlib import Path
import requests

# --- USDA API Config ---
API_KEY = "mk9ySoysK14h3E1emE5gufJcvEOJ6HpkhTPNKPBL"
API_SEARCH_URL = "https://api.nal.usda.gov/fdc/v1/foods/search"
API_DETAIL_URL = "https://api.nal.usda.gov/fdc/v1/food/"

# --- Nutrients of interest ---
IMPORTANT_NUTRIENTS = {
    1008: "Calories (kcal)",
    1003: "Protein (g)",
    1004: "Total Fat (g)",
    1005: "Carbohydrates (g)",
}

# --- Local file paths ---
db_path = Path("usda_nutrition_db_step1.json")
unresolved_path = Path("usda_unresolved_food_items.json")

# --- Food items list (example) ---
# ALL_FOOD_ITEMS = [
#     "Apple",
#     "Artichoke",
#     "Bagel",
#     "Balaleet",
#     "Bamya",
#     "Banana",
#     "Basbousa",
#     "Beer",
#     "Bell pepper",
#     "Bread",
#     "Broccoli",
#     "Burger",
#     "Burrito",
#     "Cabbage",
#     "Cake",
#     "Candy",
#     "Cantaloupe",
#     "Carrot",
#     "Cheese",
#     "Cocktail",
#     "Common fig",
#     "Cookie",
#     "Cream",
#     "Crab",
#     "Croissant",
#     "Cucumber",
#     "Dates_with_tahini",
#     "Fattoush",
#     "Fish",
#     "Fries",
#     "French fries",
#     "Garden Asparagus",
#     "Gers_Ogaily",
#     "Grape",
#     "Grapefruit",
#     "Guacamole",
#     "Hamburger",
#     "Harees",
#     "Hot dog",
#     "Hummus",
#     "Ice cream",
#     "Jireesh",
#     "Kebab",
#     "Khabeesa",
#     "Kubba",
#     "Labneh",
#     "Laban_drink",
#     "Lemon",
#     "Lentil_soup",
#     "Lobster",
#     "Luqaimat",
#     "Majboos_Dajaj",
#     "Mallooba(Maqluba)",
#     "Mango",
#     "Modas_rice",
#     "Molokhia",
#     "Muffin",
#     "Muhammara",
#     "Murabyan",
#     "Musakhan_Chicken",
#     "Mushroom",
#     "Om_ali",
#     "Orange",
#     "Orange (fruit)",
#     "Oyster",
#     "Pancake",
#     "Paneer_butter_masala",
#     "Pasta",
#     "Pastry",
#     "Peach",
#     "Pear",
#     "Pineapple",
#     "Pizza",
#     "Plain_white_rice",
#     "Pomegranate",
#     "Popcorn",
#     "Potato",
#     "Pretzel",
#     "Pumpkin",
#     "Radish",
#     "Rice_with_meat",
#     "Saffron",
#     "Samosa",
#     "Sandwich",
#     "Shrimp",
#     "Squash (Plant)",
#     "Strawberry",
#     "Submarine sandwich",
#     "Sushi",
#     "Tabouleh",
#     "Taco",
#     "Tahini",
#     "Tamria(Tamriyeh)",
#     "Tea_with_milk",
#     "Tomato",
#     "Tamarind_juice",
#     "Tart",
#     "Warak_Enab",
#     "Waffle",
#     "Watermelon",
#     "Wine",
#     "Winter melon",
#     "Zaatar",
#     "Zucchini"
# ]

ALL_FOOD_ITEMS = ["sugar"]

# --- Load existing data ---
if db_path.exists():
    with open(db_path) as f:
        nutrition_db = json.load(f)
else:
    nutrition_db = {}

if unresolved_path.exists():
    with open(unresolved_path) as f:
        unresolved_items = json.load(f)
else:
    unresolved_items = []

# --- Helper functions ---
def search_usda_foods(query):
    params = {
        "api_key": API_KEY,
        "query": query,
        "dataType": ["Survey (FNDDS)"]
    }
    response = requests.get(API_SEARCH_URL, params=params)
    response.raise_for_status()
    return response.json()

def get_food_details(fdc_id):
    params = {"api_key": API_KEY}
    response = requests.get(f"{API_DETAIL_URL}{fdc_id}", params=params)
    response.raise_for_status()
    return response.json()

def select_best_item(food_query, food_list):
    query_lower = food_query.lower().strip()
    for item in food_list:
        description = item.get("description", "").lower().strip()
        if query_lower in description:
            return item
        
    return food_list[0] if food_list else None

def extract_nutrient_info(food_details):
    nutrients = food_details.get("foodNutrients", [])
    result = {
        "description": food_details.get("description"),
        "fdcId": food_details.get("fdcId"),
        "nutrients": {}
    }

    for nutrient in nutrients:
        try:
            nid = int(nutrient.get("nutrient", {}).get("id"))
        except (ValueError, TypeError):
            continue
        if nid in IMPORTANT_NUTRIENTS:
            result["nutrients"][IMPORTANT_NUTRIENTS[nid]] = {
                "value": nutrient.get("amount"),
                "unit": nutrient.get("nutrient", {}).get("unitName")
            }

    return result

# --- Process all food items ---
for food in ALL_FOOD_ITEMS:
    key = food.lower().strip()
    if key in nutrition_db or key in unresolved_items:
        continue

    print(f"Querying: {food}")
    try:
        search_result = search_usda_foods(food)
        foods = search_result.get("foods", [])
        if foods:
            selected = select_best_item(food,foods)
            details = get_food_details(selected["fdcId"])
            parsed = extract_nutrient_info(details)
            nutrition_db[key] = parsed
            print(f"  ✔ Stored: {parsed['description']}")
        else:
            unresolved_items.append(key)
            print(f"  ✖ Not found: {key}")
    except Exception as e:
        unresolved_items.append(key)
        print(f"  ⚠ Error: {e}")

    time.sleep(1)

# --- Save ---
with open(db_path, "w") as f:
    json.dump(nutrition_db, f, indent=4)

with open(unresolved_path, "w") as f:
    json.dump(unresolved_items, f, indent=4)

print("✅ Step 1 completed.")

Querying: Hot dog
  ✔ Stored: Hot dog, vegetarian
✅ Step 1 completed.


In [1]:
import json
import time
from pathlib import Path
import requests

# --- USDA API Config ---
API_KEY = "mk9ySoysK14h3E1emE5gufJcvEOJ6HpkhTPNKPBL"
API_SEARCH_URL = "https://api.nal.usda.gov/fdc/v1/foods/search"
API_DETAIL_URL = "https://api.nal.usda.gov/fdc/v1/food/"

IMPORTANT_NUTRIENTS = {
    1008: "Calories (kcal)",
    1003: "Protein (g)",
    1004: "Total Fat (g)",
    1005: "Carbohydrates (g)",
}

# Load unresolved items and ingredient map
with open("usda_unresolved_food_items.json") as f:
    unresolved_items = json.load(f)

with open("utils/custom_dish_ingredients.json") as f:
    CUSTOM_DISH_INGREDIENTS = json.load(f)

# Load existing main database
main_db_path = Path("usda_nutrition_db_step1.json")
if main_db_path.exists():
    with open(main_db_path) as f:
        nutrition_db = json.load(f)
else:
    nutrition_db = {}

# Path for ingredient-level db
ingredient_db_path = Path("usda_nutrition_ingredients_db.json")
if ingredient_db_path.exists():
    with open(ingredient_db_path) as f:
        ingredient_db = json.load(f)
else:
    ingredient_db = {}


# --- Helper Functions ---
def search_usda_foods(query):
    params = {"api_key": API_KEY, "query": query, "dataType": ["Survey (FNDDS)"]}
    response = requests.get(API_SEARCH_URL, params=params)
    response.raise_for_status()
    return response.json()


def get_food_details(fdc_id):
    params = {"api_key": API_KEY}
    response = requests.get(f"{API_DETAIL_URL}{fdc_id}", params=params)
    response.raise_for_status()
    return response.json()


def select_best_item(food_query, food_list):
    query_lower = food_query.lower().strip()
    for item in food_list:
        desc = item.get("description", "").lower().strip()
        if query_lower == desc or "raw" in desc:
            return item
    return food_list[0] if food_list else None


def extract_nutrient_info(food_details):
    nutrients = food_details.get("foodNutrients", [])
    result = {
        "description": food_details.get("description"),
        "fdcId": food_details.get("fdcId"),
        "nutrients": {},
    }
    for nutrient in nutrients:
        try:
            nid = int(nutrient.get("nutrient", {}).get("id"))
        except (ValueError, TypeError):
            continue
        if nid in IMPORTANT_NUTRIENTS:
            result["nutrients"][IMPORTANT_NUTRIENTS[nid]] = {
                "value": nutrient.get("amount"),
                "unit": nutrient.get("nutrient", {}).get("unitName"),
            }
    return result


# --- Process each unresolved item ---
for dish in unresolved_items:
    if dish not in CUSTOM_DISH_INGREDIENTS:
        continue

    print(f"Processing: {dish}")
    if dish not in ingredient_db:
        ingredient_db[dish] = {"ingredients": {}}

    total_calories = 0
    for ingredient in CUSTOM_DISH_INGREDIENTS[dish]:
        ing_key = ingredient.lower()
        if ing_key in ingredient_db[dish]["ingredients"]:
            parsed = ingredient_db[dish]["ingredients"][ing_key]
            print(f"  ✔ Cached: {ingredient}")
        else:
            try:
                print(f"  → Searching: {ingredient}")
                search_result = search_usda_foods(ingredient)
                foods = search_result.get("foods", [])
                if not foods:
                    print(f"    ✖ No match: {ingredient}")
                    continue

                selected = select_best_item(ingredient, foods)
                details = get_food_details(selected["fdcId"])
                parsed = extract_nutrient_info(details)

                ingredient_db[dish]["ingredients"][ing_key] = parsed
                print(f"    ✔ Stored: {parsed['description']}")
            except Exception as e:
                print(f"    ⚠ Error for {ingredient}: {e}")
                continue

            time.sleep(1)

        cal = parsed.get("nutrients", {}).get("Calories (kcal)", {}).get("value")
        if isinstance(cal, (int, float)):
            total_calories += cal

        # Merge full nutrient data into main db
        nutrition_db[dish] = {
            "calculated_from_ingredients": True,
            "ingredient_count": len(CUSTOM_DISH_INGREDIENTS[dish]),
            "ingredients": ingredient_db[dish]["ingredients"],
        }

    print(f"  ✅ Added to nutrition_db: {dish} ({round(total_calories,2)} kcal)")

# Save both
with open(ingredient_db_path, "w") as f:
    json.dump(ingredient_db, f, indent=4)

with open(main_db_path, "w") as f:
    json.dump(nutrition_db, f, indent=4)

print("✅ Step 2 completed with merge into main DB.")

Processing: balaleet
  → Searching: vermicelli
    ✔ Stored: Vermicelli, made from soybeans
  → Searching: eggs
    ✔ Stored: Egg, whole, raw
  → Searching: sugar
    ✔ Stored: Strawberry milk, reduced sugar
  → Searching: cardamom
    ✖ No match: cardamom
  → Searching: saffron
    ✖ No match: saffron
  ✅ Added to nutrition_db: balaleet (328.0 kcal)
Processing: bamya
  → Searching: okra
    ✔ Stored: Fried okra
  → Searching: tomato
    ✔ Stored: Tomatoes, raw
  → Searching: onion
    ✔ Stored: Onions, raw
  → Searching: garlic
    ✔ Stored: Garlic, raw
  → Searching: lamb
    ✔ Stored: Lamb, chop
  → Searching: olive oil
    ✔ Stored: Olive oil
  ✅ Added to nutrition_db: bamya (1584.0 kcal)
Processing: dates_with_tahini
  → Searching: dates
    ✔ Stored: Date
  → Searching: tahini
    ✔ Stored: Tahini
  ✅ Added to nutrition_db: dates_with_tahini (979 kcal)
Processing: fattoush
  → Searching: lettuce
    ✔ Stored: Lettuce, raw
  → Searching: cucumber
    ✔ Stored: Cucumber, raw
  → Se

In [7]:
def get_nutrition_from_db(item_name: str, nutrition_db: dict) -> list:
    """
    Returns nutrient data from the unified USDA nutrition database for a given food item.
    
    If the item is a direct match, returns its nutrient dictionary.
    If the item is a dish with ingredients, returns a dictionary mapping each available ingredient
    to its nutrient dictionary.

    Args:
        item_name (str): Food name from detection/classification.
        nutrition_db (dict): Merged JSON database.

    Returns:
        dict: Nutrient info, either directly or per-ingredient (if composite).
    """
    key = item_name.lower().strip()

    if key not in nutrition_db:
        return []

    item = nutrition_db[key]

    # Case 1: Direct nutrient info (not a composite)
    if "nutrients" in item:
        return [{"description":item["description"], "nutrients": item["nutrients"]}]

    # Case 2: Composite dish with ingredients
    elif "ingredients" in item:
        return [
            {"description": ing, "nutrients": data["nutrients"]}
            for ing, data in item["ingredients"].items()
            if "nutrients" in data
        ]

    return []

with open("usda_nutrition_db_step1.json") as f:
    nutrition_db = json.load(f)

# Direct USDA item
print(get_nutrition_from_db("apple", nutrition_db))

# Composite dish (returns dict of ingredients → nutrients)
print(get_nutrition_from_db("majboos_dajaj", nutrition_db))

# Not found
print(get_nutrition_from_db("non_existing_item", nutrition_db))


[{'description': 'Apple, raw', 'nutrients': {'Protein (g)': {'value': 0.17, 'unit': 'g'}, 'Total Fat (g)': {'value': 0.15, 'unit': 'g'}, 'Carbohydrates (g)': {'value': 14.8, 'unit': 'g'}, 'Calories (kcal)': {'value': 61.0, 'unit': 'kcal'}}}]
[{'description': 'chicken', 'nutrients': {'Protein (g)': {'value': 26.7, 'unit': 'g'}, 'Total Fat (g)': {'value': 6.33, 'unit': 'g'}, 'Carbohydrates (g)': {'value': 0.0, 'unit': 'g'}, 'Calories (kcal)': {'value': 164, 'unit': 'kcal'}}}, {'description': 'rice', 'nutrients': {'Protein (g)': {'value': 5.12, 'unit': 'g'}, 'Total Fat (g)': {'value': 2.25, 'unit': 'g'}, 'Carbohydrates (g)': {'value': 17.2, 'unit': 'g'}, 'Calories (kcal)': {'value': 112, 'unit': 'kcal'}}}, {'description': 'onion', 'nutrients': {'Protein (g)': {'value': 0.86, 'unit': 'g'}, 'Total Fat (g)': {'value': 0.08, 'unit': 'g'}, 'Carbohydrates (g)': {'value': 8.46, 'unit': 'g'}, 'Calories (kcal)': {'value': 38.0, 'unit': 'kcal'}}}, {'description': 'tomato', 'nutrients': {'Protein (g

In [None]:
import requests

API_KEY = "mk9ySoysK14h3E1emE5gufJcvEOJ6HpkhTPNKPBL"
API_SEARCH_URL = "https://api.nal.usda.gov/fdc/v1/foods/search"
API_DETAIL_URL = "https://api.nal.usda.gov/fdc/v1/food/"

IMPORTANT_NUTRIENTS = {
    1008: "Calories (kcal)",
    1003: "Protein (g)",
    1004: "Total Fat (g)",
    1005: "Carbohydrates (g)",
}

def search_usda_foods(query):
    params = {
        "api_key": API_KEY,
        "query": query,
        "dataType": ["Survey (FNDDS)"]
    }
    response = requests.get(API_SEARCH_URL, params=params)
    response.raise_for_status()
    return response.json()

def get_food_details(fdc_id):
    params = {"api_key": API_KEY}
    response = requests.get(f"{API_DETAIL_URL}{fdc_id}", params=params)
    response.raise_for_status()
    return response.json()

def extract_nutrient_info(food_details):
    nutrients = food_details.get("foodNutrients", [])
    result = {
        "description": food_details.get("description"),
        "fdcId": food_details.get("fdcId"),
        "nutrients": {}
    }

    for nutrient in nutrients:
        try:
            nid = int(nutrient.get("nutrient", {}).get("id"))
        except (ValueError, TypeError):
            continue
        if nid in IMPORTANT_NUTRIENTS:
            result["nutrients"][IMPORTANT_NUTRIENTS[nid]] = {
                "value": nutrient.get("amount"),
                "unit": nutrient.get("nutrient", {}).get("unitName")
            }

    return result

# --- MAIN ---
query = "Sugar"
search_result = search_usda_foods(query)
foods = search_result.get("foods", [])

{
    "description": "Sugar substitute and sugar blend",
    "fdcId": 2710262,
    "nutrients": {
        "Protein (g)": {
            "value": 0.0,
            "unit": "g"
        },
        "Total Fat (g)": {
            "value": 0.31,
            "unit": "g"
        },
        "Carbohydrates (g)": {
            "value": 99.4,
            "unit": "g"
        },
        "Calories (kcal)": {
            "value": 399,
            "unit": "kcal"
        }
    }
}
