In [1]:
import os
import requests
import pandas as pd

In [2]:
os.listdir('archive/food-101/food-101/images')

['foie_gras',
 'club_sandwich',
 'cheese_plate',
 'cup_cakes',
 'garlic_bread',
 'gnocchi',
 'ice_cream',
 'samosa',
 'donuts',
 'tuna_tartare',
 'filet_mignon',
 '.DS_Store',
 'seaweed_salad',
 'french_toast',
 'chicken_curry',
 'shrimp_and_grits',
 'steak',
 'cheesecake',
 'red_velvet_cake',
 'waffles',
 'churros',
 'gyoza',
 'lobster_roll_sandwich',
 'huevos_rancheros',
 'breakfast_burrito',
 'grilled_cheese_sandwich',
 'spaghetti_bolognese',
 'falafel',
 'poutine',
 'greek_salad',
 'beef_tartare',
 'fried_calamari',
 'guacamole',
 'ravioli',
 'lobster_bisque',
 'beet_salad',
 'risotto',
 'crab_cakes',
 'strawberry_shortcake',
 'edamame',
 'ceviche',
 'hot_and_sour_soup',
 'spring_rolls',
 'sashimi',
 'paella',
 'clam_chowder',
 'miso_soup',
 'escargots',
 'hot_dog',
 'pulled_pork_sandwich',
 'bruschetta',
 'panna_cotta',
 'fish_and_chips',
 'pad_thai',
 'tiramisu',
 'takoyaki',
 'macarons',
 'apple_pie',
 'cannoli',
 'scallops',
 'frozen_yogurt',
 'chicken_quesadilla',
 'mussels',


In [59]:
# setting api to USDA FoodData Central

API_KEY = "0fzhN6pJrGh1g812ETw7ptA0gILWEcO7IOhJjexP"
BASE_URL = "https://api.nal.usda.gov/fdc/v1"

def search_food(food_name, data_types=["Foundation", "Branded", "SR Legacy", "Food Database"]):
    """
    Searches for a food item in the Foundation Foods section using the USDA API and retrieves its basic information.

    Args:
        food_name (str): Name of the food to search for.

    Returns:
        dict: Data of the first food result, or None if not found.
    """
    url = f"{BASE_URL}/foods/search"
    headers = {"X-Api-Key": API_KEY}
    params = {
        "query": food_name,
        "pageSize": 1,  # Limit results to the first match
        "dataType": data_types  # Restrict to Foundation Foods
    }
    response = requests.get(url, headers=headers, params=params)
    if response.status_code == 200:
        data = response.json()
        if data.get("foods"):
            return data["foods"][0]  # Return the first result
    return None

In [73]:
def get_calories(food_name):
    """
    Fetches calorie data and serving size for a specific food item by name.

    Args:
        food_name (str): Name of the food to fetch calories and serving size for.

    Returns:
        tuple: A tuple containing the calorie content (float) and serving size (float), or None if not found.
    """
    
    calories = None 
    protein = None
    serving_size = 100 # default serving size
    
    food_data = search_food(food_name, data_types=["Foundation"])
    
    # Check if food_data is valid and contains the fdcId
    if food_data:
        fdc_id = food_data.get("fdcId")
        url = f"{BASE_URL}/food/{fdc_id}"
        headers = {"X-Api-Key": API_KEY}
        
        # Make the API request
        response = requests.get(url, headers=headers)
        
        if response.status_code == 200:
            details = response.json()
            serving_size = details.get("servingSize", 100)
            food_nutrients = details.get("foodNutrients", [])
            for nutrient in food_nutrients:
                if nutrient.get("nutrient", {}).get("name", "").startswith("Energy") and nutrient.get("nutrient", {}).get("unitName").lower() == "kcal":
                    calories = nutrient.get("amount")
                if (nutrient.get("nutrient", {}).get("name") == "Protein") and (nutrient.get("nutrient", {}).get("unitName") == "g"):
                    protein = nutrient.get("amount")
                    
            # If calories are found, return the values
            if calories is not None:
                return calories, serving_size, protein
            
    # If no calories found, try a broader search
    food_data = search_food(food_name)
    if calories is None:
        food_data = search_food(food_name)
        if food_data:
            fdc_id = food_data.get("fdcId")
            url = f"{BASE_URL}/food/{fdc_id}"
            headers = {"X-Api-Key": API_KEY}
            response = requests.get(url, headers=headers)

            if response.status_code == 200:
                details = response.json()
                food_nutrients = details.get("foodNutrients", [])
                for nutrient in food_nutrients:
                    if nutrient.get("nutrient", {}).get("name", "").startswith("Energy") and nutrient.get("nutrient", {}).get("unitName").lower() == "kcal":
                        calories = nutrient.get("amount")
                    if (nutrient.get("nutrient", {}).get("name") == "Protein") and (nutrient.get("nutrient", {}).get("unitName") == "g"):
                        protein = nutrient.get("amount")
                        
                return calories, serving_size, protein
    
    return None, 100, None

In [74]:
print(get_calories("foie gras"))

(462.0, 100, 11.4)


In [76]:
food_list = [food.replace('_', ' ').lower() for food in os.listdir('archive/food-101/food-101/images') if not food.startswith('.')]

data = []

# Fetch the calories and serving size for each food item and store the results in the data list
for food in food_list:
    calories, serving_size, protein = get_calories(food)
    data.append({
        "Food": food.replace('_', ' ').title(),
        "Calories (kcal)": calories,
        "Serving Size (g)": serving_size,
        "Protein (g)": protein
    })

In [77]:
# Convert the collected data to a DataFrame
df = pd.DataFrame(data)

# Display the DataFrame
df.round(2)

Unnamed: 0,Food,Calories (kcal),Serving Size (g),Protein (g)
0,Foie Gras,462.00,100,11.40
1,Club Sandwich,155.00,100,22.80
2,Cheese Plate,408.00,100,23.30
3,Cup Cakes,356.00,100,2.22
4,Garlic Bread,143.00,100,6.62
...,...,...,...,...
96,Baklava,535.00,100,6.98
97,Creme Brulee,210.00,100,3.00
98,Carrot Cake,38.25,100,0.80
99,Onion Rings,288.00,100,4.52


In [78]:
df[df.isnull().any(axis=1)]

Unnamed: 0,Food,Calories (kcal),Serving Size (g),Protein (g)
54,Takoyaki,,100,


In [114]:
df.loc[df["Food"] == "Takoyaki", ["Calories (kcal)", "Serving Size (g)", "Protein (g)"]] = [70.0, 40.0, 3]

In [115]:
df[df.isnull().any(axis=1)]

Unnamed: 0,Food,Calories (kcal),Serving Size (g),Protein (g)


In [117]:
df.to_csv('calories.csv', index=False)