## Estimation des calories à partir des images alimentaires

L'objectif est d'estimer, pour chaque participant et chaque jour,
le nombre total de calories consommées à partir des images des repas.

Compte tenu de la variabilité des images (angles, portions, qualité),
l'approche retenue vise à capturer une **tendance journalière**
plutôt qu'une estimation nutritionnelle exacte.


### Limites de l'approche

- Absence d'information sur les portions
- Possibilité de plusieurs aliments dans une image
- Erreurs de classification possibles



In [1]:
from pathlib import Path
from PIL import Image
from PIL.ExifTags import TAGS
import pandas as pd


def extract_image_datetime(img_path: Path):
    """
    Extrait la date/heure depuis les métadonnées EXIF d'une image
    """
    try:
        img = Image.open(img_path)
        exif_data = img._getexif()

        if exif_data is None:
            return None

        for tag_id, value in exif_data.items():
            tag = TAGS.get(tag_id, tag_id)
            if tag == "DateTimeOriginal":
                return pd.to_datetime(value, format="%Y:%m:%d %H:%M:%S")

    except Exception:
        return None

    return None


def load_food_images_metadata(participant_id: str, images_root: Path, project_root: Path):
    """
    Charge les métadonnées des images alimentaires d'un participant
    avec un chemin relatif au projet
    """
    food_images_path = images_root / participant_id / "food-images"

    if not food_images_path.exists():
        print(f"[INFO] No food-images folder for {participant_id}")
        return pd.DataFrame(
            columns=["participant_id", "image_path", "datetime", "date"]
        )

    records = []

    for img_path in food_images_path.glob("*.jp*g"):
        dt = extract_image_datetime(img_path)

        if dt is not None:
            records.append({
                "participant_id": participant_id,
                "image_path": img_path.relative_to(project_root).as_posix(),
                "datetime": dt,
                "date": dt.date()
            })

    return pd.DataFrame(records)


In [2]:
from pathlib import Path

PROJECT_ROOT = Path("..").resolve()
DATA_RAW = PROJECT_ROOT / "data" / "raw"

participant_id = "p01"

food_df = load_food_images_metadata(
    participant_id=participant_id,
    images_root=DATA_RAW,
    project_root=PROJECT_ROOT
)

food_df.head()


Unnamed: 0,participant_id,image_path,datetime,date
0,p01,data/raw/p01/food-images/IMG_8916.jpeg,2020-02-01 10:03:41,2020-02-01
1,p01,data/raw/p01/food-images/IMG_8917.jpeg,2020-02-01 11:39:57,2020-02-01
2,p01,data/raw/p01/food-images/IMG_8918.jpeg,2020-02-01 17:03:39,2020-02-01
3,p01,data/raw/p01/food-images/IMG_8920.jpeg,2020-02-01 19:16:48,2020-02-01
4,p01,data/raw/p01/food-images/IMG_8921.jpeg,2020-02-01 21:04:57,2020-02-01


---

# Model efficientnet b0

In [3]:
import timm
import torch
from PIL import Image
import torchvision.transforms as T

model = timm.create_model(
    "tf_efficientnet_b0",
    pretrained=True
)
model.eval()

  from .autonotebook import tqdm as notebook_tqdm


EfficientNet(
  (conv_stem): Conv2dSame(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
  (bn1): BatchNormAct2d(
    32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
    (drop): Identity()
    (act): SiLU(inplace=True)
  )
  (blocks): Sequential(
    (0): Sequential(
      (0): DepthwiseSeparableConv(
        (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
        (bn1): BatchNormAct2d(
          32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True
          (drop): Identity()
          (act): SiLU(inplace=True)
        )
        (aa): Identity()
        (se): SqueezeExcite(
          (conv_reduce): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
          (act1): SiLU(inplace=True)
          (conv_expand): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
          (gate): Sigmoid()
        )
        (conv_pw): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn2): BatchNorm

In [4]:
FOOD101_CLASSES = [
    "apple_pie","baby_back_ribs","baklava","beef_carpaccio","beef_tartare",
    "beet_salad","beignets","bibimbap","bread_pudding","breakfast_burrito",
    "bruschetta","caesar_salad","cannoli","caprese_salad","carrot_cake",
    "ceviche","cheesecake","cheese_plate","chicken_curry","chicken_quesadilla",
    "chicken_wings","chocolate_cake","chocolate_mousse","churros",
    "clam_chowder","club_sandwich","crab_cakes","creme_brulee","croque_madame",
    "cup_cakes","deviled_eggs","donuts","dumplings","edamame","eggs_benedict",
    "escargots","falafel","filet_mignon","fish_and_chips","foie_gras",
    "french_fries","french_onion_soup","french_toast","fried_calamari",
    "fried_rice","frozen_yogurt","garlic_bread","gnocchi","greek_salad",
    "grilled_cheese_sandwich","grilled_salmon","guacamole","gyoza",
    "hamburger","hot_and_sour_soup","hot_dog","huevos_rancheros","hummus",
    "ice_cream","lasagna","lobster_bisque","lobster_roll_sandwich",
    "macaroni_and_cheese","macarons","miso_soup","mussels","nachos",
    "omelette","onion_rings","oysters","pad_thai","paella","pancakes",
    "panna_cotta","peking_duck","pho","pizza","pork_chop","poutine",
    "prime_rib","pulled_pork_sandwich","ramen","ravioli",
    "red_velvet_cake","risotto","samosa","sashimi","scallops",
    "seaweed_salad","shrimp_and_grits","spaghetti_bolognese",
    "spaghetti_carbonara","spring_rolls","steak","strawberry_shortcake",
    "sushi","tacos","takoyaki","tiramisu","tuna_tartare","waffles"
]


In [5]:
transform = T.Compose([
    T.Resize((224, 224)),
    T.ToTensor(),
    T.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])


In [6]:
def predict_food(image_path):
    img = Image.open(image_path).convert("RGB")
    x = transform(img).unsqueeze(0)

    with torch.no_grad():
        logits = model(x)
        probs = torch.softmax(logits, dim=1)
        top_prob, top_idx = probs.max(dim=1)

    idx = top_idx.item()

    if idx >= len(FOOD101_CLASSES):
        return "unknown", float(top_prob)

    return FOOD101_CLASSES[idx], float(top_prob)


In [7]:
# Calories
FOOD_CALORIES = {
    "apple_pie": 320,
    "baby_back_ribs": 650,
    "baklava": 330,
    "beef_carpaccio": 250,
    "beef_tartare": 300,
    "beet_salad": 180,
    "beignets": 350,
    "bibimbap": 550,
    "bread_pudding": 380,
    "breakfast_burrito": 550,
    "bruschetta": 180,
    "caesar_salad": 470,
    "cannoli": 300,
    "caprese_salad": 250,
    "carrot_cake": 420,
    "ceviche": 200,
    "cheesecake": 430,
    "cheese_plate": 450,
    "chicken_curry": 500,
    "chicken_quesadilla": 480,
    "chicken_wings": 600,
    "chocolate_cake": 450,
    "chocolate_mousse": 350,
    "churros": 400,
    "clam_chowder": 250,
    "club_sandwich": 550,
    "crab_cakes": 350,
    "creme_brulee": 330,
    "croque_madame": 550,
    "cup_cakes": 300,
    "deviled_eggs": 200,
    "donuts": 320,
    "dumplings": 400,
    "edamame": 190,
    "eggs_benedict": 550,
    "escargots": 220,
    "falafel": 400,
    "filet_mignon": 350,
    "fish_and_chips": 700,
    "foie_gras": 450,
    "french_fries": 420,
    "french_onion_soup": 300,
    "french_toast": 400,
    "fried_calamari": 450,
    "fried_rice": 520,
    "frozen_yogurt": 200,
    "garlic_bread": 350,
    "gnocchi": 450,
    "greek_salad": 230,
    "grilled_cheese_sandwich": 430,
    "grilled_salmon": 420,
    "guacamole": 230,
    "gyoza": 380,
    "hamburger": 550,
    "hot_and_sour_soup": 250,
    "hot_dog": 380,
    "huevos_rancheros": 500,
    "hummus": 250,
    "ice_cream": 270,
    "lasagna": 600,
    "lobster_bisque": 380,
    "lobster_roll_sandwich": 450,
    "macaroni_and_cheese": 600,
    "macarons": 300,
    "miso_soup": 90,
    "mussels": 300,
    "nachos": 600,
    "omelette": 350,
    "onion_rings": 400,
    "oysters": 150,
    "pad_thai": 650,
    "paella": 600,
    "pancakes": 450,
    "panna_cotta": 300,
    "peking_duck": 700,
    "pho": 450,
    "pizza": 700,
    "pork_chop": 450,
    "poutine": 800,
    "prime_rib": 650,
    "pulled_pork_sandwich": 550,
    "ramen": 550,
    "ravioli": 500,
    "red_velvet_cake": 430,
    "risotto": 550,
    "samosa": 260,
    "sashimi": 200,
    "scallops": 250,
    "seaweed_salad": 120,
    "shrimp_and_grits": 500,
    "spaghetti_bolognese": 600,
    "spaghetti_carbonara": 650,
    "spring_rolls": 300,
    "steak": 500,
    "strawberry_shortcake": 350,
    "sushi": 400,
    "tacos": 450,
    "takoyaki": 350,
    "tiramisu": 450,
    "tuna_tartare": 300,
    "waffles": 500,
}


In [8]:
def estimate_calories(food_label: str) -> int:

    if not isinstance(food_label, str):
        return 400

    food_label = food_label.lower().strip()

    return FOOD_CALORIES.get(food_label, 400)


In [9]:
food, confidence = predict_food("../data/raw/p01/food-images/IMG_9404.jpeg")
calories = estimate_calories(food)

print(food, confidence, calories)


unknown 0.5658053159713745 400


Ce modèle est limité, car il n'est pas fine tuner sur les images des aliments !

### ResNet152 food101

In [19]:
import os
import zipfile
from PIL import Image
from transformers import AutoImageProcessor, ResNetForImageClassification
import torch
from tqdm import tqdm
import pandas as pd

MODEL_ID = "fusion-bench/resnet152-food101-batch_size_64_lr_0.005_training_data_ratio_0.8-4000" 
EXTRACT_DIR = "../data/raw/p01/food-images/"
OUTPUT_CSV = "../data/food/p01_resultats_classification_resnet152_food101.csv"


In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Utilisation de l'appareil : {device}")

Utilisation de l'appareil : cuda


In [15]:
try:
    processor = AutoImageProcessor.from_pretrained(MODEL_ID)
    # Chargement du ResNet-152 avec les poids Food-101
    model = ResNetForImageClassification.from_pretrained(MODEL_ID).to(device)
    model.eval()
    print(f"Modèle ResNet-152 ({MODEL_ID}) chargé")
except Exception as e:
    print(f"ERREUR lors du chargement : {e}")
    raise

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


Modèle ResNet-152 (fusion-bench/resnet152-food101-batch_size_64_lr_0.005_training_data_ratio_0.8-4000) chargé


In [21]:
def classify_image(image_path):
    try:
        image = Image.open(image_path).convert("RGB")
    except Exception as e:
        return None, None, f"Erreur de lecture: {e}"

    try:
        inputs = processor(images=image, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = model(**inputs)

        logits = outputs.logits
        probabilities = torch.nn.functional.softmax(logits, dim=-1)[0]
        
        predicted_class_idx = torch.argmax(probabilities).item()
        
        predicted_label = model.config.id2label[predicted_class_idx]
        confidence_score = probabilities[predicted_class_idx].item() * 100

        return predicted_label, confidence_score, "OK"
    
    except Exception as e:
        return None, None, f"Erreur de classification: {e}"

In [22]:
all_files = []
for root, _, files in os.walk(EXTRACT_DIR):
    for file in files:
        if file.lower().endswith(('.png', '.jpg', '.jpeg', '.tiff')):
            all_files.append(os.path.join(root, file))

results = []

print(f"\nDébut de la classification de {len(all_files)} images...")

for file_path in tqdm(all_files, desc="Classification des images"):
    # extraire la date/heure via EXIF
    datetime_taken = extract_image_datetime(Path(file_path))
    

    # Extrait la date seule pour le nouveau champ "Date_Prise"
    if isinstance(datetime_taken, pd.Timestamp):
        date_taken = datetime_taken.date()
    else:
        date_taken = "Non disponible"

    label, confidence, status = classify_image(file_path)

    results.append({
        "Chemin_Fichier": file_path,
        "Date_Prise": date_taken,
        "Classe_Predite": label,
        "Score_Confiance": f"{confidence:.2f}%" if confidence is not None else None,
        "Statut": status
    })


Début de la classification de 321 images...


Classification des images: 100%|██████████| 321/321 [01:37<00:00,  3.29it/s]


In [23]:

df_results = pd.DataFrame(results)

df_results['Chemin_Fichier'] = df_results['Chemin_Fichier'].str.replace(f'{EXTRACT_DIR}{os.sep}', '', regex=False)

# Sauvegarde au format CSV
df_results.to_csv(OUTPUT_CSV, index=False)

display(df_results.head())

Unnamed: 0,Chemin_Fichier,Date_Prise,Classe_Predite,Score_Confiance,Statut
0,../data/raw/p01/food-images/IMG_8916.jpeg,2020-02-01,macarons,12.93%,OK
1,../data/raw/p01/food-images/IMG_8917.jpeg,2020-02-01,pancakes,22.30%,OK
2,../data/raw/p01/food-images/IMG_8918.jpeg,2020-02-01,macarons,9.48%,OK
3,../data/raw/p01/food-images/IMG_8920.jpeg,2020-02-01,pizza,21.25%,OK
4,../data/raw/p01/food-images/IMG_8921.jpeg,2020-02-01,mussels,19.73%,OK


Le modèle ResNet-Food101 montre des performances de classification des aliments peu fiables. La précision est faible et meme les prédictions à haute confiance peuvent s'avérer incorrectes.

La reconnaissance alimentaire est une tâche extrêmement complexe qui dépasse les capacités d'un modèle simple, notamment à cause de la grande diversité culinaire mondiale et de la difficulté technique d'estimer correctement les portions et par extension les calories.

C'est précisément la complexité à laquelle s'attaque la startup Nutrify (Brisbane, Australie)