In [1]:
import os
import cv2
import torch
import torchvision
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as T

device = "cuda" if torch.cuda.is_available() else "cpu"
IMG_SIZE = 640
CARD_WIDTH_CM = 8.5


In [2]:
def polygon_to_bbox(poly):
    xs = poly[0::2]
    ys = poly[1::2]

    x_min, x_max = min(xs), max(xs)
    y_min, y_max = min(ys), max(ys)

    xc = (x_min + x_max) / 2
    yc = (y_min + y_max) / 2
    w  = x_max - x_min
    h  = y_max - y_min

    return torch.tensor([xc, yc, w, h], dtype=torch.float32)


In [3]:
class CardDataset(Dataset):
    def __init__(self, img_dir, label_dir):
        self.img_dir = img_dir
        self.label_dir = label_dir
        self.images = [f for f in os.listdir(img_dir) if f.endswith(".jpg")]

        self.transform = T.Compose([
            T.Resize((IMG_SIZE, IMG_SIZE)),
            T.ToTensor()
        ])

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        img_name = self.images[idx]

        image = Image.open(
            os.path.join(self.img_dir, img_name)
        ).convert("RGB")
        image = self.transform(image)

        label_path = os.path.join(
            self.label_dir, img_name.replace(".jpg", ".txt")
        )

        with open(label_path) as f:
            data = list(map(float, f.readline().split()))

        polygon = data[1:]          # skip class id
        bbox = polygon_to_bbox(polygon)

        return image, bbox


In [4]:
class CardDetector(nn.Module):
    def __init__(self):
        super().__init__()
        backbone = torchvision.models.resnet18(weights="DEFAULT")
        self.features = nn.Sequential(*list(backbone.children())[:-1])

        self.head = nn.Sequential(
            nn.Linear(512, 128),
            nn.ReLU(),
            nn.Linear(128, 4),
            nn.Sigmoid()  # لأن bbox normalized
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        return self.head(x)


In [5]:
train_dataset = CardDataset(
    "/kaggle/input/ref-object/train/images",
    "/kaggle/input/ref-object/train/labels"
)

train_loader = DataLoader(
    train_dataset, batch_size=8, shuffle=True
)

model = CardDetector().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
loss_fn = nn.MSELoss()

for epoch in range(15):
    total_loss = 0
    for imgs, targets in train_loader:
        imgs = imgs.to(device)
        targets = targets.to(device)

        preds = model(imgs)
        loss = loss_fn(preds, targets)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch+1}: Loss = {total_loss:.4f}")

torch.save(model.state_dict(), "card_detector.pth")


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 165MB/s] 


Epoch 1: Loss = 9.0985
Epoch 2: Loss = 2.9007
Epoch 3: Loss = 1.8585
Epoch 4: Loss = 1.3596
Epoch 5: Loss = 1.0819
Epoch 6: Loss = 0.7576
Epoch 7: Loss = 0.6244
Epoch 8: Loss = 0.4897
Epoch 9: Loss = 0.3438
Epoch 10: Loss = 0.4102
Epoch 11: Loss = 0.2936
Epoch 12: Loss = 0.2393
Epoch 13: Loss = 0.2518
Epoch 14: Loss = 0.1964
Epoch 15: Loss = 0.1999


In [7]:
model.load_state_dict(torch.load("card_detector.pth"))
model.eval()

img_path = "/kaggle/input/ref-object/test/images/20230716_043900_jpg.rf.16522fa61f87e8c05e03da432d46f447.jpg"
image = cv2.imread(img_path)
h, w, _ = image.shape

img = cv2.resize(image, (IMG_SIZE, IMG_SIZE))
img = torch.tensor(img).permute(2,0,1).float().unsqueeze(0) / 255
img = img.to(device)

with torch.no_grad():
    xc, yc, bw, bh = model(img)[0]

bw_px = bw.item() * w
s = bw_px / CARD_WIDTH_CM

print("Pixels per CM (s):", s)


Pixels per CM (s): 46.883181403664985


In [10]:
# Nutrition DB: density (g/cm³), calories per 100g
FOODSEG103_DB = {
    "rice":               {"density": 0.85, "cal_per_100g": 130},
    "potato":             {"density": 0.77, "cal_per_100g": 77},
    "tomato":             {"density": 0.99, "cal_per_100g": 18},
    "chicken duck":       {"density": 1.03, "cal_per_100g": 239},
    "steak":              {"density": 1.04, "cal_per_100g": 250},
    "broccoli":           {"density": 0.34, "cal_per_100g": 34},
    "ice cream":          {"density": 0.56, "cal_per_100g": 207},
    "cheese butter":      {"density": 0.95, "cal_per_100g": 402},
    "bread":              {"density": 0.27, "cal_per_100g": 265},
    "egg":                {"density": 1.03, "cal_per_100g": 155},
    # Add more classes here as needed
}


In [None]:
import torch
import numpy as np
import cv2

# --- UTILS ---

# IoU mask to area in pixels
def mask_area(mask):
    return (mask > 0).sum()

# Convert pixel area to cm²
def area_to_cm2(area_px, s):
    return area_px / (s * s)

# Weight (g) from volume and density
def estimate_weight(area_cm2, density, height_cm=2.5):
    # approximate height
    volume_cm3 = area_cm2 * height_cm
    return volume_cm3 * density

# Calories from weight
def estimate_calories(weight_g, cal_per_100g):
    return (weight_g * cal_per_100g) / 100.0

# --- CARD DETECTION & SCALE (s) ---

def compute_s_from_image(image_path, card_model, device):
    """Detect card, compute s (pixels/cm)."""
    img = cv2.imread(image_path)
    h, w = img.shape[:2]

    tensor = torch.tensor(cv2.resize(img,(640,640))).permute(2,0,1).float().unsqueeze(0)/255
    tensor = tensor.to(device)

    with torch.no_grad():
        xc, yc, bw, bh = card_model(tensor)[0].cpu().numpy()

    card_width_px = bw * w
    return card_width_px / 8.5  # 8.5 cm real card width

# --- SEGMENT AND ESTIMATE ---

def estimate_meal_nutrition(image, seg_model, s, class_list):
    """Run segmentation and compute per ingredient weight/calories."""
    # Run segmentation (example uses a PyTorch seg model)
    seg_input = torch.tensor(image).permute(2,0,1).float().unsqueeze(0)/255
    seg_input = seg_input.to(next(seg_model.parameters()).device)
    
    with torch.no_grad():
        seg_output = seg_model(seg_input)["out"]  # shape (1,C,H,W)
    
    seg_mask = torch.argmax(seg_output, dim=1)[0].cpu().numpy()
    
    results = {}
    
    for idx, cls_name in class_list.items():
        if cls_name not in FOODSEG103_DB:
            continue
        
        mask = (seg_mask == idx).astype(np.uint8)
        if mask.sum()==0:
            continue
        
        area_px = mask_area(mask)
        area_cm2 = area_to_cm2(area_px, s)
        
        params = FOODSEG103_DB[cls_name]
        density = params["density"]
        cals100 = params["cal_per_100g"]
        
        weight_g = estimate_weight(area_cm2, density)
        cals = estimate_calories(weight_g, cals100)
        
        results[cls_name] = {
            "area_cm2": area_cm2,
            "weight_g": weight_g,
            "calories": cals
        }
    
    return results

# --- RUN EVERYTHING ---

# 1) compute scale
image_file = "/kaggle/input/ref-object/test/images/20230716_043900_jpg.rf.16522fa61f87e8c05e03da432d46f447.jpg"
s = compute_s_from_image(image_file, model, device)
print(f"Pixels/cm (s) = {s:.2f}")

# 2) load image for segmentation
img = cv2.imread(image_file)
img_resized = cv2.resize(img,(640,640))

# 3) segmentation and nutrition
nutrition = estimate_meal_nutrition(img_resized, seg_model, s, FOODSEG103_CLASS_MAP)

# 4) print results
total_g = 0
total_cal = 0
for ing, info in nutrition.items():
    print(f"{ing}: {info['weight_g']:.1f} g, {info['calories']:.1f} kcal")
    total_g += info["weight_g"]
    total_cal += info["calories"]

print("Total weight:", total_g, "g")
print("Total calories:", total_cal, "kcal")
