In [11]:
from ultralytics import YOLO
import cv2
import numpy as np
import json
import os

# Load the trained segmentation weights and run inference on one image.
model = YOLO(r"runs\segment\train\weights\best.pt")  # SEG model
img_path = r"C:\Users\siuts\Downloads\Gemini_Generated_Image_cc04zacc04zacc04.png"

# cv2.imread reports failure by returning None instead of raising, so check it
# here; otherwise the problem only surfaces later as a confusing model error.
img = cv2.imread(img_path)
if img is None:
    raise FileNotFoundError(f"Could not read image: {img_path}")

results = model(img, conf=0.4)  # conf is the confidence threshold passed to the model
r = results[0]  # first (and only) image

# Draw the predictions and save the rendered picture.
annotated = r.plot()
out_path = r"C:\Users\siuts\Downloads\segmented_output.png"
# cv2.imwrite returns False (it does not raise) when the file cannot be written.
if not cv2.imwrite(out_path, annotated):
    raise OSError(f"Could not write annotated image: {out_path}")

# Opens a native OpenCV window; waitKey(0) blocks this cell until a key is
# pressed while that window has focus.
cv2.imshow("Segmented image", annotated)
cv2.waitKey(0)
cv2.destroyAllWindows()
print("Saved segmented image to:", out_path)

# Make sure the folder that receives the prediction files exists.
os.makedirs(r"C:\Users\siuts\Downloads\yolo_preds", exist_ok=True)

names = model.names  # class id -> name
pred_list = []

if r.boxes is not None:
    # Pull the detection tensors back as NumPy arrays.
    boxes_xyxy = r.boxes.xyxy.cpu().numpy()   # [N, 4]  (x1, y1, x2, y2)
    cls_ids = r.boxes.cls.cpu().numpy().astype(int)
    confs = r.boxes.conf.cpu().numpy()

    # One JSON-serialisable record per detection, in detection order.
    for det_idx, (box, cls_id, conf) in enumerate(zip(boxes_xyxy, cls_ids, confs)):
        x1, y1, x2, y2 = box
        record = {
            "id": int(det_idx),
            "class_id": int(cls_id),
            # fall back to the numeric id as text when the id has no name
            "class_name": names.get(cls_id, str(cls_id)),
            "confidence": float(conf),
            "bbox_xyxy": [float(x1), float(y1), float(x2), float(y2)],
        }
        pred_list.append(record)

# write JSON with all detections
json_path = r"C:\Users\siuts\Downloads\yolo_preds\predictions.json"
with open(json_path, "w") as f:
    f.write(json.dumps(pred_list, indent=2))

print("Saved prediction details to:", json_path)

# ---------- SAVE MASKS ----------
if r.masks is not None:
    # [N, H, W] stack with values in the 0-1 range
    masks = r.masks.data.cpu().numpy()
    # Scale the whole stack to 0-255 once, then write one PNG per mask.
    masks_uint8 = (masks * 255).astype(np.uint8)
    for i, mask_uint8 in enumerate(masks_uint8):
        mask_path = rf"C:\Users\siuts\Downloads\yolo_preds\mask_{i}.png"
        cv2.imwrite(mask_path, mask_uint8)
        print("Saved mask:", mask_path)



0: 640x640 1 food, 1 card, 60.9ms
Speed: 11.1ms preprocess, 60.9ms inference, 4.3ms postprocess per image at shape (1, 3, 640, 640)
Saved segmented image to: C:\Users\siuts\Downloads\segmented_output.png
Saved prediction details to: C:\Users\siuts\Downloads\yolo_preds\predictions.json
Saved mask: C:\Users\siuts\Downloads\yolo_preds\mask_0.png
Saved mask: C:\Users\siuts\Downloads\yolo_preds\mask_1.png


In [12]:
import json

# 1. Load prediction info
json_path = r"C:\Users\siuts\Downloads\yolo_preds\predictions.json"

with open(json_path, "r") as f:
    preds = json.load(f)

# 2. Use the class names YOLO really predicted
CARD_CLASS_NAME = "card"
FOOD_CLASS_NAME = "food"

card_candidates = [p for p in preds if p["class_name"] == CARD_CLASS_NAME]
food_candidates = [p for p in preds if p["class_name"] == FOOD_CLASS_NAME]

# max() on an empty list raises a bare "empty sequence" ValueError; fail with
# a message that says which class is missing instead.
if not card_candidates:
    raise ValueError(f"No '{CARD_CLASS_NAME}' detection found in {json_path}")
if not food_candidates:
    raise ValueError(f"No '{FOOD_CLASS_NAME}' detection found in {json_path}")

# if there are many, pick the one with highest confidence
card_det = max(card_candidates, key=lambda d: d["confidence"])
food_det = max(food_candidates, key=lambda d: d["confidence"])

card_box = card_det["bbox_xyxy"]
food_box = food_det["bbox_xyxy"]

print("Card box:", card_box)
print("Food box:", food_box)

# 3. Helper to get width/height in pixels
def box_hw(b):
    """Return ``(width, height)`` of a box given as ``(x1, y1, x2, y2)``.

    Note the return order is width first, then height.
    """
    left, top, right, bottom = b
    return right - left, bottom - top

card_w_px, card_h_px = box_hw(card_box)
food_w_px, food_h_px = box_hw(food_box)

print("Card (px)  W,H:", card_w_px, card_h_px)
print("Food (px)  W,H:", food_w_px, food_h_px)

# 4. Real card size (change to your exact card if needed)
CARD_W_MM = 63.0   # mm
CARD_H_MM = 88.0   # mm

# Pair the card's longer physical side with the longer side of its box, so the
# scale stays correct whether the card lies portrait or landscape in the photo.
# For a portrait card this is the same as dividing CARD_H_MM by the box height.
card_long_px = max(card_w_px, card_h_px)
if card_long_px <= 0:
    raise ValueError(f"Card box has no usable size: {card_box}")
mm_per_px = max(CARD_W_MM, CARD_H_MM) / card_long_px

food_w_mm = food_w_px * mm_per_px
food_h_mm = food_h_px * mm_per_px

print("\nScale: mm per pixel:", mm_per_px)
print("Estimated FOOD width (mm):", food_w_mm)
print("Estimated FOOD height (mm):", food_h_mm)


Card box: [715.5390014648438, 383.3559265136719, 956.7313842773438, 703.70361328125]
Food box: [94.12690734863281, 254.3801727294922, 552.501708984375, 760.3720703125]
Card (px)  W,H: 241.1923828125 320.3476867675781
Food (px)  W,H: 458.3748016357422 505.9918975830078

Scale: mm per pixel: 0.27470153097701827
Estimated FOOD width (mm): 125.91625977062544
Estimated FOOD height (mm): 138.9967489280189


In [18]:
import cv2
import json
import os

# ---- paths ----
img_path = r"C:\Users\siuts\Downloads\Gemini_Generated_Image_cc04zacc04zacc04.png"
json_path = r"C:\Users\siuts\Downloads\yolo_preds\predictions.json"
crop_out_path = r"C:\Users\siuts\Downloads\food_crop.jpg"

# 1. load image
# cv2.imread returns None on failure instead of raising, so check explicitly.
img = cv2.imread(img_path)
if img is None:
    raise FileNotFoundError(f"Could not read image: {img_path}")

h_img, w_img = img.shape[:2]

# 2. load predictions
with open(json_path, "r") as f:
    preds = json.load(f)

FOOD_CLASS_NAME = "food"

# 3. pick the most confident food detection
food_candidates = [p for p in preds if p["class_name"] == FOOD_CLASS_NAME]
if not food_candidates:
    raise ValueError(f"No '{FOOD_CLASS_NAME}' detection found in {json_path}")

food_det = max(food_candidates, key=lambda d: d["confidence"])
x1, y1, x2, y2 = food_det["bbox_xyxy"]

# 4. convert to integer pixel indices and clamp them to the image bounds
x1 = max(0, min(int(x1), w_img - 1))
x2 = max(0, min(int(x2), w_img))
y1 = max(0, min(int(y1), h_img - 1))
y2 = max(0, min(int(y2), h_img))

# A box that collapses after clamping would give an empty array that
# cv2.imwrite cannot encode.
if x2 <= x1 or y2 <= y1:
    raise ValueError(f"Food box is empty after clamping: {(x1, y1, x2, y2)}")

food_crop = img[y1:y2, x1:x2]

# 5. save crop
os.makedirs(os.path.dirname(crop_out_path), exist_ok=True)
# cv2.imwrite returns False (it does not raise) when the write fails.
if not cv2.imwrite(crop_out_path, food_crop):
    raise OSError(f"Could not write food crop: {crop_out_path}")
print("Saved food crop to:", crop_out_path)



Saved food crop to: C:\Users\siuts\Downloads\food_crop.jpg


In [16]:
#AI API call
import base64
import requests
import json
import mimetypes

# ========== 1. CONFIG ==========
import os  # local import: only needed here, to read the API key from the environment

# NOTE(review): despite its name this points at the full original image, not at
# a cropped food image - confirm which file is meant to be sent.
crop_path = r"C:\Users\siuts\Downloads\Gemini_Generated_Image_cc04zacc04zacc04.png"

# Never store the key in the notebook. Set GEMINI_API_KEY in the environment.
# NOTE(review): a literal key was previously hardcoded in this cell; treat it as
# compromised and rotate it.
API_KEY = os.environ.get("GEMINI_API_KEY")
if not API_KEY:
    raise RuntimeError("Set the GEMINI_API_KEY environment variable before running this cell")

# The key is sent in a request header (below), so it stays out of the URL.
API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent"

REQUEST_TIMEOUT_S = 60  # without a timeout, requests may wait on the server indefinitely

# ========== 2. READ + BASE64 ENCODE IMAGE ==========
with open(crop_path, "rb") as f:
    img_bytes = f.read()

img_b64 = base64.b64encode(img_bytes).decode("utf-8")
# guess_type returns (None, None) for unknown extensions; fail before sending.
mime_type, _ = mimetypes.guess_type(crop_path)
if mime_type is None:
    raise ValueError(f"Could not determine the MIME type of {crop_path}")

# ========== 3. BUILD REQUEST TO GEMINI ==========
headers = {
    "Content-Type": "application/json",
    "x-goog-api-key": API_KEY,
}

payload = {
    "contents": [
        {
            "parts": [
                {
                    "text": (
                        "You are a food expert. Look at this image and:\n"
                        "1) Tell me what food it is (be as specific as you can).\n"
                        "2) Say if it looks cooked or raw.\n"
                        "3) If possible, guess the dish name in plain English.\n"
                        "4) How much calories is it.\n"
                    )
                },
                {
                    "inline_data": {
                        "mime_type": mime_type,
                        "data": img_b64
                    }
                }
            ]
        }
    ]
}

# ========== 4. SEND REQUEST ==========
response = requests.post(
    API_URL,
    headers=headers,
    data=json.dumps(payload),
    timeout=REQUEST_TIMEOUT_S,
)

print("Status code:", response.status_code)
if response.status_code != 200:
    print("Error response:", response.text)
    raise SystemExit("Gemini API call failed")

data = response.json()

# ========== 5. PARSE GEMINI'S ANSWER ==========
try:
    answer = data["candidates"][0]["content"]["parts"][0]["text"]
    print("\nGemini answer:\n")
    print(answer)
except (KeyError, IndexError, TypeError):
    # The response did not have the expected shape; show it for inspection.
    print("Could not parse response, raw JSON:")
    print(json.dumps(data, indent=2))


Status code: 429
Error response: {
  "error": {
    "code": 429,
    "message": "You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit. \n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.0-flash\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 0, model: gemini-2.0-flash\n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_input_token_count, limit: 0, model: gemini-2.0-flash\nPlease retry in 57.738556337s.",
    "status": "RESOURCE_EXHAUSTED",
    "details": [
      {
        "@type": "type.googleapis.com/google.rpc.Help",
        "links": [
          {
            "description": "Learn more about Gemini API quotas",
       

SystemExit: Gemini API call failed