# Single-image food volume estimation
Using a monocular depth estimation network and a segmentation network, we will estimate the volume of the food displayed in the input image.

In [None]:
import sys
# Show which Python interpreter this notebook kernel is running on.
print(sys.executable)

In [None]:
import sys
import os
import json
import shutil
import cv2
import torch
import numpy as np
from typing import Any, Dict, List
import argparse
import tensorflow as tf
from keras.models import Model, model_from_json
from keras import backend as K
import nest_asyncio
from fastapi import FastAPI, UploadFile, File, Depends
import uvicorn
from pyngrok import ngrok
import threading
import time

# Add the custom library locations to the import path
sys.path.append('.')
sys.path.append('./SAM')
sys.path.append('./mmseg')

# Import the required project modules
from segment_anything import SamAutomaticMaskGenerator, sam_model_registry
from FoodSAM.FoodSAM_tools.predict_semantic_mask import semantic_predict
from FoodSAM.FoodSAM_tools.enhance_semantic_masks import enhance_masks
from food_volume_estimation.volume_estimator import VolumeEstimator
# NOTE(review): presumably this star import supplies Losses, ProjectionLayer,
# ReflectionPadding2D, InverseDepthNormalization and AugmentationLayer, which
# init_estimator() uses below — confirm against custom_modules.
from food_volume_estimation.depth_estimation.custom_modules import *

# Apply nest_asyncio so FastAPI can be run from this notebook
nest_asyncio.apply()

# Check for a GPU
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")
if device == "cuda":
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")

# Module-level globals for the estimator, graph and session
# (assigned by init_estimator())
estimator = None
global_graph = None
global_session = None

def init_estimator():
    """Build the depth estimator inside a brand-new TensorFlow graph and session.

    Rebinds the module-level ``estimator``, ``global_graph`` and
    ``global_session``. The session left over from a previous call is closed
    first so that repeated calls (one per request) do not accumulate live
    sessions.

    Raises:
        Exception: any error raised while building the model or loading its
            weights is printed and re-raised unchanged.
    """
    global estimator, global_graph, global_session

    # Release the session created by the previous call. Without this, every
    # call leaves another open tf.Session (and whatever device memory it
    # holds) behind.
    if global_session is not None:
        global_session.close()
        global_session = None

    # Drop the current Keras/TensorFlow state
    K.clear_session()
    tf.reset_default_graph()

    # Create a new graph and a session bound to it
    global_graph = tf.Graph()
    global_session = tf.Session(graph=global_graph)

    with global_graph.as_default():
        with global_session.as_default():
            K.set_session(global_session)
            depth_model_architecture = './models/fine_tune_food_videos/monovideo_fine_tune_food_videos.json'
            depth_model_weights = './models/fine_tune_food_videos/monovideo_fine_tune_food_videos.h5'
            estimator = VolumeEstimator(arg_init=False)
            try:
                with open(depth_model_architecture, 'r') as read_file:
                    custom_losses = Losses()
                    # Custom layers/losses needed to deserialize the model
                    objs = {
                        'ProjectionLayer': ProjectionLayer,
                        'ReflectionPadding2D': ReflectionPadding2D,
                        'InverseDepthNormalization': InverseDepthNormalization,
                        'AugmentationLayer': AugmentationLayer,
                        'compute_source_loss': custom_losses.compute_source_loss
                    }
                    # NOTE(review): json.load's result is passed straight to
                    # model_from_json, so the file presumably stores the
                    # architecture as a JSON-encoded string — confirm.
                    model_architecture_json = json.load(read_file)
                    estimator.monovideo = model_from_json(model_architecture_json, custom_objects=objs)
                # Name-mangled call to VolumeEstimator's private
                # __set_weights_trainable helper.
                estimator._VolumeEstimator__set_weights_trainable(estimator.monovideo, False)
                # Initialize variables before loading so the loaded weights
                # are not overwritten afterwards.
                global_session.run(tf.global_variables_initializer())
                estimator.monovideo.load_weights(depth_model_weights)
                estimator.model_input_shape = estimator.monovideo.inputs[0].shape.as_list()[1:]
                depth_net = estimator.monovideo.get_layer('depth_net')
                estimator.depth_model = Model(inputs=depth_net.inputs, outputs=depth_net.outputs, name='depth_model')
                # Depth model configuration
                MIN_DEPTH = 0.01
                MAX_DEPTH = 10
                estimator.min_disp = 1 / MAX_DEPTH
                estimator.max_disp = 1 / MIN_DEPTH
                estimator.gt_depth_scale = 0.35
                estimator.relax_param = 0.01
                print('[*] Loaded depth estimation model.')
                # graph.finalize() is intentionally not called, to avoid errors
            except Exception as e:
                print(f"Error loading model: {str(e)}")
                raise

# Logging helper
def write_log(message):
    """Append ``message`` to ``debug/log.txt`` and echo it to stdout.

    The ``debug`` directory is created on demand, and the file is written as
    UTF-8 so that non-ASCII messages (this file logs Vietnamese text) do not
    fail on platforms whose default encoding cannot represent them.

    Args:
        message: text of the log entry; a newline is appended in the file.
    """
    log_file_path = "debug/log.txt"
    os.makedirs(os.path.dirname(log_file_path), exist_ok=True)
    with open(log_file_path, "a", encoding="utf-8") as log_file:
        log_file.write(message + "\n")
    print(message)

# Folder-clearing helper
def clear_folder(folder_path: str):
    """Delete everything inside ``folder_path`` while keeping the folder itself.

    Files and symbolic links are unlinked, sub-directories are removed
    recursively. A failure on one entry is printed and the remaining entries
    are still processed. A missing folder only produces a warning message.

    Args:
        folder_path: directory whose contents should be removed.
    """
    folder_is_usable = os.path.exists(folder_path) and os.path.isdir(folder_path)
    if not folder_is_usable:
        print(f"⚠️ Thư mục '{folder_path}' không tồn tại.")
        return

    for entry_name in os.listdir(folder_path):
        entry_path = os.path.join(folder_path, entry_name)
        try:
            is_plain_entry = os.path.isfile(entry_path) or os.path.islink(entry_path)
            if is_plain_entry:
                os.remove(entry_path)
            elif os.path.isdir(entry_path):
                shutil.rmtree(entry_path)
        except Exception as err:
            print(f"Lỗi khi xóa {entry_path}: {err}")

    print(f"✅ Đã xóa toàn bộ nội dung trong thư mục '{folder_path}'.")

# Save the masks produced by SAM
def write_masks_to_folder(masks: List[Dict[str, Any]], path: str) -> None:
    """Write SAM masks and their metadata under ``path``.

    Produces ``<path>/sam_mask/<i>.png`` for every mask, a stacked
    ``<path>/sam_mask/masks.npy`` and ``<path>/sam_metadata.csv``.

    Args:
        masks: mask records; each must provide ``segmentation``, ``area``,
            ``bbox``, ``point_coords``, ``predicted_iou``,
            ``stability_score`` and ``crop_box``.
            NOTE(review): ``segmentation`` is assumed to be a NumPy boolean
            array (the generator is created with ``binary_mask`` output) —
            confirm.
        path: output directory for this image.

    Raises:
        ValueError: if ``masks`` is empty (there is nothing to stack).
    """
    header = "id,area,bbox_x0,bbox_y0,bbox_w,bbox_h,point_input_x,point_input_y,predicted_iou,stability_score,crop_box_x0,crop_box_y0,crop_box_w,crop_box_h"
    mask_dir = os.path.join(path, "sam_mask")
    os.makedirs(mask_dir, exist_ok=True)

    metadata = [header]
    masks_array = []
    for i, mask_data in enumerate(masks):
        mask = mask_data["segmentation"]
        masks_array.append(mask.copy())
        # Convert explicitly to uint8: relying on ``bool_array * 255`` leaves
        # the result dtype to NumPy's promotion rules, which differ between
        # NumPy versions and can yield a dtype cv2.imwrite cannot encode.
        cv2.imwrite(os.path.join(mask_dir, f"{i}.png"), mask.astype(np.uint8) * 255)
        fields = [
            i,
            mask_data["area"],
            *mask_data["bbox"],
            *mask_data["point_coords"][0],
            mask_data["predicted_iou"],
            mask_data["stability_score"],
            *mask_data["crop_box"],
        ]
        metadata.append(",".join(str(field) for field in fields))

    if not masks_array:
        # np.stack would raise an opaque ValueError here; say what happened.
        raise ValueError("No SAM masks were generated; nothing to save.")

    np.save(os.path.join(mask_dir, "masks.npy"), np.stack(masks_array, axis=0))
    with open(os.path.join(path, "sam_metadata.csv"), "w") as f:
        f.write("\n".join(metadata))

# Run FoodSAM (PyTorch)
def run_foodsam(image_path):
    """Run SAM mask generation, semantic segmentation and mask enhancement.

    Results are written under ``Output/Semantic_Results/<base>/``.

    Args:
        image_path: path of the image to segment.

    Returns:
        ``base``: the image file name up to its first dot, i.e. the name of
        the per-image result folder.

    Raises:
        ValueError: if OpenCV cannot read ``image_path``. Previously this case
            was skipped and the function later failed on an unbound ``base``.
    """
    args = argparse.Namespace(
        img_path=image_path,
        output="Output/Semantic_Results",
        device=device,
        SAM_checkpoint="ckpts/sam_vit_h_4b8939.pth",
        semantic_config="configs/SETR_MLA_768x768_80k_base.py",
        semantic_checkpoint="ckpts/SETR_MLA/iter_80000.pth",
        model_type="vit_h",
        color_list_path="FoodSAM/FoodSAM_tools/color_list.npy",
        category_txt="FoodSAM/FoodSAM_tools/category_id_files/foodseg103_category_id.txt",
        num_class=104
    )

    os.makedirs(args.output, exist_ok=True)

    # Read the image before loading any model so that a bad path fails fast
    # with a clear error.
    image = cv2.imread(args.img_path)
    if image is None:
        raise ValueError(f"Could not read image '{args.img_path}'.")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    write_log("Running SAM...")

    # Load the SAM model
    sam = sam_model_registry[args.model_type](checkpoint=args.SAM_checkpoint)
    sam.to(device=args.device)
    write_log(f"SAM model loaded on {args.device}")

    # Set up the mask generator and segment the image
    generator = SamAutomaticMaskGenerator(sam, output_mode="binary_mask")
    write_log(f"Processing {args.img_path}...")
    masks = generator.generate(image)

    # Save the results. The folder name is the file name up to its FIRST dot.
    # NOTE(review): presumably the FoodSAM tools use the same naming to find
    # this folder — confirm before changing it.
    base = os.path.basename(args.img_path).split('.')[0]
    save_base = os.path.join(args.output, base)
    os.makedirs(save_base, exist_ok=True)
    write_masks_to_folder(masks, save_base)
    shutil.copyfile(args.img_path, os.path.join(save_base, "input.jpg"))

    write_log("SAM processing done!")
    write_log("Running semantic segmentation model...")
    semantic_predict(args.semantic_config, args.semantic_checkpoint,
                     args.output, args.color_list_path, args.img_path, device=args.device)
    write_log("Semantic segmentation done!")
    write_log("Enhancing semantic masks...")
    enhance_masks(args.output, args.category_txt, args.color_list_path, num_class=args.num_class)
    write_log("Enhancement done!")
    write_log(f"Results saved in {args.output}!")
    return base

# Run depth estimation and compute food mass / nutrition (TensorFlow)
def run_depth_estimation(image_path, base):
    """Estimate food volumes for an image and look up nutrition data.

    Re-creates the estimator (new graph/session) on every call, estimates
    volumes using the masks in ``Output/Semantic_Results/<base>/masks/``,
    converts them to masses with ``Density_sub_90.xlsx`` and queries the
    nutrition service.

    Args:
        image_path: path of the image to analyse.
        base: name of the per-image result folder, as returned by
            ``run_foodsam``.

    Returns:
        Whatever ``estimator.get_nutrition_for_all`` returns for the
        estimated food masses.
    """
    global estimator, global_graph, global_session
    # Re-initialise the estimator with a new graph/session
    init_estimator()
    with global_graph.as_default():
        with global_session.as_default():
            K.set_session(global_session)

            # Estimate the volumes
            plate_diameter = 0  # skip plate detection
            _, food_volumes = estimator.estimate_volume(
                image_path, fov=70, plate_diameter_prior=plate_diameter,
                plot_results=True, para_folder_path=f"Output/Semantic_Results/{base}/masks/"
            )

            # Convert volumes to masses
            food_masses = estimator.convert_volume_to_mass('Density_sub_90.xlsx', food_volumes)
            write_log(f"Food masses: {food_masses}")

            # SECURITY: prefer the NUTRITION_API_KEY environment variable.
            # The literal below is kept only so existing setups keep working;
            # it is exposed in source and should be rotated, after which this
            # fallback should be removed.
            API_KEY = os.environ.get("NUTRITION_API_KEY") or "gP07Q5U7ULsiXCXLbCGVqk4c2Y6Yx39nMWJiuJxx"

            # Fetch the nutrition data
            nutrition_data = estimator.get_nutrition_for_all(food_masses, API_KEY)
            write_log(f"nutrition_data: {nutrition_data}")
            return nutrition_data

# Resolve the input image
def process_image_input(image: UploadFile, image_path: str):
    """Resolve the request input to a local image path.

    Both branches empty the ``Input`` and ``debug`` folders first.

    Args:
        image: uploaded file, or a falsy value when no file was sent.
        image_path: path of an image already on the server, used when no
            file was uploaded.

    Returns:
        ``(path, is_uploaded)``: the image path to process and whether it is
        a temporary copy of an upload that the caller should delete.

    Raises:
        ValueError: if neither input is given, or ``image_path`` does not
            exist.
    """
    if image:
        input_folder = "Input"
        os.makedirs(input_folder, exist_ok=True)
        clear_folder("Input")
        clear_folder("debug")
        # The file name comes from the client: keep only its final component
        # so it cannot point outside ``Input`` (e.g. "../../x"), and fall back
        # to a fixed name when it is missing or not a usable file name.
        safe_name = os.path.basename(image.filename or "")
        if safe_name in ("", ".", ".."):
            safe_name = "upload"
        temp_file_path = os.path.join(input_folder, safe_name)
        with open(temp_file_path, "wb") as temp_file:
            shutil.copyfileobj(image.file, temp_file)
        return temp_file_path, True
    elif image_path:
        # NOTE(review): image_path is caller-controlled and is used as-is;
        # consider restricting it to an allowed directory.
        clear_folder("Input")
        clear_folder("debug")
        if not os.path.exists(image_path):
            raise ValueError(f"The image at path '{image_path}' does not exist.")
        write_log(f"Using image path: {image_path}")
        return image_path, False
    else:
        raise ValueError("Please provide either an uploaded image or an image path.")

# Create the FastAPI application
app = FastAPI()

# Make sure the debug folder exists (write_log appends to debug/log.txt)
os.makedirs("debug", exist_ok=True)

# FastAPI endpoint (synchronous handler)
@app.post("/process-image/", dependencies=[Depends(lambda: None)])
def process_image(image: UploadFile = File(None), image_path: str = None):
    """Run the full pipeline on an uploaded image or a server-side image path.

    Args:
        image: optional uploaded image file.
        image_path: optional path of an image already on the server.

    Returns:
        ``{"food_nutrition": ...}`` on success, or ``{"error": "<message>"}``
        when any step raises.
    """
    image_path_to_process = None
    is_uploaded = False
    try:
        # Resolve the input image
        image_path_to_process, is_uploaded = process_image_input(image, image_path)

        # Step 1: run FoodSAM (PyTorch)
        base = run_foodsam(image_path_to_process)

        # Step 2: run depth estimation (TensorFlow)
        food_masses = run_depth_estimation(image_path_to_process, base)

        return {"food_nutrition": food_masses}
    except Exception as e:
        write_log(f"Error: {str(e)}")
        return {"error": str(e)}
    finally:
        # Remove the temporary upload on failure as well as on success, so a
        # failed request does not leave the file behind.
        if is_uploaded and image_path_to_process:
            try:
                os.remove(image_path_to_process)
            except OSError as cleanup_error:
                write_log(f"Error: {str(cleanup_error)}")

# Entry point for the FastAPI server thread
def start_uvicorn():
    """Run the FastAPI ``app`` under uvicorn, bound to 127.0.0.1:8000."""
    bind_options = {"host": "127.0.0.1", "port": 8000}
    uvicorn.run(app, **bind_options)

# Run FastAPI in its own (daemon) thread
threading.Thread(target=start_uvicorn, daemon=True).start()

# Set up ngrok.
# SECURITY: prefer the NGROK_AUTHTOKEN environment variable. The literal below
# is kept only so existing setups keep working; it is exposed in source and
# should be rotated, after which this fallback should be removed.
ngrok.set_auth_token(os.environ.get("NGROK_AUTHTOKEN") or "2w4ttBJNFtLVvtpgIKDbM3ord6S_4ZhiZ62czznikvh3rUnRM")
public_url = ngrok.connect(8000)

# Log and display the public URL
write_log(f"Public URL: {public_url}")
print("🚀 Public FastAPI server is running at:", public_url)

# Keep the main thread alive; on interrupt, shut the ngrok process down so the
# public tunnel does not stay open after the cell is stopped.
try:
    while True:
        time.sleep(1)
except KeyboardInterrupt:
    ngrok.kill()

In [None]:
#777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777

In [None]:
#777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777777#

In [None]:
########################################################################################################################################################

In [None]:
########################################################################################################################################################

In [None]:
# Image used by the segmentation cell (read into args.img_path)
image_path = r"rice_1.jpg"

In [None]:
import torch
# Report the installed PyTorch version and the CUDA version torch reports
print(torch.__version__)
print(torch.version.cuda)

SEGMENTATION

In [None]:
import sys
import os
import json
import shutil
import logging
import cv2
import torch
import numpy as np
from typing import Any, Dict, List
import argparse

# Add the custom library locations to the import path
sys.path.append('.')
sys.path.append('./SAM')
sys.path.append('./mmseg')

# Import the required project modules
from segment_anything import SamAutomaticMaskGenerator, sam_model_registry
from FoodSAM.FoodSAM_tools.predict_semantic_mask import semantic_predict
from FoodSAM.FoodSAM_tools.enhance_semantic_masks import enhance_masks
from FoodSAM.FoodSAM_tools.evaluate_foodseg103 import evaluate


# Build an args object by hand instead of parsing the command line
args = argparse.Namespace(
    img_path = image_path,  # image to segment (image_path must be set by an earlier cell)
    output="Output/Semantic_Results",
    device="cuda" if torch.cuda.is_available() else "cpu",
    SAM_checkpoint="ckpts/sam_vit_h_4b8939.pth",
    semantic_config="configs/SETR_MLA_768x768_80k_base.py",
    semantic_checkpoint="ckpts/SETR_MLA/iter_80000.pth",
    model_type="vit_h",
    color_list_path="FoodSAM/FoodSAM_tools/color_list.npy",
    category_txt="FoodSAM/FoodSAM_tools/category_id_files/foodseg103_category_id.txt",
    num_class=104
)


def write_masks_to_folder(masks: List[Dict[str, Any]], path: str) -> None:
    """Save the SAM masks and their metadata under ``path``.

    Writes one ``<path>/sam_mask/<i>.png`` per mask, the stacked masks as
    ``<path>/sam_mask/masks.npy`` and one CSV row per mask in
    ``<path>/sam_metadata.csv``.

    Args:
        masks: mask records providing ``segmentation``, ``area``, ``bbox``,
            ``point_coords``, ``predicted_iou``, ``stability_score`` and
            ``crop_box``.
            NOTE(review): ``segmentation`` is assumed to be a NumPy boolean
            array — confirm.
        path: output directory for this image.

    Raises:
        ValueError: if ``masks`` is empty (there is nothing to stack).
    """
    header = "id,area,bbox_x0,bbox_y0,bbox_w,bbox_h,point_input_x,point_input_y,predicted_iou,stability_score,crop_box_x0,crop_box_y0,crop_box_w,crop_box_h"  # noqa
    sam_mask_dir = os.path.join(path, "sam_mask")
    os.makedirs(sam_mask_dir, exist_ok=True)

    metadata = [header]
    masks_array = []
    for i, mask_data in enumerate(masks):
        mask = mask_data["segmentation"]
        masks_array.append(mask.copy())
        # Explicit uint8 conversion: the dtype of ``bool_array * 255`` depends
        # on NumPy's promotion rules, which differ between NumPy versions and
        # can produce a dtype cv2.imwrite cannot encode.
        png_path = os.path.join(sam_mask_dir, f"{i}.png")
        cv2.imwrite(png_path, mask.astype(np.uint8) * 255)
        values = [
            i,
            mask_data["area"],
            *mask_data["bbox"],
            *mask_data["point_coords"][0],
            mask_data["predicted_iou"],
            mask_data["stability_score"],
            *mask_data["crop_box"],
        ]
        metadata.append(",".join(map(str, values)))

    if not masks_array:
        # np.stack would raise an opaque ValueError here; say what happened.
        raise ValueError("No SAM masks were generated; nothing to save.")

    np.save(os.path.join(sam_mask_dir, "masks.npy"), np.stack(masks_array, axis=0))
    metadata_path = os.path.join(path, "sam_metadata.csv")
    with open(metadata_path, "w") as f:
        f.write("\n".join(metadata))



def create_logger(save_folder):
    """Configure the root logger to write to ``<save_folder>/sam_process.log`` and the console.

    ``logging.basicConfig`` is not used because it silently does nothing when
    the root logger already has handlers (for example when this cell is run
    a second time), which left the log file uncreated. Handlers are attached
    explicitly instead, and handlers installed by an earlier call of this
    function are replaced, so output is neither duplicated nor sent to a
    stale file. Handlers installed by other code are left untouched.

    Args:
        save_folder: directory for the log file; created if missing.

    Returns:
        The root logger, with its level set to INFO.
    """
    log_file = "sam_process.log"
    os.makedirs(save_folder, exist_ok=True)
    final_log_file = os.path.join(save_folder, log_file)

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)

    # Drop (and close) only the handlers a previous call added.
    for stale in [h for h in logger.handlers if getattr(h, "_sam_process_handler", False)]:
        logger.removeHandler(stale)
        stale.close()

    formatter = logging.Formatter(
        '[%(asctime)s] [%(filename)s:%(lineno)d] [%(levelname)s] %(message)s')
    new_handlers = (
        # mode='w' truncates the file on every call, as before; UTF-8 so
        # non-ASCII messages can always be written.
        logging.FileHandler(final_log_file, mode='w', encoding='utf-8'),
        logging.StreamHandler(),
    )
    for handler in new_handlers:
        handler.setFormatter(formatter)
        handler._sam_process_handler = True  # marker used for replacement above
        logger.addHandler(handler)

    print(f"Logger created: {final_log_file}")
    return logger


def main(args):
    """Run SAM mask generation, semantic segmentation and mask enhancement.

    Args:
        args: namespace providing ``img_path``, ``output``, ``device``,
            ``SAM_checkpoint``, ``semantic_config``, ``semantic_checkpoint``,
            ``model_type``, ``color_list_path``, ``category_txt`` and
            ``num_class``. ``args.device`` is rewritten to ``"cpu"`` when CUDA
            was requested but is not available.

    Raises:
        ValueError: if ``args.img_path`` is empty or the image cannot be
            read. (The empty-path check used to be an ``assert``, which is
            stripped under ``python -O``; an unreadable image used to be
            skipped while the rest of the pipeline still ran.)
    """
    log = logging.getLogger(__name__)
    os.makedirs(args.output, exist_ok=True)

    if not args.img_path:
        raise ValueError("Bạn phải cung cấp đường dẫn ảnh.")

    # Read the image before loading any model so that a bad path fails fast.
    image = cv2.imread(args.img_path)
    if image is None:
        raise ValueError(f"Could not read image '{args.img_path}'.")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Fall back to CPU when CUDA was requested but is unavailable
    if args.device == "cuda" and not torch.cuda.is_available():
        log.warning("CUDA is not available. Switching to CPU.")
        args.device = "cpu"

    # Load the SAM model
    log.info("Running SAM...")
    sam = sam_model_registry[args.model_type](checkpoint=args.SAM_checkpoint)
    sam.to(device=args.device)

    # Set up the mask generator and segment the image
    generator = SamAutomaticMaskGenerator(sam, output_mode="binary_mask")
    log.info("Processing %s...", args.img_path)
    masks = generator.generate(image)

    # Save the results in a folder named after the file name up to its
    # first dot
    base = os.path.basename(args.img_path).split('.')[0]
    save_base = os.path.join(args.output, base)
    os.makedirs(save_base, exist_ok=True)
    write_masks_to_folder(masks, save_base)
    shutil.copyfile(args.img_path, os.path.join(save_base, "input.jpg"))
    log.info("SAM processing done!")

    # Run the semantic segmentation model
    log.info("Running semantic segmentation model...")
    semantic_predict(args.semantic_config, args.semantic_checkpoint,
                     args.output, args.color_list_path, args.img_path, device=args.device)
    log.info("Semantic segmentation done!")

    # Enhance the segmentation masks
    log.info("Enhancing semantic masks...")
    enhance_masks(args.output, args.category_txt, args.color_list_path, num_class=args.num_class)
    log.info("Enhancement done!")

    log.info("Results saved in %s!", args.output)



def clear_folder(folder_path: str):
    """
    Remove everything inside a folder (files, sub-folders, symbolic links)
    while keeping the folder itself.

    A failure on one entry is printed and the remaining entries are still
    processed. If the folder does not exist, only a warning is printed.

    Args:
        folder_path (str): Path of the folder whose contents are removed.

    Returns:
        None
    """
    if not os.path.exists(folder_path) or not os.path.isdir(folder_path):
        print(f"⚠️ Thư mục '{folder_path}' không tồn tại.")
        return

    # Walk every direct child of the folder
    children = [os.path.join(folder_path, child) for child in os.listdir(folder_path)]
    for child_path in children:
        try:
            if os.path.islink(child_path) or os.path.isfile(child_path):
                # Plain file or symbolic link: unlink it
                os.remove(child_path)
            elif os.path.isdir(child_path):
                # Real sub-folder: remove it with everything inside
                shutil.rmtree(child_path)
        except Exception as exc:
            print(f"Lỗi khi xóa {child_path}: {exc}")

    print(f"✅ Đã xóa toàn bộ nội dung trong thư mục '{folder_path}'.")


# Disabled driver code, kept for reference:
# clear_folder("masks")
# clear_folder("Output")
# # Run the program
# main(args)
print("doneeeeee")


FOOD VOLUME

In [None]:
import sys
import json
from keras.models import Model, model_from_json
from food_volume_estimation.volume_estimator import VolumeEstimator
from food_volume_estimation.depth_estimation.custom_modules import *
from food_volume_estimation.food_segmentation.food_segmentator import FoodSegmentator
import matplotlib.pyplot as plt
from pyntcloud import PyntCloud

# Paths to the depth model architecture and weights
depth_model_architecture = './models/fine_tune_food_videos/monovideo_fine_tune_food_videos.json'
depth_model_weights = './models/fine_tune_food_videos/monovideo_fine_tune_food_videos.h5'
print("loaded model estimate volume")

In [None]:
# Create the estimator object and initialize it
estimator = VolumeEstimator(arg_init=False)
with open(depth_model_architecture, 'r') as read_file:
    custom_losses = Losses()
    # Custom layers/losses needed to deserialize the model
    objs = {'ProjectionLayer': ProjectionLayer,
            'ReflectionPadding2D': ReflectionPadding2D,
            'InverseDepthNormalization': InverseDepthNormalization,
            'AugmentationLayer': AugmentationLayer,
            'compute_source_loss': custom_losses.compute_source_loss}
    # NOTE(review): json.load's result is passed straight to model_from_json,
    # so the file presumably stores the architecture as a JSON-encoded
    # string — confirm.
    model_architecture_json = json.load(read_file)
    estimator.monovideo = model_from_json(model_architecture_json, custom_objects=objs)
# Name-mangled call to VolumeEstimator's private __set_weights_trainable helper
estimator._VolumeEstimator__set_weights_trainable(estimator.monovideo, False)
estimator.monovideo.load_weights(depth_model_weights)
estimator.model_input_shape = estimator.monovideo.inputs[0].shape.as_list()[1:]
# Expose the 'depth_net' sub-network as a standalone model
depth_net = estimator.monovideo.get_layer('depth_net')
estimator.depth_model = Model(inputs=depth_net.inputs, outputs=depth_net.outputs, name='depth_model')
print('[*] Loaded depth estimation model.')

# Depth model configuration
MIN_DEPTH = 0.01
MAX_DEPTH = 10
estimator.min_disp = 1 / MAX_DEPTH
estimator.max_disp = 1 / MIN_DEPTH
estimator.gt_depth_scale = 0.35 # Ground truth expected median depth

# Create segmentator object (disabled)
# estimator.segmentator = FoodSegmentator(segmentation_model_weights)

# Set plate adjustment relaxation parameter
estimator.relax_param = 0.01
print("done set up model")

NUTRITION INFORMATION RETRIEVAL

In [None]:
# Append a message to the log file
def write_log(message):
    """Append ``message`` as one line to ``debug/log.txt``.

    The ``debug`` directory is created if it does not exist. The file is
    written as UTF-8 so that messages containing non-ASCII text (for example
    image paths) do not fail on platforms whose default encoding cannot
    represent them.

    Args:
        message: text of the log entry; a newline is appended.
    """
    log_dir = "debug"
    log_file_path = os.path.join(log_dir, "log.txt")

    # Create the directory if it does not exist yet
    os.makedirs(log_dir, exist_ok=True)

    # Write the entry
    with open(log_file_path, "a", encoding="utf-8") as log_file:
        log_file.write(message + "\n")
        
def run(image_path_run):
    """Segment an image, estimate food volumes and convert them to masses.

    Empties the ``masks`` and ``Output`` folders, runs ``main`` on the image,
    then estimates volumes from the masks it produced and converts them with
    ``Density_sub_90.xlsx``.

    Args:
        image_path_run: path of the image to process.

    Returns:
        The value returned by ``estimator.convert_volume_to_mass``.
    """
    write_log(f"Starting processing for image: {image_path_run}")
    # Build an args object by hand instead of parsing the command line
    args = argparse.Namespace(
        img_path=image_path_run,  # image to process
        output="Output/Semantic_Results",
        device="cuda" if torch.cuda.is_available() else "cpu",
        SAM_checkpoint="ckpts/sam_vit_h_4b8939.pth",
        semantic_config="configs/SETR_MLA_768x768_80k_base.py",
        semantic_checkpoint="ckpts/SETR_MLA/iter_80000.pth",
        model_type="vit_h",
        color_list_path="FoodSAM/FoodSAM_tools/color_list.npy",
        category_txt="FoodSAM/FoodSAM_tools/category_id_files/foodseg103_category_id.txt",
        num_class=104
    )

    clear_folder("masks")
    clear_folder("Output")
    # Run the segmentation pipeline
    main(args)

    # main() stores its results in a folder named after the file name up to
    # its first dot, without any directory part. Derive the name the same way;
    # os.path.splitext() on the full path kept the directory and any inner
    # dots, so the masks folder could not be found for such paths.
    name_without_ext = os.path.basename(image_path_run).split('.')[0]
    write_log(f"Processed image name without extension: {name_without_ext}")

    plate_diameter = 0  # Set as 0 to ignore plate detection and scaling
    _, food_volumes = estimator.estimate_volume(image_path_run, fov=70, plate_diameter_prior=plate_diameter,
                                                plot_results=True, para_folder_path=rf"Output/Semantic_Results/{name_without_ext}/masks/")

    food_masses = estimator.convert_volume_to_mass(
        r'Density_sub_90.xlsx', food_volumes)

    return food_masses

In [None]:
# Run the whole pipeline on the sample image and print the returned food masses
print(run(r"rice_1.jpg"))