# Single-image food volume estimation
Using a  monocular depth estimation network and a segmentation network, we will estimate the volume of the food displayed in the input image.

In [1]:
import sys
import os

# Lấy đường dẫn thư mục gốc dự án
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))

# Thêm thư mục gốc vào sys.path
if project_root not in sys.path:
    sys.path.append(project_root)

# Kiểm tra sys.path
print("Current sys.path:", sys.path)

Current sys.path: ['e:\\Estimated volume\\food_volume_estimation', 'c:\\Users\\anhan\\anaconda3\\envs\\T37\\python37.zip', 'c:\\Users\\anhan\\anaconda3\\envs\\T37\\DLLs', 'c:\\Users\\anhan\\anaconda3\\envs\\T37\\lib', 'c:\\Users\\anhan\\anaconda3\\envs\\T37', '', 'c:\\Users\\anhan\\anaconda3\\envs\\T37\\lib\\site-packages', 'c:\\Users\\anhan\\anaconda3\\envs\\T37\\lib\\site-packages\\win32', 'c:\\Users\\anhan\\anaconda3\\envs\\T37\\lib\\site-packages\\win32\\lib', 'c:\\Users\\anhan\\anaconda3\\envs\\T37\\lib\\site-packages\\Pythonwin', 'c:\\Users\\anhan\\anaconda3\\envs\\T37\\lib\\site-packages\\IPython\\extensions', 'C:\\Users\\anhan\\.ipython', 'e:\\Estimated volume']


In [None]:
import sys
import json
from keras.models import Model, model_from_json
from food_volume_estimation.volume_estimator import VolumeEstimator
from food_volume_estimation.depth_estimation.custom_modules import *
from food_volume_estimation.food_segmentation.food_segmentator import FoodSegmentator
import matplotlib.pyplot as plt
from pyntcloud import PyntCloud

# Paths to model archiecture/weights
depth_model_architecture = '../models/fine_tune_food_videos/monovideo_fine_tune_food_videos.json'
depth_model_weights = '../models/fine_tune_food_videos/monovideo_fine_tune_food_videos.h5'
segmentation_model_weights = '../models/segmentation/mask_rcnn_food_segmentation.h5'

In [3]:
# Create estimator object and intialize
estimator = VolumeEstimator(arg_init=False)
with open(depth_model_architecture, 'r') as read_file:
    custom_losses = Losses()
    objs = {'ProjectionLayer': ProjectionLayer,
            'ReflectionPadding2D': ReflectionPadding2D,
            'InverseDepthNormalization': InverseDepthNormalization,
            'AugmentationLayer': AugmentationLayer,
            'compute_source_loss': custom_losses.compute_source_loss}
    model_architecture_json = json.load(read_file)
    estimator.monovideo = model_from_json(model_architecture_json, custom_objects=objs)
estimator._VolumeEstimator__set_weights_trainable(estimator.monovideo, False)
estimator.monovideo.load_weights(depth_model_weights)
estimator.model_input_shape = estimator.monovideo.inputs[0].shape.as_list()[1:]
depth_net = estimator.monovideo.get_layer('depth_net')
estimator.depth_model = Model(inputs=depth_net.inputs, outputs=depth_net.outputs, name='depth_model')
print('[*] Loaded depth estimation model.')

# Depth model configuration
MIN_DEPTH = 0.01
MAX_DEPTH = 10
estimator.min_disp = 1 / MAX_DEPTH
estimator.max_disp = 1 / MIN_DEPTH
estimator.gt_depth_scale = 0.35 # Ground truth expected median depth

# Create segmentator object
estimator.segmentator = FoodSegmentator(segmentation_model_weights)

# Set plate adjustment relaxation parameter
estimator.relax_param = 0.01

[*] VolumeEstimator not initialized.
Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use the `axis` argument instead
Instructions for updating:
Use tf.cast instead.
[*] Loaded depth estimation model.
[*] Loading segmentation model weights ../models/segmentation/mask_rcnn_food_segmentation.h5


In [4]:
import h5py


def check(file_path):
    print(file_path)
    with h5py.File(file_path, 'r') as f:
        # Kiểm tra các thuộc tính của file HDF5
        print("HDF5 file contains the following attributes:")
        for key in f.attrs.keys():
            print(f"{key}: {f.attrs[key]}")

        # Kiểm tra phiên bản Keras và TensorFlow trong metadata
        if 'keras_version' in f.attrs:
            print(f"Keras version: {f.attrs['keras_version']}")
        if 'tensorflow_version' in f.attrs:
            print(f"TensorFlow version: {f.attrs['tensorflow_version']}")
            

if __name__ == "__main__":
    check('../models/fine_tune_food_videos/monovideo_fine_tune_food_videos.h5')
    check('../models/segmentation/mask_rcnn_food_segmentation.h5')

../models/fine_tune_food_videos/monovideo_fine_tune_food_videos.h5
HDF5 file contains the following attributes:
backend: b'tensorflow'
keras_version: b'2.2.4'
layer_names: [b'input_11' b'input_12' b'input_13' b'augmentation_layer_1' b'pose_net'
 b'depth_net' b'reprojection_module' b'lambda_1' b'scale1_reprojections'
 b'scale2_reprojections' b'scale3_reprojections' b'scale4_reprojections']
Keras version: b'2.2.4'
../models/segmentation/mask_rcnn_food_segmentation.h5
HDF5 file contains the following attributes:
backend: b'tensorflow'
keras_version: b'2.2.4'
layer_names: [b'input_image' b'zero_padding2d_1' b'conv1' b'bn_conv1' b'activation_1'
 b'max_pooling2d_1' b'res2a_branch2a' b'bn2a_branch2a' b'activation_2'
 b'res2a_branch2b' b'bn2a_branch2b' b'activation_3' b'res2a_branch2c'
 b'res2a_branch1' b'bn2a_branch2c' b'bn2a_branch1' b'add_1' b'res2a_out'
 b'res2b_branch2a' b'bn2b_branch2a' b'activation_4' b'res2b_branch2b'
 b'bn2b_branch2b' b'activation_5' b'res2b_branch2c' b'bn2b_branch2c'

In [5]:
%matplotlib notebook

# Estimate volumes in input image
input_image = 'D:/Estimated volume/image/com_suon.jpg'
plate_diameter = 0 # Set as 0 to ignore plate detection and scaling
outputs_list = estimator.estimate_volume(input_image, fov=70, plate_diameter_prior=plate_diameter, 
                                         plot_results=True)

# Plot results for all detected food objects
for outputs in outputs_list:
    (estimated_volume, object_points_df, non_object_points_df, plane_points_df, object_points_transformed_df, 
        plane_points_transformed_df, simplices) = outputs
    fig = plt.gcf()
    fig.set_size_inches(6.5, 5.3)
    
    # Flip x and z coordinates to match point cloud with plotting axes
    object_points_df.values[:,0] *= -1
    object_points_df.values[:,2] *= -1
    non_object_points_df.values[:,0] *= -1
    non_object_points_df.values[:,2] *= -1
    plane_points_df.values[:,0] *= -1
    plane_points_df.values[:,2] *= -1
    
    # Create point cloud objects and plot
    object_pc = PyntCloud(object_points_df)
    non_object_pc = PyntCloud(non_object_points_df)
    plane_pc = PyntCloud(plane_points_df)
    scene = object_pc.plot(initial_point_size=0.005, return_scene=True)
    non_object_pc.plot(initial_point_size=0.005, scene=scene)
    plane_pc.plot(initial_point_size=0.001, use_as_color='z', cmap='cool', scene=scene)
    
    # Create transformed point clouds and plot
    plane_transformed_pc = PyntCloud(plane_points_transformed_df)
    object_transformed_pc = PyntCloud(object_points_transformed_df)
    scene = object_transformed_pc.plot(initial_point_size=0.005, return_scene=True)
    plane_transformed_pc.plot(initial_point_size=0.005, use_as_color='x', cmap='cool', scene=scene)

    # Plot x-y plane triangulation of food object
    volume_points = object_points_transformed_df.values[object_points_transformed_df.values[:,2] > 0]
    plt.figure()
    plt.triplot(volume_points[:,0], volume_points[:,1], simplices)
    plt.plot(volume_points[:,0], volume_points[:,1], 'o', markersize=1)
    plt.gca().set_aspect('equal', adjustable='box')
    plt.title('Food X-Y triangulation')

    plt.show()

AttributeError: 'NoneType' object has no attribute 'shape'

In [7]:
%matplotlib notebook
for i in range(1, 14): 
    print(f'../image/com_400ml_final/{i}.jpg')
    # Estimate volumes in input image
    input_image = f'../image/com_400ml_final/{i}.jpg'
    plate_diameter = 0 # Set as 0 to ignore plate detection and scaling
    outputs_list = estimator.estimate_volume(input_image, fov=70, plate_diameter_prior=plate_diameter, 
                                            plot_results=True)

    # Plot results for all detected food objects
    for outputs in outputs_list:
        (estimated_volume, object_points_df, non_object_points_df, plane_points_df, object_points_transformed_df, 
            plane_points_transformed_df, simplices) = outputs
        fig = plt.gcf()
        fig.set_size_inches(6.5, 5.3)
        
        # Flip x and z coordinates to match point cloud with plotting axes
        object_points_df.values[:,0] *= -1
        object_points_df.values[:,2] *= -1
        non_object_points_df.values[:,0] *= -1
        non_object_points_df.values[:,2] *= -1
        plane_points_df.values[:,0] *= -1
        plane_points_df.values[:,2] *= -1
        
        # Create point cloud objects and plot
        object_pc = PyntCloud(object_points_df)
        non_object_pc = PyntCloud(non_object_points_df)
        plane_pc = PyntCloud(plane_points_df)
        scene = object_pc.plot(initial_point_size=0.005, return_scene=True)
        non_object_pc.plot(initial_point_size=0.005, scene=scene)
        plane_pc.plot(initial_point_size=0.001, use_as_color='z', cmap='cool', scene=scene)
        
        # Create transformed point clouds and plot
        plane_transformed_pc = PyntCloud(plane_points_transformed_df)
        object_transformed_pc = PyntCloud(object_points_transformed_df)
        scene = object_transformed_pc.plot(initial_point_size=0.005, return_scene=True)
        plane_transformed_pc.plot(initial_point_size=0.005, use_as_color='x', cmap='cool', scene=scene)

        # Plot x-y plane triangulation of food object
        volume_points = object_points_transformed_df.values[object_points_transformed_df.values[:,2] > 0]
        plt.figure()
        plt.triplot(volume_points[:,0], volume_points[:,1], simplices)
        plt.plot(volume_points[:,0], volume_points[:,1], 'o', markersize=1)
        plt.gca().set_aspect('equal', adjustable='box')
        plt.title('Food X-Y triangulation')

        plt.show()

../image/com_400ml_final/1.jpg
[*] Creating intrinsics matrix from given FOV: 70
[*] No ellipse found. Scaling with expected median depth.
[*] Found 1 food object(s) in image.
[*] Estimated plane parameters (w0,w1,w2,w3): [0.29623645547096666, 0.02727352277958261, -0.2317518732837359, -0.9723925257758159]
[*] Estimated volume: 0.5562459212355686 L


<IPython.core.display.Javascript object>



Renderer(camera=PerspectiveCamera(aspect=1.6, fov=90.0, position=(0.0039785420991251055, 0.10600719431227841, …

HBox(children=(Label(value='Point size:'), FloatSlider(value=0.005, max=0.05, step=5e-05), Label(value='Backgr…

HBox(children=(Label(value='Point size:'), FloatSlider(value=0.005, max=0.05, step=5e-05)))

HBox(children=(Label(value='Point size:'), FloatSlider(value=0.001, max=0.01, step=1e-05)))

Renderer(camera=PerspectiveCamera(aspect=1.6, fov=90.0, position=(-0.000928398707049874, 0.06188413639175436, …

HBox(children=(Label(value='Point size:'), FloatSlider(value=0.005, max=0.05, step=5e-05), Label(value='Backgr…

HBox(children=(Label(value='Point size:'), FloatSlider(value=0.005, max=0.05, step=5e-05)))

<IPython.core.display.Javascript object>

../image/com_400ml_final/2.jpg
[*] Creating intrinsics matrix from given FOV: 70
[*] No ellipse found. Scaling with expected median depth.
[*] Found 1 food object(s) in image.
[*] Estimated plane parameters (w0,w1,w2,w3): [0.29648086085517, -0.14841747522589518, -0.2508121325593563, -0.9565905744928698]
[*] Estimated volume: 0.4451600496354179 L


<IPython.core.display.Javascript object>



Renderer(camera=PerspectiveCamera(aspect=1.6, fov=90.0, position=(0.019139584093643795, 0.09164880069139954, 0…

HBox(children=(Label(value='Point size:'), FloatSlider(value=0.005, max=0.05, step=5e-05), Label(value='Backgr…

HBox(children=(Label(value='Point size:'), FloatSlider(value=0.005, max=0.05, step=5e-05)))

HBox(children=(Label(value='Point size:'), FloatSlider(value=0.001, max=0.01, step=1e-05)))

Renderer(camera=PerspectiveCamera(aspect=1.6, fov=90.0, position=(-0.013501037443747635, 0.126802621027085, 0.…

HBox(children=(Label(value='Point size:'), FloatSlider(value=0.005, max=0.05, step=5e-05), Label(value='Backgr…

HBox(children=(Label(value='Point size:'), FloatSlider(value=0.005, max=0.05, step=5e-05)))

<IPython.core.display.Javascript object>

../image/com_400ml_final/3.jpg
[*] Creating intrinsics matrix from given FOV: 70
[*] No ellipse found. Scaling with expected median depth.
[*] Found 1 food object(s) in image.
[*] Estimated plane parameters (w0,w1,w2,w3): [0.29851651856144773, 0.05372360366190336, -0.22829544541618402, -0.9721085145248985]
[*] Estimated volume: 0.4897077476302581 L


<IPython.core.display.Javascript object>



Renderer(camera=PerspectiveCamera(aspect=1.6, fov=90.0, position=(-0.024079316957025386, 0.06370338828580299, …

HBox(children=(Label(value='Point size:'), FloatSlider(value=0.005, max=0.05, step=5e-05), Label(value='Backgr…

HBox(children=(Label(value='Point size:'), FloatSlider(value=0.005, max=0.05, step=5e-05)))

HBox(children=(Label(value='Point size:'), FloatSlider(value=0.001, max=0.01, step=1e-05)))

Renderer(camera=PerspectiveCamera(aspect=1.6, fov=90.0, position=(0.017273880021843803, 0.13361642673309546, 0…

HBox(children=(Label(value='Point size:'), FloatSlider(value=0.005, max=0.05, step=5e-05), Label(value='Backgr…

HBox(children=(Label(value='Point size:'), FloatSlider(value=0.005, max=0.05, step=5e-05)))

<IPython.core.display.Javascript object>

../image/com_400ml_final/4.jpg
[*] Creating intrinsics matrix from given FOV: 70
[*] No ellipse found. Scaling with expected median depth.
[*] Found 1 food object(s) in image.
[*] Estimated plane parameters (w0,w1,w2,w3): [0.2997023463006287, -0.16764799520845758, -0.22233879442791, -0.9604476092921095]
[*] Estimated volume: 0.3465917668205829 L


<IPython.core.display.Javascript object>



Renderer(camera=PerspectiveCamera(aspect=1.6, fov=90.0, position=(0.02489671723611675, 0.09383835548314957, 0.…

HBox(children=(Label(value='Point size:'), FloatSlider(value=0.005, max=0.05, step=5e-05), Label(value='Backgr…

HBox(children=(Label(value='Point size:'), FloatSlider(value=0.005, max=0.05, step=5e-05)))

HBox(children=(Label(value='Point size:'), FloatSlider(value=0.001, max=0.01, step=1e-05)))

Renderer(camera=PerspectiveCamera(aspect=1.6, fov=90.0, position=(-0.012691778869745405, 0.14684557729331277, …

HBox(children=(Label(value='Point size:'), FloatSlider(value=0.005, max=0.05, step=5e-05), Label(value='Backgr…

HBox(children=(Label(value='Point size:'), FloatSlider(value=0.005, max=0.05, step=5e-05)))

<IPython.core.display.Javascript object>

../image/com_400ml_final/5.jpg
[*] Creating intrinsics matrix from given FOV: 70


KeyboardInterrupt: 