# Food Object Detection and Volume Estimation Based on Faster R-CNN


#### A demo of the Faster R-CNN architecture used to identify fruits and estimate their volume

## Imports


In [2]:
# Import all the required libraries

import os
import cv2
import numpy as np
import tensorflow as tf
import sys
from utils import label_map_util
from utils import visualization_utils as vis_util
import json
from keras.models import Model, model_from_json
from food_volume_estimation.volume_estimator import VolumeEstimator
from food_volume_estimation.depth_estimation.custom_modules import *
from food_volume_estimation.food_segmentation.food_segmentator import FoodSegmentator
import matplotlib.pyplot as plt
from pyntcloud import PyntCloud


## Model preparation

In [11]:
# --- Pre-trained model files -------------------------------------------------
# These are relative paths, so they resolve against the current working
# directory when the files are opened later on.
depth_model_architecture = 'Volume_Estimation/models/fine_tune_food_videos/monovideo_fine_tune_food_videos.json'
depth_model_weights = 'Volume_Estimation/models/fine_tune_food_videos/monovideo_fine_tune_food_videos.h5'
segmentation_model_weights = 'Volume_Estimation/models/segmentation/faster_rcnn_food_segmentation.h5'

# --- Object detector settings ------------------------------------------------
# Directory (under the working directory) holding the frozen inference graph
MODEL_NAME = 'inference_graph'
# Passed as max_num_classes when converting the label map to categories
NUM_CLASSES = 4

# --- Input image -------------------------------------------------------------
IMAGE_NAME = 'test23.jpg'

# --- Absolute paths derived from the working directory -----------------------
CWD_PATH = os.getcwd()
PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, 'frozen_inference_graph.pb')
PATH_TO_LABELS = os.path.join(CWD_PATH, 'training', 'labelmap.pbtxt')
PATH_TO_IMAGE = os.path.join(CWD_PATH, IMAGE_NAME)

# --- Label map -> category index ---------------------------------------------
# category_index is what the visualization helper later uses to label boxes.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map,
    max_num_classes=NUM_CLASSES,
    use_display_name=True,
)
category_index = label_map_util.create_category_index(categories)

## Volume Estimation

In [5]:
# Create the estimator object and initialize it by hand. arg_init=False is
# passed here and every model/setting the estimator needs is attached below.
estimator = VolumeEstimator(arg_init=False)

# Custom layers and loss that Keras needs in order to rebuild the architecture
# from its JSON description.
custom_losses = Losses()
objs = dict(
    ProjectionLayer=ProjectionLayer,
    ReflectionPadding2D=ReflectionPadding2D,
    InverseDepthNormalization=InverseDepthNormalization,
    AugmentationLayer=AugmentationLayer,
    compute_source_loss=custom_losses.compute_source_loss,
)

# Read the serialized architecture; the file is closed before the model is built.
with open(depth_model_architecture, 'r') as architecture_file:
    model_architecture_json = json.load(architecture_file)
estimator.monovideo = model_from_json(model_architecture_json, custom_objects=objs)

# Mark the weights as non-trainable (the helper is a name-mangled private
# method of VolumeEstimator), then load the pre-trained weights.
estimator._VolumeEstimator__set_weights_trainable(estimator.monovideo, False)
estimator.monovideo.load_weights(depth_model_weights)

# Shape of the first model input with its first entry dropped
# (presumably the batch dimension - TODO confirm).
estimator.model_input_shape = estimator.monovideo.inputs[0].shape.as_list()[1:]

# Expose the 'depth_net' sub-layer as a standalone model.
depth_net = estimator.monovideo.get_layer('depth_net')
estimator.depth_model = Model(
    inputs=depth_net.inputs,
    outputs=depth_net.outputs,
    name='depth_model',
)
print('[*] Loaded depth estimation model.')

# Depth model configuration: the disparity bounds are the reciprocals of the
# depth bounds.
MIN_DEPTH = 0.01
MAX_DEPTH = 10
estimator.min_disp = 1 / MAX_DEPTH
estimator.max_disp = 1 / MIN_DEPTH
estimator.gt_depth_scale = 0.35  # Ground truth expected median depth

# Create segmentator object
estimator.segmentator = FoodSegmentator(segmentation_model_weights)

# Set plate adjustment relaxation parameter
estimator.relax_param = 0.01

[*] VolumeEstimator not initialized.
[*] Loaded depth estimation model.
[*] Loading segmentation model weights Volume_Estimation/models/segmentation/faster_rcnn_food_segmentation.h5


## Food Object Detection

In [6]:
# Load the frozen detection graph (TF1-style API) into its own tf.Graph and
# open a session on it. `sess` is left open because the detection cell below
# uses it.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()

    # Read the serialized graph; the file handle is released before parsing.
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()

    od_graph_def.ParseFromString(serialized_graph)
    # name='' imports the nodes without a name prefix, so tensors can be looked
    # up by names such as 'image_tensor:0'.
    tf.import_graph_def(od_graph_def, name='')

    sess = tf.Session(graph=detection_graph)

## Running detection, visualization, and volume estimation on the input image

In [12]:
%matplotlib notebook
# Run the detector on the input image, display the annotated detections, then
# estimate and plot the volume of each food object found by the estimator.

# Input tensor is the image
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

# Output tensors are the detection boxes, scores, and classes
# Each box represents a part of the image where a particular object was detected
detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')

# Each score represents level of confidence for each of the objects.
# The score is shown on the result image, together with the class label.
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')

# Number of objects detected
num_detections = detection_graph.get_tensor_by_name('num_detections:0')

# Load the input image. cv2.imread does not raise on a missing or unreadable
# file; it returns None, which would otherwise surface as an obscure error
# inside cvtColor. Fail early with a clear message instead.
image = cv2.imread(PATH_TO_IMAGE)
if image is None:
    raise FileNotFoundError('Could not read input image: {}'.format(PATH_TO_IMAGE))

# cv2.imread yields BGR channel order; swap to RGB for the detector input and
# for matplotlib display. (COLOR_BGR2RGB performs the same channel swap as the
# previously used COLOR_RGB2BGR, but states the actual direction.)
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
# The image is fed with an extra leading axis: shape (1, ...) + image shape.
image_expanded = np.expand_dims(image_rgb, axis=0)

# Perform the actual detection by running the model with the image as input
(boxes, scores, classes, num) = sess.run(
    [detection_boxes, detection_scores, detection_classes, num_detections],
    feed_dict={image_tensor: image_expanded})

# Visualization of the detection. The return value is not used; image_rgb is
# displayed right after, so the helper presumably draws onto it in place.
vis_util.visualize_boxes_and_labels_on_image_array(
    image_rgb,
    np.squeeze(boxes),
    np.squeeze(classes).astype(np.int32),
    np.squeeze(scores),
    category_index,
    use_normalized_coordinates=True,
    line_thickness=8,
    min_score_thresh=0.5)

plt.imshow(image_rgb)
plt.title('Object Detector')
plt.show()

# Estimate volumes in input image
# NOTE(review): `image` is the unconverted BGR array from cv2.imread (without
# the boxes drawn above) - confirm which channel order estimate_volume expects
# for array input.
plate_diameter = 0.25  # Set as 0 to ignore plate detection and scaling
outputs_list = estimator.estimate_volume(image, fov=30, plate_diameter_prior=plate_diameter,
                                         plot_results=True)

# Plot results for all detected food objects
for outputs in outputs_list:
    (estimated_volume, object_points_df, non_object_points_df, plane_points_df, object_points_transformed_df,
        plane_points_transformed_df, simplices) = outputs

    # Resizes whichever figure is current at this point, i.e. the one that
    # exists before the new triangulation figure is created below.
    # NOTE(review): confirm this is the intended figure to resize.
    fig = plt.gcf()
    fig.set_size_inches(6.5, 5.3)

    # Plot x-y plane triangulation of food object, keeping only the transformed
    # points whose third coordinate is positive.
    transformed_points = object_points_transformed_df.values
    volume_points = transformed_points[transformed_points[:, 2] > 0]
    plt.figure()
    plt.triplot(volume_points[:, 0], volume_points[:, 1], simplices)
    plt.plot(volume_points[:, 0], volume_points[:, 1], 'o', markersize=1)
    plt.gca().set_aspect('equal', adjustable='box')
    plt.title('Food X-Y triangulation')

    plt.show()

<IPython.core.display.Javascript object>

[*] Creating intrinsics matrix from given FOV: 30




[*] Ellipse parameters: (116.22535705566406, 70.35861206054688, 73.0, 31.70707070707071, -0.02632619542114356)
[*] Found 3 food object(s) in image.
[*] Estimated plane parameters (w0,w1,w2,w3): [0.5270355072894952, 0.11686213044847406, -0.7234644407279216, -0.6803987400556233]
[*] Estimated volume: 0.12846606561287185 L


<IPython.core.display.Javascript object>

[*] Estimated plane parameters (w0,w1,w2,w3): [0.6660904816726018, -0.09896977646615376, -0.4095414291980006, -0.9069072726120895]
[*] Estimated volume: 0.0711817793956023 L


<IPython.core.display.Javascript object>

[*] Estimated plane parameters (w0,w1,w2,w3): [0.6235057171759847, -0.039692126469348665, -0.515273555130685, -0.8561061257106658]
[*] Estimated volume: 0.1344771268188491 L


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>