# SSD300 Inference Tutorial

This is a brief tutorial that shows how to use a trained SSD300 for inference on a custom (homemade) dataset. If you'd like more detailed explanations, please refer to [`ssd300_training_validation.ipynb`](https://github.com/pierluigiferrari/ssd_keras/blob/master/ssd300_training.ipynb).

In [None]:
#from tensorflow.python.client import device_lib
#print (device_lib.list_local_devices())

In [None]:
import os
# Set CUDA_DEVICE_ORDER before Keras is imported in a later cell.
# NOTE(review): "PCI_BUS_ID" presumably makes GPU indices follow the PCI bus
# order - confirm against the CUDA environment-variable documentation.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"  
#os.environ["CUDA_VISIBLE_DEVICES"]="0"

In [None]:
# Expose only the first GPU to this process. CUDA_VISIBLE_DEVICES takes CUDA
# device indices (or GPU UUIDs); a TensorFlow device string such as '/gpu:0'
# is not a valid entry and ends up hiding every GPU from the process.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'



from keras import backend as K
from keras.models import load_model
from keras.preprocessing import image
from keras.optimizers import Adam
from imageio import imread
import numpy as np
from matplotlib import pyplot as plt

from models.keras_ssd300 import ssd_300
from keras_loss_function.keras_ssd_loss import SSDLoss
from keras_layers.keras_layer_AnchorBoxes import AnchorBoxes
from keras_layers.keras_layer_DecodeDetections import DecodeDetections
from keras_layers.keras_layer_DecodeDetectionsFast import DecodeDetectionsFast
from keras_layers.keras_layer_L2Normalization import L2Normalization

from ssd_encoder_decoder.ssd_output_decoder import decode_detections, decode_detections_fast

from data_generator.object_detection_2d_data_generator import DataGenerator
from data_generator.object_detection_2d_photometric_ops import ConvertTo3Channels
from data_generator.object_detection_2d_geometric_ops import Resize
from data_generator.object_detection_2d_misc_utils import apply_inverse_transforms

# from tensorflow import keras


%matplotlib inline

In [None]:
# Input resolution (height, width) of the SSD300 network, in pixels.
img_height, img_width = 300, 300

## 1. Load a trained SSD

Either load a trained model or build a model and load trained weights into it. Since the HDF5 files I'm providing contain only the weights for the various SSD versions, not the complete models, you'll have to go with the latter option when using this implementation for the first time. You can then of course save the model and next time load the full model directly, without having to build it.

You can find the download links to all the trained model weights in the README.

### 1.1. Build the model and load trained weights into it

In [None]:
# 1: Build the Keras model

K.clear_session()  # Drop any model left over from an earlier run.

# All constructor arguments for the inference-mode SSD300, collected in one
# place so they are easy to compare with the training configuration.
ssd_config = {
    'image_size': (img_height, img_width, 3),
    'n_classes': 2,
    'mode': 'inference',
    'l2_regularization': 0.0005,
    # The scales for MS COCO are [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05]
    'scales': [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05],
    'aspect_ratios_per_layer': [[1.0, 2.0, 0.5],
                                [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                                [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                                [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                                [1.0, 2.0, 0.5],
                                [1.0, 2.0, 0.5]],
    'two_boxes_for_ar1': True,
    'steps': [8, 16, 32, 64, 100, 300],
    'offsets': [0.5, 0.5, 0.5, 0.5, 0.5, 0.5],
    'clip_boxes': False,
    'variances': [0.1, 0.1, 0.2, 0.2],
    'normalize_coords': True,
    'subtract_mean': [123, 117, 104],
    'swap_channels': [2, 1, 0],
    'confidence_thresh': 0.5,
    'iou_threshold': 0.45,
    'top_k': 200,
    'nms_max_output_size': 400,
}
model = ssd_300(**ssd_config)

# 2: Load the trained weights into the model.

# TODO: Set the path of the trained weights.
weights_path = 'lossrec/dataset_mask_epoch-28_loss-5.2064_val_loss-5.9367.h5'
model.load_weights(weights_path, by_name=True)

# 3: Compile the model so that Keras won't complain the next time you load it.

adam = Adam(lr=0.001,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-08,
            decay=0.0)
ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)
model.compile(loss=ssd_loss.compute_loss, optimizer=adam)

In [None]:
# Print the layer-by-layer summary of the model built above.
model.summary()

In [None]:
# Save the model to 'model_inference.h5'; section 1.2 loads this file back.
model.save('model_inference.h5')

Or

### 1.2. Load a trained model

In [None]:
# TODO: Set the path to the `.h5` file of the model to be loaded.
model_path = 'model_inference.h5'

# The loader needs the loss function, so an SSDLoss object has to exist first.
ssd_loss = SSDLoss(neg_pos_ratio=3, n_neg_min=0, alpha=1.0)

K.clear_session()  # Clear previous models from memory.

# Names of the custom layers / loss mapped to the objects that implement them.
custom_objects = {
    'AnchorBoxes': AnchorBoxes,
    'L2Normalization': L2Normalization,
    'DecodeDetections': DecodeDetections,
    'compute_loss': ssd_loss.compute_loss,
}

model = load_model(model_path, custom_objects=custom_objects)

## 2. Load some images

Load some images for which you'd like the model to make predictions.

In [None]:
# We'll only load one image in this example.
img_path = 'VideosAndImages/inputs/images/masks/mask3.jpg'

# Full-resolution copy, kept for drawing the boxes later on.
orig_images = [imread(img_path)]

# Copy resized to the network input size, converted to an array.
img = image.img_to_array(
    image.load_img(img_path, target_size=(img_height, img_width))
)

# Batch of resized images that is fed to the model.
input_images = np.array([img])

## 3. Make predictions

In [None]:
# Run the network on the batch of resized images prepared above.
y_pred = model.predict(input_images)

`y_pred` contains a fixed number of predictions per batch item (200 if you use the original model configuration), many of which are low-confidence predictions or dummy entries. We therefore need to apply a confidence threshold to filter out the bad predictions. Set this confidence threshold value how you see fit.

In [None]:
# Detections whose confidence (column 1) is not above this value are dropped.
confidence_threshold = 0.5

# One filtered array of detections per batch item.
y_pred_thresh = [
    detections[detections[:, 1] > confidence_threshold]
    for detections in y_pred
]

np.set_printoptions(precision=2, suppress=True, linewidth=90)
print("Predicted boxes:\n")
print('   class   conf xmin   ymin   xmax   ymax')
print(y_pred_thresh[0])

## 4. Visualize the predictions

We just resized the input image above and made predictions on the distorted image. We'd like to visualize the predictions on the image in its original size though, so below we'll transform the coordinates of the predicted boxes accordingly.

In [None]:
# Display the image and draw the predicted boxes onto it.

classes = ['background',
           'right', 'no']

# One colour per class, evenly spaced around the HSV wheel. The wheel is
# cyclic, so the end point is excluded to keep the first and last class from
# sharing a colour. (The previous fixed palette of 21 entries was sized for
# Pascal VOC and gave the two foreground classes almost the same red.)
colors = plt.cm.hsv(np.linspace(0, 1, len(classes), endpoint=False)).tolist()

plt.figure(figsize=(20,12))
plt.imshow(orig_images[0])

current_axis = plt.gca()

# Factors that map box coordinates from the resized network input back to the
# original image; they are the same for every box, so compute them once.
orig_height, orig_width = orig_images[0].shape[:2]
x_scale = orig_width / img_width
y_scale = orig_height / img_height

for box in y_pred_thresh[0]:
    # Each row is read as [class id, confidence, xmin, ymin, xmax, ymax].
    class_id = int(box[0])
    xmin = box[2] * x_scale
    ymin = box[3] * y_scale
    xmax = box[4] * x_scale
    ymax = box[5] * y_scale
    color = colors[class_id]
    label = '{}: {:.2f}'.format(classes[class_id], box[1])
    current_axis.add_patch(plt.Rectangle((xmin, ymin), xmax-xmin, ymax-ymin, color=color, fill=False, linewidth=2))
    current_axis.text(xmin, ymin, label, size='x-large', color='white', bbox={'facecolor':color, 'alpha':1.0})

# Images | Local Video | Real-time Video Detection

In [None]:
import imageio
from imageio import imread
import cv2

# Detections whose confidence is not above this value are ignored.
confidence_threshold = 0.5

# Class names, looked up by the class id stored in column 0 of a detection.
classes = ['background', 'right', 'no']

# Size used to rescale predicted box coordinates to the original image.
height, width = 300, 300

In [None]:
# Transforming image size
def transform(input_image):
    """Return ``input_image`` resized to 300x300 using bicubic interpolation."""
    target_size = (300, 300)
    resized = cv2.resize(input_image, target_size, interpolation=cv2.INTER_CUBIC)
    return resized


# Function to detect objects in image
def detect_object(original_image):
    """Run the SSD model on one image and draw its detections onto the image.

    Uses the notebook-level names ``model``, ``transform``, ``classes``,
    ``confidence_threshold``, ``width`` and ``height``.

    Args:
        original_image: Image array of shape (H, W), (H, W, 3) or (H, W, 4).
            NOTE(review): presumably RGB channel order, as produced by
            imageio - confirm for other callers.

    Returns:
        The annotated image. A three-channel input is drawn on in place and
        returned; a grayscale or four-channel input is first converted to a
        new three-channel array, which is then annotated and returned.
    """
    # The batch fed to the model needs exactly three channels; previously a
    # grayscale or RGBA image made the fixed-shape reshape fail.
    if original_image.ndim == 2:
        original_image = cv2.cvtColor(original_image, cv2.COLOR_GRAY2RGB)
    elif original_image.shape[2] == 4:
        original_image = cv2.cvtColor(original_image, cv2.COLOR_RGBA2RGB)

    original_image_height, original_image_width = original_image.shape[:2]

    # Resize, then add the leading batch axis without hard-coding the shape.
    input_image = np.expand_dims(transform(original_image), axis=0)
    y_pred = model.predict(input_image)

    # Single-image batch: keep the rows whose confidence (column 1) is high enough.
    detections = y_pred[0]
    actual_prediction = detections[detections[:, 1] > confidence_threshold]

    # Factors mapping box coordinates back onto the original image.
    x_scale = original_image_width / width
    y_scale = original_image_height / height

    for box in actual_prediction:
        # The last four entries are the two diagonal corners of the box.
        top_left = (int(box[-4] * x_scale), int(box[-3] * y_scale))
        bottom_right = (int(box[-2] * x_scale), int(box[-1] * y_scale))
        label_text = '{}: {:.2f}'.format(classes[int(box[0])], box[1])  # label text
        cv2.rectangle(original_image, top_left, bottom_right, (255, 0, 0), 2)  # drawing rectangle
        cv2.putText(original_image, label_text, top_left, cv2.FONT_HERSHEY_DUPLEX, 1, (231, 237, 243), 2, cv2.LINE_AA)  # putting label
    return original_image

Image Detection

In [None]:
%%time
# Folders holding the images to process and the annotated results.
input_image_path = 'VideosAndImages/inputs/images/masks'
output_image_path = 'VideosAndImages/outputs/images/masks'

# Preview one sample image; it is read with OpenCV and converted from BGR to
# RGB before being shown with matplotlib.
img = cv2.cvtColor(
    cv2.imread('VideosAndImages/inputs/images/masks/mask2.jpg'),
    cv2.COLOR_BGR2RGB,
)
plt.figure(figsize=(15, 15))
plt.subplot(1, 2, 1)
plt.imshow(img)
img.shape

In [None]:
# Detecting objects in images
# Make sure the output folder exists before the first image is written.
os.makedirs(output_image_path, exist_ok=True)

processed_count = 0
for file in os.listdir(input_image_path):
    source_path = os.path.join(input_image_path, file)
    # Skip sub-directories (e.g. Jupyter's .ipynb_checkpoints), which cannot
    # be read as images. imageio.imread raises on failure rather than
    # returning None, so the former `is not None` check never skipped anything.
    if not os.path.isfile(source_path):
        continue
    original_image = imageio.imread(source_path)  # Reading image
    output_image = detect_object(original_image)  # detecting objects
    imageio.imwrite(os.path.join(output_image_path, file), output_image)  # saving back images
    processed_count += 1

# Report the number of images actually processed, not every directory entry.
print('TotalNum:', processed_count)


Local Video Detection

In [None]:
%%time
# Folders holding the videos to process and the annotated results.
input_video_path = 'VideosAndImages/inputs/videos/mask'
output_video_path = 'VideosAndImages/outputs/videos/mask'

from IPython.display import Video
# Last expression of the cell: shows one of the input clips in the notebook.
Video("VideosAndImages/inputs/videos/mask/mask1.mp4")

In [None]:
%%time

# Detecting objects in video
# Make sure the output folder exists before the first video is written.
os.makedirs(output_video_path, exist_ok=True)

for file in os.listdir(input_video_path):
    source_path = os.path.join(input_video_path, file)
    # Skip sub-directories (e.g. Jupyter's .ipynb_checkpoints).
    if not os.path.isfile(source_path):
        continue
    print('Reading', file)
    video_reader = imageio.get_reader(source_path)  # Reading video
    try:
        fps = video_reader.get_meta_data()['fps']  # frame rate of the input video
        video_writer = imageio.get_writer(os.path.join(output_video_path, file), fps=fps)  # output video, same frame rate
        try:
            for i, frame in enumerate(video_reader):
                output_frame = detect_object(frame)  # detecting objects frame by frame
                video_writer.append_data(output_frame)  # appending frame to video
                print('frame ', i, 'done')
        finally:
            # Close the writer even if a frame fails, so the file is finalized.
            video_writer.close()
    finally:
        # The reader was previously never closed.
        video_reader.close()

In [None]:
from IPython.display import Video
# Last expression of the cell: shows the annotated output clip in the notebook.
Video("VideosAndImages/outputs/videos/mask/mask1.mp4")

Real-Time Detection

In [None]:
%%time 
# Real-time detection on the default camera (device 0). Press q to stop.
video_capture = cv2.VideoCapture(0)
try:
    while video_capture.isOpened():
        grabbed, frame = video_capture.read()
        # read() signals failure through its first return value; stop instead
        # of handing an unusable frame to detect_object.
        if not grabbed:
            break
        # OpenCV delivers BGR frames, while the image and video cells feed
        # detect_object with RGB data from imageio, so convert before detecting.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        canvas = detect_object(rgb_frame)
        # cv2.imshow expects BGR again.
        cv2.imshow('Video', cv2.cvtColor(canvas, cv2.COLOR_RGB2BGR))
        if cv2.waitKey(1) & 0xFF == ord('q'):  # Press q to Escape
            break
finally:
    # Always free the camera and close the window, even after an error.
    video_capture.release()
    cv2.destroyAllWindows()

## 5. Make predictions on Pascal VOC 2007 Test

Let's use a `DataGenerator` to make predictions on the Pascal VOC 2007 test dataset and visualize the predicted boxes alongside the ground truth boxes for comparison. Everything here is preset already, but if you'd like to learn more about the data generator and its capabilities, take a look at the detailed tutorial in [this](https://github.com/pierluigiferrari/data_generator_object_detection_2d) repository.