# Prediction on images and videos using Keras YOLO v2 models

This notebook detects objects in images and videos using trained Keras YOLO v2 models. It should be run from inside the [root folder](https://github.com/experiencor/keras-yolo2) of the Keras YOLO git project. The corresponding back-end, configuration file and trained model must be placed in the same folder.

In [8]:
import argparse
import os
import cv2
import numpy as np
from tqdm import tqdm
from preprocessing import parse_annotation
from utils import draw_boxes
from frontend import YOLO
import json
import time
import tensorflow as tf

In [9]:
# Select the GPU for this process: order devices by PCI bus id and make
# only device "0" visible to CUDA.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})

In [10]:
# TensorFlow (1.x API) session options.
# The options object is deliberately named `tf_config`, not `config`:
# `config` is the name of the JSON configuration dict loaded further down,
# and sharing the name would break the prediction cells whenever this cell
# is re-run after the configuration has been loaded.
tf_config = tf.ConfigProto()

# Don't pre-allocate memory; allocate as-needed
tf_config.gpu_options.allow_growth = True

# Optional: only allow a total of half the GPU memory to be allocated
# tf_config.gpu_options.per_process_gpu_memory_fraction = 0.5

# Create a session with the above options and register it with tf.keras.
tf.keras.backend.set_session(tf.Session(config=tf_config))
# NOTE(review): if the model code uses standalone Keras instead of tf.keras,
# the session presumably has to be registered through
# keras.backend.tensorflow_backend.set_session(...) instead -- confirm.

Set or adjust the required parameters: the configuration file, the trained weights, and the input and output folders.

In [11]:
# JSON file describing the model (backend, input size, labels, anchors, ...).
config_path = "./config_full_yolo.json"
# Trained weights that are loaded into the model.
weights_path = "./full_yolo_singapore_dataset.h5"
# Folder containing the test images.
input_folder = "/home/tbontz2s/git/tensorflow/workspace/training_demo/images/test"
# Folder that receives the annotated images.
output_folder = "/home/tbontz2s/RESULTS_PHOTOS/yolo_results/full_yolo"

In [12]:
# Read the JSON configuration; later cells look up config['model']['labels'].
with open(config_path) as config_file:
    config = json.load(config_file)

# Build the detector from the "model" section of the configuration.
model_cfg = config['model']
yolo = YOLO(
    backend=model_cfg['backend'],
    input_size=model_cfg['input_size'],
    labels=model_cfg['labels'],
    max_box_per_image=model_cfg['max_box_per_image'],
    anchors=model_cfg['anchors'],
)

# Load the trained weights into the model that was just built.
yolo.load_weights(weights_path)

(13, 13)
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            (None, 416, 416, 3)  0                                            
__________________________________________________________________________________________________
model_3 (Model)                 (None, 13, 13, 1024) 50547936    input_4[0][0]                    
__________________________________________________________________________________________________
DetectionLayer (Conv2D)         (None, 13, 13, 70)   71750       model_3[1][0]                    
__________________________________________________________________________________________________
reshape_2 (Reshape)             (None, 13, 13, 5, 14 0           DetectionLayer[0][0]             
__________________________________________________________________________________________________
i

Select the images to be used for testing and build their full paths inside the input folder.

In [13]:
# Handpicked test images (file names relative to `input_folder`).
images_filenames = [
    'MVI_1468_NIR_frame245.jpg',
    'MVI_1469_VIS_frame470.jpg',
    'MVI_1474_VIS_frame425.jpg',
    'MVI_1486_VIS_frame620.jpg',
    'MVI_1578_VIS_frame490.jpg',
    'MVI_1609_VIS_frame400.jpg',
    'MVI_0797_VIS_OB_frame425.jpg',
    'MVI_1520_NIR_frame490.jpg',
    'MVI_0895_NIR_Haze_frame340.jpg',
]

# Full path of every selected image, in the same order as the list above.
images = [
    os.path.join(input_folder, name)
    for name in images_filenames
]

In [7]:
# Run the detector on every selected image, draw the detected boxes and save
# the annotated copy to `output_folder` under the original file name.

# cv2.imwrite does not create missing directories; it just reports failure.
os.makedirs(output_folder, exist_ok=True)

for img_path in images:
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread signals a missing or unreadable file by returning None
        # instead of raising, which would otherwise crash inside the model.
        print('Skipping unreadable image:', img_path)
        continue

    boxes = yolo.predict(image)
    image = draw_boxes(image, boxes, config['model']['labels'])

    print(len(boxes), 'boxes are found')

    out_path = os.path.join(output_folder, os.path.basename(img_path))
    if not cv2.imwrite(out_path, image):
        # Surface write failures instead of losing the result silently.
        print('Failed to write image:', out_path)
    

8 boxes are found
6 boxes are found
14 boxes are found
4 boxes are found
7 boxes are found
7 boxes are found
3 boxes are found
2 boxes are found
2 boxes are found


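Run detection on an unlabelled video and write the annotated video next to the source file (with `_detected` appended to its name).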

In [17]:
# Detect objects in every frame of a video and write the annotated frames to
# a new video file stored next to the input.
image_path = '/home/tbontz2s/singapore_dataset/VIS_Onshore/Videos/MVI_1470_VIS.avi'

# Output name: <input name>_detected<input extension>.
video_root, video_ext = os.path.splitext(image_path)
video_out = video_root + '_detected' + video_ext

video_reader = cv2.VideoCapture(image_path)
video_writer = None

try:
    if not video_reader.isOpened():
        raise IOError('Could not open video: ' + image_path)

    nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
    frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
    frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))

    # NOTE(review): the output frame rate is fixed at 50 fps regardless of the
    # frame rate of the source video -- confirm this is intended.
    video_writer = cv2.VideoWriter(video_out,
                                   cv2.VideoWriter_fourcc(*'MPEG'),
                                   50.0,
                                   (frame_w, frame_h))

    for i in tqdm(range(nb_frames)):
        grabbed, image = video_reader.read()
        if not grabbed:
            # The reported frame count can exceed the number of decodable
            # frames; stop instead of passing an empty frame to the model.
            break

        boxes = yolo.predict(image)
        image = draw_boxes(image, boxes, config['model']['labels'])

        video_writer.write(np.uint8(image))
finally:
    # Always release the handles so the output file is finalised even when
    # prediction fails part-way through.
    video_reader.release()
    if video_writer is not None:
        video_writer.release()

100%|██████████| 266/266 [00:22<00:00, 11.47it/s]
