In [1]:
import os
import imageio
from matplotlib.pyplot import imshow
import numpy as np
from PIL import Image
import cv2
from keras import backend as K
from keras.models import load_model

from yolo_utils import read_classes, read_anchors, generate_colors, preprocess_image, draw_boxes,detect_video
from yad2k.models.keras_yolo import yolo_head, yolo_eval

Using TensorFlow backend.


In [2]:
# Load the saved Keras YOLO model together with the anchor definitions and the
# class-label list that the later cells pass alongside it.
yolo_model = load_model("model_data/yolo.h5")
anchors = read_anchors("model_data/yolo_anchors.txt")
class_names = read_classes("model_data/coco_classes.txt")
# Uncomment to print the layer-by-layer architecture:
# yolo_model.summary()



In [3]:
# Input resolution expected by the network: elements 1 and 2 of the first
# layer's input shape (608, 608 for this model, per the original note).
model_image_size = yolo_model.layers[0].input_shape[1:3]

# Build the YOLO head tensors from the model's raw output tensor; these are
# handed to the detection function further down.
yolo_outputs = yolo_head(
    yolo_model.output,
    anchors,
    len(class_names),
)

In [4]:
# The annotated video is written under the same file name as the input,
# read from 'videos/' and written to 'out/'.
video_name = 'test_video.mp4'
image_path, output_path = (
    os.path.join(folder, video_name) for folder in ('videos', 'out')
)

In [None]:
def detect_video(image_path, output_path, model_image_size, yolo_model, class_names, yolo_outputs):
    """Run YOLO detection on every frame of a video and write an annotated copy.

    Note: once this cell has been run, this definition shadows the
    ``detect_video`` imported from ``yolo_utils`` at the top of the notebook.

    Parameters
    ----------
    image_path : str
        Path of the input video; opened with ``cv2.VideoCapture``.
    output_path : str
        Path of the annotated video. It is written with the ``DIVX`` fourcc at
        the frame size and frame rate reported by the input video.
    model_image_size : sequence of two ints
        Network input size. It is reversed before being handed to
        ``PIL.Image.resize`` (which expects ``(width, height)``), so it is
        presumably ``(height, width)`` -- confirm against the model.
    yolo_model : Keras model
        Model whose ``input`` tensor is fed with each preprocessed frame.
    class_names : list
        Class labels; passed to ``generate_colors`` and ``draw_boxes``.
    yolo_outputs
        Result of ``yolo_head``; passed to ``yolo_eval``.

    Raises
    ------
    IOError
        If the input video or the output writer cannot be opened.

    Side effects
    ------------
    Writes ``output_path``, prints ``"detect Done"`` and, on success, closes
    the session returned by ``K.get_session()``. NOTE(review): that is
    presumably the backend's shared session, so the model may not be usable
    again in the same kernel afterwards -- confirm before reusing it.
    """
    from keras import backend as K
    sess = K.get_session()

    video_in = cv2.VideoCapture(image_path)
    video_out = cv2.VideoWriter()
    try:
        if not video_in.isOpened():
            raise IOError("Could not open input video: {}".format(image_path))

        width = int(video_in.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video_in.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = video_in.get(cv2.CAP_PROP_FPS)

        opened = video_out.open(output_path, cv2.VideoWriter_fourcc(*'DIVX'), fps, (width, height))
        if not opened:
            raise IOError("Could not open output video for writing: {}".format(output_path))

        # Everything below is identical for every frame, so build it once.
        # In particular yolo_eval adds ops to the TF graph; calling it per
        # frame makes the graph (and memory use) grow with the video length.
        image_shape = (np.array(height, dtype=float), np.array(width, dtype=float))
        boxes, scores, classes = yolo_eval(yolo_outputs, image_shape)
        colors = generate_colors(class_names)
        network_size = tuple(reversed(model_image_size))

        while video_in.isOpened():
            ret, frame_bgr = video_in.read()
            if not ret:
                # End of stream (or a read failure): stop processing.
                break

            # OpenCV delivers BGR; PIL and the drawing helper work in RGB.
            frame_rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
            image = Image.fromarray(frame_rgb, mode='RGB')

            # Preprocess: resize to the network input size, scale to [0, 1]
            # and add the batch dimension.
            resized_image = image.resize(network_size, Image.BICUBIC)
            image_data = np.array(resized_image, dtype='float32')
            image_data /= 255.
            image_data = np.expand_dims(image_data, 0)

            out_boxes, out_scores, out_classes = sess.run(
                [boxes, scores, classes],
                feed_dict={yolo_model.input: image_data,
                           K.learning_phase(): 0})

            # draw_boxes annotates the full-resolution PIL image, which is
            # then converted back to BGR for the writer.
            draw_boxes(image, out_scores, out_boxes, out_classes, class_names, colors)
            video_out.write(cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR))
    finally:
        # Always release the video handles, even if a frame fails.
        video_in.release()
        video_out.release()

    sess.close()
    print("detect Done")

In [5]:
# Annotate the input video frame by frame and write the result to output_path.
# `detect_video` is both imported from yolo_utils and defined in this notebook;
# whichever of the two ran last in the kernel is the one called here.
detect_video(
    image_path,
    output_path,
    model_image_size,
    yolo_model,
    class_names,
    yolo_outputs,
)

car 0.61 (467, 279) (524, 315)
car 0.62 (459, 280) (509, 316)
car 0.63 (452, 283) (508, 314)
car 0.67 (459, 280) (509, 313)
car 0.65 (461, 282) (509, 313)
car 0.73 (463, 283) (514, 312)
car 0.63 (473, 284) (527, 313)
car 0.73 (490, 283) (537, 313)
car 0.62 (497, 282) (546, 311)
car 0.67 (502, 283) (556, 310)
truck 0.61 (303, 150) (695, 378)
car 0.64 (169, 274) (316, 365)
truck 0.70 (320, 164) (670, 391)
car 0.70 (102, 275) (307, 365)
bus 0.67 (336, 192) (613, 387)
car 0.77 (43, 274) (280, 369)
bus 0.67 (340, 210) (596, 401)
car 0.78 (0, 275) (254, 382)
car 0.75 (5, 270) (229, 382)
bus 0.84 (336, 218) (570, 399)
car 0.72 (10, 254) (176, 391)
bus 0.84 (319, 229) (564, 401)
person 0.62 (537, 336) (563, 362)
person 0.65 (658, 317) (691, 376)
bus 0.81 (317, 279) (461, 401)
bus 0.60 (317, 285) (457, 394)
person 0.62 (664, 317) (695, 375)
car 0.60 (269, 348) (307, 371)
bus 0.84 (319, 287) (453, 392)
car 0.60 (270, 347) (307, 372)
truck 0.68 (160, 318) (243, 381)
bus 0.80 (318, 290) (445, 387)