# load model

In [5]:
import cv2
import operator
from statistics import mode
from utils_video import preprocess_input
from utils_video import get_labels

from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
from keras import backend as K
from keras.models import load_model
from math import ceil
import numpy as np
from matplotlib import pyplot as plt

%matplotlib inline

from keras_ssd300 import ssd_300
from keras_ssd_loss import SSDLoss
from ssd_box_encode_decode_utils import SSDBoxEncoder, decode_y, decode_y2
from ssd_batch_generator import BatchGenerator
from utils import draw_axis, plot_pose_cube

import tensorflow as tf  
import keras.backend.tensorflow_backend as KTF  
# Configure how GPU memory is allocated: with `allow_growth` enabled,
# TensorFlow grows its GPU allocation as needed rather than claiming it all
# when the session is created.
config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
# Make Keras run on a session that uses this configuration.
KTF.set_session(tf.Session(config=config))


### Set up the model

# 1: Set some necessary parameters

# Height, width and number of color channels of the input images.
img_height, img_width, img_channels = 288, 384, 3

# Number of classes including the background class
# (e.g. 21 for the Pascal VOC datasets).
n_classes = 2

# Anchor box scaling factors used in the original SSD300 for the Pascal VOC
# datasets. The factors for the MS COCO dataset are smaller, namely
# [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05].
scales = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05]

# Anchor box aspect ratios used in the original SSD300, one list per entry.
aspect_ratios = [
    [0.5, 1.0, 2.0],
    [1.0 / 3.0, 0.5, 1.0, 2.0, 3.0],
    [1.0 / 3.0, 0.5, 1.0, 2.0, 3.0],
    [1.0 / 3.0, 0.5, 1.0, 2.0, 3.0],
    [0.5, 1.0, 2.0],
    [0.5, 1.0, 2.0],
]

# Forwarded unchanged to `ssd_300` when the model is built.
two_boxes_for_ar1 = True

# Whether or not to limit the anchor boxes to lie entirely within the image
# boundaries.
limit_boxes = False

# Variances by which the encoded target coordinates are scaled, as in the
# original implementation.
variances = [0.1, 0.1, 0.2, 0.2]

# Whether the box coordinates used as model targets are in the 'centroids'
# or the 'minmax' format.
coords = 'centroids'

# Forwarded unchanged to `ssd_300` when the model is built.
normalize_coords = True

# 2: Build the Keras model (and possibly load some trained weights)

# Clear previous models from memory before building a new one.
K.clear_session()

# `ssd_300` returns the Keras model together with `predictor_sizes`; the
# latter is what `SSDBoxEncoder` needs for its setup (it is not used by the
# code visible in this notebook excerpt).
model, predictor_sizes = ssd_300(
    image_size=(img_height, img_width, img_channels),
    n_classes=n_classes,
    # An explicit `scales` list is passed, so the alternative
    # min-scale/max-scale pair is left unset.
    scales=scales,
    min_scale=None,
    max_scale=None,
    # Aspect ratios are given per layer, so the global list is left unset.
    aspect_ratios_per_layer=aspect_ratios,
    aspect_ratios_global=None,
    two_boxes_for_ar1=two_boxes_for_ar1,
    limit_boxes=limit_boxes,
    variances=variances,
    coords=coords,
    normalize_coords=normalize_coords,
)

# Load the weights file; `by_name=True` assigns weights to layers by matching
# layer names rather than by position in the network.
model.load_weights('./ssd300_0_weights.h5', by_name=True)

In [15]:
# video
# Run the SSD model on every frame of 'output.avi', draw each detection's box
# and pose cube onto the resized frame, and save one JPEG per frame.
import os  # local to this cell: only needed to create the output directory

# Directory that receives the annotated frames. `cv2.imwrite` does not create
# missing directories (it just returns False), so make sure it exists.
output_dir = './videoPics300'
os.makedirs(output_dir, exist_ok=True)

video_capture = cv2.VideoCapture('output.avi')
font = cv2.FONT_HERSHEY_SIMPLEX
cv2.namedWindow('window_frame')
c = 0  # index used in the name of the next image file

try:
    while True:
        ret, frame = video_capture.read()
        if not ret:
            # End of the video (or the file could not be read).
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        print(frame.shape)

        # Resize to the model's input size; `cv2.resize` takes (width, height).
        frame1 = cv2.resize(frame, (img_width, img_height),
                            interpolation=cv2.INTER_CUBIC)
        X = frame1[np.newaxis, :]  # add the batch dimension

        y_pred = model.predict(X)

        # Decode the raw prediction `y_pred`.
        y_pred_decoded = decode_y2(y_pred,
                                   confidence_thresh=0.99,
                                   iou_threshold=0.2,
                                   top_k='all',
                                   input_coords='centroids',
                                   normalize_coords=True,
                                   img_height=img_height,
                                   img_width=img_width)

        # Start from the current frame so that a frame without detections is
        # still written as-is. Previously `img` was only assigned inside the
        # loop below, so such frames either raised NameError (silently
        # swallowed) or re-saved the previous frame's image after a second
        # color conversion.
        img = frame1

        # Draw the predicted boxes in blue; the frame is RGB at this point,
        # so (0, 0, 255) is blue.
        for box in y_pred_decoded[0]:
            # NOTE(review): indices 4-7 are read as xmin, xmax, ymin, ymax,
            # matching how the corners were drawn originally -- confirm
            # against `decode_y2`.
            x_min, x_max = box[4], box[5]
            y_min, y_max = box[6], box[7]
            img = cv2.rectangle(img,
                                (int(x_min), int(y_min)),
                                (int(x_max), int(y_max)),
                                (0, 0, 255), 2)
            # NOTE(review): box[3] and box[2], scaled by 100, are presumably
            # the pose angles expected by `plot_pose_cube` -- confirm. The
            # cube is centered on the box.
            img = plot_pose_cube(img,
                                 float(box[3]) * 100,
                                 float(box[2]) * 100,
                                 0,
                                 tdx=x_min + (x_max - x_min) / 2,
                                 tdy=y_min + (y_max - y_min) / 2,
                                 size=60)

        # OpenCV writes images in BGR channel order.
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        out_path = os.path.join(output_dir, 'image' + str(c) + '.jpg')
        if not cv2.imwrite(out_path, img):
            # Report the failure instead of hiding it, but keep processing.
            print('Could not write ' + out_path)
        c += 1

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break
finally:
    # Release the capture and window even if processing is interrupted.
    video_capture.release()
    cv2.destroyAllWindows()

(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 

(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 640, 3)
(480, 