In [1]:
from yolov3 import make_new_seq_yolov3_model
from utils.utils import WeightReader
from utils.utils import preprocess_input
from utils.utils import decode_netout
from utils.utils import correct_yolo_boxes
from utils.utils import do_nms
from utils.bbox import draw_boxes
from utils.utils import get_yolo_boxes
from keras.models import Sequential
from keras.layers import  Dense

import copy
import time
import keras.backend as K
import numpy as np
import cv2
import os

Using TensorFlow backend.


In [2]:
# Network input resolution handed to preprocess_input / decode_netout.
net_h = 416
net_w = 416

# Thresholds passed to decode_netout / draw_boxes and to do_nms respectively.
obj_thresh = 0.5
nms_thresh = 0.45

# Flat anchor list; the detection loop slices it six values at a time, one
# slice per network output (presumably three (width, height) pairs per
# scale -- confirm against decode_netout).
anchors = [
    10, 13, 16, 30, 33, 23,
    30, 61, 62, 45, 59, 119,
    116, 90, 156, 198, 373, 326,
]

# Class names handed to draw_boxes; the list position of a name is its class
# index, so the order must not change.
labels = [
    "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck",
    "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
    "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
    "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
    "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana",
    "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
    "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
    "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
]

# Location and filename prefix of the frame sequence; frames are read as
# <imgs_folder_path><img_name><4-digit index>.jpg by the processing loop.
imgs_folder_path = 'Z:/dataset/KNU-Campus Dataset/images/20180312_172240/'
img_name = '20180312_172240_'

# Two models sharing layer names: infer_model consumes an image, while
# seq_infer_model is fed intermediate feature maps in the processing loop.
infer_model, seq_infer_model = make_new_seq_yolov3_model(input_shape=(416, 416, 3))

# Cached Keras-format weights; created on the first run from 'yolov3.weights'.
weights_h5_path = 'Weights/yolov3.h5'

if not os.path.exists(weights_h5_path):
    # save_weights does not create missing parent directories, so make sure
    # the cache folder exists before converting.
    os.makedirs(os.path.dirname(weights_h5_path), exist_ok=True)
    weight_reader = WeightReader('yolov3.weights')
    weight_reader.load_weights(infer_model)
    infer_model.save_weights(weights_h5_path)
else:
    infer_model.load_weights(weights_h5_path, by_name=True)

# In both cases the sequential model picks up its weights from the cache,
# matched by layer name.
seq_infer_model.load_weights(weights_h5_path, by_name=True)

# Kernel tensors (get_weights()[0]) of the three output convolutions, in the
# order the rest of the notebook indexes them by yolo_num.
output_layer_weights = [
    infer_model.get_layer(name=layer_name).get_weights()[0]
    for layer_name in ('conv_81', 'conv_93', 'conv_105')
]

In [3]:
def get_grad(yolo_num, row, col, out_idx, weights=None):
    """Return a feature-map-shaped array holding one output channel's kernel.

    The result is all zeros except at spatial position (row, col), where the
    channel vector is ``weights[yolo_num][0, 0, :, out_idx]``. Assuming the
    output layers are 1x1 convolutions (TODO confirm against the model
    definition), this is the gradient of output channel ``out_idx`` at
    (row, col) with respect to that layer's input feature map.

    Parameters
    ----------
    yolo_num : int
        Index (0, 1 or 2) selecting the output scale; picks both the result
        shape and the kernel tensor.
    row, col : int
        Spatial cell that receives the kernel vector.
    out_idx : int
        Output-channel index into the last axis of the kernel tensor.
    weights : sequence of arrays, optional
        Kernel tensors indexed by ``yolo_num``, each indexable as
        ``[0, 0, :, out_idx]``. Defaults to the module-level
        ``output_layer_weights`` so existing callers are unaffected.

    Returns
    -------
    numpy.ndarray
        Array of shape (13, 13, 1024), (26, 26, 512) or (52, 52, 256)
        depending on ``yolo_num``.
    """
    if weights is None:
        weights = output_layer_weights

    # Input feature-map shape of each output convolution, indexed by yolo_num.
    prev_layer_shape = [(13, 13, 1024), (26, 26, 512), (52, 52, 256)]

    result = np.zeros(shape=prev_layer_shape[yolo_num])
    result[row, col, :] = weights[yolo_num][0, 0, :, out_idx]
    return result

In [None]:
# --- Tunables for the sequence run -----------------------------------------
num_frames = 900                       # frames 0000 .. 0899 are processed
l_r = 0.5                              # step size applied to the feature update
output_dir = 'outputs/new_seq_attend/'
# Shapes of the three intermediate feature maps, indexed by yolo_num.
feature_shapes = [(13, 13, 1024), (26, 26, 512), (52, 52, 256)]

# cv2.imwrite does not raise when the target directory is missing, so create
# it up front instead of losing every output frame without noticing.
os.makedirs(output_dir, exist_ok=True)

prev_feature = []
try:
    for img_num in range(num_frames):
        img_num2str = str(img_num).zfill(4)
        img_path = imgs_folder_path + img_name + img_num2str + '.jpg'

        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError('could not read frame: ' + img_path)
        image_h, image_w, _ = img.shape

        process_image = preprocess_input(img, net_h, net_w)

        if img_num == 0:
            # First frame: plain forward pass, no previous detections to use.
            yolos = infer_model.predict(process_image)
            prev_feature = copy.deepcopy(yolos[:3])
            yolos = yolos[:3]
        else:
            start_time = time.time()
            yolos = infer_model.predict(process_image)
            # Entries after the first three are the feature maps that get
            # nudged and then fed to seq_infer_model.
            update_layer = yolos[3:]

            gradients = [np.zeros(shape=shape) for shape in feature_shapes]
            count = [np.zeros(shape=shape) for shape in feature_shapes]
            touched = [False] * 3

            # rows/cols/bs/class_nums/yolo_nums describe the boxes drawn on
            # the PREVIOUS frame (assigned at the bottom of this loop).
            for row, col, b, class_num, yolo_num in zip(rows, cols, bs, class_nums, yolo_nums):
                # Channels 85*b+4 and 85*b+5+class_num: presumably the
                # objectness and class-score outputs of anchor b -- confirm
                # against decode_netout.
                gradients[yolo_num] += get_grad(yolo_num=yolo_num, row=row, col=col, out_idx=85 * b + 4)
                gradients[yolo_num] += get_grad(yolo_num=yolo_num, row=row, col=col, out_idx=85 * b + 5 + class_num)
                touched[yolo_num] = True
                count[yolo_num][row, col, :] += 1

            for i in range(3):
                if touched[i]:
                    # Average over the boxes that hit each cell; cells with
                    # no hit divide by 1 and keep their zero gradient.
                    gradients[i] /= np.maximum(count[i], 1)
                    update_layer[i] = update_layer[i] + l_r * gradients[i]

            yolos = seq_infer_model.predict([update_layer[0], update_layer[1], update_layer[2],
                                             prev_feature[0], prev_feature[1], prev_feature[2]])
            prev_feature = copy.deepcopy(yolos[:3])
            yolos = yolos[:3]
            print(time.time() - start_time)

        boxes = []
        for i in range(len(yolos)):
            # decode the output of the network
            yolo_anchors = anchors[(2 - i) * 6:(3 - i) * 6]
            boxes += decode_netout(yolos[i][0], yolo_anchors, obj_thresh, net_h, net_w, i)

        # correct the sizes of the bounding boxes
        correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w)

        # suppress non-maximal boxes
        do_nms(boxes, nms_thresh)

        # draw bounding boxes on the image using labels; the returned index
        # lists drive the feature update on the next frame
        rows, cols, bs, class_nums, yolo_nums = draw_boxes(img, boxes, labels, obj_thresh, debug = False)

        frame_out = cv2.resize(img, (1280, 720))
        cv2.imwrite(output_dir + img_name + img_num2str + '_detected.jpg', frame_out)
        cv2.imshow('video with bboxes', frame_out)
        cv2.waitKey(6)
finally:
    # Release the preview window even when the run stops early.
    cv2.destroyAllWindows()

0.3880457878112793
0.16794610023498535
0.16696810722351074
0.17797398567199707
0.18298649787902832
0.18048810958862305
0.1839892864227295
0.1804804801940918
0.18599438667297363
0.18800044059753418
0.17947840690612793
0.17897582054138184
0.1804800033569336
0.18298673629760742
0.18248534202575684
0.18897438049316406
0.19006943702697754
0.19251251220703125
0.18147945404052734
0.1839895248413086
0.19602108001708984
0.17747163772583008
0.17845702171325684
0.1905062198638916
0.18047857284545898
0.18900752067565918
0.18850302696228027
0.1834883689880371
0.18648171424865723
0.185044527053833
0.18899822235107422
0.18702054023742676
0.18298673629760742
0.1834704875946045
0.18148183822631836
0.18849897384643555
0.17750024795532227
0.17549347877502441
0.1804811954498291
0.17747211456298828
0.1809675693511963
0.17748069763183594
0.17847466468811035
0.17797279357910156
0.17749238014221191
0.17747187614440918
0.18098115921020508
0.18298554420471191
0.18448972702026367
0.183488130569458
0.190506935119

0.18499231338500977
0.1799788475036621
0.17897605895996094
0.17847323417663574
0.1804802417755127
0.1799793243408203
0.18198323249816895
0.17897582054138184
0.17596793174743652
0.1815028190612793
0.18248581886291504
0.17797207832336426
0.18298578262329102
0.17747211456298828
0.1799776554107666
0.17649412155151367
0.17945170402526855
0.17946553230285645
0.17947649955749512
0.17998075485229492
0.18148088455200195
0.17897558212280273
0.18950462341308594
0.17947745323181152
0.18499183654785156
0.17546653747558594
0.17847394943237305
0.17741727828979492
0.17897510528564453
0.18247723579406738
0.17798447608947754
0.17604732513427734
0.17546701431274414
0.17847514152526855
0.18097305297851562
0.18449115753173828
0.18448996543884277
0.19351458549499512
0.18950390815734863
0.18850111961364746
0.18599891662597656
0.18700027465820312
0.18696308135986328
0.19551992416381836
0.18799829483032227
0.19251251220703125
0.19251203536987305
0.19501900672912598
0.1910088062286377
0.19201064109802246
0.1885

0.19651055335998535
0.1940169334411621
0.19902920722961426
0.2010345458984375
0.19951844215393066
0.20003128051757812
0.1970233917236328
0.20654988288879395
0.20705056190490723
0.20705056190490723
0.20407462120056152
0.2060413360595703
0.20304536819458008
0.20554709434509277
0.20605015754699707
0.2045435905456543
0.20354151725769043
0.20455169677734375
0.1975252628326416
0.20454812049865723
0.20554685592651367
0.20704889297485352
0.20905637741088867
0.20253610610961914
0.1995220184326172
0.20605182647705078
0.2050459384918213
0.20504522323608398
0.2050457000732422
0.20604753494262695
0.20053315162658691
0.19702434539794922
0.2035353183746338
0.19903111457824707
0.2010347843170166
0.19855427742004395
0.20052075386047363
0.2010331153869629
0.20003104209899902
0.19903016090393066
0.20003223419189453
0.20003223419189453
0.20003271102905273
0.20000386238098145
0.2005317211151123
0.19953107833862305
0.20003199577331543
0.20554709434509277
0.20103216171264648
0.21707820892333984
0.21106171607