## shared

In [1]:
import numpy as np
import json

def boxes_match(box1,box2,iou_threshold=0.4):
    
    if(box1[1]!=box2[1] or box1[7]==1 or box2[7]==1):
        return False
    
    return iou(box1[3:7],box2[3:7],coords='corners')>0.7

def corners_to_width_height(box):
    box2 = np.zeros((4),np.float32)
    box2[0] = box[0]
    box2[1] = box[1]
    box2[2] = box[2]-box[0]
    box2[3] = box[3]-box[1]
    return box2
def box_matches_list(box1,list_boxes):
    index =-1
    if(len(list_boxes)==0):
        return index
    match_list = []
    box_index =0
    
    for box in list_boxes:
       
        if(box.shape[0]>0):
            
            if(box.shape[0]==1):
                
                if(boxes_match(box[0],box1)):
                    match_list.append(box_index)
                    
            elif(box.shape[0]==8):
                
                if(boxes_match(box,box1)):
                    
                    match_list.append(box_index)
        box_index+=1
    
    if(len(match_list)== 0):
        return index
    max_match_index = 0
    max_match_iou = 0
    for i in match_list:
        cur_iou = iou(box1[3:7],list_boxes[i][3:7],coords='corners')
        
        if(cur_iou>max_match_iou):
            max_match_iou=cur_iou
            max_match_index =i
    list_boxes[max_match_index][7] = 1
    box1[7] = 1
    
    return max_match_index


def center(box):
    return np.array([(box[3]+box[5])/2,(box[4]+box[6])/2],np.float32)
def topleft(box):
    return np.array([box[3],box[4]],np.float32)
def botright(box):
    return np.array([box[5],box[6]],np.float32)
def corners(box):
    z = np.zeros(4,np.float32)
    z[:2] = topleft(box)
    z[2:4] = botright(box)
    return z
classes = ['background',
           'aeroplane', 'bicycle', 'bird', 'boat',
           'bottle', 'bus', 'car', 'cat',
           'chair', 'cow', 'diningtable', 'dog',
           'horse', 'motorbike', 'person', 'pottedplant',
           'sheep', 'sofa', 'train', 'tvmonitor']

## Reading Text input of predictions

In [4]:
gt={}
json_ground_trth = "../data_graal1_rev.json"
with open(json_ground_trth) as json_file:  
    data = json.load(json_file)
    for box in data['annotations']:
        image_id_5 = 'graal_1/%s.jpg'%str(box['image_id']).zfill(5)
        bbox = box['bbox']
        bbox[2]+=bbox[0]
        bbox[3] += bbox[1]
        if(image_id_5 in gt):
            gt[image_id_5].append(list(map(float, bbox)))
        else:
            gt[image_id_5]=[]
            gt[image_id_5].append(list(map(float, bbox)))
print(gt)

{'graal_1/00001.jpg': [[237.0, 392.0, 520.0, 498.0]], 'graal_1/00002.jpg': [[573.0, 331.0, 712.0, 448.0], [225.0, 387.0, 508.0, 493.0]], 'graal_1/00003.jpg': [[573.0, 331.0, 712.0, 448.0], [231.0, 391.0, 506.0, 497.0]], 'graal_1/00004.jpg': [[573.0, 331.0, 712.0, 448.0], [224.0, 392.0, 507.0, 498.0]], 'graal_1/00005.jpg': [[573.0, 331.0, 702.0, 448.0], [212.0, 392.0, 495.0, 498.0]], 'graal_1/00006.jpg': [[573.0, 331.0, 702.0, 448.0], [217.0, 391.0, 483.0, 497.0]], 'graal_1/00007.jpg': [[564.0, 331.0, 693.0, 448.0], [216.0, 391.0, 480.0, 493.0]], 'graal_1/00008.jpg': [[561.0, 331.0, 696.0, 448.0], [208.0, 391.0, 472.0, 493.0]], 'graal_1/00009.jpg': [[561.0, 331.0, 696.0, 448.0], [207.0, 391.0, 471.0, 493.0]], 'graal_1/00010.jpg': [[555.0, 331.0, 691.0, 448.0], [201.0, 391.0, 465.0, 493.0]], 'graal_1/00011.jpg': [[550.0, 332.0, 686.0, 449.0], [196.0, 391.0, 460.0, 493.0]], 'graal_1/00012.jpg': [[550.0, 332.0, 686.0, 449.0], [189.0, 391.0, 462.0, 493.0]], 'graal_1/00013.jpg': [[544.0, 332

In [7]:
text_file_path = "venc_retinanet.txt"
f = open(text_file_path,"r")
line = f.readline()
detections={}
comps = []
while(line):
    
    line = line.replace("\n", "")
    comps = line.split(",")
    print(comps)
    if(comps[0] in detections):
        detections[comps[0]].append(list(map(float, comps[2:])))
    else:
        detections[comps[0]]=[]
        detections[comps[0]].append(list(map(float, comps[2:])))
    line=f.readline()
f.close()
print(detections)
frame_width = 640
frame_height = 464

['venc/00002.jpg', 'boat', '8', '0.877166', '710.273315', '157.810913', '928.814941', '228.186584']
['venc/00002.jpg', 'boat', '8', '0.766262', '0.187498', '725.839417', '373.764252', '920.299744']
['venc/00002.jpg', 'kite', '33', '0.736047', '429.656433', '592.777100', '477.655060', '691.508850']
['venc/00002.jpg', 'boat', '8', '0.692697', '176.883316', '207.716217', '783.619812', '366.771973']
['venc/00002.jpg', 'person', '0', '0.602930', '608.617615', '895.559814', '654.304382', '958.136658']
['venc/00002.jpg', 'boat', '8', '0.561864', '956.558289', '98.457291', '1080.338013', '188.377731']
['venc/00003.jpg', 'boat', '8', '0.868575', '701.625366', '157.098785', '921.781311', '229.913940']
['venc/00003.jpg', 'kite', '33', '0.789931', '429.867493', '592.846619', '477.153351', '691.495422']
['venc/00003.jpg', 'boat', '8', '0.756838', '0.327709', '723.922424', '370.853577', '917.523682']
['venc/00003.jpg', 'boat', '8', '0.660732', '184.307175', '209.280518', '803.001160', '365.962097']


['venc/00328.jpg', 'boat', '8', '0.845705', '0.000000', '729.732300', '384.936035', '934.418945']
['venc/00328.jpg', 'boat', '8', '0.631809', '1076.807739', '161.662506', '1141.574341', '197.279205']
['venc/00328.jpg', 'boat', '8', '0.614669', '1.709020', '330.483154', '143.450043', '409.159546']
['venc/00328.jpg', 'airplane', '4', '0.564448', '982.246582', '465.181396', '1221.376953', '633.930603']
['venc/00328.jpg', 'kite', '33', '0.531840', '430.120361', '592.664795', '476.436157', '691.426025']
['venc/00329.jpg', 'boat', '8', '0.853760', '53.975723', '201.699982', '452.538025', '299.136841']
['venc/00329.jpg', 'boat', '8', '0.775133', '1.161064', '331.785675', '155.354172', '407.440613']
['venc/00329.jpg', 'boat', '8', '0.749937', '0.000000', '730.002563', '386.715790', '937.541870']
['venc/00329.jpg', 'boat', '8', '0.634633', '1077.220337', '161.211746', '1141.014038', '196.922974']
['venc/00329.jpg', 'kite', '33', '0.584174', '429.614960', '595.767273', '476.165894', '691.296265'

['venc/00591.jpg', 'boat', '8', '0.541774', '1080.379395', '162.432312', '1141.226196', '197.658539']
['venc/00592.jpg', 'boat', '8', '0.855924', '445.783508', '273.134857', '767.137817', '366.699768']
['venc/00592.jpg', 'boat', '8', '0.759146', '978.854614', '197.421707', '1139.258301', '242.462570']
['venc/00592.jpg', 'boat', '8', '0.664626', '0.000000', '730.645935', '384.769562', '932.173279']
['venc/00592.jpg', 'kite', '33', '0.562126', '429.228638', '594.622498', '474.917633', '692.124939']
['venc/00593.jpg', 'boat', '8', '0.888031', '457.909668', '269.861145', '775.137695', '366.112915']
['venc/00593.jpg', 'boat', '8', '0.687373', '0.000000', '733.071411', '381.306000', '930.100281']
['venc/00593.jpg', 'boat', '8', '0.650572', '975.777039', '199.342697', '1139.009033', '242.440872']
['venc/00593.jpg', 'kite', '33', '0.623651', '429.230804', '595.481750', '474.914307', '692.135925']
['venc/00594.jpg', 'boat', '8', '0.858855', '483.915955', '270.976959', '802.153381', '369.375305'

## No Tracking (Baseline)

## GT

In [5]:
# import the necessary packages
from imutils.video import VideoStream
import argparse
import datetime
import imutils
import time
import cv2 as cv
import time
import json
import numpy as np
from bounding_box_utils.bounding_box_utils import iou
frame_width = 1032
frame_height = 778

no_tracking_res = [] 
firstFrame = None
frameCount =0
total_objects_tracking=0
out_tracking = cv.VideoWriter('graal_1_videos/gt.avi',cv.VideoWriter_fourcc('M','J','P','G'), 30, (frame_width,frame_height))
started = False
total_frames = 641
prev_frame=None

while frameCount<total_frames:
    
    frame = cv.imread('../graal_1/%s.jpg'%str(frameCount+2).zfill(5))
    
    
    
    if frame is None:
        break
   
        
    if 'graal_1/%s.jpg'%str(frameCount+1).zfill(5) in gt:
        for box in gt['graal_1/%s.jpg'%str(frameCount+1).zfill(5)]:
            
            xmin = int(box[0] )
            ymin = int(box[1])
            xmax =int(box[2])
            ymax =int(box[3])
            cv.rectangle(frame, (int(xmin), int(ymin)), (int(xmax),int(ymax)), (255, 0, 0), 2)
            cv.putText(frame,'{}'.format(classes[4]), (xmin, ymin),cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

            no_tracking_res.append({"image_id" : frameCount+1, "category_id" : 1, "bbox" : [float(xmin),float(ymin),float(xmax-xmin),float(ymax-ymin)], "score" : box[1]})
            total_objects_tracking+=1
        
  
    out_tracking.write(frame)
    #cv.imwrite('debug_frames/%s.jpg'%str(frameCount+1).zfill(5),frame)
    frameCount+=1

out_tracking.release()
#with open('graal_2_videos/yolo/baseline.json', 'w') as outfile:  
#    json.dump(no_tracking_res, outfile)
print(total_objects_tracking)


715


In [8]:
# import the necessary packages
from imutils.video import VideoStream
import argparse
import datetime
import imutils
import time
import cv2 as cv
import time
import motmetrics as mm
import json
import numpy as np
from bounding_box_utils.bounding_box_utils import iou
frame_width = 1280
frame_height = 960

no_tracking_res = [] 
firstFrame = None
frameCount =0
total_objects_tracking=0
out_tracking = cv.VideoWriter('v_videos/retinanet/baseline.avi',cv.VideoWriter_fourcc('M','J','P','G'), 30, (frame_width,frame_height))
started = False
total_frames = 600
prev_frame=None
acc = mm.MOTAccumulator(auto_id=True)
image_id_prefix= 'venc'
while frameCount<total_frames:
    
    frame = cv.imread('../v/%s.jpg'%str(frameCount+1).zfill(5))
    
    
    
    if frame is None:
        break
    preds = []
    if '%s/%s.jpg'%(image_id_prefix,str(frameCount+1).zfill(5)) in detections:
        
        for box in detections['%s/%s.jpg'%(image_id_prefix,str(frameCount+1).zfill(5))]:
            
            if(box[0]!=8 or box[1]<0.5):

                continue
            temp_pred = np.insert(box,0,0)
            temp_pred=np.insert(temp_pred,7,0)
            preds.append(temp_pred)
        
    if 'venc/%s.jpg'%str(frameCount+1).zfill(5) in detections:
        for box in detections['venc/%s.jpg'%str(frameCount+1).zfill(5)]:
            if(box[0] == 8 and box[1]>0.5):
                
                xmin = int(box[2] )
                ymin = int(box[3])
                xmax =int(box[4])
                ymax =int(box[5])
                cv.rectangle(frame, (int(xmin), int(ymin)), (int(xmax),int(ymax)), (0, 255, 0), 2)
                cv.putText(frame,'{}: {:.2f}'.format(classes[int(box[0])], box[1]), (xmin, ymin),cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

                no_tracking_res.append({"image_id" : frameCount+1, "category_id" : 1, "bbox" : [float(xmin),float(ymin),float(xmax-xmin),float(ymax-ymin)], "score" : np.minimum(box[2],1)})
                total_objects_tracking+=1
        
        
        
        
    out_tracking.write(frame)
    #cv.imwrite('debug_frames/%s.jpg'%str(frameCount+1).zfill(5),frame)
    frameCount+=1

out_tracking.release()
with open('v_videos/retinanet/baseline.json', 'w') as outfile:  
    json.dump(no_tracking_res, outfile)
print(total_objects_tracking)


1793


In [35]:
mh = mm.metrics.create()

summary = mh.compute(acc, metrics=['num_frames', 'num_false_positives','num_misses','num_switches','num_matches','mostly_tracked','partially_tracked','mostly_lost','precision','recall','mota', 'motp',], name='acc')
print(summary)

     num_frames  num_false_positives  num_misses  num_switches  num_matches  \
acc         449                    3         166             0          283   

     mostly_tracked  partially_tracked  mostly_lost  precision   recall  \
acc               0                  1            0    0.98951  0.63029   

         mota      motp  
acc  0.623608  0.272828  


## BBox Center Flow

In [41]:
# import the necessary packages
from imutils.video import VideoStream
import argparse
import datetime
import imutils
import time
import cv2 as cv
import time
import json
from bounding_box_utils.bounding_box_utils import iou
feature_params = dict( maxCorners = 25,
                       qualityLevel = 0.3,
                       minDistance = 7,
                       blockSize = 7 )
lk_params = dict( winSize  = (15,15),
                  maxLevel = 2,
                  criteria = (cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 0.03))
no_tracking_res = [] 
tracking_res = []

# initialize the first frame in the video stream
firstFrame = None
frameCount =0
step = 0.05
total_objects_no_tracking=0
total_objects_tracking=0
frame_width = 640
frame_height = 464

preds = []
pred = None
tracks=[]
out_tracking = cv.VideoWriter('retina_center_flow.avi',cv.VideoWriter_fourcc('M','J','P','G'), 30, (frame_width,frame_height))
started = False
# your code

multiplier=0
cc=0
prev_frame=None
total_frames=641
frameCount=0
while frameCount<total_frames:
    # grab the current frame and initialize the occupied/unoccupied
    # text
    frame = cv.imread('../modd/%s.jpg'%str(frameCount+1).zfill(5))
    
 
    # if the frame could not be grabbed, then we have reached the end
    # of the video
    if frame is None:
        break
    frameCount+=1
    if(frameCount<0):
        continue
   
    preds = []
    if 'modd/%s.jpg'%str(frameCount+1).zfill(5) in detections:
        
        for box in detections['modd/%s.jpg'%str(frameCount+1).zfill(5)]:
           
            if(box[0]!=8):

                continue
            temp_pred = np.insert(box,0,0)
            temp_pred=np.insert(temp_pred,7,0)
            preds.append(temp_pred)
   
    for track in tracks:
        track[7]=0
    
    
    for pred in preds:
        index = box_matches_list(pred,tracks)
        if(index>-1):
            
            multiplier = tracks[index][0]
            org_conf =tracks[index][2]
            
            if(multiplier<0):
                multiplier=1
            else:
                multiplier +=1
            
             
            del tracks[index]
            
            pred[2] += multiplier*step
            
            pred[0] = multiplier
            
            tracks.append(pred)
        else:
            
            temp_pred = np.copy(pred)
           
            multiplier = 1
            
          
            temp_pred[0] = multiplier
            temp_pred[7]=1
            tracks.append(temp_pred)
         
            cc = frameCount
    #print('current tracks')
    #print(tracks)
    for track in tracks:
        if(track[7]==0 and box_matches_list(track,preds)==-1):
            #print('not found in current frame')
            track[0]-=1
            #print('multiplier decreased to ',track[0])
            track[2]+=(step*track[0])
            frame_grey = cv.cvtColor(frame,cv.COLOR_BGR2GRAY)
            prev_frame_grey = cv.cvtColor(prev_frame,cv.COLOR_BGR2GRAY)
            flow = cv.calcOpticalFlowFarneback(prev_frame_grey,frame_grey, None, 0.5, 3, 15, 3, 5, 1.2, 0)
            xmin = int(track[3])
            ymin = int(track[4] )
            xmax =int(track[5] )
            ymax =int(track[6])
            center_flow = flow[np.minimum(int((xmin+xmax)/2),flow.shape[0]-1),np.minimum(int((ymin+ymax)/2),flow.shape[1]-1)]
          
            
            track[3] += center_flow[0]
            track[5]+=center_flow[1]
            track[4]+=center_flow[0]
            track[6]+=center_flow[1]
            
            
    to_display = [track for track in tracks if track[2]>0.4]
    #print(tracks)
    for box in to_display:
    # Transform the predicted bounding boxes for the 512x512 image to the original image dimensions.
    
        xmin = int(box[3] )
        ymin = int(box[4] )
        xmax =int(box[5] )
        ymax =int(box[6] )
        cv.rectangle(frame, (int(xmin), int(ymin)), (int(xmax),int(ymax)), (0, 255, 0), 2)
        cv.putText(frame,'{}: {:.2f}'.format(classes[int(box[1])], box[2]), (xmin, ymin),cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
        
        total_objects_tracking+=1
        tracking_res.append({"image_id" : frameCount, "category_id" : 1, "bbox" : [float(xmin),float(ymin),float(xmax-xmin),float(ymax-ymin)], "score" : int(box[1])})
 
    out_tracking.write(frame)
    prev_frame=frame
# cleanup the camera and close any open windows

out_tracking.release()
print(total_objects_tracking)
with open('retina_center_flow.json', 'w') as outfile:  
    json.dump(tracking_res, outfile)

KeyboardInterrupt: 

In [21]:
out_tracking.release()

## BBox Keypoints flow

In [None]:
# import the necessary packages
from imutils.video import VideoStream
import argparse
import datetime
import imutils
import time
import cv2 as cv
import time
import json
from bounding_box_utils.bounding_box_utils import iou
feature_params = dict( maxCorners = 25,
                       qualityLevel = 0.3,
                       minDistance = 7,
                       blockSize = 7 )
lk_params = dict( winSize  = (15,15),
                  maxLevel = 2,
                  criteria = (cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 0.03))
no_tracking_res = [] 
tracking_res = []

# initialize the first frame in the video stream
firstFrame = None
frameCount =0
step = 0.05
total_objects_no_tracking=0
total_objects_tracking=0

print(frame_width)
print(frame_height)
preds = []
pred = None
tracks=[]

out_tracking = cv.VideoWriter('retina_keypoint.avi',cv.VideoWriter_fourcc('M','J','P','G'), 30, (frame_width,frame_height))
started = False
# your code
print("prev_count   current_count   entered   exited")
multiplier=0
cc=0
prev_frame=None

total_frames=641
frameCount=0
while frameCount<total_frames:
    # grab the current frame and initialize the occupied/unoccupied
    # text
    frame = cv.imread('../modd/%s.jpg'%str(frameCount+1).zfill(5))
    
 
    # if the frame could not be grabbed, then we have reached the end
    # of the video
    if frame is None:
        break
    
    if(frameCount<0):
        continue
   
    preds = []
    if 'modd/%s.jpg'%str(frameCount+1).zfill(5) in detections:
        
        for box in detections['modd/%s.jpg'%str(frameCount+1).zfill(5)]:
           
            if(box[0]!=8):

                continue
            temp_pred = np.insert(box,0,0)
            temp_pred=np.insert(temp_pred,7,0)
            preds.append(temp_pred)

    for track in tracks:
        track[7]=0
    #print(len(prev_pred),len(temp),len(entered_pred),len(exited_pred))
    
    for pred in preds:
        index = box_matches_list(pred,tracks)
      
        if(index>-1):
            
            multiplier = tracks[index][0]
            org_conf =tracks[index][2]
            if(multiplier<0):
                multiplier=1
            else:
                multiplier +=1
            
             
            tracks[index][2] = org_conf+ multiplier*step
            tracks[index][3] = pred[3]
            tracks[index][4] = pred[4]
            tracks[index][5] = pred[5]
            tracks[index][6] = pred[6]
            tracks[index][0] = multiplier
        else:
            
            temp_pred = np.copy(pred)
           
            multiplier = 1
           
          
            temp_pred[0] = multiplier
            temp_pred[7]=1
            tracks.append(temp_pred)
         
            cc = frameCount
  
    for track in tracks:
        if(track[7]==0):
            #print('not found in current frame')
            track[0]=-1
            #print('multiplier decreased to ',track[0])
            track[2]+=(step*track[0])
            frame_grey = cv.cvtColor(frame,cv.COLOR_BGR2GRAY)
            prev_frame_grey = cv.cvtColor(prev_frame,cv.COLOR_BGR2GRAY)
            mask = np.zeros(frame_grey.shape, dtype = "uint8")


            cv.rectangle(mask, (int(track[3]), int(track[4])), (int(track[5]), int(track[6])), (255, 255, 255), -1)
            p0 = cv.goodFeaturesToTrack(prev_frame_grey, mask = mask, **feature_params)
            if(not p0 is None ):
                p1, st, err = cv.calcOpticalFlowPyrLK(prev_frame_grey, frame_grey, p0, None, **lk_params)
                
                average_flow = np.average(p1-p0,0)[0]
            else:
                average_flow=[0,0]
            flow_x_box_coords = int(average_flow[0])
            flow_y_box_coords = int(average_flow[1])
            track[3] += flow_x_box_coords
            track[5]+=flow_x_box_coords
            track[4]+=flow_y_box_coords
            track[6]+=flow_y_box_coords
            
            
    to_display = [track for track in tracks if track[2]>0.4]
    tracks= to_display
   
    for box in to_display:
    # Transform the predicted bounding boxes for the 512x512 image to the original image dimensions.
    
        xmin = int(box[3])
        ymin = int(box[4])
        xmax =int(box[5])
        ymax =int(box[6])
        cv.rectangle(frame, (int(xmin), int(ymin)), (int(xmax),int(ymax)), (0, 255, 0), 2)
        cv.putText(frame,'{}: {:.2f}'.format(classes[int(box[1])], box[2]), (xmin, ymin),cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
        total_objects_tracking+=1
        tracking_res.append({"image_id" : frameCount, "category_id" : 1, "bbox" : [float(xmin),float(ymin),float(xmax-xmin),float(ymax-ymin)], "score" : int(box[1])})
    
    out_tracking.write(frame)
    
    frameCount+=1
    prev_frame=frame
# cleanup the camera and close any open windows



out_tracking.release()

print(total_objects_tracking)
with open('retina_keypoint.json', 'w') as outfile:  
    json.dump(tracking_res, outfile)

In [9]:
print(frameCount)

801


## Kalman Filter Prediction

In [16]:
# import the necessary packages
from imutils.video import VideoStream
import argparse
import datetime
import imutils
import time
import cv2 as cv
import time
import json
from bounding_box_utils.bounding_box_utils import iou
feature_params = dict( maxCorners = 25,
                       qualityLevel = 0.3,
                       minDistance = 7,
                       blockSize = 7 )
lk_params = dict( winSize  = (15,15),
                  maxLevel = 2,
                  criteria = (cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 0.03))
no_tracking_res = [] 
tracking_res = []

# initialize the first frame in the video stream
firstFrame = None
frameCount =0
step = 0.05
total_objects_no_tracking=0
total_objects_tracking=0
frame_width = 640
frame_height = 464
print(frame_width)
print(frame_height)
preds = []
pred = None
tracks=[]

out_tracking = cv.VideoWriter('yolo_kalman.avi',cv.VideoWriter_fourcc('M','J','P','G'), 30, (frame_width,frame_height))
started = False
# your code
print("prev_count   current_count   entered   exited")
multiplier=0
cc=0
prev_frame=None
kalman_trackers = []
def add_kalman(initialpred):
    
    kalman = cv.KalmanFilter(4,2)
    kalman.measurementMatrix = np.array([[1,0,0,0],
                                     [0,1,0,0]],np.float32)

    kalman.transitionMatrix = np.array([[1,0,1,0],
                                    [0,1,0,1],
                                    [0,0,1,0],
                                    [0,0,0,1]],np.float32)

    kalman.processNoiseCov = np.array([[1,0,0,0],
                                   [0,1,0,0],
                                   [0,0,1,0],
                                   [0,0,0,1]],np.float32) * 0.001
    kalman.predict()
    kalman.correct(initialpred)
    kalman.predict()
    kalman.correct(initialpred)
    kalman.predict()
    kalman.correct(initialpred)
    kalman.predict()
    kalman.correct(initialpred)
    kalman_trackers.append(kalman)
while frameCount<641:
    # grab the current frame and initialize the occupied/unoccupied
    # text
    frame = cv.imread('../modd/%s.jpg'%str(frameCount+1).zfill(5))
    
    if frame is None:
        break
    frameCount+=1
    if(frameCount<0):
        continue
   
    preds = []
    if 'modd/%s.jpg'%str(frameCount+1).zfill(5) in detections:
        
        for box in detections['modd/%s.jpg'%str(frameCount+1).zfill(5)]:
           
            if(box[0]!=4):

                continue
            temp_pred = np.insert(box,0,0)
            temp_pred=np.insert(temp_pred,7,0)
            preds.append(temp_pred)

    for track in tracks:
        track[7]=0
    #print(len(prev_pred),len(temp),len(entered_pred),len(exited_pred))
    
    for pred in preds:
        
        index = box_matches_list(pred,tracks)
        if(index>-1):
            
            multiplier = tracks[index][0]
            org_conf =tracks[index][2]
            if(multiplier<0):
                multiplier=1
            else:
                multiplier +=1
            
             
            
            kalman_trackers[index].correct(center(pred));
            tracks[index][2] = org_conf+ multiplier*step
            tracks[index][3] = pred[3]
            tracks[index][4] = pred[4]
            tracks[index][5] = pred[5]
            tracks[index][6] = pred[6]
            tracks[index][0] = multiplier
            #print(pred)
            
        else:
            print('failed to match')
            print(pred)
            print('in')
            print(tracks)
            temp_pred = np.copy(pred)
           
            multiplier = 1
            print("new object detected")
            print(temp_pred)
            #print("at frame %d"%frameCount)
           
            temp_pred[0] = multiplier
            temp_pred[7]=1
            tracks.append(temp_pred)
            add_kalman(center(temp_pred))
            
            cc = frameCount
    predictions = [i.predict() for i in kalman_trackers]
    
    i=0
    for track in tracks:
        if(track[7]==0):
            #print('not found in current frame')
            track[0]=-1
            #print('multiplier decreased to ',track[0])
            track[2]+=(step*track[0])
            print(predictions[i])
            predicted_center= np.array([predictions[i][0][0],predictions[i][1][0]],np.float32)
            offset = predicted_center- center(track)
            
            track[3] += offset[0]
            track[5]+=offset[0]
            track[4]+=offset[1]
            track[6]+=offset[1]
        i+=1
            
            
    to_display = [track for track in tracks if track[2]>0.4]
    incs = [i for i in range(len(tracks)) if tracks[i][2] >0.4]
    
    
    
    if(len(incs)>0 and len(tracks)>0):
        
        kalman_trackers = np.take(kalman_trackers,incs).tolist()
        predictions = np.take(predictions,incs,0).tolist()
    
    #print(len(incs))
    tracks= to_display
    print(len(tracks),len(predictions),len(kalman_trackers))
    #print(tracks)
    i=0
    for box in to_display:
    # Transform the predicted bounding boxes for the 512x512 image to the original image dimensions.
    
        xmin = int(box[3])
        ymin = int(box[4])
        xmax =int(box[5])
        ymax =int(box[6])
        cv.rectangle(frame, (int(xmin), int(ymin)), (int(xmax),int(ymax)), (0, 255, 0), 2)
        cv.putText(frame,'{}: {:.2f}'.format(classes[int(box[1])], box[2]), (xmin, ymin),cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
        cv.circle(frame,(int(predictions[i][0][0]),int(predictions[i][1][0])),5,(255,0,0),2) 
            
            
        total_objects_tracking+=1
        tracking_res.append({"image_id" : frameCount, "category_id" : 1, "bbox" : [float(xmin),float(ymin),float(xmax-xmin),float(ymax-ymin)], "score" : int(box[1])})
        i+=1
    #print('preds%d'%len(preds))
    #print('tracks%d'%len(tracks))
    #print('displayed%d'%len(to_display))
    # if the `q` key is pressed, break from the lop
    #plt.figure()
    #plt.imshow(frame)
    out_tracking.write(frame)
    #cv.imwrite("512_frames/frame%d.jpg"%frameCount,frame)
    prev_frame=frame
# cleanup the camera and close any open windows



out_tracking.release()

print(total_objects_tracking)
with open('yolo_kalman.json', 'w') as outfile:  
    json.dump(tracking_res, outfile)

640
464
prev_count   current_count   entered   exited
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
0 0 0
failed to match
[  0.         4.         0.689186  -6.       202.       112.
 351.         0.      ]
in
[]
new object detected
[  0.         4.         0.689186  -6.       202.       112.
 351.         0.      ]
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 1 1
1 

[[327.73685   ]
 [183.21077   ]
 [  0.97261286]
 [ -0.38512695]]
1 1 1
[[328.70947   ]
 [182.82564   ]
 [  0.97261286]
 [ -0.38512695]]
1 1 1
[[329.6821    ]
 [182.4405    ]
 [  0.97261286]
 [ -0.38512695]]
1 1 1
[[330.65472   ]
 [182.05537   ]
 [  0.97261286]
 [ -0.38512695]]
1 1 1
[[331.62735   ]
 [181.67024   ]
 [  0.97261286]
 [ -0.38512695]]
1 1 1
[[332.59998   ]
 [181.28511   ]
 [  0.97261286]
 [ -0.38512695]]
1 1 1
[[333.5726    ]
 [180.89998   ]
 [  0.97261286]
 [ -0.38512695]]
1 1 1
[[334.54523   ]
 [180.51485   ]
 [  0.97261286]
 [ -0.38512695]]
1 1 1
[[335.51785   ]
 [180.12971   ]
 [  0.97261286]
 [ -0.38512695]]
1 1 1
[[336.49048   ]
 [179.74458   ]
 [  0.97261286]
 [ -0.38512695]]
1 1 1
[[337.4631    ]
 [179.35945   ]
 [  0.97261286]
 [ -0.38512695]]
1 1 1
[[338.43573   ]
 [178.97432   ]
 [  0.97261286]
 [ -0.38512695]]
1 1 1
[[339.40836   ]
 [178.58919   ]
 [  0.97261286]
 [ -0.38512695]]
1 1 1
[[340.38098   ]
 [178.20406   ]
 [  0.97261286]
 [ -0.38512695]]
1 1 1
[[341.

[[384.14914   ]
 [160.87312   ]
 [  0.97261286]
 [ -0.38512695]]
[[272.56906 ]
 [133.41353 ]
 [ 17.816462]
 [  8.715114]]
3 3 3
[[385.12177   ]
 [160.48799   ]
 [  0.97261286]
 [ -0.38512695]]
[[290.38553 ]
 [142.12865 ]
 [ 17.816462]
 [  8.715114]]
[[192.07867 ]
 [ 95.28908 ]
 [ 23.967249]
 [ 11.908214]]
3 3 3
[[ 3.8609439e+02]
 [ 1.6010286e+02]
 [ 9.7261286e-01]
 [-3.8512695e-01]]
[[308.202   ]
 [150.84377 ]
 [ 17.816462]
 [  8.715114]]
[[216.04593 ]
 [107.19729 ]
 [ 23.967249]
 [ 11.908214]]
3 3 3
failed to match
[  0.         4.         0.520352 355.       172.       435.
 227.         0.      ]
in
[array([ -1.        ,   4.        , 544.739186  , 324.59439087,
       118.6028595 , 447.59439087, 201.6028595 ,   0.        ]), array([ -1.        ,   4.        ,   0.495949  , 271.20199585,
       122.34376526, 345.20199585, 179.34376526,   0.        ]), array([ -1.        ,   4.        ,   1.195714  , 178.04592896,
        76.19728851, 254.04592896, 138.19728851,   0.        ])]
new o

[[ 4.2110892e+02]
 [ 1.4623811e+02]
 [ 9.7261286e-01]
 [-3.8512695e-01]]
[[1397.9899  ]
 [ 685.5669  ]
 [  37.363163]
 [  18.31595 ]]
[[650.0032  ]
 [299.3429  ]
 [ 23.794487]
 [ 10.986915]]
[[277.48825 ]
 [128.6173  ]
 [ 31.639492]
 [ 14.676233]]
4 4 4
[[ 4.2208154e+02]
 [ 1.4585298e+02]
 [ 9.7261286e-01]
 [-3.8512695e-01]]
[[1435.353   ]
 [ 703.8829  ]
 [  37.363163]
 [  18.31595 ]]
[[673.79767 ]
 [310.3298  ]
 [ 23.794487]
 [ 10.986915]]
[[309.12775 ]
 [143.29353 ]
 [ 31.639492]
 [ 14.676233]]
4 4 4
[[ 4.2305417e+02]
 [ 1.4546785e+02]
 [ 9.7261286e-01]
 [-3.8512695e-01]]
[[1472.7162  ]
 [ 722.19885 ]
 [  37.363163]
 [  18.31595 ]]
[[697.59216 ]
 [321.3167  ]
 [ 23.794487]
 [ 10.986915]]
[[340.76724 ]
 [157.96977 ]
 [ 31.639492]
 [ 14.676233]]
4 4 4
[[ 4.2402679e+02]
 [ 1.4508272e+02]
 [ 9.7261286e-01]
 [-3.8512695e-01]]
[[1510.0793  ]
 [ 740.51483 ]
 [  37.363163]
 [  18.31595 ]]
[[721.38666 ]
 [332.30362 ]
 [ 23.794487]
 [ 10.986915]]
[[372.40674 ]
 [172.64601 ]
 [ 31.639492]
 [ 14

[[ 4.5709607e+02]
 [ 1.3198824e+02]
 [ 9.7261286e-01]
 [-3.8512695e-01]]
[[1071.1359  ]
 [ 465.34357 ]
 [  37.494057]
 [  16.248032]]
[[858.1969  ]
 [392.97534 ]
 [ 42.197845]
 [ 19.318056]]
[[23.721853 ]
 [10.510358 ]
 [ 4.307619 ]
 [ 1.9085616]]
4 4 4
[[ 4.5806870e+02]
 [ 1.3160310e+02]
 [ 9.7261286e-01]
 [-3.8512695e-01]]
[[1108.6299  ]
 [ 481.5916  ]
 [  37.494057]
 [  16.248032]]
[[900.3948  ]
 [412.2934  ]
 [ 42.197845]
 [ 19.318056]]
[[28.029472 ]
 [12.41892  ]
 [ 4.307619 ]
 [ 1.9085616]]
4 4 4
failed to match
[  0.         4.         0.507558 401.       168.       485.
 218.         0.      ]
in
[array([ -1.        ,   4.        , 541.039186  , 396.56869507,
        90.10310364, 519.56869507, 173.10310364,   0.        ]), array([-1.00000000e+00,  4.00000000e+00,  9.87352000e-01,  1.06712988e+03,
        4.55091614e+02,  1.15012988e+03,  5.08091614e+02,  0.00000000e+00]), array([ -1.        ,   4.        ,   2.160592  , 857.39477539,
       387.793396  , 943.39477539, 436.79339

[[ 4.9600110e+02]
 [ 1.1658319e+02]
 [ 9.7261286e-01]
 [-3.8512695e-01]]
1 1 1
[[ 4.9697372e+02]
 [ 1.1619807e+02]
 [ 9.7261286e-01]
 [-3.8512695e-01]]
1 1 1
[[ 4.9794635e+02]
 [ 1.1581294e+02]
 [ 9.7261286e-01]
 [-3.8512695e-01]]
1 1 1
failed to match
[0.0000e+00 4.0000e+00 5.0481e-01 4.8700e+02 2.1200e+02 5.9700e+02
 2.6700e+02 0.0000e+00]
in
[array([ -1.       ,   4.       , 538.989186 , 436.4463501,  74.3129425,
       559.4463501, 157.3129425,   0.       ])]
new object detected
[0.0000e+00 4.0000e+00 5.0481e-01 4.8700e+02 2.1200e+02 5.9700e+02
 2.6700e+02 0.0000e+00]
[[ 4.9891898e+02]
 [ 1.1542782e+02]
 [ 9.7261286e-01]
 [-3.8512695e-01]]
2 2 2
[[ 4.99891602e+02]
 [ 1.15042694e+02]
 [ 9.72612858e-01]
 [-3.85126948e-01]]
[[28.990402 ]
 [12.810334 ]
 [ 5.264328 ]
 [ 2.3262112]]
2 2 2
[[ 5.0086423e+02]
 [ 1.1465757e+02]
 [ 9.7261286e-01]
 [-3.8512695e-01]]
[[34.25473  ]
 [15.136545 ]
 [ 5.264328 ]
 [ 2.3262112]]
2 2 2
failed to match
[  0.         4.         0.609557 484.       196. 

[[ 5.3393280e+02]
 [ 1.0156335e+02]
 [ 9.7261286e-01]
 [-3.8512695e-01]]
[[1004.56665 ]
 [ 473.31134 ]
 [  41.663795]
 [  19.626064]]
3 3 3
[[ 5.3490540e+02]
 [ 1.0117822e+02]
 [ 9.7261286e-01]
 [-3.8512695e-01]]
[[1046.2305  ]
 [ 492.9374  ]
 [  41.663795]
 [  19.626064]]
3 3 3
[[ 5.3587799e+02]
 [ 1.0079310e+02]
 [ 9.7261286e-01]
 [-3.8512695e-01]]
[[1087.8943  ]
 [ 512.5635  ]
 [  41.663795]
 [  19.626064]]
3 3 3
[[ 5.36850586e+02]
 [ 1.00407974e+02]
 [ 9.72612858e-01]
 [-3.85126948e-01]]
[[1129.5581  ]
 [ 532.1895  ]
 [  41.663795]
 [  19.626064]]
3 3 3
[[ 5.3782318e+02]
 [ 1.0002285e+02]
 [ 9.7261286e-01]
 [-3.8512695e-01]]
[[1171.2219  ]
 [ 551.81555 ]
 [  41.663795]
 [  19.626064]]
3 3 3
[[ 5.3879578e+02]
 [ 9.9637726e+01]
 [ 9.7261286e-01]
 [-3.8512695e-01]]
[[1212.8857  ]
 [ 571.4416  ]
 [  41.663795]
 [  19.626064]]
3 3 3
[[ 5.3976837e+02]
 [ 9.9252602e+01]
 [ 9.7261286e-01]
 [-3.8512695e-01]]
[[1254.5496  ]
 [ 591.0676  ]
 [  41.663795]
 [  19.626064]]
3 3 3
[[ 5.4074097e+02

In [18]:
# import the necessary packages
from imutils.video import VideoStream
import argparse
import datetime
import imutils
import time
import cv2 as cv
import time
import json
import numpy as np
from bounding_box_utils.bounding_box_utils import iou
feature_params = dict( maxCorners = 25,
                       qualityLevel = 0.3,
                       minDistance = 7,
                       blockSize = 7 )
lk_params = dict( winSize  = (15,15),
                  maxLevel = 2,
                  criteria = (cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 0.03))
no_tracking_res = [] 
tracking_res = []

# initialize the first frame in the video stream
firstFrame = None
frameCount =70
step = 0.05
total_objects_no_tracking=0
total_objects_tracking=0
frame_width = 640
frame_height = 464
print(frame_width)
print(frame_height)
preds = []
pred = None
tracks=[]
kalman_trackers = []
def add_kalman():
    
    kalman = cv.KalmanFilter(4,2)
    kalman.measurementMatrix = np.array([[1,0,0,0],
                                     [0,1,0,0]],np.float32)

    kalman.transitionMatrix = np.array([[1,0,1,0],
                                    [0,1,0,1],
                                    [0,0,1,0],
                                    [0,0,0,1]],np.float32)

    kalman.processNoiseCov = np.array([[1,0,0,0],
                                   [0,1,0,0],
                                   [0,0,1,0],
                                   [0,0,0,1]],np.float32) * 0.03
    kalman_trackers.append(kalman)

out_tracking = cv.VideoWriter('ssd_300_kalman.avi',cv.VideoWriter_fourcc('M','J','P','G'), 30, (frame_width,frame_height))
started = False
# your code

multiplier=0
cc=0
prev_frame=None

while frameCount<641:
    # grab the current frame and initialize the occupied/unoccupied
    # text
    frame = cv.imread('../modd/%s.jpg'%str(frameCount+1).zfill(5))
    
 
    # if the frame could not be grabbed, then we have reached the end
    # of the video
    if frame is None:
        break
    
    if(frameCount<0):
        continue
    elif started==False:
        start_time = time.time()
        started=True
    # resize the frame, convert it to grayscale, and blur it
    
    
    
        
    # loop over the contours
    preds = []
    if 'modd/%s.jpg'%str(frameCount+1).zfill(5) in detections:
        
        for box in detections['modd/%s.jpg'%str(frameCount+1).zfill(5)]:
           
            if(box[0]!=4):

                continue
            temp_pred = np.insert(box,0,0)
            temp_pred=np.insert(temp_pred,7,0)
            preds.append(temp_pred)
   
    for track in tracks:
        track[7]=0
    
    for pred in preds:
        
        index = box_matches_list(pred,tracks)
        if(index>-1):
            
            multiplier = tracks[index][0]
            
            org_conf =tracks[index][2]
            if(multiplier<0):
                multiplier=1
            else:
                
                multiplier +=1
                
             
            del tracks[index]
            kalman_trackers[index].correct(center(pred));

          
            
    
            pred[2] = org_conf+ multiplier*step
            
            pred[0] = multiplier
            #print(pred)
            
            tracks.append(pred)
        else:
            
            temp_pred = np.copy(pred)
           
            multiplier = 1
            
            add_kalman()
            print('added')
            temp_pred[0] = multiplier
            temp_pred[2] = 1
            
            tracks.append(temp_pred)
            
            cc = frameCount
    
    predictions = [i.predict() for i in kalman_trackers]
    
    i=0
    for track in tracks:
        if(track[7]!=1):
            if(box_matches_list(track,preds)==-1):
                #print('not found in current frame')
                track[0]=-1
            #print('multiplier decreased to ',track[0])
                track[2]+=(step*track[0])
                predicted_center= np.array([predictions[i][0][0],predictions[i][1][0]],np.float32)
                offset = predicted_center- center(track)
                
                track[3] += offset[0]
                track[5]+=offset[0]
                track[4]+=offset[1]
                track[6]+=offset[1]
        i+=1
            
    
    to_display = [track for track in tracks if track[2]>0.4]
    tracks=to_display
    #print(len(to_display))
    i=0
    for box in to_display:
        
    # Transform the predicted bounding boxes for the 512x512 image to the original image dimensions.
        
        xmin = int(box[3] )
        ymin = int(box[4])
        xmax =int(box[5])
        ymax =int(box[6])
        cv.rectangle(frame, (int(xmin), int(ymin)), (int(xmax),int(ymax)), (0, 255, 0), 2)
        cv.putText(frame,'{}: {:.2f}'.format(classes[int(box[1])], box[2]), (xmin, ymin),cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
        cv.circle(frame,(predictions[i][0][0],predictions[i][1][0]),5,(255,0,0),2)
        total_objects_tracking+=1
        tracking_res.append({"image_id" : frameCount, "category_id" : 1, "bbox" : [float(xmin),float(ymin),float(xmax-xmin),float(ymax-ymin)], "score" : int(box[1])})
        i+=1
    cv.imwrite("one.jpg",frame)
    out_tracking.write(frame)
    prev_frame=frame
    frameCount+=1
# cleanup the camera and close any open windows
elapsed_time = time.time() - start_time
print(elapsed_time)

cv.destroyAllWindows()

out_tracking.release()

print(total_objects_tracking)
with open('ssd_300_kalman.json', 'w') as outfile:  
    json.dump(tracking_res, outfile)

640
464
added
1
1
1
1
1
1
1
1
1
added
2
2
2
2
2
2
2
2
2
2
2
2
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
added
2
2
2
2
2
2
2
2
2
2
2
2
1
1
1
1
1
1
1
1
1
1
1
1
1
added
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
1
1
1
added
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
added
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
2
2
2
2
2
2
2
2
added
3
3
3
3
3
3
3
3
3
3
3
3
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
2
added
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
3
added
4
4
4
4
4
4
4
added
5
5
5
5
5
5
5
5
5
5
5
5
added
6
6
6
6
6
6
added
7
7
7
7
7
7
7
7
6
6
6
6
6
6
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5


In [11]:
print(frameCount)

70


## General Tracking

In [19]:
# import the necessary packages
from imutils.video import VideoStream
import argparse
import datetime
import imutils
import time
import cv2 as cv
import time
import json
from bounding_box_utils.bounding_box_utils import iou
feature_params = dict( maxCorners = 25,
                       qualityLevel = 0.3,
                       minDistance = 7,
                       blockSize = 7 )
lk_params = dict( winSize  = (15,15),
                  maxLevel = 2,
                  criteria = (cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 0.03))
no_tracking_res = [] 
tracking_res = []

# initialize the first frame in the video stream
firstFrame = None
frameCount =0
step = 0.05
total_objects_no_tracking=0
total_objects_tracking=0
frame_width = 640
frame_height = 464
print(frame_width)
print(frame_height)
preds = []
pred = None
tracks=[]

out_tracking = cv.VideoWriter('modd_1_videos/ssd300/kalman_corners.avi',cv.VideoWriter_fourcc('M','J','P','G'), 30, (frame_width,frame_height))
started = False
# your code
print("prev_count   current_count   entered   exited")
multiplier=0
cc=0
prev_frame=None
kalman_trackers = []
def add_kalman(initialpred1,initialpred2):
    
    kalman1 = cv.KalmanFilter(4,2)
    kalman1.measurementMatrix = np.array([[1,0,0,0],
                                     [0,1,0,0]],np.float32)

    kalman1.transitionMatrix = np.array([[1,0,1,0],
                                    [0,1,0,1],
                                    [0,0,1,0],
                                    [0,0,0,1]],np.float32)

    kalman1.processNoiseCov = np.array([[1,0,0,0],
                                   [0,1,0,0],
                                   [0,0,1,0],
                                   [0,0,0,1]],np.float32) * 0.001
    kalman1.predict()
    kalman1.correct(initialpred1)
    kalman1.predict()
    kalman1.correct(initialpred1)
    kalman1.predict()
    kalman1.correct(initialpred1)
    kalman1.predict()
    kalman1.correct(initialpred1)
    kalman1.predict()
    kalman1.correct(initialpred1)
    kalman1.predict()
    kalman1.correct(initialpred1)
    kalman1.predict()
    kalman1.correct(initialpred1)
    kalman1.predict()
    kalman1.correct(initialpred1)
    kalman2 = cv.KalmanFilter(4,2)
    kalman2.measurementMatrix = np.array([[1,0,0,0],
                                     [0,1,0,0]],np.float32)

    kalman2.transitionMatrix = np.array([[1,0,1,0],
                                    [0,1,0,1],
                                    [0,0,1,0],
                                    [0,0,0,1]],np.float32)

    kalman2.processNoiseCov = np.array([[1,0,0,0],
                                   [0,1,0,0],
                                   [0,0,1,0],
                                   [0,0,0,1]],np.float32) * 0.001
    kalman2.predict()
    kalman2.correct(initialpred2)
    kalman2.predict()
    kalman2.correct(initialpred2)
    kalman2.predict()
    kalman2.correct(initialpred2)
    kalman2.predict()
    kalman2.correct(initialpred2)
    kalman2.predict()
    kalman2.correct(initialpred2)
    kalman2.predict()
    kalman2.correct(initialpred2)
    kalman2.predict()
    kalman2.correct(initialpred2)
    kalman2.predict()
    kalman2.correct(initialpred2)
    kalman_trackers.append([kalman1,kalman2])
    
while frameCount<641:
    # grab the current frame and initialize the occupied/unoccupied
    # text
    frame = cv.imread('../modd/%s.jpg'%str(frameCount+1).zfill(5))
    
    if frame is None:
        break
    frameCount+=1
    if(frameCount<0):
        continue
   
    preds = []
    if 'modd/%s.jpg'%str(frameCount+1).zfill(5) in detections:
        
        for box in detections['modd/%s.jpg'%str(frameCount+1).zfill(5)]:
           
            if(box[0]!=4):

                continue
            temp_pred = np.insert(box,0,0)
            temp_pred=np.insert(temp_pred,7,0)
            preds.append(temp_pred)

    for track in tracks:
        track[7]=0
    #print(len(prev_pred),len(temp),len(entered_pred),len(exited_pred))
    
    for pred in preds:
        
        index = box_matches_list(pred,tracks)
        if(index>-1):
            
            multiplier = tracks[index][0]
            org_conf =tracks[index][2]
            if(multiplier<0):
                multiplier=1
            else:
                multiplier +=1
            
             
            
            kalman_trackers[index][0].correct(topleft(pred));
            kalman_trackers[index][1].correct(botright(pred));
            tracks[index][2] = org_conf+ multiplier*step
            tracks[index][3] = pred[3]
            tracks[index][4] = pred[4]
            tracks[index][5] = pred[5]
            tracks[index][6] = pred[6]
            tracks[index][0] = multiplier
            #print(pred)
            
        else:
            
            temp_pred = np.copy(pred)
           
            multiplier = 1
            
            #print("at frame %d"%frameCount)
           
            temp_pred[0] = multiplier
            temp_pred[7]=1
            tracks.append(temp_pred)
            add_kalman(topleft(temp_pred),botright(temp_pred))
            
            cc = frameCount
    predictions = [[i[0].predict(),i[1].predict()] for i in kalman_trackers]
    
    i=0
    for track in tracks:
        if(track[7]==0):
            #print('not found in current frame')
            track[0]=-1
            #print('multiplier decreased to ',track[0])
            track[2]+=(step*track[0])
            #print(predictions[i])
            
            track[3] = predictions[i][0][0][0]
            track[4]=predictions[i][0][1][0]
            track[5]=predictions[i][1][0][0]
            track[6]=predictions[i][1][1][0]
        i+=1
            
            
    to_display = [track for track in tracks if track[2]>0.4]
    incs = [i for i in range(len(tracks)) if tracks[i][2] >0.4]
    
    
    if(len(incs)>0 and len(tracks)>0):
        
        kalman_trackers = np.take(kalman_trackers,incs,0).tolist()
        predictions = np.take(predictions,incs,0).tolist()
    
    
    #print(len(incs))
    tracks= to_display
    
    #print(tracks)
    i=0
    for box in to_display:
    # Transform the predicted bounding boxes for the 512x512 image to the original image dimensions.
    
        xmin = int(box[3])
        ymin = int(box[4])
        xmax =int(box[5])
        ymax =int(box[6])
        #xmin = int(predictions[i][0][0][0])
        #ymin = int(predictions[i][0][1][0])
        #xmax =int(predictions[i][1][0][0])
        #ymax =int(predictions[i][1][1][0])
        cv.rectangle(frame, (int(xmin), int(ymin)), (int(xmax),int(ymax)), (0, 255, 0), 2)
        
        cv.putText(frame,'{}: {:.2f}'.format(classes[int(box[1])], box[2]), (xmin, ymin),cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
        cv.circle(frame,(int(predictions[i][0][0][0]),int(predictions[i][0][1][0])),5,(255,0,0),2)
        cv.circle(frame,(int(predictions[i][1][0][0]),int(predictions[i][1][1][0])),5,(255,0,0),2)    
            
        total_objects_tracking+=1
        tracking_res.append({"image_id" : frameCount, "category_id" : 1, "bbox" : [float(xmin),float(ymin),float(xmax-xmin),float(ymax-ymin)], "score" : int(box[1])})
        i+=1
    #print('preds%d'%len(preds))
    #print('tracks%d'%len(tracks))
    #print('displayed%d'%len(to_display))
    # if the `q` key is pressed, break from the lop
    #plt.figure()
    #plt.imshow(frame)
    out_tracking.write(frame)
    #cv.imwrite("512_frames/frame%d.jpg"%frameCount,frame)
    #prev_frame=frame
# cleanup the camera and close any open windows



out_tracking.release()

print(total_objects_tracking)
with open('modd_1_videos/ssd300/kalman_corners.json', 'w') as outfile:  
    json.dump(tracking_res, outfile)

640
464
prev_count   current_count   entered   exited
1514


In [14]:
kalman_trackers[0][0].update()

AttributeError: 'cv2.KalmanFilter' object has no attribute 'update'

In [None]:
kalman_trackers[0][0].correct

In [None]:
kalman_trackers[0][0].correct

## kalman org

In [10]:
# import the necessary packages
from imutils.video import VideoStream
import argparse
import datetime
import imutils
import time
import cv2 as cv
import time
import json
import numpy as np
import motmetrics as mm
from bounding_box_utils.bounding_box_utils import iou
feature_params = dict( maxCorners = 25,
                       qualityLevel = 0.3,
                       minDistance = 7,
                       blockSize = 7 )
lk_params = dict( winSize  = (15,15),
                  maxLevel = 2,
                  criteria = (cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 0.03))
no_tracking_res = [] 
tracking_res = []

# initialize the first frame in the video stream
firstFrame = None
frameCount =70
step = 0.05
total_objects_no_tracking=0
total_objects_tracking=0
frame_width = 640
frame_height = 464
print(frame_width)
print(frame_height)
preds = []
pred = None
tracks=[]
kalman_trackers = []
acc = mm.MOTAccumulator(auto_id=True)
def add_kalman(initial):
    
    kalman = cv.KalmanFilter(8,4)
    kalman.measurementMatrix = np.array([[1,0,0,0,0,0,0,0],[0,1,0,0,0,0,0,0],[0,0,1,0,0,0,0,0],[0,0,0,1,0,0,0,0]],np.float32)

    kalman.transitionMatrix = np.array([[1,0,0,0,1,0,0,0],[0,1,0,0,0,1,0,0],[0,0,1,0,0,0,1,0],[0,0,0,1,0,0,0,1]
                                       ,[0,0,0,0,1,0,0,0],[0,0,0,0,0,1,0,0],[0,0,0,0,0,0,1,0],[0,0,0,0,0,0,0,1]],np.float32)

    kalman.processNoiseCov = np.array([[1,0,0,0,0,0,0,0],[0,1,0,0,0,0,0,0],[0,0,1,0,0,0,0,0],[0,0,0,1,0,0,0,0]
                                      ,[0,0,0,0,1,0,0,0],[0,0,0,0,0,1,0,0],[0,0,0,0,0,0,1,0],[0,0,0,0,0,0,0,1]],np.float32)*0.001
    
    kalman.predict();
    
    kalman.correct(initial)
    kalman.predict();
    kalman.correct(initial)
    kalman.predict();
    kalman.correct(initial)
    kalman.predict();
    kalman.correct(initial)
    kalman_trackers.append(kalman)

out_tracking = cv.VideoWriter('modd_1_vidoes/ssd300/kalman_corners.avi',cv.VideoWriter_fourcc('M','J','P','G'), 30, (frame_width,frame_height))
started = False
# your code

multiplier=0
cc=0
prev_frame=None

while frameCount<641:
    # grab the current frame and initialize the occupied/unoccupied
    # text
    frame = cv.imread('../modd/%s.jpg'%str(frameCount+1).zfill(5))
    
 
    # if the frame could not be grabbed, then we have reached the end
    # of the video
    if frame is None:
        break
    
    if(frameCount<0):
        continue
    elif started==False:
        start_time = time.time()
        started=True
    # resize the frame, convert it to grayscale, and blur it
    
    
    
        
    # loop over the contours
    preds = []
    if 'modd/%s.jpg'%str(frameCount+1).zfill(5) in detections:
        
        for box in detections['modd/%s.jpg'%str(frameCount+1).zfill(5)]:
           
            if(box[0]!=4):

                continue
            temp_pred = np.insert(box,0,0)
            temp_pred=np.insert(temp_pred,7,0)
            preds.append(temp_pred)
   
    for track in tracks:
        track[7]=0
    
    for pred in preds:
        
        index = box_matches_list(pred,tracks)
        if(index>-1):
            
            multiplier = tracks[index][0]
            
            org_conf =tracks[index][2]
            if(multiplier<0):
                multiplier=1
            else:
                
                multiplier +=1
                
             
            del tracks[index]
            kalman_trackers[index].correct(corners(pred));

          
            
    
            pred[2] = org_conf+ multiplier*step
            
            pred[0] = multiplier
            #print(pred)
            
            tracks.append(pred)
        else:
            
            temp_pred = np.copy(pred)
           
            multiplier = 1
            
            add_kalman(corners(pred))
            print('added')
            temp_pred[0] = multiplier
            temp_pred[2] = 1
            
            tracks.append(temp_pred)
            
            cc = frameCount
    
    predictions = [i.predict() for i in kalman_trackers]
    
    i=0
    for track in tracks:
        if(track[7]!=1):
            if(box_matches_list(track,preds)==-1):
                #print('not found in current frame')
                track[0]=-1
            #print('multiplier decreased to ',track[0])
                track[2]+=(step*track[0])
                
                
                track[3] =predictions[i][0][0]
                track[4]=predictions[i][1][0]
                track[5]=predictions[i][2][0]
                track[6]=predictions[i][3][0]
        i+=1
            
    
    to_display = [track for track in tracks if track[2]>0.4]
    tracks=to_display
    incs = [i for i in range(len(tracks)) if tracks[i][2] >0.4]
    
    
    if(len(incs)>0 and len(tracks)>0):
        
        kalman_trackers = np.take(kalman_trackers,incs,0).tolist()
        predictions = np.take(predictions,incs,0).tolist()
    track_boxes = [corners(t) for t in tracks]
    if(str(frameCount+1).zfill(5) in gt):
        ff = mm.distances.iou_matrix(track_boxes, gt[str(frameCount+1).zfill(5)][0], max_iou=0.5)
        
        acc.update(
    ['a'],                 # Ground truth objects in this frame
    np.arange(len(track_boxes)),                  # Detector hypotheses in this frame
    
             ff) # Distances from object 'b' to hypotheses 1, 2, 3
        #print(ff)
        #if(ff[0][0]>0.3):
            #print(track_boxes[0])
            #print(gt[str(frameCount+1).zfill(5)][0])
    #print(len(to_display))track_boxes[0]
    
    i=0
    for box in to_display:
        
    # Transform the predicted bounding boxes for the 512x512 image to the original image dimensions.
        
        xmin = int(box[3] )
        ymin = int(box[4])
        xmax =int(box[5])
        ymax =int(box[6])
        cv.rectangle(frame, (int(xmin), int(ymin)), (int(xmax),int(ymax)), (0, 255, 0), 2)
        cv.putText(frame,'{}: {:.2f}'.format(classes[int(box[1])], box[2]), (xmin, ymin),cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
        cv.circle(frame,(int(predictions[i][0][0]),int(predictions[i][1][0])),5,(255,0,0),2)
        cv.circle(frame,(int(predictions[i][2][0]),int(predictions[i][3][0])),5,(255,0,0),2)
        total_objects_tracking+=1
        tracking_res.append({"image_id" : frameCount, "category_id" : 1, "bbox" : [float(xmin),float(ymin),float(xmax-xmin),float(ymax-ymin)], "score" : int(box[1])})
        i+=1
    cv.imwrite("one.jpg",frame)
    out_tracking.write(frame)
    prev_frame=frame
    frameCount+=1
# cleanup the camera and close any open windows
elapsed_time = time.time() - start_time
print(elapsed_time)

cv.destroyAllWindows()

out_tracking.release()

print(total_objects_tracking)
with open('modd_1_videos/ssd300/kalman_corners.json', 'w') as outfile:  
    json.dump(tracking_res, outfile)

640
464
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
added
8.078932046890259
1720


In [40]:
mh = mm.metrics.create()
summary = mh.compute(acc, metrics=['num_frames', 'mota', 'motp'], name='acc')
print(summary)


     num_frames      mota      motp
acc         568  0.926056  0.156211


In [41]:
summary = mh.compute_many(
    [acc, acc.events.loc[0:1]], 
    metrics=mm.metrics.motchallenge_metrics, 
    names=['full', 'part'],
    generate_overall=True
    )

strsummary = mm.io.render_summary(
    summary, 
    formatters=mh.formatters, 
    namemap=mm.io.motchallenge_metric_names
)
print(strsummary)
print(mm.metrics.motchallenge_metrics)

          IDF1    IDP    IDR   Rcll   Prcn GT MT PT ML FP FN IDs  FM   MOTA  MOTP
full     96.4%  94.1%  98.8%  98.8%  94.1%  1  1  0  0 35  7   0   3  92.6% 0.156
part    100.0% 100.0% 100.0% 100.0% 100.0%  1  1  0  0  0  0   0   0 100.0% 0.342
OVERALL  96.4%  94.1%  98.8%  98.8%  94.1%  2  2  0  0 35  7   0   3  92.6% 0.157
['idf1', 'idp', 'idr', 'recall', 'precision', 'num_unique_objects', 'mostly_tracked', 'partially_tracked', 'mostly_lost', 'num_false_positives', 'num_misses', 'num_switches', 'num_fragmentations', 'mota', 'motp']


## Using SORT

In [3]:
# import the necessary packages
from imutils.video import VideoStream
import argparse
import datetime
import imutils
import time
import cv2 as cv
import time
import json
import numpy as np
from sort import *
from bounding_box_utils.bounding_box_utils import iou
import motmetrics as mm
import numpy as np
acc = mm.MOTAccumulator(auto_id=True)
feature_params = dict( maxCorners = 25,
                       qualityLevel = 0.3,
                       minDistance = 7,
                       blockSize = 7 )
lk_params = dict( winSize  = (15,15),
                  maxLevel = 2,
                  criteria = (cv.TERM_CRITERIA_EPS | cv.TERM_CRITERIA_COUNT, 10, 0.03))
no_tracking_res = [] 
tracking_res = []

# initialize the first frame in the video stream
firstFrame = None
frameCount =70
step = 0.05
total_objects_no_tracking=0
total_objects_tracking=0
frame_width = 640
frame_height = 464
print(frame_width)
print(frame_height)
preds = []
pred = None
tracks=[]
kalman_trackers = []


out_tracking = cv.VideoWriter('modd_1_videos/ssd300/sort.avi',cv.VideoWriter_fourcc('M','J','P','G'), 30, (frame_width,frame_height))
started = False
# your code

multiplier=0
cc=0
prev_frame=None
mot_tracker = Sort()
while frameCount<641:
    # grab the current frame and initialize the occupied/unoccupied
    # text
    frame = cv.imread('../modd/%s.jpg'%str(frameCount+1).zfill(5))
    
 
    # if the frame could not be grabbed, then we have reached the end
    # of the video
    if frame is None:
        break
    
    if(frameCount<0):
        continue
    elif started==False:
        start_time = time.time()
        started=True
    # resize the frame, convert it to grayscale, and blur it
    
    
    
        
    # loop over the contours
    preds = []
    if 'modd/%s.jpg'%str(frameCount+1).zfill(5) in detections:
        
        for box in detections['modd/%s.jpg'%str(frameCount+1).zfill(5)]:
           
            if(box[0]!=4):

                continue
            
            temp_pred = box[2:]
            
            temp_pred = np.insert(temp_pred,4,box[1])
            
            preds.append(temp_pred)
   
    
    preds = np.asarray(preds)
    trackers = mot_tracker.update(preds)
    i=0
    for box in trackers:
        
    # Transform the predicted bounding boxes for the 512x512 image to the original image dimensions.
        
        xmin = int(box[0] )
        ymin = int(box[1])
        xmax =int(box[2])
        ymax =int(box[3])
        cv.rectangle(frame, (int(xmin), int(ymin)), (int(xmax),int(ymax)), (0, 255, 0), 2)
        #cv.putText(frame,'{}: {:.2f}'.format(classes[int(box[1])], box[2]), (xmin, ymin),cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
        #cv.circle(frame,(int(predictions[i][0][0]),int(predictions[i][1][0])),5,(255,0,0),2)
        #cv.circle(frame,(int(predictions[i][2][0]),int(predictions[i][3][0])),5,(255,0,0),2)
        total_objects_tracking+=1
        tracking_res.append({"image_id" : frameCount, "category_id" : 1, "bbox" : [float(xmin),float(ymin),float(xmax-xmin),float(ymax-ymin)], "score" : int(box[1])})
        i+=1
    cv.imwrite("one.jpg",frame)
    out_tracking.write(frame)
    prev_frame=frame
    frameCount+=1
# cleanup the camera and close any open windows
elapsed_time = time.time() - start_time
print(elapsed_time)

cv.destroyAllWindows()

out_tracking.release()

print(total_objects_tracking)
with open('retina_sort.json', 'w') as outfile:  
    json.dump(tracking_res, outfile)

640
464
16.634819507598877
280


In [25]:
mh = mm.metrics.create()
summary = mh.compute(acc, metrics=['num_frames', 'mota', 'motp'], name='acc')
print(summary)


IndexError: list index out of range

In [15]:
summary = mh.compute_many(
    [acc, acc.events.loc[0:1]], 
    metrics=mm.metrics.motchallenge_metrics, 
    names=['full', 'part'],
    generate_overall=True
    )

strsummary = mm.io.render_summary(
    summary, 
    formatters=mh.formatters, 
    namemap=mm.io.motchallenge_metric_names
)
print(strsummary)

IndexError: list index out of range

In [16]:
print( acc.events)

IndexError: list index out of range

NameError: name 'get_iou' is not defined