In [1]:
import tensorflow as tf
import matplotlib.pyplot as plt
import sys
sys.path.append('../')
import os
import csv
import numpy as np
from PIL import Image
import time

import src.siamese as siam
from src.visualization import show_frame,show_crops,show_scores


In [2]:
#main tracking loop: follows the target through frame_name_list and returns its bounding boxes and speed
def tracker(hp,run,design,frame_name_list,pos_x,pos_y,target_w,target_h,final_score_sz,filename,image,
            templates_z,scores,start_frame):
    """Track a target through a sequence of frames and return its bounding boxes.

    Args:
        hp: hyper-parameter object. Attributes read here: scale_step, scale_num,
            scale_min, scale_max, scale_penalty, scale_lr, window_influence,
            z_lr and response_up.
        run: run-option object; only ``run.visualization`` is read.
        design: design object. Attributes read here: context, search_sz,
            exemplar_sz and tot_stride.
        frame_name_list: frame file names, fed one per step to ``filename``.
        pos_x, pos_y: centre of the target in the first frame.
        target_w, target_h: width and height of the target in the first frame.
        final_score_sz: side length of the square score map; sizes the Hanning
            window and locates the map centre.
        filename: graph input that receives the current frame's file name.
        image: graph tensor evaluated to obtain the current frame; it is also fed
            back directly when the template is refreshed.
        templates_z: graph tensor holding the exemplar (template) features.
        scores: graph tensor holding the score maps. After ``np.squeeze`` it is
            indexed as [scale, row, col] with three scales.
        start_frame: not used by this function; kept for interface compatibility.

    Returns:
        tuple ``(bboxes, speed)``: ``bboxes`` is a ``(num_frames, 4)`` array of
        ``<x, y, w, h>`` rows (top-left corner plus size); ``speed`` is
        ``num_frames`` divided by the wall-clock seconds spent in the tracking loop.
    """
    # number of frames in the sequence
    num_frames=np.size(frame_name_list)
    # stores the tracker's output for evaluation
    bboxes=np.zeros((num_frames,4))

    # Scale pyramid: scale_step raised to evenly spaced exponents centred on 0.
    # Floor division keeps the exponents at [-1,0,1] for scale_num=3 (the values the
    # original notes document); np.ceil(scale_num/2) yields [-2,0,2] under Python 3
    # true division.
    half_span=hp.scale_num//2
    scale_factors=hp.scale_step**np.linspace(-half_span,half_span,hp.scale_num)

    # cosine (Hanning) window to penalize large displacements:
    # outer product of the 1-D window with itself, normalized to sum to 1
    hann_1d=np.expand_dims(np.hanning(final_score_sz),axis=0)
    penalty=np.transpose(hann_1d)*hann_1d
    penalty=penalty/np.sum(penalty)

    # context margin added around the target (author's note: design.context=0.5)
    context=design.context*(target_w+target_h)
    # side of the square exemplar crop: geometric mean of the padded width and height
    z_sz=np.sqrt(np.prod((target_w+context)*(target_h+context)))
    # search crop side, scaled by the same ratio as search_sz/exemplar_sz
    # (author's note: design.exemplar_sz=127)
    x_sz=float(design.search_sz)/(design.exemplar_sz)*z_sz

    # thresholds to saturate patches shrinking/growth (author's note: scale_min=0.2, scale_max=5)
    # NOTE(review): these four bounds are computed but never applied below.
    min_z=hp.scale_min*z_sz
    max_z=hp.scale_max*z_sz
    min_x=hp.scale_min*x_sz
    max_x=hp.scale_max*x_sz

    # extra keyword arguments for sess.run in the tracking loop (none by default)
    run_opts={}

    #with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        # thread coordinator for the queue runners that load the image files
        coord=tf.train.Coordinator()
        threads=tf.train.start_queue_runners(coord=coord)

        try:
            # save first frame position (from ground-truth)
            bboxes[0,:]=pos_x-target_w/2,pos_y-target_h/2,target_w,target_h

            # compute the initial template from the first frame
            image_,templates_z_=sess.run([image,templates_z],feed_dict={
                                                                       siam.pos_x_ph:pos_x,
                                                                       siam.pos_y_ph:pos_y,
                                                                       siam.z_sz_ph:z_sz,
                                                                       filename:frame_name_list[0]})
            new_templates_z_=templates_z_

            t_start=time.time()

            # each frame is searched at three different search-area sizes
            for i in range(1,num_frames):
                scaled_exemplar=z_sz*scale_factors
                scaled_search_area=x_sz*scale_factors
                scaled_target_w=target_w*scale_factors
                scaled_target_h=target_h*scale_factors
                # evaluate the frame and its score maps, feeding the current state
                image_,scores_=sess.run([image,scores],
                                        feed_dict={
                                            siam.pos_x_ph:pos_x,
                                            siam.pos_y_ph:pos_y,
                                            siam.x_sz0_ph:scaled_search_area[0],
                                            siam.x_sz1_ph:scaled_search_area[1],
                                            siam.x_sz2_ph:scaled_search_area[2],
                                            templates_z:np.squeeze(templates_z_),
                                            filename:frame_name_list[i],
                                        },**run_opts)
                scores_=np.squeeze(scores_)
                # penalize change of scale (author's note: hp.scale_penalty=0.97);
                # index 1 is the unchanged scale and is left untouched
                scores_[0,:,:]=hp.scale_penalty*scores_[0,:,:]
                scores_[2,:,:]=hp.scale_penalty*scores_[2,:,:]
                # find scale with highest peak (after penalty):
                # per-scale maximum over rows and columns, then the index of the best scale
                new_scale_id=np.argmax(np.amax(scores_,axis=(1,2)))
                # update scaled sizes with a rolling average (author's note: hp.scale_lr=0.59)
                x_sz=(1-hp.scale_lr)*x_sz+hp.scale_lr*scaled_search_area[new_scale_id]
                target_w=(1-hp.scale_lr)*target_w+hp.scale_lr*scaled_target_w[new_scale_id]
                target_h=(1-hp.scale_lr)*target_h+hp.scale_lr*scaled_target_h[new_scale_id]
                # select response with new_scale_id
                score_=scores_[new_scale_id,:,:]
                # normalize: shift the minimum to 0 and scale to unit sum
                score_=score_-np.min(score_)
                score_=score_/np.sum(score_)
                # apply displacement penalty (author's note: hp.window_influence=0.25)
                score_=(1-hp.window_influence)*score_+hp.window_influence*penalty
                # design.search_sz is the predefined search size, x_sz the current one
                pos_x,pos_y=_update_target_position(pos_x,pos_y,score_,final_score_sz,design.tot_stride,design.search_sz,
                                                   hp.response_up,x_sz)

                # convert <cx,cy,w,h> to <x,y,w,h> and save output
                bboxes[i,:]=pos_x-target_w/2,pos_y-target_h/2,target_w,target_h
                # update the target representation with a rolling average
                # (hp.z_lr is the template learning rate)
                if hp.z_lr>0:
                    new_templates_z_=sess.run([templates_z],feed_dict={
                                                        siam.pos_x_ph:pos_x,
                                                        # was `siam.pox_y_ph`, which raised AttributeError
                                                        siam.pos_y_ph:pos_y,
                                                        siam.z_sz_ph:z_sz,
                                                        image:image_
                                                        })
                    templates_z_=(1-hp.z_lr)*np.asarray(templates_z_)+hp.z_lr*np.asarray(new_templates_z_)

                # update template patch size
                z_sz=(1-hp.scale_lr)*z_sz+hp.scale_lr*scaled_exemplar[new_scale_id]

                if run.visualization:
                    show_frame(image_,bboxes[i,:],1)

            t_elapsed=time.time()-t_start
            speed=num_frames/t_elapsed
        finally:
            # always stop and join the queue-runner threads, even if tracking failed
            coord.request_stop()
            coord.join(threads)

    plt.close('all')


    return bboxes,speed

            

In [3]:
#typical argument values noted by the author:
#design.tot_stride=4
#hp.response_up=8
#final_score_sz=33
def _update_target_position(pos_x,pos_y,score,final_score_sz,tot_stride,search_sz,response_up,x_sz):
    #find location of score maximizer
    #np.argmax(score):未定义axis，则返回flatten后的index
    #np.unravel():重新获得最大值在score中的坐标
    p=np.asarray(np.unravel_index(np.argmax(score),np.shape(score)))
    #displacement from the center in search area final representation...
    #get the center of score map
    center=float(final_score_sz-1)/2
    #get the disp of score map center
    disp_in_area=p-center
    #displacement from the center in instance crop
    disp_in_xcrop=disp_in_area*float(tot_stride)/response_up
    #displacement from the center in instance crop (in frame coordinates)
    #current search_size:x_sz   define search_size:search_sz
    disp_in_frame=disp_in_xcrop*x_sz/search_sz
    #position within frame in frame coordinates
    pos_y,pos_x=pos_y+disp_in_frame[0],pos_x+disp_in_frame[1]
    return pos_y,pos_x