## Solving a constrained Integer Linear Program using a learned cost

We consider the problem of solving a constrained integer linear program (ILP). The problem can be written formally as
\begin{equation}
\begin{aligned}
\mathbf{x}^{\ast} =& \mathop{\arg\min}_{\mathbf{x} \in \mathcal{X}} \mathbf{c(f;\mathbf{w})}^T \mathbf{x} \\
&\text{s.t.} \begin{aligned}[t]
     \mathbf{A}\mathbf{x} & = \mathbf{b} \\
     \mathbf{G}\mathbf{x} & \leq \mathbf{h}
  \end{aligned}
\end{aligned}
\end{equation}
where $\mathbf{c}(\mathbf{f};\mathbf{w}) \in \mathbb{R}^n$ is the cost vector, a function of the input $\mathbf{f}$ parametrized by $\mathbf{w}$, and $(\mathbf{A}, \mathbf{b})$ and $(\mathbf{G}, \mathbf{h})$ define the equality and inequality constraints, respectively.

In [1]:
import numpy as np
np.set_printoptions(suppress=True)
import sklearn, pickle, random, time, datetime, cv2, os, joblib

import torch
import torch.nn as nn
import torch.nn.functional as F
torch.set_printoptions(sci_mode=False)
from torch_geometric.data import Data

import gurobipy as gp
from lib.utils import getIoU,computeBoxFeatures, pruneTracks
from lib.inference import forwardLP, generateGraph, buildConstraint, buildConstraintTracklet, clusterTracklets
from lib.postprocess import recoverTracklets, recoverClusteredTracklets, mergeTracklets

  return torch._C._cuda_getDeviceCount() > 0


In [None]:
class Model(nn.Module):
    """Small MLP that maps a 6-dimensional feature row to a value in (0, 1).

    The network is Linear(6, 6) -> ReLU -> Linear(6, 1) followed by a sigmoid.
    The layers live in ``self.fc`` so that saved checkpoints keyed on
    ``fc.0.*`` / ``fc.2.*`` keep loading.
    """

    def __init__(self):
        super().__init__()
        layers = [nn.Linear(6, 6), nn.ReLU(), nn.Linear(6, 1)]
        self.fc = nn.Sequential(*layers)

    def forward(self, data):
        """Apply the MLP to ``data`` (last dimension 6) and squash with a sigmoid."""
        logits = self.fc(data)
        return torch.sigmoid(logits)

# Build the matching network and restore its trained weights.
net = Model()
# map_location='cpu' lets the checkpoint load on a machine without a usable GPU
# even if it was saved from CUDA tensors; the network output is later converted
# with .numpy(), which requires CPU tensors anyway.
net.load_state_dict(torch.load('ckpt/qp/epoch_8.pth', map_location='cpu'))
#net.load_state_dict(torch.load('ckpt/qp/epoch_11.pth'))
net.eval()  # the network is only used for inference below

def getTransitionCost(net, curr_dets, curr_app_feat_norm, maxFrameGap = 1):
    """
    Get the transition cost of network flow, entry/exit and detection costs should be included later as a whole.

    net: an instance of the matching network, MLP. It is called on a float tensor
         holding one feature row per candidate edge.
    curr_dets: np.ndarray, frame, x1, y1, x2, y2, conf, node_ind
    curr_app_feat_norm: Nxd array, normalized appearance features for curr_det, N the number of detections.
    maxFrameGap: frame gap used to connect detections. Detection i is linked to
         detection j only when frame(j) - frame(i) equals this value exactly.

    Returns the array -log(prob + 1e-05), one row per edge, with edges ordered by
    source index i and then target index j. When no pair of detections has the
    requested frame gap, an empty (0, 1) array is returned instead of calling
    the network on an empty input (which it cannot handle).
    """
    frames = curr_dets[:, 0]
    edge_feat_list = []
    for i in range(curr_dets.shape[0]):
        # Indices come back in ascending order, so the edge order is the same
        # as a nested scan over all (i, j) pairs.
        for j in np.flatnonzero(frames - frames[i] == maxFrameGap):
            box_feats = computeBoxFeatures(curr_dets[i, 1:5], curr_dets[j, 1:5])
            iou = getIoU(curr_dets[i, 1:5], curr_dets[j, 1:5])
            # Dot product of normalized features, i.e. appearance similarity.
            rel_app = np.dot(curr_app_feat_norm[i], curr_app_feat_norm[j])
            box_feats.extend((iou, rel_app))
            edge_feat_list.append(box_feats)

    if not edge_feat_list:
        # No candidate edges: keep a column shape so the result can still be
        # stacked with other per-variable cost columns.
        return np.zeros((0, 1), dtype=np.float32)

    edge_feats = np.array(edge_feat_list)
    # Pure inference: no autograd graph is needed.
    with torch.no_grad():
        prob = net(torch.from_numpy(edge_feats).float())
        # Keep probabilities strictly inside (0, 1) before taking the log.
        prob = torch.clamp(prob, min=1e-7, max=1-1e-7)
    prob = prob.detach().numpy()
    edgeCost = - np.log(prob + 1e-05)
    #edgeCost = np.log((1-prob+1e-05)/(prob+1e-05))
    return edgeCost

# Inference!

In [None]:
# Thresholds passed to clusterTracklets (app_thres, dist_thres) and pruneTracks (nms_thre).
app_thres, dist_thres, nms_thre = 0.75, 100, 0.3
# Passed to recoverTracklets as prune_len.
prune_len = 3

for sequence in ['MOT17-01', 'MOT17-03', 'MOT17-06', 'MOT17-07', 'MOT17-08', 'MOT17-12', 'MOT17-14']:
    for detector in ['DPM', 'FRCNN', 'SDP']:

        # MOT17-03 is rather crowded, it is more efficient to work on a small batch size.
        batch_size = 70 if sequence == 'MOT17-03' else 100

        batch_overlap = 5  #temporal overlapping frames between two batches
        tracks_list, assignments_list, features_list, nms_list = [],[],[],[]

        #det_file = '/home/lishuai/Experiment/MOT/MOT17/ByteTrack/dets/{}-{}.txt'.format(sequence, detector)
        #app_file = '/home/lishuai/Experiment/MOT/MOT17/ByteTrack/app/{}-{}.npy'.format(sequence, detector)

        det_file = '/home/lishuai/Experiment/MOT/LP/data/{}/det_{}.txt'.format(sequence, detector)
        app_file = '/home/lishuai/Experiment/MOT/LP/data/{}/app_det_{}.npy'.format(sequence, detector)

        print('On {} and {}'.format(sequence, detector))
        dets = np.loadtxt(det_file, delimiter=',')
#         if np.unique(dets[:, 6]).shape[0] == 1:
#             print('No detector confidence used, assuming they are all one')
#             dets[:, 6] = np.ones_like(dets[:, 6])

        # One appearance feature row per detection row.
        app_feat = np.load(app_file)
        assert dets.shape[0] == app_feat.shape[0], 'Shape Mismatch!'
        num_frames = int(dets[:, 0].max()) # number of frames in this sequence

        # Slide a window of batch_size frames; consecutive windows share batch_overlap frames.
        # NOTE(review): when a window already reaches num_frames, range() can still
        # yield one more start inside the overlap, giving a short trailing batch -
        # confirm mergeTracklets expects that.
        for start_frame in range(1, num_frames+1, batch_size-batch_overlap):
            end_frame = min(start_frame + batch_size - 1, num_frames)
            print('Tracking from frame %d to %d'%(start_frame, end_frame))
            start_time = time.time()

            # Select this batch: keep column 0 (frame) and columns 2..6, and
            # append a batch-local detection index as the last column.
            curr_ind = np.logical_and(dets[:, 0] >= start_frame, dets[:, 0] <= end_frame)
            curr_dets = np.concatenate([dets[curr_ind, 0][:, None],
                                        dets[curr_ind, 2:7],
                                        np.arange(dets[curr_ind].shape[0])[:, None]],
                                        axis=1)

            curr_dets[:, 3:5] = curr_dets[:, 3:5] + curr_dets[:, 1:3] # convert to frame,x1,y1,x2,y2,conf,det_index
            curr_app_feat = app_feat[curr_ind]
            curr_app_feat_norm = curr_app_feat / np.linalg.norm(curr_app_feat, axis=1, keepdims=True)

            # Stage 0: link detections into tracklets by solving the program
            # over detection, entry, exit and transition variables.
            print('0-th iteration')
            detCost = -1 * curr_dets[:, -2][:, None]  # negative detector confidence
            entryCost = 0.5 * np.ones((curr_dets.shape[0], 1))
            exitCost = 0.5 * np.ones((curr_dets.shape[0], 1))
            transitionCost = getTransitionCost(net, curr_dets, curr_app_feat_norm, 1)
            cost = np.concatenate([detCost, entryCost, exitCost, transitionCost], axis=0).squeeze()
            linkIndexGraph = generateGraph(curr_dets)
            A_eq, b_eq, A_ub, b_ub = buildConstraint(linkIndexGraph)
            sol = forwardLP(c=cost, A_eq=A_eq, b_eq=b_eq, A_ub=A_ub, b_ub=b_ub)
            tracklets = recoverTracklets(curr_dets, sol, linkIndexGraph, prune_len=prune_len)

            # Stage 1: cluster the tracklets into tracks and prune them.
            print('1-th iteration')
            assignment_list, feature_list = clusterTracklets(tracklets, curr_app_feat_norm, dist_thres, app_thres)
            tracks = recoverClusteredTracklets(tracklets, assignment_list)
            #Do some batch-level post processing steps here.
            assignments_list.append(assignment_list)
            # Fixed: this used the undefined name `nms_thresh` (NameError).
            tracks, nms_array = pruneTracks(tracks, nms_thre)
            tracks = tracks.astype(np.int32)
            nms_list.append(nms_array)
            # Keep the features of entries whose nms_array value is 0. The builtin
            # `bool` replaces the `np.bool` alias, which was removed in NumPy 1.24.
            feature_array = np.array(feature_list)[(1-nms_array).astype(bool)]
            feature_array = feature_array/np.linalg.norm(feature_array, axis=1, keepdims=True)

            tracks_list.append(tracks)
            features_list.append(feature_array)

            end_time = time.time()
            print('Elapsed time is {:.2f} seconds'.format(end_time - start_time))
            # Drop the constraint matrices before the next batch builds its own.
            del A_eq, b_eq, A_ub, b_ub

        # Merge the per-batch tracks into tracks for the whole sequence.
        tracks = mergeTracklets(tracks_list, features_list)
        #save_file = 'BYTE_Results/{}.txt'.format(sequence)
        # Fixed: the file name now includes the detector. Naming it by sequence
        # only made the three detector runs overwrite each other's results.
        save_file = '{}-{}.txt'.format(sequence, detector)
        print('Finished tracking, saving to {}'.format(save_file))
        np.savetxt(save_file, tracks, fmt='%d',delimiter=',')

In [None]:
# colors = np.random.rand(5000, 3)
# resize_scale = 0.5
# for frame in range(tracks[:, 0].min(), tracks[:, 0].max()+1):
#     if frame % 100 == 0:
#         print('Writing to frame %d' %frame)
#     img_file = os.path.join('/home/lishuai/Experiment/MOT/MOT20/test/{}/img1/{:06d}.jpg'.format(sequence, frame))
#     img = cv2.imread(img_file)
#     img = cv2.resize(img, (int(resize_scale*img.shape[1]), int(resize_scale*img.shape[0])))
#     cv2.putText(img, '{:04}'.format(frame), (0,50) ,cv2.FONT_HERSHEY_SIMPLEX, 1.5, (255,0,255), thickness=2)
    
#     bboxes = tracks[tracks[:, 0] == frame, 0:6]    
#     if bboxes.shape[0] == 0:
#         pass
#     else:
#         for i in range(bboxes.shape[0]):            
#             ID = int(bboxes[i][1])
#             x, y = int(resize_scale*(bboxes[i][2])), int(resize_scale*(bboxes[i][3]))
#             w, h = int(resize_scale*(bboxes[i][4])) , int(resize_scale*(bboxes[i][5])) 
#             cv2.rectangle(img, (x,y),(x+w,y+h), 255*colors[ID], thickness=2)
#             cv2.putText(img, str(ID), (x,y) ,cv2.FONT_HERSHEY_SIMPLEX, 0.8, 255*colors[ID], thickness=2)
            
#     cv2.imwrite('MOT20-04/tracks/{:06d}.jpg'.format(frame), img)

In [None]:
# def visDetections(sequence, detector, detections, save_dir):
    
#     resize_scale = 0.5
#     for frame in range(int(dets[:, 0].min()), int(dets[:, 0].max())+1):
#         img_file = os.path.join('/home/lishuai/Experiment/MOT/MOT17/test/MOT17-{}-{}/img1/{:06d}.jpg'.format(
#             sequence.split('-')[1], detector, frame))
#         img = cv2.imread(img_file)
#         img = cv2.resize(img, (int(resize_scale*img.shape[1]), int(resize_scale*img.shape[0])))
#         cv2.putText(img, '{:04}'.format(frame), (0,50) ,cv2.FONT_HERSHEY_SIMPLEX, 1.5, (255,0,255), thickness=2)
#         bboxes = detections[detections[:, 0] == frame, 2:7]

#         for i in range(bboxes.shape[0]):
#             x  = int(resize_scale*(bboxes[i][0]))
#             y  = int(resize_scale*(bboxes[i][1]))
#             w  = int(resize_scale*(bboxes[i][2])) 
#             h  = int(resize_scale*(bboxes[i][3])) 
#             score = bboxes[i][4]

#             drawrect(img,(x,y),(x+w,y+h),(0,0,255),2,'dotted')
#             cv2.putText(img, '{:.2f}'.format(score), (x,y-5) ,cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255,0,0), 2)
#         cv2.imwrite(save_dir + '/' + '{:06d}.jpg'.format(frame), img)

# print(sequence, detector)
# visDetections(sequence, detector, dets, save_dir='tmp')