In [1]:
import torch

In [2]:
torch.__version__

'1.5.0+cu101'

In [3]:
from google.colab import drive

drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [4]:
"""
%load_ext autoreload
%autoreload 2
%matplotlib inline

import os
import sys

!pip install tqdm lap
!pip install https://github.com/timmeinhardt/py-motmetrics/archive/fix_pandas_deprecating_warnings.zip
!pip install torch-scatter==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.5.0.html
!pip install torch-sparse==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.5.0.html
!pip install torch-cluster==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.5.0.html
!pip install torch-spline-conv==latest+cu101 -f https://pytorch-geometric.com/whl/torch-1.5.0.html
!pip install torch-geometric
"""



In [5]:
import os
import sys
import matplotlib.pyplot as plt
import numpy as np
import time
from tqdm.autonotebook import tqdm

import torch
from torch.utils.data import DataLoader

In [6]:
root_dir1 = "gdrive/My Drive/Colab Notebooks/cv3dst_exercise/cv3dst_exercise"

In [7]:
sys.path.append(os.path.join(root_dir1, 'src'))

In [8]:
from tracker.data_track import MOT16Sequences
from tracker.data_obj_detect import MOT16ObjDetect
from tracker.object_detector import FRCNN_FPN
from tracker.tracker import Tracker
from tracker.utils import (plot_sequence, evaluate_mot_accums, get_mot_accum,
                           evaluate_obj_detect, obj_detect_transforms)
import motmetrics as mm
mm.lap.default_solver = 'lap'

In [9]:
seq_name = 'MOT16-02'
root_dir = "gdrive/My Drive/Colab Notebooks/cv3dst_exercise/cv3dst_exercise"
data_dir = os.path.join(root_dir, 'data/MOT16')
sequences = MOT16Sequences(seq_name, data_dir)


In [10]:
obj_detect_model_file = os.path.join(root_dir, 'models/faster_rcnn_fpn.model')
obj_detect_nms_thresh = 0.3

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)
# object detector
obj_detect = FRCNN_FPN(num_classes=2, nms_thresh=obj_detect_nms_thresh)
obj_detect_state_dict = torch.load(obj_detect_model_file,
                                   map_location=lambda storage, loc: storage)
obj_detect.load_state_dict(obj_detect_state_dict)
obj_detect.eval()
obj_detect.to(device)

cuda


FRCNN_FPN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): FrozenBatchNorm2d()
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): FrozenBatchNorm2d()
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): FrozenBatchNorm2d()
          (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn3): FrozenBatchNorm2d()
          (relu): ReLU(inplace=True)
          (downsample): Sequenti

In [11]:
def collate_fn(batch):
    img_list = []
    gt = {}
    img_path = []
    vis = {}
    seg_img = []
    for sample in batch: 
      img_list.append(sample['img'])
      img_path.append(sample['img_path'])
      if 'seg_img' in sample: 
        seg_img.append(torch.tensor(sample['seg_img']))
      #tensor_batch['img'] = torch.stack([sample['img']for sample in batch])
      for key in sample['gt'].keys():
        if key not in gt.keys():
          gt[key] = [] 
        gt[key].append(torch.from_numpy(sample['gt'][key]))
      for key in sample['vis'].keys(): 
        if key not in vis.keys():
          vis[key] = [] 
        vis[key].append(sample['vis'][key])
    img = torch.stack(img_list)
    for key in gt.keys(): 
      gt[key] = torch.stack(gt[key])
    for key in vis.keys(): 
      vis[key] = torch.tensor(vis[key])
    
    
    batch_frames = {}
    batch_frames['img'] = img
    batch_frames['gt'] = gt
    batch_frames['img_path'] = img_path
    batch_frames['vis'] = vis
    if len(seg_img): 
      seg_img = torch.stack(seg_img)
      batch_frames['seg_img'] = seg_img
    return batch_frames

      

In [None]:
from torchvision.ops import roi_align
from tracker.feature_encoder import NodeEncoder, EdgeEncoder
from torch_geometric.data import Data
from torch_geometric import utils

def load_data(): 
    node_encoder = NodeEncoder()
    node_encoder.eval()
    #node_encoder.to(device)
    edge_encoder = EdgeEncoder()
    edge_encoder.eval()
    #edge_encoder.to(device)

    inc = 0
    list_boxes = []
    list_img = []
    edge_list = []
    total_node = 0
    k = 50
    timestamp = []
    graph_list = []
    edge_features = []

    for seq in sequences:# Each seq is a MOT16Sequence object
        print(f"Tracking: {seq}")
        data_loader = DataLoader(seq, batch_size=5, shuffle=False, num_workers = 0, collate_fn = collate_fn) #NOTE: shuffle  = False. This is order sequence !!!!
        for batch_frame in data_loader: #batch_frame: size 5x3x1080x1920 (NxCxHxW)
            boxes, scores = obj_detect.detect(torch.unsqueeze(batch_frame['img'][0],0)) #boxes: size Bx4, B: number of boxes in that frame
            list_boxes.append(boxes)#list of boxes
            list_img.append(batch_frame['img'][0]) #list of frame. each frame size CxHxW
            timestamp.extend([inc]*boxes.shape[0])
            inc = inc+1
            print(f"Loading data {inc}")
            if inc==15: 
              batch_images = torch.stack(list_img) #batch_images: size NxCxHxW, N: number of frames 
              detections = roi_align(batch_images, list_boxes, (128,64)) #detections: size KxCx128x64, K: number of boxes in 15 frames 
              #detections = detections.to(device)
              node_embeddings = node_encoder(detections)  #size  Kx32

          #######1. Create adjacency matrix and add it to the graph
              for boxes_perframe in list_boxes: 

                  num_node = boxes_perframe.size()[0]
                  current_nodes = torch.unsqueeze(node_embeddings[total_node: total_node + num_node],1)
                  other_nodes = torch.cat([node_embeddings[:total_node],node_embeddings[total_node + num_node:]])
                  total_node = total_node + num_node

                  matrix_norm = (current_nodes-other_nodes).norm(dim = 2)
                  _,targetnodes_idx = matrix_norm.topk(k,1) 
                  targetnodes_idx[targetnodes_idx>total_node-num_node-1] += num_node


                  targetnodes_idx = torch.squeeze(targetnodes_idx.view(-1,1))
                  curnodes_idx = torch.tensor(range(total_node-num_node, total_node)).repeat_interleave(k) #should not start from 0, but from numnode => change range()
                  #curnodes_idx = curnodes_idx.to(device)
                  edge_index = torch.stack((curnodes_idx, targetnodes_idx), dim = 1)
                  edge_list.append(edge_index)

          #Generate adjacency matrix 
              #node_embeddings = node_embeddings.to('cpu')
              #adjacency_mat = torch.cat(edge_list, dim = 0).to('cpu') 
              adjacency_mat = torch.cat(edge_list, dim = 0) #adjacency_mat is for one graph of 15 frames 
              remove_list = []
          #Create a graph based on node features and adjacency matrix, then add the graph to list
              data = Data(x=node_embeddings, edge_index=adjacency_mat.t().contiguous())
              nx_graph = utils.to_networkx(data)
              to_remove = [(v,u) for v,u in nx_graph.edges() if not nx_graph.has_edge(u,v)]
              nx_graph.remove_edges_from(to_remove)
              data = utils.from_networkx(nx_graph)
              data.x = node_embeddings


          #######2. Generate edge attributes and add it to the graph
              timestamp = torch.tensor(timestamp) #size K 
              boxes_info = torch.cat(list_boxes) #all_boxes: size Kx4
              boxes_info[:,2] = boxes_info[:,2]-boxes_info[:,0]+1 #W
              boxes_info[:,3] = boxes_info[:,3]-boxes_info[:,1]+1 #H

              for edge in range(data.edge_index.shape[1]): 
                source_idx = data.edge_index[0,edge]
                target_idx = data.edge_index[1,edge]
                feat1 = 2*(boxes_info[target_idx,0]-boxes_info[source_idx,0])/(boxes_info[target_idx,3]+boxes_info[source_idx,3]) # 2(xj-xi)/(hi+hj)
                feat2 = 2*(boxes_info[target_idx,1]-boxes_info[source_idx,1])/(boxes_info[target_idx,3]+boxes_info[source_idx,3])# 2(yj-yi)/(hi+hj)
                feat3 = torch.log(boxes_info[source_idx,3]/boxes_info[target_idx,3]) #log(hi/hj)
                feat4 = torch.log(boxes_info[source_idx,2]/boxes_info[target_idx,2]) #log (wi/wj)
                feat5 = timestamp[target_idx] - timestamp[source_idx]
                feat6 = (node_embeddings[target_idx]-node_embeddings[source_idx]).norm()
                edge_feature = torch.tensor([feat1,feat2,feat3,feat4,feat5,feat6])
                edge_features.append(edge_feature)
              
              #edge_features = torch.stack(edge_features).to(device)
              edge_features = torch.stack(edge_features) #size Ex6, E: number of edges in the graph 
              print(edge_features.shape)
              #edge_embeddings = edge_encoder(edge_features).to('cpu')
              edge_embeddings = edge_encoder(edge_features)


              data.edge_attr = edge_embeddings
              graph_list.append(data)
              print("Create one graph")
          
          #Set lists to 0 and to start new graph
              inc = 0
              list_boxes = []
              list_img = []
              total_node = 0
              edge_list = []
              timestamp = []
              edge_features = []
              #crashed after using all available RAM. try to set all variables to None to solve problem
              batch_images = None
              detections = None
              node_embeddings = None
              current_nodes = None
              other_nodes = None
              matrix_norm = None
              targetnodes_idx = None
              data = None
              edge_embeddings = None
    return graph_list
graph_list = load_data()
    

Tracking: MOT16-02
Loading data 1
Loading data 2
Loading data 3
Loading data 4
Loading data 5
Loading data 6
Loading data 7
Loading data 8
Loading data 9
Loading data 10
Loading data 11
Loading data 12
Loading data 13
Loading data 14
Loading data 15
torch.Size([5620, 6])
Create one graph
Loading data 1
Loading data 2
Loading data 3
Loading data 4
Loading data 5
Loading data 6
Loading data 7
Loading data 8
Loading data 9
Loading data 10
Loading data 11
Loading data 12
Loading data 13
Loading data 14
Loading data 15


In [13]:
graph_list[0]

Data(edge_attr=[5782, 16], edge_index=[2, 5782], x=[241, 32])

In [29]:
graph_list[0].edge_index[:,5000:5100]

tensor([[223, 223, 223, 223, 223, 223, 223, 223, 223, 223, 223, 223, 223, 223,
         223, 223, 223, 223, 223, 223, 223, 223, 223, 223, 223, 223, 223, 223,
         223, 223, 223, 223, 223, 223, 223, 223, 223, 223, 223, 223, 223, 223,
         223, 223, 223, 223, 224, 224, 224, 225, 225, 225, 225, 225, 225, 225,
         225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225,
         225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225, 225,
         225, 225, 226, 226, 226, 226, 226, 227, 227, 227, 227, 227, 227, 227,
         227, 227],
        [ 12, 235, 138,  21, 236, 231, 130, 163,  76,  55,  27,   6, 152,  49,
          16, 187,  98,  51,  73, 201, 199, 113,  66,   5,  36,  88, 229, 148,
         146,  57,  28, 122, 135,  83,  60, 190, 183,  13,  32, 234,  35,  61,
         233, 177, 160, 166,  95, 219, 176,  71,  88,  43, 138,  41, 178,  57,
         105,  50,  59,  83,  25,  19,  77,  47, 133,   3, 115, 181, 148, 150,
         211,  66, 125,  92, 214

In [None]:
"""
from torchvision.ops import roi_align
for seq in sequences:# Each seq is a MOT16Sequence object

    print(f"Tracking: {seq}")
    inc = 0
    

    data_loader = DataLoader(seq, batch_size=6, shuffle=False) #NOTE: shuffle  = False. This is order sequence !!!!
    print("Done loading data")
    for batch_frame in tqdm(data_loader):
        list_boxes, list_scores = obj_detect.detect(batch_frame['img'])
        detections = roi_align(batch_frame['img'], list_boxes, (128,64))
        break

"""

"""
from torchvision.ops import roi_align
for seq in sequences:# Each seq is a MOT16Sequence object

    print(f"Tracking: {seq}")
    list_boxes = []
    list_img = []
    inc = 0
    

    data_loader = DataLoader(seq, batch_size=1, shuffle=False) #NOTE: shuffle  = False. This is order sequence !!!!
    print("Done loading data")
    for frame in tqdm(data_loader):
        boxes, scores = obj_detect.detect(frame['img'])
        list_boxes.append(boxes)
        list_img.append(torch.squeeze(frame['img']))
        inc = inc+1
        if inc==5: 
            break
    batch_images = torch.stack(list_img)
    detections = roi_align(batch_images, list_boxes, (128,64))
"""
    