In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

root_dir = '..'

import os
import sys
sys.path.append(os.path.join(root_dir, 'src'))

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import time
from tqdm.autonotebook import tqdm

import torch
import torch.nn as nn
from torch.utils.data import DataLoader

from tracker.data_track import MOT16Sequences
from tracker.data_obj_detect import MOT16ObjDetect
from tracker.object_detector import FRCNN_FPN
from tracker.tracker import Tracker
from tracker.utils import (plot_sequence, evaluate_mot_accums, get_mot_accum,
                           evaluate_obj_detect, obj_detect_transforms)

import motmetrics as mm
mm.lap.default_solver = 'lap'

  after removing the cwd from sys.path.


In [3]:
dataset_test = MOT16ObjDetect(os.path.join(root_dir, 'data/MOT16/train'),
                              obj_detect_transforms(train=False))

In [4]:
def collate_fn(batch):
    return tuple(zip(*batch))
data_loader_test = DataLoader(
    dataset_test, batch_size=1, shuffle=False, num_workers=4,
    collate_fn=collate_fn)

# Multi-object tracking

We provide you with a simple baseline tracker which predicts object detections for each frame and generates tracks by assigning current detections to previous detections via Intersection over Union.

Try to understand the baseline tracker and think of ideas on how to improve it with the knowledge from the lecture or even beyond.

## Configuration

In [5]:
seed = 12345
seq_name = 'MOT16-02'
data_dir = os.path.join(root_dir, 'data/MOT16')
output_dir = os.path.join(root_dir, 'output')

## Setup

In [6]:
sequences = MOT16Sequences(seq_name, data_dir, )

data_loader = DataLoader(sequences[0], batch_size=1, shuffle=False)

In [7]:
seq_labels = {}

In [32]:
for i, frame in enumerate(data_loader):
    for k, v in frame['gt'].items():
        if k not in seq_labels:
            seq_labels[k] = np.zeros((len(data_loader), 4), )
        seq_labels[k][i] = v.numpy() #* [1.0/1920, 1.0/1080, 1.0/1920, 1.0/1080]

In [33]:
from random import shuffle

train_data = []
for v in seq_labels.values():
    for i in range(5, len(v), 1):
        x = np.array(v[i - 5: i])
        y = v[i]
        if np.any(x == 0.0) or np.all(y == 0.0):
            continue
        train_data.append((x, y))
        
shuffle(train_data)

In [34]:
len(train_data)

18279

In [35]:
test_data = train_data[int(len(train_data) * 0.8):]
train_data = train_data[:int(len(train_data) * 0.8)]

In [22]:
class LSTM(nn.Module):
    def __init__(self, input_size=4, hidden_layer_size=128, output_size=4):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size

        self.lstm = nn.LSTM(input_size, hidden_layer_size, batch_first=True)

        self.linear = nn.Linear(hidden_layer_size, output_size)

    def forward(self, input_seq):
        _, (h_last, _) = self.lstm(input_seq)
        predictions = self.linear(h_last)
        return predictions.reshape(input_seq.shape[0], 4)

In [23]:
model = LSTM().cuda()
loss_function = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [24]:
from torch.utils import data

tensor_x = torch.Tensor([t[0] for t in train_data]) # transform to torch tensor
tensor_y = torch.Tensor([t[1] for t in train_data])

my_dataset = data.TensorDataset(tensor_x,tensor_y) # create your datset
my_dataloader = data.DataLoader(my_dataset, batch_size=8, shuffle=True, num_workers=4)

In [25]:
epochs = 25

for i in range(epochs):
    for seq, labels in tqdm(my_dataloader):
        optimizer.zero_grad()
        
        seq = seq.cuda()
        labels = labels.cuda()

        y_pred = model(seq)

        single_loss = loss_function(y_pred, labels)
        single_loss.backward()
        optimizer.step()

    print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')

print(f'epoch: {i:3} loss: {single_loss.item():10.10f}')

HBox(children=(IntProgress(value=0, max=1828), HTML(value='')))


epoch:   0 loss: 0.00005060


HBox(children=(IntProgress(value=0, max=1828), HTML(value='')))


epoch:   1 loss: 0.00003443


HBox(children=(IntProgress(value=0, max=1828), HTML(value='')))


epoch:   2 loss: 0.00003061


HBox(children=(IntProgress(value=0, max=1828), HTML(value='')))


epoch:   3 loss: 0.00004678


HBox(children=(IntProgress(value=0, max=1828), HTML(value='')))


epoch:   4 loss: 0.00004524


HBox(children=(IntProgress(value=0, max=1828), HTML(value='')))


epoch:   5 loss: 0.00001656


HBox(children=(IntProgress(value=0, max=1828), HTML(value='')))


epoch:   6 loss: 0.00001787


HBox(children=(IntProgress(value=0, max=1828), HTML(value='')))


epoch:   7 loss: 0.00015205


HBox(children=(IntProgress(value=0, max=1828), HTML(value='')))


epoch:   8 loss: 0.00002238


HBox(children=(IntProgress(value=0, max=1828), HTML(value='')))


epoch:   9 loss: 0.00000702


HBox(children=(IntProgress(value=0, max=1828), HTML(value='')))


epoch:  10 loss: 0.00001020


HBox(children=(IntProgress(value=0, max=1828), HTML(value='')))




KeyboardInterrupt: 

In [None]:
model(torch.Tensor(test_data[0][0]).unsqueeze(0).cuda()), test_data[0][1]

In [29]:
ind = 3
model(torch.Tensor(test_data[ind][0]).unsqueeze(0).cuda()).detach().cpu().numpy()  * [1920, 1080, 1920, 1080], test_data[ind][1] * [1920, 1080, 1920, 1080]

(array([[929.41778183, 436.79534554, 961.26068115, 547.5700736 ]]),
 array([926., 434., 968., 547.]))

In [37]:
test_data[ind]

(array([[1310.,  444., 1355.,  547.],
        [1311.,  444., 1356.,  547.],
        [1312.,  444., 1358.,  547.],
        [1313.,  444., 1359.,  547.],
        [1314.,  444., 1361.,  547.]]), array([1315.,  444., 1363.,  547.]))

In [31]:
model(torch.Tensor(test_data[ind][0]).unsqueeze(0).cuda())

tensor([[0.4841, 0.4044, 0.5007, 0.5070]], device='cuda:0',
       grad_fn=<AsStridedBackward>)

In [95]:
y = test_data[ind][0][:, 2][1:4]

In [96]:
x = np.array(list(range(5)))[:3]

In [100]:
A = np.vstack([x, np.ones(len(x))]).T
m, b = np.linalg.lstsq(A, y, rcond=None)[0]

In [101]:
m * len(x) + b

1360.6666666666665

In [102]:
len(x)

3

In [105]:
len(seq_labels.keys())

62