## Data & Setup

In [None]:
# Clone the Trackformer fork and make the repository root the working directory.
# Later cells use paths relative to this root (models/, data/, src/).
# NOTE: re-running this cell from inside the clone creates a nested trackformer/ checkout;
# run it once per fresh session.
!git clone https://github.com/bharathsivaram10/trackformer.git
%cd trackformer

In [None]:
# Fetch the Trackformer model archive into models/ and extract it there.
# The working directory is never changed, so a failed step cannot leave the
# notebook in the wrong folder, and the cell is safe to re-run:
#   mkdir -p : create models/ if the checkout does not ship it
#   wget -c  : resume a partial download / skip a completed one; -P sets the target dir
#   unzip -n : never overwrite (no interactive prompt); -q keeps the output short
!mkdir -p models
!wget -c -P models https://vision.in.tum.de/webshare/u/meinhard/trackformer_models_v1.zip
!unzip -n -q models/trackformer_models_v1.zip -d models

In [None]:
# Download the three VisDrone2019-MOT archives with gdown and extract them in data/.
# NOTE: the working directory is intentionally left at data/ when this cell ends;
# the visdrone2coco(...) calls further down use paths relative to data/.
%cd data
!gdown '1-qX2d-P1Xr64ke6nTdlm33om1VxCUTSh'
!gdown '1rqnKe9IgU_crMaxRoel9_nuUsMEBBVQu'
!gdown '14z8Acxopj1d86-qhsF1NwS4Bv3KYa4Wu'
# -n: never overwrite existing files (avoids the interactive prompt on re-run)
# -q: quiet, so the per-file extraction log does not flood the cell output
!unzip -n -q VisDrone2019-MOT-train.zip
!unzip -n -q VisDrone2019-MOT-val.zip
!unzip -n -q VisDrone2019-MOT-test-dev.zip

In [None]:
# Classes
# names:
#   0: pedestrian
#   1: people
#   2: bicycle
#   3: car
#   4: van
#   5: truck
#   6: tricycle
#   7: awning-tricycle
#   8: bus
#   9: motor

# We only keep three of the classes listed above: 0 (pedestrian), 3 (car) and 5 (truck)

In [None]:
import os
import json
import shutil
from collections import defaultdict

def visdrone2coco(data_root, custom_data_dir, subset):
    '''
    Convert one VisDrone MOT split into the COCO-style layout expected by Trackformer.

    This function does two things:
    - Convert the MOT annotations to COCO style expected by Trackformer
    - Rename and change file structure such that it is the following:
    - CustomDataDir/
      - subset
        - *.jpg
      - annotations
        - subset.json

    Args:
      data_root: Extracted VisDrone split folder containing ``sequences/<seq>/<frame>.jpg``
        and ``annotations/<seq>.txt``.
      custom_data_dir: Output root; ``<custom_data_dir>/<subset>`` and
        ``<custom_data_dir>/annotations`` are created if missing.
      subset: Split name, used for the image folder and for ``<subset>.json``.

    Only pedestrian, car and truck boxes with a non-zero score column are exported;
    they become COCO categories 1 (person), 2 (car) and 3 (truck).

    Note: images are *moved* out of ``data_root``. Running the function a second
    time on the same split finds no frames and rewrites ``<subset>.json`` without
    any images, so run it once per freshly extracted split.
    '''

    # Raw VisDrone category id -> COCO category id. The annotation files are
    # 1-based (0 = ignored regions, 1 = pedestrian, 4 = car, 6 = truck), i.e. one
    # higher than the 0-based class list (0 = pedestrian, 3 = car, 5 = truck).
    class_map = {1: 1, 4: 2, 6: 3}

    # Define paths
    sequences_path = os.path.join(data_root, "sequences")
    annotations_path = os.path.join(data_root, "annotations")
    images_dir = os.path.join(custom_data_dir, subset)
    output_annotations_file = os.path.join(custom_data_dir, "annotations", f"{subset}.json")

    # shutil.move / open do not create parent folders, so make them up front.
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(os.path.dirname(output_annotations_file), exist_ok=True)

    # Prepare COCO structure
    coco_data = {
        "images": [],
        "annotations": [],
        "categories": [{"id": 1, "name": "person"}, {"id": 2, "name": "car"}, {"id": 3, "name": "truck"}]
    }
    image_id = 1
    annotation_id = 1

    # Process each sequence
    for seq_name in sorted(os.listdir(sequences_path)):
        seq_folder = os.path.join(sequences_path, seq_name)
        annotation_file = os.path.join(annotations_path, f"{seq_name}.txt")

        if not os.path.isdir(seq_folder) or not os.path.exists(annotation_file):
            continue

        # Collect (frame number, file name) pairs; entries whose stem is not a
        # plain number (e.g. hidden OS files) are left untouched. Sorting the
        # tuples orders frames numerically rather than by file name.
        frames = []
        for entry in os.listdir(seq_folder):
            stem = os.path.splitext(entry)[0]
            if stem.isdigit():
                frames.append((int(stem), entry))
        frames.sort()

        seq_length = len(frames)
        first_frame_image_id = image_id

        # Move and rename images
        frame_map = {}
        for frame_num, img in frames:
            new_filename = f"{seq_name}_{frame_num}.jpg"
            shutil.move(os.path.join(seq_folder, img), os.path.join(images_dir, new_filename))

            # Register image in COCO
            # NOTE(review): frame_id keeps the number from the file name; confirm
            # whether the Trackformer data loader expects a 0-based frame index.
            frame_map[frame_num] = image_id
            coco_data["images"].append({
                "id": image_id,
                "file_name": new_filename,
                "seq_name": seq_name,
                "frame_id": frame_num,
                "seq_length": seq_length,
                "first_frame_image_id": first_frame_image_id
            })
            image_id += 1

        # Process annotations
        with open(annotation_file, "r") as f:
            for line in f:
                fields = line.strip().split(",")
                if len(fields) < 8:
                    continue  # Blank or truncated line

                # Columns: frame, track id, x, y, w, h, score, category (rest unused)
                frame_id, track_id, x, y, w, h, not_ignored, class_id = map(float, fields[:8])

                if not not_ignored or int(class_id) not in class_map:
                    continue

                frame_id = int(frame_id)
                if frame_id not in frame_map:
                    continue  # Skip if frame_id does not exist in the image mapping

                coco_data["annotations"].append({
                    "id": annotation_id,
                    "bbox": [x, y, w, h],
                    "image_id": frame_map[frame_id],
                    "segmentation" : [],
                    "ignore": 0,
                    "visibility" : 0.5,
                    "area" : w * h,
                    "iscrowd" : 0,
                    "seq": seq_name,
                    "category_id": class_map[int(class_id)],
                    "track_id": int(track_id),
                })
                annotation_id += 1

    # Save COCO annotations
    with open(output_annotations_file, "w") as f:
        json.dump(coco_data, f, indent=4)

    print("Dataset restructuring and annotation conversion completed!")

In [None]:
# Convert each extracted split into data/VisDrone/<subset> + annotations/<subset>.json.
# These relative paths resolve against data/, which is still the working
# directory after the download cell.
visdrone2coco('VisDrone2019-MOT-train', 'VisDrone', 'train')
visdrone2coco('VisDrone2019-MOT-val', 'VisDrone', 'val')
visdrone2coco('VisDrone2019-MOT-test-dev', 'VisDrone', 'test')

# Return to the repository root: the build and training commands below use
# root-relative paths (src/..., models/..., data/VisDrone).
%cd ..

## Training

In [None]:
# Build and install the extension defined by src/trackformer/models/ops/setup.py,
# keeping the build artifacts under that same folder (--build-base).
# The path is relative, so this must run from the repository root.
!python src/trackformer/models/ops/setup.py build --build-base=src/trackformer/models/ops/ install

In [None]:
# Extra Python packages installed before training is launched.
# NOTE(review): versions are unpinned. Training is started with `!python`, so these
# packages must land in the environment of the `python` found on PATH — confirm
# if the kernel and shell environments differ.
!pip install sacred visdom motmetrics

In [None]:
# Start Training
# Launches src/train.py from the repository root (all paths below are root-relative).
# - `with mot17 deformable multi_frame tracking`: presumably Sacred named configs
#   defined by the repo — confirm against src/train.py.
# - resume: checkpoint to start from; presumably extracted from
#   trackformer_models_v1.zip into models/ — verify the path exists.
# - mot_path_train / mot_path_val + train_split / val_split: point at
#   data/VisDrone and the `train` / `val` subsets written by visdrone2coco.
# - output_dir: where this run's outputs are written; epochs: number of epochs to run.
!python src/train.py with \
    mot17 \
    deformable \
    multi_frame \
    tracking \
    resume=models/mot17_crowdhuman_deformable_multi_frame/checkpoint_epoch_40.pth \
    output_dir=models/VisDrone_deformable \
    mot_path_train=data/VisDrone \
    mot_path_val=data/VisDrone \
    train_split=train \
    val_split=val \
    epochs=20