# Basic Setup

Installing Pytorch and Detectron2.

In [1]:
# install dependencies: 
!pip install pyyaml==5.1
import torch, torchvision
print(torch.__version__, torch.cuda.is_available())
import torch


1.8.0 True


# Importing the required packages and dependencies

Some packages like the google.colab packages must be prevented from getting imported

In [2]:
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random
from os import path

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data.datasets import register_coco_instances

## FFMPEG Installation

If the code is being executed on a local linux machine, then this step can be skipped if you already have FFMPEG.
Also, this completely doesnot work for Windows PC !!!

In [3]:
from IPython.display import clear_output
import os, urllib.request
HOME = os.path.expanduser("~")
pathDoneCMD = f'{HOME}/doneCMD.sh'
if not os.path.exists(f"{HOME}/.ipython/ttmg.py"):
    hCode = "https://raw.githubusercontent.com/yunooooo/gcct/master/res/ttmg.py"
    urllib.request.urlretrieve(hCode, f"{HOME}/.ipython/ttmg.py")

from ttmg import (
    loadingAn,
    textAn,
)

loadingAn(name="lds")
textAn("Installing Dependencies...", ty='twg')
os.system('pip install git+git://github.com/AWConant/jikanpy.git')
os.system('add-apt-repository -y ppa:jonathonf/ffmpeg-4')
os.system('apt-get update')
os.system('apt install mediainfo')
os.system('apt-get install ffmpeg')
clear_output()
print('FFMPEG Installation finished.')

FFMPEG Installation finished.


# Input Data

The folder structure, in which all the clips and the associated annotations must be placed

**!! Images directory is not required. It is created in the next code cell**

```
input/
│
└─── 1/
│       1.mp4
│       1_Annotations.json
|       images/
|            labels/
|            frame_000001.jpg
|            frame_000002.jpg and so on
|
└─── 2/
│       2.mp4
│       2_Annotations.json
|       images/
|            labels/
|            frame_000001.jpg
|            frame_000002.jpg and so on
|
.
.
.
│   
└─── {Clip_Number}/
        {Clip_Number}.mp4
        {Clip_Number}_Annotations.json
        images/
             labels/
             frame_000001.jpg
             frame_000002.jpg and so on
```

## Splitting the clips into frames
In the below code cell, we are using FFMPEG Commands to split the clip into images. 

In [7]:
# main_path refers to current working directory
main_path = os.getcwd()
print(main_path)
# input_path refers to the folder containing the clips folders (as mentioned above in the diagram)
#input_path = main_path + '/input'
input_path = os.path.abspath(r'D:\Project_Escooter_Tracking\input')
print(input_path)

for dir in os.listdir(input_path):
  clip_path = input_path + f'\\{dir}'
  print(clip_path)
  
  image_path = clip_path + '\\images'
  os.system(f'mkdir {image_path}')

  labels_path = image_path + '\\labels'
  os.system(f'mkdir {labels_path}')

  clip_path += f'\\{dir}.mp4'

  ffmpeg_command = 'ffmpeg -i ' + clip_path + " " + image_path + '\\frame_%06d.png'
  print(ffmpeg_command)
  os.system(ffmpeg_command)

C:\Users\balaji\Desktop\Traffic_Camera_Tracking\Main_Code
D:\Project_Escooter_Tracking\input
D:\Project_Escooter_Tracking\input\16
ffmpeg -i D:\Project_Escooter_Tracking\input\16\16.mp4 D:\Project_Escooter_Tracking\input\16\images\frame_%06d.png
D:\Project_Escooter_Tracking\input\18
ffmpeg -i D:\Project_Escooter_Tracking\input\18\18.mp4 D:\Project_Escooter_Tracking\input\18\images\frame_%06d.png
D:\Project_Escooter_Tracking\input\2
ffmpeg -i D:\Project_Escooter_Tracking\input\2\2.mp4 D:\Project_Escooter_Tracking\input\2\images\frame_%06d.png
D:\Project_Escooter_Tracking\input\20
ffmpeg -i D:\Project_Escooter_Tracking\input\20\20.mp4 D:\Project_Escooter_Tracking\input\20\images\frame_%06d.png
D:\Project_Escooter_Tracking\input\21
ffmpeg -i D:\Project_Escooter_Tracking\input\21\21.mp4 D:\Project_Escooter_Tracking\input\21\images\frame_%06d.png
D:\Project_Escooter_Tracking\input\22
ffmpeg -i D:\Project_Escooter_Tracking\input\22\22.mp4 D:\Project_Escooter_Tracking\input\22\images\frame_%0

# Reading, manipulating and checking the annotations on the images

### This code section is partly derived from Check_Dataset.ipynb and ReadJSON.py. 
---------------------------------------------------------------------

This is my Custom Code for checking the dataset 😎. 

When we annotate in CVAT, for each clip we receive one annotation file. Unfortunately, these annotation files have _'frame numbers'_ that start only from 0 but Detectron 2 needs it to start from **1**.

From a separate file in the repo called **ReadJSON.py**, I have written a script to combin all these annotation files into 3 files **Final_Annnotation_Train.json**, **Final_Annnotation_Valid.json** and **Final_Annnotation_Test.json** . The below given code cell replicates that.

In the 2nd code cell, we are incrementing all the frame numbers by 1 and showing the images along with their labels for verfication.

In [8]:
import json
from copy import deepcopy
import random

# coco_format is the dict file which includes all the values that needs to be output in the final annotations json file
# Some of the key values like 'licenses', 'info' and 'categories' are constant and declared at first here

coco_format = {
    "licenses": [{
        "name": "",
        "id": 0,
        "url": ""
    }],
    "info": {
        "contributor": "Vishal Balaji",
        "date_created": "",
        "description": "Escooter Dataset",
        "url": "",
        "version": "",
        "year": ""
    },
    "categories": [{
        "id": 1,
        "name": "Escooter",
        "supercategory": ""
    }]
}

# The key values 'images' and 'annotations' needs to be processed and appended. The below given lines is the format for
# those dicts.
"""
"images":[
    {
        "id":1,
        "width": 1920,
        "height": 1080,
        "file_name":"sdfa.PNG",
        "license":0,
        "flickr_url": "",
        "coco_url": "",
        "date_captured": 0
    }
]

"annotations":[
    {
        "id": 1,
        "image_id": 55,
        "category_id": 1,
        "segmentation": [[]],
        "area": {some area number in float},
        "bbox": [].
        "iscrowd": 0
    }
]
"""

# Path where the annotations are stored, when the repo is the path of current working directory
#main_file_path = os.path.abspath(r'D:\Carissma Video Copy\Traffic Camera Tracking\Finished')
input_path = r'D:\Project_Escooter_Tracking\input'
main_file_path = input_path

# Declaration of empty lists that is later appended it with images and annotations.
images_list = []
annotations_list = []

# Each image and annotations has an ID associated with it and it starts with 1.
# These values are incremented as the images and annotations are being added.
img_num = 1
anno_num = 1

"""
Folder Structure

This folder structure must be maintained as the code with Google Colab also works with the same structure.
- Finished
    - 2
        - images
        - annotations
            - 2_Annotations.json
        - 2.mp4
    - 3
        - images
        - annotations
            - 3_Annotations.json
        - 3.mp4
    - 4
    .
    .
    .
    .
    - {Clip_Number}
        - images (contains all images starting with 'frame_000001.jpg')
        - annotations
            - {Clip_Number}_Annotations.json
        - {Clip_Number}.mp4
"""


print("Processed the following annotation files: ")
for clip_number, clips in enumerate(os.listdir(main_file_path)):
    # Checking that only numbers are given as folder names for the clips
    if all(char.isdigit() for char in clips):
      # Path of the clips folder
      clips_path = main_file_path + '/' + clips
      # Path of the annotation of the clips
      annotation_file = clips_path + f'/{str(clips)}_Annotations.json'

      file = open(annotation_file)
      json_file = json.load(file)
      print(f'  - {annotation_file}')

      # !! Testing purpose only for restricting number of annotations
      # flag = 1
      for annotations in json_file['annotations']:

          anno_image_ID = annotations['image_id']
          anno_ID = annotations['id']

          image_filename = ''
          for images in json_file['images']:
              if images['id'] == anno_image_ID:
                  image_filename = images['file_name']

          filename = input_path + '/' + clips + '/images/' + image_filename
          # The formats for 'images' dictionary and 'annotations' dictionary in COCO
          image_dict = {
              'id': img_num,
              "width": 1920,
              "height": 1080,
              "file_name": filename,
              "license": 0,
              "flickr_url": "",
              "coco_url": "",
              "date_captured": 0
          }
          anno_dict = {
              "id": anno_num,
              'image_id': img_num,
              "category_id": 1,
              'segmentation': annotations['segmentation'],
              'area': annotations['area'],
              'bbox': annotations['bbox'],
              'iscrowd': annotations['iscrowd']
          }

          # In the COCO-Format, every images and associated annotations are passed as array of dicts.
          images_list.append(image_dict)
          annotations_list.append(anno_dict)

          # Incrementing the Image ID and Annotation ID for each loop
          img_num += 1
          anno_num += 1

          # !! Meant only for testing purpose. To check with just 2 annotations per file
          # if flag == 2:
          #     break
          # flag += 1

      # Storing the processed arrays of images and annotations with their
      # respective keys in the final dataset
      # coco_format["images"] = images_list
      # coco_format["annotations"] = annotations_list

      file.close()

      # !! Meant for testing purpose.
      # if clip_number == 1:
      #     break

train_json = deepcopy(coco_format)
valid_json = deepcopy(coco_format)
test_json = deepcopy(coco_format)

train_split = 0.8
valid_split = 0.1
test_split = 0.1

# Function to split the whole dataset of images and annotations into train,
# valid and test sets
def splitDataset(images, annotations, trainSplit, validSplit):
  trainSize = int(len(images) * trainSplit)
  train_images = []
  train_annotations = []
  
  copy_images = list(images)
  copy_annotations = list(annotations)
  while len(train_images) < trainSize:
    index = random.randrange(len(copy_images))
    train_images.append(copy_images.pop(index))
    train_annotations.append(copy_annotations.pop(index))
  

  copySize = int(len(copy_images) * (validSplit/(1 - trainSplit)))
  valid_images = []
  valid_annotations = []

  test_images = copy_images
  test_annotations = copy_annotations
  while len(valid_images) < copySize:
    index = random.randrange(len(test_images))
    valid_images.append(test_images.pop(index))
    valid_annotations.append(test_annotations.pop(index))
  
  return [(train_images, train_annotations), (valid_images, valid_annotations), (test_images, test_annotations)]

train_set, valid_set, test_set = splitDataset(images_list, annotations_list, 0.8, 0.1)
print("\nSplitting the dataset into Train, Valid and Test is successfull\n")

train_json['images'] = train_set[0]
train_json['annotations'] = train_set[1]

valid_json['images'] = valid_set[0]
valid_json['annotations'] = valid_set[1]

test_json['images'] = test_set[0]
test_json['annotations'] = test_set[1]

# Code Snippet to automatically create new names for the many
# .json files created during the testing
base_filename = 'Test_'
for numbers in range(20):
    check_filename = base_filename + str(numbers+1) + '.json'
    if check_filename not in os.listdir(os.getcwd()):
        base_filename = check_filename
        break


# These lines writes all the dictionaries into the final required .json file
# For train, valid and test individually
train_file = f"{os.getcwd()}/input/{base_filename[:-5]}_Train.json"
valid_file = f"{os.getcwd()}/input/{base_filename[:-5]}_Valid.json"
test_file = f"{os.getcwd()}/input/{base_filename[:-5]}_Test.json"

with open(train_file, "w") as file:
    json.dump(train_json, file)
    print(f"Final training set file saved as: {train_file}")

with open(valid_file, "w") as file:
    json.dump(valid_json, file)
    print(f"Final valid set file saved as: {valid_file}")

with open(test_file, "w") as file:
    json.dump(test_json, file)
    print(f"Final test set file saved as: {test_file}")

Processed the following annotation files: 
  - D:\Project_Escooter_Tracking\input/16/16_Annotations.json
  - D:\Project_Escooter_Tracking\input/18/18_Annotations.json
  - D:\Project_Escooter_Tracking\input/2/2_Annotations.json
  - D:\Project_Escooter_Tracking\input/20/20_Annotations.json
  - D:\Project_Escooter_Tracking\input/21/21_Annotations.json
  - D:\Project_Escooter_Tracking\input/22/22_Annotations.json
  - D:\Project_Escooter_Tracking\input/23/23_Annotations.json
  - D:\Project_Escooter_Tracking\input/24/24_Annotations.json
  - D:\Project_Escooter_Tracking\input/25/25_Annotations.json
  - D:\Project_Escooter_Tracking\input/26/26_Annotations.json
  - D:\Project_Escooter_Tracking\input/27/27_Annotations.json
  - D:\Project_Escooter_Tracking\input/28/28_Annotations.json
  - D:\Project_Escooter_Tracking\input/29/29_Annotations.json
  - D:\Project_Escooter_Tracking\input/3/3_Annotations.json
  - D:\Project_Escooter_Tracking\input/30/30_Annotations.json
  - D:\Project_Escooter_Trackin

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\balaji\\Desktop\\Traffic_Camera_Tracking\\Main_Code/input/Test_1_Train.json'

In [None]:
from IPython.display import Image, display

path = os.getcwd()
path_images = os.getcwd() + '/images'

register_coco_instances("escooter_train", {}, train_file, '')
register_coco_instances("escooter_valid", {}, valid_file, '')
register_coco_instances("escooter_test", {}, test_file, '')

existing_files = []
for img in os.listdir(path_images):
  fileName = path_images + '/' + img
  existing_files.append(fileName)

dataset_dicts = DatasetCatalog.get("escooter_train")

# List of labelled_images for visualization purposes
out_files = []
for d in dataset_dicts:
    # Adjusting for difference in frame
    file_name_from_dict = d["file_name"].split('.')[0]
    file_number = int(file_name_from_dict[-6:])
    
    # 1 is the offset number for the frame difference between the annotations 
    # from CVAT and frames extracted from the FFMPEG Script
    file_number += 1
    
    # Adding the write number of 0's and taking care of proper filename
    if int(file_number / 10) == 0:
      new_file_name = file_name_from_dict[:-6] + '00000' + str(file_number) + '.png'
    elif int(file_number / 100) == 0:
      new_file_name = file_name_from_dict[:-6] + '0000' + str(file_number) + '.png'
    elif int(file_number / 1000) == 0:
      new_file_name = file_name_from_dict[:-6] + '000' + str(file_number) + '.png'
    elif int(file_number / 10000) == 0:
      new_file_name = file_name_from_dict[:-6] + '00' + str(file_number) + '.png'
    
  
    
    if new_file_name in existing_files:   
      print(new_file_name)  
      file_name = new_file_name
      img = cv2.imread(file_name)
  
      visualizer = Visualizer(img[:, :, ::-1], scale=1)
      out = visualizer.draw_dataset_dict(d)
      
      #cv2_imshow(out.get_image()[:, :, ::-1])

      out_filename = file_name[:-4] + '_labelled.png'
      out_filename = '/content/images/labels/' + file_name[:-4].split('/')[-1] + '.png'
      cv2.imwrite(out_filename, out.get_image()[:, :, ::-1])
      out_files.append(out_filename)


for images in out_files:
  display(Image(images))

# Training the model

We are using transfer learning here to use an already COCO-pretrained R50-FPN Mask R-CNN model

In [None]:
from detectron2.engine import DefaultTrainer

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("escooter_train",)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
cfg.SOLVER.MAX_ITER = 300    # 300 iterations seems good enough for this toy dataset; you will need to train longer for a practical dataset
cfg.SOLVER.STEPS = []        # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256   # faster, and good enough for this toy dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # only has one class (ballon). (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)
# NOTE: this config means the number of classes, but a few popular unofficial tutorials incorrect uses num_classes+1 here.

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg) 
trainer.resume_or_load(resume=False)
trainer.train()

In [None]:
# Look at training curves in tensorboard:
%load_ext tensorboard
%tensorboard --logdir output