<a href="https://colab.research.google.com/github/joangog/object-detection/blob/main/mask_training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Model training on mask dataset





Datasets:
*   MASKD (AICrowd)
*   Properly Wearing Masked Face Detection Dataset 

Models:
*   YOLOv5n
*   YOLOv5s
*   YOLOv5m

**Note: GPU Runtime needed (hosted or local)**

*Example GPU: Tesla K80, 460.32.03, 11441 MiB*

In [1]:
# Show system specs
# Queries the GPU name, driver version and total memory and prints them as CSV.
!nvidia-smi --query-gpu=gpu_name,driver_version,memory.total --format=csv

name, driver_version, memory.total [MiB]
Tesla K80, 460.32.03, 11441 MiB


### Initialization


In [4]:
# Parameters
# All tunable settings of the notebook live in this cell; later cells read these names directly.

dataset_name = 'PWMFD'  # 'MASKD' or 'PWMFD'

load_ckpt = False  # Whether to continue from an existing training checkpoint
load_cfg = False  # Whether to load a custom model config (the model config determines the model architecture)
pretrained = True  # Whether to start from a COCO-pretrained model (True) or from randomly initialized weights (False)
freeze_layers = True # Whether to freeze layers (for YOLO models); the number of layers is set by frozen_layers

img_res = 320  # Image resolution (for YOLO models), try 608, 512, 416, 320 or 224
num_workers = 2  # Data loader workers
batch_size = 32  # Data loader batch size

mosaic = 1  # Probability for image mosaic data augmentation (for YOLO models)
mixup = 0 # Probability for image mixup data augmentation, works only when mosaic = 1 (for YOLO models)
fl_gamma = 0 # Focal Loss gamma (for YOLO models)
 
num_epochs = 50  # Epochs

frozen_layers = 10  # Layers to freeze (for YOLO models), try 10 (freeze backbone) or 24 (freeze all but last layer)

optimizer_type = 'sgd'  # 'sgd' or 'adam'
momentum = 0.937  # Optimizer momentum (only for SGD optimizer)
weight_decay = 0.0005  # Optimizer weight decay

lr = 0.01  # Learning rate (lr_max=lr)
lr_f = 0.1  # Learning rate multiplier (lr_min=lr*lr_f)

step_size = 3  # (Experimental) Learning rate step size (not used for YOLO, only models that use StepLR)
gamma = 0.1  # (Experimental) Learning rate decay (not used for YOLO, only models that use StepLR)


# Directories

save_ckpt_folder = None  # When None, a name for the model folder is generated automatically
load_ckpt_path = ''  # Loaded weights path (used when load_ckpt is True)
save_ckpt_dir = '/content/drive/MyDrive/object-detection-checkpoints'  # Model save root directory (used when load_ckpt is False)
load_cfg_path  = ''  # For YOLO models (used when load_cfg is True)

# os is imported here, ahead of the main import cell, because the paths below are built with it
import os
root_dir = os.getcwd()  # Root dir of project (the working directory at the time this cell runs)
dataset_dir = os.path.join(root_dir,f'dataset_{dataset_name}')

# Images, one sub-folder per split
img_dir = os.path.join(dataset_dir,'images')
val_img_dir = os.path.join(img_dir,'val_images')
train_img_dir = os.path.join(img_dir,'train_images')

# YOLO-format labels, one sub-folder per split (named like the image folders)
label_dir = os.path.join(dataset_dir,'labels')
val_label_dir = os.path.join(label_dir,'val_images')
train_label_dir = os.path.join(label_dir,'train_images')

# COCO-format annotation files (val.json / train.json)
ann_dir = os.path.join(dataset_dir,'annotations')

### Get requirements
*Note: Installation takes about 5 minutes. If a problem arises, restart the runtime after installation.*

In [None]:
# Clone asset files
!cd {root_dir}
!git clone https://github.com/joangog/object-detection-assets
!mv -n {os.path.join(root_dir,'object-detection-assets','scripts')} ./
!mv -n {os.path.join(root_dir,'object-detection-assets','config')} ./
!mv -n {os.path.join(root_dir,'object-detection-assets','requirements.txt')} ./
!rm -rf {os.path.join(root_dir,'object-detection-assets')}


In [None]:
# Install packages
!cd {root_dir}
!pip install -r requirements.txt

In [None]:
# Install YOLOv5
!cd {root_dir}
!git clone https://github.com/joangog/yolov5.git
!pip install -r {os.path.join(root_dir,'yolov5','requirements.txt')}

In [None]:
# Install YOLOv3
!cd {root_dir}
!git clone https://github.com/ultralytics/yolov3
!pip install -r {os.path.join(root_dir,'yolov3','requirements.txt')}

In [None]:
# Install unrar command
if os.geteuid() != 0:  # If not root, ask for sudo priviledges
  from getpass import getpass
  password = getpass('Insert sudo password:')
  !echo {password} | sudo -S -k apt-get install unrar
else:
  !apt-get install unrar

### Import packages

In [10]:
import os, sys
import math
import time
import copy
import re

import numpy as np
import pandas as pd
import json
import xml.etree.ElementTree as ET
import PIL
import IPython

import matplotlib
import matplotlib.pyplot as plt

import torch
from torch.utils.tensorboard import SummaryWriter
import torchvision
import torchvision.models.detection as M
import torchvision.transforms.functional as F
import torchvision.utils as U
from torchvision.datasets import CocoDetection

from pycocotools import coco
from pycocotools import mask as cocomask

from ptflops import get_model_complexity_info

import scripts.utils as SU
import scripts.transforms as ST
import scripts.engine as SE
import scripts.coco_utils as SCU
from scripts.coco_eval import CocoEvaluator

### Connect to GDrive for storage access
*Note: Not possible with local runtime*

In [None]:
# google.colab is imported in this cell (not in the main import cell) so that the rest of
# the notebook can still run when this cell is skipped
from google.colab import drive

# Mount Google Drive at /content/drive; save_ckpt_dir points inside this mount
drive.mount('/content/drive', force_remount=True)

### Download Mask dataset

In [12]:
# Create the dataset folder tree.
# os.makedirs replaces the former `!cd` / `!mkdir` sequence: `!cd` has no effect on later
# commands (each `!` line runs in its own subshell) and `mkdir` without -p reports an
# error on every re-run. Parent folders (dataset_dir, img_dir, label_dir) are created
# implicitly, and already existing folders are left untouched.
for folder in (ann_dir, val_img_dir, train_img_dir, val_label_dir, train_label_dir):
  os.makedirs(folder, exist_ok=True)

In [None]:
if dataset_name == 'MASKD':

  !cd {root_dir}

  # Download validation images
  if not os.path.exists('val_images.zip'):
    !gdown --id '101F2k6PJ-tD_uwlsCG7zzGF9ILJW01M1'
  !unzip -q -n 'val_images.zip' -d {img_dir}

  # Download train images
  if not os.path.exists('train_images.zip'):
    !gdown --id '1vD_Sxg7dHkB_8OJLsHngBWvp5iGJAETQ'
  !unzip -q -n 'train_images.zip' -d {img_dir}

  # Download validation annotations
  if not os.path.exists('val.json'):
    !gdown -O {os.path.join(ann_dir,'val.json')} --id '1YLV7-7vmiNdFI8Xpdx_jbhnxfgQRWrgF'

  # Download train annotations
  if not os.path.exists('train.json'):
    !gdown -O {os.path.join(ann_dir,'train.json')}  --id '1AqeDJps-aZ743vFJ6p2_RjtSFjPtIOtD'

elif dataset_name == 'PWMFD':

  # Download validation images
  if not os.path.exists('val_images.rar'):
    !gdown -O 'val_images.rar' --id  1ZXuSwoRvTnnca81RUj3kMoLFZJ6auAwT
  !unrar e -idq -o- 'val_images.rar' -d {val_img_dir}

  # Download train images
  if not os.path.exists('train_images.rar'):
    !gdown -O 'train_images.rar' --id  16uI5ZEiq2JEYBH4_DhmxVAHSqzY5UQ1b
  !unrar e -idq -o- 'train_images.rar' -d {train_img_dir} 

  # Convert annotation files from PASCAL VOC .xml to COCO .json (only for PWMFD dataset)
  
  label_ids = {'with_mask': 1, 'without_mask': 2, 'incorrect_mask': 3}  # BG class is 0
    
  for type_img in ['val', 'train']:

    ann_count = 0  # Annotation counter

    images = []
    categories = []
    annotations = []

    xml_files = os.listdir(os.path.join(os.path.join(img_dir,f'{type_img}_images')))
    xml_files = [file for file in xml_files if '.xml' in file]

    # Categories
    for label in label_ids:
      categories.append(
          {
            'supercategory': 'none',
            'id': label_ids[label],
            'name': label
          }
      )

    for xml_file in xml_files:  # For each annotation file

      tree = ET.parse(os.path.join(os.path.join(img_dir,f'{type_img}_images'),xml_file))
      root = tree.getroot()

      # Image
      file_name = root[0].text
      height = int(root[1][1].text)
      width = int(root[1][0].text)
      id = int(re.sub(r'^\D*0*', '', file_name).replace('.jpg',''))
      images.append(
          {
            'file_name': file_name,
            'height': height,
            'width': width,
            'id': id
          }
      )
      
      # Annotations
      if len(root) > 2:  # If annotations (object attribute) exist, they will be after the 1-index attribute in the XML
        for i in range(2,len(root)):
          category_id = label_ids[root[i][0].text]
          xmin = int(root[i][1][0].text)
          ymin = int(root[i][1][1].text)
          xmax = int(root[i][1][2].text)
          ymax = int(root[i][1][3].text)
          annotations.append(
              {
                'iscrowd': 0,
                'image_id': id,
                'bbox': [xmin, ymin, xmax-xmin, ymax-ymin],
                'area': (xmax-xmin) * (ymax-ymin),
                'category_id': category_id,
                'ignore': 0,
                'id': ann_count,
                'segmentation': []
              }
          )
          ann_count += 1

    coco_dict = {
    'info': {},
    'images': images,
    'categories': categories,
    'annotations': annotations,
    'licenses': []
    }

    with open(os.path.join(ann_dir,f'{type_img}.json'),'w') as outfile:
      json.dump(coco_dict, outfile, indent=3)


# Copy COCO annotations in images folder
!cp {os.path.join(ann_dir,'val.json')} {val_img_dir}
!cp {os.path.join(ann_dir,'train.json')} {train_img_dir}

### (Only for YOLO models) Convert format of annotations from COCO to YOLO

In [None]:
# Write one YOLO label file per image: a line "<class> <x_center> <y_center> <width> <height>"
# per annotation, with all box values normalized to [0,1] by the image size.
for type_images in ['val', 'train']:

  ann_path = os.path.join(ann_dir, type_images + ".json")
  dataset = coco.COCO(ann_path)
  img_ids = dataset.getImgIds()

  for img_id in img_ids:

    img_anns = dataset.loadAnns(dataset.getAnnIds([img_id]))
    img_data = dataset.loadImgs([img_id])[0]
    img_file = img_data['file_name']
    img_width = img_data['width']
    img_height = img_data['height']

    # Same base name as the image, with a .txt extension. splitext is used because
    # replacing the literal '.jpg' would leave any other extension (e.g. '.png', '.JPG')
    # in place and produce a label file named like the image.
    label_file = os.path.splitext(img_file)[0] + '.txt'

    with open(os.path.join(label_dir, f'{type_images}_images', label_file), 'w') as outfile:
      for ann in img_anns:
        box_x, box_y, box_w, box_h = ann['bbox']  # COCO box: [x_min, y_min, width, height]
        x_center = (box_x + box_w/2) / img_width  # convert x_min to x_center and normalize to [0,1]
        y_center = (box_y + box_h/2) / img_height  # convert y_min to y_center and normalize to [0,1]
        width = box_w / img_width
        height = box_h / img_height
        outfile.write("{} {} {} {} {}\n".format(int(ann['category_id'])-1,x_center,y_center,width,height))   # Category ids must be 0-indexed


### Load Mask dataset

In [None]:
# COCO annotation files (copies stored next to the images of each split)
val_ann_file = 'val.json'  # annotations
val_ann_path = os.path.join(val_img_dir,val_ann_file)

train_ann_file = 'train.json'  # annotations
train_ann_path = os.path.join(train_img_dir,train_ann_file)

# Define data transforms
transforms = ST.Compose([ST.ToTensor()])

# Create datasets
val_dataset = CocoDetection(val_img_dir, val_ann_path, transforms = transforms)
train_dataset = CocoDetection(train_img_dir, train_ann_path, transforms = transforms)

# Create data loaders
# Validation keeps a fixed order; training data is reshuffled every epoch so that the
# batches are not presented in the same order each time.
val_data_loader = torch.utils.data.DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, collate_fn=SU.collate_fn)
train_data_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, collate_fn=SU.collate_fn)

### Load model

In [16]:
# Delete utils package to reload it (if loaded), because YOLOv3 and YOLOv5 have
# the same name for it and it causes error.
# pop() with a default does nothing when the module is not loaded, without hiding other errors.
sys.modules.pop('utils', None)

# @markdown Model Selection { display-mode: 'form', run: 'auto' }
model_name = 'YOLOv5s' # @param ['YOLOv5m', 'YOLOv5s', 'YOLOv5n']

# @markdown *Note: If you get the error "Cache may be out of date, try 'force_reload=True'" then restart runtime.*

# torchvision detection models: display name -> (model id, constructor, input size (C,H,W))
torchvision_models = {
  'SSD300 VGG16': ('ssd300_vgg16', M.ssd300_vgg16, (3,300,300)),
  'SSDlite320 MobileNetV3-Large': ('ssdlite320_mobilenet_v3_large', M.ssdlite320_mobilenet_v3_large, (3,320,320)),
  'Faster R-CNN ResNet-50 FPN': ('fasterrcnn_resnet50_fpn', M.fasterrcnn_resnet50_fpn, (3,800,800)),  # COCO's 640x640 is upscaled to the model's minimum 800x800
  'Faster R-CNN MobileNetV3-Large FPN': ('fasterrcnn_mobilenet_v3_large_fpn', M.fasterrcnn_mobilenet_v3_large_fpn, (3,800,800)),
  'Mask R-CNN ResNet-50 FPN': ('maskrcnn_resnet50_fpn', M.maskrcnn_resnet50_fpn, (3,800,800)),
}

# YOLO models loaded through torch.hub: display name -> (hub repository, model id).
# The model id is also the name of the hub entry point.
yolo_models = {
  'YOLOv5n': ('ultralytics/yolov5', 'yolov5n'),
  'YOLOv5s': ('ultralytics/yolov5', 'yolov5s'),
  'YOLOv5m': ('ultralytics/yolov5', 'yolov5m'),
  'YOLOv5l': ('ultralytics/yolov5', 'yolov5l'),
  'YOLOv3': ('ultralytics/yolov3', 'yolov3'),
  'YOLOv3-tiny': ('ultralytics/yolov3', 'yolov3_tiny'),
  'YOLOv3-spp': ('ultralytics/yolov3', 'yolov3_spp'),
}

if model_name in torchvision_models:
  model_id, build_model, model_img_size = torchvision_models[model_name]
  model = build_model(pretrained=True, progress=True)
elif model_name in yolo_models:
  hub_repo, model_id = yolo_models[model_name]
  model = torch.hub.load(hub_repo, model_id, force_reload=True)
  model_img_size = (3,img_res,img_res)  # YOLO models are measured at the configured resolution
else:
  # Fail here with a clear message instead of leaving `model` undefined (or stale from a
  # previous run) for the cells below
  raise ValueError(f'Unknown model_name: {model_name!r}')

print('-------------------------------------------------------------------------------------------------------\n')

# Report the model size: parameters in millions and GFLOPs for one image of model_img_size
print(f'Loaded model: {model_name}')
model_params = round(sum(param.numel() for param in model.parameters()) / 1000000, 1)
print(f'\t- Parameters: {model_params}M')
model_macs, _ = get_model_complexity_info(model, model_img_size, as_strings=False, 
                                          print_per_layer_stat=False, verbose=False)
model_gflops = round(2 * int(model_macs) / 1000000000, 1)  # GFLOPs are reported as 2 x multiply-accumulate operations
print(f'\t- GFLOPs: {model_gflops}')

Downloading: "https://github.com/ultralytics/yolov5/archive/master.zip" to /root/.cache/torch/hub/master.zip
Downloading https://ultralytics.com/assets/Arial.ttf to /root/.config/Ultralytics/Arial.ttf...
[31m[1mrequirements:[0m matplotlib>=3.2.2 not found and is required by YOLOv5, attempting auto-update...

[31m[1mrequirements:[0m PyYAML>=5.3.1 not found and is required by YOLOv5, attempting auto-update...

[31m[1mrequirements:[0m torch>=1.7.0 not found and is required by YOLOv5, attempting auto-update...

[31m[1mrequirements:[0m torchvision>=0.8.1 not found and is required by YOLOv5, attempting auto-update...

[31m[1mrequirements:[0m pandas>=1.1.4 not found and is required by YOLOv5, attempting auto-update...

[31m[1mrequirements:[0m 5 packages updated per /root/.cache/torch/hub/ultralytics_yolov5_master/requirements.txt
[31m[1mrequirements:[0m ⚠️ [1mRestart runtime or rerun command for updates to take effect[0m

YOLOv5 🚀 2022-3-13 torch 1.9.0+cu102 CUDA:0 (Tes

Downloading https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5s.pt to yolov5s.pt...


  0%|          | 0.00/14.1M [00:00<?, ?B/s]

Fusing layers... 





Model Summary: 213 layers, 7225885 parameters, 0 gradients, 16.5 GFLOPs
Adding AutoShape... 


-------------------------------------------------------------------------------------------------------

Loaded model: YOLOv5s
	- Parameters: 7.2M
	- GFLOPs: 4.1


### (Optional) Open Tensorboard Monitor
*Note: Tensorboard may not show properly in some browsers and operating systems.*

In [17]:
# Load Tensorboard
# (re)load the notebook extension, then open the monitor on the checkpoint root directory
%reload_ext tensorboard
%tensorboard --bind_all --logdir {save_ckpt_dir}

<IPython.core.display.Javascript object>

### Train model

In [None]:
# Set model checkpoint folder
# When no folder name was configured, one is generated from the dataset, the model and the start time
start_time = int(time.time())
if not save_ckpt_folder:
  save_ckpt_folder = f'{dataset_name}_{model_id}_{start_time}_run'

# Prepare model for new dataset (for Faster R-CNN or Mask R-CNN)
if 'R-CNN' in model_name: 
  num_classes = len(val_dataset.coco.getCatIds())+1  # includes background (0) class
  # Get the number of input features for the bbox predictor
  in_features = model.roi_heads.box_predictor.cls_score.in_features
  # Replace the pre-trained head with a new one
  model.roi_heads.box_predictor = M.faster_rcnn.FastRCNNPredictor(in_features, num_classes)
  if 'Mask R-CNN' in model_name:
    # Get the number of input channels for the mask predictor
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    # Replace the mask predictor with a new one
    model.roi_heads.mask_predictor = M.mask_rcnn.MaskRCNNPredictor(in_features_mask, hidden_layer,num_classes)

# Model Training 

if 'YOLO' in model_name: # For YOLO models

  yolo_version = int(model_id[5])

  # Generate hyperparameter file
  hyp_path = os.path.join(save_ckpt_dir,save_ckpt_folder,'hyp.yaml')
  !cp {root_dir}/yolov{yolo_version}/data/hyps/hyp.scratch.yaml 'hyp.yaml'
  with open('hyp.yaml','a') as outfile:
    outfile.write(

f"""lr0: {lr:.9f}
lrf: {lr_f:.9f}
momentum: {momentum}
weight_decay: {weight_decay}
mosaic: {mosaic}
mixup: {mixup}
fl_gamma: {fl_gamma}
"""
# .9f is for suppressing scientific notation (e.g. 2e10) for float numbers

)

  train_script_path = os.path.join(root_dir,f'yolov{yolo_version}','train.py')
  data_path = os.path.join(root_dir,'config',f'{dataset_name}_yolov{yolo_version}.yaml')

  if load_ckpt:  # If load a pre-existing checkpoint, resume from it
    temp_save_ckpt_folder = re.search(r'/' + dataset_name + '[^/]*/', load_ckpt_path).group(0).replace('/','')  # Get the save folder name from the loaded folder name
    temp_load_ckpt_path = load_ckpt_path
  else:  # If not load a pre-existing checkpoint, load pre-trained COCO weights or randomly initialize weights
    if pretrained:
      temp_save_ckpt_folder = save_ckpt_folder
      temp_load_ckpt_path = f'{model_id.replace("_","-")}.pt'
    else:
      temp_save_ckpt_folder = save_ckpt_folder
      temp_load_ckpt_path = '""'
  if load_cfg:  # If load custom model config
    temp_cfg_path = load_cfg_path
  else:
    temp_cfg_path = model_id.replace("_","-") + '.yaml'

  # Run train script
  !python {train_script_path} --img {img_res} --epochs {num_epochs} --batch {batch_size} --project {save_ckpt_dir} --name {temp_save_ckpt_folder} --hyp 'hyp.yaml' --data {data_path} \
  {'--resume ' + temp_load_ckpt_path if load_ckpt else '--weights ' + temp_load_ckpt_path} \
  {'--adam ' if optimizer_type == 'adam' else ''} \
  {'--cfg '  +  temp_cfg_path} \
  {'--freeze ' + str(frozen_layers) if freeze_layers else ''}

  # Copy hyp.yaml in trained model folder
  if not load_ckpt:
    !cp 'hyp.yaml' {hyp_path}

else: # For all the other models

  # Make checkpoint folder
  !mkdir {os.path.join(ckpt_dir,ckpt_folder)}

  # Load pre-existing checkpoint weights if required
  last_epoch = 0
  if load_ckpt:
    model.load_state_dict(torch.load(load_ckpt_path))
    last_epoch = int(re.search(r'epoch[0-9]+', load_ckpt_path).group(0)[5:])
    print(f'Loaded checkpoint: {load_ckpt_path}')

  # Get appropriate device for model
  device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
  model.to(device)
  print(f'Pre-trained Model: {model_name}')

  # Construct an optimizer
  params = [p for p in model.parameters() if p.requires_grad]
  if optimizer_type == 'adam':
    optimizer = torch.optim.Adam(params, lr=lr, weight_decay=weight_decay)
  elif optimizer_type == 'sgd':
    optimizer = torch.optim.SGD(params, lr=lr, momentum=momentum, weight_decay=weight_decay)

  # Construct learning rate scheduler
  lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)

  # Save training configs to .txt
  hyp_path = os.path.join(save_ckpt_dir,save_ckpt_folder,'hyp.txt')
  with open(hyp_path,'w') as outfile:
    outfile.write( 
                         
f"""dataset: {dataset_name}
model: {model_id}
timestamp: {start_time}
last_checkpoint: {load_ckpt_path if load_ckpt else None}
num_epochs: {num_epochs}
batch_size: {batch_size}
optimizer: {optimizer_type}
learning_rate: {lr}
step_size: {step_size}
gamma: {gamma}
momentum: {momentum}
weight_decay: {weight_decay}"""

)

  # Train model
  for epoch in range(num_epochs):

      curr_epoch = last_epoch + epoch + 1

      # Train for one epoch, printing every 10 iterations
      metric_logger = SE.train_one_epoch(model, optimizer, train_data_loader, device, epoch, print_freq=1)
      
      # Update the learning rate
      lr_scheduler.step()
      
      # Evaluate on the test dataset
      evaluator, _, _, _ = SE.evaluate(model, val_data_loader, device)

      # Save model checkpoint
      ckpt_file = f'epoch{curr_epoch}'
      ckpt_path = os.path.join(ckpt_dir,ckpt_folder,ckpt_file)
      torch.save(model.state_dict(), ckpt_path)

      # Delete previous model checkpoint
      previous_ckpt_file = f'epoch{curr_epoch-1}'
      if os.path.exists(previous_ckpt_file):
        os.remove(previous_ckpt_file)