## Summary

- [Main Tutorial](https://towardsdatascience.com/face-detection-on-custom-dataset-with-detectron2-and-pytorch-using-python-23c17e99e162)
- Goal: fine-tune a Detectron2 Mask R-CNN model to detect faces in a custom-annotated image dataset.

## Libraries

In [13]:
import itertools
import json
import ntpath
import os
import random
import urllib
import urllib.request

import cv2
import numpy as np
import pandas as pd
import PIL.Image as Image
from tqdm import tqdm

In [22]:
import torch, torchvision
import detectron2
from detectron2.utils.logger import setup_logger

In [23]:
import glob

In [24]:
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor, DefaultTrainer
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.data import DatasetCatalog, MetadataCatalog, build_detection_test_loader
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.structures import BoxMode

In [15]:
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc

%matplotlib inline
%config InlineBackend.figure_format='retina'

## Inits and Vars

In [21]:
# Initialise detectron2's logger; the returned logger object is echoed as the cell output.
setup_logger()

<Logger detectron2 (DEBUG)>

In [17]:
# Global seaborn look: white-grid style, "muted" palette, fonts scaled by 1.2.
sns.set(style='whitegrid', palette='muted', font_scale=1.2)

In [18]:
# Six custom hex colours installed as seaborn's active colour palette.
HAPPY_COLORS_PALETTE = [
    "#01BEFE",
    "#FFDD00",
    "#FF7D00",
    "#FF006D",
    "#ADFF02",
    "#8F00FF",
]
sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))

# Default matplotlib figure size as a (width, height) pair.
rcParams['figure.figsize'] = (12, 8)

In [25]:
# Seed every random number generator this notebook imports (Python's `random`,
# NumPy and PyTorch) from a single constant so stochastic steps are repeatable.
RANDOM_SEED = 88
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
# Kept as the last expression so the cell still displays the torch Generator.
torch.manual_seed(RANDOM_SEED)

<torch._C.Generator at 0x7f6ec7811070>

## Get Data

### Download Data

In [3]:
# Download the annotation file (saved as face_detection.json) from Google Drive.
!gdown --id 1K79wJgmPTWamqb04Op2GxW0SW9oxw8KS

Downloading...
From: https://drive.google.com/uc?id=1K79wJgmPTWamqb04Op2GxW0SW9oxw8KS
To: /home/aicoe/vision_exploration/notebooks/face_detection.json
  0%|                                                | 0.00/274k [00:00<?, ?B/s]100%|████████████████████████████████████████| 274k/274k [00:00<00:00, 48.1MB/s]


In [5]:
# The file is read as JSON Lines (lines=True): one record per line, each with
# the image URL in "content" and its list of face boxes in "annotation".
faces_df = pd.read_json('face_detection.json', lines=True)

In [7]:
# Column overview: dtypes and non-null counts of the raw annotation table.
faces_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 409 entries, 0 to 408
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   content     409 non-null    object 
 1   annotation  409 non-null    object 
 2   extras      0 non-null      float64
dtypes: float64(1), object(2)
memory usage: 9.7+ KB


In [6]:
# Peek at the first rows of the raw annotation table.
faces_df.head()

Unnamed: 0,content,annotation,extras
0,http://com.dataturks.a96-i23.open.s3.amazonaws...,"[{'label': ['Face'], 'notes': '', 'points': [{...",
1,http://com.dataturks.a96-i23.open.s3.amazonaws...,"[{'label': ['Face'], 'notes': '', 'points': [{...",
2,http://com.dataturks.a96-i23.open.s3.amazonaws...,"[{'label': ['Face'], 'notes': '', 'points': [{...",
3,http://com.dataturks.a96-i23.open.s3.amazonaws...,"[{'label': ['Face'], 'notes': '', 'points': [{...",
4,http://com.dataturks.a96-i23.open.s3.amazonaws...,"[{'label': ['Face'], 'notes': '', 'points': [{...",


In [11]:
# Show the annotation list of one image: each entry carries a label, two
# corner points with fractional x/y coordinates, and the image width/height.
faces_df.iloc[1]["annotation"]

[{'label': ['Face'],
  'notes': '',
  'points': [{'x': 0.70530877573131, 'y': 0.23260437375745502},
   {'x': 0.769230769230769, 'y': 0.36182902584493004}],
  'imageWidth': 1280,
  'imageHeight': 697}]

### Process Data to DF

In [26]:
# Download every image into faces/ and flatten the annotations into `dataset`,
# a list with one dict per annotated face (pixel-space bounding box).

# 1. Make sure the image directory exists.
os.makedirs("faces", exist_ok=True)

dataset = []

# 2. Fetch each image by URL and record its face boxes.
for index, row in tqdm(faces_df.iterrows(), total=faces_df.shape[0]):
    # Images are named after their row index in faces_df.
    image_name = f'face_{index}.jpeg'
    image_path = f'faces/{image_name}'

    # Cache guard: the full download took about eight minutes in the recorded
    # run, so images saved by a previous run are not fetched again.
    if not os.path.exists(image_path):
        # The context manager closes the HTTP response even if decoding fails.
        with urllib.request.urlopen(row["content"]) as response:
            # convert() forces PIL to read the whole stream before it closes.
            img = Image.open(response).convert('RGB')

        # Write under a temporary name and rename afterwards, so an interrupted
        # run cannot leave a truncated file that the cache guard would accept.
        partial_path = f'{image_path}.part'
        img.save(partial_path, "JPEG")
        os.replace(partial_path, image_path)

    for an in row['annotation']:
        width = an['imageWidth']
        height = an['imageHeight']
        points = an['points']

        # Corner coordinates are stored as fractions of the image size;
        # scale them to integer pixel positions.
        x_a = int(round(points[0]["x"] * width))
        y_a = int(round(points[0]["y"] * height))
        x_b = int(round(points[1]["x"] * width))
        y_b = int(round(points[1]["y"] * height))

        dataset.append({
            'file_name': image_name,  # file name / index
            'width': width,
            'height': height,
            # min/max keeps the box valid even if the two corners are listed
            # in the opposite order.
            'x_min': min(x_a, x_b),
            'y_min': min(y_a, y_b),
            'x_max': max(x_a, x_b),
            'y_max': max(y_a, y_b),
            'class_name': 'face',
        })

100%|██████████| 409/409 [08:05<00:00,  1.19s/it]


In [27]:
# Turn the per-face records into a DataFrame and report its size.
df = pd.DataFrame(dataset)
n_images = len(df.file_name.unique())
n_annotations = len(df)
print(n_images, n_annotations)
# -> 409 images carrying 1132 face annotations

409 1132


In [28]:
# Peek at the flattened table: one row per annotated face.
df.head()

Unnamed: 0,file_name,width,height,x_min,y_min,x_max,y_max,class_name
0,face_0.jpeg,650,333,56,102,112,151,face
1,face_0.jpeg,650,333,379,97,428,156,face
2,face_1.jpeg,1280,697,903,162,985,252,face
3,face_2.jpeg,460,240,216,12,279,80,face
4,face_2.jpeg,460,240,289,2,343,74,face


In [41]:
# Save the flattened annotation table to annotations.csv without the index column.
df.to_csv("annotations.csv", index=False)

## Explore Images

In [32]:
def annotate_image(annotations, resize=True):
    """Draw the bounding boxes in `annotations` on their image.

    Args:
        annotations: DataFrame with columns file_name, x_min, y_min, x_max
            and y_max. The image is taken from the first row's file_name
            (read from faces/<file_name>) and every row's box is drawn on
            it, so the frame is expected to hold the rows of one image.
        resize: when True (default) the annotated image is resized to
            384x384; when False it is returned at its original size.

    Returns:
        The image converted from BGR to RGB, with one green rectangle
        (line thickness 2) per row.

    Raises:
        FileNotFoundError: if the image file cannot be read.
    """
    file_name = annotations.file_name.to_numpy()[0]
    image_path = f'faces/{file_name}'

    # cv2.imread signals an unreadable file by returning None instead of
    # raising; fail here with the path rather than later inside cvtColor.
    bgr = cv2.imread(image_path)
    if bgr is None:
        raise FileNotFoundError(f'Could not read image: {image_path}')
    img = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

    for a in annotations.itertuples(index=False):
        # Cast to built-in int: OpenCV's drawing functions can reject NumPy
        # integer scalars as point coordinates.
        top_left = (int(a.x_min), int(a.y_min))
        bottom_right = (int(a.x_max), int(a.y_max))
        cv2.rectangle(img, top_left, bottom_right, (0, 255, 0), 2)

    if not resize:
        return img

    return cv2.resize(img, (384, 384), interpolation=cv2.INTER_AREA)


## Train Test Split

In [42]:
# Directory that holds the downloaded face images.
IMAGES_PATH = 'faces'

In [43]:
# Distinct image file names; the train/test split is made per image, not per face.
unique_files=df.file_name.unique()

In [44]:
# Put 95% of the images (not of the annotations) in the training set, so all
# faces of one image land on the same side of the split.
# A dedicated generator seeded with RANDOM_SEED makes this cell idempotent:
# re-running it yields the same split instead of advancing NumPy's global RNG.
split_rng = np.random.RandomState(RANDOM_SEED)
train_files = set(
    split_rng.choice(
        unique_files,
        int(len(unique_files) * 0.95),
        replace=False,
    )
)

is_train = df.file_name.isin(train_files)
train_df = df[is_train]
test_df = df[~is_train]

# Sanity check: the two subsets partition the annotation table.
assert len(train_df) + len(test_df) == len(df)

## Convert to Detectron Format

In [48]:
# Class names in order of first appearance; an object's category_id is later
# taken as the index of its class name in this list.
classes = df.class_name.unique().tolist()
classes

['face']

In [49]:
def create_dataset_dicts(df, classes, images_path=None):
    """Convert the flat annotation table into a list of per-image records.

    Args:
        df: DataFrame with one row per annotated object and the columns
            file_name, width, height, x_min, y_min, x_max, y_max, class_name.
        classes: list of class names; an object's category_id is the index
            of its class_name in this list.
        images_path: directory prefix for the image files. Defaults to the
            notebook-level IMAGES_PATH when omitted.

    Returns:
        A list with one dict per distinct file_name, in order of first
        appearance, holding "file_name", "image_id", "height", "width" and
        "annotations" (a list with one dict per object of that image).

    Raises:
        ValueError: if a row's class_name is not in `classes`.
    """
    if images_path is None:
        images_path = IMAGES_PATH

    dataset_dicts = []

    # groupby(sort=False) hands out each image's rows in order of first
    # appearance in one pass, instead of re-filtering the whole frame once
    # per image.
    per_image = df.groupby("file_name", sort=False)
    for image_id, (img_name, image_df) in enumerate(per_image):
        first_row = image_df.iloc[0]
        record = {
            "file_name": f'{images_path}/{img_name}',
            "image_id": image_id,
            # Image size is taken from the image's first annotation row.
            "height": int(first_row.height),
            "width": int(first_row.width),
        }

        objs = []
        for row in image_df.itertuples(index=False):
            xmin = int(row.x_min)
            ymin = int(row.y_min)
            xmax = int(row.x_max)
            ymax = int(row.y_max)

            # The mask is the box itself: its four corners flattened into
            # one [x0, y0, x1, y1, ...] polygon.
            poly = [xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax]

            objs.append({
                "bbox": [xmin, ymin, xmax, ymax],
                "bbox_mode": BoxMode.XYXY_ABS,
                "segmentation": [poly],
                # Index into `classes`, starting at 0. NOTE(review): detectron2's
                # dataset docs reserve no id for background here - confirm.
                "category_id": classes.index(row.class_name),
                "iscrowd": 0,
            })

        record["annotations"] = objs
        dataset_dicts.append(record)
    return dataset_dicts

In [50]:
# Register both splits with detectron2's dataset and metadata catalogs.
# register() only stores the loader; the lambda is not evaluated here, so it
# looks up train_df / test_df / classes when the dataset is actually requested.
for d in ["train", "val"]:
    dataset_name = "faces_" + d

    # Registering a name twice fails in detectron2, so skip names that are
    # already registered to keep this cell safe to re-run.
    if dataset_name not in DatasetCatalog.list():
        DatasetCatalog.register(
            dataset_name,
            # d=d binds the current split name; a plain closure would see
            # only the loop's final value.
            lambda d=d: create_dataset_dicts(
                train_df if d == "train" else test_df,
                classes,
            ),
        )
    MetadataCatalog.get(dataset_name).set(thing_classes=classes)

statement_metadata = MetadataCatalog.get("faces_train")

## Create Evaluator for Test Set

In [52]:
class CocoTrainer(DefaultTrainer):
    """DefaultTrainer variant whose evaluator is a COCOEvaluator."""

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Return a COCOEvaluator for `dataset_name`.

        When `output_folder` is not given, the "coco_eval" directory is
        created if needed and used as the evaluator's output folder.
        """
        eval_dir = output_folder
        if eval_dir is None:
            eval_dir = "coco_eval"
            os.makedirs(eval_dir, exist_ok=True)

        return COCOEvaluator(dataset_name, cfg, False, eval_dir)

## Set Config File

In [53]:
# Start from detectron2's default config, merge the model-zoo Mask R-CNN
# config on top, and point the weights at the matching model-zoo checkpoint.
model_zoo_config = "COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml"

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(model_zoo_config))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_zoo_config)

In [54]:
# Dataset names as registered in the catalogs: train on "faces_train",
# evaluate on "faces_val". Both settings are tuples, hence the trailing commas.
cfg.DATASETS.TRAIN = ("faces_train",)
cfg.DATASETS.TEST = ("faces_val",)
# Number of data-loading workers.
cfg.DATALOADER.NUM_WORKERS = 4

In [55]:
# Solver (optimisation schedule) settings.
cfg.SOLVER.IMS_PER_BATCH = 4
cfg.SOLVER.BASE_LR = 0.001
# Warm-up spans 1000 of the 1500 total iterations.
cfg.SOLVER.WARMUP_ITERS = 1000
cfg.SOLVER.MAX_ITER = 1500
# NOTE(review): the second step (1500) equals MAX_ITER, so it presumably never
# takes effect during training - confirm against detectron2's scheduler.
cfg.SOLVER.STEPS = (1000, 1500)
# Presumably the factor applied to the learning rate at each step - confirm.
cfg.SOLVER.GAMMA = 0.05

In [56]:
# ROI head settings; the number of classes follows the `classes` list.
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 64
cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(classes)

# Evaluation period in iterations (presumably the evaluator runs on
# cfg.DATASETS.TEST every 500 iterations - confirm).
cfg.TEST.EVAL_PERIOD = 500

## Train

In [57]:
# Make sure the configured output directory exists before training starts.
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

# Build the trainer from the config and start training.
# NOTE(review): with resume=False this presumably starts from cfg.MODEL.WEIGHTS
# rather than from an earlier checkpoint in the output directory - confirm.
trainer = CocoTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

[32m[06/28 15:10:44 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

[32m[06/28 15:10:44 d2.data.build]: [0mRemoved 0 images with no usable annotations. 388 images left.
[32m[06/28 15:10:44 d2.data.build]: [0mDistribution of instances among all 1 categories:
[36m|  category  | #instances   |
|:----------:|:-------------|
|    face    | 1085         |
|            |              |[0m
[32m[06/28 15:10:44 d2.data.common]: [0mSerializing 388 elements to byte tensors and concatenating them all ...
[32m[06/28 15:10:44 d2.data.common]: [0mSerialized dataset takes 0.14 MiB
[32m[06/28 15:10:44 d2.data.detection_utils]: [0mTransformGens used in training: [ResizeShortestEdge(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style='choice'), RandomFlip()]
[32m[06/28 15:10:44 d2.data.build]: [0mUsing training sampler TrainingSampler


model_final_2d9806.pkl: 431MB [00:39, 11.1MB/s]                               
Unable to load 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (2, 1024) in the model!
Unable to load 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (2,) in the model!
Unable to load 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (4, 1024) in the model!
Unable to load 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (4,) in the model!
Unable to load 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (80, 256, 1, 1) in the checkpoint but (1, 256, 1, 1) in the model!
Unable to load 'roi_heads.mask_head.predictor.bias' to the model due to incompatible shapes: (80,) in the checkpoint but (1,) in the model!


[32m[06/28 15:11:25 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[06/28 15:12:03 d2.utils.events]: [0m eta: 0:45:36  iter: 19  total_loss: 2.014  loss_cls: 0.752  loss_box_reg: 0.307  loss_mask: 0.685  loss_rpn_cls: 0.245  loss_rpn_loc: 0.025  time: 1.8181  data_time: 0.1004  lr: 0.000020  max_mem: 11293M
[32m[06/28 15:12:06 d2.engine.hooks]: [0mOverall training speed: 19 iterations in 0:00:36 (1.9097 s / it)
[32m[06/28 15:12:06 d2.engine.hooks]: [0mTotal training time: 0:00:36 (0:00:00 on hooks)


KeyboardInterrupt: 

## Placeholder