In [1]:
# Enable the autoreload extension in mode 2 so edited modules are re-imported
# automatically before each cell runs.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [2]:
# Print the environment entries that mention PYTHONPATH (shell command).
!env | grep PYTHONPATH

In [3]:
import numpy as np
import matplotlib.pylab as plt
import detectron2
import cv2
import os
import pathlib
import json
import random
from PIL import Image, ImageDraw2
import pandas as pd
import torchvision
from torchvision import transforms
import torch
import shutil
import glob

In [4]:
from detectron2.utils.logger import setup_logger
from detectron2 import model_zoo
from detectron2.engine import DefaultTrainer, DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_train_loader, build_detection_test_loader

## Helper Functions

In [8]:
def get_dicts(IMG_DIR):
    '''Load the per-image annotation dicts stored in a dataset folder.

    Each folder (train, val, test) contains a ``data.json`` file holding a
    list of dicts - one for each image - with labels and bounding boxes.

    Args:
        IMG_DIR: path of the folder that contains ``data.json``.

    Returns:
        The list parsed from ``data.json``, with the ``bbox_mode`` of every
        annotation overwritten by ``BoxMode.XYXY_ABS``.
    '''

    path = os.path.join(IMG_DIR, 'data.json')
    # A context manager closes the file handle deterministically; the previous
    # json.load(open(path)) left it open until garbage collection.
    with open(path) as json_file:
        dataset_dict = json.load(json_file)

    # Hacky: bbox_mode is overwritten after loading. The cleaner fix noted by
    # the original author is a JSON encoder/decoder for the enum.
    for item in dataset_dict:
        for ann in item['annotations']:
            ann['bbox_mode'] = BoxMode.XYXY_ABS

    return dataset_dict

def register(IMG_DIR, class_names, subfolders=['train', 'test']):
    '''Register one detectron2 dataset per subfolder of IMG_DIR.

    The dataset for subfolder ``s`` is named ``f"{IMG_DIR}_{s}"``. Its records
    are loaded lazily through get_dicts from ``f"{IMG_DIR}/{s}"``, and its
    metadata gets ``thing_classes`` set to class_names.
    '''
    for split in subfolders:
        dataset_name = f"{IMG_DIR}_{split}"
        split_dir = f'{IMG_DIR}/{split}'
        # Bind the folder as a default argument so each loader keeps its own
        # split rather than the loop variable's final value.
        DatasetCatalog.register(dataset_name, lambda split_dir=split_dir: get_dicts(split_dir))
        MetadataCatalog.get(dataset_name).set(thing_classes=class_names)

def get_metadata(dataset_name):
    '''Return the MetadataCatalog entry for the given dataset name.'''
    return MetadataCatalog.get(dataset_name)

def sample_plot(dataset_dict, metadata, LOC):
    '''Plot the annotations of one randomly chosen image from a dataset.

    Args:
        dataset_dict: list of per-image dicts (e.g. the result of get_dicts).
        metadata: metadata object handed to the Visualizer.
        LOC: directory joined with each record's "file_name",
            e.g. 'logos3/train'.

    Raises:
        ValueError: if dataset_dict is empty (raised by random.sample).
        FileNotFoundError: if the selected image cannot be read.
    '''
    d = random.sample(dataset_dict, 1)[0]
    print(d)

    img_path = os.path.join(LOC, d["file_name"])
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {img_path}")

    # OpenCV loads images as BGR; reverse the channel axis to give the
    # Visualizer an RGB image.
    visualizer = Visualizer(img[:, :, ::-1], metadata=metadata, scale=0.5)
    vis = visualizer.draw_dataset_dict(d)
    # get_image() is already RGB, which is what plt.imshow expects. The old
    # extra channel flip displayed the image with red and blue swapped.
    plt.imshow(vis.get_image())
    
def prepare_for_training(N_iter,
                         output_dir,
                         train_dataset_name,
                         N_classes,
                         model_yaml,
                         start_training=False,
                         num_workers=2,
                         ims_per_batch=2,
                         base_lr=0.00025,
                         roi_batch_size_per_image=128):
    '''Build a detectron2 config and DefaultTrainer for fine-tuning.

    Args:
        N_iter: value for cfg.SOLVER.MAX_ITER (number of training iterations).
        output_dir: value for cfg.OUTPUT_DIR; created if it does not exist.
        train_dataset_name: registered dataset name used for training.
        N_classes: value for cfg.MODEL.ROI_HEADS.NUM_CLASSES.
        model_yaml: model zoo config path, e.g.
            "COCO-Detection/faster_rcnn_R_50_C4_1x.yaml". Both the base config
            and the initial weights are taken from it.
        start_training: if True, run trainer.train() before returning.
        num_workers: value for cfg.DATALOADER.NUM_WORKERS.
        ims_per_batch: value for cfg.SOLVER.IMS_PER_BATCH.
        base_lr: value for cfg.SOLVER.BASE_LR.
        roi_batch_size_per_image: value for
            cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE (smaller is faster).

    Returns:
        (trainer, cfg): the DefaultTrainer and the config it was built from.
    '''
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file(model_yaml))
    cfg.OUTPUT_DIR = output_dir
    cfg.DATASETS.TRAIN = (train_dataset_name,)
    cfg.DATASETS.TEST = ()
    cfg.DATALOADER.NUM_WORKERS = num_workers
    # Let training initialize from the model zoo checkpoint of the same config.
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(model_yaml)
    cfg.SOLVER.IMS_PER_BATCH = ims_per_batch
    cfg.SOLVER.BASE_LR = base_lr
    cfg.SOLVER.MAX_ITER = N_iter
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = roi_batch_size_per_image
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = N_classes

    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    trainer = DefaultTrainer(cfg)
    # resume=False: start from cfg.MODEL.WEIGHTS instead of resuming a
    # previous run.
    trainer.resume_or_load(resume=False)

    if start_training:
        trainer.train()

    return trainer, cfg

def prepare_for_inference(cfg, test_dataset_name, threshold=0.70):
    '''Point cfg at the trained weights and build a DefaultPredictor from it.

    cfg is modified in place: MODEL.WEIGHTS becomes "model_final.pth" inside
    cfg.OUTPUT_DIR, the ROI-heads test score threshold becomes threshold, and
    DATASETS.TEST becomes a one-element tuple holding test_dataset_name.

    Returns the DefaultPredictor built from the updated cfg.
    '''
    weights_dir = cfg.OUTPUT_DIR
    print(f"Reading weights from output dir: {weights_dir}")

    cfg.MODEL.WEIGHTS = os.path.join(weights_dir, "model_final.pth")
    # Testing threshold for this model
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = threshold
    cfg.DATASETS.TEST = (test_dataset_name,)

    return DefaultPredictor(cfg)

def infer_img(predictor, img_filename, metadata):
    '''Run the predictor on one image file and plot the predicted instances.

    Args:
        predictor: callable applied to the image array loaded by cv2.imread
            (e.g. the result of prepare_for_inference).
        img_filename: path of the image to read.
        metadata: metadata object handed to the Visualizer.

    Returns:
        The raw predictor output; its "instances" entry is what gets drawn.

    Raises:
        FileNotFoundError: if the image cannot be read.
    '''
    img = cv2.imread(img_filename)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {img_filename}")
    outputs = predictor(img)

    # Reverse the channel axis (BGR from OpenCV -> RGB) for the Visualizer.
    v = Visualizer(img[:,:,::-1], metadata=metadata, scale=0.8)
    v = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    plt.imshow(v.get_image())

    return outputs

#### Register the fixed dataset

In [9]:
# Empty both catalogs before registering - presumably so the registration
# cells below can be re-run in the same kernel.
# NOTE(review): _NAME_TO_META is a private attribute of MetadataCatalog -
# confirm it exists in the installed detectron2 version.
DatasetCatalog.clear()
MetadataCatalog._NAME_TO_META = {}

In [10]:
# Class names = distinct values of the 'class' column in the training labels
# CSV, in order of first appearance.
# NOTE(review): this order presumably has to match the category ids stored in
# data.json - confirm.
labels = list(pd.read_csv('logo_detector/combined/train/labels.csv')['class'].unique())
print(labels)

['sas', 'rh', 'anaconda', 'cloudera']


In [11]:
# Register the train and test folders as the datasets
# 'logo_detector/combined_train' and 'logo_detector/combined_test'.
register('logo_detector/combined', labels, ['train', 'test'])

In [12]:
# Metadata entry of the registered training dataset (thing_classes is set by register).
logo_train_metadata = get_metadata('logo_detector/combined_train')

## Train with different architectures

In [15]:
# Derive the class count from the label list read above instead of hard-coding
# 4, so the detector head stays in sync with the labels CSV.
N_classes = len(labels)

In [16]:
# Training is disabled by default. When enabled, this fine-tunes the
# Faster R-CNN R101-FPN 3x model zoo config for 2000 iterations on the
# training dataset and writes to 'logo_detector_output_r101fpn3x'.
if False: #switch to true if you want to train
    trainer, cfg = prepare_for_training(2000, 
                                        'logo_detector_output_r101fpn3x', 
                                        'logo_detector/combined_train', 
                                        N_classes,
                                        'COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml',
                                        start_training=True)

In [17]:
# Read the persisted model from the output directory.
# prepare_for_training is called with start_training=False only to rebuild the
# same cfg; the trainer it returns is discarded. It still constructs a
# DefaultTrainer, which is where the model/data-loader log output comes from.
# prepare_for_inference then points cfg.MODEL.WEIGHTS at model_final.pth in the
# output directory and builds the predictor with a 0.50 score threshold.
trainer_discard, cfg = prepare_for_training(2000, 
                                            'logo_detector_output_r101fpn3x', 
                                            'logo_detector/combined_train', 
                                            N_classes,
                                            'COCO-Detection/faster_rcnn_R_101_FPN_3x.yaml',
                                            start_training=False)
predictor = prepare_for_inference(cfg, 'logo_detector/combined_test', threshold=0.50)

[32m[03/16 10:35:25 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

[32m[03/16 10:35:25 d2.data.build]: [0mRemoved 0 images with no usable annotations. 512 images left.
[32m[03/16 10:35:25 d2.data.build]: [0mDistribution of instances among all 4 categories:
[36m|  category  | #instances   |  category  | #instances   |  category  | #instances   |
|:----------:|:-------------|:----------:|:-------------|:----------:|:-------------|
|    sas     | 128          |     rh     | 128          |  anaconda  | 128          |
|  cloudera  | 128          |            |              |            |              |
|   total    | 512          |            |              |            |              |[0m
[32m[03/16 10:35:25 d2.data.common]: [0mSerializing 512 elements to byte tensors and concatenating them all ...
[32m[03/16 10:35:25 d2.data.common]: [0mSerialized dataset takes 0.11 MiB
[32m[03/16 10:35:25 d2.data.detection_utils]: [0mTransformGens used in training: [ResizeShortestEdge(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_st

'roi_heads.box_predictor.cls_score.weight' has shape (81, 1024) in the checkpoint but (5, 1024) in the model! Skipped.
'roi_heads.box_predictor.cls_score.bias' has shape (81,) in the checkpoint but (5,) in the model! Skipped.
'roi_heads.box_predictor.bbox_pred.weight' has shape (320, 1024) in the checkpoint but (16, 1024) in the model! Skipped.
'roi_heads.box_predictor.bbox_pred.bias' has shape (320,) in the checkpoint but (16,) in the model! Skipped.


Reading weights from output dir: logo_detector_output_r101fpn3x


In [None]:
# COCO-style evaluator for the training dataset; output goes to the model's output directory.
# NOTE(review): the third positional argument (False) is presumably the `distributed` flag - confirm against the installed detectron2 version.
train_evaluator = COCOEvaluator('logo_detector/combined_train', predictor.cfg, False, output_dir="logo_detector_output_r101fpn3x")

In [None]:
# Test-style data loader over the training dataset, built from the predictor's cfg.
train_loader = build_detection_test_loader(predictor.cfg, "logo_detector/combined_train")

In [None]:
# Run the trained model over the training dataset and collect the evaluator's metrics.
train_metrics = inference_on_dataset(predictor.model, train_loader, train_evaluator)

In [None]:
# COCO-style evaluator for the test dataset; output goes to the model's output directory.
# NOTE(review): the third positional argument (False) is presumably the `distributed` flag - confirm against the installed detectron2 version.
test_evaluator = COCOEvaluator('logo_detector/combined_test', predictor.cfg, False, output_dir="logo_detector_output_r101fpn3x")

In [None]:
# Test-style data loader over the test dataset, built from the predictor's cfg.
test_loader = build_detection_test_loader(predictor.cfg, "logo_detector/combined_test")

In [None]:
# Run the trained model over the test dataset and collect the evaluator's metrics.
test_metrics = inference_on_dataset(predictor.model, test_loader, test_evaluator)