In [None]:
# Based on https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5#scrollTo=h9tECBQCvMv3

### Install detectron2

In [None]:
# To avoid error: NotImplementedError: A UTF-8 locale is required. Got ANSI_X3.4-1968

import locale

# Show which encoding the runtime reports before it is overridden below.
print(locale.getpreferredencoding())


def getpreferredencoding(do_setlocale = True):
    """Return "UTF-8" unconditionally.

    Stand-in for ``locale.getpreferredencoding``. ``do_setlocale`` is accepted
    only so the signature matches the function it replaces; it is ignored.
    """
    return "UTF-8"


# Monkey-patch the stdlib lookup so every later call reports UTF-8.
locale.getpreferredencoding = getpreferredencoding

UTF-8


In [None]:
!python -m pip install pyyaml==5.1
import sys, os, distutils.core
# Note: This is a faster way to install detectron2 in Colab, but it does not include all functionalities (e.g. compiled operators).
# See https://detectron2.readthedocs.io/tutorials/install.html for full installation instructions
!git clone 'https://github.com/facebookresearch/detectron2'
dist = distutils.core.run_setup("./detectron2/setup.py")
!python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
sys.path.insert(0, os.path.abspath('./detectron2'))

# Properly install detectron2. (Please do not install twice in both ways)
# !python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

Collecting pyyaml==5.1
  Downloading PyYAML-5.1.tar.gz (274 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m274.2/274.2 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25h  [1;31merror[0m: [1msubprocess-exited-with-error[0m
  
  [31m×[0m [32mpython setup.py egg_info[0m did not run successfully.
  [31m│[0m exit code: [1;36m1[0m
  [31m╰─>[0m See above for output.
  
  [1;35mnote[0m: This error originates from a subprocess, and is likely not a problem with pip.
  Preparing metadata (setup.py) ... [?25l[?25herror
[1;31merror[0m: [1mmetadata-generation-failed[0m

[31m×[0m Encountered error while generating package metadata.
[31m╰─>[0m See above for output.

[1;35mnote[0m: This is an issue with the package mentioned above, not pip.
[1;36mhint[0m: See above for details.
Cloning into 'detectron2'...
remote: Enumerating objects: 15280, done.[K
remote: Counting objects: 100% (5/5), done.[K
remote: Compressing objects: 100% (5/5), done.[K
remo

In [None]:
import torch, detectron2
!nvcc --version
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2022 NVIDIA Corporation
Built on Wed_Sep_21_10:33:58_PDT_2022
Cuda compilation tools, release 11.8, V11.8.89
Build cuda_11.8.r11.8/compiler.31833905_0
torch:  2.1 ; cuda:  cu118
detectron2: 0.6


In [None]:
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
# Called with its default arguments, before any other detectron2 code in this notebook runs.
setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random
# Colab-specific helper used by later cells to display images inline.
from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
# (model zoo configs/weights, inference wrapper, config factory, drawing helper, dataset registries)
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

### Train a custom data set

In [None]:
# See what needs to be in dataset:
# https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html

In [None]:
%pip install pycocotools # to convert mask to rle_dict



In [None]:
from google.colab import drive

# Attach Google Drive to the runtime; the dataset and model folders used by
# later cells are read from and written to this mount.
DRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
import os, json
from detectron2.structures import BoxMode

def get_custom_dicts(folder_path):
    """Build detectron2-style dataset dicts from a folder of annotated images.

    The folder is expected to hold images (.jpg/.jpeg/.png, any letter case)
    next to JSON annotation files with the same base name. Each JSON file must
    provide the keys ``imagePath``, ``imageHeight``, ``imageWidth`` and
    ``shapes``, where every shape has a ``points`` list of ``[x, y]`` pairs
    (presumably labelme output; verify against the annotation tool used).

    Args:
        folder_path (str): Directory containing the image/JSON pairs.

    Returns:
        list[dict]: One record per annotation file, ordered by JSON file name.
        Each record has ``file_name``, ``image_id``, ``height``, ``width`` and
        ``annotations``. Every annotation has an absolute XYXY ``bbox``, a
        single-polygon ``segmentation`` and ``category_id`` 0.

    Raises:
        OSError: If the folder or an annotation file cannot be read.
        KeyError: If an annotation file lacks one of the expected keys.
    """
    image_extensions = ('.jpg', '.jpeg', '.png')

    # A set makes the "does the matching JSON exist?" check O(1) per file.
    all_files = set(os.listdir(folder_path))

    # Collect annotation files that have at least one image with the same base
    # name. Using a set means a JSON file is processed once even if several
    # images (e.g. a.jpg and a.png) share its base name.
    paired_json_files = set()
    for file_name in all_files:
        base_name, extension = os.path.splitext(file_name)
        json_file = base_name + '.json'
        if extension.lower() in image_extensions and json_file in all_files:
            paired_json_files.add(json_file)

    dataset_dicts = []

    # Sorting keeps image ids stable between runs.
    for idx, json_file in enumerate(sorted(paired_json_files)):

        # Load JSON data from file
        with open(os.path.join(folder_path, json_file), 'r') as file:
            data = json.load(file)

        # Basic image information; the image path is taken from the JSON itself.
        record = {
            "file_name": os.path.join(folder_path, data["imagePath"]),
            "image_id": idx,
            "height": data["imageHeight"],
            "width": data["imageWidth"],
        }

        objs = []
        for shape in data["shapes"]:
            points = shape['points']

            # A polygon needs at least three vertices; shapes with fewer points
            # (e.g. points, lines, two-corner rectangles) cannot be used as a
            # segmentation mask and are skipped.
            if len(points) < 3:
                continue

            xs = [point[0] for point in points]
            ys = [point[1] for point in points]

            objs.append({
                "bbox": [min(xs), min(ys), max(xs), max(ys)],
                "bbox_mode": BoxMode.XYXY_ABS,
                # detectron2 expects each polygon as one flat list
                # [x1, y1, ..., xn, yn], not a list of [x, y] pairs.
                "segmentation": [
                    [coord for point in points for coord in (point[0], point[1])]
                ],
                "category_id": 0,  # If only one class, id = 0
            })

        record["annotations"] = objs
        dataset_dicts.append(record)

    return dataset_dicts

# Build the dataset dicts from the annotated images stored on Google Drive.
# NOTE(review): the path is relative, so it only resolves when the working directory
# is the folder that contains the "gdrive" mount (presumably /content on Colab) — confirm.
dataset_dicts = get_custom_dicts("gdrive/My Drive/Master Geo Information Science/Internship/Data/kreeften omcirkelt + rest/")

# Split dataset of dicts into training and validation sets
def split_dataset(dataset, split_ratio=0.8):
    """
    Cut a dataset into a leading training part and a trailing validation part.

    The order of the samples is kept; nothing is shuffled. The cut position is
    ``len(dataset) * split_ratio`` truncated to an integer.

    Args:
        dataset (list): The samples to divide.
        split_ratio (float): Fraction of the samples assigned to training.

    Returns:
        tuple: ``(training_samples, validation_samples)``.
    """
    cut = int(len(dataset) * split_ratio)
    training_part = dataset[:cut]
    validation_part = dataset[cut:]
    return training_part, validation_part

# Get training and validation dicts
train_dicts, vali_dicts = split_dataset(dataset_dicts)

# Dataset name -> list of dataset dicts served under that name.
registered_splits = {"all": dataset_dicts, "train": train_dicts, "vali": vali_dicts}

for split_name, split_dicts in registered_splits.items():
    # Drop a previous registration of this name only if it exists: an
    # unconditional remove raises KeyError on a fresh kernel, and clearing the
    # whole catalog would also drop every other registered dataset.
    if split_name in DatasetCatalog.list():
        DatasetCatalog.remove(split_name)
    if split_name in MetadataCatalog.list():
        MetadataCatalog.remove(split_name)

    # Bind the list as a default argument so each loader returns its own split
    # rather than whatever the loop variable points to last.
    DatasetCatalog.register(split_name, lambda dicts=split_dicts: dicts)
    # Every split uses the same single class (previously "all" never received
    # its thing_classes because the metadata was set on "train" twice).
    MetadataCatalog.get(split_name).set(thing_classes=["Kreeft"])

# Metadata handles used by the visualisation and evaluation cells.
dataset_metadata = MetadataCatalog.get("all")
train_metadata = MetadataCatalog.get("train")
vali_metadata = MetadataCatalog.get("vali")

In [None]:
# Verify if dataset also in correct format
train_metadata = MetadataCatalog.get("train")
vali_metadata = MetadataCatalog.get("vali")

# Draw the annotations of one random training image (none if the training set is empty).
for d in random.sample(train_dicts, min(1, len(train_dicts))):

    img = cv2.imread(d["file_name"])
    # cv2.imread returns None instead of raising when a file is missing or unreadable;
    # fail here with the offending path rather than with a TypeError on the slice below.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {d['file_name']}")

    # Reverse the channel axis (OpenCV loads BGR) before drawing, and again for display.
    visualizer = Visualizer(img[:, :, ::-1], metadata=train_metadata, scale=2)

    out = visualizer.draw_dataset_dict(d)
    cv2_imshow(out.get_image()[:, :, ::-1])


Output hidden; open in https://colab.research.google.com to view.

## Train

In [None]:
# Parameters that seem to do reasonably well:

#  it seems that the lower the batch size the more predictions are made

# cfg.SOLVER.IMS_PER_BATCH = 1, cfg.SOLVER.BASE_LR = 0.00025 and with cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 16 and 300 iterations (for 100x100 images) (works well)

# "COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml" also good, but also only when less than 300/500 iterations (why?)

In [None]:
%pip install -U sahi # Necessary to save cfg as yaml (and for later SAHI use)

Collecting sahi
  Downloading sahi-0.11.15-py3-none-any.whl (105 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.4/105.4 kB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting opencv-python<=4.8 (from sahi)
  Downloading opencv_python-4.7.0.72-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (61.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.8/61.8 MB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
Collecting pybboxes==0.1.6 (from sahi)
  Downloading pybboxes-0.1.6-py3-none-any.whl (24 kB)
Collecting fire (from sahi)
  Downloading fire-0.5.0.tar.gz (88 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.3/88.3 kB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting terminaltables (from sahi)
  Downloading terminaltables-3.1.10-py2.py3-none-any.whl (15 kB)
Building wheels for collected packages: fire
  Building wheel for fire (setup.py) ... [?25l

In [None]:
from detectron2.engine import DefaultTrainer
from sahi.utils.detectron2 import export_cfg_as_yaml

# "COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml" used by https://www.mdpi.com/2072-4292/12/18/3015 (crops segmentation)?

# Model zoo entry that supplies both the base config and the pretrained weights.
# Used "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml" before.
MODEL_ZOO_CONFIG = "COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml"

# Output folder per Google account.
OUTPUT_DIRS = {
    "main": "gdrive/My Drive/Master Geo Information Science/Internship/Data/Model/Kreeften",
    "alt": "gdrive/My Drive/Stage/Data/Model",
}
# `account_n` is not assigned in any earlier cell visible in this notebook, so a
# fresh kernel would stop with a NameError here. Keep a value that is already
# defined, otherwise default to "main".
account_n = globals().get("account_n", "main")

# Base parameters
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(MODEL_ZOO_CONFIG))
cfg.DATASETS.TRAIN = ("train",)
cfg.DATASETS.TEST = ("vali",) # Or should use different one here ?
# cfg.TEST.EVAL_PERIOD = 100
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(MODEL_ZOO_CONFIG)  # Let training initialize from model zoo

cfg.SOLVER.IMS_PER_BATCH = 1  # Was 2. This is the real "batch size" commonly known to deep learning people
cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
cfg.SOLVER.MAX_ITER = 300    # 300 iterations seems good enough for this toy dataset; you will need to train longer for a practical dataset
cfg.SOLVER.STEPS = []        # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128  # The "RoIHead batch size". 128 is faster, and good enough for this toy dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # only one class ("Kreeft"). (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)
# NOTE: this config means the number of classes, but a few popular unofficial tutorials incorrectly use num_classes+1 here.

# An unknown account name leaves the config's existing OUTPUT_DIR untouched, as before.
cfg.OUTPUT_DIR = OUTPUT_DIRS.get(account_n, cfg.OUTPUT_DIR)

# Custom parameters
# Adjust the parameters for detecting smaller objects (?)

# cfg.MODEL.RPN.IN_FEATURES = ['p2', 'p3', 'p4', 'p5', 'p6']
# cfg.MODEL.ANCHOR_GENERATOR.ASPECT_RATIOS = [[0.25, 0.5, 1.0]] #, 2.0, 4.0, 8.0]]
# cfg.MODEL.ANCHOR_GENERATOR.SIZES = [[4], [8], [16], [32], [64], [128]]
# cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 10240
# cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION = 0.7
# cfg.MODEL.ROI_HEADS.IOU_THRESHOLDS = [0.5] # Intersection over union threshold

# Set the MASK_FORMAT to bitmask
cfg.INPUT.MASK_FORMAT = "bitmask" # important (?)

# Use the GPU when one is available; otherwise fall back to the CPU so model
# construction does not fail with a runtime error on machines without an NVIDIA device.
cfg.MODEL.DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
# trainer = CocoTrainer(cfg) # Overwrite default trainer
trainer.resume_or_load(resume=False)  # start from cfg.MODEL.WEIGHTS, not from an earlier run's checkpoint
trainer.train()

[11/15 15:31:03 d2.engine.defaults]: Model:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res

roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, weight}
roi_heads.mask_head.predictor.{bias, weight}


[11/15 15:31:05 d2.engine.train_loop]: Starting training from iteration 0
[11/15 15:31:27 d2.utils.events]:  eta: 0:04:23  iter: 19  total_loss: 2.063  loss_cls: 0.5986  loss_box_reg: 0.3208  loss_mask: 0.6904  loss_rpn_cls: 0.2846  loss_rpn_loc: 0.05489    time: 1.0406  last_time: 0.9591  data_time: 0.4427  last_data_time: 0.2886   lr: 1.6068e-05  max_mem: 3743M
[11/15 15:31:47 d2.utils.events]:  eta: 0:03:50  iter: 39  total_loss: 2.28  loss_cls: 0.5336  loss_box_reg: 0.5326  loss_mask: 0.6857  loss_rpn_cls: 0.1625  loss_rpn_loc: 0.08033    time: 1.0021  last_time: 0.9727  data_time: 0.3204  last_data_time: 0.4838   lr: 3.2718e-05  max_mem: 3743M
[11/15 15:32:07 d2.utils.events]:  eta: 0:03:40  iter: 59  total_loss: 2.172  loss_cls: 0.5048  loss_box_reg: 0.7772  loss_mask: 0.6694  loss_rpn_cls: 0.09493  loss_rpn_loc: 0.06889    time: 0.9982  last_time: 1.0688  data_time: 0.3398  last_data_time: 0.4119   lr: 4.9367e-05  max_mem: 3743M
[11/15 15:32:25 d2.utils.events]:  eta: 0:03:14  i

In [None]:
# Locations of the training artefacts inside the output folder.
final_checkpoint_path = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
state_dict_path = os.path.join(cfg.OUTPUT_DIR, "model_weights.pt")
cfg_yaml_path = os.path.join(cfg.OUTPUT_DIR, "model_cfg.yaml")

# Point the config at the final checkpoint before exporting it, so the YAML
# written below carries this weights path.
cfg.MODEL.WEIGHTS = final_checkpoint_path

# Save the model weights (plain state dict of the trained model)
torch.save(trainer.model.state_dict(), state_dict_path)
# trainer.model.save_model(cfg.MODEL.WEIGHTS) # not working (?)

# Write the full config next to the weights.
export_cfg_as_yaml(cfg, export_path=cfg_yaml_path)

### Inference and evaluation

In [None]:
# Inference reuses the training config; only the weights path and the score
# threshold are adjusted before the predictor is built.
trained_weights_path = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
cfg.MODEL.WEIGHTS = trained_weights_path
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.3   # set a custom testing threshold
predictor = DefaultPredictor(cfg)

[11/15 15:37:30 d2.checkpoint.detection_checkpoint]: [DetectionCheckpointer] Loading from gdrive/My Drive/Master Geo Information Science/Internship/Data/Model/Kreeften/model_final.pth ...


In [None]:
# Randomly select and visualise samples of vali predictions

from detectron2.utils.visualizer import ColorMode

# random.sample raises ValueError when asked for more items than exist, so cap
# the request at the size of the validation set.
n_samples = min(10, len(vali_dicts))

for d in random.sample(vali_dicts, n_samples):
    im = cv2.imread(d["file_name"])
    # cv2.imread returns None for a missing/unreadable file; report the path
    # instead of letting the predictor fail on None.
    if im is None:
        raise FileNotFoundError(f"Could not read image: {d['file_name']}")

    outputs = predictor(im)  # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
    v = Visualizer(im[:, :, ::-1],
                   metadata=vali_metadata,
                   scale=2,
                   instance_mode=ColorMode.IMAGE_BW   # remove the colors of unsegmented pixels. This option is only available for segmentation models
    )
    # Move the predictions to the CPU before drawing them.
    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    cv2_imshow(out.get_image()[:, :, ::-1])

In [None]:
# Score the trained model on the "vali" dataset with the COCO API's AP metrics.

from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader

# The evaluator writes its files to ./output.
evaluator = COCOEvaluator("vali", output_dir="./output")
val_loader = build_detection_test_loader(cfg, "vali")

# Run the predictor's underlying model over the loader and show the metrics.
eval_results = inference_on_dataset(predictor.model, val_loader, evaluator)
print(eval_results)
# another equivalent way to evaluate the model is to use `trainer.test`