In [None]:
import os
import sys

# `google.colab` is only importable inside a Colab runtime. Importing it
# unconditionally made this cell raise ImportError everywhere else, before the
# environment check below could skip the Colab-only steps.
try:
  from google.colab import drive
except ImportError:
  drive = None

if drive is not None and 'google.colab' in sys.modules:
  # Mount Google Drive and work from the assignment folder stored on it
  drive.mount('/content/drive')
  os.chdir('/content/drive/MyDrive/Colab Notebooks/COS40007/Assignment4')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
if 'google.colab' in sys.modules:
  %pip install 'git+https://github.com/facebookresearch/detectron2.git'
  # !pip3 uninstall --yes torch torchaudio torchvision torchtext torchdata
  %pip install torch torchaudio torchvision torchtext torchdata
  %pip install labelme2coco
else:
  !pip install 'git+https://github.com/facebookresearch/detectron2.git'
  !pip install labelme2coco
  !pip install torch torchaudio torchvision torchtext torchdata

Collecting git+https://github.com/facebookresearch/detectron2.git
  Cloning https://github.com/facebookresearch/detectron2.git to /tmp/pip-req-build-7ah9mcox
  Running command git clone --filter=blob:none --quiet https://github.com/facebookresearch/detectron2.git /tmp/pip-req-build-7ah9mcox
  Resolved https://github.com/facebookresearch/detectron2.git to commit 9604f5995cc628619f0e4fd913453b4d7d61db3f
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting torchtext
  Using cached torchtext-0.18.0-cp311-cp311-manylinux1_x86_64.whl.metadata (7.9 kB)
Collecting torchdata
  Using cached torchdata-0.11.0-py3-none-any.whl.metadata (6.3 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12

In [None]:
import cv2
import torch
import json
from detectron2 import engine, config, utils, data, structures, engine, checkpoint, model_zoo
import labelme2coco
import random

In [None]:
# All dataset and output locations are resolved relative to the current
# working directory.
BASE_PATH = os.getcwd()
DS_LOG_PATH, DS_CONVERTED_PATH, ML_OUT_PATH, IMG_OUT_PATH = (
    os.path.join(BASE_PATH, _folder)
    for _folder in ("log-labelled", "converted-labelled", "model-output", "result-image")
)

# Echo the resolved locations so a wrong working directory is spotted early
for _location in (BASE_PATH, DS_LOG_PATH, DS_CONVERTED_PATH, ML_OUT_PATH, IMG_OUT_PATH):
  print(_location)

/content/drive/MyDrive/Colab Notebooks/COS40007/Assignment4
/content/drive/MyDrive/Colab Notebooks/COS40007/Assignment4/log-labelled
/content/drive/MyDrive/Colab Notebooks/COS40007/Assignment4/converted-labelled
/content/drive/MyDrive/Colab Notebooks/COS40007/Assignment4/model-output
/content/drive/MyDrive/Colab Notebooks/COS40007/Assignment4/result-image


In [None]:
def ensure_folder_exists():
  """
  Utility function to create the necessary output directories if they do not exist.
  Creates DS_CONVERTED_PATH, ML_OUT_PATH and IMG_OUT_PATH (including missing parents).
  :return: None
  """
  for folder in (DS_CONVERTED_PATH, ML_OUT_PATH, IMG_OUT_PATH):
    # exist_ok=True avoids the check-then-create race of os.path.exists + os.makedirs
    os.makedirs(folder, exist_ok=True)

In [None]:
def with_model_config():
  """
  Utility function to define config for the Mask RCNN
  (COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x from the detectron2 model zoo).
  Relies on the notebook-level `torch` import and the ML_OUT_PATH constant.
  :return: detectron2 config node (CfgNode) ready to pass to a trainer/predictor
  """
  _model_yaml = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

  _config = config.get_cfg()
  # Merge from config file using COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x
  _config.merge_from_file(model_zoo.get_config_file(_model_yaml))
  # Prefer the GPU, but fall back to CPU instead of crashing on machines without CUDA
  _config.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
  # Model output directory (custom key, read back when saving/loading final_model.pth)
  _config.MODEL_OUTPUT_DIR = ML_OUT_PATH
  # Start from the COCO-pretrained weights of the same model zoo entry
  _config.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(_model_yaml)
  # Number of classes for the RoI heads (only "log")
  _config.MODEL.ROI_HEADS.NUM_CLASSES = 1
  # Number of RoIs sampled per image when training the RoI heads
  _config.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
  # Total number of training iterations (not epochs)
  _config.SOLVER.MAX_ITER = 100
  # Base LR
  _config.SOLVER.BASE_LR = 0.00025
  # Images per training batch
  _config.SOLVER.IMS_PER_BATCH = 2
  # Number of data loader workers
  _config.DATALOADER.NUM_WORKERS = 2
  # Always point the config at the log datasets. The previous guard combined the
  # two checks with `and`, so it would have skipped both assignments if only one
  # of the two names had already been set.
  _config.DATASETS.TRAIN = ("log_train",)
  _config.DATASETS.TEST = ("log_val",)

  return _config

In [None]:
def use_test_images(count=10, img_dir=None):
  """
  Utility function to randomly pick PNG images for testing
  :count: Maximum number of images to pick (default 10); fewer are returned
          when the directory holds fewer PNG files
  :img_dir: Directory to scan for '.png' files; defaults to DS_LOG_PATH
  :return: List of picked image file names (names only, not full paths)
  """
  if img_dir is None:
    img_dir = DS_LOG_PATH

  # Sorted so the candidate order does not depend on os.listdir ordering,
  # which keeps the picks reproducible when `random` is seeded
  _images = sorted(file for file in os.listdir(img_dir) if file.endswith('.png'))

  # random.sample raises ValueError when asked for more items than available
  return random.sample(_images, min(count, len(_images)))

def convert_from_labelme_to_coco():
  """
  Run labelme2coco on the labelme annotations in DS_LOG_PATH and export the
  result to DS_CONVERTED_PATH, using a 0.9 train split rate and category ids
  starting at 1.
  :return: None
  """
  labelme2coco.convert(
      labelme_folder=DS_LOG_PATH,
      export_dir=DS_CONVERTED_PATH,
      train_split_rate=0.9,
      category_id_start=1,
  )

In [None]:
def extract_log(img_dir, json_file):
  """
  Utility function to extract necessary data from coco dataset for registering datasets via detectron2.data.DatasetCatalog.register
  :img_dir: Directory that contains the COCO json file; image file names are joined onto it
  :json_file: Name of the COCO json file inside img_dir (e.g. "train.json")
  :return: List of dicts, one per image, with file_name, image_id, height, width and annotations
  """
  # Load coco data; the context manager closes the file handle again
  _json_path = os.path.join(img_dir, json_file)
  with open(_json_path) as fp:
    _imported_coco_data = json.load(fp)

  # Group annotations by image id once, instead of rescanning the whole
  # annotation list for every image
  annots_by_image = {}
  for ann in _imported_coco_data['annotations']:
    annots_by_image.setdefault(ann['image_id'], []).append(ann)

  extracted_logs = []
  for img in _imported_coco_data['images']:
    annotations = [
        {
            "bbox": ann['bbox'],
            "bbox_mode": structures.BoxMode.XYWH_ABS,
            "segmentation": ann['segmentation'],
            # Shift category ids down by one (the conversion is run with category_id_start=1)
            "category_id": ann['category_id'] - 1,
        }
        for ann in annots_by_image.get(img['id'], ())
    ]
    extracted_logs.append({
        # Set file name to the right path
        "file_name": os.path.join(img_dir, img['file_name']),
        "image_id": img['id'],
        "height": img['height'],
        "width": img['width'],
        "annotations": annotations,
    })
  # Return the extracted logs
  return extracted_logs

In [None]:
def reg_datasets():
  """
  Function to register the extracted train/val information with detectron2
  as the datasets "log_train" and "log_val". Safe to call more than once.
  :return: None
  """
  keys = ["train", "val"]
  for k in keys:
    name = "log_" + k
    # DatasetCatalog.register refuses a name that is already registered, which
    # made re-running the training cell fail; drop the stale entry first
    if name in data.DatasetCatalog.list():
      data.DatasetCatalog.remove(name)
    # k=k binds the current key, so each lambda loads its own json file
    data.DatasetCatalog.register(name, lambda k=k: extract_log(DS_CONVERTED_PATH, f"{k}.json"))
    data.MetadataCatalog.get(name).set(thing_classes=["log"])

In [None]:
def visualiser(with_predictor, img_path, out_path):
  """
  Function to write the predicted images to a specific output path
  :with_predictor: Trained ML Model (callable taking the loaded image)
  :img_path: Image location
  :out_path: Path to write predicted image
  :return: Output of predictor
  :raises FileNotFoundError: If the image cannot be read from img_path
  :raises OSError: If the annotated image cannot be written to out_path
  """

  # Read and predict image; cv2.imread returns None instead of raising on failure
  imr = cv2.imread(img_path)
  if imr is None:
    raise FileNotFoundError(f"Could not read image: {img_path}")
  out = with_predictor(imr)

  # Get predicted boxes and scores
  instances = out["instances"].to("cpu")
  boxes = instances.pred_boxes.tensor.numpy()
  scores = instances.scores.numpy()

  box_color = (69, 173, 130)
  text_color = (0, 0, 0)

  # Iterate and draw output box with the score
  for box, score in zip(boxes, scores):
    # Plain Python ints for the OpenCV drawing calls
    x1, y1, x2, y2 = box.astype(int).tolist()
    cv2.rectangle(imr, (x1, y1), (x2, y2), box_color, 2)
    # Keep the label inside the image when the box touches the top edge
    label_y = max(y1 - 10, 10)
    cv2.putText(imr, f"log: {score:.2f}", (x1, label_y), cv2.FONT_HERSHEY_PLAIN, 0.5, text_color, 2)

  # Write the processed image to the output path; imwrite signals failure via its return value
  if not cv2.imwrite(out_path, imr):
    raise OSError(f"Could not write image: {out_path}")
  return out

In [None]:
def counter(outs):
  """
  Function to count the detected logs in a predictor output
  :outs: Mapping whose "instances" entry supports len()
  :return: Number of entries in outs["instances"]
  """
  detected = outs["instances"]
  return len(detected)

In [None]:
# Driver cell: prepare folders and config, optionally train, then run the
# predictor on a random sample of images and report the detected log count.
ensure_folder_exists()
cfg = with_model_config()
# Set to False to skip training and reuse a previously saved final_model.pth
IS_TRAIN = True

if IS_TRAIN:
  # Convert the labelme annotations to COCO json, then register them as log_train / log_val
  convert_from_labelme_to_coco()
  reg_datasets()

  trainer = engine.DefaultTrainer(cfg)
  # resume=False: do not resume from an earlier run
  trainer.resume_or_load(resume=False)

  trainer.train()

  # Save the trained model as final_model.pth inside cfg.MODEL_OUTPUT_DIR
  checker = checkpoint.DetectionCheckpointer(trainer.model, save_dir=cfg.MODEL_OUTPUT_DIR)
  checker.save("final_model")

# Point the config at the saved weights for inference
cfg.MODEL.WEIGHTS = os.path.join(cfg.MODEL_OUTPUT_DIR, "final_model.pth")
# Score threshold applied at test time
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.66

pred = engine.DefaultPredictor(cfg)
# NOTE(review): these images are sampled from DS_LOG_PATH, the same folder the
# training split was converted from, so some may have been seen during training
test_imgs = use_test_images()

for img in test_imgs:
  img_path = os.path.join(DS_LOG_PATH, img)
  out_path = os.path.join(IMG_OUT_PATH, f"result_{img}")

  # Draws the predictions, writes the annotated copy to out_path and returns the raw output
  outputs = visualiser(pred, img_path, out_path)

  print(f"Detected logs in {img} are: {counter(outputs)}")

There are 600 listed files in folder log-labelled.


Converting labelme annotations to COCO format: 100%|██████████| 600/600 [01:51<00:00,  5.36it/s]


[04/12 12:38:22 d2.engine.defaults]: Model:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res

model_final_f10217.pkl: 178MB [00:01, 141MB/s]                           
roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, weight}
roi_heads.mask_head.predictor.{bias, weight}


[04/12 12:38:24 d2.engine.train_loop]: Starting training from iteration 0


  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[04/12 12:38:36 d2.utils.events]:  eta: 0:00:33  iter: 19  total_loss: 2.215  loss_cls: 0.6639  loss_box_reg: 0.7971  loss_mask: 0.6773  loss_rpn_cls: 0.03613  loss_rpn_loc: 0.02763    time: 0.4428  last_time: 0.4084  data_time: 0.0255  last_data_time: 0.0058   lr: 4.7703e-05  max_mem: 2047M
[04/12 12:38:54 d2.utils.events]:  eta: 0:00:25  iter: 39  total_loss: 1.841  loss_cls: 0.4702  loss_box_reg: 0.8408  loss_mask: 0.512  loss_rpn_cls: 0.0123  loss_rpn_loc: 0.02842    time: 0.4528  last_time: 0.4147  data_time: 0.0114  last_data_time: 0.0088   lr: 9.7653e-05  max_mem: 2047M
[04/12 12:39:02 d2.utils.events]:  eta: 0:00:17  iter: 59  total_loss: 1.534  loss_cls: 0.3224  loss_box_reg: 0.8303  loss_mask: 0.3352  loss_rpn_cls: 0.009215  loss_rpn_loc: 0.02558    time: 0.4495  last_time: 0.4885  data_time: 0.0099  last_data_time: 0.0085   lr: 0.0001476  max_mem: 2047M
[04/12 12:39:12 d2.utils.events]:  eta: 0:00:08  iter: 79  total_loss: 1.224  loss_cls: 0.2249  loss_box_reg: 0.719  loss_m