In [1]:
"""
# get files
!wget {LINK_TO_FILE}
!mv ./efce70ed6e537523655b0601b9c9e7137377469cf7583452369fae1dd0189c9283f7645bb5722b1d1dded81f2d867086acde75ade396a8a7274ac95467dca9c6 ./cross_val_train
!unzip -q ./cross_val_train

# init setup to get detectron working
!python3 -m pip install pyyaml==5.1 --user
import sys, os, distutils.core
# Note: This is a faster way to install detectron2 in Colab, but it does not include all functionalities (e.g. compiled operators).
# See https://detectron2.readthedocs.io/tutorials/install.html for full installation instructions
!git clone 'https://github.com/facebookresearch/detectron2'
dist = distutils.core.run_setup("./detectron2/setup.py")
!python3 -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
sys.path.insert(0, os.path.abspath('./detectron2'))
# !python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'
# !git clone https://github.com/matterport/Mask_RCNN.git
# !python -m pip install -r ./Mask_RCNN/requirements.txt
!python ./Mask_RCNN/setup.py install --user
"""

'\n# get files\n!wget {LINK_TO_FILE}\n!mv ./efce70ed6e537523655b0601b9c9e7137377469cf7583452369fae1dd0189c9283f7645bb5722b1d1dded81f2d867086acde75ade396a8a7274ac95467dca9c6 ./cross_val_train\n!unzip -q ./cross_val_train\n\n# init setup to get detectron working\n!python3 -m pip install pyyaml==5.1 --user\nimport sys, os, distutils.core\n# Note: This is a faster way to install detectron2 in Colab, but it does not include all functionalities (e.g. compiled operators).\n# See https://detectron2.readthedocs.io/tutorials/install.html for full installation instructions\n!git clone \'https://github.com/facebookresearch/detectron2\'\ndist = distutils.core.run_setup("./detectron2/setup.py")\n!python3 -m pip install {\' \'.join([f"\'{x}\'" for x in dist.install_requires])}\nsys.path.insert(0, os.path.abspath(\'./detectron2\'))\n# !python -m pip install \'git+https://github.com/facebookresearch/detectron2.git\'\n# !git clone https://github.com/matterport/Mask_RCNN.git\n# !python -m pip install -r 

In [1]:
# import torch and detectron
import torch, detectron2
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger() # we need to setup detectron before using it

# common libraries
import numpy as np
import os, json, cv2, random
import matplotlib.pyplot as plt
# from google.colab.patches import cv2_imshow

# import detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor, DefaultTrainer
from detectron2.config import get_cfg
from detectron2.utils.visualizer import ColorMode, Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog, DatasetMapper
from detectron2.structures import BoxMode
from detectron2.evaluation import COCOEvaluator

import skimage.io
import skimage.draw

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2018 NVIDIA Corporation
Built on Sat_Aug_25_21:08:01_CDT_2018
Cuda compilation tools, release 10.0, V10.0.130
torch:  2.2 ; cuda:  cu118
detectron2: 0.6


In [2]:
# check and refresh gpu
!nvcc --version
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

# make sure cuda works by running a small test
def force_cudnn_initialization():
    s = 32
    dev = torch.device("cuda")
    torch.nn.functional.conv2d(torch.zeros(s, s, s, s, device = dev), torch.zeros(s, s, s, s, device = dev))

# make sure we have enough memory
def check_gpu_memory():
    if torch.cuda.is_available():
        current_device = torch.cuda.current_device()
        gpu = torch.cuda.get_device_properties(current_device)
        print(f"GPU Name: {gpu.name}")
        print(f"GPU Memory Total: {gpu.total_memory / 1024 ** 2} MB")
        print(f"GPU Memory Free: {torch.cuda.memory_allocated(current_device) / 1024 ** 2} MB")
        print(f"GPU Memory Used: {torch.cuda.memory_reserved(current_device) / 1024 ** 2} MB")

    else:
        print("no GPU.")
    
force_cudnn_initialization()
check_gpu_memory()

GPU Name: NVIDIA A40
GPU Memory Total: 45634.0625 MB
GPU Memory Free: 0.0 MB
GPU Memory Used: 36.0 MB


In [3]:
# formats the paradim data set into something detectron can use (coco format)
class ParadimDatasetMapper:

    # we have 3 classes
    def __init__(self):
        self.categories = {
            0: "goodMelt",
            1: "fastBottom",
            2: "fastTop",

        }

    # get relevant information from the initial data set
    def __call__(self, dataset_dict):
        dataset_dict = copy.deepcopy(dataset_dict)
        image = skimage.io.imread(dataset_dict["file_name"])
        height, width = image.shape[:2]

        annotations = dataset_dict["annotations"]
        polygons = [obj["polygons"] for obj in annotations]
        category_ids = [obj["category_id"] for obj in annotations]

        # our current mask is shown as a polygon
        masks = np.zeros((height, width, len(polygons)), dtype=np.uint8)
        
        objs = []

        for i, polygon in enumerate(polygons):
            
            # lets mark pixels in the image to add our mask
            x, y = zip(*polygon)
            rr, cc = skimage.draw.polygon(y, x)
            masks[rr, cc, i] = 1

            poly = np.array(polygon).reshape(-1, 2)
            obj = {
                "bbox": self._polygon_to_box(poly),
                "bbox_mode": BoxMode.XYWH_ABS,
                "segmentation": [poly.flatten().tolist()],
                "category_id": category_ids[i],
                "iscrowd": 0,

            }
            objs.append(obj)

        dataset_dict["annotations"] = objs
        dataset_dict["height"] = height
        dataset_dict["width"] = width

        return dataset_dict

    # coco box format for classifying a space
    def _polygon_to_box(self, polygon):
        x, y = polygon.transpose()
        return [
            np.min(x),
            np.min(y),
            np.max(x) - np.min(x),
            np.max(y) - np.min(y),

        ]

# before using the dataset for training/testing, we need to register
def register_paradim_dataset(dataset_dir, subset):
    dataset_dicts = []
    image_id = 0

    for classdir in os.listdir(os.path.join(dataset_dir, subset)):
        current_class = {
            "goodMelt": 0,
            "fastBottom": 1,
            "fastTop": 2,
        }.get(classdir, None)
        
        # cover all edge cases
        if current_class is None:
            continue

        for jsonfile in os.listdir(os.path.join(dataset_dir, subset, classdir)):
            if ".json" in jsonfile:
                annotations = json.load(open(os.path.join(dataset_dir, subset, classdir, jsonfile)))
                polygons = annotations["shapes"][0]["points"]
                image_path = os.path.join(dataset_dir, subset, classdir, annotations["imagePath"])

                image = cv2.imread(image_path)
                height, width = image.shape[:2]

                x_coords, y_coords = zip(*polygons)
                min_x, min_y = min(x_coords), min(y_coords)
                max_x, max_y = max(x_coords), max(y_coords)
                bbox = [min_x, min_y, max_x - min_x, max_y - min_y]

                segmentation = [coord for point in polygons for coord in point]

                # complete format of our dataset
                dataset_dict = {
                    "image_id": image_id,
                    "file_name": image_path,
                    "height": height,
                    "width": width,
                    "annotations": [
                        {
                            "segmentation": [segmentation],
                            "category_id": current_class,
                            "bbox": bbox,
                            "bbox_mode": BoxMode.XYWH_ABS,
                        }
                    ],
                }

                dataset_dicts.append(dataset_dict)
                image_id += 1 

    # register with "paradim_" prefix
    DatasetCatalog.register("paradim_" + subset, lambda: dataset_dicts)
    MetadataCatalog.get("paradim_" + subset).set(thing_classes = [v for k, v in sorted(ParadimDatasetMapper().categories.items())])

# a way to plot the test images
def visualize_predictions(dataset_name, num_samples):
    dataset_dicts = DatasetCatalog.get(dataset_name)

    metadata = MetadataCatalog.get(dataset_name)

    samples = random.sample(dataset_dicts, num_samples)

    # opens each image, gets the prediction, draws the predicted area, and then plots
    for d in samples:
        im = cv2.imread(d["file_name"])
        outputs = predictor(im)

        v = Visualizer(im[:, :, ::-1],
                       metadata = metadata,
                       scale = 0.5,
                       instance_mode = ColorMode.IMAGE_BW)

        out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
        plt.figure(figsize = (10, 10))
        plt.imshow(out.get_image()[:, :, ::-1])
        plt.title(d["file_name"])
        plt.show()

In [4]:
# register training and validation datasets
path_to_dataset = "./PARADIM_BC_LDFZ_10_fold_crossVal/cross_val_1/" # we only need 1 copy of the crossval split since detectron handles its own split
register_paradim_dataset(path_to_dataset, "train")
register_paradim_dataset(path_to_dataset, "val")

In [5]:
# initial configs from detectron
cfg = get_cfg()
cfg.merge_from_file("./detectron2/configs/COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
cfg.DATASETS.TRAIN = ("paradim_train",)
cfg.DATASETS.TEST = ("paradim_val",)
cfg.MODEL.WEIGHTS = "detectron2://COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x/137849600/model_final_f10217.pkl"

cfg.DATALOADER.NUM_WORKERS = 16
cfg.SOLVER.IMS_PER_BATCH = 64

cfg.SOLVER.BASE_LR = 0.00005
cfg.SOLVER.MAX_ITER = 10000
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(MetadataCatalog.get(cfg.DATASETS.TRAIN[0]).thing_classes)  

# from detectron tutorial code
class CustomDatasetMapper(DatasetMapper):
    def __call__(self, dataset_dict):
        dataset_dict = super().__call__(dataset_dict)
        
        return dataset_dict

class CustomTrainer(DefaultTrainer):
    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder = None):
        if output_folder is None:
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference")

        return COCOEvaluator(dataset_name, cfg, True, output_folder)

In [7]:
# now we can train
trainer = CustomTrainer(cfg)
trainer.resume_or_load(resume = False)
trainer.train()

[32m[03/24 13:02:16 d2.engine.defaults]: [0mModel:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
 

Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (12, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (12,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (80, 256, 1, 1) in the checkpoint but (3, 256, 1, 1) 

[32m[03/24 13:02:16 d2.engine.train_loop]: [0mStarting training from iteration 0
[32m[03/24 13:02:37 d2.utils.events]: [0m eta: 2:50:31  iter: 19  total_loss: 2.138  loss_cls: 1.364  loss_box_reg: 0.03175  loss_mask: 0.6917  loss_rpn_cls: 0.0369  loss_rpn_loc: 0.01059    time: 1.0213  last_time: 1.0180  data_time: 0.0440  last_data_time: 0.0245   lr: 9.9905e-07  max_mem: 21479M
[32m[03/24 13:02:58 d2.utils.events]: [0m eta: 2:49:46  iter: 39  total_loss: 2.096  loss_cls: 1.324  loss_box_reg: 0.02859  loss_mask: 0.6899  loss_rpn_cls: 0.03908  loss_rpn_loc: 0.01167    time: 1.0243  last_time: 1.0319  data_time: 0.0246  last_data_time: 0.0236   lr: 1.998e-06  max_mem: 21480M
[32m[03/24 13:03:18 d2.utils.events]: [0m eta: 2:49:24  iter: 59  total_loss: 2.005  loss_cls: 1.241  loss_box_reg: 0.03321  loss_mask: 0.6859  loss_rpn_cls: 0.03496  loss_rpn_loc: 0.01017    time: 1.0258  last_time: 0.9587  data_time: 0.0244  last_data_time: 0.0245   lr: 2.997e-06  max_mem: 21480M
[32m[03/24

In [8]:
# once we have a saved model, we can just run from here
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained (default output_dir = ""./output")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.7   # set a custom testing threshold
predictor = DefaultPredictor(cfg)

[32m[03/24 16:13:31 d2.checkpoint.detection_checkpoint]: [0m[DetectionCheckpointer] Loading from ./output/model_final.pth ...


In [None]:
# get data and metadata
dataset_dicts = DatasetCatalog.get("paradim_val")
metadata = MetadataCatalog.get("paradim_val")

# plot 3 random pictures and their results
for d in random.sample(dataset_dicts, 3):
    im = cv2.imread(d["file_name"])
    outputs = predictor(im)

    v = Visualizer(im[:, :, ::-1],
                   metadata = metadata,
                   scale = 0.5,
                   instance_mode = ColorMode.IMAGE_BW)

    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    plt.imshow(out.get_image()[:, :, ::-1])
    plt.title(d["file_name"])
    plt.show()