In [None]:
import os
import shutil
import cv2
import numpy as np
import json
import torch
from os import listdir
from os.path import isfile, isdir, join
from google_drive_downloader import GoogleDriveDownloader as gdd

In [None]:
# Setup cell: install pyyaml (pinned) and a detectron2 wheel, then import the
# detectron2 names used by the rest of the notebook. Lines starting with `!`
# are shell commands executed by the notebook, not Python statements.
!pip install pyyaml==5.1

# Install detectron2 that matches the pytorch version
# See https://detectron2.readthedocs.io/tutorials/install.html for instructions
# TORCH_VERSION keeps the first two dot-separated components of torch.__version__.
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
# CUDA_VERSION is whatever follows the last "+" in torch.__version__.
# NOTE(review): if the version string contains no "+", split("+")[-1] returns
# the whole string, so the wheel URL below would likely not match any index --
# confirm against the installed torch build.
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
# $CUDA_VERSION and $TORCH_VERSION are filled in from the Python variables above.
!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/$CUDA_VERSION/torch$TORCH_VERSION/index.html
# If there is not yet a detectron2 release that matches the given torch + CUDA version, you need to install a different pytorch.
# exit(0)  # After installation, you may need to "restart runtime" in Colab. This line can also restart runtime

# These imports must come after the install above: detectron2 is not importable
# until the pip command has completed.
import detectron2
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data.datasets import register_coco_instances
from detectron2.engine import DefaultTrainer
from detectron2.utils.logger import setup_logger
# Configure detectron2's logger; the cell output shows the returned Logger object.
setup_logger()

Collecting pyyaml==5.1
  Downloading PyYAML-5.1.tar.gz (274 kB)
[?25l[K     |█▏                              | 10 kB 19.4 MB/s eta 0:00:01[K     |██▍                             | 20 kB 24.0 MB/s eta 0:00:01[K     |███▋                            | 30 kB 12.2 MB/s eta 0:00:01[K     |████▉                           | 40 kB 9.1 MB/s eta 0:00:01[K     |██████                          | 51 kB 5.2 MB/s eta 0:00:01[K     |███████▏                        | 61 kB 5.5 MB/s eta 0:00:01[K     |████████▍                       | 71 kB 6.0 MB/s eta 0:00:01[K     |█████████▋                      | 81 kB 6.7 MB/s eta 0:00:01[K     |██████████▊                     | 92 kB 6.8 MB/s eta 0:00:01[K     |████████████                    | 102 kB 5.4 MB/s eta 0:00:01[K     |█████████████▏                  | 112 kB 5.4 MB/s eta 0:00:01[K     |██████████████▍                 | 122 kB 5.4 MB/s eta 0:00:01[K     |███████████████▌                | 133 kB 5.4 MB/s eta 0:00:01[K     |███

<Logger detectron2 (DEBUG)>

In [None]:
# Mount Google Drive inside the Colab filesystem; the training cell writes its
# output directory under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
# Fetch the dataset archive from Google Drive by file id and unzip it in place.
DATASET_FILE_ID = '1nEJ7NTtHcCHNQqUXaoPk55VH3Uwh4QGG'
DATASET_ZIP_PATH = './dataset.zip'

gdd.download_file_from_google_drive(
    file_id=DATASET_FILE_ID,
    dest_path=DATASET_ZIP_PATH,
    unzip=True,
)

Downloading 1nEJ7NTtHcCHNQqUXaoPk55VH3Uwh4QGG into ./dataset.zip... Done.
Unzipping...Done.


In [None]:
# Convert mask information into COCO format
#
# Layout read from disk (as accessed below):
#   <inpath>/<im_name>/images/<im_name>.png   - the image itself
#   <inpath>/<im_name>/masks/*.png            - mask files; each yields one annotation
# The result is written to "nucleus_cocoformat.json" in the working directory.
inpath = "/content/dataset/train/"  # the train folder downloaded
outpath = "/content/dataset/train_new/"  # the folder putting all nuclei image
# exist_ok avoids the separate isdir() check and also creates missing parents.
os.makedirs(outpath, exist_ok=True)

# Keep only sample directories (stray files such as .DS_Store would otherwise
# crash the "/images/" lookup) and sort so that image ids are reproducible.
images_name = sorted(d for d in listdir(inpath) if isdir(join(inpath, d)))


def _contours_to_polygons(contours):
    """Flatten OpenCV contours into COCO polygons ``[x0, y0, x1, y1, ...]``.

    Contours with fewer than three points are dropped: they cannot describe a
    polygon and are rejected by COCO tooling.
    """
    polygons = []
    for contour in contours:
        if len(contour) < 3:
            continue
        # Each contour is an (N, 1, 2) array of (x, y) points.
        polygons.append(contour.reshape(-1).astype(int).tolist())
    return polygons


def _mask_to_annotation(binary, ann_id, image_id):
    """Build one COCO annotation from a thresholded (0/255) mask.

    Args:
        binary: 2-D uint8 array produced by ``cv2.threshold``.
        ann_id: id to store in the annotation's ``"id"`` field.
        image_id: id of the image this mask belongs to.

    Returns:
        The annotation dict, or ``None`` when the mask has no foreground
        pixels or yields no valid polygon.
    """
    ys, xs = np.nonzero(binary)
    if ys.size == 0:
        return None  # np.min/np.max on an empty mask would raise ValueError

    # RETR_EXTERNAL: only outer boundaries. Hole contours would otherwise be
    # emitted as extra filled polygons. Indexing with [-2] picks the contour
    # list for both the 2-value and the 3-value findContours return forms.
    contours = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    polygons = _contours_to_polygons(contours)
    if not polygons:
        return None

    x_min, y_min = int(xs.min()), int(ys.min())
    # Pixel indices are inclusive, so the extent is max - min + 1.
    width = int(xs.max()) - x_min + 1
    height = int(ys.max()) - y_min + 1
    return {
        "id": int(ann_id),
        "image_id": int(image_id),
        "category_id": 1,
        "segmentation": polygons,
        # Area is counted on the same thresholded mask the contours come from.
        "area": float(ys.size),
        "bbox": [x_min, y_min, width, height],
        "iscrowd": 0,
    }


cocoformat = {"licenses": [], "info": [], "images": [], "annotations": [], "categories": []}
cat = {"id": 1,
       "name": "nucleus",
       "supercategory": "nucleus",
      }
cocoformat["categories"].append(cat)

mask_id = 1
for i, im_name in enumerate(images_name):
    image_path = inpath + im_name + "/images/" + im_name + ".png"
    t_image = cv2.imread(image_path)
    if t_image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("Could not read image: " + image_path)

    im = {"id": int(i + 1),
          "width": int(t_image.shape[1]),
          "height": int(t_image.shape[0]),
          "file_name": im_name + ".png",
         }
    cocoformat["images"].append(im)
    print('im_name: ', im_name)

    mask_dir = inpath + im_name + "/masks/"
    for mask in sorted(listdir(mask_dir)):
        if not mask.endswith('.png'):
            continue
        mask_image = cv2.imread(mask_dir + mask, cv2.IMREAD_GRAYSCALE)
        if mask_image is None:
            print('skipping unreadable mask: ', mask_dir + mask)
            continue
        # Pixels above 127 become 255, everything else 0.
        _, binary = cv2.threshold(mask_image, 127, 255, cv2.THRESH_BINARY)

        ann = _mask_to_annotation(binary, mask_id, i + 1)
        if ann is None:
            print('skipping empty/degenerate mask: ', mask_dir + mask)
            continue
        cocoformat["annotations"].append(ann)
        mask_id = mask_id + 1  # ids stay contiguous: only kept masks consume one

with open("nucleus_cocoformat.json", "w") as f:
    json.dump(cocoformat, f)

im_name:  TCGA-18-5592-01Z-00-DX1
im_name:  TCGA-G9-6363-01Z-00-DX1
im_name:  TCGA-G9-6362-01Z-00-DX1
im_name:  TCGA-49-4488-01Z-00-DX1
im_name:  TCGA-HE-7128-01Z-00-DX1
im_name:  TCGA-NH-A8F7-01A-01-TS1
im_name:  TCGA-38-6178-01Z-00-DX1
im_name:  TCGA-E2-A1B5-01Z-00-DX1
im_name:  TCGA-E2-A14V-01Z-00-DX1
im_name:  TCGA-21-5786-01Z-00-DX1
im_name:  TCGA-B0-5711-01Z-00-DX1
im_name:  TCGA-HE-7129-01Z-00-DX1
im_name:  TCGA-21-5784-01Z-00-DX1
im_name:  TCGA-A7-A13F-01Z-00-DX1
im_name:  TCGA-AR-A1AK-01Z-00-DX1
im_name:  TCGA-AR-A1AS-01Z-00-DX1
im_name:  TCGA-HE-7130-01Z-00-DX1
im_name:  TCGA-CH-5767-01Z-00-DX1
im_name:  TCGA-G9-6356-01Z-00-DX1
im_name:  TCGA-KB-A93J-01A-01-TS1
im_name:  TCGA-B0-5698-01Z-00-DX1
im_name:  TCGA-B0-5710-01Z-00-DX1
im_name:  TCGA-DK-A2I6-01A-01-TS1
im_name:  TCGA-RD-A8N9-01A-01-TS1


In [None]:
# Copy every sample's image into the flat `outpath` folder.
# The file is addressed as "<name>.png" -- the same name the conversion cell
# reads and records as "file_name" in the annotation JSON -- rather than taking
# whatever entry listdir() happens to return first for the images folder.
for sample_name in images_name:
    file_name = sample_name + ".png"
    shutil.copyfile(inpath + sample_name + "/images/" + file_name, outpath + file_name)

# Train

In [None]:
# Register the COCO-format nucleus dataset, configure Mask R-CNN and train it.
DATASET_NAME = "my_dataset"
MODEL_CONFIG = "COCO-InstanceSegmentation/mask_rcnn_X_101_32x8d_FPN_3x.yaml"

# Registering an already-registered name fails, which made this cell break on
# a second run (hence the old commented-out DatasetCatalog.clear()). Guarding
# the call keeps the cell re-runnable without wiping other registered datasets.
if DATASET_NAME not in DatasetCatalog.list():
    register_coco_instances(DATASET_NAME, {}, "nucleus_cocoformat.json", "/content/dataset/train_new")
metadata = MetadataCatalog.get(DATASET_NAME)
dataset_dicts = DatasetCatalog.get(DATASET_NAME)

cfg = get_cfg()
# Start from the model-zoo config and its pretrained checkpoint.
cfg.merge_from_file(model_zoo.get_config_file(MODEL_CONFIG))
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(MODEL_CONFIG)
cfg.OUTPUT_DIR = "/content/drive/MyDrive/VRDL_HW3_output"
cfg.DATASETS.TRAIN = (DATASET_NAME,)
cfg.DATASETS.TEST = ()   # no metrics implemented for this dataset
cfg.DATALOADER.NUM_WORKERS = 2
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025
cfg.SOLVER.MAX_ITER = 3000
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # the annotation file defines a single category ("nucleus")
print(cfg)

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)  # build output folder
trainer = DefaultTrainer(cfg)
# resume=False: do not resume from a previous run's checkpoint.
trainer.resume_or_load(resume=False)
trainer.train()

[32m[12/16 09:31:17 d2.data.datasets.coco]: [0mLoaded 24 images in COCO format from nucleus_cocoformat.json
CUDNN_BENCHMARK: False
DATALOADER:
  ASPECT_RATIO_GROUPING: True
  FILTER_EMPTY_ANNOTATIONS: True
  NUM_WORKERS: 2
  REPEAT_THRESHOLD: 0.0
  SAMPLER_TRAIN: TrainingSampler
DATASETS:
  PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
  PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
  PROPOSAL_FILES_TEST: ()
  PROPOSAL_FILES_TRAIN: ()
  TEST: ()
  TRAIN: ('my_dataset',)
GLOBAL:
  HACK: 1.0
INPUT:
  CROP:
    ENABLED: False
    SIZE: [0.9, 0.9]
    TYPE: relative_range
  FORMAT: BGR
  MASK_FORMAT: polygon
  MAX_SIZE_TEST: 1333
  MAX_SIZE_TRAIN: 1333
  MIN_SIZE_TEST: 800
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
  MIN_SIZE_TRAIN_SAMPLING: choice
  RANDOM_FLIP: horizontal
MODEL:
  ANCHOR_GENERATOR:
    ANGLES: [[-90, 0, 90]]
    ASPECT_RATIOS: [[0.5, 1.0, 2.0]]
    NAME: DefaultAnchorGenerator
    OFFSET: 0.0
    SIZES: [[32], [64], [128], [256], [512]]
  BACKBONE:
    FREEZE_AT: 2
    NAME: buil

model_final_2d9806.pkl: 431MB [00:14, 30.7MB/s]                           
Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (2, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (2,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to i

[32m[12/16 09:31:49 d2.engine.train_loop]: [0mStarting training from iteration 0


  max_size = (max_size + (stride - 1)) // stride * stride
  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


[32m[12/16 09:32:24 d2.utils.memory]: [0mAttempting to copy inputs of <function pairwise_iou at 0x7fc067b50a70> to CPU due to CUDA OOM
[32m[12/16 09:33:15 d2.utils.memory]: [0mAttempting to copy inputs of <function pairwise_iou at 0x7fc067b50a70> to CPU due to CUDA OOM
[32m[12/16 09:33:46 d2.utils.events]: [0m eta: 2:23:44  iter: 19  total_loss: 9.526  loss_cls: 0.735  loss_box_reg: 0.1255  loss_mask: 0.6941  loss_rpn_cls: 7.192  loss_rpn_loc: 0.7057  time: 6.0922  data_time: 0.0812  lr: 4.9953e-06  max_mem: 8878M
[32m[12/16 09:34:23 d2.utils.memory]: [0mAttempting to copy inputs of <function pairwise_iou at 0x7fc067b50a70> to CPU due to CUDA OOM
[32m[12/16 09:35:19 d2.utils.events]: [0m eta: 2:22:56  iter: 39  total_loss: 4.339  loss_cls: 0.7158  loss_box_reg: 0.2636  loss_mask: 0.6848  loss_rpn_cls: 2.162  loss_rpn_loc: 0.5195  time: 5.3372  data_time: 0.0567  lr: 9.9902e-06  max_mem: 9579M
[32m[12/16 09:35:42 d2.utils.memory]: [0mAttempting to copy inputs of <function pa