<a href="https://colab.research.google.com/github/PacktPublishing/Modern-Computer-Vision-with-PyTorch-2E/blob/main/Chapter10/Multi_object_segmentation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Install the required packages: `openimages` (its image downloader is used
# further down) and `torch_snippets` (helper utilities that are star-imported
# by the following cells). -q keeps pip quiet, -U upgrades to the newest release.
%pip install -qU openimages torch_snippets

In [5]:
# Download the required annotation files into the working directory:
#   - train-annotations-object-segmentation.csv: the segmentation annotations
#     (its LabelName, ImageID and MaskPath columns are used below)
#   - classes.csv: a header-less file that is loaded below as (class, class_name)
from torch_snippets import *
!wget -O train-annotations-object-segmentation.csv -q https://storage.googleapis.com/openimages/v5/train-annotations-object-segmentation.csv
!wget -O classes.csv -q https://raw.githubusercontent.com/openimages/dataset/master/dict.csv

In [7]:

from torch_snippets import *
from builtins import print

In [8]:
# Specify the classes that we want the model to predict (lower-cased names).
required_classes = 'person,dog,bird,car,elephant,football,jug,laptop,mushroom,pizza,rocket,shirt,traffic sign,watermelon,zebra'
required_classes = list(map(str.lower, required_classes.split(',')))

# classes.csv has no header row: first column is the label code, second the readable name
classes = pd.read_csv('classes.csv', header=None)
classes.columns = ['class','class_name']

# keep only the rows whose readable name is one of the required classes
is_required = classes['class_name'].isin(required_classes)
classes = classes[is_required]

In [9]:
# Fetch the image IDs and masks corresponding to required_classes.
from torch_snippets import *
df = pd.read_csv('train-annotations-object-segmentation.csv')

# inner-join on the label code: keeps only annotations of the required classes
# and attaches the readable class_name to each of them
data = pd.merge(df, classes, left_on='LabelName', right_on='class')

# Collect the ImageIDs found in the first 500 annotation rows of every class.
# A set (instead of a flat list) makes the membership filter below a hash
# lookup rather than a linear scan per row.
subset_image_ids = set(data.groupby('class_name').head(500)['ImageID'])

# keep every annotation (of any required class) that belongs to one of those images
subset_data = data[data['ImageID'].isin(subset_image_ids)]
subset_masks = subset_data['MaskPath'].tolist()

In [None]:
#now that we have the subset of masks data to download, lets start the download.,
# we have to run this step once for each of the 16 files
!mkdir -p masks
for c in Tqdm('0123456789abcdef'):
    !wget -q https://storage.googleapis.com/openimages/v5/train-masks/train-masks-{c}.zip
    !unzip -q train-masks-{c}.zip -d tmp_masks
    !rm train-masks-{c}.zip
    tmp_masks = Glob('tmp_masks', silent=True)
    items = [(m,fname(m)) for m in tmp_masks]
    items = [(i,j) for (i,j) in items if j in subset_masks]
    for i,j in items:
        os.rename(i, f'masks/{j}')
    !rm -rf tmp_masks

 12%|█▎        | 2/16 [03:43<25:27, 109.08s/it]

In [None]:
#download the images corresponding to ImageId
masks = Glob('masks')
masks = [fname(mask) for mask in masks]

subset_data = subset_data[subset_data['MaskPath'].map(lambda x: x in masks)]
subset_imageIds = subset_data['ImageID'].tolist()

from openimages.download import _download_images_by_id
!mkdir images
_download_images_by_id(subset_imageIds, 'train', './images/')

In [None]:
# Back up all images, masks and ground-truth CSVs into data.zip - just in case
# our session crashes.
import zipfile
files = Glob('images') + Glob('masks') + ['train-annotations-object-segmentation.csv', 'classes.csv']
# DEFLATE is set once on the archive, so every member written below inherits it
with zipfile.ZipFile('data.zip', 'w', compression=zipfile.ZIP_DEFLATED) as archive:
    for path in Tqdm(files):
        archive.write(path)

In [None]:
# move data into a singe directory
!mkdir -p train/
!mv images train/myData2020
!mv masks train/abnnotations

In [None]:
# Install pycococreator from the sizhky fork (it provides `pycococreatortools`,
# used below to build the COCO image/annotation records) and pycocotools.
# NOTE: /content/ is hard-coded, so this cell assumes a Colab-style filesystem.
%cd /content/
!git clone https://github.com/sizhky/pycococreator.git
%cd pycococreator
# editable install from the cloned checkout
%pip install -e .
# go back to the directory we were in before entering the checkout
%cd -
%pip install pycocotools

In [None]:
# Define the dataset-level metadata and the required categories in COCO format.
import datetime

# creation timestamp (UTC), formatted with a space between date and time
created_at = datetime.datetime.utcnow().isoformat(' ')

INFO = {
    "description": "MyData2020",
    "url": "None",
    "version": "1.0",
    "year": 2020,
    "contributor": "sizhky",
    "date_created": created_at,
}

LICENSES = [{"id": 1, "name": "MIT"}]

# We are not interested in supercategories, so each one is specified as 'none'.
# Category ids start at 1; the name is the label code from the `class` column
# with its slashes stripped.
CATEGORIES = [
    {'id': category_id, 'name': label_code.replace('/',''), 'supercategory': 'none'}
    for category_id, label_code in enumerate(classes['class'], start=1)
]

In [None]:
# import the relevant packages
from pycococreator.pycococreatortools import pycococreatortools
from os import listdir
from os.path import isfile, join
from PIL import Image

# Skeleton of the COCO JSON document: the metadata sections are filled in
# already, while "images" and "annotations" start empty and are populated by
# the loops in the following cells.
coco_output = dict(
    info=INFO,
    licenses=LICENSES,
    categories=CATEGORIES,
    images=[],
    annotations=[],
)

In [None]:
# Set a few variables that hold the image and annotation locations, then list
# the regular files (sub-directories are ignored) found in each location.
ROOT_DIR = "train"
IMAGE_DIR = 'train/myData2020/'
ANNOTATION_DIR = 'train/annotations/'

image_files = list(filter(lambda entry: isfile(join(IMAGE_DIR, entry)), listdir(IMAGE_DIR)))
annotation_files = list(filter(lambda entry: isfile(join(ANNOTATION_DIR, entry)), listdir(ANNOTATION_DIR)))

In [None]:
# Loop through each image file and populate the "images" key of coco_output.
# Image ids are assigned sequentially, starting at 1.
image_id = 1
for image_filename in Tqdm(image_files):
    # Only the dimensions are needed, so open the file in a context manager:
    # the handle is released as soon as the size has been read instead of
    # staying open until the object happens to be garbage-collected.
    with Image.open(os.path.join(IMAGE_DIR, image_filename)) as image:
        image_info = pycococreatortools.create_image_info(image_id, os.path.basename(image_filename), image.size)
    coco_output["images"].append(image_info)
    image_id = image_id + 1

In [None]:
# Loop through each segmentation mask and populate the "annotations" key of
# coco_output.

# Index the image records by file stem once, so every mask finds its image
# with a dictionary lookup instead of rescanning the whole list.
# setdefault keeps the first record when two files share a stem.
images_by_stem = {}
for info in coco_output['images']:
    images_by_stem.setdefault(stem(info['file_name']), info)

segmentation_id = 1
skipped_masks = 0
for annotation_filename in Tqdm(annotation_files):
    # the token before the first underscore of the mask name is the image's file stem
    image_info = images_by_stem.get(annotation_filename.split('_')[0])
    if image_info is None:
        # no image record for this mask (e.g. the image was never downloaded)
        skipped_masks += 1
        continue
    image_id = image_info['id']

    # category names are label codes without slashes, which appear verbatim in the mask name
    class_id = next(x['id'] for x in CATEGORIES if x['name'] in annotation_filename)
    # crowd flag must come from THIS mask's name, not from a variable left over
    # by the image loop
    category_info = {'id': class_id, 'is_crowd': 'crowd' in annotation_filename}
    binary_mask = np.asarray(Image.open(f'{ANNOTATION_DIR}/{annotation_filename}').convert('1')).astype(np.uint8)

    # Size of the image this mask belongs to, as recorded in its image record
    # (width/height are standard COCO image fields). Previously the size of the
    # last image opened by the image loop was used for every mask.
    image_size = (image_info['width'], image_info['height'])
    annotation_info = pycococreatortools.create_annotation_info(segmentation_id, image_id, category_info, binary_mask, image_size, tolerance=2)

    # create_annotation_info may return None; only real annotations consume an id
    if annotation_info is not None:
        coco_output["annotations"].append(annotation_info)
        segmentation_id = segmentation_id + 1

if skipped_masks:
    print(f'skipped {skipped_masks} masks without a matching image')

In [None]:
# Save coco_output to a JSON file.
# Before writing, swap the category names from label codes to the readable
# class names (ids keep the same 1-based order over `classes`).
coco_output['categories'] = [
    {'id': category_id, 'name': readable_name, 'supercategory': 'none'}
    for category_id, readable_name in enumerate(classes['class_name'], start=1)
]

import json
with open('images.json', 'w') as output_json_file:
    output_json_file.write(json.dumps(coco_output))

In [None]:
# Install the required detectron2 packages from source.
# NOTE: the /content/ paths are hard-coded, so this cell assumes a Colab-style filesystem.
%cd /content/
# install detectron2: clone the repository, install its requirements, then the package itself
!git clone https://github.com/facebookresearch/detectron2
%cd /content/detectron2
%pip install -r requirements.txt
!python setup.py install
%cd /content
# install fvcore from its repository as an editable package
!git clone https://github.com/facebookresearch/fvcore.git
%cd /content/fvcore
%pip install -e .
# finish back in /content/, where the data files of the earlier cells were written
%cd /content/


In [None]:
# Given that we have restarted Colab, let's re-create the required classes
# (same steps as at the top of the notebook).
from torch_snippets import *
required_classes = 'person,dog,bird,car,elephant,football,jug,laptop,mushroom,pizza,rocket,shirt,traffic sign,watermelon,zebra'
required_classes = list(map(str.lower, required_classes.split(',')))

# classes.csv has no header row: first column is the label code, second the readable name
classes = pd.read_csv('classes.csv', header=None)
classes.columns = ['class','class_name']

# keep only the rows whose readable name is one of the required classes
is_required = classes['class_name'].isin(required_classes)
classes = classes[is_required]

If the cell below fails to execute, restart the notebook runtime and  
resume from the cell above (the one that reloads `classes`);  
there is no need to re-run the notebook from the very top.

In [None]:
# import the relevant detectron2 packages
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.engine import DefaultTrainer

In [None]:
# Register the dataset created above under the name "dataset_train":
# images.json holds the COCO annotations, train/myData2020 the image files.
from detectron2.data.datasets import register_coco_instances
register_coco_instances(
    name="dataset_train",
    metadata={},
    json_file="images.json",
    image_root="train/myData2020",
)

In [None]:
# Define all the training parameters in `cfg`, the detectron2 configuration
# object that holds all the relevant info for training a model.
# Core parameters and pretrained weights both come from the same model-zoo
# entry: Mask R-CNN with an FPN backbone.
base_config = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file(base_config))

# data
cfg.DATASETS.TRAIN = ("dataset_train",)
cfg.DATASETS.TEST = ()
cfg.DATALOADER.NUM_WORKERS = 2

# model
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(base_config) # pretrained weights
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 512
cfg.MODEL.ROI_HEADS.NUM_CLASSES = len(classes) # one output class per required class

# optimisation
cfg.SOLVER.IMS_PER_BATCH = 2
cfg.SOLVER.BASE_LR = 0.00025 # pick a good LR
cfg.SOLVER.MAX_ITER = 5000 # instead of epochs, we train on 5000 batches

In [None]:
# Train the model to predict classes, bounding boxes, and also the segmentation
# of objects belonging to the defined classes within our custom dataset.
# Make sure the output directory named in cfg exists before training starts.
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)
# resume=False: do not resume a previous run from the output directory
trainer.resume_or_load(resume=False)
trainer.train()

In [None]:
### Making inferences on images - we load the trained weights, set the
### probability threshold and run the images through a DefaultPredictor.

# Reuse the training cfg, pointing it at the weights saved in the output directory.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")
# Threshold for the probability of an object belonging to a certain class.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.25
predictor = DefaultPredictor(cfg)

# Perform segmentation on 30 randomly chosen training images and visualize it.
from detectron2.utils.visualizer import ColorMode
files = Glob('train/myData2020')
# Class-level info for the dataset (index-to-class mapping), so that predicted
# class indices are drawn as human-readable class names. It is the same for
# every image, so it is looked up once.
train_metadata = MetadataCatalog.get("dataset_train")

for _ in range(30):
    image_path = str(choose(files))
    im = cv2.imread(image_path)
    outputs = predictor(im)
    # Visualizer is detectron2's way of plotting object instances.
    visualizer = Visualizer(
        im[:, :, ::-1],                    # the image we want to visualize, channel order reversed
        scale=0.5,                         # plot the image shrunk down to 50%
        metadata=train_metadata,
        # IMAGE_BW: only highlight the segmented pixels and remove the colors of
        # unsegmented pixels. This option is only available for segmentation models.
        instance_mode=ColorMode.IMAGE_BW,
    )

    out = visualizer.draw_instance_predictions(outputs["instances"].to("cpu"))
    show(out.get_image())