In [None]:
# This notebook was run on Google Colab.
# Mount Google Drive at /content/drive so files stored there are reachable
# from this runtime.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os
import sys
import random
import math
import re
import time
import numpy as np
import cv2
import matplotlib
import json
import skimage
import matplotlib.pyplot as plt
import matplotlib.patches as patches
# TensorFlow 1.x is required: the Mask R-CNN library used here has issues with TF 2.x.
%tensorflow_version 1.x
# An old Keras release is required as well (pinned to 2.1.0 below): the library has problems with newer Keras.
!pip install keras==2.1.0

# Root directory of the project
ROOT_DIR = os.path.abspath("../../")

# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.visualize import display_images
from mrcnn.model import log

%matplotlib inline 

# Directory (under ROOT_DIR) passed to the model as `model_dir`; training logs
# and trained weights are saved here.
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Local path of the COCO pre-trained weights file used to initialise training
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
# Fetch the COCO weights only when the file is not already present, so
# re-running this cell does not download them again.
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)

In [5]:
class ClassConfig(Config):
    """Training configuration for the single-object-class model.

    Only the values listed here override the defaults inherited from
    ``Config``.
    """

    # Name of this configuration
    NAME = "classes"

    # One object class plus the background class
    NUM_CLASSES = 1 + 1

    # Batch layout: a single GPU processing one image at a time
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

    # Bounds on the image side length used for resizing
    IMAGE_MIN_DIM = 128
    IMAGE_MAX_DIM = 1024

    # Side length, in pixels, of the anchors
    RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)

    # Many ROIs per image during training, because the images are large and
    # contain many objects.
    TRAIN_ROIS_PER_IMAGE = 512

    # Few epochs with many steps each: a weights (.h5) file is written after
    # every epoch, so fewer epochs means fewer files to store.
    STEPS_PER_EPOCH = 1000

    # Keep the validation pass at the end of each epoch short
    VALIDATION_STEPS = 5

    # Possible later addition: skip detections with < 60% confidence
    #DETECTION_MIN_CONFIDENCE = 0.6

config = ClassConfig()
#config.display()

In [6]:
class ClassDataset(utils.Dataset):
    """Dataset of polygon annotations exported from the VIA annotation tool.

    Every image is registered under the source name "classes". Each annotated
    region becomes one instance whose class ID is looked up from the region's
    ``label`` attribute.
    """

    def load_data(self, dataset_dir, json_file):
        """Register the classes and every annotated image of one dataset split.

        Args:
            dataset_dir: Directory holding the images and the annotation file.
            json_file: File name of the VIA JSON export inside ``dataset_dir``.

        Raises:
            KeyError: If a region carries a ``label`` that is missing from the
                class map below (or has no ``label`` attribute at all).
        """
        # Imported explicitly: on older scikit-image releases a bare
        # `import skimage` does not by itself make `skimage.io` available.
        import skimage.io

        # Add classes
        self.add_class("classes", 1, "class")
        classes_dict = {"class": 1}  # add more if necessary (keep in sync with add_class)

        # Load annotations; the context manager closes the file right away
        with open(os.path.join(dataset_dir, json_file)) as annotation_file:
            annotations = json.load(annotation_file)
        annotations = list(annotations.values())  # don't need the dict keys

        # The VIA tool saves images in the JSON even if they don't have any
        # annotations. Skip unannotated images.
        annotations = [a for a in annotations if a['regions']]

        # Add images
        for a in annotations:
            # VIA 1.x stores the regions as a dict keyed by index, VIA 2.x as
            # a plain list. Normalise to a list so both exports load.
            regions = a['regions']
            if isinstance(regions, dict):
                regions = list(regions.values())

            # The x, y coordinates of the polygon outlining each object
            # instance are stored in shape_attributes; the class label is
            # stored in region_attributes.
            polygons = [reg['shape_attributes'] for reg in regions]
            num_ids = [classes_dict[reg['region_attributes']['label']]
                       for reg in regions]

            # load_mask() needs the image size to convert polygons to masks.
            # Unfortunately, VIA doesn't include it in JSON, so we must read
            # the image. This is only manageable since the dataset is tiny.
            image_path = os.path.join(dataset_dir, a['filename'])
            image = skimage.io.imread(image_path)
            height, width = image.shape[:2]

            self.add_image(
                "classes",
                image_id=a['filename'],  # use file name as a unique image id
                path=image_path,
                width=width, height=height,
                polygons=polygons, num_ids=num_ids)

    def load_mask(self, image_id):
        """Load instance masks for the given image.

        MaskRCNN expects masks in the form of a bitmap [height, width, instances].

        Args:
            image_id: The id of the image to load masks for.

        Returns:
            masks: A bool array of shape [height, width, instance count] with
                one mask per instance.
            class_ids: A 1D int32 array with the class ID of each instance mask.
        """
        # Imported explicitly for the same reason as skimage.io in load_data()
        import skimage.draw

        info = self.image_info[image_id]
        if info["source"] != "classes":
            # Name the class explicitly: super(self.__class__, self) recurses
            # forever as soon as this class is subclassed.
            return super(ClassDataset, self).load_mask(image_id)

        height, width = info["height"], info["width"]
        mask = np.zeros([height, width, len(info["polygons"])], dtype=bool)

        for i, p in enumerate(info["polygons"]):
            # Get indexes of pixels inside the polygon and set them. `shape`
            # clips the polygon to the image, so vertices lying on or beyond
            # the border cannot index outside the mask.
            rr, cc = skimage.draw.polygon(p['all_points_y'], p['all_points_x'],
                                          shape=(height, width))
            mask[rr, cc, i] = True

        # Return the mask together with the class ID recorded for each
        # instance when the image was registered.
        return mask, np.array(info['num_ids'], dtype=np.int32)

    def image_reference(self, image_id):
        """Return the path of the image."""
        info = self.image_info[image_id]
        if info["source"] == "classes":
            return info["path"]
        # Hand the parent's answer back to the caller instead of dropping it
        return super(ClassDataset, self).image_reference(image_id)

In [7]:
# Build the training and validation datasets: each one is filled from its own
# image directory / annotation file and then prepared for use.
dataset_train = ClassDataset()
dataset_val = ClassDataset()

for split_dataset, split_dir, split_json in (
        (dataset_train, "Main/train", "train_label.json"),
        (dataset_val, "Main/val", "val_label.json")):
    split_dataset.load_data(split_dir, split_json)
    split_dataset.prepare()

In [None]:
# Training code block 1
# Build the network in training mode with the training configuration
model = modellib.MaskRCNN(mode="training", config=config, model_dir=MODEL_DIR)

# Starting point for the weights: "imagenet", "coco", or "last"
init_with = "coco"  # imagenet, coco, or last

if init_with == "coco":
    # Start from the MS COCO weights, but leave out the layers that differ
    # because of the different number of classes.
    class_dependent_layers = ["mrcnn_class_logits", "mrcnn_bbox_fc",
                              "mrcnn_bbox", "mrcnn_mask"]
    model.load_weights(COCO_MODEL_PATH, by_name=True,
                       exclude=class_dependent_layers)
elif init_with == "last":
    # Continue from the most recently trained model
    model.load_weights(model.find_last(), by_name=True)
elif init_with == "imagenet":
    model.load_weights(model.get_imagenet_weights(), by_name=True)

In [None]:
# Training code block 2
# NOTE(review): in the Matterport implementation `epochs` appears to be the
# cumulative epoch number to stop at rather than a per-call count, so the
# three stages below would cover epochs 1-5, 6-10 and 11-15. Confirm against
# MaskRCNN.train before changing these numbers.

# Training - Stage 1
# Fine-tune only the network heads (layers='heads')
print("Fine tune network heads")
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE,
            epochs=5,
            layers='heads')

# Training - Stage 2
# Fine-tune layers from ResNet stage 4 and up
# (this number can be changed and performance should be monitored)
print("Fine tune Resnet stage 4 and up")
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE,
            epochs=10,
            layers='4+')

# Training - Stage 3
# Fine-tune all layers, at a tenth of the configured learning rate
print("Fine tune all layers")
model.train(dataset_train, dataset_val,
            learning_rate=config.LEARNING_RATE/10,
            epochs=15,
            layers='all')


In [8]:
# Configuration used when testing the model; adjust the overrides if needed
class InferenceConfig(ClassConfig):
    """Training configuration with the settings used at detection time."""

    # Confidence threshold applied to detections
    DETECTION_MIN_CONFIDENCE = 0.8

    # One image at a time on a single GPU
    IMAGES_PER_GPU = 1
    GPU_COUNT = 1

inference_config = InferenceConfig()

In [None]:
# Recreate the model in inference mode, this time with the inference
# configuration instead of the training one.
model = modellib.MaskRCNN(mode="inference", 
                          config=inference_config,
                          model_dir=MODEL_DIR)
# Choose the weights to evaluate: either a specific .h5 file (hard-coded
# below) or the most recently trained weights via model.find_last().
# model_path = os.path.join(ROOT_DIR, ".h5 file name here")
model_path = 'Mask_RCNN-master/logs/classes20200713T0016/mask_rcnn_classes_0014.h5'
#model_path = model.find_last()

# by_name=True is passed so weights are matched to layers by layer name
print("Loading weights from ", model_path)
model.load_weights(model_path, by_name=True)

In [None]:
from PIL import Image

# Run detection on the images of the test directory
real_test_dir = 'Main/test'
image_paths = []
# os.listdir() returns entries in arbitrary order; sorting makes "the first
# image" (the only one processed below) the same on every run.
for filename in sorted(os.listdir(real_test_dir)):
    if os.path.splitext(filename)[1].lower() in ['.png', '.jpg', '.jpeg']:
        image_paths.append(os.path.join(real_test_dir, filename))

predictions = []
for image_path in image_paths:
    # PNG files can carry a 4th (alpha) channel; convert to RGB so the model
    # and every later cell work on the same 3-channel array. The context
    # manager closes the file once the pixels have been converted.
    with Image.open(image_path) as image_file:
        img1 = image_file.convert("RGB")
    img_arr = np.array(img1)
    # Display and crop from the very array the detection ran on. The file was
    # previously re-read with skimage, which could yield a 4-channel image
    # that breaks the mask-multiplication and RGB->RGBA cells further down.
    img = img_arr
    results = model.detect([img_arr], verbose=1)
    r = results[0]
    predictions.append(r)
    print("Current image is ", str(image_path))
    visualize.display_instances(img, r['rois'], r['masks'], r['class_ids'], 
                                dataset_val.class_names, r['scores'], figsize=(5,5), show_bbox=False)
    # Only the first image is inspected for now; remove to process them all
    break

In [None]:
# Pass the image, its predicted masks and class IDs to mrcnn's
# display_top_masks helper, using the class names of the validation dataset.
visualize.display_top_masks(img, r['masks'], r['class_ids'], dataset_val.class_names)

In [None]:
# Show the input image followed by one panel per predicted instance mask
# (the last axis of r['masks'] indexes the instances), 8 panels per row.
display_images([img]+[r['masks'][:,:,i] for i in range(r['masks'].shape[-1])], cols=8)

In [None]:
# Crop an image to the rows and columns that a mask touches
def get_segment_crop(img,tol=0, mask=None):
    """Return ``img`` restricted to the rows/columns containing mask pixels.

    Args:
        img: Array of shape [height, width] or [height, width, channels].
        tol: Threshold used only when ``mask`` is None; pixels with a value
            greater than ``tol`` then count as foreground.
        mask: Optional boolean-like array marking the foreground. Axes beyond
            the first two (e.g. colour channels) are collapsed with "any".

    Returns:
        The sub-array of ``img`` made of every row and every column in which
        the mask has at least one foreground pixel. For a mask forming one
        connected blob this is its bounding-box crop; rows/columns without
        any foreground pixel are dropped even if they lie between blobs.
    """
    if mask is None:
        mask = img > tol
    mask = np.asarray(mask)
    if mask.ndim > 2:
        # A multi-channel image yields a multi-channel mask, which np.ix_
        # rejects; a pixel is foreground if any of its channels is.
        mask = mask.any(axis=tuple(range(2, mask.ndim)))
    return img[np.ix_(mask.any(1), mask.any(0))]

# Show the image followed by one crop per detected instance: the image
# restricted to the rows/columns that the instance's mask touches.
display_images([img]+[get_segment_crop(img, mask=r['masks'][:,:,i]) for i in range(r['masks'].shape[-1])], cols=8)

In [None]:
# Cropping w.r.t. both the mask region and its bounding box
def to_rgb(im, datatype=np.uint8):
    """Replicate a 2-D array into three identical channels.

    Args:
        im: Array with exactly two dimensions (e.g. one instance mask).
        datatype: dtype of the returned array; values of ``im`` are cast to it.

    Returns:
        A new array of shape ``im.shape + (3,)`` whose three channels each
        hold a copy of ``im``.

    Raises:
        ValueError: If ``im`` does not have exactly two dimensions.
    """
    n_rows, n_cols = im.shape  # unpacking also rejects anything not 2-D
    stacked = np.empty((n_rows, n_cols, 3), dtype=datatype)
    # Broadcast the single plane across the channel axis in one assignment
    stacked[...] = im[:, :, np.newaxis]
    return stacked

# For every instance: multiply the image by the 3-channel copy of its mask
# (zeroing the pixels outside the mask), then crop to the rows/columns the
# mask touches. The original image is shown first.
display_images([img]+[get_segment_crop(img*to_rgb(r['masks'][:,:,i]), mask=r['masks'][:,:,i]) for i in range(r['masks'].shape[-1])], cols=8)

In [None]:
# Draw every detected instance as an RGBA crop whose alpha channel is the
# instance mask, laid out on a grid with 8 panels per row.
cols = 8
n_instances = r['masks'].shape[-1]
rows = n_instances // cols + 1
plt.figure(figsize=(14, 14 * rows // cols))

for i in range(n_instances):
    instance_mask = r['masks'][:, :, i]

    # Colour crop first, then extend it with an alpha channel
    crop = get_segment_crop(img, mask=instance_mask)
    rgba = cv2.cvtColor(crop, cv2.COLOR_RGB2RGBA)
    # Alpha = mask scaled to 0/255, cropped exactly like the colour data
    rgba[:, :, 3] = get_segment_crop(instance_mask * 255, mask=instance_mask)

    plt.subplot(rows, cols, i + 1)
    plt.axis('off')
    plt.imshow(rgba)

plt.show()

In [None]:
# Extreme points of the first detected mask, one (row, col) pair per line of
# `out`, in the order: topmost, bottommost, leftmost, rightmost.
# These are extreme points only; the four corners (top left, top right,
# bottom left, bottom right) are still needed before de-skewing the image.

mask = r['masks'][:,:,0]
# Row-major scan: the first / last foreground pixel is the topmost / bottommost
idx0 = np.flatnonzero(mask.ravel(order='C'))
# Column-major scan: the first / last foreground pixel is the leftmost /
# rightmost. (Both scans used to be row-major, so the left/right points were
# just copies of the top/bottom ones.)
idx1 = np.flatnonzero(mask.ravel(order='F'))
if idx0.size == 0:
    raise ValueError("mask is empty: it has no extreme points")

# Each flat index must be unravelled with the same order it was produced in
top_bottom = np.column_stack(
    np.unravel_index([idx0.min(), idx0.max()], mask.shape, order='C'))
left_right = np.column_stack(
    np.unravel_index([idx1.min(), idx1.max()], mask.shape, order='F'))
out = np.vstack([top_bottom, left_right])
out

In [None]:
# Bounding box of `mask` from per-row / per-column edge positions.
# This yields the leftmost/rightmost/topmost/bottommost extents only; the four
# corners (top left, top right, bottom left, bottom right) needed before
# de-skewing the image are still not found by this approach.
# Result: bb = [(leftmost, topmost), (rightmost, bottommost)] in (x, y) order,
# where the second point is one past the last foreground column/row.

array = np.float32(mask)
H,W = array.shape
# First foreground column of every row; rows without foreground get the
# sentinel W+1 so they can never win the min() below.
left_edges = np.where(array.any(axis=1),array.argmax(axis=1),W+1)
flip_lr = cv2.flip(array,1) # flip code 1: mirror left <-> right (0 would mirror top <-> bottom)
# On the mirrored image argmax counts from the right border, so W - argmax is
# one past the last foreground column; rows without foreground give -1.
right_edges = W-np.where(flip_lr.any(axis=1),flip_lr.argmax(axis=1),W+1)
# Same idea along the other axis: first foreground row of every column
top_edges = np.where(array.any(axis=0),array.argmax(axis=0),H+1)
flip_ud = cv2.flip(array,0) # flip code 0: mirror top <-> bottom
# One past the last foreground row of every column; -1 for empty columns
bottom_edges = H - np.where(flip_ud.any(axis=0),flip_ud.argmax(axis=0),H+1)
leftmost = left_edges.min()
rightmost = right_edges.max()
topmost = top_edges.min()
bottommost = bottom_edges.max()
bb = [(leftmost, topmost), (rightmost, bottommost)]


[(1452, 195), (1716, 562)]

In [None]:
# Detect corner candidates on the mask with OpenCV; each one is an (x, y)
# coordinate pair.
# Problem: cv2 reports many points as corners, while only the 4 real corners
# (top left, top right, bottom left, bottom right) are needed.

# Arguments: image, maxCorners=100, qualityLevel=0.01, minDistance=10
corners = cv2.goodFeaturesToTrack(array, 100, 0.01, 10)
if corners is None:
    # NOTE(review): OpenCV appears to return None rather than an empty array
    # when nothing is found; keep `corners` an array either way.
    corners = np.empty((0, 1, 2), dtype=np.intp)
else:
    # np.int0 was only an alias of np.intp and is deprecated/removed in recent
    # NumPy releases; astype performs the same truncating conversion.
    corners = corners.astype(np.intp)

# Overlay the candidates on the image without modifying `img` itself
plt.imshow(img)
if len(corners):
    corner_xs, corner_ys = corners.reshape(-1, 2).T
    plt.scatter(corner_xs, corner_ys, s=12, c='red')
plt.show()

In [None]:
# Extract the outer contour(s) of the mask.
# cv2.findContours takes an 8-bit single-channel image (a 32-bit integer label
# image is accepted only with RETR_CCOMP / RETR_FLOODFILL); the int16 image
# passed before is neither, which is why the call failed. For a binary mask
# the outer contours (RETR_EXTERNAL) are what is needed here.
# [-2:] keeps the unpacking valid on OpenCV 3.x (image, contours, hierarchy)
# as well as on 4.x (contours, hierarchy).
contours, hier = cv2.findContours(array.astype(np.uint8), cv2.RETR_EXTERNAL,
                                  cv2.CHAIN_APPROX_SIMPLE)[-2:]

In [None]:
# With the coordinates of the top-left, top-right, bottom-left and
# bottom-right corners of an instance, the instance can be warped ("de-skewed")
# to an upright square.

def _mask_quad_corners(instance_mask):
    """Estimate the four corners of a roughly upright quadrilateral mask.

    Uses the sum/difference heuristic on the foreground pixel coordinates:
    the top-left corner minimises x + y, the bottom-right maximises it, the
    top-right maximises x - y and the bottom-left minimises it. The heuristic
    degrades for shapes rotated by about 45 degrees or far from quadrilateral.

    Args:
        instance_mask: 2-D boolean-like array, non-zero on the instance.

    Returns:
        float32 array of shape (4, 2) holding (x, y) points in the order
        top-left, top-right, bottom-left, bottom-right, or None when the mask
        has no foreground pixel.
    """
    ys, xs = np.nonzero(instance_mask)
    if xs.size == 0:
        return None
    coord_sum = xs + ys
    coord_diff = xs - ys
    picks = [coord_sum.argmin(), coord_diff.argmax(),
             coord_diff.argmin(), coord_sum.argmax()]
    return np.float32([[xs[k], ys[k]] for k in picks])

# Target corners, in the same order as the points returned above
pts2 = np.float32([[0,0],[150,0],[0,150],[150,150]])

all_plots = [img]
for i in range(r['masks'].shape[-1]):
    # getPerspectiveTransform needs exactly four float32 source points; the
    # raw np.where() index arrays passed before were not a valid input.
    pts1 = _mask_quad_corners(r['masks'][:,:,i])
    if pts1 is None:
        continue  # empty mask: nothing to warp
    M = cv2.getPerspectiveTransform(pts1, pts2)
    dst = cv2.warpPerspective(img, M, (150,150))
    all_plots.append(dst)

display_images(all_plots, cols=8)