# Basic imports for Mask R-CNN

In [None]:
import os
import sys
import random
import math
import re
import time
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt

# Root directory of the project
ROOT_DIR = os.path.abspath("../../")

# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log

from openimages2019 import setup as st

from skimage import transform
import skimage.io

%matplotlib inline 

# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

DATA_DIR = os.path.join(ROOT_DIR, "../data")

# Local path to trained weights file
COCO_MODEL_PATH = os.path.join(DATA_DIR, "mask_rcnn_coco.h5")

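# Make GPUs visible.
# NOTE: `!export` runs in a temporary subshell, so the line below does not change this
# kernel's environment. Use `%env` or `os.environ` if HIP_VISIBLE_DEVICES must take effect.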
!export HIP_VISIBLE_DEVICES=1,2,3

# Restrict this process to GPU devices 1, 2 and 3
os.environ["CUDA_VISIBLE_DEVICES"]="1,2,3"


def get_ax(rows=1, cols=1, size=8):
    """Create the Matplotlib axes used by the notebook's visualizations.

    Every plot goes through this helper so that figure sizes can be tuned
    in one place: each grid cell is ``size`` x ``size`` inches, which makes
    the figure ``size*cols`` wide and ``size*rows`` tall.

    Returns whatever ``plt.subplots`` returns as its axes value for the
    requested ``rows`` x ``cols`` grid.
    """
    figure_size = (size * cols, size * rows)
    axes = plt.subplots(rows, cols, figsize=figure_size)[1]
    return axes

## Object Segmentation vs Object Detection

In [None]:
# Master switch for the notebook: True selects the segmentation (mask) path,
# False selects the plain object-detection path. It is read again when loading
# data, choosing weights and picking the submission writer.
USE_MASKS = False

# Locations that are only passed along when USE_MASKS is True.
MASK_DIR  = os.path.join(DATA_DIR,'segmentation')
CLASS_DESC_CSV = os.path.join(DATA_DIR, 'seg_class_descriptions.csv')

In [None]:
class_descriptions = st.load_classes()

class_descriptions.head()

# Load datasets

In [None]:
# Build the class table, the per-image annotations and the train/validation
# datasets. The two branches differ only in the extra segmentation arguments
# (class CSV, use_masks flag, mask directory) passed to the `st` helpers.
if USE_MASKS:
    # Segmentation: load classes from the segmentation-specific CSV.
    class_descriptions = st.load_classes(path_to_csv=CLASS_DESC_CSV)
    
    anns = st.load_annotations_by_image(class_descriptions, use_masks=True)
    train_data = st.load_dataset(anns, DATA_DIR, class_descriptions, is_train=True,mask_path=MASK_DIR)
    val_data = st.load_dataset(anns, DATA_DIR, class_descriptions, is_train=False, mask_path=MASK_DIR)
else:
    # Detection: rely on the helper defaults for the class list and annotations.
    class_descriptions = st.load_classes()
    
    anns = st.load_annotations_by_image(class_descriptions)
    train_data = st.load_dataset(anns, DATA_DIR, class_descriptions, is_train=True)
    val_data = st.load_dataset(anns, DATA_DIR, class_descriptions, is_train=False)


# Configurations

In [None]:
class KaggleConfig(Config):
    """Training configuration: overrides selected fields of the Mask R-CNN ``Config``."""
    
    # Configuration name.
    # NOTE(review): presumably also used to name the log/checkpoint folders
    # (the checkpoint paths in this notebook start with "kaggle") - confirm.
    NAME = "kaggle"

    # Three GPUs, matching the three device ids listed in CUDA_VISIBLE_DEVICES.
    GPU_COUNT = 3
    IMAGES_PER_GPU = 4  # TODO how many can we use, authors had 2 for 12 GB, we have 32 GB so ...
    
    # Number of classes (including background)
    NUM_CLASSES = 1 + len(class_descriptions)  # + 1 for background

    # Same value for min and max dimension.
    # NOTE(review): presumably yields 512x512 network inputs - confirm against Config.
    IMAGE_MIN_DIM = 512
    IMAGE_MAX_DIM = 512
    
    # Skip detections with < 90% confidence
    DETECTION_MIN_CONFIDENCE = 0.9
    
# Instantiate once; this object is handed to the training model.
config = KaggleConfig()
# config.display()

# Training

## Create model and load prior weights

In [None]:
model = modellib.MaskRCNN(mode="training", config=config, model_dir=MODEL_DIR)

In [None]:
# Pick the starting weights: a previously trained checkpoint for the
# segmentation run, otherwise the MS COCO weights.
init_with = (
    os.path.join(MODEL_DIR, "kaggle20190923T1355/mask_rcnn_kaggle_0822.h5")
    if USE_MASKS
    else "coco"
)

if init_with == "last":
    # Continue from the most recent checkpoint the model can find.
    model.load_weights(model.find_last(), by_name=True)
elif init_with == "coco":
    # Load the COCO weights but leave out the listed head layers.
    coco_excluded_layers = ["mrcnn_class_logits", "mrcnn_bbox_fc",
                            "mrcnn_bbox", "mrcnn_mask"]
    model.load_weights(COCO_MODEL_PATH, by_name=True,
                       exclude=coco_excluded_layers)
else:
    # Any other value is treated as an explicit path to a weights file.
    model.load_weights(init_with, by_name=True)

## Train

Train in two stages:
1. Only the heads. Here we're freezing all the backbone layers and training only the randomly initialized layers (i.e. the ones that we didn't use pre-trained weights from MS COCO). To train only the head layers, pass `layers='heads'` to the `train()` function.

2. Fine-tune all layers. For this simple example it's not necessary, but we're including it to show the process. Simply pass `layers="all"` to train all layers.

In [None]:
# !pip3 install imgaug
import imgaug

# Image Augmentation ... pulled from coco example
# Right/Left flip 50% of the time
augmentation = imgaug.augmenters.Fliplr(0.5)

# Number of epochs passed to model.train() below.
n_epochs = 1 

# Stage 1: train only the head layers (layers='heads').
model.train(train_data, val_data, 
                learning_rate= 0.001, 
                epochs=n_epochs, 
                layers='heads',
                augmentation=augmentation)

# Stage 2 (disabled): fine-tune all layers with a 10x smaller learning rate.
# model.train(train_data, val_data, 
#                 learning_rate= 0.0001, 
#                 epochs=n_epochs, 
#                 layers='all',
#                 augmentation=augmentation)


# Inference

In [None]:
class InferenceConfig(Config):
    """Inference configuration: one image at a time on a single GPU.

    DETECTION_MIN_CONFIDENCE is not overridden here, so the base ``Config``
    value applies rather than the 0.9 set in the training configuration.
    """
    NAME = 'KaggleInf'
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
    # Same class count and image size as the training configuration.
    NUM_CLASSES = 1 + len(class_descriptions)
    IMAGE_MIN_DIM = 512
    IMAGE_MAX_DIM = 512

inference_config = InferenceConfig()

# Recreate the model in inference mode
# (this rebinds `model`, replacing the training-mode model)
model = modellib.MaskRCNN(mode="inference", 
                          config=inference_config,
                          model_dir=MODEL_DIR)

# Hard-coded checkpoints from earlier runs: one for the segmentation path,
# one for the detection path. Update these after training a new model.
if USE_MASKS:
    model_path = os.path.join(ROOT_DIR, "logs/kaggle20190923T1355/mask_rcnn_kaggle_0822.h5")
else:
    model_path = os.path.join(ROOT_DIR, "logs/kaggle20191004T1454/mask_rcnn_kaggle_0001.h5")

# Load trained weights
print("Loading weights from ", model_path)
model.load_weights(model_path, by_name=True)

## Sanity Check on validation set

In [None]:
def visual_check(image_ids, dataset):
    """Show ground truth next to the model's prediction for each image id.

    For every id in ``image_ids`` the image and its annotations are loaded
    from ``dataset`` (full-size masks, using the module-level
    ``inference_config``), displayed, and then the module-level ``model``
    is run on the same image and its detections are displayed.
    """
    for image_id in image_ids:
        ground_truth = modellib.load_image_gt(
            dataset, inference_config, image_id, use_mini_mask=False)
        image, _image_meta, true_class_ids, true_boxes, true_masks = ground_truth

        # might need to pass use_mask=False if USE_MASKS=FALSE ... maybe
        print('--------------\n GROUND TRUTH \n--------------')
        visualize.display_instances(
            image, true_boxes, true_masks, true_class_ids,
            dataset.class_names, figsize=(8, 8))

        # detect() takes a list of images; only the single result is needed.
        detection = model.detect([image], verbose=0)[0]

        print('--------------\n PREDICTION \n--------------')
        visualize.display_instances(
            image, detection['rois'], detection['masks'], detection['class_ids'],
            dataset.class_names, detection['scores'], figsize=(8, 8))

In [None]:
dataset = val_data

visual_check(np.random.choice(dataset.image_ids, 5), dataset)

# Evaluation

In [None]:
# Compute VOC-style mAP @ IoU=0.5 on a random sample of validation images.
# Increase num_images for a more stable estimate.
num_images = 50

image_ids = np.random.choice(val_data.image_ids, num_images)

# Each entry is a tuple: (image, image_meta, gt_class_id, gt_bbox, gt_mask)
inputs = [
    modellib.load_image_gt(val_data, inference_config, iid, use_mini_mask=False)
    for iid in image_ids
]

APs = []

# Feed the detector in chunks of the configured batch size.
n = inference_config.BATCH_SIZE

for i in range(0, len(image_ids), n):
    curr_inputs = inputs[i:i + n]
    results = model.detect([inp[0] for inp in curr_inputs], verbose=0)

    # Pair every ground-truth tuple with the detection for the same image.
    for inp, r in zip(curr_inputs, results):
        # compute_ap takes (gt boxes, gt class ids, gt masks) followed by the
        # predicted boxes, class ids, scores and masks.
        AP, precisions, recalls, overlaps = utils.compute_ap(
            inp[3], inp[2], inp[4],
            r["rois"], r["class_ids"], r["scores"], r['masks'])
        APs.append(AP)

print("mAP: ", np.mean(APs))

# Generate submission file

## Encode instance segmentation map (from kaggle competition website)

In [None]:
# container does not include this library by default, will need to run this once
# !pip install pycocotools

## Define batch-write functions

In [None]:
from PIL import ImageFile
import base64
from pycocotools import _mask as coco_mask
import typing as t
import zlib
ImageFile.LOAD_TRUNCATED_IMAGES = True

def encode_binary_mask(mask: np.ndarray) -> bytes:
    """Encode a boolean mask as base64(zlib(COCO RLE counts)).

    Args:
        mask: Boolean array that is 2-D once singleton dimensions are squeezed.

    Returns:
        The base64-encoded, zlib-compressed RLE ``counts`` of the mask, as bytes.

    Raises:
        ValueError: If ``mask`` is not of boolean dtype, or is not 2-D after
            squeezing.
    """
    # check input mask --
    # Compare against the builtin ``bool``: the ``np.bool`` alias was removed in
    # NumPy 1.24, while ``dtype != bool`` works on every NumPy version.
    if mask.dtype != bool:
        raise ValueError(
            "encode_binary_mask expects a binary mask, received dtype == %s" %
            mask.dtype)

    mask = np.squeeze(mask)
    if mask.ndim != 2:
        # Wrap the shape in a 1-tuple so ``%`` formats it as a single value
        # instead of unpacking it (which raised TypeError for most shapes).
        raise ValueError(
            "encode_binary_mask expects a 2d mask, received shape == %s" %
            (mask.shape,))

    # convert input mask to expected COCO API input --
    # (height, width, 1), uint8, Fortran-ordered
    mask_to_encode = mask.reshape(mask.shape[0], mask.shape[1], 1)
    mask_to_encode = mask_to_encode.astype(np.uint8)
    mask_to_encode = np.asfortranarray(mask_to_encode)

    # RLE encode mask --
    encoded_mask = coco_mask.encode(mask_to_encode)[0]["counts"]

    # compress and base64 encoding --
    binary_str = zlib.compress(encoded_mask, zlib.Z_BEST_COMPRESSION)
    base64_str = base64.b64encode(binary_str)
    return base64_str

#image_info should be 2d array, with each row of form id, width, height
def segmentation_append(filename, results, image_info):
    """Append one instance-segmentation submission row per image to ``filename``.

    Each row has the form ``ImageId,ImageWidth,ImageHeight,PredictionString``,
    where the prediction string is a space-separated sequence of
    ``LabelName score encoded_mask`` triples (empty when nothing was detected).

    Args:
        filename: Path of the CSV file to append to.
        results: Sequence of detection dicts providing 'class_ids', 'masks'
            and 'scores'.
        image_info: Per-image rows of the form [id, width, height], in the
            same order as ``results``.
    """
    # Nothing to write: avoid appending a stray blank line to the CSV.
    if not results:
        return

    rows = []
    for j, r in enumerate(results):
        ids = r['class_ids']
        masks = r['masks']
        scores = r['scores']

        img_id = image_info[j][0]
        width = image_info[j][1]
        height = image_info[j][2]

        fields = []
        for i, (class_id, score) in enumerate(zip(ids, scores)):
            # masks are stored as a 3d array, <height,width,# examples>,
            # so instance i is the slice along the last axis
            enc_mask = encode_binary_mask(masks[:, :, i])
            # subtract 1 to compensate for the background class
            class_name = class_descriptions.iloc[class_id - 1]['LabelName']
            fields.extend([class_name, score, enc_mask.decode()])

        # Joining once avoids the leading space the old "+=" loop produced.
        prediction = " ".join(map(str, fields))
        img_lvl_fields = ','.join(map(str, [img_id, width, height]))
        rows.append(img_lvl_fields + "," + prediction)

    with open(filename, 'a') as f:
        f.write('\n'.join(rows) + '\n')

        
def detection_append(filename, results, image_info):
    """Append one object-detection submission row per image to ``filename``.

    Each row has the form ``ImageId,PredictionString``, where the prediction
    string is a space-separated sequence of
    ``LabelName score xmin ymin xmax ymax`` groups (empty when nothing was
    detected). Box coordinates are divided by the image width/height and
    clamped to the range [0, 1].

    Args:
        filename: Path of the CSV file to append to.
        results: Sequence of detection dicts providing 'class_ids', 'rois'
            and 'scores'. Each roi is read as (y1, x1, y2, x2).
        image_info: Per-image rows of the form [id, width, height], in the
            same order as ``results``.
    """
    # Nothing to write: avoid appending a stray blank line to the CSV.
    if not results:
        return

    rows = []
    for j, r in enumerate(results):
        ids = r['class_ids']
        boxes = r['rois']
        scores = r['scores']

        img_id = image_info[j][0]
        width = image_info[j][1]
        height = image_info[j][2]

        fields = []
        for class_id, score, box in zip(ids, scores, boxes):
            xmin = max(box[1] / width, 0.0)
            ymin = max(box[0] / height, 0.0)
            xmax = min(box[3] / width, 1.0)
            ymax = min(box[2] / height, 1.0)

            # subtract 1 to compensate for the background class
            label = class_descriptions.iloc[class_id - 1]['LabelName']
            fields.extend([label, score, xmin, ymin, xmax, ymax])

        # Joining once avoids the leading space the old "+=" loop produced.
        prediction = " ".join(map(str, fields))
        rows.append(img_id + "," + prediction)

    with open(filename, 'a') as f:
        f.write('\n'.join(rows) + '\n')

    
# Row writer used by write_sub_file: segmentation rows when USE_MASKS is set,
# detection rows otherwise.
appender = segmentation_append if USE_MASKS else detection_append    

# Directory that write_sub_file walks for test images.
testdir = os.path.join(DATA_DIR, "test")

def write_sub_file(filename, batch_size=500, start_index=0):
    """Run detection on every file under ``testdir`` and append rows to ``filename``.

    Predictions are buffered and flushed through the module-level ``appender``
    every ``batch_size`` images, with one last flush for the remainder.

    Args:
        filename: Submission CSV to append to (the header must already exist).
        batch_size: Number of images to accumulate between writes.
        start_index: Skip files whose position in the directory listing is
            below this value; used to resume an interrupted run.

    Returns:
        The index of the last file visited, or -1 if no file was found.

    Note:
        The index comes from ``enumerate(files)`` and therefore restarts in
        every directory visited by ``os.walk``.
        NOTE(review): resuming presumably assumes a flat test directory with
        a stable listing order - confirm.
    """
    results = []
    image_info = []
    cnt = -1  # stays -1 when the walk yields no files at all

    for subdir, dirs, files in os.walk(testdir):
        for cnt, file in enumerate(files):

            # use this if the process broke down at some point and you need
            # to restart midway through ... total hack
            if cnt < start_index:
                continue

            img = skimage.io.imread(os.path.join(subdir, file))

            # filename without its extension (of any length), width, height
            image_info.append([os.path.splitext(file)[0], img.shape[1], img.shape[0]])

            results += model.detect([img], verbose=0)

            if cnt % batch_size == batch_size - 1:
                print("writing to file ... ")
                appender(filename, results, image_info)
                results = []
                image_info = []
                print(cnt, " completed")  # 100,000 images in the test set

    # Flush whatever is left over from the last, partial batch.
    if results:
        print("writing final records to file ... ")
        appender(filename, results, image_info)
    print(cnt, " completed")

    return cnt

## Execute batch writing

In [None]:
filename = os.path.join(ROOT_DIR, 'det_submission_test.csv')

### Generate the entire file

In [None]:
# Start a fresh file. The header has to match the rows of the active writer:
# segmentation rows carry the image width and height, detection rows only
# carry the image id and the prediction string.
with open(filename, 'w') as f:
    if USE_MASKS:
        f.write('ImageId,ImageWidth,ImageHeight,PredictionString\n')
    else:
        f.write('ImageId,PredictionString\n')

write_sub_file(filename, start_index=0)

### Resume writing the file

In [None]:
# use X + 1 , where X is from the 'X completed' statement above
write_sub_file(filename, start_index=50)

### Manipulate submission file

In [None]:
# df = pd.read_csv(submission_file)

# nu = df['ImageId'].nunique()  # Should be 99999

# if len(df) > nu:
#     df.drop_duplicates('ImageId', inplace = True)

# df.to_csv('submission_9_11_0.csv',index=False)