# Setup

## Basic imports and constants

In [None]:
import os
import sys
import random
import math
import re
import time
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import skimage

# Root directory of the project: resolved two levels above the current working directory
ROOT_DIR = os.path.abspath("../../")

# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log

from openimages2019 import setup as st
from openimages2019 import utils as u

from skimage.draw import rectangle

%matplotlib inline 

# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Data directory: a "data" folder one level above ROOT_DIR. Later cells read images
# from it (e.g. "train/...jpg") and member-model weights from its "models" subfolder.
DATA_DIR = os.path.join(ROOT_DIR, "../data")

# Make GPUs visible to this kernel.
# A `!export ...` line runs in a short-lived subshell, so the variable never reaches
# the notebook process; setting it through os.environ does.
# NOTE(review): this must run before the GPU runtime is initialised (i.e. before the
# first model is built) to take effect -- confirm for the framework version in use.
os.environ["HIP_VISIBLE_DEVICES"] = "0,1,2,3"


def get_ax(rows=1, cols=1, size=8):
    """Create the Matplotlib axes used by the visualizations in this notebook.

    This is the single place to tune figure sizes: ``size`` is the edge length
    given to each subplot cell, so the figure measures ``size * cols`` wide by
    ``size * rows`` high.

    rows, cols: subplot grid dimensions passed to ``plt.subplots``.
    size: per-cell size; change the default to resize every rendered image.

    Returns the axes object(s) from ``plt.subplots``; the figure handle is
    discarded.
    """
    figure_size = (size * cols, size * rows)
    _figure, axes = plt.subplots(rows, cols, figsize=figure_size)
    return axes




## Partition the classes according to frequency

In [None]:
# Class partitions produced by st.partition_classes(); below, class_sets[i] is the
# set of classes handled by the i-th member model (see load_member_models).
class_sets = st.partition_classes()

## Core Functions

In [None]:
class TrainConfig(Config):
    """Base Mask R-CNN configuration shared by the configs defined in this notebook.

    Subclasses below add NUM_CLASSES and override the GPU/batch settings for
    inference.
    """

    # Configuration name
    NAME = "kaggle"

    # Batch layout: 2 GPUs with 2 images each
    GPU_COUNT = 2
    IMAGES_PER_GPU = 2

    # Min and max image dimension are both 512
    IMAGE_MIN_DIM = 512
    IMAGE_MAX_DIM = 512

    STEPS_PER_EPOCH = 500
    
    
def get_infer_model(config, model_path=None):
    """Build an inference-mode Mask R-CNN and load weights into it.

    config: configuration object handed to ``modellib.MaskRCNN``.
    model_path: weights file to load. When None, the path returned by the
        model's ``find_last()`` is used instead.

    Returns the model after ``load_weights(..., by_name=True)``.
    """
    network = modellib.MaskRCNN(mode="inference", config=config, model_dir=MODEL_DIR)

    weights_path = network.find_last() if model_path is None else model_path
    network.load_weights(weights_path, by_name=True)

    return network


def load_member_models(model_paths, class_sets, images_per_gpu=2):
    """Load one inference model per weights file.

    model_paths: weights paths, one per member model.
    class_sets: per-member class collections; ``class_sets[i]`` belongs to
        ``model_paths[i]`` and determines that model's NUM_CLASSES.
    images_per_gpu: IMAGES_PER_GPU used for every member's inference config.

    Returns the list of loaded models, in the order of ``model_paths``.
    """

    def _inference_config_for(member_idx):
        # Each member gets its own config because the class count differs per member.
        class KaggleConfig(TrainConfig):
            NUM_CLASSES = len(class_sets[member_idx]) + 1  # + 1 for background class

        class InferenceConfig(KaggleConfig):
            GPU_COUNT = 1
            IMAGES_PER_GPU = images_per_gpu

        return InferenceConfig()

    return [
        get_infer_model(_inference_config_for(member_idx), model_path=weights_path)
        for member_idx, weights_path in enumerate(model_paths)
    ]


def assemble(ind_results, iou_threshold=0.3):
    """Combine the results from several models for a single image.

    The per-model detections are concatenated and overlaps are resolved with
    non-maximum suppression. Suppression runs over all boxes together; class
    ids are not taken into account.

    ind_results: list of per-model result dicts for the same image, each with
        'class_ids', 'scores', 'rois' (detections along axis 0) and 'masks'
        (detections along axis 2).
    iou_threshold: IoU threshold passed to ``utils.non_max_suppression``.

    Returns a dict with the keys 'class_ids', 'rois', 'scores' and 'masks'
    restricted to the detections that survive suppression. When no model
    detected anything, the returned arrays are empty.
    """
    classes = np.concatenate([x['class_ids'] for x in ind_results])
    scores = np.concatenate([x['scores'] for x in ind_results])

    rois = np.concatenate([x['rois'] for x in ind_results], axis=0)

    # TODO: masks are only carried along for display purposes; not needed here
    masks = np.concatenate([x['masks'] for x in ind_results], axis=2)

    if rois.shape[0] == 0:
        # Nothing was detected by any model: skip NMS, which does not accept
        # an empty box array, and keep nothing.
        to_keep = np.zeros((0,), dtype=np.int64)
    else:
        to_keep = utils.non_max_suppression(rois, scores, iou_threshold)

    return {'class_ids': classes[to_keep], 'rois': rois[to_keep],
            'scores': scores[to_keep], 'masks': masks[:, :, to_keep]}


def ensemble_detect(models, images, batch_size, id_offsets):
    """Run every member model on the images and merge the results per image.

    For each model, inference is run on batches of ``batch_size`` images. The
    results are then grouped by image and non-maximum suppression is applied
    to each group (via ``assemble``).

    models: member models; each must expose ``detect(images, verbose=0)``.
    images: sequence of images to run detection on.
    batch_size: number of images handed to ``model.detect`` per call.
    id_offsets: per-model offset added to that model's class ids so they map
        into the ensemble-wide label ids; indexed by model position.

    Returns a list with one assembled result dict per input image.
    """
    all_results = [[] for _ in images]

    for midx, model in enumerate(models):

        results = []

        # model.detect asserts on the batch size, so images are fed in chunks of
        # exactly batch_size. A short final chunk is padded by repeating its last
        # image, and the detections for the padding are discarded.
        for j in range(0, len(images), batch_size):
            batch = list(images[j:j + batch_size])
            n_real = len(batch)
            batch.extend([batch[-1]] * (batch_size - n_real))
            results += model.detect(batch, verbose=0)[:n_real]

        for per_image, r in zip(all_results, results):
            r['model_id'] = midx
            # Shift member-local class ids into the ensemble id space (in place).
            r['class_ids'] += id_offsets[midx]

            per_image.append(r)

    return [assemble(x) for x in all_results]
    

# Inference

## Member model label ids --> ensemble model label ids

In [None]:
# Placeholder (LabelName, LabelDescription) row standing in for the background class
background = ['/mnull','Background']
    
# Prepend the background row to each member model's class table
z = [np.insert(cset[['LabelName','LabelDescription']].values,0,background,axis=0) for cset in class_sets]

# Stack the per-member tables; the row index of the combined table is used as the ensemble LabelID
omni_class_set = pd.DataFrame(np.concatenate(z),columns=['LabelName','LabelDescription'])
omni_class_set['LabelID'] = omni_class_set.index

# Row positions of the background rows, one per member table. ensemble_detect adds
# id_offsets[m] to the class ids of member m to map them to ensemble LabelIDs.
id_offsets = omni_class_set[omni_class_set['LabelName'] == '/mnull'].index.values

## Load models

In [None]:
# Earlier variant, kept for reference: load the checkpoints from the training log directory.
# rel_model_paths = ['kaggle20190815T2120/mask_rcnn_kaggle_0025.h5','kaggle20190815T2229/mask_rcnn_kaggle_0025.h5',
#                'kaggle20190815T2347/mask_rcnn_kaggle_0025.h5','kaggle20190816T0115/mask_rcnn_kaggle_0025.h5',
#                'kaggle20190816T0256/mask_rcnn_kaggle_0025.h5','kaggle20190816T0441/mask_rcnn_kaggle_0100.h5']


# model_paths = [os.path.join(MODEL_DIR, mpath) for mpath in rel_model_paths]

# Six weight files named cset_<i>_model.h5 under DATA_DIR/models; index i pairs with class_sets[i]
model_paths = [os.path.join(DATA_DIR,'models','cset_' + str(i) + '_model.h5') for i in range(6)]

# One inference model per weight file
models = load_member_models(model_paths, class_sets)

## Run detection

In [None]:
# Two hand-picked images from the train folder used as a quick smoke test
img = skimage.io.imread(os.path.join(DATA_DIR, 'train/2fef4dd2f83feb18.jpg'))
img2 = skimage.io.imread(os.path.join(DATA_DIR, 'train/55dee1384cd565ee.jpg'))

images = [img,img2]

# Batch size 2 matches the images_per_gpu default the member models were loaded with
detected = ensemble_detect(models, images,2, id_offsets)

## Visualize

In [None]:
# Show the assembled detections for each image; class names are looked up by ensemble LabelID
for i,r in enumerate(detected):
    visualize.display_instances(images[i], r['rois'], r['masks'], r['class_ids'], omni_class_set['LabelDescription'].values, r['scores'], show_mask=False)

## Evaluate

### Build the validation data set

In [None]:
anns = st.load_annotations_by_image()

# Down-select to our validation data and get rid of the old LabelID.
# The drop is done on the filtered result and re-assigned (not in place), so pandas
# never has to modify a slice of `anns` and no SettingWithCopyWarning is raised.
val_anns = anns[anns['RelativePath'].str.contains('validation', regex=False)].drop(columns='LabelID')

# Join with omni_class_set on LabelName to get the updated (ensemble) LabelID
anns_by_image = pd.merge(val_anns, omni_class_set, on='LabelName', how='inner')

anns_grouped = anns_by_image.groupby('ImageID')

# Validation dataset
dataset = st.FullKaggleImageDataset()
dataset.add_classes(omni_class_set.iloc[1:])  # do not add first background
dataset.load_kaggle_images(DATA_DIR, anns_grouped)
dataset.prepare()

### Visually validate dataset creation

In [None]:
# Load and display random samples ... sanity check for data load
# Load and display random samples ... sanity check for data load
# (np.random.choice samples with replacement by default, so an image may repeat)
image_ids = np.random.choice(dataset.image_ids, 5)
for image_id in image_ids:
    image = dataset.load_image(image_id)
    mask, class_ids = dataset.load_mask(image_id)
    visualize.display_top_masks(image, mask, class_ids, dataset.class_names)

### Calculate mAP

#### Define functions

In [None]:
def calc_mAP(models, config, batch_size=2, sample_size=50):
    """Estimate the ensemble's mean average precision on a random sample.

    Images are drawn from the notebook-level ``dataset`` with
    ``np.random.choice`` (with replacement by default), run through
    ``ensemble_detect`` and scored per image with ``utils.compute_ap``.

    models: member models passed to ``ensemble_detect``.
    config: configuration used by ``modellib.load_image_gt`` to load each
        image together with its ground truth.
    batch_size: number of images per ``model.detect`` call.
    sample_size: number of images to sample; must be a multiple of
        ``batch_size``.

    Returns the mean of the per-image AP values.
    Raises AssertionError when ``sample_size`` is not divisible by
    ``batch_size``.
    """
    assert sample_size%batch_size == 0, 'Sample size must be divisible by batch_size'

    image_ids = np.random.choice(dataset.image_ids, sample_size)

    # Each input is a tuple of form: image, image_meta, gt_class_id, gt_bbox, gt_mask
    inputs = [modellib.load_image_gt(dataset, config, iid, use_mini_mask=False) for iid in image_ids]

    # Use the batch_size argument here; the previous code referenced an undefined name.
    results = ensemble_detect(models, [inp[0] for inp in inputs], batch_size, id_offsets)

    APs = []

    for inp, r in zip(inputs, results):
        gt_class_id, gt_bbox, gt_mask = inp[2], inp[3], inp[4]
        # Compute AP
        AP, precisions, recalls, overlaps = utils.compute_ap(gt_bbox, gt_class_id, gt_mask,
                                            r["rois"], r["class_ids"], r["scores"], r['masks'])
        APs.append(AP)

    return np.mean(APs)



# Images per GPU for the ensemble-level inference config. This name is read by the
# class body below but is not defined anywhere in the visible notebook; 2 matches
# the batch size used by the detection/evaluation cells.
# NOTE(review): confirm no earlier cell was meant to set a different value.
images_per_gpu = 2


class KaggleConfig(TrainConfig):
    """Config covering the combined label table (one row per ensemble LabelID)."""
    NUM_CLASSES = len(omni_class_set)


class InferenceConfig(KaggleConfig):
    """Single-GPU inference variant of KaggleConfig."""
    GPU_COUNT = 1
    IMAGES_PER_GPU = images_per_gpu


#### Run the function

In [None]:
# Mean AP of the ensemble over 100 randomly sampled validation images (default batch_size of 2)
calc_mAP(models, InferenceConfig(),sample_size=100)

## Generate kaggle submission file

### Run inference on 'test' set

In [None]:
config = InferenceConfig()
batch_size=2

#### Start code for sample ... real file should include entire test set ####
# TODO: placeholder sample size; the real submission should cover the entire test set.
# For now this samples from `dataset`, which was built from the validation annotations.
sample_size=50
    
assert sample_size%batch_size == 0, 'Sample size must be divisible by batch_size'

# Sampled with replacement (np.random.choice default); image_ids is reused by the submission cell
image_ids = np.random.choice(dataset.image_ids, sample_size)

#each input is a tuple of form : image, image_meta, gt_class_id, gt_bbox, gt_mask
inputs = [modellib.load_image_gt(dataset, config, iid, use_mini_mask=False) for iid in image_ids]

# Only the image itself (first tuple element) is needed for detection
images = [inp[0] for inp in inputs]

#### End code for sample ... real file should include entire test set ####


# One assembled result dict per image, in the same order as image_ids
results = ensemble_detect(models, images,batch_size, id_offsets)

### Build Kaggle Submission File from results

In [None]:
# TODO: this cell relies on `results` and `image_ids` being left in the notebook
# namespace by the inference cell above; it breaks if that code moves into a function.

# Group keys (ImageID values) in group order. size() yields the same index as an
# aggregation such as sum() without having to aggregate every column.
img_ids = anns_grouped.size().index.values

all_preds = []

for j, r in enumerate(results):
    ids = r['class_ids']
    boxes = r['rois']
    scores = r['scores']

    # One "LabelName score b1 b0 b3 b2" group per detection, space separated.
    # The box coordinates are swapped pairwise, presumably because rois are stored
    # as (y1, x1, y2, x2) and the submission wants x before y -- TODO confirm.
    preds = " ".join(
        " ".join(map(str, [omni_class_set.iloc[ids[i]]['LabelName'], scores[i],
                           boxes[i][1], boxes[i][0], boxes[i][3], boxes[i][2]]))
        for i in range(len(ids))
    )

    # NOTE(review): this assumes the dataset's integer image ids index img_ids in
    # the same order the groups were loaded -- verify against load_kaggle_images.
    iid = image_ids[j]

    all_preds.append(img_ids[iid] + ", " + preds)

# TODO: grab from image_ids (based on index list) to get image name
all_preds

# TODO: write to a proper csv file
with open(os.path.join(ROOT_DIR, 'submission.txt'), 'w') as f:
    f.write('\n'.join(all_preds))

# Misc

## Visualize results for individual models

In [None]:
import skimage

# test on a few random images
# image_ids = np.random.choice(dataset_val.image_ids, inference_config.BATCH_SIZE)

# images = [skimage.io.imread(os.path.join(DATA_DIR, rel_path)) for rel_path in ]

#TODO should store image id in result as well ... this is just a quick hack

img = skimage.io.imread(os.path.join(DATA_DIR, 'train/2fef4dd2f83feb18.jpg'))
img2 = skimage.io.imread(os.path.join(DATA_DIR, 'train/55dee1384cd565ee.jpg'))

images = [img, img2]

# To store each model's result for each image
all_results = [[] for _ in images]

rel_model_paths = ['kaggle20190815T2120/mask_rcnn_kaggle_0025.h5','kaggle20190815T2229/mask_rcnn_kaggle_0025.h5',
                   'kaggle20190815T2347/mask_rcnn_kaggle_0025.h5','kaggle20190816T0115/mask_rcnn_kaggle_0025.h5',
                   'kaggle20190816T0256/mask_rcnn_kaggle_0025.h5','kaggle20190816T0441/mask_rcnn_kaggle_0100.h5']

# These checkpoint paths are relative to the log directory (as in the "Load models"
# cell), so anchor them at MODEL_DIR instead of relying on the working directory.
model_paths = [os.path.join(MODEL_DIR, mpath) for mpath in rel_model_paths]

models = load_member_models(model_paths, class_sets)


for midx, model in enumerate(models):

    # Both images go through in one call; this matches the default images_per_gpu of 2
    results = model.detect(images, verbose=0)

    for i, r in enumerate(results):
        r['model_id'] = midx
        # Shift member-local class ids into the ensemble id space (in place)
        r['class_ids'] += id_offsets[midx]

        all_results[i].append(r)

        # Display this single model's detections for image i
        visualize.display_instances(images[i], r['rois'], r['masks'], r['class_ids'], omni_class_set['LabelDescription'].values, r['scores'])