# Setup

## Basic imports and constants

In [None]:
import os
import sys
import random
import math
import re
import time
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import skimage

# Root directory of the project
ROOT_DIR = os.path.abspath("../../")

# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log

from openimages2019 import setup as st
from openimages2019 import utils as u

from skimage.draw import rectangle

%matplotlib inline 

# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Data directory, resolved relative to the project root.
DATA_DIR = os.path.join(ROOT_DIR, "../data")

# Directory passed to the dataset / loader as the mask path when USE_MASKS is set.
MASK_DIR = os.path.join(DATA_DIR, "segmentation")

##############################
# Master switch for the notebook: True selects the segmentation (mask) code
# paths, False selects the plain object-detection code paths.
USE_MASKS = True
##############################

# Make only 1 GPU visible to the HIP runtime.
# A `!export ...` shell escape runs in a throw-away subshell, so the variable
# never reaches this kernel's process; set it through os.environ instead.
os.environ["HIP_VISIBLE_DEVICES"] = "0"

# Set which GPU devices' memory should be accessible to running GPUs
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# os.environ["CUDA_VISIBLE_DEVICES"]="-1"


def get_ax(rows=1, cols=1, size=8):
    """Create the Matplotlib axes used by the notebook's visualizations.

    Acts as the single place where figure sizes are decided: each grid cell
    is `size` x `size`, so the figure is `size*cols` wide and `size*rows`
    tall.  Returns whatever `plt.subplots` returns as its axes value.
    """
    figure_size = (size * cols, size * rows)
    _figure, axes = plt.subplots(rows, cols, figsize=figure_size)
    return axes




## Partition the classes according to frequency

In [None]:
#TODO update this to account for masks (USE_MASK)
# class_sub_sets = st.partition_classes()

# Load the class table and the per-image annotations for the selected task.
if USE_MASKS:
    all_classes = st.load_classes(path_to_csv=os.path.join(DATA_DIR,'seg_class_descriptions.csv'))
    anns = st.load_annotations_by_image(classes=all_classes, use_masks=True)
else:
    all_classes = st.load_classes()
    anns = st.load_annotations_by_image(classes=all_classes, use_masks=False)

    
# Annotation count per label; value_counts() orders labels from most to least frequent.
cnts = anns['LabelName'].value_counts()

# One DataFrame of classes per partition, ordered from most to least frequent labels.
class_sub_sets = []

n_partitions = 10

# NOTE(review): the size is floored, so when len(all_classes) is not a multiple
# of n_partitions the trailing labels end up in no partition -- confirm intended.
p_size = int(len(all_classes) / n_partitions)

for i in range(n_partitions):
    s = i*p_size
    # Labels ranked s .. s+p_size-1 by annotation frequency.
    idxs = cnts.iloc[s:(s+p_size)].index.values
    tmp_set = all_classes[all_classes['LabelName'].isin(idxs)]
    tmp_set = tmp_set.reset_index()
    # Renumber labels 1..len(partition) within the partition.
    tmp_set['LabelID'] = tmp_set.index + 1
    class_sub_sets.append(tmp_set)



def class_set_of(label_desc=None, label_name=None):
    """
    Return the index of the class sub-set that contains the given label.

    `label_name` takes precedence over `label_desc`; the description is only
    consulted when no (truthy) name was supplied.  Returns -1 when the label
    is not present in any class sub-set or when neither argument is given.
    """
    if label_name:
        column, wanted = 'LabelName', label_name
    elif label_desc:
        column, wanted = 'LabelDescription', label_desc
    else:
        return -1

    for set_index, subset in enumerate(class_sub_sets):
        if (subset[column] == wanted).any():
            return set_index

    return -1

## Core Functions

In [None]:
class TrainConfig(Config):
    """Base Mask R-CNN configuration shared by every config class in this notebook."""
    
    # Configuration name.
    NAME = "kaggle"
    # One GPU and one image per GPU; ensemble_detect feeds detect() a single image at a time.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
    # NOTE(review): 0.0 looks like the strictest possible per-model NMS setting -- confirm against mrcnn.
    DETECTION_NMS_THRESHOLD = 0.0
    # Minimum and maximum image dimension are both set to 512.
    IMAGE_MIN_DIM = 512
    IMAGE_MAX_DIM = 512
    
    
def get_infer_model(config, model_path):
    """Build an inference-mode Mask R-CNN and load the weights at `model_path`.

    Weights are loaded by layer name; the model's log directory is MODEL_DIR.
    """
    network = modellib.MaskRCNN(mode="inference", config=config, model_dir=MODEL_DIR)
    network.load_weights(model_path, by_name=True)
    return network


def load_member_models(model_paths, class_sets, images_per_gpu=1):
    """Load one inference model per weights file.

    `class_sets[i]` is the class table the i-th model was trained on; its
    length (+1 for background) fixes that model's NUM_CLASSES.  Returns the
    loaded models in the same order as `model_paths`.

    Note: `images_per_gpu` is accepted but currently not used.
    """
    def build_config(class_set):
        class InferenceConfig(TrainConfig):
            NUM_CLASSES = len(class_set) + 1  # + 1 for background class
            DETECTION_MIN_CONFIDENCE = 0.9

        return InferenceConfig()

    loaded = []

    for position, weights_path in enumerate(model_paths):
        member_config = build_config(class_sets[position])

        print("loading model: ", weights_path )
        loaded.append(get_infer_model(member_config, model_path=weights_path))

    return loaded


def assemble(ind_results, iou_threshold=0.3):
    """Merge the detections several models produced for a single image.

    All class ids, scores, boxes and masks are pooled, then non-maximum
    suppression (at `iou_threshold`) decides which detections survive.
    When nothing was detected every field of the result is an empty 1-d array.
    """
    def pooled(key, **concat_args):
        return np.concatenate([res[key] for res in ind_results], **concat_args)

    classes = pooled('class_ids')
    scores = pooled('scores')
    rois = pooled('rois', axis=0)
    masks = pooled('masks', axis=2)   # masks are stacked along their last axis

    if len(scores) == 0:
        return {key: np.array([]) for key in ('class_ids', 'rois', 'scores', 'masks')}

    survivors = utils.non_max_suppression(rois, scores, iou_threshold)
    return {
        'class_ids': classes[survivors],
        'rois': rois[survivors],
        'scores': scores[survivors],
        'masks': masks[:, :, survivors],
    }


def assemble_hierarchy(ind_results, iou_threshold=0.3, general_bonus=0.25, max_score=0.999):
    """Merge per-model detections for one image, preferring the general model.

    The last entry of `ind_results` is assumed to come from the general
    (all-classes) model; its scores receive `general_bonus` before
    non-maximum suppression so that it wins overlaps against the expert
    models.  Scores of the surviving detections are capped at `max_score`.

    The input dictionaries are not modified (the bonus is applied to a copy
    of the scores rather than in place).

    Returns a dict with 'class_ids', 'rois', 'scores' and 'masks'; when
    nothing was detected every field is an empty 1-d array.
    """
    classes = np.concatenate([x['class_ids'] for x in ind_results])

    # We assume that the last result is from a general model and prefer its outputs.
    # `x['scores'] + general_bonus` builds a new array, leaving the caller's data intact.
    last = len(ind_results) - 1
    scores = np.concatenate([x['scores'] + general_bonus if i == last else x['scores']
                             for i, x in enumerate(ind_results)])

    rois = np.concatenate([x['rois'] for x in ind_results], axis=0)

    masks = np.concatenate([x['masks'] for x in ind_results], axis=2)

    if len(scores) == 0:
        return {'class_ids': np.array([]), 'rois': np.array([]),
                'scores': np.array([]), 'masks': np.array([])}

    to_keep = utils.non_max_suppression(rois, scores, iou_threshold)
    return {'class_ids': classes[to_keep],
            'rois': rois[to_keep],
            # the bonus can push scores past 1.0, so clamp them back
            'scores': np.array([min(max_score, s) for s in scores[to_keep]]),
            'masks': masks[:, :, to_keep]}


#TODO should eventually pass in 2 sets of models, expert models and general models
def ensemble_detect(models, images, id_conv_fn):
    """
    Run every model on every image and merge the detections per image.

    `id_conv_fn[k]` converts the class ids produced by `models[k]` into
    ensemble class ids.  Each model is run on one image at a time; its
    detections are tagged with the model index, converted, and grouped by
    image.  Each group is then combined with `assemble_hierarchy` (which
    applies non maximum suppression).  Returns one merged result per image.
    """
    grouped = [[] for _ in images]

    for model_index, member in enumerate(models):
        detections = []
        for image in images:
            detections.extend(member.detect([image], verbose=0))

        for image_index, detection in enumerate(detections):
            detection['model_id'] = model_index
            detection['class_ids'] = id_conv_fn[model_index](detection['class_ids'])
            grouped[image_index].append(detection)

    return [assemble_hierarchy(group) for group in grouped]
    

# Inference

## Member model label ids --> ensemble model label ids

In [None]:
# Placeholder row for the background class; one is inserted at the head of every partition.
background = ['/mnull','Background']
    
# Per partition: [background row, partition classes...] as (LabelName, LabelDescription) rows.
z = [np.insert(cset[['LabelName','LabelDescription']].values,0,background,axis=0) for cset in class_sub_sets]

# Ensemble-wide class table: all partitions stacked; LabelID is simply the row position.
omni_class_set = pd.DataFrame(np.concatenate(z),columns=['LabelName','LabelDescription'])
omni_class_set['LabelID'] = omni_class_set.index

# Row position of each partition's background row, i.e. where each partition starts in omni_class_set.
id_offsets = omni_class_set[omni_class_set['LabelName'] == '/mnull'].index.values


## For the partitions, a simple offset will work to map from model id to ensemble id
def simp_offset(offset):
    """Return a converter that adds `offset` to the class ids it is given."""
    def shift(cids):
        return cids + offset

    return shift

# One id converter per partition model: shift by where the partition starts in omni_class_set.
offset_fns = [simp_offset(i) for i in id_offsets]

#### map from all_classes labelid -> omni_class_set labelid
mapping = all_classes.merge(omni_class_set, on='LabelName')[['LabelID_x','LabelID_y']]
mapping_map = {r['LabelID_x'] : r['LabelID_y'] for _,r in mapping.iterrows()}
tmp_fn = lambda x : mapping_map[x]
# An explicit integer dtype keeps the result integral even when `cids` is empty;
# np.array([]) would otherwise be float64 and turn every concatenated class id
# into a float downstream.
conv_fn = lambda cids : np.array(list(map(tmp_fn,cids)), dtype=np.int64)

#subselect for now while testing
# Build a NEW list (partition converters followed by the general-model converter).
# Aliasing offset_fns and appending to it would grow both lists every time this
# cell is re-run.
id_mappings = offset_fns + [conv_fn]

## Load models

In [None]:
# model_paths = [os.path.join(DATA_DIR,'models','cset_' + str(i) + '_model.h5') for i in range(6)]
# models = load_member_models(model_paths, class_sub_sets)


######## expert classifiers, one per class partition, plus 1 for the entire data set (last entry)

model_paths = [os.path.join(MODEL_DIR,'kaggle20190920T1549/mask_rcnn_kaggle_0038.h5'),
               os.path.join(MODEL_DIR,'kaggle20190921T0133/mask_rcnn_kaggle_0046.h5'),
               os.path.join(MODEL_DIR,'kaggle20190921T1014/mask_rcnn_kaggle_0045.h5'),
               os.path.join(MODEL_DIR,'kaggle20190921T1813/mask_rcnn_kaggle_0048.h5'),
               os.path.join(MODEL_DIR,'kaggle20190922T0153/mask_rcnn_kaggle_0042.h5'),
               os.path.join(MODEL_DIR,'kaggle20190922T0930/mask_rcnn_kaggle_0049.h5'),
               os.path.join(MODEL_DIR,'kaggle20190922T1704/mask_rcnn_kaggle_0045.h5'),
               os.path.join(MODEL_DIR,'kaggle20190923T0040/mask_rcnn_kaggle_0035.h5'),
               os.path.join(MODEL_DIR,'kaggle20190923T0821/mask_rcnn_kaggle_0025.h5'),
               os.path.join(MODEL_DIR,'kaggle20190923T1532/mask_rcnn_kaggle_0054.h5'),
               os.path.join(MODEL_DIR,'kaggle20190923T1355/mask_rcnn_kaggle_0822.h5')
              ]


# Class table for each model above: the partitions followed by the full class list.
# A new list is built on purpose -- appending to an alias of class_sub_sets would
# add all_classes to the partition list itself (again on every re-run of this cell).
class_groups = list(class_sub_sets) + [all_classes]

models = load_member_models(model_paths, class_groups)

## Simple demo

### Run inference on tiny set

In [None]:
# Two sample training images used for a quick end-to-end check of the ensemble.
img = skimage.io.imread(os.path.join(DATA_DIR, 'train/2fef4dd2f83feb18.jpg'))
img2 = skimage.io.imread(os.path.join(DATA_DIR, 'train/55dee1384cd565ee.jpg'))

images = [img,img2]

# detected = ensemble_detect(models, images,1, id_offsets)
# One merged detection dict per image, with class ids in omni_class_set numbering.
detected = ensemble_detect(models, images, id_mappings)

### Visualize

In [None]:
# Draw every merged detection on its source image, labelled with the omni class descriptions.
for i,r in enumerate(detected):
    visualize.display_instances(images[i], r['rois'], r['masks'], r['class_ids'], omni_class_set['LabelDescription'].values, r['scores'], show_mask=USE_MASKS)

## Evaluate

### Build the validation data set

In [None]:
# Reload the annotations (this replaces the `anns` built during class partitioning).
anns = st.load_annotations_by_image(use_masks=USE_MASKS)

# Keep only the validation rows; the two annotation formats mark them differently.
if USE_MASKS:
    val_anns = anns[anns['SourceDataset'] == 'validation']
else:
    val_anns = anns[anns['RelativePath'].str.contains('validation',regex=False)]

# Drop the old LabelID by rebinding instead of `inplace=True`: val_anns is a
# filtered slice of `anns`, and in-place edits of a slice trigger pandas'
# SettingWithCopyWarning.
val_anns = val_anns.drop(columns='LabelID')

# join with omni_class_set on LabelName to get the ensemble-wide LabelID
anns_by_image = pd.merge(val_anns,omni_class_set, on='LabelName',how='inner')

anns_grouped = anns_by_image.groupby('ImageID')

# validation dataset
dataset = st.OpenImageDataset()
dataset.add_classes(omni_class_set.iloc[1:])  #load all but background class, which gets added automatically

if USE_MASKS:
    dataset.set_mask_path(MASK_DIR)

dataset.load_image_files(DATA_DIR, anns_grouped)
dataset.prepare()

### Visually validate dataset creation

In [None]:
# Load and display random samples ... sanity check for data load
# np.random.choice samples with replacement, so an image can show up more than once.
image_ids = np.random.choice(dataset.image_ids, 5)
for image_id in image_ids:
    image = dataset.load_image(image_id)
    mask, class_ids = dataset.load_mask(image_id)
    visualize.display_top_masks(image, mask, class_ids, dataset.class_names)

### Calculate mAP

#### Define functions

In [None]:
def calc_mAP(models, config, sample_size=250):
    """Estimate the ensemble's mean average precision on a random sample.

    Draws `sample_size` image ids from the notebook-level `dataset`, runs
    the ensemble on them and averages the per-image AP.  Images for which
    the ensemble returned no detections are left out of the average
    (compute_ap errors on them), so they do not count as zeros.
    """
    sampled_ids = np.random.choice(dataset.image_ids, sample_size)

    # Each entry has the form: image, image_meta, gt_class_id, gt_bbox, gt_mask
    ground_truth = [modellib.load_image_gt(dataset, config, image_id, use_mini_mask=False)
                    for image_id in sampled_ids]

    detections = ensemble_detect(models, [gt[0] for gt in ground_truth], id_mappings)

    average_precisions = []

    for gt, det in zip(ground_truth, detections):
        if len(det["rois"]) == 0:
            # nothing found in this image ... ignored for now
            continue

        ap, _precisions, _recalls, _overlaps = utils.compute_ap(
            gt[3], gt[2], gt[4],
            det["rois"], det["class_ids"], det["scores"], det['masks'])
        average_precisions.append(ap)

    return np.mean(average_precisions)


class KaggleConfig(TrainConfig):
    """Configuration sized for the ensemble-wide (omni) class table."""
    # omni_class_set already contains background rows, so no "+ 1" here.
    NUM_CLASSES = len(omni_class_set)
    
class InferenceConfig(KaggleConfig):
    """Inference settings used when loading validation ground truth and evaluating."""
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
    DETECTION_MIN_CONFIDENCE = 0.9


#### Check acc threshold vs score

In [None]:
# Run the ensemble once on a random validation sample; `inputs` and `results`
# are read as globals by check_mAP below.
config = InferenceConfig()

image_ids = np.random.choice(dataset.image_ids, 500)

# Each entry has the form: image, image_meta, gt_class_id, gt_bbox, gt_mask
inputs = [modellib.load_image_gt(dataset, config, iid, use_mini_mask=False) for iid in image_ids]

APs = []

results = ensemble_detect(models, [inp[0] for inp in inputs], id_mappings)

In [None]:
def check_mAP(acc_thresh):
    """Return the mean AP of the cached ensemble results at a score threshold.

    Reads the notebook-level `results` (ensemble detections) and `inputs`
    (matching ground truth tuples: image, image_meta, gt_class_id, gt_bbox,
    gt_mask).  For every image, detections scoring below `acc_thresh` are
    discarded before the AP is computed.  Images with no detections at all,
    or with none left after thresholding, are skipped.

    The APs are collected in a list local to this call, so successive calls
    with different thresholds do not contaminate each other.  Returns NaN
    when no image contributed an AP.
    """
    aps = []

    for gt, det in zip(inputs, results):
        # The assemble step returns empty 1-d arrays when nothing was detected.
        if det['masks'].shape[0] == 0:
            continue

        # np.where returns a tuple; take the index array itself so the same
        # indices can be used for the last axis of the mask stack.
        keep = np.where(det['scores'] >= acc_thresh)[0]

        if len(keep) == 0:
            # compute_ap errors when there are no predictions ... ignore for now
            continue

        AP, precisions, recalls, overlaps = utils.compute_ap(
            gt[3], gt[2], gt[4],
            det['rois'][keep], det['class_ids'][keep], det['scores'][keep],
            det['masks'][:, :, keep])
        aps.append(AP)

    return np.mean(aps) if aps else float('nan')

In [None]:
# Sweep candidate score thresholds and record the sampled mAP at each of them.
x = np.arange(0.8, 1.0, 0.005)
y = []

for thresh in x:
    try:
        y.append(check_mAP(thresh))
    except Exception as err:
        # Best effort: keep the sweep going with a 0.0 placeholder, but report
        # the failure instead of hiding it behind a bare `except`.
        print("check_mAP failed at threshold", thresh, ":", repr(err))
        y.append(0.0)
        

In [None]:
# mAP (y) as a function of the score threshold (x).
plt.plot(x,y)
plt.show()

In [None]:
# Load two previously written submission files for inspection.
omni_df = pd.read_csv(os.path.join(ROOT_DIR, 'seg_omni_submission_9_29.csv'))
ex_df = pd.read_csv(os.path.join(DATA_DIR, 'submissions/tlong/seg_submission_9_25.csv'))

In [None]:
#goal is to step over each row and remove any elements of the prediction string with an accuracy lower than our threshold

#### Run the function

In [None]:
# Sampled mAP of the full ensemble on 250 randomly chosen validation images.
calc_mAP(models, InferenceConfig(), sample_size=250)

### Visualize some validation samples

In [None]:
config = InferenceConfig()

#### Start code for sample ... real file should include entire test set ####
sample_size=50 #todo this shouldn't be used, should be all of the training data

# Sampled with replacement from the validation dataset built above.
image_ids = np.random.choice(dataset.image_ids, sample_size)

#each input is a tuple of form : image, image_meta, gt_class_id, gt_bbox, gt_mask
inputs = [modellib.load_image_gt(dataset, config, iid, use_mini_mask=False) for iid in image_ids]

images = [inp[0] for inp in inputs]

#### End code for sample ... real file should include entire test set ####


# Overwrites the notebook-level `results` with the detections for this sample.
results = ensemble_detect(models, images, id_mappings)

In [None]:
# Show the ensemble output for the first six sampled images that have at least one detection.
cnt = 0

for i,r in enumerate(results):
    if(len(r['class_ids']) > 0):
        visualize.display_instances(images[i], r['rois'], r['masks'], r['class_ids'], omni_class_set['LabelDescription'].values, r['scores'], show_mask=USE_MASKS)
    
        cnt += 1
    
    # cnt only grows for images that were displayed, so this stops after six of them.
    if cnt > 5:
        break

## Generate kaggle submission file

### Define functions for batch writing

#### Object Detection

In [None]:
from PIL import ImageFile
import os
ImageFile.LOAD_TRUNCATED_IMAGES = True

#TODO image_info should be 2d array, with each row of form id, width, height
def append_to_file_det(filename, results, image_info):
    """Append one detection-submission line per image to `filename`.

    `results[j]` is the ensemble output for the image described by
    `image_info[j]`, a row of the form (image id, width, height).  Each
    written line is ``<image id>,`` followed by ``" <label> <score> <xmin>
    <ymin> <xmax> <ymax>"`` for every detection, with coordinates divided
    by the image size and clamped to [0, 1].

    Nothing is written (and the file is not touched) when `results` is
    empty, so flushing an empty batch no longer adds a blank line.
    """
    lines = []

    for j, r in enumerate(results):
        ids = r['class_ids']
        boxes = r['rois']
        scores = r['scores']

        img_id = image_info[j][0]
        width = image_info[j][1]
        height = image_info[j][2]

        detections = []
        for i in range(len(ids)):
            # boxes are read as (ymin, xmin, ymax, xmax) in pixels
            xmin = max(boxes[i][1] / width, 0.0)
            ymin = max(boxes[i][0] / height, 0.0)
            xmax = min(boxes[i][3] / width, 1.0)
            ymax = min(boxes[i][2] / height, 1.0)

            # class ids can arrive as floats after the ensemble concatenation;
            # cast like append_to_file_seg does, since .iloc rejects floats
            label = omni_class_set.iloc[int(ids[i])]['LabelName']
            detections.append(" ".join(map(str, [label, scores[i], xmin, ymin, xmax, ymax])))

        lines.append(img_id + "," + "".join(" " + d for d in detections))

    if not lines:
        return

    with open(filename, 'a') as f:
        f.write('\n'.join(lines))
        f.write('\n')
    
    

# Directory walked by write_sub_file_det to find the test images.
testdir = os.path.join(DATA_DIR, "test")
# Number of images processed between two flushes to the submission file
# (read as a global by write_sub_file_det).
batch_size = 500


def write_sub_file_det(filename, start_index=0):
    """Run the ensemble over every file under `testdir` and append the
    detection results to `filename`.

    Results are flushed to the file every `batch_size` images (module-level
    global) and once more at the end for any remainder.  Files whose
    position within their directory listing is below `start_index` are
    skipped, which allows resuming an interrupted run.

    Returns the index of the last file seen in the last non-empty directory
    listing, or -1 when no file was found at all.
    """
    results = []
    image_info = []
    cnt = -1  # keeps the final print/return valid when testdir holds no files

    for subdir, dirs, files in os.walk(testdir):
        for cnt, file in enumerate(files):

            #use this if the process broke down at some point and you need to restart midway through ... total hack
            if cnt < start_index:
                continue

            img = skimage.io.imread(os.path.join(subdir, file))

            # image id (file name without its extension), width, height
            image_info.append([os.path.splitext(file)[0], img.shape[1], img.shape[0]])

            results += ensemble_detect(models, [img], id_mappings)

            if cnt % batch_size == batch_size - 1:
                print("writing to file ... ")
                append_to_file_det(filename, results, image_info)
                results = []
                image_info = []
                print(cnt, " completed") #100,000 images in the test set

    # Only flush when something is pending, so no empty line is appended.
    if results:
        print("writing final records to file ... ")
        append_to_file_det(filename, results, image_info)
    print(cnt, " completed")

    return cnt

#### Object Segmentation

In [None]:
# container does not include this library by default, will need to run this once
# !pip install pycocotools

import base64
from pycocotools import _mask as coco_mask
import typing as t
import zlib


def encode_binary_mask(mask: np.ndarray):
    """Encode a boolean mask as the base64 string used in the submission file.

    The mask is squeezed to 2-d, run-length encoded with the COCO mask API,
    zlib-compressed at the best compression level and base64 encoded.

    Returns the base64 text as ``bytes``.

    Raises:
        ValueError: if `mask` is not of boolean dtype, or is not 2-d after
            squeezing.
    """
    # check input mask --
    # Compare against the builtin `bool`: the `np.bool` alias was removed from NumPy.
    if mask.dtype != bool:
        raise ValueError(
           "encode_binary_mask expects a binary mask, received dtype == %s" %
           mask.dtype)

    mask = np.squeeze(mask)
    if len(mask.shape) != 2:
        # Wrap the shape in a 1-tuple; `"%s" % some_tuple` would try to spread
        # the shape over several placeholders and fail with a TypeError.
        raise ValueError(
           "encode_binary_mask expects a 2d mask, received shape == %s" %
           (mask.shape,))

    # convert input mask to expected COCO API input --
    mask_to_encode = mask.reshape(mask.shape[0], mask.shape[1], 1)
    mask_to_encode = mask_to_encode.astype(np.uint8)
    mask_to_encode = np.asfortranarray(mask_to_encode)

    # RLE encode mask --
    encoded_mask = coco_mask.encode(mask_to_encode)[0]["counts"]

    # compress and base64 encoding --
    binary_str = zlib.compress(encoded_mask, zlib.Z_BEST_COMPRESSION)
    base64_str = base64.b64encode(binary_str)
    return base64_str


from PIL import ImageFile
import os
ImageFile.LOAD_TRUNCATED_IMAGES = True

#TODO image_info should be 2d array, with each row of form id, width, height
def append_to_file_seg(filename, results, image_info):
    """Append one segmentation-submission line per image to `filename`.

    `results[j]` is the ensemble output for the image described by
    `image_info[j]`, a row of the form (image id, width, height).  Each
    written line is ``<image id>,<width>,<height>,`` followed by
    ``" <label> <score> <encoded mask>"`` for every detection.

    Nothing is written (and the file is not touched) when `results` is
    empty, so flushing an empty batch no longer adds a blank line.
    """
    lines = []

    for j, r in enumerate(results):
        ids = r['class_ids']
        masks = r['masks']
        scores = r['scores']

        img_id = image_info[j][0]
        width = image_info[j][1]
        height = image_info[j][2]

        detections = []
        for i in range(len(ids)):
            # masks are stored as a 3d array, <height,width,# examples>, so we need to index into it in a special way
            # (cast with the builtin `bool`; the `np.bool` alias was removed from NumPy)
            enc_mask = encode_binary_mask(masks[:, :, i].astype(bool))
            #TODO figure out when class ids are getting converted to a float, patch over for now...
            class_name = omni_class_set.iloc[int(ids[i])]['LabelName']
            detections.append(" ".join(map(str, [class_name, scores[i], enc_mask.decode()])))

        img_lvl_fields = ','.join(map(str, [img_id, width, height]))
        lines.append(img_lvl_fields + "," + "".join(" " + d for d in detections))

    if not lines:
        return

    with open(filename, 'a') as f:
        f.write('\n'.join(lines))
        f.write('\n')
        

# Directory walked by write_sub_file_seg to find the test images.
testdir = os.path.join(DATA_DIR, "test")

def write_sub_file_seg(filename, batch_size=500, start_index=0):
    """Run the ensemble over every file under `testdir` and append the
    segmentation results to `filename`.

    Results are flushed to the file every `batch_size` images and once more
    at the end for any remainder.  Files whose position within their
    directory listing is below `start_index` are skipped, which allows
    resuming an interrupted run.

    Returns the index of the last file seen in the last non-empty directory
    listing, or -1 when no file was found at all.
    """
    results = []
    image_info = []
    cnt = -1  # keeps the final print/return valid when testdir holds no files

    for subdir, dirs, files in os.walk(testdir):
        for cnt, file in enumerate(files):

            #use this if the process broke down at some point and you need to restart midway through ... total hack
            if cnt < start_index:
                continue

            img = skimage.io.imread(os.path.join(subdir, file))

            # image id (file name without its extension), width, height
            image_info.append([os.path.splitext(file)[0], img.shape[1], img.shape[0]])

            results += ensemble_detect(models, [img], id_mappings)

            if cnt % batch_size == batch_size - 1:
                print("writing to file ... ")
                append_to_file_seg(filename, results, image_info)
                results = []
                image_info = []
                print(cnt, " completed") #100,000 images in the test set

    # Only flush when something is pending, so no empty line is appended.
    if results:
        print("writing final records to file ... ")
        append_to_file_seg(filename, results, image_info)
    print(cnt, " completed")

    return cnt

#### Start the process from scratch

In [None]:
# Name of the submission file; segmentation runs get a 'seg_' prefix.
base_filename = 'experts_submission_9_29.csv'

if USE_MASKS:
    base_filename = 'seg_' + base_filename

# The submission file is written into the project root.
filename = os.path.join(ROOT_DIR, base_filename)

In [None]:
# YOU ONLY WANT THIS IN PLACE FOR THE FIRST RUN ... afterwards it will wipe the file ... NOT GOOD !!!  

# Opening with 'w+' truncates the file, writes the CSV header, and then the
# writer functions append the prediction rows below it.
if USE_MASKS:
    with open(filename, 'w+') as f:
        f.write('ImageId,ImageWidth,ImageHeight,PredictionString\n')
        
    write_sub_file_seg(filename)
else:
    with open(filename, 'w+') as f:
        f.write('ImageId,PredictionString\n')
        
    write_sub_file_det(filename)
        
    

#### Resume work at some file number

In [None]:
# Index of the first test file that still needs processing.
strt_idx = 9500

# Resume appending to the existing submission file.  `filename` is a required
# positional argument of both writers; omitting it raises a TypeError.
if USE_MASKS:
    write_sub_file_seg(filename, start_index=strt_idx)
else:
    write_sub_file_det(filename, start_index=strt_idx)


#### Fix submission file

In [None]:
# Load an earlier submission file to inspect it (the de-duplication below is disabled).
df = pd.read_csv(os.path.join(ROOT_DIR, 'submission9_11.csv'))

# nu = df['ImageId'].nunique()  # Should be 99999

# if len(df) > nu:
#     df.drop_duplicates('ImageId', inplace = True)

# df.to_csv('submission_9_11_0.csv',index=False)
# Show the prediction string of the first row.
df.iloc[0]['PredictionString']

### Run inference on test set

In [None]:
import os
testdir = os.path.join(DATA_DIR, "test")

# Ensemble detections for every test image, kept in memory (one dict per image).
results = []

for subdir, dirs, files in os.walk(testdir):
    for i,file in enumerate(files):
        img = skimage.io.imread(os.path.join(subdir, file))
        # ensemble_detect takes (models, images, id_conv_fn); the former call
        # passed a stale 4-argument form and failed with a TypeError.
        results += ensemble_detect(models, [img], id_mappings)

        if (i%10 == 0):
            print(i/1000) #100,000 images in the test set

### Visualize

In [None]:
# TO USE THIS:
# You need to store the images above instead of loading them as a function argument
# (as written, `images` is whatever an earlier cell left behind, not the test images).

cnt = 0

# Display the first six results.
for i,r in enumerate(results):
    visualize.display_instances(images[i], r['rois'], r['masks'], r['class_ids'], omni_class_set['LabelDescription'].values, r['scores'], show_mask=False)
    
    cnt += 1
    
    if cnt > 5:
        break   

### Build Kaggle Submission File from results

In [None]:
#TODO This cell will probably fail in the future as it assumes that 
# the body of the calculate_mAP function was executed outside of a function so that the
# results and image_ids variables are exposed

# Group keys of anns_grouped, i.e. the ImageID values (the sum itself is not used).
img_ids = anns_grouped.sum().index.values

all_preds = []

#DONT KNOW ABOUT ORDERING ANYMORE ...
for j, r in enumerate(results):
    preds = []
    ids = r['class_ids']
    boxes = r['rois']
    scores = r['scores']
    
    preds = ''
    
    # One " <label> <score> <x1> <y1> <x2> <y2>" chunk per detection, in pixel coordinates.
    for i in range(len(r['class_ids'])):
        preds += " " + " ".join(map(str,[omni_class_set.iloc[ids[i]]['LabelName'], scores[i], boxes[i][1],boxes[i][0],boxes[i][3],boxes[i][2]]))
    
    # NOTE(review): assumes results[j] belongs to image_ids[j] and that dataset
    # image ids index into img_ids in the same order -- confirm before relying on it.
    iid = image_ids[j]
    
    # preds[1:] strips the leading space of the first chunk.
    all_preds.append(img_ids[iid] + ", " + preds[1:])


#TODO write to csv file
with open(os.path.join(ROOT_DIR, 'submission.csv'), 'w') as f: 
    f.write('ImageId,PredictionString\n')
    f.write('\n'.join(all_preds)) 

# Misc

## Visualize results for debugging

In [None]:
class InferenceConfig(TrainConfig):
    """Inference configuration over the ensemble-wide class table.

    Note: this rebinds the name InferenceConfig defined in an earlier cell.
    """
    # omni_class_set already contains background rows, so no "+ 1" here.
    NUM_CLASSES = len(omni_class_set)
    DETECTION_MIN_CONFIDENCE = 0.9

inf_config = InferenceConfig()

In [None]:
# random images
num_samples = 10

# Sampled with replacement from the validation dataset.
image_ids = np.random.choice(dataset.image_ids, num_samples)


# single object scenes
# image_ids = [28542,16831,26167,1290,1694]

# single object with parts scenes
# image_ids = [16020,31270,16615,16364,29407,12841,19720]

#more complicated scenes
# image_ids = [6596,14954,6081,28724,9376]

#complicated scenes
# image_ids = [30670, 4918,13704,15065,22429,8799]

# Images for the sampled ids, in the same order as image_ids.
ver_images = []

for iid in image_ids:
    original_image, image_meta, gt_class_id, gt_bbox, gt_mask = modellib.load_image_gt(dataset, inf_config, iid, use_mini_mask=False)

    #UNCOMMENT THESE LINES TO VISUALIZE
#     print(iid)
#     visualize.display_instances(original_image, gt_bbox, gt_mask, gt_class_id, 
#                             dataset.class_names, figsize=(8, 8))
    
    ver_images.append(original_image)
    
# [30670, 4918, 6596, 13792]

In [None]:
def _show_member_detections(image, members):
    """Run each member model on `image` and display what it detects.

    `members` is an iterable of (model_index, model, id_conv_fn) triples;
    `id_conv_fn` converts the model's class ids to ensemble class ids.
    """
    for midx, model, to_ensemble_ids in members:
        for r in model.detect([image], verbose=0):
            r['model_id'] = midx
            r['class_ids'] = to_ensemble_ids(r['class_ids'])
            print('Model: ', midx)

            visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'], omni_class_set['LabelDescription'].values,
                                        r['scores'], show_mask=USE_MASKS,figsize=(10, 10))


def visualize_em(image,iid,mode='all'):
    """Display member-model, ensemble and ground-truth results for one image.

    `image` is the image to run detection on and `iid` its id in the
    notebook-level `dataset` (used to load the ground truth).

    `mode` selects which individual models are shown before the ensemble:
      * 'all'       -- every model, each with its own id converter;
      * 'all_fulls' -- only the models from position 10 onwards, all using
                       the converter at id_mappings[10];
      * anything else -- no individual models.

    The ensemble result and the ground truth are always shown; the AP of
    the ensemble result is printed when it contains at least one detection.
    """
    if mode == 'all' :
        _show_member_detections(
            image, ((i, m, id_mappings[i]) for i, m in enumerate(models)))

    if mode == 'all_fulls' :
        # Number from 10 so the printed / stored model index is the model's
        # real position in `models`, not its position within the slice.
        _show_member_detections(
            image, ((i, m, id_mappings[10]) for i, m in enumerate(models[10:], start=10)))

    print("----------------- ENSEMBLE ---------------------")
    eresults = ensemble_detect(models, [image], id_mappings)
    r = eresults[0]

    if(len(r['class_ids']) > 0):
        #hack warning: class ids may come back as floats, cast them for display
        cids = np.array(list(map(int,r['class_ids'])))
        visualize.display_instances(image, r['rois'], r['masks'], cids, omni_class_set['LabelDescription'].values,
                                r['scores'], show_mask=USE_MASKS,figsize=(10, 10))


    print("----------------- Ground Truth ---------------------")
    inf_config = InferenceConfig()
    original_image, image_meta, gt_class_id, gt_bbox, gt_mask = modellib.load_image_gt(dataset, inf_config, iid, use_mini_mask=False)

    visualize.display_instances(original_image, gt_bbox, gt_mask, gt_class_id, 
                                dataset.class_names,show_mask=USE_MASKS,figsize=(10, 10))


    if(len(r["class_ids"]) > 0):
        AP, precisions, recalls, overlaps = utils.compute_ap(gt_bbox, gt_class_id, gt_mask, 
                                                     r["rois"], r["class_ids"], r["scores"], r['masks'])

        print("AP: ", AP)

In [None]:
# for i in range(num_samples):
#     visualize_em(ver_images[i],image_ids[i],mode='ensemble')

# Show the models from position 10 onwards, the ensemble and the ground truth for each sampled image.
for i in range(num_samples):
    visualize_em(ver_images[i],image_ids[i],mode='all_fulls')

## Evaluate mAP for member models

In [None]:
def eval_single_model(model_path,cset_index=None, mAP_sample_size=250,class_set=None,val_data=None,mask_path=None):
    """Evaluate one member model and return what `u.eval_mAP` reports.

    The class table is `class_set` when given, otherwise
    `class_sub_sets[cset_index]`.  The validation data is `val_data` when
    given; otherwise it is built from the annotations of that class table
    (passing `mask_path` to the dataset loader).  The weights at
    `model_path` are loaded by name into an inference-mode model whose
    NUM_CLASSES is the class table size plus one for background.
    """
    classes = class_sub_sets[cset_index] if class_set is None else class_set

    eval_data = val_data
    if eval_data is None:
        annotations = st.load_annotations_by_image(classes, use_masks=USE_MASKS)
        eval_data = st.load_dataset(annotations, DATA_DIR, classes, is_train=False, mask_path=mask_path)

    class KaggleConfig(TrainConfig):
        NUM_CLASSES = len(classes) + 1  # + 1 for background class

    eval_config = KaggleConfig()

    network = modellib.MaskRCNN(mode="inference", config=eval_config, model_dir=MODEL_DIR)
    network.load_weights(model_path, by_name=True)

    return u.eval_mAP(network, eval_data, eval_config, mAP_sample_size)

### Single model using recently trained model

#### Single evaluation for a single model 

In [None]:
# Evaluate one checkpoint against class partition 7 on 250 sampled images.
eval_single_model(os.path.join(MODEL_DIR,'kaggle20190923T0040','mask_rcnn_kaggle_0050.h5'),cset_index=7, mAP_sample_size=250,mask_path=MASK_DIR)

#### Multiple evaluations of a single model

In [None]:
# Build the validation data set once for faster mAP evaluation

# Partition whose model checkpoints are evaluated below.
cset_index = 7

class_set = class_sub_sets[cset_index]
        
# Note: rebinds the notebook-level `anns` to this partition's annotations.
anns = st.load_annotations_by_image(class_set,USE_MASKS)    
val_data = st.load_dataset(anns, DATA_DIR, class_set, is_train=False,mask_path=MASK_DIR)

In [None]:

# Evaluate checkpoints 47, 48 and 49 of the same training run, reusing the
# validation data built above; prints the checkpoint number and its result.
for i in range(47,50):
    print(i,eval_single_model(os.path.join(MODEL_DIR,'kaggle20190923T0040',"mask_rcnn_kaggle_00" + str(i) + ".h5"),
                  class_set=class_set, val_data=val_data, mAP_sample_size=250))

# eval_single_model(os.path.join(MODEL_DIR,'kaggle20190903T1053',"mask_rcnn_kaggle_0165.h5"),
#                   class_set=class_set, val_data=val_data, mAP_sample_size=250)
    
# copy recently trained model to models directory

#model from epoch 164 had mAP of 0.189, val_loss of 2.00
#model from epoch 165 had mAP of 0.271, val_loss of 1.322
#model from epoch 166 had mAP of 0.29, val_loss of 1.551
#model from epoch 74, had mAP of 0.201, val_loss of 4.6

### All models

#### Load models (duplicates cell from above)

In [None]:
# Weights of the first six partition models, as stored in the data directory.
model_paths = [os.path.join(DATA_DIR,'models','cset_' + str(i) + '_model.h5') for i in range(6)]

# `class_sets` is not defined anywhere in the notebook; the class tables that
# line up with these paths are the partitions in class_sub_sets.
models = load_member_models(model_paths, class_sub_sets)

#### Run all models

In [None]:
# Evaluate every member model against its own class partition (path i <-> partition i).
for i,mpath in enumerate(model_paths):
    print(i, eval_single_model(mpath,cset_index=i,mask_path=MASK_DIR))