In [9]:
import math
import os
import pickle
import random
import re
import sys
import time

import matplotlib
import matplotlib.patches as patches
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf

# Root directory of the project
ROOT_DIR = os.path.abspath("../../")

# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn import utils_2 as utils
from mrcnn import visualize
from mrcnn.visualize import display_images
import mrcnn.model as modellib
from mrcnn.model import log

from mrcnn.model import log

from doc import train

%matplotlib inline 

# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Path to the trained weights file that the weights-loading cell passes to
# model.load_weights().
# The variable name and the Releases link below appear to be left over from
# the Mask R-CNN balloon sample:
# https://github.com/matterport/Mask_RCNN/releases
# NOTE(review): the file name suggests Indiscapes weights rather than balloon
# weights -- confirm where this file is actually obtained from.
BALLON_WEIGHTS_PATH = "./pretrained_model_indiscapes.h5"  # TODO: update this path

In [10]:
# Base configuration object from the project's `doc.train` module; the
# InferenceConfig class defined further down subclasses this object's class.
config = train.Config()
# Dataset folder under ROOT_DIR; passed to dataset.load_data() when the
# validation split is loaded.
DOC_DIR = os.path.join(ROOT_DIR, "datasets/doc/")

In [11]:
# class InferenceConfig(config.__class__):
# 	GPU_COUNT = 1
# 	IMAGES_PER_GPU = 1
# 	IMAGE_RESIZE_MODE = "square"
# 	DETECTION_MIN_CONFIDENCE = 0.6
# 	DETECTION_NMS_THRESHOLD = 0.3
# 	PRE_NMS_LIMIT = 12000
# 	RPN_ANCHOR_SCALES = (8,32,64,256,1024)
# 	RPN_ANCHOR_RATIOS = [1,3,10]

# 	POST_NMS_ROIS_INFERENCE = 12000
    
# config = InferenceConfig()
# config.display()

In [12]:
# Inference-time configuration: inherit everything from the class of the
# training configuration and override only what differs for detection.
class InferenceConfig(type(config)):
    # Batch of exactly one image: a single GPU with one image on it
    IMAGES_PER_GPU = 1
    GPU_COUNT = 1

    IMAGE_RESIZE_MODE = "square"

    # Detection confidence / non-maximum-suppression settings
    DETECTION_NMS_THRESHOLD = 0.3
    DETECTION_MIN_CONFIDENCE = 0.5

    # ROI limits around non-maximum suppression
    # (POST_NMS_ROIS_INFERENCE: ROIs kept after NMS at inference time)
    POST_NMS_ROIS_INFERENCE = 1000
    PRE_NMS_LIMIT = 4000


# Replace the training configuration with the inference one and show it.
config = InferenceConfig()
config.display()


Configurations:
BACKBONE                       resnet50
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     1
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_MAX_INSTANCES        150
DETECTION_MIN_CONFIDENCE       0.5
DETECTION_NMS_THRESHOLD        0.3
FPN_CLASSIF_FC_LAYERS_SIZE     1024
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 1
IMAGE_CHANNEL_COUNT            3
IMAGE_MAX_DIM                  1024
IMAGE_META_SIZE                23
IMAGE_MIN_DIM                  256
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              square
IMAGE_SHAPE                    [1024 1024    3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.0001
LOSS_WEIGHTS                   {'rpn_class_loss': 1.0, 'rpn_bbox_loss': 2.0, 'mrcnn_class_loss': 1.0, 'mrcnn_bbox_loss': 2.0, 'mrcnn_mask_loss': 5.0}
MASK_POOL_SIZE                 14
MASK_SHAPE         

In [13]:
# Device to load the neural network on.
# Useful if you're training a model on the same
# machine, in which case use CPU and leave the
# GPU for training.
DEVICE = "/gpu:0"  # /cpu:0 or /gpu:0

# Inspect the model in training or inference modes
# values: 'inference' or 'training'
# TODO: code for 'training' test mode not ready yet
# NOTE(review): in the cells shown here TEST_MODE is never read; the model
# construction cell passes mode="inference" as a literal.
TEST_MODE = "inference"

In [14]:
def get_ax(rows=1, cols=1, size=16):
    """Create a Matplotlib figure and hand back its Axes (or Axes array).

    Single place that decides how large the notebook's plots are: the
    figure is `size * cols` wide and `size * rows` tall, so raising
    `size` renders bigger images.
    """
    figure_dims = (size*cols, size*rows)
    axes = plt.subplots(rows, cols, figsize=figure_dims)[1]
    return axes

In [15]:
# Build the validation split of the document dataset.
dataset = train.Dataset()
dataset.load_data(DOC_DIR, "val")

# prepare() has to run before the dataset is used.
dataset.prepare()

# Summarise the split: image count, class count, then one line per class.
summary_lines = [
    "Image Count: {}".format(len(dataset.image_ids)),
    "Class Count: {}".format(dataset.num_classes),
]
for class_index, class_entry in enumerate(dataset.class_info):
    summary_lines.append("{:3}. {:50}".format(class_index, class_entry['name']))
print("\n".join(summary_lines))

Image Count: 53
Class Count: 11
  0. BG                                                
  1. H-V                                               
  2. H                                                 
  3. CLS                                               
  4. PD                                                
  5. PB                                                
  6. CC                                                
  7. P                                                 
  8. D                                                 
  9. LM                                                
 10. BL                                                


In [16]:
# Create model in inference mode
# The Mask R-CNN graph is built inside the DEVICE scope, using MODEL_DIR as
# the log/checkpoint directory and the InferenceConfig instance in `config`.
# NOTE(review): the output recorded for this cell is a ValueError raised from
# 'roi_align_classifier_1/CropAndResize' (rank mismatch on one input), so in
# the saved run `model` was never created here. Every later cell needs
# `model`; re-check the configuration before relying on them.
with tf.device(DEVICE):
    model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR,
                              config=config)

ValueError: Shape must be rank 1 but is rank 3 for 'roi_align_classifier_1/CropAndResize' (op: 'CropAndResize') with input shapes: [?,?,?,256], [?,4], [?], [2,1,4].

In [None]:
# Select the weights file to load into `model`.

# Alternatives kept from the original sample notebook:
# download a file from the Releases page and set its path
# https://github.com/matterport/Mask_RCNN/releases
# weights_path = "/path/to/mask_rcnn_balloon.h5"

# Or, load the last model you trained
# weights_path = model.find_last()
weights_path=BALLON_WEIGHTS_PATH
# Load weights
print("Loading weights ", weights_path)
# Earlier variant that also skipped the class-logits, bbox and mask heads:
#model.load_weights(weights_path, by_name=True, exclude=["mrcnn_class_logits", "mrcnn_bbox_fc","mrcnn_bbox", "mrcnn_mask"])
# NOTE(review): "mrcnn_bbox_fc" is excluded from loading; presumably that
# layer is then left at its initial values -- confirm this is intended when
# the model is used for evaluation rather than fine-tuning.
model.load_weights(weights_path, by_name=True,exclude=["mrcnn_bbox_fc"])

In [None]:
# Evaluate the loaded model page by page on the validation set with
# iou_threshold=0.0 / score_threshold=0.0, print per-page metrics, then
# aggregate over the dataset. Per-class sums are normalised with the PIH
# frequency table. Per-page results are pickled to 'pih_metrics.pickle'.
# Requires `pickle` to be imported in the notebook's import cell.
all_images_test = dataset.image_ids

out_dic = {}        # image name -> dict of that page's metrics
cnt = 0             # pages visited, including pages skipped for no detections

# Accumulators filled only for pages that produced at least one detection.
avg_pagewise = []
avg_classwise = []
acc_classwise = []
avg_fIOU = 0.0
avg_mAP = 0.0
avg_p = 0.0
avg_r = 0.0
avg_pres = []
avg_rec = []
avg_mAP_range = []

for ind, image_id in enumerate(all_images_test):
    cnt += 1
    print(ind," : ",image_id)
    image, image_meta, gt_class_id, gt_bbox, gt_mask = modellib.load_image_gt(
        dataset, config, image_id, use_mini_mask=False)
    info = dataset.image_info[image_id]
    img_name = info['id']
    print(img_name)

    # Run object detection on this single page.
    results = model.detect([image], verbose=0)
    r = results[0]

    # Pages without any detection are not scored; they still count in `cnt`.
    if len(r['rois']) == 0:
        continue

    (pagewise, weighted, com_freq, mAP_out, pres_out, rec_out,
     mAP_range, class_wise, class_acc) = utils.compute_per_region_ap(
        gt_bbox, gt_class_id, gt_mask,
        r['rois'], r['class_ids'], r['scores'], r['masks'],
        iou_threshold=0.0, score_threshold=0.0)

    avg_classwise.append(list(class_wise.values()))
    acc_classwise.append(list(class_acc.values()))

    pres_out = np.array(pres_out)
    rec_out = np.array(rec_out)
    # Separate names so the detection dict `r` is not overwritten by a scalar.
    mean_precision = pres_out.mean()
    mean_recall = rec_out.mean()

    print("pagewise : ",pagewise)
    print("f-weighted :",weighted)
    print("mean AP : ",mAP_out)
    print("mean precision : ",mean_precision)
    print("mean recall : ",mean_recall)
    print("prec_range : ",pres_out)
    print("rec_range : ",rec_out)
    print("AP_range : ",mAP_range)

    avg_pagewise.append(list(pagewise.values()))
    avg_fIOU += weighted
    avg_mAP += mAP_out
    avg_p += mean_precision
    avg_r += mean_recall
    avg_pres.append(pres_out)
    avg_rec.append(rec_out)
    avg_mAP_range.append(mAP_range)

    out_dic[img_name] = {
        'pagewise': pagewise,
        'f-weighted': weighted,
        'mAP': mAP_out,
        'mprec': mean_precision,
        'mrec': mean_recall,
        'Precision': pres_out,
        'recall': rec_out,
        'maP_range': mAP_range,
    }

# Frequency tables keyed 1..10 (presumably instance counts per class id for
# each collection -- verify against the dataset). Only the PIH table is used
# in this cell.
dic_freq_bhoomi={1:1.0,2:46.0,3:159.0,4:305.0,5:28.0,6:26.0,7:1.0,8:1.0,9:14.0,10:14.0}
dic_freq_PIH={1:1.0,2:2.0,3:372.0,4:6.0,5:53.0,6:73.0,7:7.0,8:13.0,9:8.0,10:52.0}

dic_freq_all={1:1.0,2:48.0,3:531,4:311,5:81,6:99,7:8.0,8:14.0,9:22.0,10:66.0}

# Fail with a clear message instead of an opaque TypeError further down when
# no page was scored (empty dataset, or no detections on any page).
if not avg_pagewise:
    raise RuntimeError("No page produced detections; there is nothing to aggregate.")

# NOTE(review): these four scalars are divided by every visited page, so a
# skipped page counts as 0, while the np.mean() aggregates below only cover
# pages that were scored. Confirm this mix of denominators is intended.
avg_fIOU = avg_fIOU*1.0/cnt
avg_mAP = avg_mAP*1.0/cnt
avg_p = avg_p*1.0/cnt
avg_r = avg_r*1.0/cnt
avg_pres = np.mean(np.array(avg_pres), axis=0)
avg_rec = np.mean(np.array(avg_rec), axis=0)
avg_pagewise = np.mean(np.array(avg_pagewise), axis=0)
avg_mAP_range = np.mean(np.array(avg_mAP_range), axis=0)

# Sum the per-page class values and divide entry i by dic_freq_PIH[i+1].
# Float arrays are forced so the division cannot be truncated by an
# integer-typed sum.
avg_classwise = np.sum(np.asarray(avg_classwise, dtype=float), axis=0)
acc_classwise = np.sum(np.asarray(acc_classwise, dtype=float), axis=0)
class_freq = np.array(
    [dic_freq_PIH[class_key] for class_key in range(1, len(avg_classwise) + 1)],
    dtype=float)
avg_classwise = avg_classwise / class_freq
acc_classwise = acc_classwise / class_freq

class_weighted = 0.0  # not used in this cell; kept because it was defined here before


print("final results: /////########################")

print("avg_IOU_classwise : ",avg_classwise)
print("acc_classwise : ",acc_classwise)
print("avg_fIOU : ",avg_fIOU)
print("avg_pagewise : ",avg_pagewise)
print("avg_mAP : ",avg_mAP)
print("avg prec : ",avg_p)
print("avg rec : ",avg_r)
print("avg_mAP_range : ",avg_mAP_range)
print("avg_pres_range : ",avg_pres)
print("avg_rec_range : ",avg_rec)


with open('pih_metrics.pickle','wb') as f:
    pickle.dump(out_dic,f)


In [None]:
# Evaluate the loaded model page by page on the validation set with
# iou_threshold=0.4 / score_threshold=0.4, then aggregate over the dataset.
# Per-class sums are normalised with the combined (`dic_freq_all`) table.
# Requires `pickle` to be imported in the notebook's import cell.
all_images_test = dataset.image_ids

out_dic = {}        # image name -> dict of that page's metrics
cnt = 0             # number of pages evaluated

avg_pagewise = []
avg_classwise = []
acc_classwise = []
avg_fIOU = 0.0
avg_mAP = 0.0
avg_p = 0.0
avg_r = 0.0
avg_pres = []
avg_rec = []
avg_mAP_range = []

for ind, image_id in enumerate(all_images_test):
    cnt += 1
    print(ind," : ",image_id)
    image, image_meta, gt_class_id, gt_bbox, gt_mask = modellib.load_image_gt(
        dataset, config, image_id, use_mini_mask=False)
    info = dataset.image_info[image_id]
    img_name = info['id']
    print(img_name)

    # Run object detection on this single page.
    results = model.detect([image], verbose=0)
    r = results[0]

    # NOTE(review): unlike the 0.0-threshold evaluation cell, pages with no
    # detections are not skipped here; confirm compute_per_region_ap accepts
    # empty predictions.
    (pagewise, weighted, com_freq, mAP_out, pres_out, rec_out,
     mAP_range, class_wise, class_acc) = utils.compute_per_region_ap(
        gt_bbox, gt_class_id, gt_mask,
        r['rois'], r['class_ids'], r['scores'], r['masks'],
        iou_threshold=0.4, score_threshold=0.4)

    avg_classwise.append(list(class_wise.values()))
    acc_classwise.append(list(class_acc.values()))

    pres_out = np.array(pres_out)
    rec_out = np.array(rec_out)
    # Separate names so the detection dict `r` is not overwritten by a scalar.
    mean_precision = pres_out.mean()
    mean_recall = rec_out.mean()

    avg_pagewise.append(list(pagewise.values()))
    avg_fIOU += weighted
    avg_mAP += mAP_out
    avg_p += mean_precision
    avg_r += mean_recall
    avg_pres.append(pres_out)
    avg_rec.append(rec_out)
    avg_mAP_range.append(mAP_range)

    out_dic[img_name] = {
        'pagewise': pagewise,
        'f-weighted': weighted,
        'mAP': mAP_out,
        'mprec': mean_precision,
        'mrec': mean_recall,
        'Precision': pres_out,
        'recall': rec_out,
        'maP_range': mAP_range,
    }

# Frequency tables keyed 1..10 (presumably instance counts per class id for
# each collection -- verify against the dataset). Only the combined table is
# used in this cell.
dic_freq_bhoomi={1:1.0,2:46.0,3:159.0,4:305.0,5:28.0,6:26.0,7:1.0,8:1.0,9:14.0,10:14.0}
dic_freq_PIH={1:1.0,2:2.0,3:372.0,4:6.0,5:53.0,6:73.0,7:7.0,8:13.0,9:8.0,10:52.0}

dic_freq_all={1:1.0,2:48.0,3:531,4:311,5:81,6:99,7:8.0,8:14.0,9:22.0,10:66.0}

# Fail with a clear message instead of a ZeroDivisionError on an empty dataset.
if cnt == 0:
    raise RuntimeError("The dataset has no images; there is nothing to aggregate.")

avg_fIOU = avg_fIOU*1.0/cnt
avg_mAP = avg_mAP*1.0/cnt
avg_p = avg_p*1.0/cnt
avg_r = avg_r*1.0/cnt
avg_pres = np.mean(np.array(avg_pres), axis=0)
avg_rec = np.mean(np.array(avg_rec), axis=0)
avg_pagewise = np.mean(np.array(avg_pagewise), axis=0)
avg_mAP_range = np.mean(np.array(avg_mAP_range), axis=0)

# Sum the per-page class values and divide entry i by dic_freq_all[i+1].
# Float arrays are forced so the division cannot be truncated by an
# integer-typed sum.
avg_classwise = np.sum(np.asarray(avg_classwise, dtype=float), axis=0)
acc_classwise = np.sum(np.asarray(acc_classwise, dtype=float), axis=0)
class_freq = np.array(
    [dic_freq_all[class_key] for class_key in range(1, len(avg_classwise) + 1)],
    dtype=float)
avg_classwise = avg_classwise / class_freq
acc_classwise = acc_classwise / class_freq

class_weighted = 0.0  # not used in this cell; kept because it was defined here before


print("final results: /////########################")
print("avg_IOU_classwise : ",avg_classwise)
print("acc_classwise : ",acc_classwise)
print("avg_fIOU : ",avg_fIOU)
print("avg_pagewise : ",avg_pagewise)
print("avg_mAP : ",avg_mAP)
print("avg prec : ",avg_p)
print("avg rec : ",avg_r)
print("avg_mAP_range : ",avg_mAP_range)
print("avg_pres_range : ",avg_pres)
print("avg_rec_range : ",avg_rec)


# NOTE(review): same file name as the 0.0-threshold evaluation cell, so
# running both cells overwrites that cell's per-page results. Confirm whether
# a separate file was intended.
with open('pih_metrics.pickle','wb') as f:
    pickle.dump(out_dic,f)

In [None]:
acc_classwise

In [None]:

#iou_threshold=0.4,score_threshold=0.4
avg_IOU_classwise :  [('Hole(Virtual)', 0), ('Hole(Physical)', 0.7678649591965663), ('Character Line Segment', 0.5116226973857138), ('Physical Degradation', 0.0892081546979525), ('Page Boundary', 0.7404136104523582), ('Character Component', 0.432609309277567), ('Picture', 0.5850817594661933), ('Decorator', 0.034911221074079464), ('Library Marker', 0.3794140331304556), ('Boundary Line', 0.20353904331114614)]
avg_acc_classwise :  [('Hole(Virtual)', 0), ('Hole(Physical)', 0.9994667309981126), ('Character Line Segment', 0.7429944756767025), ('Physical Degradation', 0.16640194344049966), ('Page Boundary', 0.886257289207145), ('Character Component', 0.6586898501071866), ('Picture', 0.8223311106363932), ('Decorator', 0.06186455488204956), ('Library Marker', 0.5513625144958496), ('Boundary Line', 0.3658131544930594)]
class_count :  [('Hole(Virtual)', 0), ('Hole(Physical)', 26), ('Character Line Segment', 77), ('Physical Degradation', 24), ('Page Boundary', 73), ('Character Component', 36), ('Picture', 6), ('Decorator', 8), ('Library Marker', 19), ('Boundary Line', 20)]
avg_fIOU :  0.606578642588633
avg_mAP :  0.25803234347983095

In [None]:
avg_classwise

In [None]:
avg_fIOU

In [None]:
avg_mAP