## Mask R-CNN for Pantograph Pose Estimation
---

In [1]:
import os
import sys
import random
import math
import re
import time
import numpy as np
import pandas as pd
import cv2
import matplotlib
import matplotlib.pyplot as plt

# Root directory of the project: "../" resolved against the current working directory
ROOT_DIR = os.path.abspath("../")

# Add root to sys.path (only once) before the project imports below.
# NOTE(review): presumably the `mrcnn` and `dev` packages live under ROOT_DIR - confirm.
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)

# Import Mask RCNN
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log
from mrcnn.model import utils

# Import the pantograph module (used below for PantographConfig and PantographDataset)
from dev import pantograph

# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "models")

# Set path to root of images.
DATA_DIR = os.path.join(ROOT_DIR, "datasets/pantograph")

# Echo the resolved paths so the cell output records which directories this run used
print("Using Root dir:",ROOT_DIR)
print("Using Model dir:",MODEL_DIR)
print("Using Data dir:",DATA_DIR)

Using TensorFlow backend.


I've been imported
Using Root dir: /home/jupyter/GCP_Test
Using Model dir: /home/jupyter/GCP_Test/models
Using Data dir: /home/jupyter/GCP_Test/datasets/pantograph


In [2]:
import tensorflow as tf
# Record the installed TensorFlow version in the cell output (the saved run printed 1.15.2)
print(tf.__version__)

1.15.2


In [3]:
import json
# Write json to file
def WriteJSON(obj,filename):
    """Serialize ``obj`` to JSON and write it to ``filename``.

    NumPy scalars and arrays inside ``obj`` are converted through
    ``NumpyArrayEncoder``. This is a best-effort helper: any failure is
    printed and swallowed rather than raised, and nothing is returned.

    Args:
        obj: JSON-serializable object (may contain NumPy values).
        filename: Path of the file to (over)write.
    """
    try:
        # Serialize BEFORE opening the file: open(..., 'w') truncates
        # immediately, so a serialization error after opening would leave an
        # empty file and destroy whatever results were stored there.
        obj_json = json.dumps(obj, cls=NumpyArrayEncoder)
        with open(filename, 'w') as outfile:
            outfile.write(obj_json)
    except Exception as e:
        print(e)
        print('File not written.')

# Load a JSON document from disk; any failure yields an empty list instead.
def ReadJSON(filename):
    """Return the parsed JSON content of ``filename``.

    If the file cannot be opened or parsed (or any other exception occurs),
    the error is swallowed and an empty list is returned.
    """
    try:
        with open(filename) as handle:
            return json.load(handle)
    except Exception:
        return []

In [4]:
class NumpyArrayEncoder(json.JSONEncoder):
    """JSON encoder that converts NumPy scalars and arrays to native Python values.

    Pass as ``cls=NumpyArrayEncoder`` to ``json.dumps``/``json.dump``.
    Anything that is not a NumPy boolean, integer, float or array is handed
    to the base class, which raises ``TypeError`` for unsupported types.
    """

    def default(self, obj):
        # np.bool_ is neither a Python bool nor an np.integer, so without this
        # branch a NumPy comparison result would fail to serialize.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super(NumpyArrayEncoder, self).default(obj)

## Configurations

Run the code block below to load the inference configuration used by the rest of this notebook.

In [5]:
class InferenceConfig(pantograph.PantographConfig):
    # Overrides applied on top of the project's PantographConfig for this notebook.
    # With both values set to 1, the configuration printed below reports BATCH_SIZE 1.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

config = InferenceConfig()

# Print the resolved configuration values
config.display()


Configurations Superlee:
BACKBONE                       resnet101
BACKBONE_SHAPES                [[256 256]
 [128 128]
 [ 64  64]
 [ 32  32]
 [ 16  16]]
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     1
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_MAX_INSTANCES        50
DETECTION_MIN_CONFIDENCE       0.9
DETECTION_NMS_THRESHOLD        0.3
GPU_COUNT                      1
IMAGES_PER_GPU                 1
IMAGE_MAX_DIM                  1024
IMAGE_MIN_DIM                  512
IMAGE_MIN_SCALE                0.5
IMAGE_PADDING                  True
IMAGE_RESIZE_MODE              square
IMAGE_SHAPE                    [1024 1024    3]
KEYPOINT_MASK_POOL_SIZE        7
KEYPOINT_MASK_SHAPE            [56, 56]
KEYPOINT_THRESHOLD             0.005
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.002
MASK_POOL_SIZE                 14
MASK_SHAPE                     [28, 28]
MAX_GT_INSTANCES        

## Notebook Preferences

In [6]:
# Device to load the neural network on. Use CPU to leave GPU for training.
# This string is passed to tf.device() when the model is built.
DEVICE = "/cpu:0"  # /cpu:0 or /gpu:0

# Inspect the model in training or inference modes
# TODO: code for 'training' test mode not ready yet
# NOTE(review): no cell visible in this part of the notebook reads TEST_MODE;
# the model is constructed with a literal mode="inference".
TEST_MODE = "inference" # 'inference' or 'training'

## Load Dataset

In [7]:
# Build the pantograph dataset from the "val" subset under DATA_DIR
# (the original inline note mentions "test" as the alternative subset)
dataset = pantograph.PantographDataset()
dataset.load_pantograph(DATA_DIR, "val")

# prepare() has to be called before the dataset is used
dataset.prepare()

num_images = len(dataset.image_ids)
print("Image Count: {}".format(num_images))
print("Class Count: {}".format(dataset.num_classes))

# One line per class: running index followed by the class name
for class_index, class_entry in enumerate(dataset.class_info):
    print("{:3}. {:50}".format(class_index, class_entry['name']))

loading annotations into memory...
Done (t=0.06s)
creating index...
index created!
Skeleton: (5, 2)
Keypoint names: (6,)
Image Count: 24
Class Count: 4
  0. BG                                                
  1. front_bar                                         
  2. middle_bar                                        
  3. rear_bar                                          


## Load Model

In [8]:
# Local path to the trained weights file that will be loaded
LOG_DIR = os.path.join(MODEL_DIR, "pantograph20200412T2157")
MODEL_PATH = os.path.join(LOG_DIR, "mask_rcnn_pantograph_0010.h5")

# Build the network and load its weights on the selected device
with tf.device(DEVICE):
    # Recreate the model in inference mode
    model = modellib.MaskRCNN(
        mode="inference",
        config=config,
        model_dir=MODEL_DIR,
    )

    # Load trained weights, matching layers by name
    model.load_weights(MODEL_PATH, by_name=True)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Instructions for updating:
box_ind is deprecated, use box_indices instead

Instructions for updating:
Use `tf.cast` instead.


Instructions for updating:
Use `tf.cast` instead.


In [None]:
# model.keras_model.summary()

### Single Image Detection

In [None]:
'''
Load sample image
'''

# Set random image_id
# NOTE: this random pick is immediately overwritten by the fixed assignment
# below; comment out the fixed assignment to actually use the random image.
image_id = random.choice(dataset.image_ids)

# Set specific image_id
image_id = 1

# Show which image was selected and the reference the dataset reports for it
print("image_id ", image_id, dataset.image_reference(image_id))

In [None]:
# Display the ground-truth annotations of the selected test image.

# Load the image together with its ground-truth class ids, boxes, masks and
# keypoints. use_mini_mask=False keeps the masks as annotated so the preview
# shows the original annotation.
image, image_meta, class_ids, bbox, masks, keypoints =\
    modellib.load_image_gt_keypoints(dataset, config,
                                     image_id, augment=False, use_mini_mask=False)

# One name per ground-truth instance: class id k maps to the (k-1)-th entry of
# the class-name list with the background entry removed.
# How to ensure alignment/where to get classnames, colors, etc??
foreground_names = dataset.class_names[1:]
class_names = [foreground_names[i-1] for i in class_ids]

# Expand the masks when their first dimension is smaller than the image's
if masks.shape[0] < image.shape[0]:
    masks = utils.expand_mask(bbox, masks, image.shape)

# These figures show the annotations, not model output, so they are titled
# "Ground Truth" (the title previously read "Predictions").
visualize.display_instances(image, bbox, masks, class_ids, dataset.class_names, title="Ground Truth")
visualize.DrawAnnotations(image, class_ids, class_names, bbox=bbox, masks=masks, keypoints=keypoints, skeleton=dataset.skeleton, figsize=[16,16])

In [None]:
# Run detection on the image loaded by the ground-truth cell.

# Reload to be sure no changes
info = dataset.image_info[image_id]
print("image ID: {}.{} ({}) {}".format(info["source"], info["id"], image_id,
                                       dataset.image_reference(image_id)))

# Run detection and get results
results = model.detect_keypoint([image], verbose=1)
r = results[0] # for one image

# Get class names for the DETECTED instances. They must come from the predicted
# class ids: the ground-truth `class_ids` can differ in length and order from
# the detections, which would mislabel (or fail to index) the drawn instances.
class_names = [dataset.class_names[1:][i-1] for i in r['class_ids']]

# Expand masks if needed. The check looks at the predicted masks themselves
# rather than at whatever `masks` held from an earlier cell, so re-running this
# cell gives the same result.
# NOTE(review): assumes r['masks'] has its row dimension first, as
# utils.expand_mask's use on the ground-truth masks suggests - confirm.
predicted_masks = r['masks']
if predicted_masks.shape[0] < image.shape[0]:
    masks = utils.expand_mask(r['rois'], predicted_masks, image.shape)
else:
    masks = predicted_masks

In [None]:
# Show the detections stored in `r`; `masks` and `class_names` are the
# variables prepared by the detection cell.
visualize.display_instances(image, r['rois'], masks, r['class_ids'], 
                            dataset.class_names, r['scores'], 
                            title="Predictions")

# Draw vis
# Same detections again, this time including keypoints and the dataset skeleton.
visualize.DrawAnnotations(image, 
                          r['class_ids'],
                          class_names=class_names,
                          bbox=r['rois'],
                          masks=masks,
                          keypoints=r['keypoints'],
                          skeleton=dataset.skeleton,
                          scores=r['scores'],
                          figsize=[16,16])

In [None]:

# For each detection, print whether its class id equals the non-background
# class id found at the same position in the dataset's class id list.
non_background_ids = dataset.class_ids[1:]
for i, cat_id in enumerate(r['class_ids']):
    hit = bool(cat_id == non_background_ids[i])
    print(hit)

### Multi-Image Detection

In [11]:
'''
Run detection on multiple images. Results saved to JSON.
'''

# Placeholder values copied into every annotation record; they are constants,
# not values computed from the detection results.
_PLACEHOLDER_AREA = 5463.6864
_NUM_KEYPOINTS = 6


def _build_image_record(image_id, image, num_detections):
    """Return the ``images`` entry describing one processed image."""
    image_path = dataset.image_reference(image_id)
    return {
        'file_name': image_path.split("/")[-1],
        # NumPy image arrays are indexed rows first: shape[0] is the height
        # and shape[1] the width (the two were previously swapped).
        'height': image.shape[0],
        'id': image_id,
        'num_annotations': num_detections,
        'path': image_path,
        'width': image.shape[1],
    }


def _build_annotation_records(image_id, r, first_id):
    """Return one ``annotations`` entry per detection in ``r``, ids starting at ``first_id``."""
    detected_ids = r['class_ids'].tolist()
    expected_ids = dataset.class_ids[1:]
    records = []
    for i, cat_id in enumerate(detected_ids):
        # A detection is a "hit" when its class id equals the non-background
        # class id at the same position. Detections beyond the number of
        # foreground classes have nothing to compare with and count as misses
        # instead of raising IndexError.
        hit = i < len(expected_ids) and bool(cat_id == expected_ids[i])
        records.append({
            'area': _PLACEHOLDER_AREA,
            'category_id': cat_id,
            'id': first_id + i,
            'image_id': image_id,
            'iscrowd': 0,
            'num_keypoints': _NUM_KEYPOINTS,
            'bbox': r['rois'][i],
            # NOTE(review): indexes the FIRST axis of r['masks']; if the masks
            # are laid out (height, width, instance) this is an image row, not
            # the i-th instance mask - confirm the layout.
            'segmentation': r['masks'][i],
            'keypoints': r['keypoints'][i],
            'hit': hit
        })
    return records


def PredictMultiple(image_ids,RESULTS_FILE = '../datasets/pantograph/val/pred_region_data.json'):
    """Run keypoint detection on several images and store the results as JSON.

    For every image id the image is loaded from the global ``dataset``,
    passed through the global ``model`` and its results are merged into
    ``RESULTS_FILE``:

    * the image's entry in ``images`` is replaced if one with the same id
      exists (matched by its ``id`` field), otherwise appended;
    * annotations previously stored for that image are dropped and one fresh
      annotation per detection is appended, so re-running never duplicates or
      overwrites another image's annotations.

    The file is read once and written once per image, so results of earlier
    images are kept if a later image fails. A missing or unreadable results
    file is treated as empty.

    Args:
        image_ids: Iterable of dataset image ids to process.
        RESULTS_FILE: Path of the JSON file to create or update.

    Returns:
        None. Failures for an individual image are printed and the loop
        continues with the next image.
    """
    for image_id in image_ids:

        print("Running detection on:", image_id, dataset.image_reference(image_id))

        try:
            # Load image (the ground-truth outputs are not needed here)
            image = modellib.load_image_gt_keypoints(
                dataset, config, image_id, augment=False, use_mini_mask=True)[0]

            r = model.detect_keypoint([image], verbose=0)[0]  # for one image

            # ReadJSON returns [] when the file is missing or unreadable;
            # start from an empty document in that case.
            obj = ReadJSON(RESULTS_FILE)
            if not isinstance(obj, dict):
                obj = {}
            images = obj.setdefault('images', [])
            annotations = obj.setdefault('annotations', [])

            image_record = _build_image_record(image_id, image, len(r['class_ids'].tolist()))

            # Locate an existing entry by its 'id' field, not by list position
            matches = [pos for pos, entry in enumerate(images)
                       if entry.get('id') == image_id]
            if matches:
                print('Updating image information')
                images[matches[0]] = image_record
                # Remove stale annotations of this image before adding new ones
                annotations[:] = [anno for anno in annotations
                                  if anno.get('image_id') != image_id]
            else:
                print('Writing new image information')
                images.append(image_record)

            # Continue numbering after the highest id in use; for a file that
            # was only ever appended to this equals len(annotations).
            first_id = max((anno.get('id', -1) for anno in annotations), default=-1) + 1
            annotations.extend(_build_annotation_records(image_id, r, first_id))

            WriteJSON(obj, RESULTS_FILE)

        except Exception as e:
            # Best effort: report which image failed and keep going
            print("Detection failed for image {}: {}".format(image_id, e))

In [12]:
# Call to run detection on multiple images.
# Every image id in the dataset is processed; slice the list to run a subset.
image_ids = dataset.image_ids.tolist()
PredictMultiple(image_ids)

Running detection on: 0 /home/jupyter/GCP_Test/datasets/pantograph/val/swin_to_padd_frame_220500.jpg
Molding Inputs
(1, 1024, 1024, 3)
Starting Detection
Writing new image information
Running detection on: 1 /home/jupyter/GCP_Test/datasets/pantograph/val/swin_to_padd_frame_242996.jpg
Molding Inputs
(1, 1024, 1024, 3)
Starting Detection
Writing new image information
Running detection on: 2 /home/jupyter/GCP_Test/datasets/pantograph/val/padd_to_swin_frame_37354.jpg
Molding Inputs
(1, 1024, 1024, 3)
Starting Detection
Writing new image information
Running detection on: 3 /home/jupyter/GCP_Test/datasets/pantograph/val/padd_to_swin_frame_215562.jpg
Molding Inputs
(1, 1024, 1024, 3)
Starting Detection
Writing new image information
Running detection on: 4 /home/jupyter/GCP_Test/datasets/pantograph/val/swin_to_padd_frame_130516.jpg
Molding Inputs
(1, 1024, 1024, 3)
Starting Detection
Writing new image information
Running detection on: 5 /home/jupyter/GCP_Test/datasets/pantograph/val/swin_to_pa