In [1]:
%load_ext autoreload
%autoreload 2

In [15]:
#!apt-get update
#!python3 -m pip install --upgrade pip
#!pip install opencv-python
#!apt-get install -y libglib2.0-0
#!apt-get install -y libsm6 libxext6 libxrender-dev
#!pip install matplotlib --upgrade
#!apt-get install -y python3-skimage
#!pip install scikit-image==0.16.2

Requirement already up-to-date: matplotlib in /usr/local/lib/python3.6/dist-packages (3.2.1)


In [2]:
import os
import sys
import random
import math
import re
import time
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
from PIL import Image
from sklearn.metrics import confusion_matrix, classification_report


# Root directory of the project
ROOT_DIR = os.path.abspath(".")
print(ROOT_DIR)
# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log

%matplotlib inline 

# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, 'models')

# Local path to trained weights file
COCO_MODEL_PATH = os.path.join(ROOT_DIR, 'models/mask_rcnn_coco.h5')
# Download COCO trained weights from Releases if needed
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)

/root/image-recognition-with-synthetic-training-data/model



Using TensorFlow backend.


## Configuration

In [3]:
class VehiclesConfig(Config):
    NAME = "vehicles"
    GPU_COUNT = 1
    IMAGES_PER_GPU = 4
    NUM_CLASSES = 1 + 2  # background + 2 shapes
    IMAGE_MIN_DIM = 256
    IMAGE_MAX_DIM = 256
    #BACKBONE = "resnet50" -- default is resnet101

    #These are for BW
    #IMAGE_CHANNEL_COUNT = 1
    #MEAN_PIXEL = 1
    
    #Faster epochs?
    #STEPS_PER_EPOCH = 100
config = VehiclesConfig()
#config.display()

## Dataset class

Here we create a class that overrides utils.Dataset. Must override the following methods:

* load_images() -- this loads images from files into the Dataset object
* load_mask() -- this returns the mask for the identified image


In [4]:
class VehiclesDataset(utils.Dataset):
    '''
    This provides tools to load the training images/masks and encapsulates them for the model
    '''
    def load_images(self, dataset_dir, proportion=1.0):
        '''
        
        '''
        self.add_class('vehicles', 1, 'car')
        self.add_class('vehicles', 2, 'pickup')
        for filename in os.listdir(dataset_dir):
            if(os.path.isdir(dataset_dir + '\\' + filename)):
                continue
            image_path = os.path.join(dataset_dir, filename)
            if(random.random() < proportion):            
                self.add_image(
                    "vehicles",
                    image_id=filename,  
                    path=image_path,
                    width=256, 
                    height=256
            )

    def load_mask(self, image_id):
        '''
        Return the mask for the id'd image
        '''
        id = self.image_info[image_id]['id']
        car_id = id.split('_')[2]
        cam_id = id.split('_')[4]
        shot_id = id.split('_')[5]
        mask_id = '{0}_{1}_{2}'.format(car_id, cam_id, shot_id)
        mask = Image.open(mask_dir + mask_id)
        if('car' in car_id):
            class_id = 1
        else:
            class_id = 2
        mask_as_array = np.expand_dims(np.array(mask), axis=2)
        return mask_as_array*1, np.array([class_id])


## Create training and validation datasets

In [5]:
train_dataset_dir = '../../data/color/train'
val_dataset_dir = '../../data/color/val'
mask_dir = '../../data/masks/'


# Training dataset
dataset_train = VehiclesDataset()
dataset_train.load_images(train_dataset_dir)
dataset_train.prepare()

# Validation dataset
dataset_val = VehiclesDataset()
dataset_val.load_images(val_dataset_dir)
dataset_val.prepare()

In [20]:
class InferenceConfig(VehiclesConfig):
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

inference_config = InferenceConfig()

# Recreate the model in inference mode
model = modellib.MaskRCNN(mode="inference", 
                          config=inference_config,
                          model_dir=MODEL_DIR)

# Get path to saved weights
# Either set a specific path or find last trained weights
# model_path = os.path.join(ROOT_DIR, ".h5 file name here")
#model_path = os.path.join(MODEL_DIR, "mask_rcnn_vehicles25.h5")
model_path = os.path.join(MODEL_DIR, "mask_rcnn_vehicles-clean-start-10.h5")

# Load trained weights
print("Loading weights from ", model_path)
model.load_weights(model_path, by_name=True)

Loading weights from  /root/image-recognition-with-synthetic-training-data/model/models/mask_rcnn_vehicles-clean-start-10.h5


## Evaluation

In [26]:
# Compute VOC-Style mAP @ IoU=0.5
# Running on 10 images. Increase for better accuracy.
image_ids = np.random.choice(dataset_train.image_ids, 1000)
APs = []
for image_id in image_ids:
    # Load image and ground truth data
    image, image_meta, gt_class_id, gt_bbox, gt_mask =\
        modellib.load_image_gt(dataset_train, inference_config,
                               image_id, use_mini_mask=False)
    molded_images = np.expand_dims(modellib.mold_image(image, inference_config), 0)
    # Run object detection
    results = model.detect([image], verbose=0)
    r = results[0]
    # Compute AP
    AP, precisions, recalls, overlaps =\
        utils.compute_ap(gt_bbox, gt_class_id, gt_mask,
                         r["rois"], r["class_ids"], r["scores"], r['masks'])
    APs.append(AP)
    
print("mAP: ", np.mean(APs))

mAP:  0.918


In [21]:
def score_test(directory='../../data/test/'):
    preds = []
    #axs = get_ax(118, 1, 12)
    for i, filename in enumerate(os.listdir(directory)):
        #print(filename)
        if(os.path.isdir(directory + '/' + filename)):
            continue
        img = Image.open(directory + '/' + filename)
        img = img.resize((256, 256))
        img_as_arr = np.array(img)
        results = model.detect([img_as_arr], verbose=0)
        if(len(results[0]['class_ids']) > 0):
            class_id = results[0]['class_ids'][0]
            score = results[0]['scores'][0]
            if('car' in filename):
                actual_class = 1
            elif('pickup' in filename):
                actual_class = 2
            else:
                actual_class = 0
            preds.append([filename, actual_class, class_id, float(score)])
            #preds.append('file: {0}, predicted class: {1}, score: {2}'.format(filename, class_id, score))
        else:
            preds.append([filename, actual_class, 0, 0])
    return preds
preds_new2 = score_test()

In [23]:
df = pd.DataFrame(preds_new2)
confusion_matrix(df[1], df[2])

array([[ 1,  7,  1],
       [ 3, 36, 11],
       [ 3, 42,  5]])

In [25]:
print(classification_report(df[1], df[2]))

              precision    recall  f1-score   support

           0       0.14      0.11      0.12         9
           1       0.42      0.72      0.53        50
           2       0.29      0.10      0.15        50

   micro avg       0.39      0.39      0.39       109
   macro avg       0.29      0.31      0.27       109
weighted avg       0.34      0.39      0.32       109



In [19]:
preds_new

[['car-19.png', 1, 1, 0.738649308681488],
 ['pickup-13.png', 2, 2, 0.9595611095428467],
 ['car-37.png', 2, 0, 0],
 ['pickup-31.png', 2, 2, 0.9910202622413635],
 ['car-28.png', 1, 1, 0.8691062927246094],
 ['pickup-22.png', 2, 2, 0.9533739686012268],
 ['car-46.png', 1, 1, 0.9484565854072571],
 ['pickup-40.png', 2, 2, 0.9843562245368958],
 ['pickup-4.png', 2, 2, 0.992992639541626],
 ['car-20.png', 1, 2, 0.8867312669754028],
 ['car-2.png', 1, 0, 0],
 ['pickup-39.png', 2, 2, 0.99094557762146],
 ['car-11.png', 1, 1, 0.8293371200561523],
 ['nada-4.png', 1, 0, 0],
 ['pickup-48.png', 2, 2, 0.9918733239173889],
 ['pickup-17.png', 2, 2, 0.9879947304725647],
 ['pickup-35.png', 2, 2, 0.9901829361915588],
 ['pickup-26.png', 2, 2, 0.99076908826828],
 ['nada-0.png', 0, 1, 0.8758792877197266],
 ['pickup-8.png', 2, 2, 0.9881957769393921],
 ['pickup-44.png', 2, 2, 0.9934331178665161],
 ['car-24.png', 1, 1, 0.8773436546325684],
 ['car-6.png', 1, 2, 0.8114509582519531],
 ['car-42.png', 1, 2, 0.962758898735