In [1]:
import os, json
import sys
import random
import math
import re
import time
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore")

# Root directory of the project
ROOT_DIR = os.path.abspath("../")
INPUT_DIR = os.path.join(ROOT_DIR, 'considition-challenge')

# Import Mask RCNN
sys.path.append(ROOT_DIR)
from mrcnn.config import Config
from mrcnn import utils
import mrcnn.model as modellib
from mrcnn import visualize
from mrcnn.model import log

%matplotlib inline 

# Logs and trained model checkpoints are written below the dataset folder
MODEL_DIR = os.path.join(INPUT_DIR, "logs")

# COCO-trained weights are expected next to the project root; they are
# downloaded from Releases only when no local copy exists yet
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)

Using TensorFlow backend.


In [None]:
import zipfile

# Unpack the dataset archive once; an existing Training_dataset folder
# is taken as proof that the extraction already happened.
dataset_archive = ROOT_DIR + '/dataset.zip'
extracted_folder = ROOT_DIR + '/Training_dataset'
if not os.path.exists(extracted_folder):
    with zipfile.ZipFile(dataset_archive, 'r') as archive:
        archive.extractall(ROOT_DIR)

In [2]:
def recreate_data():
    """Wipe and rebuild the empty ``considition-challenge`` folder tree.

    Removes ``<ROOT_DIR>/considition-challenge`` together with everything
    inside it (if it exists) and then creates the ``train`` and
    ``validation`` folders, each with ``Images``, ``Masks``, ``Annotations``
    and ``Percentages`` sub-folders.

    Filesystem errors raised while creating the folders are reported on
    stdout and not re-raised, so the caller keeps running either way.
    Errors raised while deleting the old tree propagate unchanged.
    """
    import shutil

    # Anchor every path on ROOT_DIR so the folders created below are the same
    # ones that were just removed, whatever the current working directory is.
    target_dir = os.path.join(ROOT_DIR, 'considition-challenge')

    # NOTE: this removes the whole tree, not only the train/validation data.
    if os.path.exists(target_dir):
        shutil.rmtree(target_dir)

    try:
        for subset in ('train', 'validation'):
            for kind in ('Images', 'Masks', 'Annotations', 'Percentages'):
                # makedirs also creates the missing parent folders.
                os.makedirs(os.path.join(target_dir, subset, kind))
    except OSError as err:
        # Only filesystem failures are reported; anything else (for example
        # KeyboardInterrupt) is allowed to propagate.
        print("Error while creating directories: {}".format(err))

recreate_data()

In [None]:
# Extracted raw dataset and the root of the train/validation copy
source_dir = os.path.join(ROOT_DIR, 'Training_dataset/')
destination_dir = os.path.join(ROOT_DIR, 'considition-challenge/')

# Source folders, one per artefact type
images_dir = os.path.join(source_dir, 'Images/')
masks_dir = os.path.join(source_dir, 'Masks/all/')
percentages_dir = os.path.join(source_dir, 'Percentages/')
annotations_dir = os.path.join(source_dir, 'Annotations/')


# Target folders for the training split
train_images_dir = os.path.join(destination_dir, 'train/Images/')
train_masks_dir = os.path.join(destination_dir, 'train/Masks/')
train_percentages_dir = os.path.join(destination_dir, 'train/Percentages/')
train_annotations_dir = os.path.join(destination_dir, 'train/Annotations/')


# Target folders for the validation split
validation_images_dir = os.path.join(destination_dir, 'validation/Images/')
validation_masks_dir = os.path.join(destination_dir, 'validation/Masks/')
validation_percentages_dir = os.path.join(destination_dir, 'validation/Percentages/')
validation_annotations_dir = os.path.join(destination_dir, 'validation/Annotations/')


# Report how many source files exist per artefact type
for label, folder in (('images', images_dir),
                      ('masks', masks_dir),
                      ('percentages', percentages_dir)):
    print('There are {} {} files'.format(len(os.listdir(folder)), label))

In [None]:
def summarize_percentages(percentages_dir, annotations_dir):
    '''
    Collect the per-image class percentages and list the images to exclude.

    percentages_dir: folder holding one ``<image>.json`` file per image, each
        with the keys ``building``, ``road`` and ``water``.
    annotations_dir: folder holding ``master_train.json``, whose ``images``
        entries provide ``file_name``, ``width`` and ``height``.

    Returns a tuple ``(percentages, df_results, list_exclude_files)``:
        percentages: list of dicts with ``imageName`` (the json stem plus
            ``.jpg``) and the three ``*Percentage`` values.
        df_results: DataFrame of those records plus ``totalPercentage``.
        list_exclude_files: image names whose total percentage is 0, followed
            by the file names of images that are not 1024 x 1024.
    '''
    import pandas as pd

    columns = ['imageName', 'buildingPercentage', 'roadPercentage', 'waterPercentage']

    # Sort the listing so the row order (and any seeded split computed from
    # it) does not depend on the filesystem; skip entries that are not JSON.
    json_names = sorted(name for name in os.listdir(percentages_dir)
                        if name.endswith('.json'))

    percentages = []
    for filename in json_names:
        with open(os.path.join(percentages_dir, filename)) as handle:
            data = json.load(handle)
        percentages.append({
            'imageName': os.path.splitext(filename)[0] + '.jpg',
            'buildingPercentage': data['building'],
            'roadPercentage': data['road'],
            'waterPercentage': data['water'],
        })

    with open(os.path.join(annotations_dir, 'master_train.json')) as handle:
        master = json.load(handle)

    # An image is kept only when both sides are exactly 1024 pixels.
    exc_img_size = [item['file_name'] for item in master['images']
                    if item['width'] != 1024 or item['height'] != 1024]

    # Naming the columns keeps the frame usable when no percentage file exists.
    df_results = pd.DataFrame(percentages, columns=columns)
    df_results['totalPercentage'] = (df_results['buildingPercentage']
                                     + df_results['roadPercentage']
                                     + df_results['waterPercentage'])

    list_exclude_files = list(df_results.loc[df_results['totalPercentage'] == 0, 'imageName'])
    list_exclude_files.extend(exc_img_size)

    return percentages, df_results, list_exclude_files

percentages, df_results, list_exclude_files = summarize_percentages(percentages_dir, annotations_dir)

def fn_1(x):
    """Return 1 when image name ``x`` is on the exclusion list, else 0."""
    return 1 if x in list_exclude_files else 0

# Flag excluded images in the helper column 'x', keep the unflagged rows
# and drop the helper column from the cleaned copy.
df_results['x'] = df_results['imageName'].apply(fn_1)
df_results_clean = df_results.loc[df_results['x'] == 0].drop('x', axis=1)

# Features (image name) and targets (the three class percentages)
df_x_ = df_results_clean.loc[:, ['imageName']]
df_y_ = df_results_clean.loc[:, ['buildingPercentage', 'roadPercentage', 'waterPercentage']]

df_y = df_y_.copy()
df_x = df_x_.copy()

# Binarise the targets: 1 when the class is present at all, 0 otherwise
for column in ('buildingPercentage', 'roadPercentage', 'waterPercentage'):
    df_y[column] = df_y[column].transform(lambda x: 1 if x > 0 else 0)

# Image name without its extension (text before the first '.')
df_x['image'] = df_x['imageName'].transform(lambda x: x.split('.')[0])


from sklearn.model_selection import train_test_split
X = df_x
y = df_y
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.01, random_state=42, shuffle=True, stratify=y)

train_list_jpg = X_train['imageName'].tolist()
val_list_jpg = X_test['imageName'].tolist()

train_list = X_train['image'].tolist()
val_list = X_test['image'].tolist()

print('There are {} training images' .format(len(train_list)))
print('There are {} validation images' .format(len(val_list)))

In [None]:
from shutil import copyfile

def copy_from_to_dir(source_dir, destination_dir, ext, filename):
    """Copy ``filename + ext`` from ``source_dir`` to ``destination_dir``.

    Paths are built by plain string concatenation, so the separator must be
    supplied by the caller, either at the end of the directory arguments or
    at the start of ``filename``.
    """
    basename = filename + ext
    copyfile(source_dir + basename, destination_dir + basename)

def split_data(source_dir, train_list, val_list):
    """Copy image, mask and percentage files into the train/validation folders.

    source_dir: accepted but not used; the source and target folders are read
        from the module-level ``*_dir`` variables.
    train_list: file stems (no extension) that go to the training folders.
    val_list: file stems (no extension) that go to the validation folders.
    """
    copy_plan = (
        (train_list, train_images_dir, train_masks_dir, train_percentages_dir),
        (val_list, validation_images_dir, validation_masks_dir, validation_percentages_dir),
    )

    for stems, images_target, masks_target, percentages_target in copy_plan:
        for stem in stems:
            copy_from_to_dir(images_dir, images_target, '.jpg', stem)
            copy_from_to_dir(masks_dir, masks_target, '.png', stem)
            copy_from_to_dir(percentages_dir, percentages_target, '.json', stem)

        
def split_json(image_names, annotations_dir, dest_annotations_dir, subset):
    """Write the part of ``master_train.json`` that belongs to ``image_names``.

    image_names: iterable of image file names (as stored in the ``file_name``
        field of the master annotation file) to keep.
    annotations_dir: folder that contains ``master_train.json``.
    dest_annotations_dir: folder that receives ``<subset>.json``.
    subset: output file stem. The file is always written to the current
        working directory; for ``'train'`` and ``'validation'`` it is also
        copied into ``dest_annotations_dir``.

    The output keeps ``info``, ``categories`` and ``licenses`` unchanged and
    filters ``images`` and ``annotations`` down to the selected image ids.
    """
    with open(os.path.join(annotations_dir, 'master_train.json')) as json_file:
        coco_json = json.load(json_file)

    # Sets give constant-time membership tests; the original list lookups
    # made both filters quadratic in the number of images.
    wanted_names = set(image_names)
    selected_ids = {image['id'] for image in coco_json['images']
                    if image['file_name'] in wanted_names}

    new_json = {
        'images': [item for item in coco_json['images']
                   if item['id'] in selected_ids],
        'annotations': [item for item in coco_json['annotations']
                        if item['image_id'] in selected_ids],
        'info': coco_json['info'],
        'categories': coco_json['categories'],
        'licenses': coco_json['licenses'],
    }

    # Written to the working directory first, as before.
    local_name = '{}.json' .format(subset)
    with open(local_name, 'w') as outfile:
        json.dump(new_json, outfile)

    if subset in ('train', 'validation'):
        copyfile(os.path.join(os.getcwd(), local_name),
                 os.path.join(dest_annotations_dir, local_name))


In [None]:
split_data(images_dir, train_list, val_list)
split_json(train_list_jpg, annotations_dir, train_annotations_dir, 'train')
split_json(val_list_jpg, annotations_dir, validation_annotations_dir, 'validation')

In [None]:
sys.path.append(ROOT_DIR)
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
from pycocotools import mask as maskUtils

from mrcnn.visualize import display_instances, display_images, apply_mask

import skimage
import zipfile
import urllib.request
import shutil


class consid_dataset(utils.Dataset):
    """Mask R-CNN dataset backed by COCO-style annotation files.

    Expects ``<dataset_dir>/<subset>/Annotations/<subset>.json`` and
    ``<dataset_dir>/<subset>/Images/``; every class and image is registered
    under the ``"coco"`` source name.
    """

    def load_data(self, dataset_dir, subset, class_ids=None,
                  class_map=None, return_coco=False):
        """Register the classes and images of one subset.

        dataset_dir: directory that contains one folder per subset.
        subset: subset folder name; also the stem of its annotation file.
        class_ids: If provided, only loads images that have the given classes;
            otherwise every category of the annotation file is used.
        class_map: accepted for interface compatibility; not used here.
        return_coco: If True, returns the COCO object.
        """

        coco = COCO("{}/{}/Annotations/{}.json".format(dataset_dir, subset, subset))
        image_dir = "{}/{}/Images".format(dataset_dir, subset)

        # Load all classes or a subset?
        if not class_ids:
            # All classes
            class_ids = sorted(coco.getCatIds())

        # All images or a subset?
        if class_ids:
            # Union of the images that contain at least one requested class
            image_ids = []
            for class_id in class_ids:
                image_ids.extend(list(coco.getImgIds(catIds=[class_id])))
            # Remove duplicates
            image_ids = list(set(image_ids))
        else:
            # Only reached when the annotation file lists no categories
            image_ids = list(coco.imgs.keys())

        # Add classes
        for class_id in class_ids:
            self.add_class("coco", class_id, coco.loadCats(class_id)[0]["name"])

        # Add images
        for image_id in image_ids:
            image_meta = coco.imgs[image_id]
            self.add_image(
                "coco", image_id=image_id,
                path=os.path.join(image_dir, image_meta['file_name']),
                width=image_meta["width"],
                height=image_meta["height"],
                annotations=coco.loadAnns(coco.getAnnIds(
                    imgIds=[image_id], catIds=class_ids, iscrowd=None)))
        if return_coco:
            return coco

    def load_mask(self, image_id):
        """Load instance masks for the given image.
        Different datasets use different ways to store masks. This
        function converts the different mask format to one format
        in the form of a bitmap [height, width, instances].
        Returns:
        masks: A bool array of shape [height, width, instance count] with
            one mask per instance.
        class_ids: a 1D array of class IDs of the instance masks.
        """
        # If not a COCO image, delegate to parent class.
        # (The original named a non-existent class in super(), which raised
        # NameError whenever this path was taken.)
        image_info = self.image_info[image_id]
        if image_info["source"] != "coco":
            return super().load_mask(image_id)

        instance_masks = []
        class_ids = []
        annotations = image_info["annotations"]
        # Build mask of shape [height, width, instance_count] and list
        # of class IDs that correspond to each channel of the mask.
        for annotation in annotations:
            class_id = self.map_source_class_id(
                "coco.{}".format(annotation['category_id']))
            if class_id:
                m = self.annToMask(annotation, image_info["height"],
                                   image_info["width"])
                # Some objects are so small that they're less than 1 pixel area
                # and end up rounded out. Skip those objects.
                if m.max() < 1:
                    continue
                # Is it a crowd? If so, use a negative class ID.
                if annotation['iscrowd']:
                    # Use negative class ID for crowds
                    class_id *= -1
                    # For crowd masks, annToMask() sometimes returns a mask
                    # smaller than the given dimensions. If so, replace it
                    # with an all-ones mask of the full image size.
                    if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]:
                        m = np.ones([image_info["height"], image_info["width"]], dtype=bool)
                instance_masks.append(m)
                class_ids.append(class_id)

        # Pack instance masks into an array
        if class_ids:
            # Builtin bool replaces the np.bool alias, which NumPy removed.
            mask = np.stack(instance_masks, axis=2).astype(bool)
            class_ids = np.array(class_ids, dtype=np.int32)
            return mask, class_ids
        else:
            # Call super class to return an empty mask
            return super().load_mask(image_id)

    def image_reference(self, image_id):
        '''
        Return the path to the image.
        '''
        info = self.image_info[image_id]
        return info['path']

    ######------
    def display_image(self, image_id): #theres a display_images at visuzalise
        '''
        Read the image file registered for ``image_id`` and show it.
        '''
        # Load image
        img = skimage.io.imread(self.image_info[image_id]['path'])
        skimage.io.imshow(img)
        skimage.io.show()

    def image_data(self, image_id):
        '''
        Return the full ``image_info`` entry stored for ``image_id``.
        '''
        info = self.image_info[image_id]
        return info

    def conv_img_to_int_ID(self, num, subset):
        '''
        Map an annotation-file image id to its position in that file.

        num: value of the ``id`` field of an entry in the ``images`` list.
        subset: ``'train'`` or ``'validation'``; selects the annotation file
            under ``../considition-challenge`` (relative to the current
            working directory).

        Returns the zero-based index of the image within the ``images`` list;
        when an id occurs more than once the last occurrence wins.
        Raises ValueError for any other subset and KeyError when ``num`` is
        not present in the file.
        '''
        if subset not in ('train', 'validation'):
            # Previously this fell through and failed on the unbound REF.
            raise ValueError(
                "subset must be 'train' or 'validation', got {!r}".format(subset))

        path_dir = os.path.abspath("../")
        REF = os.path.join(
            path_dir,
            'considition-challenge/{0}/Annotations/{0}.json'.format(subset))

        # Load json from file
        with open(REF) as flnm:
            cc = json.load(flnm)

        # NOTE(review): load_data() orders images via list(set(...)), so this
        # file position presumably matches the dataset's internal image id
        # only when both orders agree - confirm before relying on it.
        position_by_id = {image['id']: position
                          for position, image in enumerate(cc['images'])}
        return position_by_id[num]
    ######------

    # The following two functions are from pycocotools with a few changes.

    def annToRLE(self, ann, height, width):
        """
        Convert annotation which can be polygons, uncompressed RLE to RLE.
        :return: run-length encoding of the annotation's segmentation
        """
        segm = ann['segmentation']
        if isinstance(segm, list):
            # polygon -- a single object might consist of multiple parts
            # we merge all parts into one mask rle code
            rles = maskUtils.frPyObjects(segm, height, width)
            rle = maskUtils.merge(rles)
        elif isinstance(segm['counts'], list):
            # uncompressed RLE
            rle = maskUtils.frPyObjects(segm, height, width)
        else:
            # rle
            rle = ann['segmentation']
        return rle

    def annToMask(self, ann, height, width):
        """
        Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask.
        :return: binary mask (numpy 2D array)
        """
        rle = self.annToRLE(ann, height, width)
        m = maskUtils.decode(rle)
        return m

In [None]:
import json
def _load_subset(subset):
    """Build, load and prepare a consid_dataset for one subset of INPUT_DIR."""
    subset_data = consid_dataset()
    subset_data.load_data(INPUT_DIR, subset)
    subset_data.prepare()
    return subset_data

# Training split
train_set = _load_subset('train')
print('Train dataset: {}' .format(len(train_set.image_ids)))

# Validation split (labelled "Test" in the printed summary)
val_set = _load_subset('validation')
print('Test dataset: {}' .format(len(val_set.image_ids)))

In [None]:
# Draw four training image ids at random and show the top masks of each
dataset = train_set
image_ids = np.random.choice(dataset.image_ids, size=4)
for image_id in image_ids:
    image = dataset.load_image(image_id)
    mask, class_ids = dataset.load_mask(image_id)
    visualize.display_top_masks(
        image, mask, class_ids, dataset.class_names)

In [None]:
# Same preview for the validation split; the ids drawn here are reused by
# the augmentation cells further down.
dataset = val_set
image_ids = np.random.choice(dataset.image_ids, size=4)
for image_id in image_ids:
    image = dataset.load_image(image_id)
    mask, class_ids = dataset.load_mask(image_id)
    visualize.display_top_masks(
        image, mask, class_ids, dataset.class_names)

In [None]:
image_ids

In [None]:
import imageio
import imgaug as ia
from imgaug import augmenters as iaa
ia.seed(4)
%matplotlib inline

# Reuse the validation ids sampled earlier (no new random draw here)
dataset = val_set
print(image_ids)

# The same images loaded two ways: through the dataset loader and read
# directly from their file path with imageio.
ids = [dataset.load_image(image_id) for image_id in image_ids]
ids_path = [imageio.imread(dataset.image_reference(image_id))
            for image_id in image_ids]

In [None]:
import numpy as np

# Rotation demo on the sampled validation images.
# `images` is kept as an alias because the next cell reuses it.
images = ids
# rotate=(-25, 25) is handed to imgaug as a range - presumably one angle in
# degrees is sampled per image; confirm against the imgaug documentation.
rotate = iaa.Affine(rotate=(-25, 25))
images_aug = rotate.augment_images(images)

# np.hstack places the augmented images side by side in a single picture
print("Augmented batch:")
ia.imshow(np.hstack(images_aug))

In [None]:
# Gaussian-noise demo: only the noise augmenter is active, the rotation and
# crop variants are kept below as commented-out alternatives.
seq = iaa.Sequential([
    #iaa.Affine(rotate=(-25, 25)),
    iaa.AdditiveGaussianNoise(scale=(30, 90)), # little visible difference observed
    #iaa.Crop(percent=(0, 0.2), keep_size=True) # judged useful
])
# `images` is the alias of `ids` defined in the rotation cell above
images_aug = seq.augment_images(images)

print("Augmented batch:")
ia.imshow(np.hstack(images_aug))

In [None]:
# Affine demo combining scale, translation, rotation and shear; every
# parameter is given as a (min, max) range.
seq = iaa.Sequential([
    iaa.Affine(
            scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
            translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
            rotate=(-45, 45),
            shear=(-30, 30)
    )])

# Each image is augmented by its own call instead of one batch call
images_aug = [seq.augment_image(image) for image in ids]

# Shown as a grid of one row and four columns
print("Augmented:")
ia.imshow(ia.draw_grid(images_aug, cols=4, rows=1))

In [None]:
# Mixed demo: noise, two crops, colour shift, elastic distortion and flip.
# The trailing remarks record which augmenters were judged useful.
seq = iaa.Sequential([
    iaa.AdditiveGaussianNoise(scale=(20, 30)),
    iaa.Crop(percent=(0, 0.4)),
    iaa.AddToHueAndSaturation((-60, 60)),  # change their color; possibly useful too
    iaa.ElasticTransformation(alpha=90, sigma=9),  # water-like effect; useful, needs more testing
    iaa.Fliplr(0.5), # horizontally flip 50% of the images; useful
    iaa.Crop(percent=(0, 0.1)), # random crops
    ])

# Each image is augmented by its own call instead of one batch call
images_aug = [seq.augment_image(image) for image in ids]

# Shown as a grid of one row and four columns
print("Augmented:")
ia.imshow(ia.draw_grid(images_aug, cols=4, rows=1))

In [None]:
# Photometric demo: blur, contrast, noise and brightness changes only;
# no geometric transform is part of this pipeline.
seq = iaa.Sequential([
    # Small gaussian blur with random sigma between 0 and 0.5.
    # But we only blur about 50% of all images.
    iaa.Sometimes(0.5,
        iaa.GaussianBlur(sigma=(0, 0.5))
    ),
    # Strengthen or weaken the contrast in each image.
    iaa.ContrastNormalization((0.75, 2)),
    # Add gaussian noise.
    # For 50% of all images, we sample the noise once per pixel.
    # For the other 50% of all images, we sample the noise per pixel AND
    # channel. This can change the color (not only brightness) of the
    # pixels.
    iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5),
    # Make some images brighter and some darker.
    # In 20% of all cases, we sample the multiplier once per channel,
    # which can end up changing the color of the images.
    iaa.Multiply((0.8, 1.2), per_channel=0.2)
    ])

# Each image is augmented by its own call instead of one batch call
images_aug = [seq.augment_image(image) for image in ids]

# Shown as a grid of one row and four columns
print("Augmented:")
ia.imshow(ia.draw_grid(images_aug, cols=4, rows=1))

In [None]:
# Placeholder pipeline with no augmenters configured; add them to the list.
# The list is closed properly so that the cell parses and runs.
seq = iaa.Sequential([])


images_aug = [seq.augment_image(image) for image in ids]

print("Augmented:")
ia.imshow(ia.draw_grid(images_aug, cols=4, rows=1))

In [None]:
# Candidate colour/intensity augmenters. Every entry below is commented
# out, so this Sequential is currently built from an empty list.
seq = iaa.Sequential([

                # Add gaussian noise to some images.
                # In 50% of these cases, the noise is randomly sampled per
                # channel and pixel.
                # In the other 50% of all cases it is sampled once per
                # pixel (i.e. brightness change).
    #iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5),


                # Invert each image's channel with 5% probability.
                # This sets each pixel value v to 255-v.
    #iaa.Invert(0.05, per_channel=True), # invert color channels

                # Change brightness of images (50-150% of original value).
    #iaa.Multiply((0.5, 1.5), per_channel=0.5),

                # Improve or worsen the contrast of images.
    #iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5),

                # Convert each image to grayscale and then overlay the
                # result with the original with random alpha. I.e. remove
                # colors with varying strengths.
    #iaa.Grayscale(alpha=(0.0, 1.0))
])




# Each image is augmented by its own call instead of one batch call
images_aug = [seq.augment_image(image) for image in ids]

# Shown as a grid of one row and four columns
print("Augmented:")
ia.imshow(ia.draw_grid(images_aug, cols=4, rows=1))

In [None]:
# Helper that wraps an augmenter in iaa.Sometimes(0.5, ...).
# It is defined here but not used by the pipeline below.
sometimes = lambda aug: iaa.Sometimes(0.5, aug)


# Combined pipeline; random_order=True is passed to the outer Sequential.
seq = iaa.Sequential([

    # Geometric block, wrapped in Sometimes(0.4, ...)
    iaa.Sometimes(0.4,(
        iaa.Affine(
            scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
            translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
            rotate=(-45, 45),
            shear=(-16, 16)
        ))),
    iaa.ContrastNormalization((0.75, 2)),
    # Crop/colour block, wrapped in Sometimes(0.6, ...)
    iaa.Sometimes(0.6,(
        #iaa.ContrastNormalization((0.5, 1.5)),
        iaa.Crop(percent=(0, 0.4)),
        iaa.AddToHueAndSaturation((-60, 60)),  # change their color
        #iaa.ElasticTransformation(alpha=90, sigma=9),  # water-like effect
        iaa.Fliplr(0.5), # horizontally flip 50% of the images,

        iaa.Sometimes(0.5,([
            #iaa.GaussianBlur(sigma=(0, 0.5)),
            iaa.Crop(percent=(0, 0.1)), # random crops
            #iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5),
            iaa.Multiply((0.8, 1.5), per_channel=0.4)
        ])),

        iaa.Invert(0.05, per_channel=True), # invert color channels
        iaa.Grayscale(alpha=(0.0, 1.0))
        ))
    ], random_order=True)

# Reference images for the side-by-side comparison.
# NOTE(review): the batch below is augmented from `ids_path` (the imageio
# reads) while the reference shown is `ids` (the dataset loader) - these are
# presumably the same pictures; confirm.
images_different_sizes = ids

# augment them as one batch
images_aug = seq.augment_images(ids_path)

# Show every original next to its augmented version. Looping over the pairs
# replaces four copy-pasted statements with hard-coded indices 0..3.
for index, (original, augmented) in enumerate(zip(images_different_sizes, images_aug)):
    print("Image %d (input shape: %s, output shape: %s)" % (index, original.shape, augmented.shape))
    ia.imshow(np.hstack([original, augmented]))