In [None]:
import numpy as np
import pandas as pd
import cv2
from datetime import datetime
import time
from tqdm import tqdm
import os
import sys
from skimage import io

from matplotlib import pyplot as plt
%matplotlib inline

# Put the parent of the current working directory on the import path so the
# `unet` package imported just below can be resolved.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

from unet.maskprocessor import *
from unet.normalization import *
from unet.loss import *

The purpose of this notebook is:
   To evaluate model performance on an unseen test set.  The test set is sourced from the DigitalGlobe (WorldView-2) Open Data Program; its imagery is lower resolution and noisier than the high-resolution MapBox satellite imagery used in the training and validation sets.

Prerequisites:
   A directory containing the source-of-truth MapBox street tiles.  These can be obtained via the MapBox API.
   A directory containing the inferred mask tiles.  These tiles can be generated by roadSegmentationMaskGen.py.


In [None]:
# Position of the run to evaluate; used to pick an entry of `dir_inferred`.
# NOTE(review): `model_file` appears to list the checkpoints in the same
# order, but it is not read by the visible cells - confirm.
index = 2

# Shared prefixes of the local tile and model locations.
_tiles_root = '/Users/jkwok/Documents/Insight/tools/jTileDownloader 2/digitalglobe/'
_models_root = '/Users/jkwok/Documents/Insight/models/'

# Source-of-truth MapBox street tiles.
dir_truth = _tiles_root + 'mapbox_custom_street/'

# Inferred mask tiles, one directory per run.
dir_inferred = [
    _tiles_root + 'jackkwok.digitalglobe_harvey_3020132_tif_mask/',
    _tiles_root + '141812_post_mask/',
    _tiles_root + '214437_post_mask/',
]

# Model checkpoint files.
model_file = [
    _models_root + 'Unet_Dilated-20170917-223544.hdf5',
    _models_root + 'Unet_Dilated-20170921-141812.hdf5',
    _models_root + 'Unet_Dilated-20170921-214437.hdf5',
]

In [None]:
def image_file_list(dir_path):
    """Recursively collect the image files found under ``dir_path``.

    Limitation: only files whose name ends with one of the extensions
    ``.webp``, ``.png``, ``.jpg`` or ``.jpeg`` (case-sensitive) are returned.

    Args:
        dir_path: directory to walk, including all of its sub-directories.

    Returns:
        A sorted list of file paths, each built by joining the walked
        directory with the file name.  The list is empty when nothing
        matches or ``dir_path`` cannot be walked.
    """
    image_extensions = ('.webp', '.png', '.jpg', '.jpeg')
    result = []
    for root, _dirs, files in os.walk(dir_path):
        for filename in files:
            # str.endswith accepts a tuple of suffixes.
            if filename.endswith(image_extensions):
                result.append(os.path.join(root, filename))
    # os.walk yields entries in an arbitrary, filesystem-dependent order;
    # sorting keeps the listing (and anything indexed by position) stable
    # from run to run.
    result.sort()
    return result

In [None]:
# Pair every inferred mask tile with its source-of-truth street tile.
inferred_filelist = image_file_list(dir_inferred[index])

truth_filelist = []

for img_file in inferred_filelist:
    # Re-root the tile under dir_truth using its path relative to the inferred
    # directory.  A plain str.replace on the full path would also rewrite any
    # later occurrence of the directory string or of '.jpg' inside the path.
    rel_path = os.path.relpath(img_file, dir_inferred[index])
    stem, ext = os.path.splitext(rel_path)
    if ext == '.jpg':
        # Only a trailing '.jpg' extension is swapped for '.png'; every other
        # extension is kept unchanged.
        rel_path = stem + '.png'
    truth_file = os.path.join(dir_truth, rel_path)
    truth_filelist.append(truth_file)

# One row per tile: 'infer' holds the inferred mask path, 'truth' the matching
# truth tile path.
img_df = pd.DataFrame(
    {'infer': inferred_filelist,
     'truth': truth_filelist,
    })

img_df.head()

In [None]:
# Sanity-check the dice computation on the first inferred/truth pair.
infer_jpg_filename = img_df.loc[0, 'infer']
truth_jpg_filename = img_df.loc[0, 'truth']
print(infer_jpg_filename)
print(truth_jpg_filename)

infer_jpg_img = cv2.imread(infer_jpg_filename)
truth_jpg_img = cv2.imread(truth_jpg_filename)

# cv2.imread signals a missing or undecodable file by returning None instead of
# raising, so fail here with the offending path rather than further down.
if infer_jpg_img is None:
    raise IOError('could not read inferred tile: {}'.format(infer_jpg_filename))
if truth_jpg_img is None:
    raise IOError('could not read truth tile: {}'.format(truth_jpg_filename))

print(infer_jpg_img.shape)
print(truth_jpg_img.shape)

infer_image = cv2.cvtColor(infer_jpg_img, cv2.COLOR_BGR2GRAY)

# get_street_mask comes from one of the star imports (presumably
# unet.maskprocessor - confirm).
mask = get_street_mask(truth_jpg_img)
print('binary mask', np.unique(mask))

plt.imshow(mask, cmap=plt.cm.binary)
# NOTE(review): new_style is never used in the visible cells; confirm whether
# it was meant to be applied to the plot or can be removed.
new_style = {'grid': False}

# Convert the values with astype.  Assigning to mask.dtype only reinterprets
# the underlying bytes, which is wrong for any dtype that is not one byte wide.
mask = mask.astype(np.uint8)
# Scale the mask from {0, 1} to {0, 255} so it is on the same scale as the
# 8-bit grayscale prediction.
mask[mask == 1] = 255

print(np.unique(infer_image))
print(np.unique(mask))

# dice = 2 * sum(prediction inside the mask) / (sum(prediction) + sum(mask))
overlap = np.sum(infer_image[mask == 255]) * 2.0
total = np.sum(infer_image) + np.sum(mask)
# Both images empty: the score is undefined, so report NaN explicitly instead
# of dividing by zero.
dice = overlap / total if total > 0 else np.nan

print(dice)

In [None]:
# Compute the dice coefficient between source of truth and prediction for
# every row of img_df and store it in a new 'dice_coef' column.

def _read_image(path):
    """Read an image with OpenCV, raising IOError when it cannot be loaded.

    cv2.imread returns None (rather than raising) for a missing or
    undecodable file.
    """
    image = cv2.imread(path)
    if image is None:
        raise IOError('could not read image: {}'.format(path))
    return image


dice_scores = []

for infer_jpg_filename, truth_jpg_filename in tqdm(
        zip(img_df['infer'], img_df['truth']), total=len(img_df), miniters=10):
    infer_jpg_img = _read_image(infer_jpg_filename)
    truth_jpg_img = _read_image(truth_jpg_filename)

    infer_image = cv2.cvtColor(infer_jpg_img, cv2.COLOR_BGR2GRAY)

    mask = get_street_mask(truth_jpg_img)
    # Convert the values with astype.  Assigning to mask.dtype only
    # reinterprets the underlying bytes, which is wrong for any dtype that is
    # not one byte wide.
    mask = mask.astype(np.uint8)
    # Scale the mask from {0, 1} to {0, 255} to match the 8-bit prediction.
    mask[mask == 1] = 255

    # dice = 2 * sum(prediction inside the mask) / (sum(prediction) + sum(mask))
    overlap = np.sum(infer_image[mask == 255]) * 2.0
    total = np.sum(infer_image) + np.sum(mask)
    # Both images empty: the score is undefined, so record NaN explicitly
    # instead of dividing by zero.
    dice = overlap / total if total > 0 else np.nan

    dice_scores.append(dice)

# Assign the whole column at once; DataFrame.set_value was removed from pandas
# and per-cell writes are slow.
img_df['dice_coef'] = dice_scores

In [None]:
# Preview the first rows, which now include the per-tile 'dice_coef' column.
img_df.head()

In [None]:
# Previously recorded averages (presumably one per model checkpoint):
# 0.270229 (no blur) Unet_Dilated-20170917-223544.hdf5
# 0.305918 (with blur aug during training) Unet_Dilated-20170921-141812.hdf5
# 0.334121 (with blur aug during training large dataset) 

# Average only the numeric 'dice_coef' column, leaving out tiles whose score is
# exactly 0.  Calling mean() on the whole frame would also try to average the
# string path columns.  Series.mean skips NaN scores by default.
nonzero_dice = img_df.loc[img_df['dice_coef'] != 0, 'dice_coef']
avg_dice = nonzero_dice.mean()
# print() with a single pre-formatted string behaves the same on Python 2 and 3.
print('average dice score: {}'.format(avg_dice))

In [None]:
# Rank the tiles from highest to lowest dice score and show the first ten.
sorted_df = img_df.sort_values(by='dice_coef', ascending=False)
sorted_df.head(n=10)