### Segmentation evaluation for training and testing set of baseline

In [1]:
from shapely.ops import cascaded_union
import matplotlib.pyplot as plt
import geopandas as gpd
import multiprocessing
import pandas as pd
import numpy as np
import skimage.io
import tqdm
import glob
import math
import gdal
import time
import sys
import os

import matplotlib.pyplot as plt
plt.rcParams.update({'font.size': 16})
import matplotlib as mpl
mpl.rcParams['figure.dpi'] = 300
import matplotlib
# matplotlib.use('Agg') # non-interactive
from sklearn.metrics import roc_auc_score, average_precision_score, roc_curve, auc

import solaris as sol
from solaris.utils.core import _check_gdf_load
from solaris.raster.image import create_multiband_geotiff 

# import from data_postproc_funcs
module_path = os.path.abspath(os.path.join('../src/'))
if module_path not in sys.path:
    sys.path.append(module_path)
from sn7_baseline_postproc_funcs import map_wrapper, multithread_polys, \
        calculate_iou, track_footprint_identifiers, \
        sn7_convert_geojsons_to_csv

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
def dice_coef(y_true, y_pred):
    intersection = np.logical_and(y_true, y_pred)
    return (2. * np.sum(intersection)) / (np.sum(y_true) + np.sum(y_pred))

def iou_coef(y_true, y_pred):
    intersection = np.logical_and(y_true, y_pred)
    union = np.logical_or(y_true, y_pred)
    iou = np.sum(intersection) / np.sum(union)
    return iou

In [3]:
def auc_coef(y_true, y_pred):
    y_true = y_true.flatten()
    y_pred = y_pred.flatten()
    fpr, tpr, thresholds = roc_curve(y_true, y_pred)
    return auc(fpr, tpr)

def ap_coef(y_true, y_pred):
    y_true = y_true.flatten()
    y_pred = y_pred.flatten()
    return average_precision_score(y_true, y_pred)

In [4]:
def evaluate(pred_top_dir, im_top_dir, outfile, mode):
    df = pd.DataFrame()
    
    aois = sorted([f for f in os.listdir(os.path.join(im_top_dir))
                   if os.path.isdir(os.path.join(im_top_dir, f))
                  and f != 'list'])
    
    print(aois)

    auc_multi_aoi = 0.
    ap_multi_aoi = 0.
    dice_multi_aoi = 0.
    iou_multi_aoi = 0.

    for i, aoi in enumerate(aois):
        print(i, "aoi:", aoi)

        pred_dir = os.path.join(pred_top_dir, 'grouped', aoi, 'masks')
        im_dir = os.path.join(im_top_dir, aoi, 'images_masked')
        gt_dir = os.path.join(im_top_dir, aoi, 'masks')

        im_list = sorted([z for z in os.listdir(im_dir) if z.endswith('.tif')])

        auc_one_aoi = 0.
        ap_one_aoi = 0.
        dice_one_aoi = 0.
        iou_one_aoi = 0.

        for j, f in enumerate(im_list):        
            sample_mask_name = f
            if mode == 'baseline':
                sample_mask_path = os.path.join(pred_dir, sample_mask_name)
            elif mode == 'winner':
                sample_mask_path = os.path.join(pred_dir, sample_mask_name).replace('.tif', '.npy')
            sample_im_path = os.path.join(im_dir, sample_mask_name)
            sample_gt_path = os.path.join(gt_dir, sample_mask_name).replace('.tif', '_Buildings.tif')

            image = skimage.io.imread(sample_im_path)
            if mode == 'baseline':
                mask_image = skimage.io.imread(sample_mask_path)
            elif mode == 'winner':
                mask_image = np.load(sample_mask_path)
            gt_image = skimage.io.imread(sample_gt_path)
    #         print(mask_image)
    #         print("mask_image.shape:", mask_image.shape)
    #         print("min, max, mean mask image:", np.min(mask_image), np.max(mask_image), np.mean(mask_image))
    #         print(gt_image)
    #         print("gt_image.shape:", gt_image.shape)
    #         print("min, max, mean gt image:", np.min(gt_image), np.max(gt_image), np.mean(gt_image))

            norm = (mask_image - np.min(mask_image)) / (np.max(mask_image) - np.min(mask_image))

    #         print(norm)
    #         print("mask_image.shape:", norm.shape)
    #         print("min, max, mean mask image:", np.min(norm), np.max(norm), np.mean(norm))

            gt_image = gt_image / 255
            
            if mode == 'winner':
                tmp = np.zeros((1024, 1024))
                tmp[:gt_image.shape[0],:gt_image.shape[1]] = gt_image
                gt_image = np.repeat(tmp, 3, axis=0)
                gt_image = np.repeat(gt_image, 3, axis=1)
            
            auc = auc_coef(gt_image, norm)
            ap = ap_coef(gt_image, norm)
            
            auc_one_aoi += auc
            ap_one_aoi += ap
                
            norm = np.where(norm > 0.5, 1, 0)

            dice = dice_coef(gt_image, norm)
            iou = iou_coef(gt_image, norm)
            dice_one_aoi += dice
            iou_one_aoi += iou

        auc_one_aoi = auc_one_aoi / len(im_list)
        ap_one_aoi = ap_one_aoi / len(im_list)
        dice_one_aoi = dice_one_aoi / len(im_list)
        iou_one_aoi = iou_one_aoi / len(im_list)

        print("AUC: ", auc_one_aoi)
        print("AP: ", ap_one_aoi)
        print("Dice: ", dice_one_aoi)
        print("IOU: ", iou_one_aoi)
        
        row1 = {'AOI': aoi, 'AUC': auc_one_aoi, 'AP': ap_one_aoi, 'Dice': dice_one_aoi, 'IOU': iou_one_aoi}
        df = df.append(row1, ignore_index=True)
        
        auc_multi_aoi += auc_one_aoi
        ap_multi_aoi += ap_one_aoi
        dice_multi_aoi += dice_one_aoi
        iou_multi_aoi += iou_one_aoi
    
    df.to_csv(outfile, index=False, header=True)

    auc_multi_aoi = auc_multi_aoi / len(aois)
    ap_multi_aoi = ap_multi_aoi / len(aois)
    dice_multi_aoi = dice_multi_aoi / len(aois)
    iou_multi_aoi = iou_multi_aoi / len(aois)

    print("Average AUC: ", auc_multi_aoi)
    print("Average AP: ", ap_multi_aoi)
    print("Average Dice: ", dice_multi_aoi)
    print("Average IOU: ", iou_multi_aoi)

In [5]:
def group(raw_name, grouped_name):
    im_list = sorted([z for z in os.listdir(os.path.join(raw_name)) if z.endswith('.npy')])
    df = pd.DataFrame({'image': im_list})
    roots = [z.split('mosaic_')[-1].split('.npy')[0] for z in df['image'].values]
    df['root'] = roots
    # copy files
    for idx, row in df.iterrows():
        in_path_tmp = os.path.join(raw_name, row['image'])
        out_dir_tmp = os.path.join(grouped_name, row['root'], 'masks')
        os.makedirs(out_dir_tmp, exist_ok=True)
        cmd = 'cp ' + in_path_tmp + ' ' + out_dir_tmp
#         print("cmd:", cmd)
        os.system(cmd)   

In [None]:
raw_name = '/local_storage/users/hfang/HRNet_SN7/output/spacenet7/seg_hrnet_w48_512x512_sgd_lr1e-2_wd4e-5_bs_16_epoch70/test_results/npy_compose'
grouped_name = '/local_storage/users/hfang/HRNet_SN7/output/spacenet7/seg_hrnet_w48_512x512_sgd_lr1e-2_wd4e-5_bs_16_epoch70/test_results/grouped'
group(raw_name, grouped_name)

In [6]:
# Set prediction and image (ground truth) directories (edit appropriately)
pred_top_dir = '/local_storage/users/hfang/HRNet_SN7/output/spacenet7/seg_hrnet_w48_512x512_sgd_lr1e-2_wd4e-5_bs_16_epoch70/test_results'
im_top_dir = '/local_storage/datasets/sn7_winner_split/test_public'

evaluate(pred_top_dir, im_top_dir, 'segmentation_metrics/segmentation_winner_ours.csv', mode='winner')

['L15-0387E-1276N_1549_3087_13', 'L15-0566E-1185N_2265_3451_13', 'L15-0632E-0892N_2528_4620_13', 'L15-1015E-1062N_4061_3941_13', 'L15-1200E-0847N_4802_4803_13', 'L15-1276E-1107N_5105_3761_13', 'L15-1438E-1134N_5753_3655_13', 'L15-1615E-1206N_6460_3366_13', 'L15-1690E-1211N_6763_3346_13', 'L15-1848E-0793N_7394_5018_13']
0 aoi: L15-0387E-1276N_1549_3087_13
AUC:  0.9767957420782369
AP:  0.6118795379019007
Dice:  0.16206110668688536
IOU:  0.08838868746037797
1 aoi: L15-0566E-1185N_2265_3451_13
AUC:  0.9934992217667559
AP:  0.4828566784096869
Dice:  0.22629240818585203
IOU:  0.13630067551600264
2 aoi: L15-0632E-0892N_2528_4620_13
AUC:  0.9754743140218279
AP:  0.5650290470334722
Dice:  0.10084930881783233
IOU:  0.05318996677971516
3 aoi: L15-1015E-1062N_4061_3941_13
AUC:  0.974276471217617
AP:  0.5795348227015709
Dice:  0.0325218033438439
IOU:  0.01658259431777519
4 aoi: L15-1200E-0847N_4802_4803_13
AUC:  0.9177868453610952
AP:  0.5746638834210113
Dice:  0.14732029282452605
IOU:  0.079614098

  recall = tps / tps[-1]
  recall = tps / tps[-1]
  recall = tps / tps[-1]
  recall = tps / tps[-1]
  recall = tps / tps[-1]
  recall = tps / tps[-1]
  recall = tps / tps[-1]
  recall = tps / tps[-1]
  recall = tps / tps[-1]
  recall = tps / tps[-1]
  recall = tps / tps[-1]


AUC:  nan
AP:  nan
Dice:  0.09690589556623772
IOU:  0.05431171287636695
Average AUC:  nan
Average AP:  nan
Average Dice:  0.14970786582261075
Average IOU:  0.08721418986767476


In [6]:
def dice_iou_threshold(pred_top_dir, im_top_dir, outfile, mode):
    df = pd.DataFrame()
    
    aois = sorted([f for f in os.listdir(os.path.join(im_top_dir))
                   if os.path.isdir(os.path.join(im_top_dir, f))
                  and f != 'list'])

    for theta in range(11):
        threshold = theta / 10
        print(threshold)
        
        dice_multi_aoi = 0.
        iou_multi_aoi = 0.

        for i, aoi in enumerate(aois):
            print(i, "aoi:", aoi)

            pred_dir = os.path.join(pred_top_dir, 'grouped', aoi, 'masks')
            im_dir = os.path.join(im_top_dir, aoi, 'images_masked')
            gt_dir = os.path.join(im_top_dir, aoi, 'masks')

            im_list = sorted([z for z in os.listdir(im_dir) if z.endswith('.tif')])

            dice_one_aoi = 0.
            iou_one_aoi = 0.

            for j, f in enumerate(im_list):        
                sample_mask_name = f
                if mode == 'baseline':
                    sample_mask_path = os.path.join(pred_dir, sample_mask_name)
                elif mode == 'winner':
                    sample_mask_path = os.path.join(pred_dir, sample_mask_name).replace('.tif', '.npy')
                sample_im_path = os.path.join(im_dir, sample_mask_name)
                sample_gt_path = os.path.join(gt_dir, sample_mask_name).replace('.tif', '_Buildings.tif')

                image = skimage.io.imread(sample_im_path)
                if mode == 'baseline':
                    mask_image = skimage.io.imread(sample_mask_path)
                elif mode == 'winner':
                    mask_image = np.load(sample_mask_path)
                gt_image = skimage.io.imread(sample_gt_path)
        #         print(mask_image)
        #         print("mask_image.shape:", mask_image.shape)
        #         print("min, max, mean mask image:", np.min(mask_image), np.max(mask_image), np.mean(mask_image))
        #         print(gt_image)
        #         print("gt_image.shape:", gt_image.shape)
        #         print("min, max, mean gt image:", np.min(gt_image), np.max(gt_image), np.mean(gt_image))

                norm = (mask_image - np.min(mask_image)) / (np.max(mask_image) - np.min(mask_image))

        #         print(norm)
        #         print("mask_image.shape:", norm.shape)
        #         print("min, max, mean mask image:", np.min(norm), np.max(norm), np.mean(norm))

                gt_image = gt_image / 255

                if mode == 'winner':
                    tmp = np.zeros((1024, 1024))
                    tmp[:gt_image.shape[0],:gt_image.shape[1]] = gt_image
                    gt_image = np.repeat(tmp, 3, axis=0)
                    gt_image = np.repeat(gt_image, 3, axis=1)


                norm = np.where(norm > threshold, 1, 0)

                dice = dice_coef(gt_image, norm)
                iou = iou_coef(gt_image, norm)
                dice_one_aoi += dice
                iou_one_aoi += iou

            dice_one_aoi = dice_one_aoi / len(im_list)
            iou_one_aoi = iou_one_aoi / len(im_list)

            print("Dice: ", dice_one_aoi)
            print("IOU: ", iou_one_aoi)

            row1 = {'AOI': aoi, 'Dice': dice_one_aoi, 'IOU': iou_one_aoi, 'threshold': threshold}
            df = df.append(row1, ignore_index=True)

            dice_multi_aoi += dice_one_aoi
            iou_multi_aoi += iou_one_aoi

        dice_multi_aoi = dice_multi_aoi / len(aois)
        iou_multi_aoi = iou_multi_aoi / len(aois)

        print("Average Dice: ", dice_multi_aoi)
        print("Average IOU: ", iou_multi_aoi)
        
    df.to_csv(outfile, index=False, header=True)

In [7]:
# Set prediction and image (ground truth) directories (edit appropriately)
pred_top_dir = '/local_storage/users/hfang/HRNet_SN7/output/spacenet7/seg_hrnet_w48_512x512_sgd_lr1e-2_wd4e-5_bs_16_epoch70/test_results'
im_top_dir = '/local_storage/datasets/sn7_winner_split/test_public'

dice_iou_threshold(pred_top_dir, im_top_dir, 'segmentation_metrics/segmentation_ours_infer_threshold.csv', mode='winner')

0.0
0 aoi: L15-0387E-1276N_1549_3087_13
Dice:  0.05912087974839937
IOU:  0.030465085916961763
1 aoi: L15-0566E-1185N_2265_3451_13
Dice:  0.008478867447901087
IOU:  0.0042724613902263
2 aoi: L15-0632E-0892N_2528_4620_13
Dice:  0.04818749091634234
IOU:  0.024688761466212954
3 aoi: L15-1015E-1062N_4061_3941_13
Dice:  0.05527082002130957
IOU:  0.028426756162299702
4 aoi: L15-1200E-0847N_4802_4803_13
Dice:  0.18087302015045392
IOU:  0.09943271736915564
5 aoi: L15-1276E-1107N_5105_3761_13
Dice:  0.04906682142129181
IOU:  0.025152781290772898
6 aoi: L15-1438E-1134N_5753_3655_13
Dice:  0.3683387224534129
IOU:  0.22688972103029237
7 aoi: L15-1615E-1206N_6460_3366_13
Dice:  0.10691707217583449
IOU:  0.05649468294361052
8 aoi: L15-1690E-1211N_6763_3346_13
Dice:  0.12348348598316178
IOU:  0.06591523532536708
9 aoi: L15-1848E-0793N_7394_5018_13
Dice:  0.01587265744289208
IOU:  0.008082581422867396
Average Dice:  0.10156098377609993
Average IOU:  0.056982078431776675
0.1
0 aoi: L15-0387E-1276N_1549_

  This is separate from the ipykernel package so we can avoid doing imports until
  


Dice:  nan
IOU:  nan
Average Dice:  nan
Average IOU:  nan


In [8]:
# Set prediction and image (ground truth) directories (edit appropriately)
pred_top_dir = '/local_storage/users/hfang/winner_docker/vis'
im_top_dir = '/local_storage/datasets/sn7_winner_split/test_public'

dice_iou_threshold(pred_top_dir, im_top_dir, 'segmentation_metrics/segmentation_winner_infer_threshold.csv', mode='winner')

0.0
0 aoi: L15-0387E-1276N_1549_3087_13
Dice:  0.059172985643818184
IOU:  0.03049276964406489
1 aoi: L15-0566E-1185N_2265_3451_13
Dice:  0.008483328465150897
IOU:  0.0042747212699304725
2 aoi: L15-0632E-0892N_2528_4620_13
Dice:  0.0482470281643182
IOU:  0.024720027028678915
3 aoi: L15-1015E-1062N_4061_3941_13
Dice:  0.05527345463838809
IOU:  0.028428148840967657
4 aoi: L15-1200E-0847N_4802_4803_13
Dice:  0.1809791599463617
IOU:  0.09949683071323934
5 aoi: L15-1276E-1107N_5105_3761_13
Dice:  0.04907192281019376
IOU:  0.025155463264519305
6 aoi: L15-1438E-1134N_5753_3655_13
Dice:  0.3683426484919032
IOU:  0.2268926456529335
7 aoi: L15-1615E-1206N_6460_3366_13
Dice:  0.10691740496804779
IOU:  0.05649486879253275
8 aoi: L15-1690E-1211N_6763_3346_13
Dice:  0.12348670756500103
IOU:  0.0659170648482917
9 aoi: L15-1848E-0793N_7394_5018_13
Dice:  0.015877734492890254
IOU:  0.008085214508873223
Average Dice:  0.1015852375186073
Average IOU:  0.05699577545640318
0.1
0 aoi: L15-0387E-1276N_1549_30

  This is separate from the ipykernel package so we can avoid doing imports until
  


Dice:  nan
IOU:  nan
Average Dice:  nan
Average IOU:  nan


In [9]:
# Set prediction and image (ground truth) directories (edit appropriately)
pred_top_dir = '/local_storage/users/hfang/inference_winner_split/sn7_baseline_preds'
im_top_dir = '/local_storage/datasets/sn7_winner_split/test_public'

dice_iou_threshold(pred_top_dir, im_top_dir, 'segmentation_metrics/segmentation_baseline_infer_threshold.csv', mode='baseline')

0.0
0 aoi: L15-0387E-1276N_1549_3087_13
Dice:  0.059120928377817974
IOU:  0.0304651117425597
1 aoi: L15-0566E-1185N_2265_3451_13
Dice:  0.0084788745800213
IOU:  0.00427246501204015
2 aoi: L15-0632E-0892N_2528_4620_13
Dice:  0.04823349864264706
IOU:  0.02471291612532469
3 aoi: L15-1015E-1062N_4061_3941_13
Dice:  0.055270865570944565
IOU:  0.028426780259982465
4 aoi: L15-1200E-0847N_4802_4803_13
Dice:  0.18103395611481043
IOU:  0.09952999901675434
5 aoi: L15-1276E-1107N_5105_3761_13
Dice:  0.04906686199164478
IOU:  0.025152802613070114
6 aoi: L15-1438E-1134N_5753_3655_13
Dice:  0.368338906286585
IOU:  0.22688985950570764
7 aoi: L15-1615E-1206N_6460_3366_13
Dice:  0.10691715647501582
IOU:  0.056494730027381486
8 aoi: L15-1690E-1211N_6763_3346_13
Dice:  0.1234835813380591
IOU:  0.06591528979805926
9 aoi: L15-1848E-0793N_7394_5018_13
Dice:  0.01587267065520446
IOU:  0.008082588274563096
Average Dice:  0.10158173000327506
Average IOU:  0.0569942542375443
0.1
0 aoi: L15-0387E-1276N_1549_3087_

  This is separate from the ipykernel package so we can avoid doing imports until
  


Dice:  nan
IOU:  nan
Average Dice:  nan
Average IOU:  nan
