# Sylhet Floods 2022
## calc_accuracyVsLabel.ipynb
This script evaluates the flood mapping algorithms against the hand labels. It computes metrics for specified image dates and sources.

In [1]:
from pathlib import Path
import os
import sys
import pandas as pd
import numpy as np
import rasterio
import matplotlib.pyplot as plt
from rasterio.plot import show
from rasterio.mask import mask
from itertools import chain
from pyproj import Transformer
from matplotlib.patches import Patch
from matplotlib.colors import ListedColormap
import matplotlib.colors as colors
from datetime import datetime
import geopandas as gpd
import copy
import collections
from shapely.geometry import mapping
import pycrs
from shapely.geometry import Polygon
from functools import reduce
import shutil

In [2]:
# Set the root path
rootPath = Path('Z:/media/mule/Projects/NASA/NIP/Data')

In [3]:
# Set the module path for helpers scripts
module_path = os.path.abspath(os.path.join('C:/Users/alexa/Documents/GitHub/Sylhet2022Floods/scr/'))
if module_path not in sys.path:
    sys.path.append(module_path)

In [4]:
# Import module from analyse_modifiedDevries containing helpful functions to use
import importlib
import helpers.analyse_modifiedDevries as analyse_DV
importlib.reload(analyse_DV)
import helpers.prepare_flood_raster as prep_raster
importlib.reload(prep_raster)

<module 'Helpers.prepare_flood_raster' from 'C:\\Users\\alexsaunders\\Documents\\01_uoa\\04_git\\NIP\\Sylhet\\Helpers\\prepare_flood_raster.py'>

## PART 1: Get the label data

In [5]:
# Move the mosaic NASSA IMPACT predictions to a single folder
labelPath = rootPath/'Raster/SylhetLabels/finalLabels'

In [6]:
labelFiles = [file for file in list(labelPath.iterdir()) if file.suffix=='.tif']

In [7]:
labelDates = list(np.unique([item.stem.split('_')[0] for item in labelFiles]))

In [8]:
labelDates

['20220524', '20220530', '20220714']

## PART 2: Load the map data for scorign against labels

In [9]:
# Open the GFM mosaiced raster files and get the image dates
GFMPath = rootPath/'Raster/SylhetCopernicusGFM/sylhet_district_2022-05-01-2022-08-31/Mosaic'
GFM_mosaics = [file for file in (GFMPath).iterdir() if file.is_file() and '2022' in str(file)]
GFM_dates = [str(datetime.strptime(mosaic.stem[-10:].replace('_',''), '%Y%m%d').date()) for mosaic in GFM_mosaics]
print('GFM image dates: ', GFM_dates)

GFM image dates:  ['2022-05-01', '2022-05-11', '2022-05-13', '2022-05-23', '2022-05-25', '2022-06-04', '2022-06-06', '2022-06-16', '2022-06-18', '2022-06-28', '2022-06-30', '2022-07-12', '2022-07-22', '2022-07-24', '2022-08-05', '2022-08-15', '2022-08-27', '2022-08-29']


In [10]:
# Open the modified Devries mosaiced raster files and get the image dates
DevriesPath = rootPath/'Raster/Sylhet/Sen1MitchellSingleOrbit/Mosaic'
Devries_mosaics = [file for file in (DevriesPath).iterdir() if file.is_file() and '2022' in str(file)]
Devries_dates = [str(datetime.strptime(mosaic.stem[-14:-6], '%Y%m%d').date()) for mosaic in Devries_mosaics]
print('Modified DeVries image dates: ', Devries_dates)

Modified DeVries image dates:  ['2022-05-01', '2022-05-11', '2022-05-13', '2022-05-23', '2022-05-25', '2022-06-04', '2022-06-06', '2022-06-16', '2022-06-18', '2022-06-28', '2022-06-30', '2022-07-12', '2022-07-22', '2022-07-24', '2022-08-03', '2022-08-05', '2022-08-15', '2022-08-27', '2022-08-29']


In [11]:
# Open the UNOSAT raster files and get the image dates
UNOSATPath = rootPath/'Raster/SylhetUNOSAT'
UNOSAT_mosaics = [file for file in (UNOSATPath).iterdir() if file.is_file() and 'allTouchFalse' in str(file)]
UNOSAT_mosaics.reverse()
UNOSAT_dates = ['2022-05-25','2022-06-19']
print('UNOSAT image dates: ', UNOSAT_dates)

UNOSAT image dates:  ['2022-05-25', '2022-06-19']


In [12]:
# Open the NASA IMPACT raster files and get the image dates
IMPACTPath = rootPath/'Raster/SylhetNASAImpact/5_Preds/Mosaic/'
IMPACT_mosaics = [file for file in (IMPACTPath).iterdir() if file.is_file() and '2022' in str(file) and file.suffix=='.tif']
IMPACT_dates = [str(datetime.strptime(file.stem, '%Y%m%d').date()) for file in IMPACT_mosaics]
print('NASA IMPACT image dates: ', IMPACT_dates)

NASA IMPACT image dates:  ['2022-05-01', '2022-05-11', '2022-05-13', '2022-05-23', '2022-05-25', '2022-06-04', '2022-06-06', '2022-06-16', '2022-06-18', '2022-06-28', '2022-06-30', '2022-07-12', '2022-07-22', '2022-07-24', '2022-08-03', '2022-08-05', '2022-08-15', '2022-08-27', '2022-08-29']


## PART 3: Comparisons

### Dates to compare

Compare 20220524 label with 2022-05-25

Compare 20220530 label with 2022-06-04

Compare 20220714 label with 2022-07-12

### Compare on a label by label basis
I.e., compute accuracy for a single label

In [25]:
# Function to compare a given map with a given label and retrun a list of confusion matrix metrics
# ['TN', 'FP', 'FN', 'TP', 'Precision', 'Recall', 'Accuracy', 'IOU']
def compare_map_wLabel(labelFile, mapRasterFile):
    
    # Get the label date and tileID
    labelDate=labelFile.stem.split('_')[0]
    labelTile='_'.join(labelFile.stem.split('_')[1:3])
    
    # Open the label raster
    labelRaster = prep_raster.load_label_for_comparison(str(labelFile))
    
    # Open the map to compare, and reproject and match the extent
    mapRaster = prep_raster.load_raster_for_comparison(str(mapRasterFile), 0)
    mapRasterReproj = prep_raster.reproj_match_raster(mapRaster, labelRaster)
    
    # Convert rasters to numpy arrays, make binary integers
    labelValues = prep_raster.raster_to_binary_list(labelRaster)
    mapValues = prep_raster.raster_to_binary_list(mapRasterReproj)
    
    # Run the function to get the confusion matrix and other metrics
    metrics_list = analyse_DV.get_confusion_matrix_metrics(labelValues, mapValues)
    
    return [labelDate, labelTile] + metrics_list

In [19]:
# Set up empty dataframe for saving results
confus_matrix_metrics = pd.DataFrame(data = [], index = ['MapSource', 'MapDate', 'labelDate', 'labelTile', 'TN', 'FP', 'FN', 'TP', 'Precision', 'Recall', 'Accuracy', 'IOU']).T

#### Perform on first set of labels from 20220524

In [38]:
# Define the date for the label and date for the nearest S1 images
labelDate = labelDates[0]
compareDate = '20220525'

In [39]:
# Get the labels on the given date
labelDateFiles = [file for file in labelFiles if labelDate in str(file)]

In [40]:
# Get the Devries, GFM and NASAIMPACT rasters to comapre, from the closest date
DevriesCompare = [file for file in Devries_mosaics if compareDate in str(file)][0]
GFMCompare = [file for file in GFM_mosaics if str(datetime.strptime(compareDate, '%Y%m%d').date()).replace('-','_') in str(file)][0]
IMPACTCompare = [file for file in IMPACT_mosaics if compareDate in str(file)][0]

In [53]:
# Loop through the labels, get the metrics and append to the dataframe
for i, labelDateFile in enumerate(labelDateFiles):
    
    # Get metrics for Devries
    confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['Devries',compareDate]+list(compare_map_wLabel(labelDateFile, DevriesCompare))], 
                                                                      columns=confus_matrix_metrics.columns), ignore_index=True)
    # Get metrics for GFM
    confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['GFM',compareDate]+list(compare_map_wLabel(labelDateFile, GFMCompare))], 
                                                                      columns=confus_matrix_metrics.columns), ignore_index=True)
    # Get metrics for NASA IMPACT
    confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['NASAIMPACT',compareDate]+list(compare_map_wLabel(labelDateFile, IMPACTCompare))], 
                                                                      columns=confus_matrix_metrics.columns), ignore_index=True)

  confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['Devries',compareDate]+list(compare_map_wLabel(labelDateFile, DevriesCompare))],
  confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['GFM',compareDate]+list(compare_map_wLabel(labelDateFile, GFMCompare))],
  confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['NASAIMPACT',compareDate]+list(compare_map_wLabel(labelDateFile, IMPACTCompare))],
  confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['Devries',compareDate]+list(compare_map_wLabel(labelDateFile, DevriesCompare))],
  confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['GFM',compareDate]+list(compare_map_wLabel(labelDateFile, GFMCompare))],
  confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['NASAIMPACT',compareDate]+list(compare_map_wLabel(labelDateFile, IMPACTCompare))],
  confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['

#### Perform on second set of labels from 20220530

In [26]:
labelDates[1]

'20220530'

In [27]:
# Define the date for the label and date for the nearest S1 images
labelDate = labelDates[1]
compareDate = '20220604'

In [28]:
# Get the labels on the given date
labelDateFiles = [file for file in labelFiles if labelDate in str(file)]

In [29]:
# Get the Devries, GFM and NASAIMPACT rasters to comapre, from the closest date
DevriesCompare = [file for file in Devries_mosaics if compareDate in str(file)][0]
GFMCompare = [file for file in GFM_mosaics if str(datetime.strptime(compareDate, '%Y%m%d').date()).replace('-','_') in str(file)][0]
IMPACTCompare = [file for file in IMPACT_mosaics if compareDate in str(file)][0]

In [30]:
# Loop through the labels, get the metrics and append to the dataframe
for i, labelDateFile in enumerate(labelDateFiles):
    
    # Get metrics for Devries
    confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['Devries',compareDate]+list(compare_map_wLabel(labelDateFile, DevriesCompare))], 
                                                                      columns=confus_matrix_metrics.columns), ignore_index=True)
    # Get metrics for GFM
    confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['GFM',compareDate]+list(compare_map_wLabel(labelDateFile, GFMCompare))], 
                                                                      columns=confus_matrix_metrics.columns), ignore_index=True)
    # Get metrics for NASA IMPACT
    confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['NASAIMPACT',compareDate]+list(compare_map_wLabel(labelDateFile, IMPACTCompare))], 
                                                                      columns=confus_matrix_metrics.columns), ignore_index=True)

  confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['Devries',compareDate]+list(compare_map_wLabel(labelDateFile, DevriesCompare))],
  confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['GFM',compareDate]+list(compare_map_wLabel(labelDateFile, GFMCompare))],
  confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['NASAIMPACT',compareDate]+list(compare_map_wLabel(labelDateFile, IMPACTCompare))],
  confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['Devries',compareDate]+list(compare_map_wLabel(labelDateFile, DevriesCompare))],
  confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['GFM',compareDate]+list(compare_map_wLabel(labelDateFile, GFMCompare))],
  confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['NASAIMPACT',compareDate]+list(compare_map_wLabel(labelDateFile, IMPACTCompare))],
  confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['

#### Perform on third set of labels from 20220714

In [32]:
labelDates[2]

'20220714'

In [33]:
# Define the date for the label and date for the nearest S1 images
labelDate = labelDates[2]
compareDate = '20220712'

In [34]:
# Get the labels on the given date
labelDateFiles = [file for file in labelFiles if labelDate in str(file)]

In [35]:
# Get the Devries, GFM and NASAIMPACT rasters to comapre, from the closest date
DevriesCompare = [file for file in Devries_mosaics if compareDate in str(file)][0]
GFMCompare = [file for file in GFM_mosaics if str(datetime.strptime(compareDate, '%Y%m%d').date()).replace('-','_') in str(file)][0]
IMPACTCompare = [file for file in IMPACT_mosaics if compareDate in str(file)][0]

In [36]:
# Loop through the labels, get the metrics and append to the dataframe
for i, labelDateFile in enumerate(labelDateFiles):
    
    # Get metrics for Devries
    confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['Devries',compareDate]+list(compare_map_wLabel(labelDateFile, DevriesCompare))], 
                                                                      columns=confus_matrix_metrics.columns), ignore_index=True)
    # Get metrics for GFM
    confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['GFM',compareDate]+list(compare_map_wLabel(labelDateFile, GFMCompare))], 
                                                                      columns=confus_matrix_metrics.columns), ignore_index=True)
    # Get metrics for NASA IMPACT
    confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['NASAIMPACT',compareDate]+list(compare_map_wLabel(labelDateFile, IMPACTCompare))], 
                                                                      columns=confus_matrix_metrics.columns), ignore_index=True)

  confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['Devries',compareDate]+list(compare_map_wLabel(labelDateFile, DevriesCompare))],
  confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['GFM',compareDate]+list(compare_map_wLabel(labelDateFile, GFMCompare))],
  confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['NASAIMPACT',compareDate]+list(compare_map_wLabel(labelDateFile, IMPACTCompare))],
  confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['Devries',compareDate]+list(compare_map_wLabel(labelDateFile, DevriesCompare))],
  confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['GFM',compareDate]+list(compare_map_wLabel(labelDateFile, GFMCompare))],
  pre = tp / (tp+fp)
  confus_matrix_metrics = confus_matrix_metrics.append(pd.DataFrame(data=[['NASAIMPACT',compareDate]+list(compare_map_wLabel(labelDateFile, IMPACTCompare))],
  confus_matrix_metrics = confus_matrix_metrics.append(

In [41]:
# Write out the confusion matrix metrics as csv
confus_matrix_metrics.to_csv(rootPath/'Table/SylhetConfusionMatrix/confusMatrixVsLabels_20220524-20220715.csv',index=False)