This notebook evaluates field extent and boundary predictions against validation data, uses the validation results to select the best-performing months, and evaluates the consensus (averaged) predictions from the selected months.

## Load packages and modules

In [1]:
import numpy as np
import pandas as pd
import imageio.v2 as imageio
import os
from glob import glob
import sys
from osgeo import gdal,osr

In [2]:
# import functions from modules
from datasets import export_geotiff
from evaluation import Calculate_IoUs,get_accuracy_scores

## Define parameters

In [3]:
# --- study area and period ---
# country prefix used in the prediction and ground truth file names
# country = 'Mozambique'
country = "Rwanda"
str_year = "2021"

# all candidate image months, as zero-padded strings
# str_months=['02','04','06','08','10','12']
str_months = ["03", "04", "08", "10", "11", "12"]

# --- folders ---
# folder holding the predictions and folder holding the ground truth
prediction_folder = "results"
groundtruth_folder = "../0_Data_preparation/results/groundtruth"

# folder in which the averaged results are stored
out_folder = "results/averaged"

In [4]:
# create the output folder (and any missing parents); exist_ok=True makes the
# cell safe to re-run and avoids the check-then-create race of isdir()+makedirs()
os.makedirs(out_folder, exist_ok=True)

## Identify predicted and ground truth files

In [5]:
# NOTE: glob returns matches in arbitrary (file-system dependent) order, so every
# list is sorted to make the chunk order, and therefore the run, reproducible.

# predicted extent probabilities for all months
files_extent_predictions=sorted(glob(prediction_folder+'/'+country+'*extent_prob_*.tif'))
print('Found {} field exent probability images'.format(len(files_extent_predictions)))

# ground truth field extent
files_extent_true=sorted(glob(groundtruth_folder+'/'+country+'*crop_field_extent*.tif'))
print('Found {} ground truth field exent images'.format(len(files_extent_true)))

# ground truth field boundary
files_boundaries_true=sorted(glob(groundtruth_folder+'/'+country+'*crop_field_bound*.tif'))
print('Found {} ground truth field boundary images'.format(len(files_boundaries_true)))

Found 738 field exent probability images
Found 123 ground truth field exent images
Found 123 ground truth field boundary images


## Evaluation of predictions for all months - OA, F1 and MCC

In [6]:
# number of top-ranked months (by mean MCC) kept for the consensus prediction
# (the summary message printed at the end of this cell spells this number out)
n_best_months=3

# list of chunk ids: the last two underscore-separated tokens of each ground truth
# file name (4-character extension stripped). Built once, outside the month loop,
# so it is defined even when str_months is empty and is not rebuilt for every month.
chunk_ids=['_'.join(os.path.basename(file_extent_true)[:-4].split('_')[-2:]) for file_extent_true in files_extent_true]

# per-chunk scores for all months; kept apart from the mean_* dicts below so that
# those only ever hold one scalar per month
chunk_accuracy={str_month:[] for str_month in str_months}
chunk_f1={str_month:[] for str_month in str_months}
chunk_mcc={str_month:[] for str_month in str_months}

# loop for all months
for str_month in str_months:
    for file_extent_true,chunk_id in zip(files_extent_true,chunk_ids):
        # NOTE(review): the ground truth rasters are re-read for every month so each
        # scoring call receives fresh arrays; they could be cached per chunk if
        # get_accuracy_scores is confirmed not to modify its inputs.
        # read in ground truth extent file
        extent_true=imageio.imread(file_extent_true)
        # read in corresponding ground truth boundary file
        # (same path with 'extent' replaced by 'bound')
        boundary_true=imageio.imread(file_extent_true.replace('extent','bound'))

        # identify and read in corresponding predicted extent probabilities file
        extent_prob_predicted_file=os.path.join(prediction_folder,'_'.join([country,'extent_prob',str_year,str_month,chunk_id])+'.tif')
        extent_prob_predicted=imageio.imread(extent_prob_predicted_file)

        # calculate evaluation scores
        accuracy,f1,mcc=get_accuracy_scores(extent_true,boundary_true,extent_prob_predicted)

        # keep the second element of each metric's .get() result
        # (presumably a (name, value) pair - confirm in the evaluation module)
        chunk_accuracy[str_month].append(accuracy.get()[1])
        chunk_f1[str_month].append(f1.get()[1])
        chunk_mcc[str_month].append(mcc.get()[1])

# mean scores over all chunks, one value per month
mean_accuracy={str_month:np.mean(chunk_accuracy[str_month]) for str_month in str_months}
mean_f1={str_month:np.mean(chunk_f1[str_month]) for str_month in str_months}
mean_mcc={str_month:np.mean(chunk_mcc[str_month]) for str_month in str_months}

print('mean accuracy: ',mean_accuracy)
print('mean F1 score: ',mean_f1)
print('mean MCC: ',mean_mcc)

# rank the months by mean MCC (highest first) and keep the best ones
highest_mccs=sorted(mean_mcc.items(), key=lambda item: item[1],reverse=True)[:n_best_months]
print('The three months with highest MCCs: ',highest_mccs)
selected_months=[item[0] for item in highest_mccs]

mean accuracy:  {'03': 0.729652956229998, '04': 0.7240467230362735, '08': 0.7209047786857291, '10': 0.7231197685498895, '11': 0.7312427565828762, '12': 0.723182380309055}
mean F1 score:  {'03': 0.8186620063153506, '04': 0.8124161784649712, '08': 0.8096760266082574, '10': 0.8131651103133967, '11': 0.8197450286835098, '12': 0.8115618585923122}
mean MCC:  {'03': 0.24091206986578043, '04': 0.24440012200501474, '08': 0.23878090579471944, '10': 0.2456711636626551, '11': 0.23800338966525164, '12': 0.2537885513573362}
The three months with highest MCCs:  [('12', 0.2537885513573362), ('10', 0.2456711636626551), ('04', 0.24440012200501474)]


## Combine predictions from selected months, evaluate and export results

Field extent and boundary probabilities are averaged over selected months.

In [7]:
# Average the extent and boundary probabilities of the selected months for every
# chunk, score the averaged extent against the ground truth and export both
# averages as GeoTIFFs. Relies on chunk_ids and selected_months produced by the
# per-month evaluation cell.
averaged_mean_acc=[]
averaged_mean_f1=[]
averaged_mean_mcc=[]

# fail early with a clear message instead of a TypeError on a None accumulator
if not selected_months:
    raise ValueError('No months selected: cannot average predictions')

# loop through all chunks and average over months
for chunk_id in chunk_ids:
    extent_average=None
    bound_average=None
    for str_month in selected_months:
        # read in field extent probability geotiff and metadata
        extent_prob_predicted_file=os.path.join(prediction_folder,'_'.join([country,'extent_prob',str_year,str_month,chunk_id])+'.tif')
        ds_extent = gdal.Open(extent_prob_predicted_file)
        # gdal.Open returns None on failure unless GDAL exceptions are enabled
        if ds_extent is None:
            raise FileNotFoundError('Could not open '+extent_prob_predicted_file)
        # georeferencing of the last month read is reused when exporting the averages
        geotrans=ds_extent.GetGeoTransform()
        proj=ds_extent.GetProjection()
        np_extent = ds_extent.GetRasterBand(1).ReadAsArray()

        # read in boundary probability (same file name with 'extent' replaced by 'bound')
        bound_prob_predicted_file=extent_prob_predicted_file.replace('extent','bound')
        ds_bound=gdal.Open(bound_prob_predicted_file)
        if ds_bound is None:
            raise FileNotFoundError('Could not open '+bound_prob_predicted_file)
        np_bound = ds_bound.GetRasterBand(1).ReadAsArray()

        # the first month initialises the running sums, later months are added in place
        if extent_average is None:
            extent_average=np_extent
            bound_average=np_bound
        else:
            extent_average+=np_extent
            bound_average+=np_bound
        # dropping the references closes the GDAL datasets
        ds_extent=None
        ds_bound=None
    # calculate averages
    extent_average/=len(selected_months)
    bound_average/=len(selected_months)

    # find ground truth extent file for this chunk
    fn_prefix='_'.join([country,'*extent',chunk_id])+'.tif'
    # sorted so the pick is deterministic if the pattern ever matches several files
    files_extent_true_chunk=sorted(glob(groundtruth_folder+'/'+fn_prefix))
    if not files_extent_true_chunk:
        raise FileNotFoundError('No ground truth extent file matching '+groundtruth_folder+'/'+fn_prefix)
    file_extent_true=files_extent_true_chunk[0]

    # read in ground truth extent and boundary file
    extent_true=imageio.imread(file_extent_true)
    boundary_true=imageio.imread(file_extent_true.replace('extent','bound'))

    # calculate evaluation scores for the averaged extent probabilities
    accuracy,f1,mcc=get_accuracy_scores(extent_true,boundary_true,extent_average)
    averaged_mean_acc.append(accuracy.get()[1])
    averaged_mean_f1.append(f1.get()[1])
    averaged_mean_mcc.append(mcc.get()[1])

    # export as geotiffs; the selected months are encoded in the output file names
    outname_extent='_'.join([country,'average_extent_prob',str_year,'_'.join(selected_months),chunk_id])+'.tif'
    outname_extent=os.path.join(out_folder,outname_extent)
    export_geotiff(outname_extent,extent_average,geotrans,proj,gdal.GDT_Float32)

    outname_bound='_'.join([country,'average_bound_prob',str_year,'_'.join(selected_months),chunk_id])+'.tif'
    outname_bound=os.path.join(out_folder,outname_bound)
    export_geotiff(outname_bound,bound_average,geotrans,proj,gdal.GDT_Float32)

In [8]:
# average the per-chunk scores of the months-averaged predictions over all chunks
overall_accuracy=np.mean(averaged_mean_acc)
overall_f1=np.mean(averaged_mean_f1)
overall_mcc=np.mean(averaged_mean_mcc)

# report the three summary scores
print('mean accuracy of months-averaged predictions: ',overall_accuracy)
print('mean F1 score of months-averaged predictions: ',overall_f1)
print('mean MCC of months-averaged predictions: ',overall_mcc)

mean accuracy of months-averaged predictions:  0.7508476692005877
mean F1 score of months-averaged predictions:  0.8345175457888819
mean MCC of months-averaged predictions:  0.28746021573549563
