This notebook selects the best-performing months based on the model evaluation metrics generated by the previous notebook and produces consensus predictions from the selected months.

## Load packages and modules

In [1]:
import numpy as np
import pandas as pd
import imageio.v2 as imageio
import os
from glob import glob
import sys
from osgeo import gdal,osr
from collections import Counter
from tqdm import tqdm

# register the folders that hold the existing and decode modules on the import search path
module_paths = [
    'decode/FracTAL_ResUNet/models/semanticsegmentation',
    'decode/FracTAL_ResUNet/nn/loss',
]
for module_path in module_paths:
    # folders that are already registered are left alone, so re-running the cell adds no duplicates
    if module_path in sys.path:
        continue
    sys.path.append(module_path)

In [2]:
# import functions from modules
from FracTAL_ResUNet import FracTAL_ResUNet_cmtsk
from datasets import export_geotiff
from datasets import *
from evaluation import Calculate_IoUs,get_accuracy_scores

## Define parameters

In [3]:
# --- hyperparameters for model architecture (passed to FracTAL_ResUNet_cmtsk) ---
n_filters = 32        # initial number of filters (nfilters_init)
depth = 6             # depth argument of the network
n_classes = 1         # number of classes (NClasses)

# --- inference settings ---
batch_size = 4        # number of images per mini batch produced by the dataloaders
codes_to_keep = [1]   # NOTE(review): not referenced by the visible cells; confirm it is still needed
ctx_name = 'cpu'      # compute context to use: 'cpu' or 'gpu'
gpu_id = 0            # index of the GPU, only read when ctx_name is 'gpu'

# --- inputs read by this notebook ---
# folder with the training metrics (one csv file per month)
metrics_folder = 'metrics'
# folder with the model parameter files
models_folder = 'model_weights'
# folder with the input RGB chunk geotiffs
input_folder = '../0_Data_preparation/results/RGB_chunks'
# folder with the ground truth labels
groundtruth_folder = '../0_Data_preparation/results/groundtruth'

# --- outputs written by this notebook ---
# predictions of the individual months
out_folder = 'results_finetuned'
# predictions averaged over the selected months
out_folder_averaged = 'results_finetuned/averaged'

# all candidate image months, as zero-padded strings
# (alternative set kept for reference: ['02','04','06','08','10','12'])
str_months = ['03', '04', '08', '10', '11', '12']

# year string used in the names of the averaged output files
str_year = '2021'

# country prefix of the input and output file names (alternative: 'Mozambique')
country = 'Rwanda'
# Build the WKT string of the projection (EPSG:3857) that is passed to export_geotiff
# for the per-month prediction geotiffs.
srs = osr.SpatialReference()
# Unless GDAL exceptions are enabled, ImportFromEPSG only reports failure through its
# return code (0 means success), so check it instead of exporting an invalid projection.
epsg_status = srs.ImportFromEPSG(3857)
if epsg_status != 0:
    raise RuntimeError('failed to import EPSG:3857 (OGR error code {})'.format(epsg_status))
prj = srs.ExportToWkt()

In [4]:
# Set MXNet ctx: the context the model parameters are loaded into and the
# input batches are copied to before prediction.
# NOTE(review): `mx` is not imported explicitly in this notebook; presumably it is
# provided by `from datasets import *`. Confirm.
if ctx_name == 'cpu':
    ctx = mx.cpu()
elif ctx_name == 'gpu':
    ctx = mx.gpu(gpu_id)
else:
    # fail here with a clear message instead of a NameError on `ctx` in a later cell
    raise ValueError("unsupported ctx_name {!r}: expected 'cpu' or 'gpu'".format(ctx_name))

In [5]:
# create the output folders (and any missing parent folders) unless they already exist
for folder in (out_folder, out_folder_averaged):
    os.makedirs(folder, exist_ok=True)

## Load training/validation metrics and select months

In [6]:
# number of best-performing months kept for the consensus predictions
n_best_months = 3

# highest validation MCC recorded in the metrics file of each candidate month
val_mccs = {}
for str_month in str_months:
    metrics_month_file = os.path.join(metrics_folder, 'metrics_month_' + str_month + '.csv')
    # candidate months without a metrics file are skipped
    if not os.path.exists(metrics_month_file):
        continue
    best_val_mcc = pd.read_csv(metrics_month_file)['val_mcc'].max()
    # max() is NaN when the column holds no valid value; NaN cannot be ranked reliably
    if pd.isna(best_val_mcc):
        print('skipping month {}: no valid val_mcc values in {}'.format(str_month, metrics_month_file))
        continue
    val_mccs[str_month] = best_val_mcc

# stop here instead of running every following cell on an empty month selection
if not val_mccs:
    raise FileNotFoundError(
        'no usable metrics files found in {!r} for months {}'.format(metrics_folder, str_months))

# keep the months with the highest MCC (fewer if fewer months have metrics);
# the sort is stable, so tied months keep their order in str_months
ranked_months = sorted(val_mccs.items(), key=lambda item: item[1], reverse=True)
highest_mccs = dict(ranked_months[:n_best_months])
print('selected months and mcc values: ',highest_mccs)
selected_months = list(highest_mccs.keys())

selected months and mcc values:  {'10': 0.4328345198321451, '04': 0.3520785675064072, '12': 0.3209426847704443}


## Identify RGB chunks with validation data available

In [7]:
# extract chunk ids of validation data
# glob returns matches in arbitrary order; sort so the chunks are handled in a reproducible order
extent_names = sorted(glob(groundtruth_folder + '/' + country + '*crop_field_extent*.tif'))

# derive the boundary file of each extent file by swapping 'extent' for 'bound' in the
# file name only, so a folder name that happens to contain 'extent' is left untouched
bound_names = [
    os.path.join(os.path.dirname(extent_name),
                 os.path.basename(extent_name).replace('extent', 'bound'))
    for extent_name in extent_names
]

# count only boundary files that really exist; the names above are derived, not searched for
missing_bound_names = [bound_name for bound_name in bound_names if not os.path.exists(bound_name)]
print('Found {} groundtruth extent chunks'.format(len(extent_names)))
print('Found {} groundtruth boundary chunks'.format(len(bound_names) - len(missing_bound_names)))
if missing_bound_names:
    print('WARNING: {} groundtruth boundary files are missing, e.g. {}'.format(
        len(missing_bound_names), missing_bound_names[0]))

Found 123 groundtruth extent chunks
Found 123 groundtruth boundary chunks


In [8]:
# find Planet RGB chunks corresponding to validation chunks
# image_names_months holds one list of RGB file names per selected month, in the order of selected_months
image_names_months = []
for str_month in selected_months:
    image_names = []
    for extent_name in extent_names:
        # id of the validation chunk: last two '_'-separated tokens of the file name without '.tif'
        chunk_id = '_'.join(os.path.basename(extent_name)[:-4].split('_')[-2:])
        # sorted so the order is reproducible if several files match
        image_list = sorted(glob(os.path.join(input_folder, country + '*' + str_month + '_' + chunk_id + '.tif')))
        if len(image_list) < 1:
            # name the chunk and month so the missing input can be tracked down
            print('no RGB found for chunk {} in month {}'.format(chunk_id, str_month))
            continue
        if len(image_list) > 1:
            print('WARNING: {} RGB images match chunk {} in month {}'.format(len(image_list), chunk_id, str_month))
        image_names.extend(image_list)
    print('Found {} RGB images for month {}'.format(len(image_names), str_month))
    # the consensus step pairs the months chunk by chunk, so flag months whose image list
    # does not contain exactly one image per ground truth chunk
    if len(image_names) != len(extent_names):
        print('WARNING: month {} has {} RGB images for {} groundtruth chunks'.format(
            str_month, len(image_names), len(extent_names)))
    image_names_months.append(image_names)

Found 123 RGB images for month 10
Found 123 RGB images for month 04
Found 123 RGB images for month 12


## Create datasets and dataloaders

In [9]:
# one dataloader per selected month, in the same order as selected_months
data_loaders = []
for month_position in range(len(selected_months)):
    month_image_names = image_names_months[month_position]
    # Define dataset (images only, no labels)
    test_dataset = Planet_Dataset_No_labels(image_names=month_image_names)
    # Loads data from a dataset and create mini batches.
    # num_workers is fixed to 1: using num_workers=CPU_COUNT might encounter a 'connection refused' issue
    test_dataloader = gluon.data.DataLoader(
        test_dataset,
        batch_size=batch_size,
        num_workers=1,
    )
    data_loaders.append(test_dataloader)

## Load pre-trained models

In [10]:
# one model per selected month, in the same order as selected_months
models = []
for str_month in selected_months:
    # search for model weights parameters of this month (sorted for a reproducible pick)
    model_weights = sorted(glob(os.path.join(models_folder, '*month_' + str_month + '_finetuned.params')))
    if len(model_weights) == 0:
        # Skipping the month would leave `models` shorter than `selected_months`, and the
        # prediction loop would then pair each model with another month's images.
        raise FileNotFoundError(
            'cant find model weights for month {} in {!r}'.format(str_month, models_folder))
    if len(model_weights) > 1:
        print('WARNING: {} weight files match month {}, using the first one'.format(
            len(model_weights), str_month))
    print('model weights for month {}: {}'.format(str_month, model_weights[0]))

    # initialise model
    model = FracTAL_ResUNet_cmtsk(nfilters_init=n_filters, depth=depth, NClasses=n_classes)
    # load pre-trained model parameters
    model.load_parameters(model_weights[0], ctx=ctx)
    models.append(model)

depth:= 0, nfilters: 32, nheads::8, widths::1
depth:= 1, nfilters: 64, nheads::16, widths::1
depth:= 2, nfilters: 128, nheads::32, widths::1
depth:= 3, nfilters: 256, nheads::64, widths::1
depth:= 4, nfilters: 512, nheads::128, widths::1
depth:= 5, nfilters: 1024, nheads::256, widths::1
depth:= 6, nfilters: 512, nheads::256, widths::1
depth:= 7, nfilters: 256, nheads::128, widths::1
depth:= 8, nfilters: 128, nheads::64, widths::1
depth:= 9, nfilters: 64, nheads::32, widths::1
depth:= 10, nfilters: 32, nheads::16, widths::1
model weights for month 10: model_weights/Planet_pretrained-france_finetuned-india_month_10_finetuned.params
depth:= 0, nfilters: 32, nheads::8, widths::1
depth:= 1, nfilters: 64, nheads::16, widths::1
depth:= 2, nfilters: 128, nheads::32, widths::1
depth:= 3, nfilters: 256, nheads::64, widths::1
depth:= 4, nfilters: 512, nheads::128, widths::1
depth:= 5, nfilters: 1024, nheads::256, widths::1
depth:= 6, nfilters: 512, nheads::256, widths::1
depth:= 7, nfilters: 256,

## Run predictions for each selected month

In [24]:
%%time
# names of the exported prediction files, per selected month, in export order
outnames_extent = {str_month: [] for str_month in selected_months}
outnames_bound = {str_month: [] for str_month in selected_months}

# zip() stops at the shortest sequence without complaint, which would run a model on
# another month's images and file the results under the wrong month
if not (len(models) == len(data_loaders) == len(selected_months)):
    raise ValueError('expected one model and one dataloader per selected month, got {} models, '
                     '{} dataloaders and {} months'.format(len(models), len(data_loaders), len(selected_months)))

# run model
for str_month, model, dataloader in zip(selected_months, models, data_loaders):
    print('Predicting using model parameters for month ', str_month)
    for img_data in tqdm(dataloader):
        # extract batch data
        imgs, id_dates, geotrans = img_data

        # make a copy if the variable currently lives in the wrong context
        imgs = imgs.as_in_context(ctx)

        # predicted outputs: field extent probability, field boundary probability and distance to boundary
        # (the distance output is not exported)
        logits, bound, dist = model(imgs)

        # convert the whole batch to numpy once instead of once per image
        id_dates_np = id_dates.asnumpy().astype(int)
        geotrans_np = geotrans.asnumpy()
        extent_np = logits.asnumpy()
        bound_np = bound.asnumpy()

        # export predictions for all images in the batch
        for i in range(id_dates_np.shape[0]):
            id_date = id_dates_np[i, :]
            str_id_date = [str(id_date[0])]  # year
            str_id_date.append(str(id_date[1]).zfill(2))  # month
            # zfill row and col ids so that output files also have uniform file name length
            str_id_date.extend([str(s).zfill(3) for s in id_date[2:]])
            gt = geotrans_np[i, :]

            outname_extent = os.path.join(out_folder, country + '_extent_prob_' + '_'.join(str_id_date) + '.tif')
            export_geotiff(outname_extent, extent_np[i, :, :].squeeze(), gt, prj, gdal.GDT_Float32)

            outname_bound = os.path.join(out_folder, country + '_bound_prob_' + '_'.join(str_id_date) + '.tif')
            export_geotiff(outname_bound, bound_np[i, :, :].squeeze(), gt, prj, gdal.GDT_Float32)

            outnames_extent[str_month].append(outname_extent)
            outnames_bound[str_month].append(outname_bound)

Predicting using model parameters for month  10


100%|██████████| 31/31 [05:02<00:00,  9.77s/it]


Predicting using model parameters for month  04


100%|██████████| 31/31 [05:02<00:00,  9.76s/it]


Predicting using model parameters for month  12


100%|██████████| 31/31 [05:02<00:00,  9.77s/it]

CPU times: user 26min 52s, sys: 3min 32s, total: 30min 24s
Wall time: 15min 8s





## Generate consensus results of selected months

In [25]:
def _chunk_id_of(file_name):
    """Return the chunk id: the last two '_'-separated tokens of the file name without its extension."""
    return '_'.join(os.path.splitext(os.path.basename(file_name))[0].split('_')[-2:])


def _read_first_band(file_name):
    """Read band 1 of a raster file and return (array, geotransform, projection)."""
    ds = gdal.Open(file_name)
    # gdal.Open returns None on failure unless GDAL exceptions are enabled
    if ds is None:
        raise IOError('could not open raster file {}'.format(file_name))
    band_array = ds.GetRasterBand(1).ReadAsArray()
    geotransform = ds.GetGeoTransform()
    projection = ds.GetProjection()
    # release memory
    ds = None
    return band_array, geotransform, projection


# evaluation scores of the months-averaged extent prediction, one entry per chunk
averaged_mean_acc = []
averaged_mean_f1 = []
averaged_mean_mcc = []

if not selected_months:
    raise ValueError('no months selected, nothing to average')

# Index the predicted files of every month by chunk id, so months are paired by chunk id
# rather than by list position (positions shift as soon as one month lacks a chunk).
extent_files = {month: {_chunk_id_of(f): f for f in outnames_extent[month]} for month in selected_months}
bound_files = {month: {_chunk_id_of(f): f for f in outnames_bound[month]} for month in selected_months}

# loop through all chunks (in the order of the first selected month) and average over months
for chunk_id in extent_files[selected_months[0]]:
    if any(chunk_id not in extent_files[month] or chunk_id not in bound_files[month]
           for month in selected_months):
        print('skipping chunk {}: predictions are not available for all selected months'.format(chunk_id))
        continue

    list_extents = []
    list_bounds = []
    for str_month in selected_months:
        # read in field extent probability geotiff and metadata
        np_extent, geotrans, proj = _read_first_band(extent_files[str_month][chunk_id])
        # read in boundary probability
        np_bound = _read_first_band(bound_files[str_month][chunk_id])[0]
        list_extents.append(np_extent)
        list_bounds.append(np_bound)

    # calculate averages
    extent_average = np.mean(list_extents, axis=0)
    bound_average = np.mean(list_bounds, axis=0)

    # find corresponding groundtruth extent and boundary files
    fn_prefix = '_'.join([country, '*extent', chunk_id]) + '.tif'
    groundtruth_matches = sorted(glob(groundtruth_folder + '/' + fn_prefix))
    if not groundtruth_matches:
        raise FileNotFoundError('no groundtruth extent file found for chunk {} in {!r}'.format(
            chunk_id, groundtruth_folder))
    file_extent_true = groundtruth_matches[0]
    # swap 'extent' for 'bound' in the file name only, not in the folder part of the path
    file_bound_true = os.path.join(os.path.dirname(file_extent_true),
                                   os.path.basename(file_extent_true).replace('extent', 'bound'))

    # read in ground truth extent and boundary file
    extent_true = imageio.imread(file_extent_true)
    boundary_true = imageio.imread(file_bound_true)

    # calculate evaluation scores; element 1 of each metric's get() result is collected
    accuracy, f1, mcc = get_accuracy_scores(extent_true, boundary_true, extent_average)
    averaged_mean_acc.append(accuracy.get()[1])
    averaged_mean_f1.append(f1.get()[1])
    averaged_mean_mcc.append(mcc.get()[1])

    # export as geotiffs, with the geotransform and projection of the last month read
    outname_extent = '_'.join([country, 'average_extent_prob', str_year, '_'.join(selected_months), chunk_id]) + '.tif'
    outname_extent = os.path.join(out_folder_averaged, outname_extent)
    export_geotiff(outname_extent, extent_average, geotrans, proj, gdal.GDT_Float32)

    outname_bound = '_'.join([country, 'average_bound_prob', str_year, '_'.join(selected_months), chunk_id]) + '.tif'
    outname_bound = os.path.join(out_folder_averaged, outname_bound)
    export_geotiff(outname_bound, bound_average, geotrans, proj, gdal.GDT_Float32)

In [26]:
# report each evaluation score averaged over all chunks
score_summary = (
    ('mean accuracy of months-averaged predictions: ', averaged_mean_acc),
    ('mean F1 score of months-averaged predictions: ', averaged_mean_f1),
    ('mean MCC of months-averaged predictions: ', averaged_mean_mcc),
)
for score_label, chunk_scores in score_summary:
    print(score_label, np.mean(chunk_scores))

mean accuracy of months-averaged predictions:  0.8417284560266809
mean F1 score of months-averaged predictions:  0.90329837455408
mean MCC of months-averaged predictions:  0.38680642543516536
