In [None]:
# builtins
import sys
import pathlib

# externals
import numpy as np
import xarray as xr
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix

# locals
from pysegcnn.core.utils import (img2np, extract_by_points, np2tif, array_replace, search_files)
from pysegcnn.core.graphics import (plot_confusion_matrix, plot_classification_report)
from ai4ebv.core.landcover import WteLandCover, LC_LOOKUPS
from ai4ebv.core.metrics import area_adjusted_classification_report

In [None]:
# class labels defined by WteLandCover, with the NoData class removed
LABELS = WteLandCover.label_dict()
del LABELS[WteLandCover.NoData.id]

# class names, in the iteration order of LABELS
CLASS_LABELS = [entry['label'] for entry in LABELS.values()]

In [None]:
# path to the LUCAS survey dataset (.nc file); it is handed to eval_lucas,
# which opens it with xarray
LUCAS = pathlib.Path('/mnt/CEPH_PROJECTS/AI4EBV/INPUTS/LANDCOVER/LUCAS/lucas_points_WTE_AC.nc')

In [None]:
# helper function
def eval_lucas(layer, lucas):
    """Pair the LUCAS survey labels with the values of a land cover layer.

    Parameters
    ----------
    layer : path-like
        Land cover layer to evaluate. It is passed unchanged to
        ``extract_by_points`` and ``img2np``.
    lucas : path-like
        LUCAS survey dataset, opened with ``xarray.open_dataset``. The code
        reads its ``lon``, ``lat`` and ``LCWTE_Letter`` variables and selects
        along its ``record`` dimension.

    Returns
    -------
    y_true : numpy.ndarray
        LUCAS class identifiers (``int16``) at the survey points where both
        the survey and the layer differ from ``WteLandCover.NoData.id``.
    y_pred : numpy.ndarray
        Values of ``layer`` at the same points.

    Raises
    ------
    ValueError
        If a point returned by ``extract_by_points`` does not match exactly
        one LUCAS record (raised by ``.item()``).

    """
    # read the LUCAS dataset into memory and release the file handle
    with xr.open_dataset(lucas) as dataset:
        survey = dataset.load()

    # coordinates of all survey records: read once, reused for every point
    lon = survey.lon.values
    lat = survey.lat.values

    # extract LUCAS points within layer
    points, rows, cols = extract_by_points(layer, lon, lat)

    # get the indices of the LUCAS records within the layer
    indices = [np.where((lon == point[0]) & (lat == point[1]))[0].item()
               for point in points]

    # select the survey labels once, after all record indices are known;
    # this also keeps y_true defined when no point lies within the layer
    y_true = survey.LCWTE_Letter.sel(record=indices).values.astype(str)

    # read input land cover product
    y_p = img2np(layer)

    # subset model predictions to LUCAS points
    y_pred = y_p[rows, cols]

    # replace missing values and convert to integer: the NoData identifier is
    # written to an integer array, since assigning it to the fixed-width
    # string array could silently truncate it
    missing = (y_true == 'NA') | (y_true == '')
    labels = np.full(y_true.shape, WteLandCover.NoData.id, dtype=np.int16)
    labels[~missing] = y_true[~missing].astype(np.int16)
    y_true = labels

    # check where both the reference layer and the LUCAS dataset are defined
    defined = ((y_true != WteLandCover.NoData.id) &
               (y_pred != WteLandCover.NoData.id))

    # exclude NoData values from the evaluation
    return y_true[defined], y_pred[defined]

In [None]:
# custom color palette: one color per WteLandCover class except NoData, with
# each color channel divided by 255
rgb = [[channel / 255 for channel in lc.color]
       for lc in WteLandCover if lc != WteLandCover.NoData]
colors = sns.color_palette(rgb)
sns.palplot(colors)

## Evaluate existing land cover products

In [None]:
# existing land cover product to evaluate: 'ESACCI' selects the ESA CCI layer,
# any other value selects the CORINE layer
lc_product = 'ESACCI'

# path to the product layer and its reference year
layer, year = (
    (pathlib.Path('/mnt/CEPH_PROJECTS/AI4EBV/INPUTS/LANDCOVER/ESACCILC/ESACCI-LC-L4-LCCS-Map-300m-P1Y-2015-v2.0.7_clip.tif'), 2015)
    if lc_product == 'ESACCI' else
    (pathlib.Path('/mnt/CEPH_PROJECTS/AI4EBV/INPUTS/LANDCOVER/CORINE/CORINE_2018_ALPS_clip.tif'), 2018)
)

In [None]:
# extract LUCAS pixels: the survey labels (y_true) and the values of the layer
# at the survey points (y_pred), restricted to points where neither is NoData
y_true, y_pred = eval_lucas(layer, LUCAS)

In [None]:
# lookup table of the selected land cover product
lookup = LC_LOOKUPS[lc_product].to_numpy()

# apply the lookup to the entire layer and to the values at the LUCAS points
# NOTE: y_pred is overwritten by its replaced version, re-run the extraction
# cell before re-running this one
y_p = array_replace(img2np(layer), lookup)
y_pred = array_replace(y_pred, lookup)

# calculate metrics
report = area_adjusted_classification_report(
    y_true, y_pred, y_p,
    labels=list(LABELS.keys()),
    target_names=CLASS_LABELS,
)

In [None]:
# create dataframe of class-wise metrics: one record for each row of the
# report that is named after a class, extended by the reference year and the
# class name
# NOTE: the records are collected first and the dataframe is built once,
# DataFrame.append was removed in pandas 2.0
records = [
    {**row.to_dict(), 'year': str(year), 'label': name}
    for name, row in report.iterrows()
    if name in CLASS_LABELS
]
df = pd.DataFrame(records)

# convert area to percent
df['area'] *= 100
df

In [None]:
# class-wise F1-score (left) and area in percent (right) of the selected
# land cover product
fig, axes = plt.subplots(1, 2, figsize=(10, 6))
# sns.barplot(x='year', y='precision', hue='label', data=df, palette=colors, ax=axes[0]);
# sns.barplot(x='year', y='recall', hue='label', data=df, palette=colors, ax=axes[1]);
sns.barplot(x='year', y='f1-score', hue='label', data=df, palette=colors, ax=axes[0]);
sns.barplot(x='year', y='area', hue='label', data=df, palette=colors, ax=axes[-1]);

# axes properties
for ax in axes:
    ax.set_ylabel('')
    ax.set_yticks(np.arange(0, 1.2, 0.2))
    ax.set_xlabel('')
    ax.set_xticks([])

# the area axis is in percent: override its ticks and move them to the right
axes[-1].set_yticks(np.arange(0, 60, 10))
axes[-1].yaxis.tick_right()
#axes[0].set_title('Precision')
#axes[1].set_title('Recall')
axes[0].set_title('F$_1$-score')
axes[-1].set_title('Area (%)')

# adjust legend: remove the legend of each subplot, the handles of the last
# subplot are reused for the common legend
for ax in axes:
    handles = ax.get_legend_handles_labels()[0]
    ax.get_legend().remove()

# legend entries: class name followed by the mean support of the class
# NOTE: Series.items replaces Series.iteritems, which was removed in pandas 2.0
support = df.groupby('label', sort=False).support.mean()
axes[-1].legend(handles, ['{} ({:0d})'.format(name, int(count)) for name, count in support.items()],
                loc='lower left', ncol=1, bbox_to_anchor=(1.15, 0.21), frameon=False)

# adjust subplots and save figure
fig.subplots_adjust(wspace=0.15)
fig.savefig('./Figures/lucas_class-wise_metrics_{}.png'.format(lc_product), dpi=300, bbox_inches='tight')

## Evaluate downscaled land cover layers

In [None]:
# root directory searched for the classified layers
MOSAIC_PATH = pathlib.Path('/mnt/CEPH_PROJECTS/AI4EBV/DELIVERABLES/')
# reference years to evaluate: 2015, ..., 2020 (the stop value is excluded)
YEARS = np.arange(2015, 2021)

In [None]:
# classification parameters: each of them is one token of the file names
# searched for
classifier = 'RandomForestClassifier'
mode = 'single'
features = 'FT'
months = '3456789'
labels = 'CORINE'
indices = 'IND'
ftmode = 'ANN'
dem = 'DEM'
n = 5000

# alternation matching any of the reference years
year_group = '({})'.format('|'.join(map(str, YEARS)))

# tokens of the file name in order of appearance: an empty value for
# ``indices`` or ``dem`` omits the respective token
tokens = ['^{}'.format(classifier), mode, features, year_group, 'M{}'.format(months), labels]
if indices:
    tokens.append(indices)
tokens.append(ftmode)
if dem:
    tokens.append(dem)
tokens.append('N{}'.format(n))

# search files matching this pattern
pattern = '_'.join(tokens) + '_wte.tif$'
pattern

In [None]:
# search layers matching classification pattern below MOSAIC_PATH; the result
# is sorted because the evaluation pairs the layers with YEARS by position
layers = sorted(search_files(MOSAIC_PATH, pattern))
layers

In [None]:
# the layers are paired with the reference years by position: fail early if
# the search did not return exactly one layer per year, since zip would
# silently truncate and assign layers to the wrong years
if len(layers) != len(YEARS):
    raise ValueError('Expected one layer per reference year: found {} layers for {} years.'
                     .format(len(layers), len(YEARS)))

# evaluate metrics for each reference year
metrics = {}
for y, layer in zip(YEARS, layers):
    # extract LUCAS survey pixels
    y_true, y_pred = eval_lucas(layer, LUCAS)

    # calculate metrics: the entire layer is passed in addition to the values
    # at the LUCAS points
    y_p = img2np(layer)
    report = area_adjusted_classification_report(
        y_true, y_pred, y_p, labels=list(LABELS.keys()), target_names=CLASS_LABELS)
    metrics[y] = report

In [None]:
# overall accuracy of each reference year: read from the 'f1-score' column of
# the 'accuracy' row of the yearly reports
accuracies = np.asarray(
    [np.unique(yearly.loc['accuracy'].loc['f1-score']) for yearly in metrics.values()]
).squeeze()

# plot overall accuracy for each year, in percent
fig, ax = plt.subplots(1, 1, figsize=(16, 9))
sns.barplot(x=YEARS, y=100 * accuracies, color='grey', ax=ax)

# axes properties
ax.set_ylim(0, 100)
ax.set_yticks(np.arange(0, 110, 10))
ax.set_ylabel('Overall accuracy (%)')
ax.set_xlabel('Reference year', labelpad=20)
#ax.set_xticklabels(YEARS, rotation=45);
#ax.set_title('Mean accuracy: {:.0f}%'.format(accuracies.mean() * 100));

# save figure
fig.savefig('./Figures/lucas_oa.png', dpi=300, bbox_inches='tight')

In [None]:
# create dataframe of class-wise metrics over time: one record for each
# reference year and each row of the yearly report that is named after a
# class, extended by the year and the class name
# NOTE: the records are collected first and the dataframe is built once,
# DataFrame.append was removed in pandas 2.0
records = [
    {**row.to_dict(), 'year': str(ref_year), 'label': name}
    for ref_year, yearly_report in metrics.items()
    for name, row in yearly_report.iterrows()
    if name in CLASS_LABELS
]
df = pd.DataFrame(records)

# convert area to percent
df['area'] *= 100
df

In [None]:
# class-wise metrics over time: one subplot per metric, stacked vertically
fig, axes = plt.subplots(4, 1, figsize=(16, 16), sharex=True)
for ax, metric in zip(axes, ['precision', 'recall', 'f1-score', 'area']):
    sns.barplot(x='year', y=metric, hue='label', data=df, palette=colors, ax=ax)

# axes properties
for ax in axes[:-1]:
    ax.set_xlabel('')
    ax.set_yticks(np.arange(0, 1.2, 0.2))
axes[0].set_ylabel('Precision')
axes[1].set_ylabel('Recall')
axes[2].set_ylabel('F$_1$-score')

# the area axis is in percent
axes[-1].set_ylim(0, 50);
axes[-1].set_yticks(np.arange(0, 60, 10))
axes[-1].set_ylabel('Area (%)');
axes[-1].set_xlabel('Reference year', labelpad=20)
axes[-1].set_xticklabels(YEARS);

# adjust legend: remove the legend of each subplot, the handles of the last
# subplot are reused for the common legend
for ax in axes:
    handles = ax.get_legend_handles_labels()[0]
    ax.get_legend().remove()

# legend entries: class name followed by the mean support of the class
# NOTE: Series.items replaces Series.iteritems, which was removed in pandas 2.0
support = df.groupby('label', sort=False).support.mean()
axes[-1].legend(handles, ['{} ({:0d})'.format(name, int(count)) for name, count in support.items()],
                loc='lower left', ncol=4, bbox_to_anchor=(-0.15, -0.85), frameon=False)

# adjust subplots and save figure
fig.subplots_adjust(hspace=0.15)
fig.savefig('./Figures/lucas_class-wise_metrics.png', dpi=300, bbox_inches='tight')

In [None]:
# class-wise metrics over time: one subplot per metric, arranged horizontally
fig, axes = plt.subplots(1, 4, figsize=(16, 16), sharey=True)
for ax, metric in zip(axes, ['precision', 'recall', 'f1-score', 'area']):
    sns.barplot(y='year', x=metric, hue='label', data=df, palette=colors, ax=ax)

# axes properties
for ax in axes[:-1]:
    ax.set_ylabel('')
    ax.set_xticks(np.arange(0, 1.2, 0.2))
axes[0].set_xlabel('Precision')
axes[1].set_xlabel('Recall')
axes[2].set_xlabel('F$_1$-score')

# the area axis is in percent
axes[-1].set_xlim(0, 50);
axes[-1].set_xticks(np.arange(0, 60, 10))
axes[-1].set_xlabel('Area (%)');
axes[-1].set_ylabel('')
axes[0].set_ylabel('Reference year', labelpad=20);

# adjust legend: remove the legend of each subplot, the handles of the last
# subplot are reused for the common legend
for ax in axes:
    handles = ax.get_legend_handles_labels()[0]
    ax.get_legend().remove()

# legend entries: class name followed by the mean support of the class
# NOTE: Series.items replaces Series.iteritems, which was removed in pandas 2.0
support = df.groupby('label', sort=False).support.mean()
axes[0].legend(handles, ['{} ({:0d})'.format(name, int(count)) for name, count in support.items()],
               loc='lower left', ncol=4, bbox_to_anchor=(-0.5, -0.2), frameon=False)

# adjust subplots and save figure
fig.subplots_adjust(wspace=0.15)
fig.savefig('./Figures/lucas_class-wise_metrics_h.png', dpi=300, bbox_inches='tight')