In [15]:
import os
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import seaborn as sns
from matplotlib.lines import Line2D
import shutil

from data_extraction import load_data_to_dataframe

In [16]:
# ------ Settings ------
# => Adjust the variables in this cell to the desired settings before running!

# Folder containing the result files. Their names should have the form:
# "{dataset/other information}-factor-{downsample factor}-{any remaining information}"
directory = ''

# ---- The metrics to consider ----

# Metrics that are already present in the Excel file
metrics_precalc = []

# Metrics that are calculated in this notebook
# -> assumes that the Excel file contains TP, FP, TN and FN
metrics_calc = ['OA', 'False Alarm', 'Recall', 'Precision', 'FMeasure']

# Only results of this thresholding algorithm are used
threAlg = 'OTSU'

# ---- Display names that make plotting more convenient ----

# Dataset identifier -> label
datasets = {
    'OSCDDatasetRGBBands': 'OSCD (RGB)',
    'LEVIRCDDataset': 'LEVIR-CD',
}

# Algorithm name -> abbreviation
algs = {
    'Change Vector Analysis': 'CVA',
    'Iterative Reweighted MAD': 'IR-MAD',
}

# Metric column -> label
metrics_syn = {
    'OA': 'Accuracy',
    'Recall': 'Recall',
    'Precision': 'Precision',
    'FMeasure': 'F-score',
    'False Alarm': 'False Alarm',
}

# Images that get their own plots (only used when the switches below are enabled)
selection_levir = []
selection_oscd = []
selection_imgs = [*selection_levir, *selection_oscd]

# Switches for the per-image heatmaps / mean plots
heatmap_per_img = False
means_per_img = False

In [17]:
# Read the result files and calculate the requested metrics
df = load_data_to_dataframe(directory, metrics_calc)

# Keep only the rows of the selected thresholding algorithm
df = df.loc[df['threAlg'] == threAlg]

# Files (refName) with at least one row containing NaN are removed completely;
# the removed rows are written to an Excel file for later inspection.
print('Size before NaN exclusion:', len(df))
rows_with_nan = df.isna().any(axis=1)
excluded_fnames = df.loc[rows_with_nan, 'refName'].unique()
print('Files with NaN metric rows excluded:', excluded_fnames)
is_excluded = df.refName.isin(excluded_fnames)
excluded_rows = df[is_excluded]
excluded_rows.to_excel("rows_with_NaN_values_excluded.xlsx")
df = df[~is_excluded]
print('Size after NaN exclusion:', len(df))

# Output folders for the different kinds of plots
plots_root = os.path.join(directory, 'Plots')
dir_dict = {
    'difference_dir': os.path.join(plots_root, 'Difference'),
    'means_dir': os.path.join(plots_root, 'Means'),
    'heatmap_dir': os.path.join(plots_root, 'Heatmap'),
}

# All metrics (calculated and pre-calculated), for further usage
metrics = [*metrics_calc, *metrics_precalc]

Size before NaN exclusion: 1400
Files with NaN metric rows excluded: ['test_108' 'test_117' 'test_122' 'test_125' 'test_60' 'test_62' 'test_64'
 'test_65' 'test_66' 'test_90' 'test_128' 'test_59' 'test_61' 'test_88'
 'test_95' 'test_99']
Size after NaN exclusion: 1240


## Heatmap

Reference: https://medium.com/@szabo.bibor/how-to-create-a-seaborn-correlation-heatmap-in-python-834c0686b88e

In [18]:
def createHeatmap(data, attributes, attrs_synonyms, rows, cols, title, directory, subfolder, figname):
    """Save a correlation heatmap of the selected attributes as a PNG file.

    The pairwise correlations of ``attributes`` are computed with
    ``DataFrame.corr`` and the ``rows`` x ``cols`` part of that matrix is drawn.

    Args:
        data: DataFrame containing at least the columns in ``attributes``.
        attributes: Column names of ``data`` to correlate.
        attrs_synonyms: Mapping from column name to display name; applied
            before the correlation is computed.
        rows: Display names shown on the y-axis of the heatmap.
        cols: Display names shown on the x-axis of the heatmap.
        title: Figure title, or ``None`` to omit the title.
        directory: Base output directory.
        subfolder: Folder below ``directory``; created if it does not exist.
        figname: File name without extension; ``'.png'`` is appended.
    """
    renamed = data[attributes].rename(columns=attrs_synonyms)
    # Select rows and columns in one .loc call instead of chained indexing.
    corr = renamed.corr().loc[rows, cols]

    # NOTE: set_theme changes the global matplotlib/seaborn settings.
    sns.set_theme(font_scale=1.5, font='Times New Roman')
    fig = plt.figure(figsize=(len(cols) * 1.4, len(rows) * 1.2))
    hm = sns.heatmap(corr, vmin=-1, vmax=1, center=0, annot=True, fmt='.3f',
                     linewidths=2.2, cmap='coolwarm')

    if title is not None:
        hm.set_title(title, fontdict={'fontsize': 18, 'weight': 'bold'}, pad=14)
    # Keep all tick labels horizontal; set on the heatmap axes explicitly
    # rather than through the pyplot "current axes".
    hm.tick_params(axis='both', labelrotation=0)

    folder = os.path.join(directory, subfolder)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(folder, exist_ok=True)
    fig.savefig(os.path.join(folder, figname + '.png'), bbox_inches='tight')
    # Close exactly the figure created here so figures do not pile up in loops.
    plt.close(fig)

In [21]:
# Datasets for the heatmaps (dataset identifier -> label)
datasets = {
    'LEVIRCDDataset': 'LEVIR-CD',
    'OSCDDatasetRGBBands': 'OSCD (RGB)',
}

# Column name -> label shown in the heatmap.
# The insertion order of this dict also defines the order of `attrs`.
attrs_syn = {
    #'Size': 'Size/n[px × px]',
    'scaledRes': 'Resolution\n[m/px]',
    'OA': 'Accuracy',
    'Precision': 'Precision',
    'Recall': 'Recall',
    'FMeasure': 'F-score',
    'False Alarm': 'False\nAlarm',
}

# Columns that enter the correlation ('Size' is currently left out)
attrs = list(attrs_syn)

# Heatmap layout: the metrics as columns, the resolution as the single row
cols = [attrs_syn[key] for key in ('OA', 'False Alarm', 'Precision', 'Recall', 'FMeasure')]
rows = [attrs_syn[key] for key in ('scaledRes',)]  # , 'Size'

In [22]:
plt.ioff()

# Rows of the two datasets that are evaluated together ("all")
df_filter = df.loc[df['dataset'].isin(['OSCDDatasetRGBBands', 'LEVIRCDDataset'])]

for alg in algs:
    # Heatmap over the combined datasets
    createHeatmap(
        data=df_filter.loc[df_filter['name'] == alg],
        attributes=attrs, attrs_synonyms=attrs_syn,
        rows=rows, cols=cols,
        title=None,
        directory=dir_dict['heatmap_dir'],
        subfolder=alg,
        figname=f'{algs[alg]}-{threAlg}-all',
    )

    # One heatmap for every single dataset
    for ds in datasets:
        createHeatmap(
            data=df.loc[(df['name'] == alg) & (df['dataset'] == ds)],
            attributes=attrs, attrs_synonyms=attrs_syn,
            rows=rows, cols=cols,
            title=None,
            directory=dir_dict['heatmap_dir'],
            subfolder=alg,
            figname=f'{algs[alg]}-{threAlg}-{ds}',
        )

In [None]:
# Optional: one heatmap per selected image and algorithm
if heatmap_per_img:
    for img in selection_imgs:
        for alg in algs:
            createHeatmap(
                data=df.loc[(df['name'] == alg) & (df['refName'] == img)],
                attributes=attrs, attrs_synonyms=attrs_syn,
                rows=rows, cols=cols,
                title=f'{alg} {img}',
                directory=dir_dict['heatmap_dir'],
                subfolder=img,
                figname=f'{algs[alg]}-{threAlg}-{img}',
            )

### Shared heatmap

In [23]:
def createHeatmapShared(corr, yaxis_labels, second_yaxis_labels, title, directory, subfolder, figname, figsize):
    """Save a heatmap of stacked correlation rows with labels on both y-axes.

    Args:
        corr: DataFrame of correlation values to draw (one heatmap row per
            DataFrame row).
        yaxis_labels: Labels for the left y-axis, one per row of ``corr``;
            drawn in bold.
        second_yaxis_labels: Labels for the right y-axis, one per row.
        title: Figure title, or ``None`` to omit the title.
        directory: Base output directory.
        subfolder: Folder below ``directory``; created if it does not exist.
        figname: File name without extension; ``'.png'`` is appended.
        figsize: Figure size passed to ``plt.figure``.
    """
    # NOTE: set_theme changes the global matplotlib/seaborn settings.
    sns.set_theme(font_scale=1.5, font='Times New Roman')
    fig = plt.figure(figsize=figsize)
    hm = sns.heatmap(corr, vmin=-1, vmax=1, center=0, annot=True, fmt='.3f',
                     linewidths=2.2, cmap='coolwarm', cbar_kws={'pad': 0.2})

    hm.set_yticklabels(yaxis_labels, rotation=0, weight='bold')

    # Second y-axis on the right; it copies the limits of the heatmap axis and
    # places one tick in the middle of every row.
    second_yaxis = hm.twinx()
    second_yaxis.set_ylim(hm.get_ylim())
    second_yaxis.set_yticks(np.arange(len(second_yaxis_labels)) + 0.5)
    second_yaxis.set_yticklabels(second_yaxis_labels, rotation=0)
    # The twin axes would otherwise draw the theme's grid on top of the heatmap.
    second_yaxis.grid(False)

    if title is not None:
        hm.set_title(title, fontdict={'fontsize': 24, 'weight': 'bold'}, pad=16)
    # Use explicit axes methods here: once twinx() has added a second axes, the
    # pyplot "current axes" is no longer guaranteed to be the heatmap axes, so
    # plt.xticks(...) may not reach the labels that are actually visible.
    hm.tick_params(axis='x', labelrotation=0)

    folder = os.path.join(directory, subfolder)
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(folder, exist_ok=True)
    fig.savefig(os.path.join(folder, figname + '.png'), bbox_inches='tight')
    # Close exactly the figure created here so figures do not pile up in loops.
    plt.close(fig)
    
plt.ioff()

# Datasets shown below the "combined" row, in row order (identifier -> row label).
# This is defined locally on purpose: the global `datasets` dict is rebound by
# several other cells with a different order/content, which would silently
# mislabel the heatmap rows if it were used to build the rows here.
shared_datasets = {
    'LEVIRCDDataset': 'LEVIR-CD',
    'OSCDDatasetRGBBands': 'OSCD\n(only RGB)',
}

# Filter on the datasets for combined
df_filter = df.loc[df['dataset'].isin(list(shared_datasets))]

for alg in algs.keys():
    df_alg = df_filter.loc[df_filter['name'] == alg]

    # First the combined data, then every dataset on its own
    subsets = [df_alg] + [df_alg.loc[df_alg['dataset'] == ds] for ds in shared_datasets]

    # Build all correlation rows first and concatenate once
    corrs = pd.concat([
        subset[attrs].rename(columns=attrs_syn).corr().loc[rows, cols]
        for subset in subsets
    ])

    createHeatmapShared(
        corrs,
        # Left axis: which data the row is based on (same order as `subsets`)
        ['Combined\nDatasets', *shared_datasets.values()],
        # Right axis: the correlated attribute of every row
        list(corrs.index),
        alg,
        dir_dict['heatmap_dir'],
        alg,
        f'{algs[alg]}-{threAlg}-shared-heatmap',
        (9.4, 3.8)
    )

## Means

In [None]:
def plotMeanDiffAlgsBetter(
        data, algs, datasets,
        x_attr, x_label,
        y_attr, y_label, ylim,
        title, legend, styles,
        directory, subfolder, figname):
    """Plot the mean of ``y_attr`` per ``x_attr`` value and save the figure.

    One line is drawn for every combination of algorithm and dataset.

    Args:
        data: DataFrame with the columns 'name', 'dataset', ``x_attr`` and
            ``y_attr``.
        algs: Mapping from algorithm name (as in column 'name') to its
            abbreviation; the abbreviation is the first key into ``styles``.
        datasets: Mapping whose keys are dataset identifiers (as in column
            'dataset').
        x_attr: Column to group by (x-axis).
        x_label: Label of the x-axis.
        y_attr: Column that is averaged (y-axis).
        y_label: Label of the y-axis.
        ylim: ``(lower, upper)`` limits of the y-axis, or ``None``.
        title: Title of the plot.
        legend: Legend handles to show.
        styles: ``styles[abbreviation][dataset]`` gives ``[color, line style]``.
        directory: Base output directory.
        subfolder: Folder below ``directory``; created if it does not exist.
        figname: File name handed to ``savefig`` as given.
    """
    fig, ax = plt.subplots()

    for alg_name, alg_short in algs.items():
        for ds_name in datasets:
            line_spec = styles[alg_short][ds_name]

            subset = data.loc[(data['name'] == alg_name) & (data['dataset'] == ds_name)]
            means = subset[[x_attr, y_attr]].groupby(x_attr).mean()
            means.plot.line(ax=ax, style=line_spec[1], color=line_spec[0])

    if ylim is not None:
        ax.set_ylim(ylim[0], ylim[1])
    ax.set_xlabel(x_label, fontsize=14)
    ax.set_ylabel(y_label, fontsize=14)
    ax.set_title(title, fontsize=18, pad=9, weight='bold')
    # Replaces the legend that pandas created automatically while plotting
    ax.legend(handles=legend, fontsize=14)

    # White background, dashed light grid and a thin black frame
    ax.grid(color='gainsboro', linestyle='--', linewidth=1.3)
    ax.set_facecolor('white')
    for border in ax.spines.values():
        border.set_linewidth(1)
        border.set_color('black')

    out_dir = os.path.join(directory, subfolder)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    fig.savefig(os.path.join(out_dir, figname), bbox_inches='tight')
    plt.close(fig)

# Style settings
plt.rcParams["font.family"] = "Times New Roman"

# One grey tone per algorithm (first: CVA, second: IR-MAD)
colors = ['#8a8a8a', '#2e2e2e']

# styles[algorithm abbreviation][dataset] -> [color, line style]
# The OSCD dataset with all bands is dotted, the other datasets are solid.
styles = {
    alg_short: {
        ds_name: [alg_color, line_style]
        for ds_name, line_style in (
            ('OSCDDataset', ':o'),
            ('OSCDDatasetRGBBands', '-o'),
            ('LEVIRCDDataset', '-o'),
        )
    }
    for alg_short, alg_color in (('CVA', colors[0]), ('IR-MAD', colors[1]))
}

In [None]:
# OSCD restricted to its RGB bands next to LEVIR-CD
datasets = {
    'OSCDDatasetRGBBands': 'OSCD (RGB)',
    'LEVIRCDDataset': 'LEVIR-CD',
}
# One legend entry per algorithm
legend = [
    Line2D([0], [0], color=line_color, marker='o', lw=2, label=alg_label)
    for line_color, alg_label in ((colors[0], 'CVA'), (colors[1], 'IR-MAD'))
]
subfolder = 'OSCD only RGB bands'

plt.ioff()
# One mean plot per calculated metric
for metric in metrics_calc:
    plotMeanDiffAlgsBetter(
        data=df, algs=algs, datasets=datasets,
        x_attr='scaledRes', x_label='Resolution [m/px]',
        y_attr=metric, y_label=metrics_syn[metric], ylim=(0, 1),
        title=metrics_syn[metric], legend=legend, styles=styles,
        directory=dir_dict['means_dir'], subfolder=subfolder,
        figname=f'{metric}-{threAlg}-resolution',
    )

In [None]:
# Both OSCD variants (all bands and RGB only) in one plot
datasets = {
    'OSCDDataset': 'OSCD',
    'OSCDDatasetRGBBands': 'OSCD (RGB)'
}
# Solid line = RGB bands, dotted line = all bands; one color per algorithm.
# (The first label used to read 'CVA - RBG', a typo for 'RGB'.)
legend = [
    Line2D([0], [0], color=colors[0], marker='o', lw=2, label='CVA - RGB'),
    Line2D([0], [0], color=colors[0], marker='o', ls=':', lw=2, label='CVA - all bands'),
    Line2D([0], [0], color=colors[1], marker='o', lw=2, label='IR-MAD - RGB'),
    Line2D([0], [0], color=colors[1], marker='o', ls=':', lw=2, label='IR-MAD - all bands')]
subfolder = 'Only OSCD comparison'

plt.ioff()
# One mean plot per calculated metric; the file name uses the display name of the metric
for metric in metrics_calc:
    plotMeanDiffAlgsBetter(
        df, algs, datasets,
        'scaledRes', 'Resolution [m/px]',
        metric, metrics_syn[metric], (0,1),
        metrics_syn[metric], legend, styles,
        dir_dict['means_dir'], subfolder, f'{metrics_syn[metric]}-{threAlg}-resolution'
    )

In [None]:
# Specific LEVIR images
datasets = {
    'LEVIRCDDataset': 'LEVIR-CD'
}
legend = [
    Line2D([0], [0], color=colors[0], marker='o',lw=2, label='CVA'),
    Line2D([0], [0], color=colors[1], marker='o', lw=2, label='IR-MAD')]
subfolder = 'LEVIR-CD images'

if means_per_img:
    plt.ioff()
    # One mean plot per metric and selected image, stored in a folder per image
    for metric in metrics_calc:
        for img in selection_levir:
            plotMeanDiffAlgsBetter(
                df.loc[(df['refName'] == img)], algs, datasets,
                'scaledRes', 'Resolution [m/px]',
                metric, metric, (0,1),
                metric + ' for ' + img, legend, styles,
                dir_dict['means_dir'], os.path.join(subfolder, img), f'{metric}-{threAlg}-resolution-{img}'
            )

    # Put LEVIR images in folders: copy both images (A, B) and the label of every
    # selected image next to its plots.
    path_from = '' # <--- Important: Set the folder here
    for img in selection_levir:
        # Same folder as used for the plots above (built from `subfolder`
        # instead of repeating the folder name as a literal).
        img_dir = os.path.join(dir_dict['means_dir'], subfolder, img)
        # Make sure the target exists even if no plot was written for this image
        os.makedirs(img_dir, exist_ok=True)
        path_to = os.path.join(img_dir, img)

        for part in ('A', 'B', 'label'):
            shutil.copy(os.path.join(path_from, part, img + '.tiff'), f'{path_to}_{part}.tiff')

## Differences per image

In [None]:
# Style and other settings for the per-image difference plots.
# Note: this cell rebinds `attrs`, `attrs_syn`, `cols` and `rows`, which the
# heatmap cells define with the plain (non-"_diff") column names.
legend = [
    Line2D([0], [0], color=colors[idx], marker='o', lw=2, label=alg_label)
    for idx, alg_label in enumerate(('CVA', 'IR-MAD'))
]

# "<metric>_diff" column -> label.
# The insertion order of this dict also defines the order of `attrs`.
attrs_syn = {
    'scaledRes': 'Resolution\n[m/px]',
    'OA_diff': 'Accuracy',
    'Precision_diff': 'Precision',
    'Recall_diff': 'Recall',
    'FMeasure_diff': 'F-score',
    'False Alarm_diff': 'False\nAlarm',
}
attrs = list(attrs_syn)

cols = [
    attrs_syn[key]
    for key in ('OA_diff', 'False Alarm_diff', 'Precision_diff', 'Recall_diff', 'FMeasure_diff')
]
rows = [attrs_syn[key] for key in ('scaledRes',)]

# Metrics for which the change between resolutions is evaluated
calc_diff = ['OA', 'False Alarm', 'Recall', 'Precision', 'FMeasure']

In [None]:
plt.ioff()

# Filter on datasets
df_ds = df.loc[(df['dataset'] == 'OSCDDatasetRGBBands') | (df['dataset'] == 'LEVIRCDDataset')]

# For every algorithm and metric: stacked bar chart showing, per resolution, the
# share of images whose metric decreased / stayed equal / increased compared to
# the previous (next smaller) resolution value of the same image.
for alg in algs.keys():
    for metric_diff in calc_diff:
        diff = metric_diff + '_diff'

        # Drop nan values for the metric, since no difference can be calculated in that case
        data = df_ds[['name', 'scaledRes', metric_diff, 'refName']].loc[(df_ds['name'] == alg)].copy()
        data = data.dropna()
        # Difference between consecutive resolutions per image, reduced to its sign:
        # -1 = decrease, 0 = unchanged, 1 = increase. The first resolution of every
        # image has no predecessor and therefore stays NA.
        data[diff] = data.sort_values('scaledRes').groupby(by=['name', 'refName'], group_keys=False)[metric_diff].diff() \
            .apply(lambda x: pd.NA if pd.isna(x) else (-1 if x < 0 else (1 if x > 0 else 0))).astype('category')
        # Share of every change direction per resolution (column 'proportion')
        data = data[['scaledRes', diff]].groupby(by=['scaledRes'], as_index=False).value_counts(normalize = True)
        # TODO 0.5 and 10 are in there but with zeros.

        # Partial/main references: https://matplotlib.org/stable/gallery/lines_bars_and_markers/bar_stacked.html,
        # https://stackoverflow.com/questions/23357798/how-to-draw-grid-lines-behind-matplotlib-bar-graph
        x_ax = data['scaledRes'].unique()

        # Running bar bottoms for stacking
        bt = np.zeros(len(x_ax))
        # NOTE(review): the three series are matched to x_ax by position; this
        # presumably relies on value_counts returning a row for every category at
        # every resolution (see the TODO above) — confirm for the pandas version in use.
        pos = data.loc[(data[diff] == 1)]['proportion']
        zero = data.loc[(data[diff] == 0)]['proportion']
        neg = data.loc[(data[diff] == -1)]['proportion']

        _, ax = plt.subplots()

        plt.ylim((-0.05,1.05))
        plt.yticks(ticks=[0, 0.25, 0.5, 0.75, 1], labels=['0%', '25%', '50%', '75%', '100%'])
        # zorder: grid lines behind the bars (bars use zorder=3)
        plt.grid(color = 'grey', linestyle = '-', linewidth = 0.4, zorder=0, axis='y')
        w = 0.8
        # A direction that never occurs is skipped entirely (no bar, no legend entry)
        if len(neg) > 0:
            plt.bar(x_ax, neg, w, label='decrease', color='#5e5e5e', zorder=3, edgecolor='k', linewidth=0.7)
            bt += neg.values
        if len(zero) > 0:
            plt.bar(x_ax, zero, w, label='unchanged', color='w', bottom=bt, zorder=3, edgecolor='k', linewidth=0.7)
            bt += zero.values
        if len(pos) > 0:
            plt.bar(x_ax, pos, w, label='increase', color='#c9c9c9', bottom=bt, zorder=3, edgecolor='k', linewidth=0.7)

        plt.xlabel('Resolution [m/px]', fontsize=14)
        plt.ylabel(metrics_syn[metric_diff] + ' change direction [%]', fontsize=14)
        plt.title(metrics_syn[metric_diff] + ' image difference (' + algs[alg] + ')', fontsize=18, pad=9, weight='bold')
        # Single legend call (an earlier plt.legend() without font size was
        # immediately replaced by this one and has been removed).
        plt.legend(fontsize=14)

        ax.set_facecolor('white')
        for spine in ax.spines.values():
            spine.set_linewidth(1)
            spine.set_color('black')

        folder = os.path.join(dir_dict['difference_dir'], alg)
        # exist_ok avoids the check-then-create race of a separate exists() test.
        os.makedirs(folder, exist_ok=True)
        plt.savefig(os.path.join(folder, f'{alg}-{metric_diff}-difference'), facecolor='white', bbox_inches='tight')
        plt.close()