Author: Daniel Lusk, University of Potsdam

Inspired by: Ankit Kariryaa ([github repo](https://github.com/ankitkariryaa/An-unexpectedly-large-count-of-trees-in-the-western-Sahara-and-Sahel))

In [None]:
import glob
import os

import core.utils as utils
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio as rio
import tensorflow as tf
from config import ModelComparison
from core.dataset_generator import DataGenerator
from core.prediction import Prediction
from matplotlib.colors import ListedColormap
from scipy.stats import ks_2samp
from skimage.measure import label, regionprops
from tensorflow.keras.metrics import BinaryIoU
import tensorflow.keras.metrics as tf_metrics
from tqdm.notebook import tqdm

plt.style.use("lusk") # Use the custom "lusk" matplotlib style sheet

# Notebook-wide switches
# RUN_PREDICTIONS: if True, load every saved model and predict/evaluate from
#   scratch; if False, load the previously saved arrays from config.pred_dir.
RUN_PREDICTIONS = False
# SAVE_FIGS: if True, figures are written to config.figures_dir.
SAVE_FIGS = False
# USE_ABSERR: if True, the best watershed minimum distance per model is the one
#   with the lowest absolute tree count error; otherwise the one with the
#   highest KS-test p-value.
USE_ABSERR = True

# Magic commands
%matplotlib inline
%reload_ext autoreload
%autoreload 2

### Configuration and loading data

Load the configuration and get the image directories and model filenames.

In [None]:
# Configuration object holding all paths and parameters used in this notebook
config = ModelComparison.Config()

# Image directories matching the configured glob pattern, in sorted order
im_dirs = glob.glob(config.image_dir)
im_dirs.sort()

# Saved UNet model files, in sorted order
model_fns = glob.glob("./saved_models/UNet/*.h5")
model_fns.sort()

Read all of the images (AKA frames) into memory and get test data

In [None]:
if RUN_PREDICTIONS:
    # Load every frame found in the image directories
    frames = utils.get_frames(im_dirs, config)

    # The frames only consist of test images, so every index is predicted on
    predict_idx = list(range(len(frames)))

    # Label channels followed by weight channels
    annotation_channels = config.input_label_channel + config.input_weight_channel

    patch_generator = DataGenerator(
        config.input_image_channels,
        config.patch_size,
        predict_idx,
        frames,
        annotation_channels,
    )

    # Cut the frames into sequential patches using the configured step size
    X, y = patch_generator.all_sequential_patches(config.step_size)

Load the original and eroded labels for later comparison with predictions. Get tree count and tree area distribution of y_true

In [None]:
# Read the ground-truth label tile and its eroded counterpart. The context
# managers close the raster files as soon as the pixel data has been read.
with rio.open(config.true_labels_tile) as y_true_rio:
    y_true = np.moveaxis(y_true_rio.read(), 0, -1)  # Shift bands to last axis

with rio.open(config.true_eroded_labels_tile) as y_true_labels_rio:
    y_true_eroded = np.moveaxis(y_true_labels_rio.read(), 0, -1)  # Shift bands to last axis

# Give every connected region (tree) its own integer label and measure it
y_true = label(np.squeeze(y_true))
y_true_regions = regionprops(y_true)

# Ground-truth tree count and tree area distribution
true_tree_ct = len(y_true_regions)
y_true_areas = [r.area for r in y_true_regions]
true_bins = np.histogram_bin_edges(y_true_areas, config.bins)
true_np_hist, true_bin_e = np.histogram(y_true_areas, true_bins)

### Perform predictions

Load the model and perform predictions

In [None]:
# Load model after training
# If you load a model with different python version, then you may run into a
# problem: https://github.com/keras-team/keras/issues/9595#issue-303471777

# Shapes of the original tile and of the frames it was subdivided into (before
# further patchifying for model training)
frame_shape = (*config.frame_size, 1)
tile_shape = (*config.tile_size, 1)

y_preds = []
y_pred_evals = []

# Locations of the cached prediction arrays and evaluation results
y_preds_path = os.path.join(config.pred_dir, "y_preds.npy")
y_pred_evals_path = os.path.join(config.pred_dir, "y_pred_evals.npy")

if RUN_PREDICTIONS:
    for fn in tqdm(model_fns):
        print("Model:", os.path.basename(fn))
        model = tf.keras.models.load_model(
            fn,
            custom_objects={
                "tversky": config.loss,
                "dice_coef": config.metrics[0],
                "dice_loss": config.metrics[1],
                "accuracy": config.metrics[4],
                "specificity": config.metrics[2],
                "sensitivity": config.metrics[3],
            },
            compile=False,
        )

        # In case you want to use multiple GPU you can uncomment the following lines.
        # from tensorflow.python.keras.utils import multi_gpu_model
        # model = multi_gpu_model(model, gpus=2, cpu_merge=False)

        # The model was loaded with compile=False, so compile it here with the
        # configured optimizer, loss and metrics before evaluating it
        model.compile(
            optimizer=config.optimizer,
            loss=config.loss,
            metrics=config.metrics,
        )

        # Perform the predictions on all patches
        # NOTE(review): steps=1 asks Keras for a single prediction step; confirm
        # that one step really covers every patch in X.
        y_pred = model.predict(X, steps=1)

        # Restitch the predictions into the original tile
        y_pred = utils.stitch_to_tile(
            y_pred,
            frame_shape,
            tile_shape,
        )

        y_preds.append(y_pred)

        # Evaluate on the same patches and remember which model the scores belong to
        pred_eval = model.evaluate(X, y, return_dict=True)
        pred_eval["model_name"] = fn
        y_pred_evals.append(pred_eval)

    # Save the full set of prediction arrays. makedirs also creates missing
    # parent directories and does not fail if the directory already exists.
    os.makedirs(config.pred_dir, exist_ok=True)

    np.save(y_preds_path, np.array(y_preds))
    np.save(y_pred_evals_path, np.array(y_pred_evals, dtype=object))
else:
    y_preds = np.load(y_preds_path)
    # The evaluation results are dicts stored in an object array, which numpy
    # can only load through pickle; only load files written by this notebook.
    y_pred_evals = np.load(y_pred_evals_path, allow_pickle=True)

Convert predictions into objects of class Prediction

In [None]:
# Wrap each stitched prediction, together with the model file it came from and
# the two ground-truth label sets, in a Prediction object
preds = [
    Prediction(y_pred, model_fn, y_true, y_true_eroded)
    for y_pred, model_fn in tqdm(zip(y_preds, model_fns), total=len(y_preds))
]

Generate convex hulls of the predicted trees, dilate them, and compute the resulting binary IoUs (bIoUs)

In [None]:
# Binary ground-truth mask; it is the same for every prediction, so build it once
y_true_mask = np.where(y_true > 0, 1, 0)

for pred in tqdm(preds):
    # Convex hull of every predicted tree, then the grown (dilated) hulls
    pred.trees_ch, pred.trees_ch_reg = utils.chull(pred.trees)
    pred.trees_ch_exp, pred.trees_ch_exp_reg = utils.grow(pred.trees_ch)

    # bIoU of both variants against the ground truth
    pred.biou_ch = utils.get_biou(np.where(pred.trees_ch > 0, 1, 0), y_true_mask)
    pred.biou_ch_exp = utils.get_biou(np.where(pred.trees_ch_exp > 0, 1, 0), y_true_mask)

Get tree count and tree area distribution of y_true

In [None]:
# Label the ground-truth trees and measure their regions
y_true = label(np.squeeze(y_true))
y_true_regions = regionprops(np.squeeze(y_true))

# Ground-truth tree count and the histogram of ground-truth tree areas
true_tree_ct = len(y_true_regions)
y_true_areas = [r.area for r in y_true_regions]
true_bins = np.histogram_bin_edges(y_true_areas, config.bins)
true_np_hist, true_bin_e = np.histogram(y_true_areas, true_bins)

Compare different minimum distances when determining local maxima for watershed segmentation

In [None]:
# Candidate minimum distances passed to utils.get_trees
dists = [3, 5, 7, 9, 11, 13]

for dist in tqdm(dists):
    for pred in preds:
        # Segment the prediction with this minimum distance and count the trees
        trees, regions = utils.get_trees(pred.img, dist)
        relabelled_trees = label(trees)
        tree_ct = len(regionprops(relabelled_trees))

        # Histogram of the predicted tree areas
        areas = [region.area for region in regions]
        pred_bins = np.histogram_bin_edges(areas, config.bins)
        pred_np_hist, pred_bin_e = np.histogram(areas, pred_bins)

        # NOTE(review): the KS test is run on the histogram counts, not on the
        # raw area samples (ks_2samp(y_true_areas, areas)) - confirm this is intended.
        ks = ks_2samp(true_np_hist, pred_np_hist)

        # Store the results on the prediction under distance-specific names
        per_dist_results = {
            f"trees_{dist}": trees,
            f"trees_reg_{dist}": regions,
            f"tree_ct_{dist}": tree_ct,
            f"ks_test_{dist}": ks[1],
        }
        for attr_name, attr_value in per_dist_results.items():
            setattr(pred, attr_name, attr_value)

In [None]:
# Summary statistics, one row per model. The per-distance columns and values are
# generated from `dists` so they always match the minimum distances that were
# actually evaluated.
base_cols = [
    "model",
    "labels_trained",
    "weight type",
    "weight scheme",
    "non-eroded biou",
    "eroded biou",
    "chull biou",
    "dilated biou",
    "tree count",
    "tree abserr",
    "tree ratio",
]
dist_cols = [
    f"{stat} {dist}"
    for dist in dists
    for stat in ("tree count", "tree abserr", "tree ratio", "tree ks test")
]

rows = []
for p in preds:
    row = [
        p.model,
        p.trained_on,
        p.weights,
        p.wt_scheme,
        p.biou_uneroded,
        p.biou_eroded,
        p.biou_ch,
        p.biou_ch_exp,
        p.tree_ct,
        abs(p.tree_ct - true_tree_ct),  # absolute tree count error
        p.tree_ct / true_tree_ct,  # predicted:true tree count ratio
    ]
    for dist in dists:
        # Tree count and KS-test p-value stored on the prediction for this distance
        tree_ct_dist = getattr(p, f"tree_ct_{dist}")
        row += [
            tree_ct_dist,
            abs(tree_ct_dist - true_tree_ct),
            tree_ct_dist / true_tree_ct,
            getattr(p, f"ks_test_{dist}"),
        ]
    rows.append(row)

stats_df = pd.DataFrame(rows, columns=base_cols + dist_cols)

Get convex hull and dilated bIoUs from best min_dist for each model

In [None]:
# Columns that are ranked for each model. They are listed explicitly instead of
# being matched by substring, so re-running this cell is not affected by other
# columns of stats_df whose names also contain "abserr" or "biou".
abserr_cols = [f"tree abserr {dist}" for dist in dists]
ks_cols = [f"tree ks test {dist}" for dist in dists]
biou_cols = ["non-eroded biou", "eroded biou", "chull biou", "dilated biou"]

best_dists = []
best_morphs = []
for _, model in stats_df.iterrows():
    if USE_ABSERR:
        # Minimum distance with the lowest absolute tree count error
        best_col = model[abserr_cols].astype(float).idxmin()
    else:
        # Minimum distance with the highest KS-test p-value
        best_col = model[ks_cols].astype(float).idxmax()

    # The distance is the last word of the column name (e.g. "tree abserr 7" -> "7")
    best_dists.append(best_col.rsplit(" ", 1)[-1])

    # Morphology/label variant with the highest bIoU
    best_morphs.append(model[biou_cols].astype(float).idxmax())

stats_df["best dist"] = best_dists
stats_df["best morph"] = best_morphs

In [None]:
best_bious_ch = []
best_bious_ch_exp = []
best_bious = []
best_morph_types = []
morph_types = ["original", "chull", "dilated"]

# Binary ground-truth mask, shared by every bIoU computation below
y_true_mask = np.where(y_true > 0, 1, 0)

for _, model in tqdm(stats_df.iterrows(), total=len(stats_df)):
    # Look the prediction up by model name. stats_df is re-sorted at the end of
    # this cell, so pairing rows with preds by position would mix up the models
    # if the cell is run a second time.
    pred = next(p for p in preds if p.model == model["model"])
    best_dist = model["best dist"].strip()

    # Watershed result for the best minimum distance, its convex hulls and the
    # grown (dilated) hulls
    pred.best_og = getattr(pred, f"trees_{best_dist}")
    pred.best_og_reg = getattr(pred, f"trees_reg_{best_dist}")
    pred.best_ch, pred.best_ch_reg = utils.chull(pred.best_og)
    pred.best_ch_exp, pred.best_ch_exp_reg = utils.grow(pred.best_ch)

    # bIoU of each variant against the ground truth
    pred.biou_best_ch = utils.get_biou(np.where(pred.best_ch > 0, 1, 0), y_true_mask)
    pred.biou_best_ch_exp = utils.get_biou(np.where(pred.best_ch_exp > 0, 1, 0), y_true_mask)
    pred.biou_best = utils.get_biou(np.where(pred.best_og > 0, 1, 0), y_true_mask)

    # Same order as morph_types, so argmax indexes directly into it
    pred_best_bious = np.array([pred.biou_best, pred.biou_best_ch, pred.biou_best_ch_exp])
    best_biou = pred_best_bious.max()
    best_morph_type = morph_types[pred_best_bious.argmax(axis=0)]

    best_bious_ch.append(pred.biou_best_ch)
    best_bious_ch_exp.append(pred.biou_best_ch_exp)
    best_bious.append(best_biou)
    best_morph_types.append(best_morph_type)

stats_df["best biou chull"] = best_bious_ch
stats_df["best biou dilated"] = best_bious_ch_exp
stats_df["best biou"] = best_bious
stats_df["best biou morph"] = best_morph_types

# Order stats by label type
stats_df = stats_df.sort_values("labels_trained", ascending=False)

Save predictions and dataframe

In [None]:
# import dill

# for pred in preds:
#     filename = os.path.join(config.pred_dir, "pickled_preds", f"{os.path.splitext(os.path.basename(pred.model))[0]}.pkl")
#     with open(filename, "wb") as f:
#         dill.dump(pred, f)

In [None]:
# np.save(os.path.join(config.pred_dir, "pred_objs.npy"), np.array(preds, dtype=object), allow_pickle=True)
# stats_df.to_pickle(os.path.join(config.pred_dir, "2023-04-13_stats_df.pkl"))

Load predictions and dataframe (doesn't work)

In [None]:
# # preds = np.load(os.path.join(config.pred_dir, "pred_objs.npy"), allow_pickle=True)
# preds = []
# pickles = sorted(glob.glob(os.path.join(config.pred_dir, "pickled_preds", "*.pkl")))
# for p in pickles:
#     with open(p, "rb") as pkl:
#         preds.append(dill.load(pkl))
# stats_df = pd.read_pickle(os.path.join(config.pred_dir, "2023-04-13_stats_df.pkl"))

### Plot tree ratios by minimum distance

In [None]:
# Absolute tree count error (as a percentage of the true tree count) for each
# watershed minimum distance, one line per model.
# The local names deliberately avoid `label` and `y`, which would overwrite the
# imported skimage `label` function and the test labels `y`.
abserr_cols = [f"tree abserr {dist}" for dist in dists]

for _, model_row in stats_df.iterrows():
    abserr_pct = (model_row[abserr_cols].to_numpy(dtype=float) / true_tree_ct) * 100
    line_label = f"{model_row['labels_trained']} + {model_row['weight scheme']}"
    # Solid lines for models whose label mentions "ORIG", dotted for the rest
    linestyle = "-" if "ORIG" in line_label else "dotted"
    plt.plot(dists, abserr_pct, label=line_label, alpha=0.7, ls=linestyle)
    plt.scatter(dists, abserr_pct, s=10)

y_ticks = np.arange(0, 65, 5)
plt.xticks(dists)
plt.yticks(y_ticks)
plt.grid()
plt.legend(fontsize=10)
plt.xlabel("minimum distance", labelpad=20)
plt.ylabel("tree count abs err (%)", labelpad=20)
plt.title("Effect of minimum distance on watershed \npredicted tree absolute error");
if SAVE_FIGS:
    plt.savefig(os.path.join(config.figures_dir, "min-dist_by_abs-tree-error-pct.png"), bbox_inches="tight");

In [None]:
# KS-test p-value for each watershed minimum distance, one line per model.
# The p-values are plotted as they are: they are probabilities, so they must not
# be scaled by the true tree count like the tree count errors are.
# The local names deliberately avoid `label` and `y`, which would overwrite the
# imported skimage `label` function and the test labels `y`.
ks_cols = [f"tree ks test {dist}" for dist in dists]

for _, model_row in stats_df.iterrows():
    ks_pvalues = model_row[ks_cols].to_numpy(dtype=float)
    line_label = f"{model_row['labels_trained']} + {model_row['weight scheme']}"
    # Solid lines for models whose label mentions "ORIG", dotted for the rest
    linestyle = "-" if "ORIG" in line_label else "dotted"
    plt.plot(dists, ks_pvalues, label=line_label, alpha=0.7, ls=linestyle)
    plt.scatter(dists, ks_pvalues, s=10)

plt.xticks(dists)
plt.grid()
plt.legend(fontsize=10)
plt.xlabel("minimum distance", labelpad=20)
plt.ylabel("KS-Test p", labelpad=20)
plt.title("Effect of minimum distance on watershed \npredicted tree KS-Test");
if SAVE_FIGS:
    plt.savefig(os.path.join(config.figures_dir, "min-dist_by_ks-test.png"), bbox_inches="tight");

In [None]:
# Ratio of predicted to true tree count for each watershed minimum distance,
# one line per model. The horizontal line marks a perfect ratio of 1.
# The local names deliberately avoid `label` and `y`, which would overwrite the
# imported skimage `label` function and the test labels `y`.
plt.axhline(1, 0, 1, color="black", alpha=0.8, lw=0.7, zorder=0)

ratio_cols = [f"tree ratio {dist}" for dist in dists]

for _, model_row in stats_df.iterrows():
    ratios = model_row[ratio_cols].to_numpy(dtype=float)
    line_label = f"{model_row['labels_trained']} + {model_row['weight scheme']}"
    # Solid lines for models whose label mentions "ORIG", dotted for the rest
    linestyle = "-" if "ORIG" in line_label else "dotted"
    plt.plot(dists, ratios, label=line_label, alpha=0.7, ls=linestyle)
    plt.scatter(dists, ratios, s=10)

plt.xticks(dists)
plt.grid()
plt.legend(fontsize=10)
plt.xlabel("minimum distance")
plt.ylabel("tree count ratio")
plt.title("Effect of minimum distance on watershed \npredicted tree count:true tree count");
if SAVE_FIGS:
    plt.savefig(os.path.join(config.figures_dir, "min-dist_effect_on_tree-ct-ratio.png"), bbox_inches="tight");

### Plot bIoUs by model type

In [None]:
from matplotlib.patches import Rectangle

# bIoU of every model under the four morphological adjustments.
# The local names deliberately avoid `label` and `y`, which would overwrite the
# imported skimage `label` function and the test labels `y`.
biou_cols = ["non-eroded biou", "eroded biou", "best biou chull", "best biou dilated"]
x_ticks = ["no erosion", "eroded", "best dist chull", "best dist dilated"]

# Grey background behind the first two categories (the Y_true adjustments)
currentAxis = plt.gca()
currentAxis.add_patch(Rectangle((-0.1, 0.68), 1.625, 0.15, facecolor="grey", alpha=0.1))

for _, model in stats_df.iterrows():
    bious = [model[col] for col in biou_cols]

    line_label = f"{model['labels_trained']} + {model['weight scheme']}"
    # Solid lines for models whose label mentions "ORIG", dotted for the rest
    linestyle = "-" if "ORIG" in line_label else "dotted"
    plt.plot(x_ticks, bious, label=line_label, alpha=0.7, ls=linestyle)
    plt.scatter(x_ticks, bious)

plt.xlim(-0.1, 3.1)
plt.ylim(0.68, 0.82)
plt.xticks(rotation=40)
plt.grid()
plt.legend(fontsize=10, loc="center left", bbox_to_anchor=(0.05, 0.45))
plt.xlabel("Morphological Adjustment", labelpad=10)
plt.ylabel("Binary IoU")

# Category headings above the two halves of the plot
cat_pad = 0.05
plt.gcf().text(0.14+cat_pad, 0.825, "Y_true adjustments", fontsize=14)
plt.gcf().text(0.55+cat_pad, 0.825, "Y_pred adjustments", fontsize=14)
plt.title("Effect of morphological adjustments on bIoU");
if SAVE_FIGS:
    plt.savefig(os.path.join(config.figures_dir, "morph_vs_biou.png"), bbox_inches="tight");

### Plot tree errors by model type

Bar chart of the best tree count error and the best bIoU for each model

In [None]:
# One tick label per model: "<label set> + <weight scheme>"
xtick_labels = [
    f"{model['labels_trained']} + {model['weight scheme']}"
    for _, model in stats_df.iterrows()
]
x = np.arange(len(xtick_labels))

# Tree count error (% of the true tree count) at each model's best minimum distance
y_tree_err = np.array(
    [
        (model[f"tree abserr {model['best dist'].strip()}"] / true_tree_ct) * 100
        for _, model in stats_df.iterrows()
    ],
    dtype=float,
)
# Best bIoU of each model, in the same row order
y_biou = stats_df["best biou"].to_numpy(dtype=float)

width = 0.2  # the width of the bars
multiplier = 0

fig, ax = plt.subplots(figsize=(14, 5), tight_layout=True)

# Left axis: tree count error
ax.bar(x + 0, y_tree_err, width, color="green", label="tree error")
ax.set_ylabel("best tree error (%)")
ax.set_xticks(x+width/2, xtick_labels, rotation=60)
ax.legend(loc="upper left")
ax.grid()
ax.set_ylim(0, 22)

# Right axis: best bIoU, drawn next to the error bars
ax2 = ax.twinx()
ax2.bar(x + width, y_biou, width, color="orange", label="best biou")
ax2.set_ylim(0.73, 0.805)
ax2.set_ylabel("best binary IoU")
ax2.legend(loc="upper right")

if SAVE_FIGS:
    # The file name records which criterion selected the best minimum distance
    fig_name = "tree-error_by_biou_abserr.png" if USE_ABSERR else "tree-error_by_biou_ks-test.png"
    plt.savefig(os.path.join(config.figures_dir, fig_name), bbox_inches="tight")

### Inspect predictions of best states for all models (lowest tree count error and highest bIoU)

For each model, get the min_dist with the lowest tree ct error and the morphology with the highest bIoU, then inspect the predictions.

In [None]:
fn = "../../data/dap05/combined/full_size/rgb/393_5823_RGB_2020.tif"
# The context manager closes the raster file once the pixels have been read
with rio.open(fn) as rgb_rio:
    rgb = np.moveaxis(rgb_rio.read(), 0, -1)  # Shift bands to last axis

# Window to inspect, passed to utils.sample_img as (xmin, ymin, step). To look
# at a random window instead, draw xmin and ymin with
# np.random.randint(0, rgb.shape[0] - step) and
# np.random.randint(0, rgb.shape[1] - step).
best_sample = (760, 219)
step = 750
xmin, ymin = best_sample

# The same RGB and ground-truth windows are shown in several panels
rgb_samp = utils.sample_img(rgb, xmin, ymin, step)
y_true_samp = utils.sample_img(utils.mask_bg(y_true), xmin, ymin, step)

# Colormap
rainbow = mpl.colormaps["rainbow"]
cmap = ListedColormap(rainbow(np.random.random(len(np.unique(y_true_samp)))))

fig, ax = plt.subplots(2, 5, figsize=(15, 8.5), tight_layout=True)
ax = ax.ravel()

# First two panels: plain RGB, and RGB with the ground-truth trees on top
ax[0].imshow(rgb_samp)
ax[1].imshow(rgb_samp)
img = ax[1].imshow(
    y_true_samp,
    cmap=cmap,
    alpha=0.5,
)

title_pos = dict({"y": 1.03})
ax[0].set_title("RGB", **title_pos)
ax[1].set_title("Y_True", **title_pos)

# Remaining panels: one per model, showing its best morphology on top of the RGB
for i, (_, model) in enumerate(stats_df.iterrows()):
    pred = next(p for p in preds if p.model == model["model"])
    j = i + 2
    ax[j].imshow(rgb_samp)

    # An unknown morphology name raises a KeyError instead of silently
    # re-using the image of the previous model
    morph_imgs = {
        "original": pred.best_og,
        "chull": pred.best_ch,
        "dilated": pred.best_ch_exp,
    }
    im = morph_imgs[model["best biou morph"]]

    img = ax[j].imshow(
        utils.sample_img(utils.mask_bg(im), xmin, ymin, step),
        cmap=cmap,
        alpha=0.5,
    )
    error = (model[f"tree abserr {model['best dist'].strip()}"] / true_tree_ct) * 100
    ax[j].set_title(f"trained on: {pred.trained_on}\nweights: {pred.wt_scheme}\n-----------\nbIoU: {model['best biou']:.3f}\nTree error: {error:.2f}%", fontsize=12, **title_pos)

for a in ax:
    a.axis("off")

# The last panel of the 2 x 5 grid is removed
fig.delaxes(ax[-1]);

# Saving is controlled by the notebook-wide SAVE_FIGS flag; it is not
# overridden here, so it keeps its meaning for this and all later cells.
if SAVE_FIGS:
    if USE_ABSERR:
        plt.savefig(os.path.join(config.figures_dir, "best_preds_abserr.png"));
    else:
        plt.savefig(os.path.join(config.figures_dir, "best_preds_ks.png"));

### Tree area histograms

In [None]:
fig, axes = plt.subplots(3, 3, figsize=(12, 10), dpi=300, tight_layout=True)
axes = axes.flatten()

# The ground-truth area histogram is the same for every model, so compute it once
true_bins = np.histogram_bin_edges(y_true_areas, config.bins)
true_np_hist, true_bin_e = np.histogram(y_true_areas, true_bins)

for i, ((_, model), ax) in enumerate(zip(stats_df.iterrows(), axes)):
    pred = next(p for p in preds if p.model == model["model"])

    # Regions of the morphology with the best bIoU. An unknown morphology name
    # raises a KeyError instead of silently re-using the previous model's regions.
    morph_regions = {
        "original": pred.best_og_reg,
        "chull": pred.best_ch_reg,
        "dilated": pred.best_ch_exp_reg,
    }
    reg = morph_regions[model["best biou morph"]]

    # KS test between the binned true and predicted tree area counts
    areas = [r.area for r in reg]
    pred_bins = np.histogram_bin_edges(areas, config.bins)
    pred_np_hist, pred_bin_e = np.histogram(areas, pred_bins)
    ks = ks_2samp(true_np_hist, pred_np_hist)

    ax.hist(y_true_areas, bins=config.bins, label="y_true", color="red", alpha=0.5)
    ax.hist(areas, bins=config.bins, label="y_pred", color="blue", alpha=0.5)

    if i in (4, 5, 6):
        ax.set_xlabel("Tree area")
    if i % 3 == 0:
        ax.set_ylabel("Number of trees")
    ax.legend(fontsize=8)
    ax.grid()

    error = (model[f"tree abserr {model['best dist'].strip()}"] / true_tree_ct) * 100
    ax.set_title(
        f"trained on: {pred.trained_on}\nweights: {pred.wt_scheme}\n-----------\nbIoU: {model['best biou']:.3f} | Tree error: {error:.2f}%\n$p$: {ks[1]:.4f}",
        fontsize=10, y=0.4, x=0.55
    )

# The last two panels of the 3 x 3 grid are removed
fig.delaxes(axes[-1])
fig.delaxes(axes[-2])

if SAVE_FIGS:
    plt.savefig(os.path.join(config.figures_dir, "tree_area_hist.png"))

In [None]:
# The ground-truth area histogram is the same for every model, so compute it once
true_bins = np.histogram_bin_edges(y_true_areas, config.bins)
true_np_hist, true_bin_e = np.histogram(y_true_areas, true_bins)

best_abserrs = []
best_ks = []
for _, model in stats_df.iterrows():
    # Tree count error (% of the true tree count) at the best minimum distance
    best_dist = model["best dist"].strip()
    best_abserrs.append((model[f"tree abserr {best_dist}"] / true_tree_ct) * 100)

    pred = next(p for p in preds if p.model == model["model"])

    # Regions of the morphology with the best bIoU. An unknown morphology name
    # raises a KeyError instead of silently re-using the previous model's regions.
    morph_regions = {
        "original": pred.best_og_reg,
        "chull": pred.best_ch_reg,
        "dilated": pred.best_ch_exp_reg,
    }
    reg = morph_regions[model["best biou morph"]]

    # KS-test p-value between the binned true and predicted tree area counts
    areas = [r.area for r in reg]
    pred_bins = np.histogram_bin_edges(areas, config.bins)
    pred_np_hist, pred_bin_e = np.histogram(areas, pred_bins)
    ks = ks_2samp(true_np_hist, pred_np_hist)
    best_ks.append(ks[1])

stats_df["best abserr"] = best_abserrs
stats_df["best ks"] = best_ks

In [None]:
# Columns that go into the final table, and their presentation names in the same order
cols = ["labels_trained", "weight type", "best dist", "best biou morph", "best biou", "best abserr", "best ks"]
pretty_cols = ["Label Set", "Weights", "Best Min-Dist", "Best Morph", "bIoU", "Tree Absolute Error", "KS-Test p"]

# New frame with only these columns, renamed for presentation
final_stats = stats_df[cols].rename(columns=dict(zip(cols, pretty_cols)))

In [None]:
# Show the table ordered from the lowest to the highest tree count error
final_stats.sort_values(by="Tree Absolute Error")

In [None]:
# Display format per column; the numeric values are replaced by formatted strings
format_mapping = {
    'bIoU': '{:,.3f}',
    'Tree Absolute Error': '{:.2f}%',
    'KS-Test p': '{:.3f}',
}
for key, value in format_mapping.items():
    final_stats[key] = final_stats[key].map(value.format)

In [None]:
# Render the final table as an HTML string, without the index column
final_stats.to_html(index=False)

In [None]:
fn = "../../data/dap05/combined/full_size/rgb/393_5823_RGB_2020.tif"
# The context manager closes the raster file once the pixels have been read
with rio.open(fn) as rgb_rio:
    rgb = np.moveaxis(rgb_rio.read(), 0, -1)  # Shift bands to last axis

# Prediction of the model stored under index label 4 of stats_df
model_name = stats_df.loc[4]["model"]
p = next(pred_obj for pred_obj in preds if pred_obj.model == model_name)

fig, ax = plt.subplots(1, 1, figsize=(10, 6))

# Colormap with one random rainbow colour per dilated convex hull region
rainbow = mpl.colormaps["rainbow"]
cmap = ListedColormap(rainbow(np.random.random(len(p.best_ch_exp_reg))))

# Full RGB tile with the dilated convex hulls drawn on top
ax.imshow(rgb)
im = ax.imshow(
    utils.mask_bg(p.best_ch_exp),
    cmap=cmap,
    alpha=0.5,
)

ax.axis("off");

# Like every other figure in this notebook, only write to disk when SAVE_FIGS is set
if SAVE_FIGS:
    plt.savefig(os.path.join(config.figures_dir, "banner.png"), bbox_inches="tight");