In [None]:
%load_ext autoreload
%autoreload 2
import os
os.chdir("/scratch/ewalt/pdm/rs-uncertainty")
import matplotlib.patches as patches
import matplotlib.pyplot as plt
from src.metrics import StratifiedRCU
from src.viz import *
from pathlib import Path
import rasterio
import fiona
from datetime import datetime
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import trange
import random
import yaml
import fiona
import rasterio.warp
import rasterio.features
# Plot styling: seaborn defaults with the "whitegrid" style on top.
sns.set()
sns.set_style("whitegrid")
# Seeds Python's `random` module only; NumPy's generator is not seeded here.
random.seed(123)

# All paths below are relative to the working directory set by os.chdir above.
# Results of the cloud experiment (the trailing comment keeps an earlier run for reference).
RESDIR = "results/cloud_exp/2023-06-20_16-14-11" #"results/cloud_exp/2023-05-26_15-55-46"
S2DIR = "gee_data/reprojected/"
S2REPRDIR = "gee_data/reprojected_dirs"
GTDIR = "assets/data/preprocessed"
# Sanity-check counterparts of the directories above.
SANITYRESDIR = "results/cloud_exp/2023-05-31_11-23-56_sanity_check" # results
SANITYS2DIR = "assets/data/sentinel_data/s2_reprojected" # s2 reprojected
SANITYS2REPRDIR = "gee_data/sanity_check/" # restructured s2 reprojected
SPLITMASKDIR = "assets/data/split_masks/" # split masks
SHAPEFILES = ['assets/data/NHM_projectDekning_AOI_edit2015_V2.shp', 'assets/data/ALS_projects_Dz_all_norway.shp']
# YAML file holding the training-set label statistics (read right below).
STATSFILE = "data/2023-04-05_18-58-33_baseline/stats.yaml"
# Cloud-probability threshold passed to computeValidCenterFraction and used as a
# histogram bin edge further down (presumably in percent -- TODO confirm units).
EMPIRICAL_CP_THRESHOLD = 7.9

# Names of the five variables handled throughout this notebook.
VARIABLES = ['P95', 'MeanH', 'Dens', 'Gini', 'Cover']

# Training-set label statistics, read from the YAML stats file.
with open(STATSFILE, "r") as stats_file:
    stats = yaml.safe_load(stats_file)
labels_stats = stats["labels_stats"]
TRAINMEANS, TRAINSTDS = labels_stats["mean"], labels_stats["std"]
# Entries 2 and 4 (positions of 'Dens' and 'Cover' in VARIABLES) are divided by
# 100 in place -- presumably a percent -> fraction conversion, TODO confirm.
for band in (2, 4):
    for band_stats in (TRAINMEANS, TRAINSTDS):
        band_stats[band] /= 100

# Experiment result directories: every entry of RESDIR that contains an
# `rcu.json` file. The scandir iterator is closed explicitly, and the paths are
# sorted because os.scandir yields entries in arbitrary, filesystem-dependent
# order (which would make `result_dirs[0]` differ between machines/runs).
with os.scandir(RESDIR) as entries:
    result_dirs = sorted(
        entry.path for entry in entries
        if os.path.exists(os.path.join(entry.path, "rcu.json"))
    )
# Acquisitions excluded from the analysis (see the per-line notes).
outliers = [os.path.join(RESDIR, f"1023_{d}") for d in [
    "20180503T104019", # index: 3, avgcp: 42, all white
    "20180620T105031", # index: 6, avgcp: 0., all white
]]
# Substring match is kept on purpose: any directory whose path contains an
# outlier path is dropped.
result_dirs = [r for r in result_dirs if not any(o in r for o in outliers)]
len(result_dirs)

## Four conditions

In [None]:
# Four hand-picked acquisitions and the label used for each of them in the
# figures; both lists are passed side by side to the plotting helpers.
conditions = ["no cloud", "thin clouds", "cloudy", "very cloudy"]
selected_result_dirs = [
    os.path.join(RESDIR, '1023_20180526T105029'),
    os.path.join(RESDIR, '1023_20180925T104021'),
    os.path.join(RESDIR, '1023_20180918T105021'),
    os.path.join(RESDIR, '1023_20180528T104021'),
]
# Window handed to the helpers through their `islice` / `jslice` arguments.
islice = slice(470, 600)
jslice = slice(150, 300)

In [None]:
# RGB views of the four selected acquisitions, saved under two names.
rgb_args = (selected_result_dirs, S2REPRDIR, conditions)
# First call: draw_bbox=True with a red ("r") colour.
showRGB(*rgb_args, islice=islice, jslice=jslice, draw_bbox=True, color="r",
        save_name="images/cloud_experiment/rgb_complete")
# Second call: same window, helper defaults for everything else.
showRGB(*rgb_args, islice=islice, jslice=jslice,
        save_name="images/cloud_experiment/rgb_cropped")

In [None]:
# Cloud distribution plot for the four selected conditions, restricted to the
# (islice, jslice) window.
showConditionCloudDistribution(
    selected_result_dirs, conditions, S2REPRDIR,
    islice=islice, jslice=jslice,
    save_name="images/cloud_experiment/conditions_cp_distro",
)

In [None]:
# Valid-centre fractions for the four conditions, using the empirical
# cloud-probability threshold; the returned object is displayed as cell output.
seldata = computeValidCenterFraction(
    selected_result_dirs, conditions, S2REPRDIR, EMPIRICAL_CP_THRESHOLD,
    islice=islice, jslice=jslice, figsize=(10,4),
    save_name="images/cloud_experiment/selected_fractions_4_conditions",
)
seldata

## Raster-level

In [None]:
# Report R/U/C rasters.
# Bounds handed to the map helpers; five values each, presumably one per entry
# of VARIABLES in that order (TODO confirm against src.viz).
rerror_bounds = [15,11,0.5,0.18,0.55] # regression residuals
cerror_pu_bounds = [9, 6.5, 0.35, 0.12, 0.42] # calibration residuals, pu, absolute regression residuals
kwargs = {
    "predictive_uncertainty_bounds": cerror_pu_bounds,
    "rerror_bounds": rerror_bounds,
    "cerror_bounds": cerror_pu_bounds,
}
# Keyword arguments shared by every map call below.
map_opts = dict(kwargs, islice=islice, jslice=jslice, normalize=False)

# Dens, Gini => separate R, U and C figures for readability
for varname in ["Dens", "Gini"]:
    varindex = VARIABLES.index(varname)+1  # position in VARIABLES, shifted by one
    map_args = (selected_result_dirs, conditions, varindex, varname,
                S2REPRDIR, GTDIR, SHAPEFILES)
    showRegressionMaps(
        *map_args, figsize=(15,13),
        save_name=f"images/cloud_experiment/regression_maps_{varname}",
        **map_opts
    )
    showUsabilityMaps(
        *map_args, figsize=(15,6.5),
        save_name=f"images/cloud_experiment/usability_maps_{varname}",
        **map_opts
    )
    showCalibrationMaps(
        *map_args, figsize=(15,13),
        save_name=f"images/cloud_experiment/calibration_maps_{varname}",
        **map_opts
    )

# other variables => full matrix in a single figure
for varname in ["P95", "MeanH", "Cover"]:
    varindex = VARIABLES.index(varname)+1
    showPredictionMaps(
        selected_result_dirs, conditions, varindex, varname,
        S2REPRDIR, GTDIR, SHAPEFILES,
        figsize=(15,22.75),
        save_name=f"images/cloud_experiment/RUC_maps_{varname}",
        **map_opts
    )

## Image-level

In [None]:
# Build the visualizer used by all image- and project-level plots below. The
# helper also returns the result directories it kept and the matching
# experiment variable per directory (judging by its name, the non-zero average
# cloud probability -- TODO confirm in src.viz).
visualizer, nnz_result_dirs, exp_vars = get_nonzero_avg_cp_visualizer(result_dirs, S2REPRDIR, VARIABLES)

In [None]:
# Exploded RMSE data: binned over the full rasters (no window), 15 bins.
bin_means, bin_gts, bin_counts = computeExplodedResiduals(
    nnz_result_dirs, GTDIR, S2REPRDIR, VARIABLES,
    islice=None, jslice=None, num_bins=15,
)

In [None]:
# Report exploded plots: one figure per variable group and per plot type.
visualizer.variable_names = VARIABLES
variable_groups = [["Dens", "Gini"], ["P95", "MeanH", "Cover"]]
variable_indexes = [[2,3], [0,1,4]]  # positions of each group's variables in VARIABLES
hi_bounds = [[np.inf,np.inf], [30,30,np.inf]]
sizes = [(12,6), (15,5)]
for group, indexes, bounds, size in zip(variable_groups, variable_indexes, hi_bounds, sizes):
    suffix = "-".join(group)
    n_panels = len(group)
    # Keyword arguments common to both visualizer plots.
    panel_opts = dict(
        variables=group,
        hi_bounds=bounds,
        palette="bwr",
        show_legend=False,
        fig_ncols=n_panels,
        figsize=size,
    )
    # Regression (always unbounded, regardless of `bounds`)
    explodedRMSEplot(
        bin_means, bin_gts, group, indexes, exp_vars,
        hi_bound=[np.inf] * n_panels,
        ncols=n_panels,
        figsize=size,
        save_name=f"images/cloud_experiment/rmse_exploded_{suffix}"
    )
    # Usability
    visualizer.histogram_plot(
        log=False,
        save_name=f"images/cloud_experiment/predictive_uncertainty_exploded_{suffix}",
        **panel_opts
    )
    # Calibration
    visualizer.calibration_plot(
        "ence",
        k=100,
        log_bins=False,
        save_name=f"images/cloud_experiment/ence_exploded_{suffix}",
        **panel_opts
    )

In [None]:
# Defense exploded plots: same three plot types, one figure per single variable.
visualizer.variable_names = VARIABLES
variable_groups = [[name] for name in VARIABLES]
variable_indexes = [[position] for position in range(len(VARIABLES))]
hi_bounds = [[30], [30], [np.inf],[np.inf], [np.inf]]
sizes = [(6.7, 5.3)] * len(VARIABLES)
for group, indexes, bounds, size in zip(variable_groups, variable_indexes, hi_bounds, sizes):
    suffix = "-".join(group)
    n_panels = len(group)
    # Keyword arguments common to both visualizer plots (titles are disabled).
    panel_opts = dict(
        variables=group,
        hi_bounds=bounds,
        palette="bwr",
        show_legend=False,
        fig_ncols=n_panels,
        figsize=size,
        add_title=False,
    )
    # Regression (always unbounded, regardless of `bounds`)
    explodedRMSEplot(
        bin_means, bin_gts, group, indexes, exp_vars,
        hi_bound=[np.inf] * n_panels,
        ncols=n_panels,
        figsize=size,
        save_name=f"images/defense/rmse_exploded_{suffix}"
    )
    # Usability
    visualizer.histogram_plot(
        log=False,
        save_name=f"images/defense/predictive_uncertainty_exploded_{suffix}",
        **panel_opts
    )
    # Calibration
    visualizer.calibration_plot(
        "ence",
        k=100,
        log_bins=False,
        save_name=f"images/defense/ence_exploded_{suffix}",
        **panel_opts
    )

In [None]:
# Invite ENCE exploded plot: all variables side by side in one wide figure.
# Note: `hi_bounds` becomes a flat list here (one bound per variable).
hi_bounds = [30,30,np.inf,np.inf, np.inf]
visualizer.calibration_plot(
    "ence", k=100,
    variables=VARIABLES, hi_bounds=hi_bounds,
    log_bins=False, palette="bwr", show_legend=False,
    fig_ncols=len(VARIABLES), figsize=(21,5),
    save_name=f"images/invite/ence_exploded_{'-'.join(VARIABLES)}",
    add_title=True, invite=True,
)

## Project-level

In [None]:
# Bin edges over the experiment variable; the second edge is the empirical
# cloud-probability threshold. `bins` ends up holding the edges returned by
# np.histogram and `counts` the number of acquisitions per bin.
cp_edges = [0, EMPIRICAL_CP_THRESHOLD, 25, 60, 100]
counts, bins = np.histogram(np.array(exp_vars), bins=np.array(cp_edges))

In [None]:
def showMetrics(visualizer, bins, metrics, kind="agg"):
    """Draw one report-sized metric boxplot per (variable, metric) pair.

    For each variable, `visualizer.variable_names` is temporarily narrowed to
    that single variable and `visualizer.metric_boxplot` is called once per
    metric with a save name of the form
    ``images/cloud_experiment/metric_plot_<metric>_<kind>_<variable>``.

    Args:
        visualizer: object with a writable `variable_names` attribute and a
            `metric_boxplot` method.
        bins: forwarded to `metric_boxplot` as `exp_var_bins`.
        metrics: iterable of metric names, forwarded one at a time.
        kind: forwarded as the second positional argument of `metric_boxplot`.

    The original `visualizer.variable_names` is restored before returning,
    including when a plot call raises.
    """
    prev_vg = visualizer.variable_names
    # "Cover" is capitalised exactly as in VARIABLES (it used to be "cover").
    variable_groups = [["P95"], ["MeanH"], ["Dens"], ["Gini"], ["Cover"]]
    try:
        for vg in variable_groups:
            visualizer.variable_names = vg
            for metric in metrics:
                visualizer.metric_boxplot(
                    metric,
                    kind,
                    exp_var_bins=bins,
                    save_name=f"images/cloud_experiment/metric_plot_{metric}_{kind}_{'-'.join(vg)}",
                    figsize=(12,12/len(vg)),
                    fig_ncols=len(vg)
                )
    finally:
        # Never leave the shared visualizer narrowed to a single variable.
        visualizer.variable_names = prev_vg
    
def showDefenseMetrics(visualizer, bins, metrics, kind="agg"):
    """Draw one slide-sized, untitled metric boxplot per (variable, metric) pair.

    For each variable, `visualizer.variable_names` is temporarily narrowed to
    that single variable and `visualizer.metric_boxplot` is called once per
    metric with `add_title=False`, a fixed (6.7, 5.3) figure size and a save
    name of the form
    ``images/defense/metric_plot_<metric>_<kind>_<variable>``.

    Args:
        visualizer: object with a writable `variable_names` attribute and a
            `metric_boxplot` method.
        bins: forwarded to `metric_boxplot` as `exp_var_bins`.
        metrics: iterable of metric names, forwarded one at a time.
        kind: forwarded as the second positional argument of `metric_boxplot`.

    The original `visualizer.variable_names` is restored before returning,
    including when a plot call raises.
    """
    prev_vg = visualizer.variable_names
    variable_groups = [["P95"], ["MeanH"], ["Dens"], ["Gini"], ["Cover"]]
    try:
        for vg in variable_groups:
            visualizer.variable_names = vg
            for metric in metrics:
                visualizer.metric_boxplot(
                    metric,
                    kind,
                    exp_var_bins=bins,
                    save_name=f"images/defense/metric_plot_{metric}_{kind}_{'-'.join(vg)}",
                    figsize=(6.7,5.3),
                    fig_ncols=len(vg),
                    add_title=False
                )
    finally:
        # Never leave the shared visualizer narrowed to a single variable.
        visualizer.variable_names = prev_vg

In [None]:
# Report metrics: aggregated RMSE / SRP / ENCE boxplots per cloud bin.
showMetrics(visualizer, bins, ["rmse", "srp", "ence"], kind="agg")

In [None]:
# Defense metrics: same metrics and bins, slide-sized figures.
showDefenseMetrics(visualizer, bins, ["rmse", "srp", "ence"], kind="agg")

## Superiority of Gini

In [None]:
import scipy.stats as sps
# Probabilities at which theoretical and empirical quantiles are compared.
q = np.linspace(1e-4, 1-1e-4, 1000)
# Load the ground truth as a (5, n_pixels) array.
gtp = getPaths(result_dirs[0], gt_dir=GTDIR, returns=["gt"])
gt = loadRaster(gtp, bands=None, set_nan_mask=True).reshape(5,-1)
# Drop pixels for which every variable is NaN; partially-NaN pixels remain.
gt = gt[:,~np.isnan(gt).all(0)]
# Rows 2 and 4 ('Dens' and 'Cover' in VARIABLES) are divided by 100, like the
# training statistics -- presumably percent -> fraction, TODO confirm.
for i in [2,4]: gt[i] /= 100
# Standardize each variable with nan-aware statistics.
gt = (gt-np.nanmean(gt, axis=1, keepdims=True))/np.nanstd(gt, axis=1, keepdims=True)
# Standard normal reference and its quantiles (vectorised over q).
dist = sps.norm(loc=0, scale=1)
norm_qs = dist.ppf(q)
pdf_x = np.linspace(-5, 5, 100)
for vg in [["Dens", "Gini"], ["P95", "MeanH", "Cover"]]:
    # Long-format table for the Q-Q plot.
    dfq = {"variable": [],
           "theoretical quantiles": [],
           "empirical quantiles": []}
    for varname in vg:
        i = VARIABLES.index(varname)
        dfq["variable"].extend([varname] * len(q))
        dfq["theoretical quantiles"].extend(norm_qs)
        # nan-aware, consistent with the standardisation above: a single NaN
        # would otherwise turn every empirical quantile into NaN.
        dfq["empirical quantiles"].extend(np.nanquantile(gt[i], q))
    # Left panel: densities against the standard normal pdf; right panel: Q-Q plot.
    fig, axs = plt.subplots(ncols=2, nrows=1, figsize=(12,4))
    axs[0].plot(pdf_x, dist.pdf(pdf_x), color="k", linestyle="dotted")
    for varname in vg:
        i = VARIABLES.index(varname)
        gti = gt[i]
        sns.kdeplot(gti, ax=axs[0], label=varname)
    axs[0].set_xlim(-5, 5)
    axs[0].legend()
    axs[0].set_xlabel("value")
    axs[0].set_title("variable distribution")
    axs[1].plot(norm_qs, norm_qs, color="k", linestyle="dotted")
    sns.lineplot(data=dfq, x="theoretical quantiles", y="empirical quantiles", hue="variable", ax=axs[1])
    axs[1].get_legend().set_title("")
    axs[1].set_title("Q-Q plot")
    plt.tight_layout()
    savefigure(fig, f"images/cloud_experiment/superior_gini_normality_analysis_{'-'.join(vg)}")
    plt.show()

In [None]:
# Residual normality figures for every variable: a report-sized analysis and a
# single-plot version for the defense slides.
for varname in VARIABLES:
    print(varname)
    # Arguments identical for both helpers; the index is the position in
    # VARIABLES shifted by one.
    normality_opts = dict(
        variable_name=varname,
        variable_index=VARIABLES.index(varname)+1,
        exp_vars=exp_vars,
    )
    residualNormalityAnalysis(
        nnz_result_dirs, GTDIR,
        figsize=(6.5,12),
        save_name=f"images/cloud_experiment/superior_gini_residual_analysis-{varname}",
        **normality_opts
    )
    singlePlotNormalityAnalysis(
        nnz_result_dirs, GTDIR,
        figsize=(6.7,5.3),
        save_name=f"images/defense/superior_gini-{varname}",
        **normality_opts
    )

## Convergence to train set expectation

In [None]:
# Difference rasters for the last selected acquisition ("very cloudy").
showMeanDiffWithExpectation(
    selected_result_dirs[-1], TRAINMEANS, VARIABLES,
    islice=islice, jslice=jslice, figsize=(12,2),
    save_name="images/cloud_experiment/convergence_to_expectation_rasters",
)

# Distance plots: one figure per variable group, same data and window.
distance_panels = [
    dict(
        variable_indexes=[3,4],
        variable_names=["Dens", "Gini"],
        ncols=2, nrows=1,
        figsize=(12, 4),
        save_name="images/cloud_experiment/convergence_to_expectation_avg_distance-Dens_Gini",
    ),
    dict(
        variable_indexes=[1,2,5],
        variable_names=["P95", "MeanH", "Cover"],
        ncols=3, nrows=1,
        figsize=(12, 3),
        save_name="images/cloud_experiment/convergence_to_expectation_avg_distance-P95_MeanH_Cover",
    ),
]
for panel in distance_panels:
    showDistanceToTrainsetExpectation(
        dirs=selected_result_dirs,
        titles=conditions,
        s2repr_dirs=S2REPRDIR,
        trainset_means=TRAINMEANS,
        islice=islice,
        jslice=jslice,
        **panel
    )

In [None]:
# Average prediction against cloud probability, over the full rasters (no window).
showCloudVsPrediction(
    nnz_result_dirs, S2REPRDIR, VARIABLES, TRAINMEANS,
    islice=None, jslice=None, figsize=(12,18),
    save_name="images/cloud_experiment/avgpred_vs_cp",
)

## See through clouds

In [None]:
# Compare the first ("no cloud") and last ("very cloudy") selected acquisitions.
# An alternative, smaller window that was tried: slice(550,570), slice(280,300).
clear_dir, cloudy_dir = selected_result_dirs[0], selected_result_dirs[-1]
window = (islice, jslice)

# Paired band histograms of the two acquisitions.
showPairedBandsHistograms(
    clear_dir, cloudy_dir, S2REPRDIR, *window,
    "images/cloud_experiment/see_through_clouds_bands_histograms"
)
# show all S2 bands to validate
showAllSentinel2Bands(
    clear_dir, cloudy_dir, S2REPRDIR, *window,
    "images/cloud_experiment/see_through_clouds_s2bands"
)
# show all S1 bands (closest S1 acquisition to the cloudy S2 one, per the helper's name)
showClosestSentinel1Bands(
    cloudy_dir, "assets/data/sentinel_data/s1_reprojected/1023", *window,
    "images/cloud_experiment/see_through_clouds_s1bands"
)