In [None]:
%load_ext autoreload
%autoreload 2
import os
os.chdir("/scratch/ewalt/pdm/rs-uncertainty")
import matplotlib.patches as patches
import matplotlib.pyplot as plt
from src.metrics import StratifiedRCU
from src.viz import *
from pathlib import Path
import rasterio
import fiona
from datetime import datetime
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import trange
import random
import yaml
import fiona
import rasterio.warp
import rasterio.features
# Seaborn styling for every figure, and a fixed seed for Python's `random` module
# (numpy's global RNG is not seeded here).
sns.set()
sns.set_style("whitegrid")
random.seed(123)

# Results of the experiment whose data is labelled "gee" in the comparisons below.
RESDIR = "results/cloud_exp/2023-06-20_16-14-11" #"results/cloud_exp/2023-05-26_15-55-46"
# NOTE(review): S2DIR is not referenced in the visible cells — confirm it is still needed.
S2DIR = "gee_data/reprojected/"
# Passed to getPaths as `s2repr_dirs` together with RESDIR entries.
S2REPRDIR = "gee_data/reprojected_dirs"
# Holds "1023.tif" shown by showSplit; also passed as `gt_dir` when loading metrics.
GTDIR = "assets/data/preprocessed"
# Counterparts of the directories above for the data labelled "original" below.
SANITYRESDIR = "results/cloud_exp/2023-05-31_11-23-56_sanity_check" # results
SANITYS2DIR = "assets/data/sentinel_data/s2_reprojected" # s2 reprojected
SANITYS2REPRDIR = "gee_data/sanity_check/" # restructured s2 reprojected
SPLITMASKDIR = "assets/data/split_masks/" # split masks
# NOTE(review): SHAPEFILES and EMPIRICAL_CP_THRESHOLD are not used in the visible cells.
SHAPEFILES = ['assets/data/NHM_projectDekning_AOI_edit2015_V2.shp', 'assets/data/ALS_projects_Dz_all_norway.shp']
# YAML file providing the label statistics loaded just below.
STATSFILE = "data/2023-04-05_18-58-33_baseline/stats.yaml"
EMPIRICAL_CP_THRESHOLD = 7.9

# Label mean/std sequences read from the stats file.
with open(STATSFILE, "r") as f:
    stats = yaml.safe_load(f)
TRAINMEANS = stats["labels_stats"]["mean"]
TRAINSTDS = stats["labels_stats"]["std"]
# Divide entries 2 and 4 by 100 (presumably 'Dens' and 'Cover', if the stats follow the
# VARIABLES order below — TODO confirm). TRAINMEANS/TRAINSTDS alias the objects stored
# in `stats`, so those are modified as well.
for i in [2,4]:
    TRAINMEANS[i] /= 100
    TRAINSTDS[i] /= 100
    
VARIABLES = ['P95', 'MeanH', 'Dens', 'Gini', 'Cover']

# Experiment result directories: keep the entries of RESDIR that contain an "rcu.json"
# file, then drop the two acquisitions listed as outliers.
result_dirs = [
    entry.path
    for entry in os.scandir(RESDIR)
    if os.path.exists(os.path.join(entry.path, "rcu.json"))
]
outliers = [os.path.join(RESDIR, f"1023_{d}") for d in [
    "20180503T104019", # index: 3, avgcp: 42, all white
    "20180620T105031", # index: 6, avgcp: 0., all white
]]
# Substring match on the path, written with the `in` operator instead of the dunder call.
result_dirs = [r for r in result_dirs if not any(o in r for o in outliers)]
len(result_dirs)

## Cloud probability in both sources

In [None]:
# Average cloud probability per retained GEE acquisition, shown as a histogram.
# The value is the mean of the band selected with bands=-1 (presumably the last band).
avg = [
    float(
        loadRaster(
            getPaths(run_dir, s2repr_dirs=S2REPRDIR, returns=["img"]),
            bands=-1,
        ).mean()
    )
    for run_dir in result_dirs
]
data = pd.DataFrame({"average cloud probability": avg})
fig, ax = plt.subplots(figsize=(12, 4))
sns.histplot(data, x="average cloud probability", bins=10, ax=ax)
ax.set_yscale("log")
fig.tight_layout()
savefigure(fig, "images/gee_vs_original/gee_avg_cp_histo")
plt.show()

In [None]:
# pixel-wise in GEE vs original
def _cloud_probability_counts(results_dir, s2repr_dirs, bin_edges):
    """Accumulate one cloud-probability histogram over every entry of `results_dir`.

    Args:
        results_dir: directory whose entries are each resolved to an image via getPaths.
        s2repr_dirs: forwarded to getPaths as `s2repr_dirs`.
        bin_edges: histogram bin edges handed to np.histogram.

    Returns:
        Float array of length len(bin_edges) - 1 with the summed per-bin pixel counts.
    """
    counts = np.zeros(len(bin_edges) - 1)
    for entry in os.scandir(results_dir):
        img_path = getPaths(entry.path, s2repr_dirs=s2repr_dirs, returns=["img"])
        cp = loadRaster(img_path, bands=-1)
        counts += np.histogram(cp, bin_edges)[0]
    return counts


all_cps = np.arange(0,101)
# NOTE(review): RESDIR is scanned in full here, so the acquisitions excluded from
# `result_dirs` as outliers are still counted — confirm this is intended.
cp_frames = []
for source, results_dir, s2repr_dirs in [
    ("original", SANITYRESDIR, SANITYS2REPRDIR),
    ("gee", RESDIR, S2REPRDIR),
]:
    counts = _cloud_probability_counts(results_dir, s2repr_dirs, all_cps)
    cp_frames.append(pd.DataFrame({
        "cloud probability": all_cps[:-1],
        "counts": counts,
        "source": source,
        # Plain relative frequency; the log scale is applied on the axis below.
        "log frequency": counts / counts.sum(),
    }))
odata, gdata = cp_frames
data = pd.concat(cp_frames, axis=0)
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(12, 3))
sns.lineplot(data=data, x="cloud probability", y="log frequency", hue="source", ax=ax)
ax.set(yscale="log", xlim=(0,100))
# One legend call: the original built a legend to blank its title and then replaced it
# with a second legend, which discarded the first.
ax.legend(loc="lower left", title="")
plt.tight_layout()
savefigure(fig, "images/gee_vs_original/gee_vs_orig_cp_frequencies")
plt.show()

## Validation

In [None]:
# Index windows (i and j axes) of the train and test patches used in the figures below.
train_islice, train_jslice = slice(400, 530), slice(20, 170)
test_islice, test_jslice = slice(730, 810), slice(1080, 1170)

In [None]:
# Read band 1 of the split-mask raster "1023.tif".
split_mask_path = os.path.join(SPLITMASKDIR, "1023.tif")
with rasterio.open(split_mask_path) as mask_src:
    split_mask = mask_src.read(1)

In [None]:
# Split mask over raster 1023 with the train ("r") and test ("b") patch windows.
# Fix: the comma after `patches_color` was missing, which made the cell a SyntaxError.
showSplit(
    os.path.join(GTDIR, "1023.tif"),
    split_mask,
    [train_islice, test_islice],
    [train_jslice, test_jslice],
    patches_color=["r", "b"],
    save_name="images/gee_vs_original/split_and_patches"
)

In [None]:
# Same raster and split mask as the call above, without patch slices or colors;
# saved under a different image folder.
showSplit(
    os.path.join(GTDIR, "1023.tif"),
    split_mask,
    save_name="images/albecker/splits_1023"
)

In [None]:
# Pair each "original" result directory with the GEE result directory of the same name.
# RESDIR is scanned once (the original re-scanned it for every original directory), and
# pairs are sorted by path because os.scandir yields entries in arbitrary order, which
# made the column order of the figures below depend on the filesystem.
gee_dirs_by_name = {entry.name: entry.path for entry in os.scandir(RESDIR)}
matching_dirs = sorted(
    (entry.path, gee_dirs_by_name[entry.name])
    for entry in os.scandir(SANITYRESDIR)
    if entry.name in gee_dirs_by_name
)
orig_paths = [orig_dir for orig_dir, _ in matching_dirs]
gee_paths = [gee_dir for _, gee_dir in matching_dirs]
# Titles are the acquisition dates parsed from names of the form "<id>_<YYYYMMDD>T<...>".
titles = [
    datetime.strptime(Path(orig_dir).name.split("_")[1].split("T")[0], '%Y%m%d').strftime("%d.%m.%Y")
    for orig_dir in orig_paths
]
len(matching_dirs), len(orig_paths), len(gee_paths), len(titles)

In [None]:
# RGB matching images: every view is rendered for the original data first, then for GEE.
rgb_sources = (
    ("original", orig_paths, SANITYS2REPRDIR),
    ("gee", gee_paths, S2REPRDIR),
)
rgb_views = (
    # (islice, jslice, draw_bbox, color, save-name pattern)
    (None, None, False, None, "images/defense/rgb_{}"),
    ([train_islice, test_islice], [train_jslice, test_jslice], True, ["r", "b"],
     "images/gee_vs_original/rgb_{}"),
    (train_islice, train_jslice, True, "r", "images/gee_vs_original/rgb_{}_train_only"),
)
for view_islice, view_jslice, view_bbox, view_color, save_pattern in rgb_views:
    for source_name, source_paths, source_s2repr_dir in rgb_sources:
        showRGB(
            source_paths,
            source_s2repr_dir,
            titles,
            figsize=(12, 3),
            islice=view_islice,
            jslice=view_jslice,
            draw_bbox=view_bbox,
            color=view_color,
            save_name=save_pattern.format(source_name),
        )

### Raster-level analysis

In [None]:
def scVisual(split, normalize):
    """Draw paired (original vs GEE) maps and histograms of every variable on one patch.

    Args:
        split: "train" or "test"; selects the notebook-level train/test patch slices.
        normalize: forwarded unchanged to showPairedMaps and showPairedHistograms.

    Raises:
        ValueError: if `split` is neither "train" nor "test".
    """
    if split == "train":
        islice, jslice = train_islice, train_jslice
    elif split == "test":
        islice, jslice = test_islice, test_jslice
    else:
        raise ValueError(f"split must be 'train' or 'test', got {split!r}")
    # The helpers receive the 1-based position of the variable alongside its name.
    for band, variable_name in enumerate(VARIABLES, start=1):
        # maps
        showPairedMaps(
            matching_dirs,
            band,
            variable_name,
            islice=islice,
            jslice=jslice,
            normalize=normalize,
            save_name=f"images/gee_vs_original/maps_{variable_name}_{split}"
        )
        # histos
        showPairedHistograms(
            matching_dirs,
            band,
            variable_name,
            islice=islice,
            jslice=jslice,
            log_mean=False,
            log_uncertainty=True,
            normalize=normalize,
            save_name=f"images/gee_vs_original/histograms_{variable_name}_{split}"
        )

In [None]:
# Paired maps and histograms of every variable on the train patch (normalize=False).
scVisual(split="train", normalize=False)

In [None]:
# Paired maps and histograms of every variable on the test patch (normalize=False).
scVisual(split="test", normalize=False)

### Input bands

In [None]:
# Latex tables: one block of "a & b & ... \tabularnewline" rows per matching acquisition,
# preceded by its date; the per-pair frames are also collected in `s2dfs`.
s2dfs = []
for pair in matching_dirs:
    band_stats = compareS2Bands(pair, SANITYS2REPRDIR, S2REPRDIR)
    acquisition_day = Path(pair[0]).name.split("_")[1].split("T")[0]
    print(datetime.strptime(acquisition_day, '%Y%m%d').strftime("%d.%m.%Y"))
    for _, row in band_stats.round(3).iterrows():
        cells = list(map(str, row.values))
        # Keep only the part before the first "." in the leading cell.
        cells[0] = cells[0].split(".")[0]
        print(" & ".join(cells) + " \\tabularnewline")
    print()
    s2dfs.append(band_stats)

In [None]:
# Sentinel 2 differences raster-level: one figure per matching acquisition, with one row
# per compared band and the columns original / gee / difference (gee minus original).
compared_bands = [1, 4, 12]
for pair in matching_dirs:
    od, gd = pair
    fig, axs = plt.subplots(ncols=3, nrows=len(compared_bands), figsize=(12,10))
    os2 = loadRaster(getPaths(od, s2repr_dirs=SANITYS2REPRDIR, returns=["img"]),
                     dtype=float, bands=compared_bands)
    gs2 = loadRaster(getPaths(gd, s2repr_dirs=S2REPRDIR, returns=["img"]),
                     dtype=float, bands=compared_bands)
    # Clipped copies are only used for display; differences use the unclipped values.
    os2c = clip(os2.copy(), (100, 2000))
    gs2c = clip(gs2.copy(), (100, 2000))
    for i, band in enumerate(compared_bands):
        axs[i,0].set_title("original")
        axs[i,1].set_title("gee")
        axs[i,2].set_title("difference")
        # Shared color range so both sources are directly comparable.
        vmin = min(np.nanmin(os2c[i]), np.nanmin(gs2c[i]))
        vmax = max(np.nanmax(os2c[i]), np.nanmax(gs2c[i]))
        sns.heatmap(os2c[i], ax=axs[i,0], vmin=vmin, vmax=vmax)
        sns.heatmap(gs2c[i], ax=axs[i,1], vmin=vmin, vmax=vmax)
        sns.heatmap(gs2[i]-os2[i], ax=axs[i,2], vmin=-1, vmax=1, cmap="bwr")
    dat = datetime.strptime(
        Path(pair[0]).name.split("_")[1].split("T")[0], '%Y%m%d').strftime("%d.%m.%Y")
    for ax in axs.flatten():
        ax.set_xticks([])
        ax.set_yticks([])
    # Row labels are set after the heatmaps are drawn, as in the original cell.
    for i, band in enumerate(compared_bands):
        axs[i,0].set_ylabel(f"Band {band}")
    fig.suptitle(dat)
    plt.tight_layout()
    fig.savefig(f"images/gee_vs_original/stats_comp_map_{dat.replace('.','-')}.png", dpi=300)
    plt.show()
    # Release the figure so looping over many acquisitions does not accumulate memory.
    plt.close(fig)

###  Predictions

In [None]:
def _collect_map_stats(split):
    """Gather the CompareMapsStats frames of every matching pair for one split.

    Args:
        split: value forwarded to CompareMapsStats as `split` ("train" or "test" here).

    Returns:
        Two frames (the first and second outputs of CompareMapsStats, concatenated over
        all pairs), each with an added "date" column and a reset index.
    """
    first_frames, second_frames = [], []
    for pair in matching_dirs:
        dat = datetime.strptime(
            Path(pair[0]).name.split("_")[1].split("T")[0], '%Y%m%d').strftime("%d.%m.%Y")
        mdf, pudf = CompareMapsStats(pair, VARIABLES,
                                     split_mask=split_mask, split=split)
        first_frames.append(mdf.assign(date=dat))
        second_frames.append(pudf.assign(date=dat))
    if not first_frames:
        # No matching pair: mirror what the original incremental concat produced.
        return pd.DataFrame().reset_index(), pd.DataFrame().reset_index()
    # Concatenate once instead of growing the frames inside the loop.
    return (pd.concat(first_frames, axis=0).reset_index(),
            pd.concat(second_frames, axis=0).reset_index())


train_mdf, train_pudf = _collect_map_stats("train")
test_mdf, test_pudf = _collect_map_stats("test")

In [None]:
def print_report(df, dt, variables=None):
    """Print one LaTeX table row per variable with the relative deltas of a single date.

    Args:
        df: statistics frame with at least the columns "date", "variable", "stat",
            "split" and "relative delta (%)".
        dt: value selecting the rows to report (compared against the "date" column).
        variables: variable names to report, in row order; defaults to the
            notebook-level VARIABLES.

    Raises:
        IndexError: if a (variable, stat) combination has no row for `dt`.
    """
    if variables is None:
        variables = VARIABLES
    # Filter on the date first so only the relevant rows are aggregated.
    per_date = df[df["date"] == dt]
    # numeric_only=True: pandas >= 2.0 raises when asked to average non-numeric columns.
    per_date = (
        per_date.groupby(["date", "variable", "stat", "split"])
        .mean(numeric_only=True)
        .reset_index()
    )
    for v in variables:
        cells = ["\\texttt{"+v+"}"]
        for stat in ["min", "max", "mean", "median", "std"]:
            selected = per_date[(per_date["variable"] == v) & (per_date["stat"] == stat)]
            cnt = selected["relative delta (%)"].values[0]
            cells.append(str(round(cnt,3)))
        print("\t\t"+" & ".join(cells) + " \\tabularnewline")

In [None]:
# Print latex tables: one report per date for each of the four statistics frames.
report_tables = {
    "test_mean": test_mdf,
    "test_pu": test_pudf,
    "train_mean": train_mdf,
    "train_pu": train_pudf,
}
for table_name, table in report_tables.items():
    is_test_table = table_name.startswith("test")
    for dt in table["date"].unique():
        # The 05.06.2018 acquisition is left out of the test-split reports.
        if is_test_table and dt == "05.06.2018":
            continue
        print(dt, table_name)
        print_report(table, dt=dt)
    print()

## Metrics

In [None]:
def getScMetrics(metrics):
    """Load the requested metrics of the matching directories for both splits.

    Args:
        metrics: forwarded to loadMetricsDataFrame as `metrics`.

    Returns:
        A (train_df, test_df) tuple: the loadMetricsDataFrame results for
        split="train" and split="test".
    """
    shared_kwargs = dict(split_mask=split_mask, metrics=metrics, gt_dir=GTDIR)
    return tuple(
        loadMetricsDataFrame(matching_dirs, split=split_name, **shared_kwargs)
        for split_name in ("train", "test")
    )

In [None]:
# Load the "ence", "rmse" and "srp" metrics for the train and test splits.
train_df, test_df = getScMetrics(["ence", "rmse", "srp"])

In [None]:
# Bar plot of the train and test metrics for every entry of VARIABLES.
plotTrainTestMetricsDataFrames(
    train_df, 
    test_df,
    type="bar",
    metrics=["rmse", "srp", "ence"],
    save_name="images/gee_vs_original/train_test_metrics_bar_all-variables",
    variables=VARIABLES,
    figsize=(12,12)
)

### Summary (defense)

In [None]:
# Sentinel-2 and predictions differences raster-level
# One column per matching acquisition; rows: RGB, original-minus-GEE difference of S2
# bands 1, 4 and 12, then of band 3 of the mean and of sqrt(variance) (labelled "Dens").

# NOTE(review): the original cell read `islice`/`jslice` without ever defining them at
# notebook level, so it failed with NameError on a fresh kernel. The train patch is
# assumed here — confirm this is the window intended for the figure.
islice, jslice = train_islice, train_jslice


def _orig_minus_gee(orig_path, gee_path, band, **load_kwargs):
    """Return the original-minus-GEE difference of one raster band inside the window."""
    common = dict(bands=band, dtype="float", islice=islice, jslice=jslice, **load_kwargs)
    return loadRaster(orig_path, **common) - loadRaster(gee_path, **common)


mds = matching_dirs
s2_bands = [1, 4, 12]
# squeeze=False keeps `axs` two-dimensional even when there is a single matching pair.
fig, axs = plt.subplots(ncols=len(mds), nrows=6, figsize=(4*len(mds),18), squeeze=False)
for i, pair in enumerate(mds):
    od, gd = pair
    # Only the last column carries the colorbars.
    show_cbar = i == len(mds) - 1
    orig_img_path, orig_mean_path, orig_var_path = getPaths(
        od, s2repr_dirs=SANITYS2REPRDIR,
        returns=["img", "mean", "variance"]
    )
    gee_img_path, gee_mean_path, gee_var_path = getPaths(
        gd, s2repr_dirs=S2REPRDIR,
        returns=["img", "mean", "variance"]
    )
    rgb = loadRaster(
        orig_img_path,
        bands=[4,3,2],
        transpose_order=(1,2,0),
        clip_range=(100,2000),
        islice=islice,
        jslice=jslice
    )
    axs[0,i].imshow(rgb)
    for row, band in enumerate(s2_bands, start=1):
        sns.heatmap(_orig_minus_gee(orig_img_path, gee_img_path, band),
                    ax=axs[row,i], vmin=-1, vmax=1, cmap="bwr", cbar=show_cbar)
    # Clamp the prediction deltas to a fixed range before rescaling them.
    delta_mean = np.clip(_orig_minus_gee(orig_mean_path, gee_mean_path, 3), -.5, .5)
    delta_pu = np.clip(
        _orig_minus_gee(orig_var_path, gee_var_path, 3, elementwise_fn=np.sqrt),
        -.06, .06)
    delta_mean = norm2d(delta_mean, -.5, .5, -1, 1)
    delta_pu = norm2d(delta_pu, -.06, .06, -1, 1)
    # Assumes norm2d maps the clamped range onto [-1, 1] — TODO confirm. Pinning the
    # color limits makes the single colorbar (last column) valid for every column.
    sns.heatmap(delta_mean, ax=axs[4,i], vmin=-1, vmax=1, cmap="bwr", cbar=show_cbar)
    sns.heatmap(delta_pu, ax=axs[5,i], vmin=-1, vmax=1, cmap="bwr", cbar=show_cbar)
    dat = datetime.strptime(
        Path(pair[0]).name.split("_")[1].split("T")[0], '%Y%m%d').strftime("%d.%m.%Y")
    axs[0,i].set_title(dat)
    # Row labels go on the first column only, after the heatmaps are drawn.
    if i==0:
        axs[0,i].set_ylabel("RGB")
        axs[1,i].set_ylabel("Delta S2 band 1")
        axs[2,i].set_ylabel("Delta S2 band 4")
        axs[3,i].set_ylabel("Delta S2 band 12")
        axs[4,i].set_ylabel("Delta mean\n(Dens)")
        axs[5,i].set_ylabel("Delta predictive uncertainty\n(Dens)")
for ax in axs.flatten():
    ax.set_xticks([])
    ax.set_yticks([])
plt.tight_layout()
fig.savefig("images/defense/raster_level_gee_validation.png", dpi=300)
plt.show()