# RCU Metrics Tests

In [1]:
%load_ext autoreload
%autoreload 2
import os
os.chdir("/scratch/ewalt/pdm/rs-uncertainty")
from src.rcu_metrics import StratifiedRCUSubset, nan_frac
import rasterio
import argparse
import yaml
import numpy as np
from tqdm import tqdm
from pathlib import Path
import pandas as pd

In [2]:
def pjoin(*subs): return Path(os.path.abspath(os.path.join(*subs)))
def get_variance_bounds(cfg, N=2):
    for k, v in cfg.items():
        if k.endswith("_dir"): cfg[k] = Path(v)
    projects = cfg["projects_east"]+cfg["projects_west"]+cfg["projects_north"]
    # loop on projects to get variance bounds
    print(f"Computing variance bounds in {cfg['pkl_dir']}...")
    lo_variance = np.full((5,), np.inf)
    hi_variance = np.full((5,), -np.inf)
    variance_files = []
    # for variance_file in tqdm(list(cfg["prediction_dir"].glob("*_variance.tif"))):
    for variance_file in tqdm(list(cfg["prediction_dir"].glob("*_variance.tif"))[:N]): # Debug
        if variance_file.stem.split("_")[0] not in projects: continue
        variance_files.append(variance_file)
        with rasterio.open(variance_file) as fh:
            variance = fh.read(fh.indexes)
        variance_flat = variance.reshape(5, -1)
        hi = np.nanmax(variance_flat, axis=1)
        lo = np.nanmin(variance_flat, axis=1)
        hi_variance[hi>hi_variance] = hi[hi>hi_variance]
        lo_variance[lo<lo_variance] = lo[lo<lo_variance]
    print("Variances lower bound:", lo_variance.tolist())
    print("Variances upper bound:", hi_variance.tolist())
    # initialize RCU metrics
    return lo_variance, hi_variance, variance_files
def get_projects(cfg, variance_files):
    projects = []
    # Load standardization data
    with pjoin(cfg["pkl_dir"], "stats.yaml").open("r", encoding="utf-8") as f:
        stats = yaml.safe_load(f)
    labels_mean = np.array(stats["labels_stats"]["mean"]).reshape(5,1,1)
    labels_std = np.array(stats["labels_stats"]["std"]).reshape(5,1,1)
    # compute stats online
    for variance_file in tqdm(variance_files):
        # load data
        project = variance_file.stem.split('_')[0]
        # if project not in projects: continue
        with rasterio.open(pjoin(cfg['prediction_dir'], f"{project}_mean.tif")) as fh:
            mean = fh.read(fh.indexes)
        with rasterio.open(variance_file) as fh:
            variance = fh.read(fh.indexes)
        with rasterio.open(pjoin(cfg['gt_dir'], f"{project}.tif")) as fh:
            gt = fh.read(fh.indexes)
            gt[2] /= 100 # Cover/Dens normalization!!
            gt[4] /= 100
        # standardize
        mean[[0,1]] = (mean[[0,1]]-labels_mean[[0,1]])/labels_std[[0,1]]
        gt[[0,1]] = (gt[[0,1]]-labels_mean[[0,1]])/labels_std[[0,1]]
        projects.append((project, gt, mean, variance))
    return projects    
def res2df(res, cfg):
    R = {}
    for eid, eres in res.items():
        for metric_name, metric_info in eres.items():
            R[(eid, metric_name)] = metric_info["values"]
            R[(eid, f"ause-{metric_name}")] = metric_info["ause"]
    return pd.DataFrame(R, index=cfg["variable_names"]).T
def test(metrics, lo_variance, hi_variance, projects, cfg, N=2):
    print("creating")
    rcu = rcu = StratifiedRCUSubset(
        metric_names=metrics,
        num_variables=len(cfg["data_bands"]),
        # num_groups=len(projects),
        num_groups=N, # Debug
        num_bins=cfg["num_bins"],
        lo_variance=lo_variance,
        hi_variance=hi_variance
    )
    print("adding")
    results = {}
    for i, project in enumerate(projects):
        print("adding", project[0])
        rcu.add_project(*project)
        results[project[0]] = rcu.get_subset([project[0]])
    results["global"] = rcu.get()
    return res2df(results, cfg), rcu

In [3]:
with Path("./config/evaluate_testset/baseline.yaml").open("r", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)
lo_variance, hi_variance, variance_files = get_variance_bounds(cfg)
projects = get_projects(cfg, variance_files)

Computing variance bounds in data/2023-04-05_18-58-33...


100%|████████████████████████████████████████| 2/2 [00:02<00:00,  1.30it/s]


Variances lower bound: [0.4428499639034271, 0.08763699978590012, 0.0004826262593269348, 0.0001668027980485931, 4.102319962839829e-06]
Variances upper bound: [268.0447998046875, 106.31964111328125, 0.11541682481765747, 0.0358952134847641, 0.18080325424671173]


100%|████████████████████████████████████████| 2/2 [00:09<00:00,  4.25s/it]


### Error Metrics [pOK]

In [4]:
res, rcu = test(["mse", "rmse", "mae", "mbe"], lo_variance, hi_variance, projects, cfg)

creating
adding
adding 439
[debug:559] nans -> diff: 0.0, variance: 0.0
[debug:589] (5, 1000)
[debug:600] (5, 1000)
[debug:589] (5, 1000)
[debug:600] (5, 1000)
[debug:589] (5, 1000)
[debug:600] (5, 1000)
[debug:589] (5, 1000)
[debug:600] (5, 1000)
[debug:546] Getting: <src.rcu_metrics.StratifiedMSE object at 0x7fd95950e310>
[debug:184] Arr[2] nan: 0.305 ((1, 1000))


  return np.nansum(values*counts, axis=axis, keepdims=keepdims)/np.nansum(counts, axis=axis, keepdims=keepdims)


[debug:624] (5,) (5,)
[debug:546] Getting: <src.rcu_metrics.StratifiedRMSE object at 0x7fd95a07b4d0>
[debug:184] Arr[2] nan: 0.305 ((1, 1000))
[debug:624] (5,) (5,)
[debug:546] Getting: <src.rcu_metrics.StratifiedMAE object at 0x7fd95a07b650>
[debug:184] Arr[2] nan: 0.305 ((1, 1000))
[debug:624] (5,) (5,)
[debug:546] Getting: <src.rcu_metrics.StratifiedMBE object at 0x7fd95950e050>
[debug:184] Arr[2] nan: 0.305 ((1, 1000))
[debug:624] (5,) (5,)
adding 471
[debug:559] nans -> diff: 0.0, variance: 0.0
[debug:589] (5, 1000)
[debug:600] (5, 1000)
[debug:589] (5, 1000)
[debug:600] (5, 1000)
[debug:589] (5, 1000)
[debug:600] (5, 1000)
[debug:589] (5, 1000)
[debug:600] (5, 1000)
[debug:546] Getting: <src.rcu_metrics.StratifiedMSE object at 0x7fd95950e310>
[debug:184] Arr[2] nan: 0.438 ((1, 1000))
[debug:624] (5,) (5,)
[debug:546] Getting: <src.rcu_metrics.StratifiedRMSE object at 0x7fd95a07b4d0>
[debug:184] Arr[2] nan: 0.438 ((1, 1000))
[debug:624] (5,) (5,)
[debug:546] Getting: <src.rcu_metr

In [5]:
res

Unnamed: 0,Unnamed: 1,P95,MeanH,Dens,Gini,Cover
439,mse,0.325619,0.345869,0.014802,0.002589,0.019557
439,ause-mse,0.316299,0.332808,0.013426,0.002458,0.017563
439,rmse,0.546519,0.566615,0.115757,0.048444,0.127427
439,ause-rmse,0.537798,0.554638,0.110258,0.047084,0.120157
439,mae,0.435301,0.450296,0.092023,0.037767,0.101679
439,ause-mae,0.42807,0.440366,0.087426,0.03671,0.095359
439,mbe,-0.042112,-0.027191,-0.004988,-0.003574,-0.015277
439,ause-mbe,-0.042008,-0.02656,-0.003587,-0.003595,-0.013693
471,mse,0.267499,0.256655,0.014136,0.002227,0.022606
471,ause-mse,0.261826,0.248938,0.013323,0.002123,0.021195


### NLL [pOK]

In [6]:
res, rcu = test(["nll"], lo_variance, hi_variance, projects, cfg)

creating
adding
adding 439
[debug:559] nans -> diff: 0.0, variance: 0.0
[debug:589] (5, 1000)
[debug:600] (5, 1000)
[debug:546] Getting: <src.rcu_metrics.StratifiedNLL object at 0x7fd9584bbe10>
[debug:184] Arr[2] nan: 0.305 ((1, 1000))
[debug:624] (5,) (5,)
adding 471


  return np.nansum(values*counts, axis=axis, keepdims=keepdims)/np.nansum(counts, axis=axis, keepdims=keepdims)


[debug:559] nans -> diff: 0.0, variance: 0.0
[debug:589] (5, 1000)
[debug:600] (5, 1000)
[debug:546] Getting: <src.rcu_metrics.StratifiedNLL object at 0x7fd9584bbe10>
[debug:184] Arr[2] nan: 0.438 ((1, 1000))
[debug:624] (5,) (5,)
[debug:184] Arr[2] nan: 0.3715 ((2, 1000))


In [7]:
res

Unnamed: 0,Unnamed: 1,P95,MeanH,Dens,Gini,Cover
439,nll,0.962645,0.672108,-1.636143,-2.537406,-1.618837
439,ause-nll,0.946107,0.648937,-1.6836,-2.545449,-1.685208
471,nll,0.742455,0.361909,-1.559775,-2.579001,-1.36992
471,ause-nll,0.731254,0.345214,-1.599821,-2.594327,-1.42545
global,nll,0.772297,0.40395,-1.570125,-2.573364,-1.403655
global,ause-nll,0.782579,0.417078,-1.588992,-2.563738,-1.468072


### UCE, ENCE [pOK]

In [8]:
res, rcu = test(["uce", "ence"], lo_variance, hi_variance, [projects[1], projects[0]], cfg)

creating
adding
adding 471
[debug:559] nans -> diff: 0.0, variance: 0.0
[debug:587] [(5, 1000), (5, 1000)]
[debug:591] 0, 0.5102, 0.0
[debug:587] [(5, 1000), (5, 1000)]
[debug:591] 0, 0.5102, 0.0
[debug:546] Getting: <src.rcu_metrics.StratifiedUCE object at 0x7fd9584a7090>
[Debug:218] cumX: (5, 1000), cumH: (5, 2, 1000)
[debug:624] (5,) (5,)
[debug:546] Getting: <src.rcu_metrics.StratifiedENCE object at 0x7fd9584a70d0>
[Debug:218] cumX: (5, 1000), cumH: (5, 2, 1000)


  result = np.abs(result)/np.sqrt(np.nansum(histogram*arr1, axis=self.groups_axis, keepdims=True))


[debug:624] (5,) (5,)
adding 439
[debug:559] nans -> diff: 0.0, variance: 0.0
[debug:587] [(5, 1000), (5, 1000)]
[debug:591] 1, 0.5222, 0.0
[debug:587] [(5, 1000), (5, 1000)]
[debug:591] 1, 0.5222, 0.0
[debug:546] Getting: <src.rcu_metrics.StratifiedUCE object at 0x7fd9584a7090>
[Debug:218] cumX: (5, 1000), cumH: (5, 2, 1000)
[debug:624] (5,) (5,)
[debug:546] Getting: <src.rcu_metrics.StratifiedENCE object at 0x7fd9584a70d0>
[Debug:218] cumX: (5, 1000), cumH: (5, 2, 1000)
[debug:624] (5,) (5,)
[Debug:218] cumX: (5, 1000), cumH: (5, 2, 1000)
[Debug:218] cumX: (5, 1000), cumH: (5, 2, 1000)


In [9]:
res

Unnamed: 0,Unnamed: 1,P95,MeanH,Dens,Gini,Cover
471,uce,4.826015,6.130642,13.99475,8.169284,16.260691
471,ause-uce,2.359191,2.973686,6.597004,3.91465,7.613376
471,ence,0.000203,0.000186,0.000202,0.000203,0.000201
471,ause-ence,8.4e-05,7.8e-05,8.7e-05,8.7e-05,8.5e-05
439,uce,1.859255,2.441899,5.539462,3.230504,6.134424
439,ause-uce,1.768784,2.28161,5.099823,3.00989,5.743859
439,ence,0.00014,0.000201,0.000199,0.00018,0.000194
439,ause-ence,6.6e-05,8.3e-05,8.6e-05,8.6e-05,8.3e-05
global,uce,5.168083,2.441899,0.007108,0.000926,0.01068
global,ause-uce,4.910363,2.28161,0.005283,0.000686,0.008649


### CI90 [pOK]

In [10]:
res, rcu = test(["ci90_accs"], lo_variance, hi_variance, projects, cfg)

creating
adding
adding 439
[debug:559] nans -> diff: 0.0, variance: 0.0
[debug:589] (5, 1000)
[debug:600] (5, 1000)
[debug:546] Getting: <src.rcu_metrics.StratifiedCIAccuracy object at 0x7fd9584bbcd0>
[debug:184] Arr[2] nan: 0.305 ((1, 1000))
[debug:624] (5,) (5,)
adding 471


  return np.nansum(values*counts, axis=axis, keepdims=keepdims)/np.nansum(counts, axis=axis, keepdims=keepdims)


[debug:559] nans -> diff: 0.0, variance: 0.0
[debug:589] (5, 1000)
[debug:600] (5, 1000)
[debug:546] Getting: <src.rcu_metrics.StratifiedCIAccuracy object at 0x7fd9584bbcd0>
[debug:184] Arr[2] nan: 0.438 ((1, 1000))
[debug:624] (5,) (5,)
[debug:184] Arr[2] nan: 0.3715 ((2, 1000))


In [11]:
res

Unnamed: 0,Unnamed: 1,P95,MeanH,Dens,Gini,Cover
439,ci90_accs,0.999993,0.999857,0.821685,0.852386,0.853149
439,ause-ci90_accs,0.995993,0.995846,0.821412,0.84753,0.855276
471,ci90_accs,0.999852,0.999617,0.763954,0.80858,0.788883
471,ause-ci90_accs,0.997848,0.997606,0.765335,0.80759,0.793129
global,ci90_accs,0.999871,0.999649,0.771778,0.814517,0.797593
global,ause-ci90_accs,0.99785,0.997624,0.770584,0.811998,0.802726


### AUCE [pOK]

In [14]:
res, rcu = test(["auce"], lo_variance, hi_variance, projects, cfg)

creating
adding
adding 439
[debug:559] nans -> diff: 0.0, variance: 0.0
[debug:589] (5, 1000, 100)
[debug:600] (5, 1000, 100)
[debug:546] Getting: <src.rcu_metrics.StratifiedAUCE object at 0x7fd958443210>
[debug:184] Arr[2] nan: 0.305 ((1, 1000, 100))


  return np.nansum(values*counts, axis=axis, keepdims=keepdims)/np.nansum(counts, axis=axis, keepdims=keepdims)


[debug:624] (5,) (5,)
adding 471
[debug:559] nans -> diff: 0.0, variance: 0.0
[debug:589] (5, 1000, 100)
[debug:600] (5, 1000, 100)
[debug:546] Getting: <src.rcu_metrics.StratifiedAUCE object at 0x7fd958443210>
[debug:184] Arr[2] nan: 0.438 ((1, 1000, 100))
[debug:624] (5,) (5,)
[debug:184] Arr[2] nan: 0.3715 ((2, 1000, 100))


In [15]:
res

Unnamed: 0,Unnamed: 1,P95,MeanH,Dens,Gini,Cover
439,auce,0.368993,0.317995,0.067401,0.036826,0.06587
439,ause-auce,0.367661,0.31663,0.066651,0.036606,0.064691
471,auce,0.355436,0.298897,0.104811,0.070676,0.097014
471,ause-auce,0.354704,0.298232,0.102536,0.069996,0.094326
global,auce,0.357274,0.301485,0.099741,0.066089,0.092793
global,ause-auce,0.356379,0.300839,0.099267,0.066479,0.089476


### C_v [pOK]

In [16]:
res, rcu = test(["cv"], lo_variance, hi_variance, projects, cfg)

creating
adding
adding 439
[debug:559] nans -> diff: 0.0, variance: 0.0
[debug:587] [(5, 1000), (5, 1000)]
[debug:591] 0, 0.5222, 0.0
[debug:546] Getting: <src.rcu_metrics.StratifiedCv object at 0x7fd958443bd0>
[Debug:218] cumX: (5, 1000), cumH: (5, 2, 1000)
[debug:624] (5,) (5,)
adding 471


  return np.nansum(values*counts, axis=axis, keepdims=keepdims)/np.nansum(counts, axis=axis, keepdims=keepdims)
  result = np.sqrt(np.nansum(arr2, axis=axes, keepdims=True)/(np.nansum(histogram, axis=axes, keepdims=True)-1))/mu
  result = np.sqrt(np.nansum(arr2, axis=axes, keepdims=True)/(np.nansum(histogram, axis=axes, keepdims=True)-1))/mu


[debug:559] nans -> diff: 0.0, variance: 0.0
[debug:587] [(5, 1000), (5, 1000)]
[debug:591] 1, 0.5102, 0.0
[debug:546] Getting: <src.rcu_metrics.StratifiedCv object at 0x7fd958443bd0>
[Debug:218] cumX: (5, 1000), cumH: (5, 2, 1000)
[debug:624] (5,) (5,)
[Debug:218] cumX: (5, 1000), cumH: (5, 2, 1000)


In [17]:
res

Unnamed: 0,Unnamed: 1,P95,MeanH,Dens,Gini,Cover
439,cv,0.006044,0.005152,0.003871,0.004707,0.004296
439,ause-cv,0.008827,0.007887,0.00766,0.008074,0.006905
471,cv,0.009312,0.008257,0.005409,0.007123,0.005156
471,ause-cv,0.00899,0.007995,0.006896,0.008058,0.006155
global,cv,0.009032,0.008257,0.00301,0.004724,0.003838
global,ause-cv,0.008559,0.007995,0.003157,0.004594,0.004731


### SRP [pOK]

In [18]:
res, rcu = test(["srp"], lo_variance, hi_variance, projects, cfg)

creating
adding
adding 439
[debug:559] nans -> diff: 0.0, variance: 0.0
[debug:589] (5, 1000)
[debug:600] (5, 1000)
[debug:546] Getting: <src.rcu_metrics.StratifiedSRP object at 0x7fd958453b10>
[debug:184] Arr[2] nan: 0.305 ((1, 1000))
[debug:624] (5,) (5,)
adding 471


  return np.nansum(values*counts, axis=axis, keepdims=keepdims)/np.nansum(counts, axis=axis, keepdims=keepdims)


[debug:559] nans -> diff: 0.0, variance: 0.0
[debug:589] (5, 1000)
[debug:600] (5, 1000)
[debug:546] Getting: <src.rcu_metrics.StratifiedSRP object at 0x7fd958453b10>
[debug:184] Arr[2] nan: 0.438 ((1, 1000))
[debug:624] (5,) (5,)
[debug:184] Arr[2] nan: 0.3715 ((2, 1000))


In [19]:
res

Unnamed: 0,Unnamed: 1,P95,MeanH,Dens,Gini,Cover
439,srp,7.675559,4.135724,0.009542,0.001838,0.012452
439,ause-srp,7.447388,3.965252,0.008634,0.001747,0.011087
471,srp,5.093514,2.487223,0.006748,0.001285,0.011395
471,ause-srp,4.980209,2.408655,0.006345,0.001225,0.010625
global,srp,5.443459,2.710645,0.007127,0.00136,0.011538
global,ause-srp,5.5647,2.775924,0.00688,0.001349,0.010728


In [23]:
class A():
    def __init__(self, x):
        self.x = x
    def copy(self):
        return self.__class__(self.x)
    def __repr__(self):
        return f"A(x={self.x})"

In [26]:
a1 = A(10)
a2 = a1.copy()
a2.x += 10
a1, a2

(A(x=10), A(x=20))

In [28]:
# upsampling 10 -> 5
num_bins = 10
k = 2
bin_map = [np.arange(i, i+k) for i in np.arange(0, num_bins, k)]
bin_map

[array([0, 1]), array([2, 3]), array([4, 5]), array([6, 7]), array([8, 9])]

In [30]:
np.ones((5, 10, 1000))[:,:,[0,10,23]].shape, np.ones((5, 10, 1000))[:,:,1].shape, 

((5, 10, 3), (5, 10))

In [32]:
x = (1,2,3)
x += (1)
x

TypeError: can only concatenate tuple (not "int") to tuple