# RCU Metrics Tests

- **Suggestion**: Metrics that can not be aggregated as function of themselves at higher resolution (e.g. UCE) may be better represented by a `NestedStratifiedMetric` that contains multiple `StratifiedMetric` objects and calls the standard functions on it:
    ```python
    class NestedStratifiedMetric(StratifiedMetric):
        def __init__(self, *args, **kwargs):
            self.members = [
                StratifiedMetric(*args, **kwargs),
                StratifiedTensor(*args, **kwargs),
                ...
            ]
        def agg(self, arr, ...):
            agg_members = [m.agg(arr, ...) for m in self.members]
            agg_metric = f(agg_members)
    ```

In [290]:
%load_ext autoreload
%autoreload 2
import os
os.chdir("/scratch/ewalt/pdm/rs-uncertainty")
from src.rcu_metrics import StratifiedRCU, nan_frac
import rasterio
import argparse
import yaml
import numpy as np
from tqdm import tqdm
from pathlib import Path
import pandas as pd

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [282]:
def pjoin(*subs): return Path(os.path.abspath(os.path.join(*subs)))
def get_variance_bounds(cfg, N=2):
    for k, v in cfg.items():
        if k.endswith("_dir"): cfg[k] = Path(v)
    projects = cfg["projects_east"]+cfg["projects_west"]+cfg["projects_north"]
    # loop on projects to get variance bounds
    print(f"Computing variance bounds in {cfg['pkl_dir']}...")
    lo_variance = np.full((5,), np.inf)
    hi_variance = np.full((5,), -np.inf)
    variance_files = []
    # for variance_file in tqdm(list(cfg["prediction_dir"].glob("*_variance.tif"))):
    for variance_file in tqdm(list(cfg["prediction_dir"].glob("*_variance.tif"))[:N]): # Debug
        if variance_file.stem.split("_")[0] not in projects: continue
        variance_files.append(variance_file)
        with rasterio.open(variance_file) as fh:
            variance = fh.read(fh.indexes)
        variance_flat = variance.reshape(5, -1)
        hi = np.nanmax(variance_flat, axis=1)
        lo = np.nanmin(variance_flat, axis=1)
        hi_variance[hi>hi_variance] = hi[hi>hi_variance]
        lo_variance[lo<lo_variance] = lo[lo<lo_variance]
    print("Variances lower bound:", lo_variance.tolist())
    print("Variances upper bound:", hi_variance.tolist())
    # initialize RCU metrics
    return lo_variance, hi_variance, variance_files
def get_projects(cfg, variance_files):
    projects = []
    # Load standardization data
    with pjoin(cfg["pkl_dir"], "stats.yaml").open("r", encoding="utf-8") as f:
        stats = yaml.safe_load(f)
    labels_mean = np.array(stats["labels_stats"]["mean"]).reshape(5,1,1)
    labels_std = np.array(stats["labels_stats"]["std"]).reshape(5,1,1)
    # compute stats online
    for variance_file in tqdm(variance_files):
        # load data
        project = variance_file.stem.split('_')[0]
        # if project not in projects: continue
        with rasterio.open(pjoin(cfg['prediction_dir'], f"{project}_mean.tif")) as fh:
            mean = fh.read(fh.indexes)
        with rasterio.open(variance_file) as fh:
            variance = fh.read(fh.indexes)
        with rasterio.open(pjoin(cfg['gt_dir'], f"{project}.tif")) as fh:
            gt = fh.read(fh.indexes)
            gt[2] /= 100 # Cover/Dens normalization!!
            gt[4] /= 100
        # standardize
        mean[[0,1]] = (mean[[0,1]]-labels_mean[[0,1]])/labels_std[[0,1]]
        gt[[0,1]] = (gt[[0,1]]-labels_mean[[0,1]])/labels_std[[0,1]]
        projects.append((project, gt, mean, variance))
    return projects
class TestStratifiedRCU(StratifiedRCU):
    def __init__(self, metric_names, *args, **kwargs):
        self.metric_names = metric_names
        super().__init__(*args, **kwargs)
    def metrics_tensors(self):
        return {k:v for k,v in super().metrics_tensors().items() if k in self.metric_names}
    
def res2df(res, cfg):
    R = {}
    for eid, eres in res.items():
        for metric_name, metric_info in eres.items():
            R[(eid, metric_name)] = metric_info["values"]
            R[(eid, f"ause-{metric_name}")] = metric_info["ause"]
    return pd.DataFrame(R, index=cfg["variable_names"]).T
def test(metrics, lo_variance, hi_variance, projects, cfg, N=2):
    print("creating")
    rcu = rcu = TestStratifiedRCU(
        metric_names=metrics,
        num_variables=len(cfg["data_bands"]),
        # num_groups=len(projects),
        num_groups=N, # Debug
        num_bins=cfg["num_bins"],
        lo_variance=lo_variance,
        hi_variance=hi_variance
    )
    print("adding")
    results = {}
    for i, project in enumerate(projects):
        print("adding", project[0])
        rcu.add_project(*project)
        results[project[0]] = rcu.get_subset([project[0]])
    results["global"] = rcu.get()
    return res2df(results, cfg), rcu

In [269]:
with Path("./config/evaluate_testset/baseline.yaml").open("r", encoding="utf-8") as f:
    cfg = yaml.safe_load(f)
lo_variance, hi_variance, variance_files = get_variance_bounds(cfg)
projects = get_projects(cfg, variance_files)

  0%|                                                | 0/2 [00:00<?, ?it/s]

Computing variance bounds in data/2023-04-05_18-58-33...


100%|████████████████████████████████████████| 2/2 [00:01<00:00,  1.52it/s]


Variances lower bound: [0.4428499639034271, 0.08763699978590012, 0.0004826262593269348, 0.0001668027980485931, 4.102319962839829e-06]
Variances upper bound: [268.0447998046875, 106.31964111328125, 0.11541682481765747, 0.0358952134847641, 0.18080325424671173]


100%|████████████████████████████████████████| 2/2 [00:06<00:00,  2.30s/it]


### Error Metrics [OK]

In [270]:
res, rcu = test(["mse", "rmse", "mae", "mbe"], lo_variance, hi_variance, projects, cfg)

creating
adding
adding 439
[debug:559] nans -> diff: 0.0, variance: 0.0
[debug:546] <src.rcu_metrics.StratifiedMSE object at 0x7f383004cad0>
[debug:184] Arr[2] nan: 0.305 ((1, 1000))
[Debug:170] cumX: (5, 1000), cumH: (5, 1, 1000)
[debug:546] <src.rcu_metrics.StratifiedRMSE object at 0x7f383004c750>
[debug:184] Arr[2] nan: 0.305 ((1, 1000))
[Debug:170] cumX: (5, 1000), cumH: (5, 1, 1000)
[debug:546] <src.rcu_metrics.StratifiedMAE object at 0x7f383004ca90>
[debug:184] Arr[2] nan: 0.305 ((1, 1000))
[Debug:170] cumX: (5, 1000), cumH: (5, 1, 1000)
[debug:546] <src.rcu_metrics.StratifiedMBE object at 0x7f383004c150>
[debug:184] Arr[2] nan: 0.305 ((1, 1000))
[Debug:170] cumX: (5, 1000), cumH: (5, 1, 1000)
adding 471


  num_variables, num_bins = len(binned), len(binned[0])


[debug:559] nans -> diff: 0.0, variance: 0.0
[debug:546] <src.rcu_metrics.StratifiedMSE object at 0x7f383004cad0>
[debug:184] Arr[2] nan: 0.438 ((1, 1000))
[Debug:170] cumX: (5, 1000), cumH: (5, 1, 1000)
[debug:546] <src.rcu_metrics.StratifiedRMSE object at 0x7f383004c750>
[debug:184] Arr[2] nan: 0.438 ((1, 1000))
[Debug:170] cumX: (5, 1000), cumH: (5, 1, 1000)
[debug:546] <src.rcu_metrics.StratifiedMAE object at 0x7f383004ca90>
[debug:184] Arr[2] nan: 0.438 ((1, 1000))
[Debug:170] cumX: (5, 1000), cumH: (5, 1, 1000)
[debug:546] <src.rcu_metrics.StratifiedMBE object at 0x7f383004c150>
[debug:184] Arr[2] nan: 0.438 ((1, 1000))
[Debug:170] cumX: (5, 1000), cumH: (5, 1, 1000)
[debug:184] Arr[2] nan: 0.3715 ((2, 1000))
[Debug:170] cumX: (5, 1000), cumH: (5, 2, 1000)
[debug:184] Arr[2] nan: 0.3715 ((2, 1000))
[Debug:170] cumX: (5, 1000), cumH: (5, 2, 1000)
[debug:184] Arr[2] nan: 0.3715 ((2, 1000))
[Debug:170] cumX: (5, 1000), cumH: (5, 2, 1000)
[debug:184] Arr[2] nan: 0.3715 ((2, 1000))
[D

In [271]:
res

Unnamed: 0,Unnamed: 1,P95,MeanH,Dens,Gini,Cover
439,mse,0.325619,0.345869,0.014802,0.002589,0.019557
439,ause-mse,0.316299,0.332808,0.013426,0.002458,0.017563
439,rmse,0.546519,0.566615,0.115757,0.048444,0.127427
439,ause-rmse,0.537798,0.554638,0.110258,0.047084,0.120157
439,mae,0.435301,0.450296,0.092023,0.037767,0.101679
439,ause-mae,0.42807,0.440366,0.087426,0.03671,0.095359
439,mbe,-0.042112,-0.027191,-0.004988,-0.003574,-0.015277
439,ause-mbe,-0.042008,-0.02656,-0.003587,-0.003595,-0.013693
471,mse,0.267499,0.256655,0.014136,0.002227,0.022606
471,ause-mse,0.261826,0.248938,0.013323,0.002123,0.021195


### NLL [pOK]

In [272]:
res, rcu = test(["nll"], lo_variance, hi_variance, projects, cfg)

creating
adding
adding 439
[debug:559] nans -> diff: 0.0, variance: 0.0
[debug:546] <src.rcu_metrics.StratifiedNLL object at 0x7f3830045310>
[debug:184] Arr[2] nan: 0.305 ((1, 1000))
[Debug:170] cumX: (5, 1000), cumH: (5, 1, 1000)
adding 471


  num_variables, num_bins = len(binned), len(binned[0])


[debug:559] nans -> diff: 0.0, variance: 0.0
[debug:546] <src.rcu_metrics.StratifiedNLL object at 0x7f3830045310>
[debug:184] Arr[2] nan: 0.438 ((1, 1000))
[Debug:170] cumX: (5, 1000), cumH: (5, 1, 1000)
[debug:184] Arr[2] nan: 0.3715 ((2, 1000))
[Debug:170] cumX: (5, 1000), cumH: (5, 2, 1000)


In [273]:
res

Unnamed: 0,Unnamed: 1,P95,MeanH,Dens,Gini,Cover
439,nll,0.962645,0.672108,-1.636143,-2.537406,-1.618837
439,ause-nll,0.946107,0.648937,-1.6836,-2.545449,-1.685208
471,nll,0.742455,0.361909,-1.559775,-2.579001,-1.36992
471,ause-nll,0.731254,0.345214,-1.599821,-2.594327,-1.42545
global,nll,0.772297,0.40395,-1.570125,-2.573364,-1.403655
global,ause-nll,0.782579,0.417078,-1.588992,-2.563738,-1.468072


### UCE, ENCE [NO]

In [312]:
res, rcu = test(["uce", "ence"], lo_variance, hi_variance, [projects[1], projects[0]], cfg)

creating
adding
adding 471
[debug:559] nans -> diff: 0.0, variance: 0.0
[debug:587] [(5, 1000), (5, 1000)]
[debug:591] 0, 0.5102, 0.0
[debug:587] [(5, 1000), (5, 1000)]
[debug:591] 0, 0.5102, 0.0
[debug:546] <src.rcu_metrics.StratifiedUCE object at 0x7f3830072310>
[debug:624] (5,) (5,)
[debug:546] <src.rcu_metrics.StratifiedENCE object at 0x7f3830072b10>


  result /= N
  result = np.abs(result) / r1
  result /= N


[debug:624] (5,) (5,)
adding 439
[debug:559] nans -> diff: 0.0, variance: 0.0
[debug:587] [(5, 1000), (5, 1000)]
[debug:591] 1, 0.5222, 0.0
[debug:587] [(5, 1000), (5, 1000)]
[debug:591] 1, 0.5222, 0.0
[debug:546] <src.rcu_metrics.StratifiedUCE object at 0x7f3830072310>
[debug:624] (5,) (5,)
[debug:546] <src.rcu_metrics.StratifiedENCE object at 0x7f3830072b10>
[debug:624] (5,) (5,)


In [313]:
res

Unnamed: 0,Unnamed: 1,P95,MeanH,Dens,Gini,Cover
471,uce,,,,,
471,ause-uce,2.359191,2.973686,6.597004,3.91465,7.613376
471,ence,,,,,
471,ause-ence,8.4e-05,7.8e-05,8.7e-05,8.7e-05,8.5e-05
439,uce,1.859255,2.441899,5.539462,3.230504,6.134424
439,ause-uce,1.768784,2.28161,5.099823,3.00989,5.743859
439,ence,0.00014,0.000201,0.000199,0.00018,0.000194
439,ause-ence,6.6e-05,8.3e-05,8.6e-05,8.6e-05,8.3e-05
global,uce,5.168083,2.441899,0.007108,0.000926,0.01068
global,ause-uce,4.910363,2.28161,0.005283,0.000686,0.008649


In [314]:
res, rcu = test(["uce", "ence"], lo_variance, hi_variance, projects, cfg)

creating
adding
adding 439
[debug:559] nans -> diff: 0.0, variance: 0.0
[debug:587] [(5, 1000), (5, 1000)]
[debug:591] 0, 0.5222, 0.0
[debug:587] [(5, 1000), (5, 1000)]
[debug:591] 0, 0.5222, 0.0
[debug:546] <src.rcu_metrics.StratifiedUCE object at 0x7f382f961e10>
[debug:624] (5,) (5,)
[debug:546] <src.rcu_metrics.StratifiedENCE object at 0x7f382f961f50>


  result /= N
  result = np.abs(result) / r1
  result /= N


[debug:624] (5,) (5,)
adding 471
[debug:559] nans -> diff: 0.0, variance: 0.0
[debug:587] [(5, 1000), (5, 1000)]
[debug:591] 1, 0.5102, 0.0
[debug:587] [(5, 1000), (5, 1000)]
[debug:591] 1, 0.5102, 0.0
[debug:546] <src.rcu_metrics.StratifiedUCE object at 0x7f382f961e10>
[debug:624] (5,) (5,)
[debug:546] <src.rcu_metrics.StratifiedENCE object at 0x7f382f961f50>
[debug:624] (5,) (5,)


In [315]:
res

Unnamed: 0,Unnamed: 1,P95,MeanH,Dens,Gini,Cover
439,uce,,,,,
439,ause-uce,3.565545,4.847122,8.64363,5.828359,7.473895
439,ence,,,,,
439,ause-ence,0.000511,0.000484,0.000626,0.000544,0.000491
471,uce,1.859255,2.441899,5.539462,3.230504,6.134424
471,ause-uce,1.903411,2.499632,5.361926,3.225446,5.724908
471,ence,0.00014,0.000201,0.000199,0.00018,0.000194
471,ause-ence,0.000105,0.000134,0.00014,0.00013,0.000135
global,uce,5.168083,2.441899,0.007108,0.000926,0.01068
global,ause-uce,5.281255,2.499632,0.006949,0.000941,0.009956


In [338]:
# Validate results
mean_variance, mean_mse = rcu.uce.X1, rcu.uce.X2
h = rcu.histogram.array
N = h.sum(axis=(1,2))
uces = np.abs(np.nansum(h*(mean_variance-mean_mse), axis=1)/h.sum(axis=1))*h.sum(axis=1)
uce_global = np.nansum(uces, axis=1)/N
print(uces.shape, uce_global.shape)
# np.stack([uces, uce_global], axis=0)

(5, 1000) (5,)


  """


Note: The single project computation is probably wrong. See the UCE formulas on overleaf. For a single project $q$:
$$
UCE(q) = \frac{1}{N_q}\sum_{k=1}^{M}h_k|\bar\sigma_{q,k}^2-\bar\delta_{q,k}^2|
$$
which is probably not what we get when doing `rcu.uce.get([q])`.

Fix idea: on `StratifiedMetric.get` and `StratifiedMetric.get_subset`, return a dict `{q1: ..., q2: ..., ..., global: ...}` for `qi` in subset/set. this requires to implement `agg_one` (or change the logic of `agg` methods).

### CI90

In [310]:
res, rcu = test(["ci90_accs"], lo_variance, hi_variance, projects, cfg)

creating
adding
adding 439
[debug:559] nans -> diff: 0.0, variance: 0.0
[debug:589] (5, 1000, 1)
[debug:546] <src.rcu_metrics.StratifiedCIAccuracy object at 0x7f383004d510>
[debug:184] Arr[2] nan: 0.305 ((1, 1000, 1))
[Debug:170] cumX: (5, 1000, 1), cumH: (5, 1, 1000)
[debug:624] (5,) (5,)
adding 471


  return np.nansum(values*counts, axis=axis)/np.nansum(counts, axis=axis)


[debug:559] nans -> diff: 0.0, variance: 0.0
[debug:589] (5, 1000, 1)
[debug:546] <src.rcu_metrics.StratifiedCIAccuracy object at 0x7f383004d510>
[debug:184] Arr[2] nan: 0.438 ((1, 1000, 1))
[Debug:170] cumX: (5, 1000, 1), cumH: (5, 1, 1000)
[debug:624] (5,) (5,)
[debug:184] Arr[2] nan: 0.3715 ((2, 1000, 1))
[Debug:170] cumX: (5, 1000, 1), cumH: (5, 2, 1000)


In [311]:
res

Unnamed: 0,Unnamed: 1,P95,MeanH,Dens,Gini,Cover
439,ci90_accs,0.999993,0.999857,0.821685,0.852386,0.853149
439,ause-ci90_accs,0.995993,0.995846,0.821412,0.84753,0.855276
471,ci90_accs,0.999852,0.999617,0.763954,0.80858,0.788883
471,ause-ci90_accs,0.997848,0.997606,0.765335,0.80759,0.793129
global,ci90_accs,0.999871,0.999649,0.771778,0.814517,0.797593
global,ause-ci90_accs,0.99785,0.997624,0.770584,0.811998,0.802726


### AUCE


In [259]:
np.array([[1,2,3]]).T[[0,1]].shape

(2, 1)

In [157]:
variables = 
print([np.array(list(res.keys())), np.array(variables)])
metrics = pd.DataFrame(
    index=[list(res.keys()), variables],
    data=np.random.randn(15, 3)
)
print(metrics)
for index in res.keys():
    for column in res[index].keys():
        for i, (var, val) in enumerate(zip(variables, res[index][column].tolist())):
            print([index,column,var], res[index][column][i])

[array(['439', '471', 'global'], dtype='<U6'), array(['P95', 'MeanH', 'Dens', 'Gini', 'Cover'], dtype='<U5')]


ValueError: all arrays must be same length

In [222]:
variable_names = cfg["variable_names"]
def res_to_df(*results):
    """
    {
        entity {
            metric {
                values: Array
                ause: Array
            }
        }
    }
    """
    formatted = {}
    ids = list(results.keys())
    for entity_id, eresults in results.items():
        #formatted[entity_id] = {vn: {mn: {"values": None, "ause": None} for mn in results[entity_id].keys()} for vn in variable_names}
        for metric_name, metric_dict in eresults.items():
            for kind, arr in metric_dict.items():
                for i, vn in enumerate(variable_names):
                    if not (vn,metric_name,kind) in formatted.keys():
                        formatted[(vn,metric_name,kind)] = [results[entity_id][metric_name][kind][i]]
                    else:
                        formatted[(vn,metric_name,kind)].append(results[entity_id][metric_name][kind][i])
    return pd.DataFrame(formatted, index=ids)
res_to_df(res).T

Unnamed: 0,Unnamed: 1,Unnamed: 2,439,471,global
P95,mse,values,0.325619,0.267499,0.275376
MeanH,mse,values,0.345869,0.256655,0.268746
Dens,mse,values,2.7e-05,2.6e-05,2.6e-05
Gini,mse,values,0.671615,0.577526,0.590278
Cover,mse,values,2.7e-05,3.1e-05,3.1e-05
P95,mse,ause,0.316299,0.261826,0.283445
MeanH,mse,ause,0.332808,0.248938,0.276293
Dens,mse,ause,2.5e-05,2.5e-05,2.6e-05
Gini,mse,ause,0.637547,0.55071,0.59251
Cover,mse,ause,2.4e-05,2.9e-05,2.9e-05


In [214]:
res["471"]["mse"]

{'values': array([2.6749894e-01, 2.5665486e-01, 2.6140886e-05, 5.7752591e-01,
        3.1218653e-05], dtype=float32),
 'ause': array([2.61826318e-01, 2.48937649e-01, 2.46361726e-05, 5.50710034e-01,
        2.92707312e-05])}

In [120]:
np.nansum(np.ones((3,)))/np.nansum(np.array([1,0,1]))

1.5

In [128]:
_, gt, mean, var = projects[0]
mask = ~np.isnan(mean).all(0)
diff = mean[:,mask]-gt[:,mask]
var = var[:,mask]

In [134]:
var.min(1), var.max(1)

(array([3.7209354e-02, 2.4249621e-02, 8.9247635e-07, 8.3112732e-02,
        3.1976008e-08], dtype=float32),
 array([9.2106428e+00, 7.5214000e+00, 2.1342971e-04, 9.3099546e+00,
        2.4968814e-04], dtype=float32))

In [132]:
hi_variance, lo_variance

(array([2.68044800e+02, 1.06319641e+02, 1.15416825e-01, 3.58952135e-02,
        1.80803254e-01]),
 array([4.42849964e-01, 8.76369998e-02, 4.82626259e-04, 1.66802798e-04,
        4.10231996e-06]))

In [230]:
reform = {(outerKey, innerKey): values for outerKey, innerDict in res.items() for innerKey, values in innerDict.items()}
pd.DataFrame(reform)

Unnamed: 0_level_0,439,439,439,439,471,471,471,471,global,global,global,global
Unnamed: 0_level_1,mse,rmse,mae,mbe,mse,rmse,mae,mbe,mse,rmse,mae,mbe
values,"[0.32561916, 0.34586895, 2.7371232e-05, 0.6716...","[0.54651886, 0.5666146, 0.00497785, 0.780184, ...","[0.43530083, 0.450296, 0.003957208, 0.60823405...","[-0.04211172, -0.027191294, -0.00021448842, -0...","[0.26749894, 0.25665486, 2.6140886e-05, 0.5775...","[0.49287754, 0.4758262, 0.00486834, 0.71582305...","[0.38713685, 0.37602183, 0.0038529327, 0.55980...","[0.033393744, 0.029015576, 3.8440656e-05, 0.00...","[0.27537596, 0.26874605, 2.6307636e-05, 0.5902...","[0.5001475, 0.48813078, 0.0048831822, 0.724545...","[0.3936645, 0.38608822, 0.003867065, 0.5663693...","[0.023160484, 0.021397855, 4.1611647e-06, -0.0..."
ause,"[0.31629889644496145, 0.33280802916642277, 2.4...","[0.5377978439033031, 0.5546379370987415, 0.004...","[0.42806975400447844, 0.4403657746091485, 0.00...","[-0.042007546013221146, -0.026560295313596726,...","[0.2618263181606308, 0.2489376492444426, 2.463...","[0.4874713160991669, 0.4681610092446208, 0.004...","[0.38269808324426413, 0.36981656708568333, 0.0...","[0.0328326032529585, 0.028637537222122773, 4.2...","[0.28344492984656244, 0.27629262625612316, 2.5...","[0.5065962020158767, 0.49451919908076525, 0.00...","[0.3985910469070077, 0.3912328855767846, 0.003...","[0.025393906575161964, 0.023075931975385173, 1..."


In [166]:
from itertools import product

In [173]:
list(list(t) for t in product([1,2],["a", "b", "c"]))

[[1, 'a'], [1, 'b'], [1, 'c'], [2, 'a'], [2, 'b'], [2, 'c']]

In [176]:
indexes

[[0, 'a'],
 [0, 'b'],
 [0, 'c'],
 [0, 'd'],
 [1, 'a'],
 [1, 'b'],
 [1, 'c'],
 [1, 'd'],
 [2, 'a'],
 [2, 'b'],
 [2, 'c'],
 [2, 'd'],
 [3, 'a'],
 [3, 'b'],
 [3, 'c'],
 [3, 'd']]

In [229]:
pd.DataFrame.from_dict(res, orient="index").stack().to_frame()

Unnamed: 0,Unnamed: 1,0
439,mse,"{'values': [0.32561916, 0.34586895, 2.7371232e..."
439,rmse,"{'values': [0.54651886, 0.5666146, 0.00497785,..."
439,mae,"{'values': [0.43530083, 0.450296, 0.003957208,..."
439,mbe,"{'values': [-0.04211172, -0.027191294, -0.0002..."
471,mse,"{'values': [0.26749894, 0.25665486, 2.6140886e..."
471,rmse,"{'values': [0.49287754, 0.4758262, 0.00486834,..."
471,mae,"{'values': [0.38713685, 0.37602183, 0.00385293..."
471,mbe,"{'values': [0.033393744, 0.029015576, 3.844065..."
global,mse,"{'values': [0.27537596, 0.26874605, 2.6307636e..."
global,rmse,"{'values': [0.5001475, 0.48813078, 0.004883182..."


In [234]:
d = {
    ("439", "mse"): [10, 0],
    ("439", "ause-mse"): [20, 1],
    ("global", "mse"): [10, 0],
    ("global", "ause-mse"): [20, 1],
}
pd.DataFrame(d, index=["var1", "var2"]).T

Unnamed: 0,Unnamed: 1,var1,var2
439,mse,10,0
439,ause-mse,20,1
global,mse,10,0
global,ause-mse,20,1


In [246]:
R = {}
for eid, eres in res.items():
    for metric_name, metric_info in eres.items():
        R[(eid, metric_name)] = metric_info["values"]
        R[(eid, f"ause-{metric_name}")] = metric_info["values"]
pd.DataFrame(R, index=variable_names).T

Unnamed: 0,Unnamed: 1,P95,MeanH,Dens,Gini,Cover
439,mse,0.325619,0.345869,2.7e-05,0.671615,2.7e-05
439,ause-mse,0.325619,0.345869,2.7e-05,0.671615,2.7e-05
439,rmse,0.546519,0.566615,0.004978,0.780184,0.004735
439,ause-rmse,0.546519,0.566615,0.004978,0.780184,0.004735
439,mae,0.435301,0.450296,0.003957,0.608234,0.003779
439,ause-mae,0.435301,0.450296,0.003957,0.608234,0.003779
439,mbe,-0.042112,-0.027191,-0.000214,-0.057566,-0.000568
439,ause-mbe,-0.042112,-0.027191,-0.000214,-0.057566,-0.000568
471,mse,0.267499,0.256655,2.6e-05,0.577526,3.1e-05
471,ause-mse,0.267499,0.256655,2.6e-05,0.577526,3.1e-05


In [241]:
metrics = []
for m in res[list(res.keys())[0]].keys(): 
    print(m)
    metrics.extend([m, f"ause-{m}"])
formatted = {m: [None for _ in variable_names]}
pd.DataFrame(formatted, index=variable_names).T

mse
rmse
mae
mbe


Unnamed: 0,P95,MeanH,Dens,Gini,Cover
mbe,,,,,


In [240]:
res[list(res.keys())[0]].keys()


dict_keys(['mse', 'rmse', 'mae', 'mbe'])