# Visualization: CRC

Losses:
- binary w/ threshold
- Miscoverage
 

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
from typing import Literal
from cose.utils import load_project_paths

device_str = "cuda:0"
# device_str = "cuda:1"
# device_str = "cpu"
device = torch.device(device_str)

prj_path = load_project_paths().COSE_PATH

# dataset: Literal["Cityscapes", "ADE20K", "LoveDA"]

# dataset = "Cityscapes"
# dataset = "ADE20K"
dataset = "LoveDA"
do_loveda_mincv_09 = True

datasets = ["Cityscapes", "ADE20K", "LoveDA"]


# loss: Literal["binary", "miscoverage"]
# loss = "binary"
loss = "miscoverage"
losses = ["binary", "miscoverage"]

### Test results: load csv

In [2]:
## get optimal lambdas from calibration output (csv format)
clbd_csv = f"{prj_path}/experiments/outputs/processed_lambdas/crc_{loss}_{dataset}_optimized_lambdas.csv"
resu_csv = f"{prj_path}/experiments/metrics/{dataset}/{dataset}_{loss}_loss.csv"


# loveda for mincov 0.9
if do_loveda_mincv_09 and dataset == "LoveDA" and loss == "binary":
    clbd_csv = f"{prj_path}/experiments/outputs/processed_lambdas/crc_binary_LoveDA_optimized_lambdas_cov_0.9.csv"
    resu_csv = f"{prj_path}/experiments/metrics/{dataset}/{dataset}_{loss}_loss_0.9.csv"


clbd = pd.read_csv(clbd_csv)

clbd["mincov"] = clbd["mincov"].fillna(False)


clbd["config_id"] = (
    clbd["alpha"].astype(str)
    + "_"
    + clbd["mincov"].astype(str)
    + "_"
    + clbd["random_seed"].astype(str)
)
print(f"{set(clbd.columns) = }")


keep_columns = tuple(range(101, 110 + 1))  # , 103)

clbd.drop(clbd[~clbd.random_seed.isin(keep_columns)].index, inplace=True)


resu = pd.read_csv(resu_csv, index_col=0)
resu.drop(resu[~resu.random_seed.isin(keep_columns)].index, inplace=True)

resu["mincov"] = resu["mincov"].fillna(False)

resu["expe_setup"] = resu["alpha"].astype(str) + "_" + resu["mincov"].astype(str)

resu["config_id"] = (
    resu["alpha"].astype(str)
    + "_"
    + resu["mincov"].astype(str)
    + "_"
    + resu["random_seed"].astype(str)
)
print(f"{set(resu.columns) = }")

## Link optimized_lambda to expe in resu_csv
print(f"[LUCA dbg] {clbd.config_id.unique() = }")
print(f"[LUCA dbg] {resu.config_id.unique() = }")

print(" --- difference:")
print(set(clbd.config_id.unique()).difference(set(resu.config_id.unique())))

# assert (
# (set(clbd.config_id.unique()) == set(resu.config_id.unique()))
# and (len(clbd) == len(resu))
# ) == True, f"Error: configs in clbd and resu are different: {set(clbd.config_id.unique()).intersection(set(resu.config_id.unique()))}"

set(clbd.columns) = {'res_path', 'cal_id', 'optimal_lambda', 'loss_function', 'alpha', 'n_calib', 'mincov', 'config_id', 'random_seed', 'early_stopped', 'dataset', 'expe_name'}
set(resu.columns) = {'optimal_lambda', 'loss_function', 'empirical_risk', 'activations', 'alpha', 'id_path', 'mincov', 'config_id', 'expe_setup', 'empirical_coverage_ratio', 'random_seed'}
[LUCA dbg] clbd.config_id.unique() = array(['0.2_False_101', '0.1_False_101', '0.05_False_101',
       '0.01_False_101', '0.005_False_101', '0.2_False_102',
       '0.1_False_102', '0.05_False_102', '0.01_False_102',
       '0.005_False_102', '0.2_False_103', '0.1_False_103',
       '0.05_False_103', '0.01_False_103', '0.005_False_103',
       '0.2_False_104', '0.1_False_104', '0.05_False_104',
       '0.01_False_104', '0.005_False_104', '0.2_False_105',
       '0.1_False_105', '0.05_False_105', '0.01_False_105',
       '0.005_False_105', '0.2_False_106', '0.1_False_106',
       '0.05_False_106', '0.01_False_106', '0.005_False

In [3]:
# merged_df = pd.merge(clbd[["config_id", "optimal_lambda"]], resu, on="config_id")
merged_df = pd.merge(clbd[["config_id", "dataset"]], resu, on="config_id")
merged_df.drop(columns=["id_path"], inplace=True)

if dataset == "ADE20K" and loss == "binary":
    # 0.5: one run by mistake, no std dev, useless config
    merged_df = merged_df[merged_df["mincov"] != 0.5]
    # trivial result: lbd = 1.0 (model not good enough to attain non-trivial value (lbd ~ 1.0, heatmap all red))
    merged_df = merged_df[merged_df["expe_setup"] != "0.01_0.9"]


assert not merged_df.isnull().any().any(), "Error: NaN values found in merged dataframe"

merged_df.groupby(["alpha", "mincov"]).size().reset_index().rename(columns={0: "count"})

print(merged_df.head())

         config_id dataset  random_seed  alpha  optimal_lambda  mincov  \
0    0.2_False_101  LoveDA          101  0.200        0.973163   False   
1    0.1_False_101  LoveDA          101  0.100        0.998640   False   
2   0.05_False_101  LoveDA          101  0.050        0.999562   False   
3   0.01_False_101  LoveDA          101  0.010        0.999916   False   
4  0.005_False_101  LoveDA          101  0.005        0.999966   False   

      loss_function  empirical_risk  activations  empirical_coverage_ratio  \
0  miscoverage_loss        0.198286     1.388994                  0.801714   
1  miscoverage_loss        0.099392     2.650068                  0.900608   
2  miscoverage_loss        0.048548     4.069606                  0.951452   
3  miscoverage_loss        0.007724     6.350654                  0.992276   
4  miscoverage_loss        0.002503     6.796717                  0.997497   

    expe_setup  
0    0.2_False  
1    0.1_False  
2   0.05_False  
3   0.01_False  
4

In [4]:
colnames_avg_resu = [
    "expesetup",
    "alpha",
    "mincov",
    "riskavg",
    "riskstd",
    "activations",
    # "lbd_avg",
    # "lbd_std",
    # "covratio_avg",
    # "covratio_std",
]
avg_resu = []

print(
    f" --- conf:\t risk_avg +/- (std), covratio_avg +/- (std)"  # ====== lbd_avg +/- (lbd_std)"
)

for conf in merged_df["expe_setup"].unique():
    subset = merged_df[merged_df["expe_setup"] == conf]
    # print(f"{subset = }")

    assert len(subset["alpha"].unique()) == 1, "Error: more than one alpha in subset"
    assert len(subset["mincov"].unique()) == 1, "Error: more than one alpha in subset"

    alpha = subset["alpha"].unique()[0]
    activations = subset["activations"].unique()[0]
    mincov = subset["mincov"].unique()[0]

    risk_avg, covratio_avg = subset[
        ["empirical_risk", "empirical_coverage_ratio"]
    ].mean()
    risk_std, covratio_std = subset[
        ["empirical_risk", "empirical_coverage_ratio"]
    ].std()

    print(subset[["empirical_risk", "empirical_coverage_ratio"]].std())

    print(
        f" --- {conf}:\t {risk_avg:.3f} +/- {risk_std:.3f},\t {covratio_avg:.3f} +/- {covratio_std:.3f}"
    )

    avg_resu.append(
        [
            conf,
            alpha,
            mincov,
            risk_avg,
            risk_std,
            activations,
            # lbd_avg,
            # lbd_std,
            # covratio_avg,
            # covratio_std,
        ]
    )

avg_resu = pd.DataFrame(avg_resu, columns=colnames_avg_resu)

 --- conf:	 risk_avg +/- (std), covratio_avg +/- (std)
empirical_risk              0.01262
empirical_coverage_ratio    0.01262
dtype: float64
 --- 0.2_False:	 0.199 +/- 0.013,	 0.801 +/- 0.013
empirical_risk              0.009427
empirical_coverage_ratio    0.009427
dtype: float64
 --- 0.1_False:	 0.100 +/- 0.009,	 0.900 +/- 0.009
empirical_risk              0.005494
empirical_coverage_ratio    0.005494
dtype: float64
 --- 0.05_False:	 0.049 +/- 0.005,	 0.951 +/- 0.005
empirical_risk              0.000835
empirical_coverage_ratio    0.000835
dtype: float64
 --- 0.01_False:	 0.008 +/- 0.001,	 0.992 +/- 0.001
empirical_risk              0.000252
empirical_coverage_ratio    0.000252
dtype: float64
 --- 0.005_False:	 0.003 +/- 0.000,	 0.997 +/- 0.000


In [5]:
avg_metrics = avg_resu[avg_resu["mincov"] != 1.0]
avg_metrics.drop(columns=["mincov"], inplace=True)

print(f"{dataset = }")
print(f"{loss = }")

print(avg_metrics)
print()

## generate latex table line
print(avg_metrics.to_latex(index=False))

dataset = 'LoveDA'
loss = 'miscoverage'
     expesetup  alpha   riskavg   riskstd  activations
0    0.2_False  0.200  0.198736  0.012620     1.388994
1    0.1_False  0.100  0.100274  0.009427     2.650068
2   0.05_False  0.050  0.048787  0.005494     4.069606
3   0.01_False  0.010  0.007777  0.000835     6.350654
4  0.005_False  0.005  0.002642  0.000252     6.796717

\begin{tabular}{lrrrr}
\toprule
expesetup & alpha & riskavg & riskstd & activations \\
\midrule
0.2_False & 0.200000 & 0.198736 & 0.012620 & 1.388994 \\
0.1_False & 0.100000 & 0.100274 & 0.009427 & 2.650068 \\
0.05_False & 0.050000 & 0.048787 & 0.005494 & 4.069606 \\
0.01_False & 0.010000 & 0.007777 & 0.000835 & 6.350654 \\
0.005_False & 0.005000 & 0.002642 & 0.000252 & 6.796717 \\
\bottomrule
\end{tabular}

