# Tests using real data benchmarks


In [None]:
## External modules.
from contextlib import ExitStack
from copy import deepcopy
from json import load as json_load
from matplotlib import cm
from matplotlib.colors import BoundaryNorm
import matplotlib.pyplot as plt
import numpy as np
import os
from sklearn.metrics import ConfusionMatrixDisplay

## Internal modules.
from mml.utils import makedir_safe
from mml.utils.mest import scale_madmed
from setup_data import get_data
from setup_losses import get_loss
from setup_models import get_model
from setup_results import img_dir, results_dir, my_fontsize, my_ext, export_legend

In [None]:
## Risk classes under study, in the order they are processed and plotted.
riskclasses = [
    "erm",
    "mvHuber",
    "cvar",
    "dro",
]

## Human-readable label for each risk class (used as panel titles).
## Note "entropic" has a label here even though it is not in riskclasses.
risk_names = dict(
    erm="Vanilla ERM",
    mvHuber="Modified Sun-Huber",
    cvar="CVaR risk",
    entropic="Tilted risk",
    dro=r"$\chi^{2}$-DRO risk",
)

In [None]:
## Choose the dataset we want to consider.
## This name selects the results sub-directory that is read
## (results_dir/<dataset>) and is shown in figure titles and in the
## filename of the saved trajectory figure.
dataset = "adult"

In [None]:
## Choose a risk class, model, and learning algorithm combination.
## These three names are joined with underscores (after the risk class name)
## to form the method part of every result filename that is read below:
##   <task>-<riskclass>_<loss_base>_<model>_<algo>
loss_base = "logistic" # specify base loss name.
model = "linreg_multi" # specify model name
algo = "SGD_Ave" # specify algo name

In [None]:
## Directory setup.
## img_dir is where the trajectory figure is saved; makedir_safe presumably
## creates it when missing (helper from mml.utils -- TODO confirm).
makedir_safe(img_dir)
## Experiment JSON files and per-trial result files are read from here.
toread_dir = os.path.join(results_dir, dataset)

In [None]:
## Specify the evaluation metrics to look at (for selecting "best settings").
## Per-trial result files are named "<method>-<trial>.<ltype>_<etype>", so
## ltype picks the metric and each entry of etypes picks a data split.
## One curve is drawn per entry of etypes; the "val" entry is the one used
## to pick the representative step size.
ltype = "base" #"obj"
etypes = ["train", "val", "test"]

In [None]:
## Specify which epoch to use.
## Used as the row index into each trial-averaged result array; -1 selects
## the last row (presumably the final epoch -- rows are assumed to be
## ordered by epoch, TODO confirm against the experiment writer).
epoch_touse = -1

In [None]:
## Specify the statistic index to use (for selection).
## Used as the column index into each trial-averaged result array.
stat_idx = 0 # mean is 0, median is 1, std is 2.

## Specify the statistic index to use (for visualization).
## NOTE(review): not referenced in the visible part of this file; the
## trajectory plot hard-codes columns 0 and 2 instead.
stat_idx_vis = 0 # mean is 0, median is 1, std is 2.

In [None]:
## Line style per data split, so train/val/test curves can be told apart
## when drawn in the same colour.
linestyles = dict(
    train="dotted",
    val="dashed",
    test="solid",
)

In [None]:
## Extras naming etc.
## For each risk class: the keys of the experiment JSON that hold its
## risk-specific parameters (empty list when there are none).
extras_keys = dict(
    erm=[],
    mvHuber=["alpha_strat"],  # alternative: ["beta"]
    cvar=["prob"],
    dro=["atilde"],
)

## Matching display symbols, in the same order as extras_keys.
extras_symbols = dict(
    erm=[],
    mvHuber=[r"$\alpha strat$"],  # alternative: [r"$\beta$"]
    cvar=[r"$\beta$"],
    dro=[r"$\widetilde{a}$"],
)

def parse_extras(extras, risk_name):
    """Build the panel-title string describing a risk's extra parameter.

    Args:
        extras: mapping from parameter name to value; only the key that
            belongs to `risk_name` is read.
        risk_name: one of "erm", "meanvar", "mvHuber", "cvar", "dro".

    Returns:
        None for "erm" and "meanvar" (no extra parameter to show),
        otherwise a string with the parameter value formatted in.

    Raises:
        ValueError: if `risk_name` is not one of the names above.
        KeyError: if `extras` lacks the key needed for `risk_name`.
    """
    ## Risks without any extra parameter get no label at all.
    if risk_name in ["erm", "meanvar"]:
        return None

    ## (risk name, title template, key to read from extras).
    label_specs = (
        ("mvHuber", r"$\alpha$ strat {}", "alpha_strat"),
        ("cvar", r"$\beta=${}", "prob"),
        ("dro", r"$\widetilde a=${}", "atilde"),
    )
    for name, template, key in label_specs:
        if risk_name == name:
            return template.format(extras[key])

    raise ValueError("Please pass a valid risk name.")

In [None]:
## Dictionary for storing the best/representative settings for each risk class.
## Each list is filled by the tuning loop below with one entry per "r" index:
## a task string of the form "s<idx>r<idx>", or None when no best value
## could be determined.
best_settings = {riskclass: [] for riskclass in riskclasses}

In [None]:
## Gathering, processing, and visualization all handled in one big loop.
##
## For every risk class:
##  1. scan toread_dir for "s<S>r<R>-<method>.json" files to discover which
##     step-size indices (s) and risk-parameter indices (r) are available;
##  2. for each r, read the selected metric for every s and every trial,
##     average over trials, and plot it against the step size;
##  3. record, per r, the task whose validation value is smallest.

for riskclass in riskclasses:
    
    ## Most of the filename is already specified.
    mth_base = riskclass+"_"+loss_base+"_"+model+"_"+algo
    
    ## Start from a clean slate so that re-running this cell does not
    ## append duplicate entries for the same risk class.
    best_settings[riskclass] = []
    
    ## Gather the relevant hyperparameter indices to be parsed.
    ## Files that are not JSON, or belong to another method, are skipped;
    ## splitext/partition tolerate names with extra dots or hyphens.
    s_found = set()
    r_found = set()
    for filename in os.listdir(toread_dir):
        stem, extension = os.path.splitext(filename)
        if extension != ".json":
            continue
        task, _, _mth_base = stem.partition("-")
        if _mth_base != mth_base:
            continue
        ## Task names look like "s<S>r<R>".
        s_value, r_value = task.split("s")[1].split("r")
        s_found.add(int(s_value))
        r_found.add(int(r_value))
    
    if not s_found or not r_found:
        print("No results found for risk class '{}' on '{}' dataset.".format(riskclass, dataset))
        continue
    
    ## Sorted integer indices.
    r_values = sorted(r_found)
    s_values = sorted(s_found)
    num_r = len(r_values)
    
    ## Initial prep of the figure (squeeze=False keeps axes 2-D even
    ## when there is a single panel).
    fig, axes = plt.subplots(1, num_r, figsize=(12,3),
                             sharex=True, sharey=True, squeeze=False)
    
    ## Now we can just loop over the relevant files.
    for i, r in enumerate(r_values):
        
        ## Relevant axis.
        ax = axes[0, i]
        
        ## Storage for values to be plotted.
        x_values = []
        y_values_dict = {etype: [] for etype in etypes}
        
        ## Risk function parameters; taken from the first s and then
        ## required to be identical for all remaining s.
        extras = None
        
        for s in s_values:
            
            task = "s{}r{}".format(s, r)
            mth = "-".join([task, mth_base])
            
            ## Gather relevant information from the experiment JSON file.
            with open(os.path.join(toread_dir, ".".join([mth, "json"])), "r", encoding="utf-8") as f:
                json_dict = json_load(f)
            num_trials = json_dict["num_trials"]
            x_values += [json_dict["step_size"]]
            
            current_extras = {key: json_dict[key] for key in extras_keys[riskclass]}
            if extras is None:
                ## Show the full configuration once per panel.
                print(json_dict)
                extras = current_extras
            elif current_extras != extras:
                raise ValueError("Extras don't match.")
            
            ## Gather the results of interest.
            for etype in etypes:
                per_trial = []
                for trial in range(num_trials):
                    ## Read in the results for the current trial.
                    fname = mth+"-"+str(trial)+"."+ltype+"_"+etype
                    with open(os.path.join(toread_dir, fname), "r", encoding="utf-8") as f:
                        per_trial += [np.loadtxt(fname=f, dtype=float, delimiter=",", ndmin=2)]
                
                ## Stack to (trials, rows, cols), average over trials, then
                ## pick the requested row (epoch) and column (statistic).
                y_values_dict[etype] += [ np.stack(per_trial).mean(axis=0)[epoch_touse,stat_idx] ]
        
        ## Plot these results.
        x_values = np.array(x_values)
        for etype in etypes:
            y_values = np.array(y_values_dict[etype])
            ax.semilogx(x_values, y_values, base=10, marker="o",
                        color="xkcd:black", ls=linestyles[etype])
        ax.tick_params(labelsize=my_fontsize)
        ax.set_title(parse_extras(extras=extras, risk_name=riskclass), size=my_fontsize)
        
        ## Store the best task (based on validation data).
        ## nanargmin returns a position in y_values_dict["val"], which is
        ## aligned with s_values; map it back to the actual s index.
        ## It raises ValueError when every value is NaN.
        try:
            best_pos = np.nanargmin(y_values_dict["val"])
        except ValueError:
            best_settings[riskclass] += [None]
        else:
            best_settings[riskclass] += ["s{}r{}".format(s_values[best_pos], r)]
    
    ## Title and horizontal axis label for the figure.
    fig.supxlabel(r"Step size factor ($\log_{10}$ scale)", size=my_fontsize)
    fig.suptitle("Empirical {} mean values (dataset: {})".format(riskclass, dataset), size=my_fontsize)
    
    ## Display the figure.
    plt.tight_layout()
    plt.show()

Next, we visualize the trajectory (over time) of several key evaluation metrics of interest.

In [None]:
## Specify the risks for which we want to visualize trajectories.
## One panel is drawn per entry, in this order. Each entry is used as a key
## into best_settings, so it must be one of the risk classes listed in
## riskclasses.
riskclasses_tovis = ["erm", "mvHuber", "cvar", "dro"]

In [None]:
## Specify the evaluation metrics to look at (from ["base", "obj", "zeroone", "confuse", "l1", "l2"]).
## Every metric in ltypes is loaded from disk; ltype_tovis is the one that
## is actually drawn in the trajectory figure. etype picks the data split
## whose result files ("<method>-<trial>.<ltype>_<etype>") are read.
ltypes = ["base", "zeroone", "confuse"]
ltype_tovis = "base"
etype = "test" # either "train", "val", or "test"

In [None]:
## Prepare a dictionary for storing results.
## Both are keyed by risk class name. results_allrisks maps to a list of
## {metric name: stacked per-trial array} dicts, and json_dicts_allrisks to
## the list of experiment JSON dicts in the same order.
results_allrisks = {}
json_dicts_allrisks = {}

In [None]:
## Gathering of results (for trajectories).
##
## For every risk class to be visualized, load all requested metrics for
## each representative task stored in best_settings, and keep them together
## with the experiment JSON describing that task.

for riskclass in riskclasses_tovis:
    
    ## Initialize a list of results for each risk class.
    results_list = []
    json_dicts = []
    
    ## Grab the representative results.
    for task in best_settings[riskclass]:
        
        if task is None:
            print("Risk {}: task is none here; skipping.".format(riskclass))
            continue
        
        ## Identify the method for which we will gather results.
        mth = task+"-"+riskclass+"_"+loss_base+"_"+model+"_"+algo
        
        ## Gather relevant information from the experiment JSON file.
        with open(os.path.join(toread_dir, ".".join([mth, "json"])), "r", encoding="utf-8") as f:
            json_dict = json_load(f)
        num_trials = json_dict["num_trials"]
        
        ## Results for this specific risk setting, keyed by metric name.
        results = {}
        
        ## Gather the results of interest.
        ## The loop variable is deliberately not called "ltype", so the
        ## notebook-level ltype used by the tuning cell is left untouched.
        for metric in ltypes:
            per_trial = []
            for trial in range(num_trials):
                ## Read in the results for the current trial.
                fname = mth+"-"+str(trial)+"."+metric+"_"+etype
                with open(os.path.join(toread_dir, fname), "r", encoding="utf-8") as f:
                    per_trial += [np.loadtxt(fname=f, dtype=float, delimiter=",", ndmin=2)]
            
            ## Having covered all trials, stack to (trials, rows, cols).
            results[metric] = np.stack(per_trial)
        
        ## Store results for this risk setting. Both objects are created
        ## fresh in every iteration, so no copy is needed.
        results_list += [results]
        json_dicts += [json_dict]
    
    ## Finally, store the results for the current risk class.
    results_allrisks[riskclass] = results_list
    json_dicts_allrisks[riskclass] = json_dicts

In [None]:
## Visualize the trajectories.
##
## One panel per risk class; within a panel one curve per representative
## setting, with a shaded band of +/- one standard deviation over trials.

figsize = (12,3.5) # nice size for paper.
#figsize = (20,8) # nice size for viewing here.

## squeeze=False keeps the axes array 2-D, so indexing also works when only
## a single risk class is visualized.
fig, axes = plt.subplots(1, len(riskclasses_tovis), figsize=figsize,
                         sharey=True, squeeze=False)
axes = axes[0]

## Color setup. plt.get_cmap is used because matplotlib.cm.get_cmap has
## been removed from recent Matplotlib releases.
cmap = plt.get_cmap("plasma")

## Loop over risk classes.
for j, riskclass in enumerate(riskclasses_tovis):
    
    ax = axes[j]
    
    ## Result lists for this risk class.
    results_list = results_allrisks[riskclass]
    json_dicts = json_dicts_allrisks[riskclass]
    
    ## Relevant bits of information.
    num_risks = len(results_list)
    colours = [cmap(k/num_risks) for k in range(num_risks)]
    
    ## Loop over the individual risks.
    for k, (results, json_dict) in enumerate(zip(results_list, json_dicts)):
        
        if riskclass == "mvHuber" and k > 0:
            print("Only using strategy 0 for mvHuber.")
            continue
        
        ## More relevant bits of information.
        num_epochs = json_dict["num_epochs"]
        
        ## Values to be plotted. Columns 0 and 2 are summed for every
        ## trial and row (mean + std, per the "mean is 0, std is 2"
        ## convention noted for stat_idx); the curve is the average of
        ## that sum over trials and the band is its standard deviation
        ## over trials.
        x_values = np.arange(num_epochs+1)
        mean_plus_sd = results[ltype_tovis][...,np.array([0,2])].sum(axis=2)
        y_values = mean_plus_sd.mean(axis=0)
        y_err = mean_plus_sd.std(axis=0)
        
        ## Plot results.
        ax.plot(x_values, y_values, color=colours[k])
        ax.fill_between(x=x_values, y1=y_values-y_err, y2=y_values+y_err,
                        alpha=0.15, color=colours[k], lw=0)
    
    ## Column titles.
    ax.set_title(risk_names[riskclass], size=my_fontsize)
    ax.tick_params(labelsize=my_fontsize)

## Title for the figure.
fig_title = "Mean + SD (dataset: {})".format(dataset)
fig.suptitle(fig_title, size=my_fontsize, fontweight="bold")
fname = os.path.join(img_dir, "real_traj_mstd_{}.{}".format(dataset, my_ext))
plt.tight_layout()
plt.savefig(fname=fname)
plt.show()

___