# Report

In [2]:
from config import ASSETS_DIR
from utils import log, reporting
import os

logger = log.get_logger()

## General configurations and functions

In [3]:
ARCHITECTURE_COLUMNS = ["architecture_name", 
                        "attn_dropout", 
                        "embed_dim", 
                        "ff_dropout", 
                        "n_head", 
                        "n_layers",
                        "numerical_passthrough", 
                        "optimizer__lr", 
                        "optimizer__weight_decay",
                        "aggregator__cell", 
                        "aggregator__dropout", 
                        "aggregator__hidden_size",
                        "aggregator__num_layers"
                    ]

OPTIMIZATION_METRIC = "log_loss"
EVALUATION_METRIC = "balanced_accuracy"

DATASET_MAP = {
    "jasmine": "JASM",
    "anneal": "ANNE",
    "australian": "AUST",
    "kr-vs-kp": "KVSK",
    "sylvine": "SYLV",
    "nomao": "NOMA",
    "volkert": "VOLK",
    "adult": "ADUL",
    "ldpa": "LDPA"
}

AGGREGATOR_MAP = {
    "cls": "CLS",
    "concatenate": "CAT",
    "max": "MAX",
    "mean": "AVG",
    "rnn": "RNN",
    "sum": "SUM",
}

In [4]:
def num_as_str(x):
    return "{0:.3f}".format(x)


if not os.path.exists(ASSETS_DIR):
    os.makedirs(ASSETS_DIR)

## General executions

In [11]:
# Load executions report
executions_df = reporting.load_report_df()
executions_df["dataset"] = executions_df["dataset"].replace(DATASET_MAP)
executions_df["aggregator"] = executions_df["aggregator"].replace(AGGREGATOR_MAP)































































IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)































































IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [12]:
assert executions_df.shape[0] == 15360, "The number of executions is wrong"

## Best models

In [14]:
# Compute mean and std for each metric
best_archs = executions_df
# Delete this fillna
best_archs = best_archs.fillna(-1)
best_archs = best_archs.drop(["fold_name"], axis=1) \
                        .groupby(["dataset", "aggregator"] + ARCHITECTURE_COLUMNS, as_index=False) \
                        .agg(["mean", "std"]) 

best_archs.columns = ["_".join(col) if col[1] else col[0] for col in best_archs.columns]
assert best_archs.shape[0] == 3072, "The number of aggregated executions is wrong"

### Per dataset & aggregator

In [15]:
# Get registers with the lowest optimization metric
best_archs_ds_agg = best_archs.loc[
                best_archs.groupby(["dataset", "aggregator"])[f"{OPTIMIZATION_METRIC}_mean"].idxmin()
            ]

assert best_archs_ds_agg.shape[0] == 54, "The number of optimal architectures is wrong"


# Adds a column to format Latex
best_archs_ds_agg[EVALUATION_METRIC] = "$" + best_archs_ds_agg[f"{EVALUATION_METRIC}_mean"].apply(num_as_str) \
                                + " \pm " + best_archs_ds_agg[f"{EVALUATION_METRIC}_std"].apply(num_as_str) + "$"

# Exports best dataset-aggregator configuration
best_archs_ds_agg = best_archs_ds_agg[["dataset", "aggregator", EVALUATION_METRIC]] \
                        .pivot(index="dataset", columns="aggregator") \
                        .reset_index()
best_archs_ds_agg.columns = [col[1] for col in best_archs_ds_agg.columns]

with open(os.path.join(ASSETS_DIR, "ds_agg_baccuracy_cv.tex"), "w") as f:
    f.write(best_archs_ds_agg.to_latex(index=False))
    
best_archs_ds_agg

Unnamed: 0,Unnamed: 1,AVG,CAT,CLS,MAX,RNN,SUM
0,ADUL,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$
1,ANNE,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$
2,AUST,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$
3,JASM,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$0.812 \pm 0.012$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$
4,KVSK,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$
5,LDPA,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$
6,NOMA,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$
7,SYLV,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$
8,VOLK,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$,$-1.000 \pm 0.000$


### Per dataset

In [19]:
# Exports best dataset configuration
best_archs_ds = best_archs.loc[
                best_archs.groupby(["dataset"])[f"{OPTIMIZATION_METRIC}_mean"].idxmin()
            ]

assert best_archs_ds.shape[0] == 9, "The number of optimal architectures is wrong"


# Adds a column to format Latex
best_archs_ds[EVALUATION_METRIC] = "$" + best_archs_ds[f"{EVALUATION_METRIC}_mean"].apply(num_as_str) \
                                + " \pm " + best_archs_ds[f"{EVALUATION_METRIC}_std"].apply(num_as_str) + "$"

# Exports best dataset configuration
best_archs_ds = best_archs_ds[["dataset", "aggregator"] + ARCHITECTURE_COLUMNS + [EVALUATION_METRIC]]

with open(os.path.join(ASSETS_DIR, "ds_baccuracy_cv.tex"), "w") as f:
    f.write(best_archs_ds.to_latex(index=False))

best_archs_ds

Unnamed: 0,dataset,aggregator,architecture_name,attn_dropout,embed_dim,ff_dropout,n_head,n_layers,numerical_passthrough,optimizer__lr,optimizer__weight_decay,aggregator__cell,aggregator__dropout,aggregator__hidden_size,aggregator__num_layers,balanced_accuracy
1,ADUL,AVG,A1,0.3,128,0.1,4,2,False,0.0001,0.0001,-1,-1.0,-1.0,-1.0,$-1.000 \pm 0.000$
384,ANNE,AVG,A0,0.3,128,0.1,4,2,True,0.0001,0.0001,-1,-1.0,-1.0,-1.0,$-1.000 \pm 0.000$
769,AUST,AVG,A1,0.3,128,0.1,4,2,False,0.0001,0.0001,-1,-1.0,-1.0,-1.0,$-1.000 \pm 0.000$
1153,JASM,AVG,A1,0.3,128,0.1,4,2,False,0.0001,0.0001,-1,-1.0,-1.0,-1.0,$-1.000 \pm 0.000$
1536,KVSK,AVG,A0,0.3,128,0.1,4,2,True,0.0001,0.0001,-1,-1.0,-1.0,-1.0,$-1.000 \pm 0.000$
1920,LDPA,AVG,A0,0.3,128,0.1,4,2,True,0.0001,0.0001,-1,-1.0,-1.0,-1.0,$-1.000 \pm 0.000$
2304,NOMA,AVG,A0,0.3,128,0.1,4,2,True,0.0001,0.0001,-1,-1.0,-1.0,-1.0,$-1.000 \pm 0.000$
2688,SYLV,AVG,A1,0.3,128,0.1,4,2,False,0.0001,0.0001,-1,-1.0,-1.0,-1.0,$-1.000 \pm 0.000$
2880,VOLK,AVG,A1,0.3,128,0.1,4,2,False,0.0001,0.0001,-1,-1.0,-1.0,-1.0,$-1.000 \pm 0.000$
