# Init

In [1]:
import sys
sys.path.append("../../src")
import pickle as pkl
from pathlib import Path
from collections import defaultdict

import pandas as pd

import constants
from gen.util import read_data

In [2]:
# Directory (relative to the notebook's working directory) that is scanned
# for pickled metric files.
root_metrics = Path("..", "..", "metrics")

In [3]:
# Walk the metrics directory, echo every entry's stem, and bind each
# unpickled ``*.pkl`` payload to a module-level variable named after the file.
# NOTE: ``pickle.load`` can execute arbitrary code; only load trusted files.
result_p = root_metrics

for res in result_p.iterdir():
    print(res.stem)
    if res.suffix != ".pkl":
        # Non-pickle entries are listed but not loaded.
        continue
    with res.open("rb") as handle:
        globals()[res.stem] = pkl.load(handle)

concatenate_evidences_metrics
sent_macro_verdict_meanproba_metrics
sent_micro_verdict_metrics
sent_macro_verdict_majority_metrics


# Summarise

In [4]:
def summarise_metrics(metrics):
    """Flatten nested evaluation results into per-split lists of metric rows.

    Args:
        metrics: Mapping ``dataset -> split -> iterable of result objects``.
            Each result object must expose ``rte_metrics`` (assumed to be a
            dict-like mapping of metric name to value) and ``_score_name``
            (a ``-``-separated string with at least two components).

    Returns:
        A ``defaultdict(list)`` keyed by ``"{dataset}-{split}"``. Each value
        is a list of dicts holding a copy of the result's ``rte_metrics``
        plus the keys ``fullname``, ``model``, ``dataset`` and ``trained_on``.

    Raises:
        IndexError: If a ``_score_name`` contains no ``-`` separator.
    """
    summary = defaultdict(list)
    for dataset, splits in metrics.items():
        for split, results in splits.items():
            for result in results:
                score_name = result._score_name
                # Copy before annotating: writing into the result's own dict
                # would pollute the loaded metrics and make rows alias each
                # other when the same result object is seen more than once.
                row = dict(result.rte_metrics)
                row["fullname"] = score_name
                # Model family is inferred by substring; anything that is
                # neither "da" nor "xlnet" is labelled "bert".
                if "da" in score_name:
                    row["model"] = "da"
                elif "xlnet" in score_name:
                    row["model"] = "xlnet"
                else:
                    row["model"] = "bert"
                row["dataset"] = dataset
                parts = score_name.split("-")
                # Keep the second component only when it names a
                # climatefever variant; otherwise the label ends in "-".
                second = parts[1] if "climatefever" in parts[1] else ""
                row["trained_on"] = "-".join([parts[0], second])
                summary[f"{dataset}-{split}"].append(row)
    return summary

In [5]:
def summarise_confusion_matrix(metrics):
    """Build one per-label classification-report table per dataset split.

    Args:
        metrics: Mapping ``dataset -> split -> iterable of result objects``.
            Each result object must expose ``classification_report_dict``
            (indexed by every key of ``constants.LABEL2ID``, each entry being
            a mapping of metric name to value) and ``_score_name``.

    Returns:
        A ``defaultdict(list)`` keyed by ``"{dataset}-{split}"``. Each value
        is a list holding one ``DataFrame`` with a row per result object and
        columns ``"{label}_{metric}"`` plus ``fullname``, ``model``,
        ``dataset`` and ``trained_on``.
    """
    summary = defaultdict(list)
    for dataset, splits in metrics.items():
        for split, model_ls in splits.items():
            rows = []
            for model in model_ls:
                # Flatten the nested report into "<label>_<metric>" columns.
                row = {
                    f"{label}_{metname}": metval
                    for label in constants.LABEL2ID.keys()
                    for metname, metval in model.classification_report_dict[label].items()
                }
                score_name = model._score_name
                row["fullname"] = score_name
                # First matching tag wins; "bert" is the fallback label.
                row["model"] = next(
                    (tag for tag in ("da", "xlnet") if tag in score_name),
                    "bert",
                )
                row["dataset"] = dataset
                parts = score_name.split("-")
                suffix = parts[1] if "climatefever" in parts[1] else ""
                row["trained_on"] = f"{parts[0]}-{suffix}"
                rows.append(row)
            summary[f"{dataset}-{split}"].append(pd.DataFrame(rows))
    return summary

## Concatenate Sentences

In [6]:
# Per-split metric rows for the concatenated-evidence results.
concat_summary = summarise_metrics(metrics=concatenate_evidences_metrics)

In [7]:
# Stack the dev/all summary tables (each sorted by training set, then model)
# and write them to tmp.csv, replacing any existing file of that name.
pd.concat(
    [
        pd.DataFrame(concat_summary[split_key]).sort_values(["trained_on", "model"])
        for split_key in (
            "fever-dev",
            "climatefeverpure-dev",
            "climatefever-dev",
            "scifact-all",
            "scifactpipeline-all",
        )
    ],
    axis=0,
).to_csv("tmp.csv")

In [8]:
# Stack the test-split summary tables (each sorted by training set, then
# model) and write them to tmp.csv, replacing any existing file of that name.
pd.concat(
    [
        pd.DataFrame(concat_summary[split_key]).sort_values(["trained_on", "model"])
        for split_key in (
            "fever-test",
            "climatefeverpure-test",
            "climatefever-test",
        )
    ],
    axis=0,
).to_csv("tmp.csv")

In [9]:
# Per-split, per-label report tables for the concatenated-evidence results.
concat_confusion = summarise_confusion_matrix(metrics=concatenate_evidences_metrics)

In [10]:
# Stack the dev/all per-label tables (each sorted by training set, then
# model) and write them to tmp.csv, replacing any existing file of that name.
pd.concat(
    [
        pd.concat(concat_confusion[split_key]).sort_values(["trained_on", "model"])
        for split_key in (
            "fever-dev",
            "climatefeverpure-dev",
            "climatefever-dev",
            "scifact-all",
            "scifactpipeline-all",
        )
    ],
    axis=0,
).to_csv("tmp.csv")

In [11]:
# Stack the test-split per-label tables (each sorted by training set, then
# model) and write them to tmp.csv, replacing any existing file of that name.
pd.concat(
    [
        pd.concat(concat_confusion[split_key]).sort_values(["trained_on", "model"])
        for split_key in (
            "fever-test",
            "climatefeverpure-test",
            "climatefever-test",
        )
    ],
    axis=0,
).to_csv("tmp.csv")

## Sentence

### Micro

In [12]:
# Per-split metric rows for the sentence-level micro verdict results.
micro_summary = summarise_metrics(metrics=sent_micro_verdict_metrics)

In [13]:
# Stack the dev/all summary tables (each sorted by training set, then model)
# and write them to tmp.csv, replacing any existing file of that name.
pd.concat(
    [
        pd.DataFrame(micro_summary[split_key]).sort_values(["trained_on", "model"])
        for split_key in (
            "fever-dev",
            "climatefeverpure-dev",
            "climatefever-dev",
            "scifact-all",
            "scifactpipeline-all",
        )
    ],
    axis=0,
).to_csv("tmp.csv")

In [14]:
# Stack the test-split summary tables (each sorted by training set, then
# model) and write them to tmp.csv, replacing any existing file of that name.
pd.concat(
    [
        pd.DataFrame(micro_summary[split_key]).sort_values(["trained_on", "model"])
        for split_key in (
            "fever-test",
            "climatefeverpure-test",
            "climatefever-test",
        )
    ],
    axis=0,
).to_csv("tmp.csv")

In [15]:
# Per-split, per-label report tables for the sentence-level micro verdicts.
micro_confusion = summarise_confusion_matrix(metrics=sent_micro_verdict_metrics)

In [16]:
# Stack the dev/all per-label tables (each sorted by training set, then
# model) and write them to tmp.csv, replacing any existing file of that name.
pd.concat(
    [
        pd.concat(micro_confusion[split_key]).sort_values(["trained_on", "model"])
        for split_key in (
            "fever-dev",
            "climatefeverpure-dev",
            "climatefever-dev",
            "scifact-all",
            "scifactpipeline-all",
        )
    ],
    axis=0,
).to_csv("tmp.csv")

In [17]:
# Stack the test-split per-label tables (each sorted by training set, then
# model) and write them to tmp.csv, replacing any existing file of that name.
pd.concat(
    [
        pd.concat(micro_confusion[split_key]).sort_values(["trained_on", "model"])
        for split_key in (
            "fever-test",
            "climatefeverpure-test",
            "climatefever-test",
        )
    ],
    axis=0,
).to_csv("tmp.csv")

### Macro

#### Majority

In [18]:
# Per-split metric rows for the macro (majority) verdict results.
macro_majority_summary = summarise_metrics(metrics=sent_macro_verdict_majority_metrics)

In [19]:
# Stack the dev/all summary tables (each sorted by training set, then model)
# and write them to tmp.csv, replacing any existing file of that name.
pd.concat(
    [
        pd.DataFrame(macro_majority_summary[split_key]).sort_values(["trained_on", "model"])
        for split_key in (
            "fever-dev",
            "climatefeverpure-dev",
            "climatefever-dev",
            "scifact-all",
            "scifactpipeline-all",
        )
    ],
    axis=0,
).to_csv("tmp.csv")

In [20]:
# Stack the test-split summary tables (each sorted by training set, then
# model) and write them to tmp.csv, replacing any existing file of that name.
pd.concat(
    [
        pd.DataFrame(macro_majority_summary[split_key]).sort_values(["trained_on", "model"])
        for split_key in (
            "fever-test",
            "climatefeverpure-test",
            "climatefever-test",
        )
    ],
    axis=0,
).to_csv("tmp.csv")

In [21]:
# Per-split, per-label report tables for the macro (majority) verdicts.
macro_majority_confusion = summarise_confusion_matrix(metrics=sent_macro_verdict_majority_metrics)

In [22]:
# Stack the dev/all per-label tables (each sorted by training set, then
# model) and write them to tmp.csv, replacing any existing file of that name.
pd.concat(
    [
        pd.concat(macro_majority_confusion[split_key]).sort_values(["trained_on", "model"])
        for split_key in (
            "fever-dev",
            "climatefeverpure-dev",
            "climatefever-dev",
            "scifact-all",
            "scifactpipeline-all",
        )
    ],
    axis=0,
).to_csv("tmp.csv")

In [23]:
# Stack the test-split per-label tables (each sorted by training set, then
# model) and write them to tmp.csv, replacing any existing file of that name.
pd.concat(
    [
        pd.concat(macro_majority_confusion[split_key]).sort_values(["trained_on", "model"])
        for split_key in (
            "fever-test",
            "climatefeverpure-test",
            "climatefever-test",
        )
    ],
    axis=0,
).to_csv("tmp.csv")

#### Mean proba

In [24]:
# Per-split metric rows for the macro (mean probability) verdict results.
macro_meanproba_summary = summarise_metrics(metrics=sent_macro_verdict_meanproba_metrics)

In [25]:
# Stack the dev/all summary tables (each sorted by training set, then model)
# and write them to tmp.csv, replacing any existing file of that name.
pd.concat(
    [
        pd.DataFrame(macro_meanproba_summary[split_key]).sort_values(["trained_on", "model"])
        for split_key in (
            "fever-dev",
            "climatefeverpure-dev",
            "climatefever-dev",
            "scifact-all",
            "scifactpipeline-all",
        )
    ],
    axis=0,
).to_csv("tmp.csv")

In [26]:
# Stack the test-split summary tables (each sorted by training set, then
# model) and write them to tmp.csv, replacing any existing file of that name.
pd.concat(
    [
        pd.DataFrame(macro_meanproba_summary[split_key]).sort_values(["trained_on", "model"])
        for split_key in (
            "fever-test",
            "climatefeverpure-test",
            "climatefever-test",
        )
    ],
    axis=0,
).to_csv("tmp.csv")

In [27]:
# Per-split, per-label report tables for the macro (mean probability) verdicts.
macro_meanproba_confusion = summarise_confusion_matrix(metrics=sent_macro_verdict_meanproba_metrics)

In [28]:
# Stack the dev/all per-label tables (each sorted by training set, then
# model) and write them to tmp.csv, replacing any existing file of that name.
pd.concat(
    [
        pd.concat(macro_meanproba_confusion[split_key]).sort_values(["trained_on", "model"])
        for split_key in (
            "fever-dev",
            "climatefeverpure-dev",
            "climatefever-dev",
            "scifact-all",
            "scifactpipeline-all",
        )
    ],
    axis=0,
).to_csv("tmp.csv")

In [29]:
# Stack the test-split per-label tables (each sorted by training set, then
# model) and write them to tmp.csv, replacing any existing file of that name.
pd.concat(
    [
        pd.concat(macro_meanproba_confusion[split_key]).sort_values(["trained_on", "model"])
        for split_key in (
            "fever-test",
            "climatefeverpure-test",
            "climatefever-test",
        )
    ],
    axis=0,
).to_csv("tmp.csv")