In [37]:
from pathlib import Path

import pandas as pd
from sklearn.metrics import cohen_kappa_score

label_mapping = {
    "COMPONENT_USED": "COMPONENT_USED",
    "RE-EVALUATION": "PREVIOUS_VERSION",
    "EVALUATION_REUSED": "COMPONENT_USED",
    "PREVIOUS_VERSION": "PREVIOUS_VERSION",
    "COMPONENT_SHARED": "COMPONENT_USED",
}


def load_all_dataframes(base_folder: Path) -> pd.DataFrame:
    splits = ["train", "valid", "test"]

    df_train, df_valid, df_test = pd.DataFrame(), pd.DataFrame(), pd.DataFrame()
    for split in splits:
        df = pd.read_csv(base_folder / f"{split}.csv")
        if split == "train":
            df_train = df
        elif split == "valid":
            df_valid = df
        else:
            df_test = df

    df_to_return = pd.concat([df_train, df_valid, df_test])
    return df_to_return.assign(label=lambda df_: df_.label.fillna("unknown")).assign(
        label=lambda df_: df_.label.str.upper(),
        simplified_label=lambda df_: df_.label.map(label_mapping),
    )


REPO_ROOT = Path()


adam_df = load_all_dataframes(REPO_ROOT / "src/sec_certs/data/reference_annotations/adam")
jano_df = load_all_dataframes(REPO_ROOT / "src/sec_certs/data/reference_annotations/jano")
agreement_series = adam_df.label == jano_df.label

print("Results on 5 classes:")
print(f"\t- Cohen's Kappa: {cohen_kappa_score(adam_df.label, jano_df.label)}")
print(f"\t- Percentage agreement: {agreement_series.loc[agreement_series == True].count() / agreement_series.count()}")

indices_to_drop = set(adam_df.loc[adam_df.simplified_label.isnull()].index.tolist()) | set(
    jano_df.loc[jano_df.simplified_label.isnull()].index.tolist()
)
adam_df_simplified = adam_df.drop(indices_to_drop)
jano_df_simplified = jano_df.drop(indices_to_drop)
agreement_series = adam_df_simplified.simplified_label == jano_df_simplified.simplified_label


print("Results on simplified 2 classes:")
print(
    f"\t- Cohen's Kappa: {cohen_kappa_score(adam_df_simplified.simplified_label, jano_df_simplified.simplified_label)}"
)
print(f"\t- Percentage agreement: {agreement_series.loc[agreement_series == True].count() / agreement_series.count()}")


Results on 5 classes:
	- Cohen's Kappa: 0.7101271765978729
	- Percentage agreement: 0.8225
Results on simplified 2 classes:
	- Cohen's Kappa: 0.8424203759140207
	- Percentage agreement: 0.9437869822485208
