# Results Summaries + Plots

In [85]:
import json, os
import seaborn as sns  
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from dotenv import load_dotenv

# Load environment variables via python-dotenv before any cell reads them with os.getenv
# (RELATION_LABELS is read that way further down). The trailing ";" suppresses the
# call's return value from being echoed as cell output.
load_dotenv();

## Load Results

### OOD Eval

For each mapping type we create a DataFrame whose rows are the relation types, followed by three final rows holding the micro, macro, and weighted F1 scores.
The columns are the six domains, then the cross-domain average, then the cross-domain average without the news domain.
Cells with zero support are left empty (`None`, shown as `NaN`) and are not counted in the average without the news domain.

In [86]:
mapping_types = ["no_mapping", "manual", "embedding", "ood_clustering", "topological", "thesaurus_affinity"]
domains = ["ai", "literature", "music", "politics", "science", "news"]
results_folder_path = "data/results"

# Result folders are named "<domain initials>_<mapping type>", where the initials
# are the first letters of the alphabetically sorted domain names ("almnps" here).
domain_list = "".join([domain[0] for domain in sorted(domains)])

# The relation labels are a whitespace-separated list stored in the environment.
# Fail with an explicit message instead of an AttributeError on None.split().
relation_labels = os.getenv("RELATION_LABELS")
if relation_labels is None:
    raise RuntimeError(
        "Environment variable RELATION_LABELS is not set; "
        "define it (e.g. in the .env file) before running this cell."
    )
relation_types = sorted(relation_labels.split())
f1_types = ["micro", "macro", "weighted"]

# Row labels: one per relation type, then the three aggregate F1 rows.
indices = relation_types + [f"{f1_type}_F1" for f1_type in f1_types]
# Column labels: one per domain, the average reported in the JSON, and the
# average over the non-news domains that is computed below.
columns = domains + ["average", "avg_wo_news"]

# Maps mapping type -> DataFrame of F1 scores (rows: indices, columns: columns).
ood_results = {}

for mapping_type in mapping_types:
    result_path = os.path.join(results_folder_path, f"{domain_list}_{mapping_type}", "ood_validation.json")
    with open(result_path) as f:
        result_dict = json.load(f)

    # Start from a float frame filled with NaN. Initialising with the integer 0
    # creates int64 columns, and writing floats/None into those relies on a
    # silent upcast that recent pandas versions deprecate.
    results = pd.DataFrame(float("nan"), index=indices, columns=columns)

    # Fill in the dataframe row by row
    for relation_type in relation_types + f1_types:
        if relation_type in f1_types:
            # Aggregate rows are stored as e.g. "micro avg" in the JSON report
            json_name = f"{relation_type} avg"
            result_name = f"{relation_type}_F1"
        else:
            json_name = relation_type
            result_name = relation_type

        # Only label/domain scores with non-zero support take part in the
        # non-news average, so the denominator is the number of collected scores.
        f1_wo_news = []
        for domain in domains + ["average"]:
            report = result_dict[domain][json_name]
            if report["support"] == 0:
                # No examples for this label in this domain: leave the cell as NaN
                continue
            f1 = report["f1-score"]
            results.loc[result_name, domain] = f1
            if domain not in ["news", "average"]:
                f1_wo_news.append(f1)

        # A label without support in every non-news domain has no defined
        # average; keep NaN rather than dividing by zero.
        avg_wo_news = sum(f1_wo_news) / len(f1_wo_news) if f1_wo_news else float("nan")
        results.loc[result_name, "avg_wo_news"] = avg_wo_news
    ood_results[mapping_type] = results

In [88]:
# Labels of the three aggregate rows shared by every per-mapping OOD table.
sum_measures = ["micro_F1", "macro_F1", "weighted_F1"]

# One column per mapping type, holding that mapping's aggregate F1 scores averaged
# over the non-news domains. The frame is built directly from the float columns
# instead of filling an integer-initialised frame cell by cell, which depends on
# a silent int -> float upcast that recent pandas versions deprecate.
ood_summary = pd.DataFrame(
    {
        mapping_type: ood_results[mapping_type].loc[sum_measures, "avg_wo_news"]
        for mapping_type in mapping_types
    },
    index=sum_measures,
    columns=mapping_types,
).astype(float)
ood_summary

Unnamed: 0,no_mapping,manual,embedding,ood_clustering,topological,thesaurus_affinity
micro_F1,0.466878,0.544317,0.510969,0.49462,0.539467,0.548099
macro_F1,0.288376,0.328379,0.303398,0.288007,0.326025,0.327904
weighted_F1,0.435395,0.511465,0.479359,0.462159,0.510325,0.521648


### Cross-Domain Eval

A single DataFrame: the rows are the same as above, and the columns are the mapping types (`ood_clustering` is excluded here).

In [87]:
# The cross-domain evaluation covers every mapping type except "ood_clustering"
# (presumably no cross-domain results exist for it -- TODO confirm).
mapping_types_cd_eval = [mapping_type for mapping_type in mapping_types if mapping_type != "ood_clustering"]

# Float frame filled with NaN: rows are relation types plus the aggregate F1 rows,
# columns are mapping types. Initialising with the integer 0 creates int64 columns,
# and writing floats/None into those relies on a silent upcast that recent pandas
# versions deprecate.
cd_results = pd.DataFrame(float("nan"), index=indices, columns=mapping_types_cd_eval)
for mapping_type in mapping_types_cd_eval:
    result_path = os.path.join(results_folder_path, f"{domain_list}_{mapping_type}", "all.json")
    with open(result_path) as f:
        result_dict = json.load(f)

    # Only the "average" entry of the report is used for this table
    average_report = result_dict["average"]

    # Fill in the dataframe row by row
    for relation_type in relation_types + f1_types:
        if relation_type in f1_types:
            # Aggregate rows are stored as e.g. "micro avg" in the JSON report
            json_name = f"{relation_type} avg"
            result_name = f"{relation_type}_F1"
        else:
            json_name = relation_type
            result_name = relation_type

        # Cells without support stay NaN; everything else gets the reported F1
        if average_report[json_name]["support"] != 0:
            cd_results.loc[result_name, mapping_type] = average_report[json_name]["f1-score"]

In [92]:
# The aggregate rows (micro/macro/weighted F1) are the last three rows of the
# cross-domain table, so the summary is simply its tail.
cd_summary = cd_results.tail(3)
cd_summary

Unnamed: 0,no_mapping,manual,embedding,topological,thesaurus_affinity
micro_F1,0.639771,0.643988,0.633692,0.631987,0.641095
macro_F1,0.452749,0.467648,0.448746,0.463378,0.461635
weighted_F1,0.618562,0.62194,0.611512,0.613782,0.619818
