In [None]:
import copy
import json
import os

import dotenv
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
# Read function
def load_result_json(mapping_method, ood=False):
    """Load the stored evaluation results of one mapping method.

    The file is read from ``data/results/almnps_<mapping_method>/``:
    ``ood_validation.json`` when ``ood`` is true, ``all.json`` otherwise.

    Args:
        mapping_method: Suffix of the results directory name.
        ood: Selects ``ood_validation.json`` instead of ``all.json``.

    Returns:
        The parsed JSON content of the selected file.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    file_name = "ood_validation.json" if ood else "all.json"
    path = f"data/results/almnps_{mapping_method}/" + file_name

    # The context manager closes the handle even when parsing fails; the
    # explicit encoding keeps reading independent of the platform default.
    with open(path, "r", encoding="utf-8") as file:
        return json.load(file)

In [None]:
# Relation labels are stored under RELATION_LABELS as one space-separated
# string; sort them so every table and plot uses the same order.
envs = dotenv.dotenv_values()
list_of_relations = sorted(envs["RELATION_LABELS"].split(" "))

# OOD results of the "embedding" mapping are the reference result set.
results_for_method = load_result_json("embedding", ood=True)

# Every top-level key after the first two is treated as a domain.
domains = list(results_for_method)[2:]

# (number of relations, number of domains): array shape used by the helpers.
shape_of_array = len(list_of_relations), len(domains)

## Helper functions

In [None]:
def get_all_f1_scores(results, domain_names=None, relation_names=None):
    """Collect the per-relation F1 scores of every domain in one matrix.

    Args:
        results: Mapping such that ``results[domain][relation]`` is a dict
            holding at least the keys ``"support"`` and ``"f1-score"``.
        domain_names: Domains to use as columns; defaults to the
            notebook-level ``domains`` list.
        relation_names: Relations to use as rows; defaults to the
            notebook-level ``list_of_relations``.

    Returns:
        A float array of shape ``(len(relation_names), len(domain_names))``.
        A cell is NaN when the relation has zero support in the domain, so it
        can be told apart from a genuine F1 score of 0.

    Raises:
        KeyError: If a requested domain or relation is missing from ``results``.
    """
    if domain_names is None:
        domain_names = domains
    if relation_names is None:
        relation_names = list_of_relations

    # Start from NaN everywhere and fill in only the supported cells. Sizing
    # the array from the label lists keeps it consistent with the loops below.
    scores = np.full((len(relation_names), len(domain_names)), np.nan)

    for col, domain in enumerate(domain_names):
        for row, relation in enumerate(relation_names):
            entry = results[domain][relation]
            if entry["support"] != 0:
                scores[row, col] = entry["f1-score"]

    return scores

def get_total_support_for_domain(results, domain):
    """Return the summed ``"support"`` of all relations in ``list_of_relations`` for ``domain``."""
    return sum(
        results[domain][relation]["support"]
        for relation in list_of_relations
    )

def weighted_f1_score(results):
    """Compute the support-weighted F1 score of every domain.

    Each relation's F1 score is weighted by its share of the domain's total
    support; relations with zero support are skipped.

    Args:
        results: Mapping such that ``results[domain][relation]["support"]``
            is available for every entry of ``domains`` and
            ``list_of_relations``.

    Returns:
        A list with one weighted F1 value per entry of ``domains``, in the
        same order.
    """
    # Indexed as f1_matrix[relation_index][domain_index].
    f1_matrix = get_all_f1_scores(results)

    scores_per_domain = []
    for col, domain in enumerate(domains):
        domain_total = get_total_support_for_domain(results, domain)

        accumulated = 0
        for row, relation in enumerate(list_of_relations):
            relation_support = results[domain][relation]["support"]
            if relation_support == 0:
                # Unsupported relations contribute nothing to the sum.
                continue

            share = relation_support / domain_total
            accumulated += share * f1_matrix[row][col]

        scores_per_domain.append(accumulated)

    return scores_per_domain

def plot_weighted_f1_score_as_bar(results):
    """Show a bar chart with one bar per domain holding its weighted F1 score.

    Args:
        results: Result mapping passed on to ``weighted_f1_score``.
    """
    scores = weighted_f1_score(results)

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.bar(domains, scores)
    ax.set_title("Weighted F1-score for each domain")
    ax.set_xlabel("Domain")
    ax.set_ylabel("Weighted F1-score")
    plt.show()

def plot_heat_map(results, domains, transposed=False):
    """Draw an annotated heat map of the F1 score of every relation/domain pair.

    The colormap's "bad" colour is set to grey, which is meant to mark the
    cells that have no score (NaN in the score matrix).

    Args:
        results: Result mapping passed on to ``get_all_f1_scores``.
        domains: Tick labels for the domain axis.
            NOTE(review): the score matrix comes from
            ``get_all_f1_scores(results)``, which is called without this
            list; confirm both use the same domains in the same order.
        transposed: If true, domains are the rows and relations the columns;
            otherwise relations are the rows and domains the columns.
    """
    data = get_all_f1_scores(results)

    plt.figure(figsize=(10, 5))

    # ``mpl.cm.get_cmap`` was deprecated in Matplotlib 3.7 and removed in 3.9;
    # ``plt.get_cmap`` is available across versions. Work on a copy so that
    # ``set_bad`` never alters a colormap object shared with other plots.
    cmap = copy.copy(plt.get_cmap("YlGn"))
    cmap.set_bad("grey")

    if transposed:
        data = data.transpose()
        x_labels, y_labels = list_of_relations, domains
        x_title, y_title = "Relation", "Domain"
    else:
        x_labels, y_labels = domains, list_of_relations
        x_title, y_title = "Domain", "Relation"

    ax = sns.heatmap(data, annot=True, cmap=cmap, xticklabels=x_labels, yticklabels=y_labels)
    ax.set_title("F1-score for each relation and domain")
    ax.set_xlabel(x_title)
    ax.set_ylabel(y_title)
    plt.show()

def plot_heat_map_mapping_ood(results_for_method, transposed=False):
    """Plot the F1 heat map of one result set, taking the domain labels from it.

    Every key of ``results_for_method`` after the first two is handed to
    ``plot_heat_map`` as a domain label.

    Args:
        results_for_method: Result mapping of one mapping method.
        transposed: Forwarded unchanged to ``plot_heat_map``.
    """
    domain_labels = list(results_for_method)[2:]
    plot_heat_map(results_for_method, domain_labels, transposed=transposed)

In [None]:
# Heat map (relations as rows, domains as columns) of the F1 scores in the
# result set currently held by ``results_for_method``.
plot_heat_map(results_for_method, transposed=False, domains=domains)

In [None]:
# Support-weighted F1 score of each domain, in the order of ``domains``.
weighted_f1 = weighted_f1_score(results_for_method)

In [None]:
# Bar chart of the weighted F1 score per domain for the same result set.
plot_weighted_f1_score_as_bar(results_for_method)

## OOD 

In [None]:
# Mapping methods whose OOD validation results are compared.
mapping_types = [
    "no_mapping",
    "manual",
    "embedding",
    "ood_clustering",
    "topological",
    "thesaurus_affinity",
]

# One F1 heat map per mapping method, each preceded by a line naming the method.
for m in mapping_types:
    print("Matrix For ", m)
    results_for_method = load_result_json(m, ood=True)
    plot_heat_map_mapping_ood(results_for_method, transposed=False)
        

In [None]:
mapping_types = ["no_mapping", "manual", "embedding", "ood_clustering", "topological", "thesaurus_affinity"]

# Grid of bar charts, one panel per mapping method and three panels per row.
# The number of rows follows the number of mapping methods, so the grid does
# not have to be edited by hand when the list changes.
n_columns = 3
n_rows = max(1, -(-len(mapping_types) // n_columns))  # ceiling division
fig, ax = plt.subplots(n_rows, n_columns, figsize=(15, 5 * n_rows), sharey=True, squeeze=False)

for idx, m in enumerate(mapping_types):
    # NOTE(review): this label text matches the heat-map cell although this
    # cell draws bar charts; it is kept unchanged.
    print("Matrix For ", m)
    results_for_method = load_result_json(m, ood=True)

    weighted_f1 = weighted_f1_score(results_for_method)

    panel = ax.flat[idx]
    panel.bar(domains, weighted_f1)
    panel.set_title(m)
    panel.set_xlabel("Domain")
    if idx % n_columns == 0:
        # The y axis is shared, so labelling the first panel of a row suffices.
        panel.set_ylabel("Weighted F1-score")

# Hide panels left over when the grid has more cells than mapping methods.
for unused_panel in ax.flat[len(mapping_types):]:
    unused_panel.set_visible(False)

fig.tight_layout()

In [None]:
# Table without data: one row per mapping method, one column per domain.
# NOTE(review): ``df`` is not referenced by the other cells visible here.
df = pd.DataFrame(index=mapping_types, columns=domains)

In [None]:
# Table without data: one row per domain, one column per mapping method.
# It is filled with weighted F1 scores and plotted further down.
df_1 = pd.DataFrame(index=domains, columns=mapping_types)

In [None]:
# Fill one column of ``df_1`` per mapping method with that method's weighted
# F1 scores (one value per domain).
for m in mapping_types:

    results_for_method = load_result_json(m, ood=True)

    weighted_f1 = weighted_f1_score(results_for_method)

    # Assign the whole column at once instead of using chained indexing
    # (``df_1[m][d] = ...``), which pandas does not reliably write back to the
    # frame and which has no effect under Copy-on-Write. Labelling the values
    # with ``domains`` aligns them on the frame's index, and ``dtype=float``
    # yields a numeric column.
    df_1[m] = pd.Series(weighted_f1, index=domains, dtype=float)

In [None]:
# ``mpl.cm.get_cmap`` was deprecated in Matplotlib 3.7 and removed in 3.9;
# ``plt.get_cmap`` looks up the same named colormap across versions.
cmap = plt.get_cmap("YlGn")

# Six colours taken from seaborn's "twilight" palette.
# NOTE(review): neither ``cmap`` nor ``l`` is used by the cells visible here.
l = sns.color_palette("twilight", 6)

In [None]:
# NOTE(review): ``color_1`` holds one colour per mapping method but is not
# passed to the plot below; confirm whether ``color=color_1`` was intended.
color_1 = ["hotpink", "lightsteelblue", "green", "midnightblue", "mediumturquoise", "blue"]

# Grouped bar chart: one group per domain, one bar per mapping method.
ax = df_1.plot.bar(rot=0, figsize=(12, 6), title="Weighted F1-score for each domain", ylabel="Weighted F1-score", xlabel="Domain")

# Create the output directory first so that saving also works when the
# folder does not exist yet, then save the figure that owns ``ax``.
os.makedirs("figures", exist_ok=True)
ax.figure.savefig("figures/weighted_f1_score_for_each_domain.png")
