In [None]:
%matplotlib inline
# Evaluate with CQA task and show the results
import matplotlib.pyplot as plt

import pickle
import numpy as np
from Evaluation import CQAnswering
from model import DFALC
import torch
import seaborn as sns 
import pandas as pd
# Evaluate saved embeddings on the CQA task for every (mask rate, ontology)
# pair, collect precision/recall, and plot both metrics to "cqa.png".
device = torch.device("cpu")
sns.set_theme(style="whitegrid", palette="pastel")
# NOTE(review): sns.set is an alias of sns.set_theme, so this second call
# resets style/palette to the seaborn defaults and only font_scale survives.
# Kept as-is so the rendered figure does not change; merge the two calls if
# the whitegrid style was actually intended.
sns.set(font_scale=5)
depth = 2

mask_rates = [0.2, 0.4, 0.6, 0.8]
base_names = [
    'Family.owl',
    'Family2.owl',
    'glycordf.glycordf.14.owl.xml',
    # 'nifdys.neuroscience-information-framework-nif-dysfunction-ontlogy.14.owl.xml',
    'nihss.national-institutes-of-health-stroke-scale-ontology.11.owl.xml',
    # 'ontodm-core.ontology-of-core-data-mining-entities.6.owl.xml',
    'sso.syndromic-surveillance-ontology.1.owl.xml',
]
# Display names, in the same order as base_names (used as the x axis).
names = ["Family", "Family2", "GlycoRDF", "Nihss", "Sso"]  # ,"Nifdys","Ontodm"
our_precision = []
base_precision = []
our_recall = []
base_recall = []
# One entry per element of out_pathes below (indexed by idx).
model_name = ["ELEmbedding"]


def _load_pickle(path):
    """Load one pickle file, closing the handle afterwards.

    NOTE: pickle can execute arbitrary code on load; only point this at
    files produced by a trusted run.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


def _score_frame(precisions, recalls, label):
    """Build the long-form plotting frame for one model.

    precisions/recalls are expected in the order the loop appends them
    (mask rate outer, ontology inner). Returns an empty frame with the same
    columns when no scores were collected, instead of letting numpy fail on
    a ragged array.
    """
    columns = ["Precision", "Recall", "mask_rate", "Ontology"]
    if not precisions and not recalls:
        return pd.DataFrame(columns=columns)
    frame = pd.DataFrame({
        "Precision": precisions,
        "Recall": recalls,
        # Labels are derived from mask_rates itself rather than a fixed
        # 0.2 step, so they stay correct if the list is edited.
        "mask_rate": [
            f"mask {rate * 100:.0f}%, {label}"
            for rate in mask_rates
            for _ in base_names
        ],
        "Ontology": names * len(mask_rates),
    }, columns=columns)
    frame["Precision"] = frame["Precision"].astype("float")
    frame["Recall"] = frame["Recall"].astype("float")
    return frame


for rate in mask_rates:
    info_path = f"ELEmbedding_output_alpha0.8/mask_{rate}/"
    # Index 0 is scored as "ours"; any further entries are scored as the
    # baseline and need a matching entry in model_name.
    out_pathes = [info_path]
    for idx, out_path in enumerate(out_pathes):
        for file_name in base_names:
            print(rate, idx, file_name)
            cEmb = _load_pickle(out_path + file_name + ".cEmb.pkl")
            rEmb = _load_pickle(out_path + file_name + ".rEmb.pkl")
            masked_cEmb = np.load(info_path + file_name + ".masked_cEmb.npy")
            masked_rEmb = np.load(info_path + file_name + ".masked_rEmb.npy")
            # true_* and the id2* maps are not used in this cell; they are
            # kept as globals in case later cells read them.
            true_cEmb = np.load(info_path + file_name + ".true_cEmb.npy")
            true_rEmb = np.load(info_path + file_name + ".true_rEmb.npy")
            c2id = _load_pickle(info_path + file_name + ".c2id.pkl")
            r2id = _load_pickle(info_path + file_name + ".r2id.pkl")
            i2id = _load_pickle(info_path + file_name + ".i2id.pkl")
            id2c = {i: c for c, i in c2id.items()}
            id2r = {i: c for c, i in r2id.items()}
            id2i = {i: c for c, i in i2id.items()}

            model = DFALC({}, len(c2id), len(r2id), masked_cEmb, masked_rEmb, device, name=model_name[idx]).to(device)
            query_stem = "input/" + file_name + ".depth_" + str(depth)
            cqa = CQAnswering(query_stem + ".queries", query_stem + ".answers", c2id, r2id, i2id)
            precision, recall = cqa.get_score(model, torch.tensor(cEmb), torch.tensor(rEmb), alpha=0.8)
            if idx == 0:
                our_precision.append(precision)
                our_recall.append(recall)
            else:
                base_precision.append(precision)
                base_recall.append(recall)
            print("precision: ", precision)
            print("recall: ", recall)


our_data = _score_frame(our_precision, our_recall, "DF-ALC")
base_data = _score_frame(base_precision, base_recall, "Base")

fig, axes = plt.subplots(1, 2, figsize=(65, 13))
axes1, axes2 = axes.flatten()
# Only the right-hand (Recall) panel carries the legend, as before.
for metric, panel, legend_mode in (("Precision", axes1, False), ("Recall", axes2, "auto")):
    for frame, colour in ((our_data, "#00397E"), (base_data, "#F66A2A")):
        if frame.empty:
            # No baseline run configured: skip it rather than crash.
            continue
        sns.lineplot(
            data=frame,
            x="Ontology",
            y=metric,
            hue="mask_rate",
            palette=[colour] * len(mask_rates),
            style="mask_rate",
            ax=panel,
            legend=legend_mode,
            linewidth=8,
        )
    panel.set(ylim=(0, 1))

leg = plt.legend(bbox_to_anchor=(1.02, 1), loc="upper left")
# Matplotlib 3.7 renamed Legend.legendHandles to legend_handles and 3.9
# removed the old name; support both.
legend_lines = leg.legend_handles if hasattr(leg, "legend_handles") else leg.legendHandles
for legobj in legend_lines:
    legobj.set_linewidth(8.0)
plt.tight_layout(pad=0.05)
fig.savefig("cqa.png", dpi=400)

0.2 0 Family.owl
precision:  1.0
recall:  0.37485399806911435
0.2 0 Family2.owl
precision:  0.7033140814081408
recall:  0.2399873739212718
0.2 0 glycordf.glycordf.14.owl.xml
precision:  0.93125
recall:  0.609661495911496
0.2 0 nihss.national-institutes-of-health-stroke-scale-ontology.11.owl.xml
precision:  0.95
recall:  0.6180555555555556
0.2 0 sso.syndromic-surveillance-ontology.1.owl.xml
precision:  1.0
recall:  0.725
0.4 0 Family.owl
precision:  0.8
recall:  0.08964435499900616
0.4 0 Family2.owl
precision:  0.20697569756975698
recall:  0.14034798534798534
0.4 0 glycordf.glycordf.14.owl.xml
precision:  0.7777777777777778
recall:  0.23067821067821068
0.4 0 nihss.national-institutes-of-health-stroke-scale-ontology.11.owl.xml
precision:  0.8
recall:  0.4033730158730158
0.4 0 sso.syndromic-surveillance-ontology.1.owl.xml
precision:  1.0
recall:  0.5165584415584417
0.6 0 Family.owl
precision:  0.35
recall:  0.028348595848595854
0.6 0 Family2.owl
precision:  0.19636963696369636
recall:  0.

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (4,) + inhomogeneous part.