In [1]:
import os
import sys
# Put the notebook's parent directory on the import path (presumably so the
# `quality` package imported in the next cell resolves -- confirm).
module_path = os.path.abspath("..")
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

In [2]:
import pandas as pd
import numpy as np
import scipy.stats  as stats
import seaborn as sns
import glob
import matplotlib.pyplot as plt
from quality.analyze_types import create_typed_predictions, get_entity_node_degrees, create_combined_df, create_combined_over_embeddings, _get_files, average_node_degree

In [3]:
# Embedding approaches that `show_table` iterates over.
embedding_approaches = [
    "BootEA",
    "MultiKE",
    "RDGCN",
]
# Forwarded to `_get_files` when locating the prediction files.
vector_type = "SimAndEmb"

In [4]:
def set_errors(df, pred, val):
    """Return 1 if the record's ``pred``/``val`` entries equal the given pair, else 0.

    Used row-wise (``DataFrame.apply(..., axis=1)``), so despite its name
    ``df`` is a single record supporting ``df["pred"]`` and ``df["val"]``.

    Parameters
    ----------
    df : mapping-like
        One record with ``"pred"`` and ``"val"`` entries.
    pred : int
        Prediction value to match.
    val : int
        Ground-truth value to match.

    Returns
    -------
    int
        ``1`` when both entries match, ``0`` otherwise.
    """
    is_match = (df["pred"] == pred) & (df["val"] == val)
    return 1 if is_match else 0

In [5]:
def calc_measures(data):
    """Compute per-type precision, recall and F-measure from binary predictions.

    Every row is flagged as false negative, false positive, true positive or
    true negative by comparing its ``pred`` and ``val`` entries with 0/1. The
    flags are summed once per ``left_types`` value and once per
    ``right_types`` value, and the two sums are averaged.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``pred``, ``val``, ``left_types`` and
        ``right_types``. The frame itself is not modified.

    Returns
    -------
    pandas.DataFrame
        Indexed by type with the columns ``prec``, ``rec`` and ``fm``.
        A measure is NaN where its denominator is zero.
    """
    count_cols = ["fn", "fp", "tp", "tn"]
    pred = data["pred"]
    val = data["val"]
    # Vectorized confusion-matrix flags; `assign` returns a new frame, so the
    # caller's data stays untouched.
    df = data.assign(
        fn=((pred == 0) & (val == 1)).astype(int),
        fp=((pred == 1) & (val == 0)).astype(int),
        tp=((pred == 1) & (val == 1)).astype(int),
        tn=((pred == 0) & (val == 0)).astype(int),
    )
    # Sum only the count columns: summing every column breaks on non-numeric
    # columns with pandas >= 2.0 (strings are concatenated, then `/ 2` fails).
    by_left = df.groupby("left_types")[count_cols].sum()
    by_right = df.groupby("right_types")[count_cols].sum()
    # NOTE: index alignment makes a type that occurs on only one side NaN.
    summed = (by_left + by_right) / 2
    summed["prec"] = summed["tp"] / (summed["tp"] + summed["fp"])
    summed["rec"] = summed["tp"] / (summed["tp"] + summed["fn"])
    summed["fm"] = 2 * (summed["prec"] * summed["rec"] / (summed["prec"] + summed["rec"]))
    return summed[["prec", "rec", "fm"]]

In [6]:
def show_table(dataset_name, scadsmb=False, data_dir="/home/dobraczka/Downloads/git/er-embedding-benchmark/data/"):
    """Build a styled per-type precision/recall/F-measure table for one dataset.

    For every entry of the module-level ``embedding_approaches``, the typed
    predictions are built with ``create_typed_predictions`` and reduced with
    ``calc_measures``. The per-approach results are placed side by side and
    grouped by measure.

    Parameters
    ----------
    dataset_name : str
        Dataset directory name, e.g. ``"D_W_15K_V1"`` or ``"imdb-tmdb"``.
    scadsmb : bool, default False
        If True, use the ScadsMB directory layout instead of the OpenEA one.
    data_dir : str
        Root data directory. Defaults to the location that was previously
        hard-coded in this function.

    Returns
    -------
    pandas.io.formats.style.Styler
        Table indexed by short type name, rounded to 3 decimals, with NaN
        shown as -1 and the ``fm``/``prec``/``rec`` column groups shaded
        row-wise in green/blue/purple.
    """
    if scadsmb:
        # NOTE(review): here `type_files` is a single path string, whereas the
        # OpenEA branch passes a sorted list of files. The two ScadsMB paths
        # also use different parents ("EA-ScaDS-Datasets/ScadsMB" vs.
        # "ScadsMB") -- confirm both are intended.
        type_files = os.path.join(
            data_dir, "EA-ScaDS-Datasets", "ScadsMB", "typed_links", "datasets", dataset_name
        )
        type_dataset = os.path.join(data_dir, "ScadsMB", "typed_links", "superclasses.json")
    else:
        type_files = sorted(
            glob.glob(
                os.path.join(
                    data_dir, "OpenEA", "typed_links", "datasets",
                    dataset_name, "721_5fold", "*", "typed_test",
                )
            )
        )
        type_dataset = os.path.join(data_dir, "OpenEA", "typed_links", "superclasses.json")

    # Initialised before the loop so it exists even with no approaches.
    measured = []
    for approach in embedding_approaches:
        kg1_ent_id_files, kg2_ent_id_files, pred_files = _get_files(
            approach, dataset_name, data_dir, vector_type
        )
        # The meaning of the trailing positional arguments (1, False) is
        # defined by `create_typed_predictions`; they are passed unchanged.
        typed_predictions = create_typed_predictions(
            kg1_ent_id_files,
            kg2_ent_id_files,
            pred_files,
            type_files,
            type_dataset,
            1,
            False,
        )
        measured.append(calc_measures(typed_predictions))

    # Key the column groups by the approaches actually evaluated, not by a
    # second hard-coded list that could drift from `embedding_approaches`.
    mult = pd.concat(measured, axis=1, keys=list(embedding_approaches))
    # Reorder columns to (measure, approach) so each measure forms one group.
    final = mult.sort_index(axis=1, level=1).swaplevel(axis=1)
    # Shorten type URIs to their local name. Relabelling the index directly
    # works whatever name the summed index carries ("left_types" or none).
    final.index = pd.Index(
        [str(type_uri).split("/")[-1].split("#")[-1] for type_uri in final.index],
        name="Type",
    )
    final = final.round(3)
    return (
        final.fillna(-1)
        .style.background_gradient(cmap='Greens', axis=1, subset=["fm"])
        .background_gradient(cmap='Blues', axis=1, subset=["prec"])
        .background_gradient(cmap='Purples', axis=1, subset=["rec"])
    )

# DBpedia-Wikidata 15K V1

In [7]:
# Per-type measures for D_W_15K_V1 (default OpenEA layout).
show_table(dataset_name="D_W_15K_V1")

Unnamed: 0_level_0,fm,fm,fm,prec,prec,prec,rec,rec,rec
Unnamed: 0_level_1,BootEA,MultiKE,RDGCN,BootEA,MultiKE,RDGCN,BootEA,MultiKE,RDGCN
Type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Activity,0.364,0.875,0.364,1.0,1.0,1.0,0.222,0.778,0.222
Agent,-1.0,0.492,0.078,-1.0,1.0,1.0,0.0,0.327,0.041
Artwork,0.485,0.438,0.611,1.0,1.0,1.0,0.32,0.28,0.44
Award,0.8,0.892,0.8,1.0,1.0,1.0,0.667,0.806,0.667
Cartoon,0.849,0.833,0.765,1.0,1.0,1.0,0.738,0.714,0.619
ChemicalSubstance,0.667,1.0,1.0,1.0,1.0,1.0,0.5,1.0,1.0
Colour,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
Device,0.727,0.902,0.88,1.0,1.0,1.0,0.571,0.821,0.786
Disease,0.9,0.9,0.9,1.0,1.0,1.0,0.818,0.818,0.818
EthnicGroup,0.746,0.825,0.825,1.0,1.0,1.0,0.595,0.703,0.703


# DBpedia-Wikidata 15K V2

In [8]:
# Per-type measures for D_W_15K_V2 (default OpenEA layout).
show_table(dataset_name="D_W_15K_V2")

Unnamed: 0_level_0,fm,fm,fm,prec,prec,prec,rec,rec,rec
Unnamed: 0_level_1,BootEA,MultiKE,RDGCN,BootEA,MultiKE,RDGCN,BootEA,MultiKE,RDGCN
Type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Agent,-1.0,0.825,0.233,-1.0,1.0,0.833,0.0,0.703,0.135
Award,0.604,0.862,0.806,1.0,1.0,1.0,0.432,0.757,0.676
Cartoon,1.0,0.769,0.609,1.0,1.0,1.0,1.0,0.625,0.438
Disease,0.93,1.0,0.989,1.0,1.0,0.979,0.87,1.0,1.0
EthnicGroup,0.489,0.764,0.64,1.0,1.0,1.0,0.324,0.618,0.471
Event,0.5,0.6,0.765,1.0,1.0,1.0,0.333,0.429,0.619
Film,0.964,0.972,0.948,0.999,0.993,0.995,0.932,0.953,0.905
Food,-1.0,1.0,-1.0,-1.0,1.0,-1.0,0.0,1.0,0.0
Language,0.738,1.0,0.738,1.0,1.0,1.0,0.585,1.0,0.585
Location,0.863,0.965,0.944,1.0,0.998,0.994,0.759,0.933,0.899


# DBpedia-Yago 15K V1

In [9]:
# Per-type measures for D_Y_15K_V1 (default OpenEA layout).
show_table(dataset_name="D_Y_15K_V1")

Unnamed: 0_level_0,fm,fm,fm,prec,prec,prec,rec,rec,rec
Unnamed: 0_level_1,BootEA,MultiKE,RDGCN,BootEA,MultiKE,RDGCN,BootEA,MultiKE,RDGCN
Type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Agent,0.667,1.0,1.0,1.0,1.0,1.0,0.5,1.0,1.0
Award,0.603,0.867,0.786,1.0,1.0,1.0,0.431,0.765,0.647
Event,0.797,0.99,1.0,1.0,1.0,1.0,0.662,0.98,1.0
Film,0.967,0.998,0.996,0.999,0.999,0.997,0.937,0.998,0.996
Language,0.524,0.808,0.893,1.0,1.0,1.0,0.355,0.677,0.806
Location,0.886,0.98,0.985,1.0,0.999,0.997,0.796,0.962,0.973
MeanOfTransportation,-1.0,1.0,1.0,-1.0,1.0,1.0,0.0,1.0,1.0
MusicalWork,0.932,0.999,0.998,0.999,0.999,0.999,0.873,0.999,0.997
Organisation,0.919,0.985,0.986,1.0,1.0,0.998,0.85,0.971,0.975
Person,0.977,0.991,0.987,0.999,0.999,0.998,0.955,0.984,0.976


# DBpedia-Yago 15K V2

In [10]:
# Per-type measures for D_Y_15K_V2 (default OpenEA layout).
show_table(dataset_name="D_Y_15K_V2")

Unnamed: 0_level_0,fm,fm,fm,prec,prec,prec,rec,rec,rec
Unnamed: 0_level_1,BootEA,MultiKE,RDGCN,BootEA,MultiKE,RDGCN,BootEA,MultiKE,RDGCN
Type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Award,0.6,0.6,0.923,1.0,1.0,1.0,0.429,0.429,0.857
Film,0.993,0.999,0.998,0.999,0.999,0.998,0.987,0.998,0.997
Location,0.975,1.0,1.0,1.0,1.0,1.0,0.952,1.0,1.0
MusicalWork,0.989,0.999,0.987,0.993,0.999,0.999,0.985,0.998,0.975
Organisation,0.975,0.97,1.0,1.0,1.0,1.0,0.951,0.943,1.0
Person,0.976,0.991,0.983,0.998,0.998,0.997,0.955,0.985,0.97
TelevisionShow,0.973,0.994,0.993,1.0,0.998,1.0,0.948,0.991,0.986
WrittenWork,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0
Thing,0.652,0.708,0.76,1.0,1.0,1.0,0.484,0.548,0.613


# DBpediaEN-DBpediaDE 15K V1

In [11]:
# Per-type measures for EN_DE_15K_V1 (default OpenEA layout).
show_table(dataset_name="EN_DE_15K_V1")

Unnamed: 0_level_0,fm,fm,fm,prec,prec,prec,rec,rec,rec
Unnamed: 0_level_1,BootEA,MultiKE,RDGCN,BootEA,MultiKE,RDGCN,BootEA,MultiKE,RDGCN
Type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Activity,-1.0,-1.0,0.8,-1.0,-1.0,1.0,0.0,0.0,0.667
Agent,-1.0,0.756,0.25,-1.0,1.0,1.0,0.0,0.607,0.143
AnatomicalStructure,0.5,0.8,1.0,1.0,1.0,1.0,0.333,0.667,1.0
Cartoon,0.933,0.97,1.0,1.0,0.941,1.0,0.875,1.0,1.0
Device,0.878,0.987,0.861,1.0,1.0,1.0,0.782,0.974,0.756
Event,0.352,0.719,0.8,1.0,1.0,0.984,0.213,0.562,0.674
Film,0.867,0.904,0.898,1.0,0.981,0.996,0.765,0.838,0.817
Language,0.841,0.985,0.955,1.0,0.995,0.992,0.725,0.975,0.92
Location,0.785,0.954,0.937,0.999,0.99,0.989,0.647,0.921,0.891
MeanOfTransportation,0.59,0.933,0.921,1.0,1.0,0.992,0.419,0.874,0.86


# DBpediaEN-DBpediaDE 15K V2

In [12]:
# Per-type measures for EN_DE_15K_V2 (default OpenEA layout).
show_table(dataset_name="EN_DE_15K_V2")

Unnamed: 0_level_0,fm,fm,fm,prec,prec,prec,rec,rec,rec
Unnamed: 0_level_1,BootEA,MultiKE,RDGCN,BootEA,MultiKE,RDGCN,BootEA,MultiKE,RDGCN
Type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Agent,-1.0,0.871,0.205,-1.0,1.0,1.0,0.0,0.771,0.114
AnatomicalStructure,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,0.0,0.0
Award,1.0,0.857,1.0,1.0,1.0,1.0,1.0,0.75,1.0
Cartoon,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
Device,0.94,1.0,0.932,1.0,1.0,1.0,0.886,1.0,0.873
Event,0.444,0.833,0.833,1.0,1.0,1.0,0.286,0.714,0.714
Film,0.912,0.924,0.906,1.0,0.976,0.994,0.838,0.877,0.833
Language,0.591,0.984,0.784,1.0,1.0,1.0,0.419,0.968,0.645
Location,0.745,0.972,0.811,1.0,0.998,0.998,0.593,0.947,0.684
MeanOfTransportation,0.834,0.927,0.902,1.0,1.0,1.0,0.715,0.865,0.822


# DBpediaEN-DBpediaFR 15K V1

In [13]:
# Per-type measures for EN_FR_15K_V1 (default OpenEA layout).
show_table(dataset_name="EN_FR_15K_V1")

Unnamed: 0_level_0,fm,fm,fm,prec,prec,prec,rec,rec,rec
Unnamed: 0_level_1,BootEA,MultiKE,RDGCN,BootEA,MultiKE,RDGCN,BootEA,MultiKE,RDGCN
Type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Agent,0.258,0.951,0.633,1.0,1.0,1.0,0.148,0.907,0.463
Artwork,0.514,0.866,0.878,1.0,1.0,1.0,0.345,0.764,0.782
Award,0.661,0.926,0.85,1.0,1.0,1.0,0.493,0.863,0.74
Cartoon,0.667,0.769,0.609,1.0,1.0,1.0,0.5,0.625,0.438
Colour,-1.0,0.5,0.286,-1.0,1.0,1.0,0.0,0.333,0.167
Device,0.797,0.995,0.922,1.0,1.0,0.982,0.663,0.989,0.87
Disease,-1.0,0.8,-1.0,-1.0,1.0,-1.0,0.0,0.667,0.0
EthnicGroup,-1.0,0.842,0.625,-1.0,1.0,1.0,0.0,0.727,0.455
Event,0.798,0.827,0.863,0.998,0.993,0.99,0.665,0.708,0.766
Film,0.692,0.762,0.696,1.0,0.996,0.997,0.529,0.617,0.535


# DBpediaEN-DBpediaFR 15K V2

In [14]:
# Per-type measures for EN_FR_15K_V2 (default OpenEA layout).
show_table(dataset_name="EN_FR_15K_V2")

Unnamed: 0_level_0,fm,fm,fm,prec,prec,prec,rec,rec,rec
Unnamed: 0_level_1,BootEA,MultiKE,RDGCN,BootEA,MultiKE,RDGCN,BootEA,MultiKE,RDGCN
Type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Agent,0.122,0.96,0.711,1.0,1.0,0.99,0.065,0.924,0.554
Award,0.794,0.877,0.949,1.0,1.0,1.0,0.659,0.78,0.902
Device,0.695,0.995,0.893,1.0,1.0,1.0,0.532,0.991,0.807
EthnicGroup,-1.0,0.667,0.857,-1.0,1.0,1.0,0.0,0.5,0.75
Event,0.912,0.951,0.955,0.997,0.974,0.981,0.841,0.93,0.932
Film,0.92,0.72,0.699,0.996,1.0,1.0,0.854,0.563,0.537
Food,0.5,1.0,1.0,1.0,1.0,1.0,0.333,1.0,1.0
Language,0.68,0.885,0.844,1.0,1.0,0.99,0.515,0.794,0.735
Location,0.721,0.956,0.908,1.0,0.995,0.997,0.564,0.92,0.834
MeanOfTransportation,0.769,0.97,0.667,1.0,0.941,1.0,0.625,1.0,0.5


# IMDB-TMDB

In [15]:
# Per-type measures for imdb-tmdb (ScadsMB layout).
show_table(dataset_name="imdb-tmdb", scadsmb=True)

Unnamed: 0_level_0,fm,fm,fm,prec,prec,prec,rec,rec,rec
Unnamed: 0_level_1,BootEA,MultiKE,RDGCN,BootEA,MultiKE,RDGCN,BootEA,MultiKE,RDGCN
Type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
Film,0.923,0.967,0.965,1.0,0.997,0.995,0.858,0.939,0.938
TelevisionEpisode,0.999,0.998,0.999,0.998,0.995,0.998,1.0,1.0,1.0
TelevisionShow,0.951,0.956,0.959,1.0,1.0,0.994,0.907,0.916,0.926
Person,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


# IMDB-TVDB

In [16]:
# Per-type measures for imdb-tvdb (ScadsMB layout).
show_table(dataset_name="imdb-tvdb", scadsmb=True)

Unnamed: 0_level_0,fm,fm,fm,prec,prec,prec,rec,rec,rec
Unnamed: 0_level_1,BootEA,MultiKE,RDGCN,BootEA,MultiKE,RDGCN,BootEA,MultiKE,RDGCN
Type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
TelevisionEpisode,0.996,0.997,0.996,0.993,0.994,0.991,0.999,1.0,1.0
TelevisionShow,0.879,0.906,0.918,1.0,1.0,1.0,0.784,0.827,0.849
Person,0.999,0.999,0.998,0.999,1.0,0.998,0.999,0.999,0.999


# TMDB-TVDB

In [17]:
# Per-type measures for tmdb-tvdb (ScadsMB layout).
show_table(dataset_name="tmdb-tvdb", scadsmb=True)

Unnamed: 0_level_0,fm,fm,fm,prec,prec,prec,rec,rec,rec
Unnamed: 0_level_1,BootEA,MultiKE,RDGCN,BootEA,MultiKE,RDGCN,BootEA,MultiKE,RDGCN
Type,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
TelevisionEpisode,0.997,0.998,0.998,0.996,0.997,0.996,0.998,0.999,1.0
TelevisionShow,0.94,0.944,0.949,1.0,0.994,1.0,0.887,0.898,0.903
company,0.938,0.946,0.942,0.984,1.0,0.992,0.897,0.897,0.897
Person,1.0,0.999,0.999,1.0,0.997,0.999,1.0,1.0,1.0
