In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from scipy.stats import friedmanchisquare
from scikit_posthocs import posthoc_nemenyi_friedman as nemenyi

In [2]:
df = pd.read_csv("/home/dobraczka/Downloads/git/er-embedding-benchmark/data/all_results.csv",header=0,sep=",")
df["dataset"] = df["dataset"].str.replace(r"-721_5fold-\d","")
rich_graph_datasets = {"D_W_15K_V1", "D_W_15K_V2", "D_Y_15K_V1", "D_Y_15K_V2", "EN_DE_15K_V1", "EN_DE_15K_V2", "EN_FR_15K_V1", "EN_FR_15K_V2"}
rich_df = df[df["dataset"].isin(rich_graph_datasets)]
shallow_df = df[~df["dataset"].isin(rich_graph_datasets)]

In [3]:
# creates a df with mean value of folds and all values per feature variant with all classifiers and datasets as rows
def get_expanded_df(df, removed_dim_name, fixed_value, levels):
    aggregated = df[df[removed_dim_name]==fixed_value].groupby(levels).mean()
    #level_zero = df["model_name"].unique()
    #level_one = df["vector_name"].unique()
    level_zero = df[levels[0]].unique()
    level_one = df[levels[1]].unique()
    variant_cols = []
    row_names = None
    for f in level_one:
        tmp_names = None
        tmp_col = None
        for m in level_zero:
            tmp = aggregated.loc[(m,f)]["test_f1"]
            tmp = tmp.add_suffix("_" + m)
            tmp.name = f
            if tmp_names is None:
                tmp_names = set(tmp.index.to_list())
            else:
                tmp_names = tmp_names | set(tmp.index.to_list())
            if tmp_col is None:
                tmp_col = tmp
            else:
                tmp_col = tmp_col.append(tmp)
        if row_names is None:
            row_names = tmp_names
        else:
            row_names = row_names & tmp_names
        variant_cols.append(tmp_col)
    for i in range(0, len(variant_cols)):
        variant_cols[i] = variant_cols[i].loc[row_names]
    expanded = pd.DataFrame(variant_cols).T
    expanded.sort_index(inplace=True)
    return expanded.round(2)

In [4]:
def highlight_significant(val):
    color = 'green' if val < 0.05 and val > 0.0 else 'black'
    return 'color: %s' % color

In [5]:
def stylize(df):
    cm = sns.cubehelix_palette(5, as_cmap=True)
    return df.style.background_gradient(cmap=cm, axis=1)

In [6]:
def statistical_test(df, features=True):
    #dont know how to better provide the arguments without it being interpreted as single array
    if features:
        statistic, pvalue = friedmanchisquare(df['OnlyEmb'], df['OnlySim'], df['OnlySimNormalized'], df['SimAndEmb'], df['SimAndEmbNormalized'])
    else:
        statistic, pvalue = friedmanchisquare(df.iloc[0],df.iloc[1],df.iloc[2],df.iloc[3],df.iloc[4],df.iloc[5],df.iloc[6],df.iloc[7])
    if pvalue < 0.05:
        print("Friedman test result:")
        print("There is a significant difference in the rank sums: ", pvalue)
        res = nemenyi(df)
        print("Nemenyi post-hoc test result:")
        print("Significant pairwise differences (p < 0.05) are highlighted in green")
        return res.style.applymap(highlight_significant)
    else:
        print("No significant difference")
        return None

# General Remarks

- For each dataset there are 5 folds
- on each fold a 7-2-1 split:
    - 70% testing
    - 20% training
    - 10% validation
- Results show the F-measure on the test data averaged over the 5 folds

# Analysis of Rich Graph Datasets

Our hypothesis is that Knowledge Graph Embeddings (KGEs) perform well on datasets that with many edges between vertices. We also have the hypothesis that using both: KGEs and attribute similarities will have a beneficial effect on classifier performance. We gave several classifiers the following different features:

-__SimAndEmb__: Attribute similarities/distances and embeddings

-__OnlySim__: Only Attribute similarities/distances

-__SimAndEmbNormalized__: Attribute similarities and embeddings

-__OnlySimNormalized__: Attribute similarities normalized between 0 and 1

-__OnlyEmb__: Only euclidean distance
    
We do this for each different embedding method

## Analysis of Feature Input for RDGCN
To check if using embeddings in conjunction with similarities provides an improvement overall we sum the test_f1 value over all folds

In [7]:
rich_expanded = get_expanded_df(rich_df,"embed_model","RDGCN",["model_name","vector_name","dataset"])
stylized = stylize(rich_expanded)
stylized

Unnamed: 0_level_0,OnlyEmb,OnlySim,OnlySimNormalized,SimAndEmb,SimAndEmbNormalized
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
D_W_15K_V1_MLP,0.73,0.68,0.71,0.82,0.79
D_W_15K_V1_ada boost,0.53,0.73,0.72,0.8,0.75
D_W_15K_V1_decision tree,0.56,0.73,0.71,0.77,0.74
D_W_15K_V1_gaussian naive bayes,0.57,0.63,0.67,0.67,0.63
D_W_15K_V1_logistic regression,0.54,0.64,0.74,0.83,0.71
D_W_15K_V1_random forest 100,0.67,0.75,0.72,0.82,0.75
D_W_15K_V1_random forest 20,0.59,0.73,0.73,0.8,0.74
D_W_15K_V1_random forest 200,0.7,0.73,0.72,0.83,0.75
D_W_15K_V1_random forest 50,0.66,0.76,0.73,0.81,0.75
D_W_15K_V1_random forest 500,0.7,0.73,0.72,0.83,0.75


Mean row wise rank (the lower the better):

In [8]:
rank_order = rich_expanded.apply(lambda x: x.rank(ascending=False), axis=1).mean().sort_values()
# sort columns by rank to make nemenyi table easier to understand
rich_expanded = rich_expanded.reindex(rank_order.index, axis=1)
rank_order

SimAndEmb              1.164773
SimAndEmbNormalized    2.403409
OnlySim                3.250000
OnlySimNormalized      3.892045
OnlyEmb                4.289773
dtype: float64

Using embeddings AND similarity seems to improve the results.
Let's test if the difference is significant!

The Friedman test is designed to check if there is a significant difference in the rank sums.
If this is the case a nemenyi test is applied pairwise to check which differences are significant.

In [9]:
res = statistical_test(rich_expanded)
res

Friedman test result:
There is a significant difference in the rank sums:  1.9461684020304978e-47
Nemenyi post-hoc test result:
Significant pairwise differences (p < 0.05) are highlighted in green


Unnamed: 0,SimAndEmb,SimAndEmbNormalized,OnlySim,OnlySimNormalized,OnlyEmb
SimAndEmb,-1.0,0.001,0.001,0.001,0.001
SimAndEmbNormalized,0.001,-1.0,0.00351481,0.001,0.001
OnlySim,0.001,0.00351481,-1.0,0.0549235,0.001
OnlySimNormalized,0.001,0.001,0.0549235,-1.0,0.455067
OnlyEmb,0.001,0.001,0.001,0.455067,-1.0


Values below 0.05 show a significant difference. So keeping the mean row wise ranks in mind:
SimAndEmb is significantly better than all other approaches.

# Analysis of Feature Input for MultiKE
To check if using embeddings in conjunction with similarities provides an improvement overall we sum the test_f1 value over all folds

In [10]:
rich_expanded = get_expanded_df(rich_df,"embed_model","MultiKE",["model_name","vector_name","dataset"])
stylized = stylize(rich_expanded)
stylized

Unnamed: 0_level_0,OnlyEmb,OnlySim,OnlySimNormalized,SimAndEmb,SimAndEmbNormalized
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
D_W_15K_V1_MLP,0.86,0.72,0.72,0.9,0.91
D_W_15K_V1_ada boost,0.53,0.74,0.74,0.84,0.79
D_W_15K_V1_decision tree,0.65,0.75,0.74,0.79,0.79
D_W_15K_V1_gaussian naive bayes,0.52,0.63,0.68,0.64,0.67
D_W_15K_V1_logistic regression,0.52,0.64,0.74,0.83,0.79
D_W_15K_V1_random forest 100,0.84,0.75,0.74,0.86,0.87
D_W_15K_V1_random forest 20,0.77,0.77,0.74,0.85,0.85
D_W_15K_V1_random forest 200,0.86,0.76,0.74,0.86,0.88
D_W_15K_V1_random forest 50,0.82,0.76,0.72,0.85,0.86
D_W_15K_V1_random forest 500,0.87,0.76,0.75,0.86,0.88


Mean row wise rank (the lower the better):

In [11]:
rank_order = rich_expanded.apply(lambda x: x.rank(ascending=False), axis=1).mean().sort_values()
# sort columns by rank to make nemenyi table easier to understand
rich_expanded = rich_expanded.reindex(rank_order.index, axis=1)
rank_order

SimAndEmb              1.380682
SimAndEmbNormalized    1.977273
OnlySim                3.602273
OnlyEmb                3.886364
OnlySimNormalized      4.153409
dtype: float64

In [12]:
res = statistical_test(rich_expanded)
res

Friedman test result:
There is a significant difference in the rank sums:  7.245832654669639e-47
Nemenyi post-hoc test result:
Significant pairwise differences (p < 0.05) are highlighted in green


Unnamed: 0,SimAndEmb,SimAndEmbNormalized,OnlySim,OnlyEmb,OnlySimNormalized
SimAndEmb,-1.0,0.0897775,0.001,0.001,0.001
SimAndEmbNormalized,0.0897775,-1.0,0.001,0.001,0.001
OnlySim,0.001,0.001,-1.0,0.728954,0.140752
OnlyEmb,0.001,0.001,0.728954,-1.0,0.769542
OnlySimNormalized,0.001,0.001,0.140752,0.769542,-1.0


## Analysis of Feature Input for BootEA
To check if using embeddings in conjunction with similarities provides an improvement overall we sum the test_f1 value over all folds

In [13]:
rich_expanded = get_expanded_df(rich_df,"embed_model","BootEA",["model_name","vector_name","dataset"])
stylized = stylize(rich_expanded)
stylized

Unnamed: 0_level_0,OnlyEmb,OnlySim,OnlySimNormalized,SimAndEmb,SimAndEmbNormalized
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
D_W_15K_V1_MLP,0.77,0.7,0.73,0.8,0.84
D_W_15K_V1_ada boost,0.52,0.72,0.7,0.55,0.78
D_W_15K_V1_decision tree,0.57,0.74,0.72,0.56,0.79
D_W_15K_V1_gaussian naive bayes,0.53,0.63,0.67,0.83,0.66
D_W_15K_V1_logistic regression,0.53,0.64,0.74,0.75,0.68
D_W_15K_V1_random forest 100,0.67,0.73,0.73,0.82,0.81
D_W_15K_V1_random forest 20,0.62,0.74,0.73,0.82,0.79
D_W_15K_V1_random forest 200,0.67,0.75,0.73,0.82,0.8
D_W_15K_V1_random forest 50,0.66,0.73,0.72,0.82,0.81
D_W_15K_V1_random forest 500,0.68,0.75,0.73,0.82,0.8


Mean row wise rank (the lower the better):

In [14]:
rank_order = rich_expanded.apply(lambda x: x.rank(ascending=False), axis=1).mean().sort_values()
# sort columns by rank to make nemenyi table easier to understand
rich_expanded = rich_expanded.reindex(rank_order.index, axis=1)
rank_order

SimAndEmbNormalized    2.017045
SimAndEmb              2.312500
OnlySim                2.818182
OnlySimNormalized      3.471591
OnlyEmb                4.380682
dtype: float64

In [15]:
res = statistical_test(rich_expanded)
res

Friedman test result:
There is a significant difference in the rank sums:  2.5884328387001797e-27
Nemenyi post-hoc test result:
Significant pairwise differences (p < 0.05) are highlighted in green


Unnamed: 0,SimAndEmbNormalized,SimAndEmb,OnlySim,OnlySimNormalized,OnlyEmb
SimAndEmbNormalized,-1.0,0.7019,0.00695794,0.001,0.001
SimAndEmb,0.7019,-1.0,0.210842,0.001,0.001
OnlySim,0.00695794,0.210842,-1.0,0.0481909,0.001
OnlySimNormalized,0.001,0.001,0.0481909,-1.0,0.00129269
OnlyEmb,0.001,0.001,0.001,0.00129269,-1.0


# Analysis of shallow Datasets

Our hypothesis here is that Knowledge Graph Embeddings (KGEs) don't significantly improve the results, because there is too little information in the embedding process

## Analysis of Feature Input for RDGCN
To check if using embeddings in conjunction with similarities provides an improvement overall we sum the test_f1 value over all folds

In [16]:
shallow_expanded = get_expanded_df(shallow_df,"embed_model","RDGCN",["model_name","vector_name","dataset"])
stylized = stylize(shallow_expanded)
stylized

Unnamed: 0_level_0,OnlyEmb,OnlySim,OnlySimNormalized,SimAndEmb,SimAndEmbNormalized
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
abt-buy_MLP,0.65,0.98,0.97,0.97,0.97
abt-buy_ada boost,0.58,0.97,0.96,0.97,0.97
abt-buy_decision tree,0.58,0.97,0.95,0.96,0.95
abt-buy_gaussian naive bayes,0.53,0.96,0.97,0.87,0.87
abt-buy_logistic regression,0.56,0.97,0.97,0.97,0.97
abt-buy_random forest 100,0.65,0.98,0.97,0.97,0.97
abt-buy_random forest 20,0.6,0.97,0.97,0.96,0.96
abt-buy_random forest 200,0.66,0.98,0.97,0.97,0.97
abt-buy_random forest 50,0.64,0.98,0.97,0.97,0.96
abt-buy_random forest 500,0.66,0.98,0.97,0.97,0.97


Mean row wise rank (the lower the better):

In [17]:
rank_order = shallow_expanded.apply(lambda x: x.rank(ascending=False), axis=1).mean().sort_values()
# sort columns by rank to make nemenyi table easier to understand
shallow_expanded = shallow_expanded.reindex(rank_order.index, axis=1)
rank_order

OnlySim                1.870130
SimAndEmb              2.110390
OnlySimNormalized      2.974026
SimAndEmbNormalized    3.071429
OnlyEmb                4.974026
dtype: float64

In [18]:
res = statistical_test(shallow_expanded)
res

Friedman test result:
There is a significant difference in the rank sums:  5.751530028164361e-45
Nemenyi post-hoc test result:
Significant pairwise differences (p < 0.05) are highlighted in green


Unnamed: 0,OnlySim,SimAndEmb,OnlySimNormalized,SimAndEmbNormalized,OnlyEmb
OnlySim,-1.0,0.87026,0.001,0.001,0.001
SimAndEmb,0.87026,-1.0,0.00630355,0.0015243,0.001
OnlySimNormalized,0.001,0.00630355,-1.0,0.9,0.001
SimAndEmbNormalized,0.001,0.0015243,0.9,-1.0,0.001
OnlyEmb,0.001,0.001,0.001,0.001,-1.0


# Analysis of Feature Input for MultiKE
To check if using embeddings in conjunction with similarities provides an improvement overall we sum the test_f1 value over all folds

In [19]:
shallow_expanded = get_expanded_df(shallow_df,"embed_model","MultiKE",["model_name","vector_name","dataset"])
stylized = stylize(shallow_expanded)
stylized

Unnamed: 0_level_0,OnlyEmb,OnlySim,OnlySimNormalized,SimAndEmb,SimAndEmbNormalized
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
dblp-scholar_MLP,0.54,1.0,0.98,0.99,0.99
dblp-scholar_ada boost,0.54,1.0,0.97,1.0,0.99
dblp-scholar_decision tree,0.52,1.0,0.97,0.99,0.99
dblp-scholar_gaussian naive bayes,0.54,1.0,0.97,1.0,0.99
dblp-scholar_logistic regression,0.53,0.99,0.97,0.99,0.99
dblp-scholar_random forest 100,0.55,1.0,0.98,1.0,0.99
dblp-scholar_random forest 20,0.51,1.0,0.98,0.99,0.99
dblp-scholar_random forest 200,0.56,1.0,0.98,1.0,0.99
dblp-scholar_random forest 50,0.53,1.0,0.98,1.0,0.99
dblp-scholar_random forest 500,0.58,1.0,0.98,1.0,0.99


Mean row wise rank (the lower the better):

In [20]:
rank_order = shallow_expanded.apply(lambda x: x.rank(ascending=False), axis=1).mean().sort_values()
# sort columns by rank to make nemenyi table easier to understand
shallow_expanded = shallow_expanded.reindex(rank_order.index, axis=1)
rank_order

SimAndEmb              1.772727
OnlySim                2.340909
SimAndEmbNormalized    2.886364
OnlySimNormalized      3.068182
OnlyEmb                4.931818
dtype: float64

In [21]:
res = statistical_test(shallow_expanded)
res

Friedman test result:
There is a significant difference in the rank sums:  5.605103456230508e-25
Nemenyi post-hoc test result:
Significant pairwise differences (p < 0.05) are highlighted in green


Unnamed: 0,SimAndEmb,OnlySim,SimAndEmbNormalized,OnlySimNormalized,OnlyEmb
SimAndEmb,-1.0,0.44458,0.00847559,0.00114987,0.001
OnlySim,0.44458,-1.0,0.486128,0.196028,0.001
SimAndEmbNormalized,0.00847559,0.486128,-1.0,0.9,0.001
OnlySimNormalized,0.00114987,0.196028,0.9,-1.0,0.001
OnlyEmb,0.001,0.001,0.001,0.001,-1.0


## Analysis of Feature Input for BootEA
To check if using embeddings in conjunction with similarities provides an improvement overall we sum the test_f1 value over all folds

In [22]:
shallow_expanded = get_expanded_df(shallow_df,"embed_model","BootEA",["model_name","vector_name","dataset"])
stylized = stylize(shallow_expanded)
stylized

Unnamed: 0_level_0,OnlyEmb,OnlySim,OnlySimNormalized,SimAndEmb,SimAndEmbNormalized
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
abt-buy_MLP,0.63,0.98,0.97,0.96,0.96
abt-buy_ada boost,0.58,0.97,0.96,0.97,0.97
abt-buy_decision tree,0.54,0.97,0.95,0.96,0.96
abt-buy_gaussian naive bayes,0.59,0.97,0.97,0.97,0.97
abt-buy_logistic regression,0.58,0.97,0.97,0.97,0.97
abt-buy_random forest 100,0.61,0.98,0.97,0.97,0.97
abt-buy_random forest 20,0.54,0.98,0.97,0.97,0.97
abt-buy_random forest 200,0.63,0.98,0.97,0.97,0.97
abt-buy_random forest 50,0.58,0.98,0.97,0.97,0.97
abt-buy_random forest 500,0.64,0.98,0.97,0.97,0.97


Mean row wise rank (the lower the better):

In [23]:
rank_order = shallow_expanded.apply(lambda x: x.rank(ascending=False), axis=1).mean().sort_values()
# sort columns by rank to make nemenyi table easier to understand
shallow_expanded = shallow_expanded.reindex(rank_order.index, axis=1)
rank_order

OnlySim                1.961039
SimAndEmb              2.227273
SimAndEmbNormalized    2.766234
OnlySimNormalized      3.110390
OnlyEmb                4.935065
dtype: float64

In [24]:
res = statistical_test(shallow_expanded)
res

Friedman test result:
There is a significant difference in the rank sums:  8.131635382361422e-44
Nemenyi post-hoc test result:
Significant pairwise differences (p < 0.05) are highlighted in green


Unnamed: 0,OnlySim,SimAndEmb,SimAndEmbNormalized,OnlySimNormalized,OnlyEmb
OnlySim,-1.0,0.81241,0.0136826,0.001,0.001
SimAndEmb,0.81241,-1.0,0.213558,0.00480797,0.001
SimAndEmbNormalized,0.0136826,0.213558,-1.0,0.638867,0.001
OnlySimNormalized,0.001,0.00480797,0.638867,-1.0,0.001
OnlyEmb,0.001,0.001,0.001,0.001,-1.0


## Analysis of Classifier Performance (on SimAndEmb)
Similarly we analyze the different performance of the classifiers. We are interested in the performance using both: similarities and embeddings

In [25]:
whole_df = get_expanded_df(df,"vector_name","SimAndEmb",["embed_model","model_name","dataset"])
stylized = stylize(whole_df)
stylized

Unnamed: 0_level_0,MLP,ada boost,decision tree,gaussian naive bayes,logistic regression,random forest 100,random forest 20,random forest 200,random forest 50,random forest 500,svc
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
D_W_15K_V1_BootEA,0.8,0.55,0.56,0.83,0.75,0.82,0.82,0.82,0.82,0.82,0.83
D_W_15K_V1_MultiKE,0.9,0.84,0.79,0.64,0.83,0.86,0.85,0.86,0.85,0.86,0.87
D_W_15K_V1_RDGCN,0.82,0.8,0.77,0.67,0.83,0.82,0.8,0.83,0.81,0.83,0.86
D_W_15K_V2_BootEA,0.95,0.61,0.59,0.97,0.93,0.92,0.92,0.92,0.92,0.92,0.95
D_W_15K_V2_MultiKE,0.96,0.92,0.84,0.67,0.95,0.93,0.92,0.93,0.93,0.93,0.96
D_W_15K_V2_RDGCN,0.91,0.86,0.84,0.67,0.89,0.87,0.86,0.87,0.87,0.87,0.9
D_Y_15K_V1_BootEA,0.94,0.8,0.57,0.89,0.89,0.94,0.93,0.93,0.91,0.95,0.92
D_Y_15K_V1_MultiKE,0.99,0.98,0.97,0.96,0.99,0.99,0.99,0.99,0.99,0.99,0.99
D_Y_15K_V1_RDGCN,0.98,0.99,0.98,0.95,0.98,0.99,0.98,0.99,0.99,0.99,0.98
D_Y_15K_V2_BootEA,0.99,0.54,0.54,0.98,0.98,0.99,0.98,0.98,0.98,0.98,0.99


mean row wise rank

In [26]:
rank_order = whole_df.apply(lambda x: x.rank(ascending=False), axis=1).mean().sort_values()
# sort columns by rank to make nemenyi table easier to understand
whole_df = whole_df.reindex(rank_order.index, axis=1)
rank_order

random forest 500       3.944444
random forest 200       4.177778
random forest 100       4.200000
random forest 50        5.055556
MLP                     5.388889
logistic regression     6.011111
ada boost               6.288889
random forest 20        6.455556
svc                     6.600000
gaussian naive bayes    8.855556
decision tree           9.022222
dtype: float64

In [27]:
res = statistical_test(whole_df,features=False)
res

Friedman test result:
There is a significant difference in the rank sums:  3.6229709860720423e-10
Nemenyi post-hoc test result:
Significant pairwise differences (p < 0.05) are highlighted in green


Unnamed: 0,random forest 500,random forest 200,random forest 100,random forest 50,MLP,logistic regression,ada boost,random forest 20,svc,gaussian naive bayes,decision tree
random forest 500,-1.0,0.9,0.9,0.883968,0.587443,0.10624,0.032826,0.0146094,0.00682537,0.001,0.001
random forest 200,0.9,-1.0,0.9,0.9,0.795009,0.237256,0.0889586,0.0444142,0.0226922,0.001,0.001
random forest 100,0.9,0.9,-1.0,0.9,0.814778,0.253614,0.0972801,0.0489354,0.0252388,0.001,0.001
random forest 50,0.883968,0.9,0.9,-1.0,0.9,0.9,0.775242,0.62698,0.498446,0.001,0.001
MLP,0.587443,0.795009,0.814778,0.9,-1.0,0.9,0.9,0.9,0.795009,0.001,0.001
logistic regression,0.10624,0.237256,0.253614,0.9,0.9,-1.0,0.9,0.9,0.9,0.00233566,0.001
ada boost,0.032826,0.0889586,0.0972801,0.775242,0.9,0.9,-1.0,0.9,0.9,0.0109698,0.00443444
random forest 20,0.0146094,0.0444142,0.0489354,0.62698,0.9,0.9,0.9,-1.0,0.9,0.0252388,0.0109698
svc,0.00682537,0.0226922,0.0252388,0.498446,0.795009,0.9,0.9,0.9,-1.0,0.0489354,0.0226922
gaussian naive bayes,0.001,0.001,0.001,0.001,0.001,0.00233566,0.0109698,0.0252388,0.0489354,-1.0,0.9


In [28]:
df = shallow_df[["model_name","vector_name","test_f1","dataset","embed_model"]]
df

Unnamed: 0,model_name,vector_name,test_f1,dataset,embed_model
495,MLP,OnlyEmb,0.540308,dblp-scholar,MultiKE
496,MLP,OnlySim,0.996254,dblp-scholar,MultiKE
497,MLP,OnlySimNormalized,0.975213,dblp-scholar,MultiKE
498,MLP,SimAndEmb,0.994382,dblp-scholar,MultiKE
499,MLP,SimAndEmbNormalized,0.991458,dblp-scholar,MultiKE
500,ada boost,OnlyEmb,0.540099,dblp-scholar,MultiKE
501,ada boost,OnlySim,0.995989,dblp-scholar,MultiKE
502,ada boost,OnlySimNormalized,0.975623,dblp-scholar,MultiKE
503,ada boost,SimAndEmb,0.995044,dblp-scholar,MultiKE
504,ada boost,SimAndEmbNormalized,0.993190,dblp-scholar,MultiKE


In [29]:
df[df["embed_model"]=="BootEA"].set_index(["vector_name","model_name"]).drop("embed_model",axis=1)
df_shortened = df[df["model_name"].isin(["MLP","ada boost","decision tree","gaussian naive bayes","random forest 500","svc"])]
df_shortened = df_shortened[df_shortened["vector_name"].isin(["OnlyEmb","OnlySim","SimAndEmb"])]
df_shortened["model_name"] = df_shortened["model_name"].map({"ada boost":"ada","decision tree":"dt","gaussian naive bayes":"bayes","random forest 500":"rf","svc":"svc","MLP":"MLP","logistic regression":"lr"})
piv = df_shortened.pivot_table(index="dataset",columns=["vector_name","model_name"],values="test_f1")

ModuleNotFoundError: No module named 'colorizelatextables'

In [None]:
sns.color_palette("Blues",n_colors=6)

In [None]:
piv.round(3).style.background_gradient(axis=1)