In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from scipy.stats import friedmanchisquare
from scikit_posthocs import posthoc_nemenyi_friedman as nemenyi

In [2]:
df = pd.read_csv("/home/dobraczka/Downloads/git/er-embedding-benchmark/data/all_results.csv",header=0,sep=",")
df["dataset"] = df["dataset"].str.replace(r"-721_5fold-\d","")
rich_graph_datasets = {"D_W_15K_V1", "D_W_15K_V2", "D_Y_15K_V1", "D_Y_15K_V2", "EN_DE_15K_V1", "EN_DE_15K_V2", "EN_FR_15K_V1", "EN_FR_15K_V2"}
rich_df = df[df["dataset"].isin(rich_graph_datasets)]
shallow_df = df[~df["dataset"].isin(rich_graph_datasets)]

In [3]:
# creates a df with mean value of folds and all values per feature variant with all classifiers and datasets as rows
def get_expanded_df(df, removed_dim_name, fixed_value, levels):
    aggregated = df[df[removed_dim_name]==fixed_value].groupby(levels).mean()
    #level_zero = df["model_name"].unique()
    #level_one = df["vector_name"].unique()
    level_zero = df[levels[0]].unique()
    level_one = df[levels[1]].unique()
    variant_cols = []
    row_names = None
    for f in level_one:
        tmp_names = None
        tmp_col = None
        for m in level_zero:
            tmp = aggregated.loc[(m,f)]["test_f1"]
            tmp = tmp.add_suffix("_" + m)
            tmp.name = f
            if tmp_names is None:
                tmp_names = set(tmp.index.to_list())
            else:
                tmp_names = tmp_names | set(tmp.index.to_list())
            if tmp_col is None:
                tmp_col = tmp
            else:
                tmp_col = tmp_col.append(tmp)
        if row_names is None:
            row_names = tmp_names
        else:
            row_names = row_names & tmp_names
        variant_cols.append(tmp_col)
    for i in range(0, len(variant_cols)):
        variant_cols[i] = variant_cols[i].loc[row_names]
    expanded = pd.DataFrame(variant_cols).T
    expanded.sort_index(inplace=True)
    return expanded.round(2)

In [4]:
def highlight_significant(val):
    color = 'green' if val < 0.05 and val > 0.0 else 'black'
    return 'color: %s' % color

In [5]:
def stylize(df):
    cm = sns.cubehelix_palette(5, as_cmap=True)
    return df.style.background_gradient(cmap=cm, axis=1)

In [6]:
def statistical_test(df, features=True):
    #dont know how to better provide the arguments without it being interpreted as single array
    if features:
        statistic, pvalue = friedmanchisquare(df['OnlyEmb'], df['OnlySim'], df['OnlySimNormalized'], df['SimAndEmb'], df['SimAndEmbNormalized'])
    else:
        statistic, pvalue = friedmanchisquare(df.iloc[0],df.iloc[1],df.iloc[2],df.iloc[3],df.iloc[4],df.iloc[5],df.iloc[6],df.iloc[7])
    if pvalue < 0.05:
        print("Friedman test result:")
        print("There is a significant difference in the rank sums: ", pvalue)
        res = nemenyi(df)
        print("Nemenyi post-hoc test result:")
        print("Significant pairwise differences (p < 0.05) are highlighted in green")
        return res.style.applymap(highlight_significant)
    else:
        print("No significant difference")
        return None

# General Remarks

- For each dataset there are 5 folds
- on each fold a 7-2-1 split:
    - 70% testing
    - 20% training
    - 10% validation
- Results show the F-measure on the test data averaged over the 5 folds

# Analysis of Rich Graph Datasets

Our hypothesis is that Knowledge Graph Embeddings (KGEs) perform well on datasets that with many edges between vertices. We also have the hypothesis that using both: KGEs and attribute similarities will have a beneficial effect on classifier performance. We gave several classifiers the following different features:

-__SimAndEmb__: Attribute similarities/distances and embeddings

-__OnlySim__: Only Attribute similarities/distances

-__SimAndEmbNormalized__: Attribute similarities and embeddings

-__OnlySimNormalized__: Attribute similarities normalized between 0 and 1

-__OnlyEmb__: Only euclidean distance
    
We do this for each different embedding method

## Analysis of Feature Input for RDGCN
To check if using embeddings in conjunction with similarities provides an improvement overall we sum the test_f1 value over all folds

In [7]:
rich_expanded = get_expanded_df(rich_df,"embed_model","RDGCN",["model_name","vector_name","dataset"])
stylized = stylize(rich_expanded)
stylized

Unnamed: 0_level_0,SimAndEmb,SimAndEmbNormalized,OnlySim,OnlySimNormalized,OnlyEmb
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
D_W_15K_V1_MLP,0.82,0.79,0.75,0.73,0.74
D_W_15K_V1_decision tree,0.77,0.74,0.72,0.72,0.57
D_W_15K_V1_gaussian naive bayes,0.65,0.63,0.63,0.67,0.56
D_W_15K_V1_random forest 100,0.82,0.75,0.73,0.72,0.68
D_W_15K_V1_random forest 20,0.8,0.74,0.74,0.72,0.6
D_W_15K_V1_random forest 200,0.82,0.75,0.73,0.72,0.69
D_W_15K_V1_random forest 50,0.81,0.75,0.73,0.72,0.65
D_W_15K_V1_random forest 500,0.83,0.75,0.73,0.72,0.7
D_W_15K_V1_svc,0.86,0.81,0.68,0.71,0.79
D_W_15K_V2_MLP,0.9,0.89,0.8,0.8,0.81


Mean row wise rank (the lower the better):

In [8]:
rank_order = rich_expanded.apply(lambda x: x.rank(ascending=False), axis=1).mean().sort_values()
# sort columns by rank to make nemenyi table easier to understand
rich_expanded = rich_expanded.reindex(rank_order.index, axis=1)
rank_order

SimAndEmb              1.194444
SimAndEmbNormalized    2.250000
OnlySim                3.423611
OnlySimNormalized      3.993056
OnlyEmb                4.138889
dtype: float64

Using embeddings AND similarity seems to improve the results.
Let's test if the difference is significant!

The Friedman test is designed to check if there is a significant difference in the rank sums.
If this is the case a nemenyi test is applied pairwise to check which differences are significant.

In [9]:
res = statistical_test(rich_expanded)
res

Friedman test result:
There is a significant difference in the rank sums:  2.6867513204308996e-39
Nemenyi post-hoc test result:
Significant pairwise differences (p < 0.05) are highlighted in green


Unnamed: 0,SimAndEmb,SimAndEmbNormalized,OnlySim,OnlySimNormalized,OnlyEmb
SimAndEmb,-1.0,0.001,0.001,0.001,0.001
SimAndEmbNormalized,0.001,-1.0,0.001,0.001,0.001
OnlySim,0.001,0.001,-1.0,0.194648,0.0518902
OnlySimNormalized,0.001,0.001,0.194648,-1.0,0.9
OnlyEmb,0.001,0.001,0.0518902,0.9,-1.0


Values below 0.05 show a significant difference. So keeping the mean row wise ranks in mind:
SimAndEmb is significantly better than all other approaches.

# Analysis of Feature Input for MultiKE
To check if using embeddings in conjunction with similarities provides an improvement overall we sum the test_f1 value over all folds

In [10]:
rich_expanded = get_expanded_df(rich_df,"embed_model","MultiKE",["model_name","vector_name","dataset"])
stylized = stylize(rich_expanded)
stylized

Unnamed: 0_level_0,SimAndEmb,SimAndEmbNormalized,OnlySim,OnlySimNormalized,OnlyEmb
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
D_W_15K_V1_MLP,0.88,0.9,0.71,0.71,0.84
D_W_15K_V1_decision tree,0.79,0.79,0.74,0.72,0.66
D_W_15K_V1_gaussian naive bayes,0.69,0.67,0.63,0.67,0.53
D_W_15K_V1_random forest 100,0.86,0.87,0.75,0.75,0.85
D_W_15K_V1_random forest 20,0.85,0.85,0.76,0.75,0.76
D_W_15K_V1_random forest 200,0.86,0.88,0.75,0.73,0.86
D_W_15K_V1_random forest 50,0.85,0.87,0.76,0.73,0.82
D_W_15K_V1_random forest 500,0.86,0.88,0.75,0.73,0.87
D_W_15K_V1_svc,0.87,0.86,0.68,0.72,0.88
D_W_15K_V2_MLP,0.94,0.96,0.79,0.79,0.94


Mean row wise rank (the lower the better):

In [11]:
rank_order = rich_expanded.apply(lambda x: x.rank(ascending=False), axis=1).mean().sort_values()
# sort columns by rank to make nemenyi table easier to understand
rich_expanded = rich_expanded.reindex(rank_order.index, axis=1)
rank_order

SimAndEmb              1.437500
SimAndEmbNormalized    1.951389
OnlyEmb                3.611111
OnlySim                3.729167
OnlySimNormalized      4.270833
dtype: float64

Using embeddings AND similarities seems to improve the results.
Let's test if the difference is significant!

The Friedman test is designed to check if there is a significant difference in the rank sums.
If this is the case a nemenyi test is applied pairwise to check which differences are significant.

In [12]:
res = statistical_test(rich_expanded)
res

Friedman test result:
There is a significant difference in the rank sums:  1.361537382966769e-37
Nemenyi post-hoc test result:
Significant pairwise differences (p < 0.05) are highlighted in green


Unnamed: 0,SimAndEmb,SimAndEmbNormalized,OnlyEmb,OnlySim,OnlySimNormalized
SimAndEmb,-1.0,0.290984,0.001,0.001,0.001
SimAndEmbNormalized,0.290984,-1.0,0.001,0.001,0.001
OnlyEmb,0.001,0.001,-1.0,0.9,0.089638
OnlySim,0.001,0.001,0.9,-1.0,0.239603
OnlySimNormalized,0.001,0.001,0.089638,0.239603,-1.0


Using similarities and embeddings (normalized or not) is significantly better than all other approaches.

## Analysis of Feature Input for BootEA
To check if using embeddings in conjunction with similarities provides an improvement overall we sum the test_f1 value over all folds

In [13]:
rich_expanded = get_expanded_df(rich_df,"embed_model","BootEA",["model_name","vector_name","dataset"])
stylized = stylize(rich_expanded)
stylized

Unnamed: 0_level_0,SimAndEmb,SimAndEmbNormalized,OnlySim,OnlySimNormalized,OnlyEmb
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
D_W_15K_V1_MLP,0.8,0.85,0.71,0.75,0.77
D_W_15K_V1_decision tree,0.52,0.79,0.75,0.74,0.57
D_W_15K_V1_gaussian naive bayes,0.83,0.66,0.63,0.67,0.53
D_W_15K_V1_random forest 100,0.81,0.8,0.76,0.75,0.67
D_W_15K_V1_random forest 20,0.81,0.8,0.76,0.76,0.63
D_W_15K_V1_random forest 200,0.82,0.8,0.76,0.75,0.67
D_W_15K_V1_random forest 50,0.8,0.8,0.76,0.75,0.65
D_W_15K_V1_random forest 500,0.81,0.81,0.76,0.75,0.68
D_W_15K_V1_svc,0.83,0.79,0.68,0.74,0.86
D_W_15K_V2_MLP,0.95,0.95,0.8,0.79,0.93


Mean row wise rank (the lower the better):

In [14]:
rank_order = rich_expanded.apply(lambda x: x.rank(ascending=False), axis=1).mean().sort_values()
# sort columns by rank to make nemenyi table easier to understand
rich_expanded = rich_expanded.reindex(rank_order.index, axis=1)
rank_order

SimAndEmbNormalized    1.895833
SimAndEmb              2.256944
OnlySim                3.000000
OnlySimNormalized      3.583333
OnlyEmb                4.263889
dtype: float64

Interestingly normalization seems to be beneficial here

The Friedman test is designed to check if there is a significant difference in the rank sums.
If this is the case a nemenyi test is applied pairwise to check which differences are significant.

In [15]:
res = statistical_test(rich_expanded)
res

Friedman test result:
There is a significant difference in the rank sums:  3.147558885928436e-23
Nemenyi post-hoc test result:
Significant pairwise differences (p < 0.05) are highlighted in green


Unnamed: 0,SimAndEmbNormalized,SimAndEmb,OnlySim,OnlySimNormalized,OnlyEmb
SimAndEmbNormalized,-1.0,0.627655,0.001,0.001,0.001
SimAndEmb,0.627655,-1.0,0.0386758,0.001,0.001
OnlySim,0.001,0.0386758,-1.0,0.174719,0.001
OnlySimNormalized,0.001,0.001,0.174719,-1.0,0.0737383
OnlyEmb,0.001,0.001,0.001,0.0737383,-1.0


Using (normalized) similarities and embeddings is significantly better than all other approaches.

# Analysis of shallow Datasets

Our hypothesis here is that Knowledge Graph Embeddings (KGEs) don't significantly improve the results, because there is too little information in the embedding process

## Analysis of Feature Input for RDGCN
To check if using embeddings in conjunction with similarities provides an improvement overall we sum the test_f1 value over all folds

In [16]:
shallow_expanded = get_expanded_df(shallow_df,"embed_model","RDGCN",["model_name","vector_name","dataset"])
stylized = stylize(shallow_expanded)
stylized

Unnamed: 0_level_0,SimAndEmb,SimAndEmbNormalized,OnlySim,OnlySimNormalized,OnlyEmb
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
abt-buy_MLP,0.84,0.84,0.98,0.98,0.28
abt-buy_decision tree,0.81,0.65,0.97,0.96,0.38
abt-buy_gaussian naive bayes,0.87,0.79,0.97,0.97,0.56
abt-buy_random forest 100,0.94,0.64,0.98,0.97,0.36
abt-buy_random forest 20,0.83,0.62,0.98,0.97,0.29
abt-buy_random forest 200,0.95,0.75,0.98,0.97,0.47
abt-buy_random forest 50,0.89,0.66,0.98,0.97,0.4
abt-buy_random forest 500,0.96,0.74,0.98,0.97,0.27
abt-buy_svc,0.95,0.84,0.98,0.98,0.21
amazon-google_MLP,0.87,0.83,0.98,0.98,0.2


Mean row wise rank (the lower the better):

In [17]:
rank_order = shallow_expanded.apply(lambda x: x.rank(ascending=False), axis=1).mean().sort_values()
# sort columns by rank to make nemenyi table easier to understand
shallow_expanded = shallow_expanded.reindex(rank_order.index, axis=1)
rank_order

OnlySim                1.595238
OnlySimNormalized      2.103175
SimAndEmb              2.555556
SimAndEmbNormalized    3.793651
OnlyEmb                4.952381
dtype: float64

Using only the similarities gives the best results.
Let's test if the difference is significant!

The Friedman test is designed to check if there is a significant difference in the rank sums.
If this is the case a nemenyi test is applied pairwise to check which differences are significant.

In [18]:
res = statistical_test(shallow_expanded)
res

Friedman test result:
There is a significant difference in the rank sums:  2.2174474432157056e-41
Nemenyi post-hoc test result:
Significant pairwise differences (p < 0.05) are highlighted in green


Unnamed: 0,OnlySim,OnlySimNormalized,SimAndEmb,SimAndEmbNormalized,OnlyEmb
OnlySim,-1.0,0.372647,0.00588386,0.001,0.001
OnlySimNormalized,0.372647,-1.0,0.493611,0.001,0.001
SimAndEmb,0.00588386,0.493611,-1.0,0.001,0.001
SimAndEmbNormalized,0.001,0.001,0.001,-1.0,0.001
OnlyEmb,0.001,0.001,0.001,0.001,-1.0


Using only the (normalized) similarities is significantly better than all other approaches.

# Analysis of Feature Input for MultiKE
To check if using embeddings in conjunction with similarities provides an improvement overall we sum the test_f1 value over all folds

In [19]:
shallow_expanded = get_expanded_df(shallow_df,"embed_model","MultiKE",["model_name","vector_name","dataset"])
stylized = stylize(shallow_expanded)
stylized

Unnamed: 0_level_0,SimAndEmb,SimAndEmbNormalized,OnlySim,OnlySimNormalized,OnlyEmb
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
dblp-scholar_MLP,0.99,0.99,1.0,0.98,0.52
dblp-scholar_decision tree,0.99,0.98,0.99,0.97,0.52
dblp-scholar_gaussian naive bayes,1.0,0.98,1.0,0.97,0.47
dblp-scholar_random forest 100,1.0,0.99,1.0,0.98,0.56
dblp-scholar_random forest 20,1.0,0.98,1.0,0.98,0.5
dblp-scholar_random forest 200,1.0,0.99,1.0,0.98,0.57
dblp-scholar_random forest 50,1.0,0.98,1.0,0.98,0.52
dblp-scholar_random forest 500,1.0,0.99,1.0,0.98,0.58
dblp-scholar_svc,1.0,0.99,1.0,0.98,0.5
imdb-tmdb_MLP,1.0,0.99,0.99,0.99,0.78


Mean row wise rank (the lower the better):

In [20]:
rank_order = shallow_expanded.apply(lambda x: x.rank(ascending=False), axis=1).mean().sort_values()
# sort columns by rank to make nemenyi table easier to understand
shallow_expanded = shallow_expanded.reindex(rank_order.index, axis=1)
rank_order

SimAndEmb              2.138889
OnlySim                2.319444
SimAndEmbNormalized    2.750000
OnlySimNormalized      2.875000
OnlyEmb                4.916667
dtype: float64

Using embeddings AND similarities seems to improve the results.
Let's test if the difference is significant!

The Friedman test is designed to check if there is a significant difference in the rank sums.
If this is the case a nemenyi test is applied pairwise to check which differences are significant.

In [21]:
res = statistical_test(shallow_expanded)
res

Friedman test result:
There is a significant difference in the rank sums:  2.8134231965268338e-18
Nemenyi post-hoc test result:
Significant pairwise differences (p < 0.05) are highlighted in green


Unnamed: 0,SimAndEmb,OnlySim,SimAndEmbNormalized,OnlySimNormalized,OnlyEmb
SimAndEmb,-1.0,0.9,0.472825,0.278221,0.001
OnlySim,0.9,-1.0,0.749684,0.559331,0.001
SimAndEmbNormalized,0.472825,0.749684,-1.0,0.9,0.001
OnlySimNormalized,0.278221,0.559331,0.9,-1.0,0.001
OnlyEmb,0.001,0.001,0.001,0.001,-1.0


Here using only embeddings is significantly outperformed by everything else. But there is no significant differnce between the other approaches.

## Analysis of Feature Input for BootEA
To check if using embeddings in conjunction with similarities provides an improvement overall we sum the test_f1 value over all folds

In [22]:
shallow_expanded = get_expanded_df(shallow_df,"embed_model","BootEA",["model_name","vector_name","dataset"])
stylized = stylize(shallow_expanded)
stylized

Unnamed: 0_level_0,SimAndEmb,SimAndEmbNormalized,OnlySim,OnlySimNormalized,OnlyEmb
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
abt-buy_MLP,0.95,0.8,0.98,0.98,0.37
abt-buy_decision tree,0.04,0.75,0.96,0.96,0.4
abt-buy_gaussian naive bayes,0.92,0.94,0.96,0.97,0.63
abt-buy_random forest 100,0.95,0.87,0.97,0.97,0.42
abt-buy_random forest 20,0.96,0.91,0.97,0.97,0.45
abt-buy_random forest 200,0.96,0.9,0.97,0.97,0.42
abt-buy_random forest 50,0.94,0.88,0.98,0.97,0.41
abt-buy_random forest 500,0.96,0.89,0.98,0.97,0.38
abt-buy_svc,0.55,0.76,0.98,0.98,0.48
amazon-google_MLP,0.93,0.81,0.98,0.98,0.4


Mean row wise rank (the lower the better):

In [23]:
rank_order = shallow_expanded.apply(lambda x: x.rank(ascending=False), axis=1).mean().sort_values()
# sort columns by rank to make nemenyi table easier to understand
shallow_expanded = shallow_expanded.reindex(rank_order.index, axis=1)
rank_order

OnlySim                1.769841
OnlySimNormalized      2.436508
SimAndEmb              2.928571
SimAndEmbNormalized    3.007937
OnlyEmb                4.857143
dtype: float64

Interestingly normalization seems to be beneficial here

The Friedman test is designed to check if there is a significant difference in the rank sums.
If this is the case a nemenyi test is applied pairwise to check which differences are significant.

In [24]:
res = statistical_test(shallow_expanded)
res

Friedman test result:
There is a significant difference in the rank sums:  2.5175643414645975e-32
Nemenyi post-hoc test result:
Significant pairwise differences (p < 0.05) are highlighted in green


Unnamed: 0,OnlySim,OnlySimNormalized,SimAndEmb,SimAndEmbNormalized,OnlyEmb
OnlySim,-1.0,0.124843,0.001,0.001,0.001
OnlySimNormalized,0.124843,-1.0,0.406807,0.252243,0.001
SimAndEmb,0.001,0.406807,-1.0,0.9,0.001
SimAndEmbNormalized,0.001,0.252243,0.9,-1.0,0.001
OnlyEmb,0.001,0.001,0.001,0.001,-1.0


Using only similarities is significantly better than all approaches that use embeddings.

## Analysis of Classifier Performance (on SimAndEmb)
Similarly we analyze the different performance of the classifiers. We are interested in the performance using both: similarities and embeddings

In [25]:
whole_df = get_expanded_df(df,"vector_name","SimAndEmb",["embed_model","model_name","dataset"])
stylized = stylize(whole_df)
stylized

Unnamed: 0_level_0,svc,random forest 20,random forest 50,random forest 100,random forest 200,random forest 500,decision tree,gaussian naive bayes,MLP
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
D_W_15K_V1_BootEA,0.83,0.81,0.8,0.81,0.82,0.81,0.52,0.83,0.8
D_W_15K_V1_MultiKE,0.87,0.85,0.85,0.86,0.86,0.86,0.79,0.69,0.88
D_W_15K_V1_RDGCN,0.86,0.8,0.81,0.82,0.82,0.83,0.77,0.65,0.82
D_W_15K_V2_BootEA,0.95,0.91,0.93,0.93,0.92,0.93,0.62,0.97,0.95
D_W_15K_V2_MultiKE,0.96,0.93,0.93,0.93,0.93,0.93,0.85,0.67,0.94
D_W_15K_V2_RDGCN,0.9,0.85,0.87,0.87,0.87,0.87,0.84,0.67,0.9
D_Y_15K_V1_BootEA,0.92,0.91,0.93,0.94,0.95,0.96,0.58,0.9,0.94
D_Y_15K_V1_MultiKE,0.99,0.99,0.99,0.99,0.99,0.99,0.97,0.96,0.99
D_Y_15K_V1_RDGCN,0.98,0.99,0.99,0.99,0.99,0.99,0.98,0.95,0.98
D_Y_15K_V2_BootEA,0.99,0.97,0.98,0.99,0.99,0.99,0.67,0.98,0.99


mean row wise rank

In [26]:
rank_order = whole_df.apply(lambda x: x.rank(ascending=False), axis=1).mean().sort_values()
# sort columns by rank to make nemenyi table easier to understand
whole_df = whole_df.reindex(rank_order.index, axis=1)
rank_order

random forest 500       3.130952
random forest 200       3.511905
MLP                     3.630952
random forest 100       3.797619
random forest 50        4.619048
svc                     5.285714
random forest 20        5.809524
gaussian naive bayes    7.095238
decision tree           8.119048
dtype: float64

In [27]:
res = statistical_test(whole_df,features=False)
res

Friedman test result:
There is a significant difference in the rank sums:  2.0581595907986204e-08
Nemenyi post-hoc test result:
Significant pairwise differences (p < 0.05) are highlighted in green


Unnamed: 0,random forest 500,random forest 200,MLP,random forest 100,random forest 50,svc,random forest 20,gaussian naive bayes,decision tree
random forest 500,-1.0,0.9,0.9,0.9,0.236666,0.00946085,0.001,0.001,0.001
random forest 200,0.9,-1.0,0.9,0.9,0.626485,0.073841,0.00384786,0.001,0.001
MLP,0.9,0.9,-1.0,0.9,0.747366,0.124997,0.00818373,0.001,0.001
random forest 100,0.9,0.9,0.9,-1.0,0.9,0.236666,0.0217147,0.001,0.001
random forest 50,0.236666,0.626485,0.747366,0.9,-1.0,0.9,0.541871,0.0011392,0.001
svc,0.00946085,0.073841,0.124997,0.236666,0.9,-1.0,0.9,0.0622364,0.001
random forest 20,0.001,0.00384786,0.00818373,0.0217147,0.541871,0.9,-1.0,0.441006,0.00355741
gaussian naive bayes,0.001,0.001,0.001,0.001,0.0011392,0.0622364,0.441006,-1.0,0.711103
decision tree,0.001,0.001,0.001,0.001,0.001,0.001,0.00355741,0.711103,-1.0


In [None]:
# Analysis conventional OpenEA