In [1]:
### Import useful libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn import metrics
import networkx as nx
import time
from cdlib import algorithms
import random
import csv
%matplotlib inline

Note: to be able to use all crisp methods, you need to install some additional packages:  {'graph_tool', 'wurlitzer'}
Note: to be able to use all overlapping methods, you need to install some additional packages:  {'ASLPAw'}
Note: to be able to use all bipartite methods, you need to install some additional packages:  {'wurlitzer'}


In [2]:
# Load the edge list as a directed graph; node labels are parsed as integers.
G = nx.read_edgelist(
    "./EDGES_FILE.csv",
    nodetype=int,
    create_using=nx.DiGraph,
)

In [3]:
# Number of nodes in the graph as loaded
G.number_of_nodes()

956

In [4]:
# Drop every node that has a self-loop and a total degree of exactly 2.
# NOTE(review): in a DiGraph a self-loop presumably contributes 1 in + 1 out, so
# degree 2 would mean the self-loop is the node's only edge — confirm.
l = list(nx.selfloop_edges(G))  # materialised up front: the graph is mutated below
nodes = [u for u, _ in l if G.degree(u) == 2]  # the nodes being removed
G.remove_nodes_from(nodes)

In [5]:
# Number of nodes left in the graph at this point
G.number_of_nodes()

778

In [6]:
### Identifying Leiden communities
# Node ids in ascending order; later cells use this list to order samples and labels.
nodes = sorted(G)
# Community detection with cdlib's Leiden implementation
comms = algorithms.leiden(G)

In [7]:
# Reduce the node -> [community ids] map to a single id per node (the first one
# listed), restricted to the nodes collected in `nodes`.
membership = comms.to_node_community_map()
comms_dict_ok = {node: membership[node][0] for node in nodes}
comms_dict = comms_dict_ok

### N2V

In [8]:
### Parse the n2v run log: number of tests plus one value per test for each setting
tests_num = 0
exec_time, p, q, walk_len, walk_num = [], [], [], [], []

# (substring that identifies the line, list collecting the value,
#  slice of the line holding the numeric text)
n2v_fields = (
    ("Embedding", exec_time, slice(31, -2)),
    ("p:", p, slice(3, None)),
    ("q:", q, slice(3, None)),
    ("walk_len:", walk_len, slice(10, -1)),
    ("num_walks:", walk_num, slice(10, -1)),
)

with open("./n2v/n2v_info.txt", "r") as f:
    for line in f:
        # Every line mentioning "Test" counts as one test
        if "Test" in line:
            tests_num += 1
        # The checks are independent: a line is tested against every marker
        for marker, values, span in n2v_fields:
            if marker in line:
                values.append(float(line[span]))

In [9]:
f1_scores = []
f1_scores_macro = []
f1_scores_weigh = []

# Loop-invariant inputs, built once instead of once per test.
node_ids = set(nodes)  # constant-time membership for the row filter below
# Targets: community of every node as found by the Leiden algorithm
y_data = [comms_dict.get(node) for node in nodes]

# NOTE(review): neither the forest nor the split is seeded, so scores vary between runs.
# start_time / end_time bracket the whole sweep; s_t times a single test.
start_time = time.time()
for i in range(tests_num):
    ### Random forest classifier creation with 70 trees (fresh model for every test)
    clf = RandomForestClassifier(n_estimators=70)
    s_t = time.time()

    # Input: embedding vectors of test i. Only rows whose 1-based position is a
    # node id still in the graph are kept (assumes row k belongs to node k + 1 — TODO confirm).
    emb_rows = pd.read_csv("./n2v/n2v_emb_vectors" + str(i) + ".csv", header=None, sep=";").values.tolist()
    X_data = [row for pos, row in enumerate(emb_rows) if (pos + 1) in node_ids]

    # Split the data into training set and test set (80% / 20%)
    X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.2)

    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    ### F1 metrics (micro, macro and weighted averages)
    f1_scores.append(metrics.f1_score(y_test, y_pred, average="micro"))
    f1_scores_macro.append(metrics.f1_score(y_test, y_pred, average="macro"))
    f1_scores_weigh.append(metrics.f1_score(y_test, y_pred, average="weighted"))
    print("Completed iteration in:", time.time() - s_t)

end_time = time.time()

Completed iteration in: 0.2185349464416504
Completed iteration in: 0.23238635063171387
Completed iteration in: 0.233320951461792
Completed iteration in: 0.2727475166320801
Completed iteration in: 0.21936774253845215
Completed iteration in: 0.23386812210083008
Completed iteration in: 0.240189790725708
Completed iteration in: 0.2427232265472412
Completed iteration in: 0.213761568069458
Completed iteration in: 0.2355813980102539
Completed iteration in: 0.2309126853942871
Completed iteration in: 0.2557370662689209
Completed iteration in: 0.2093665599822998
Completed iteration in: 0.22118639945983887
Completed iteration in: 0.23666691780090332
Completed iteration in: 0.24304890632629395
Completed iteration in: 0.20594525337219238
Completed iteration in: 0.20864439010620117
Completed iteration in: 0.22224903106689453
Completed iteration in: 0.24326658248901367
Completed iteration in: 0.1926124095916748
Completed iteration in: 0.21749186515808105
Completed iteration in: 0.22382283210754395
Co

In [10]:
# Summary statistics of the three F1 averages over all tests
df = pd.DataFrame(
    {
        "Micro": pd.Series(f1_scores).describe(),
        "Macro": pd.Series(f1_scores_macro).describe(),
        "Weigh": pd.Series(f1_scores_weigh).describe(),
    }
)
df

Unnamed: 0,Micro,Macro,Weigh
count,24.0,24.0,24.0
mean,0.967949,0.966773,0.967918
std,0.011022,0.011969,0.011034
min,0.942308,0.941619,0.942284
25%,0.961538,0.959066,0.961322
50%,0.967949,0.967459,0.967974
75%,0.974359,0.974065,0.97431
max,0.987179,0.988359,0.987256


In [11]:
# One row per n2v test: micro-F1 with its run time and settings, lowest score first
n2v_micro_rows = list(zip(f1_scores, exec_time, p, q, walk_num, walk_len))
pd.DataFrame(
    n2v_micro_rows,
    columns=["F1-micro", "Exec time", "p", "q", "walk_num", "walk_len"],
).sort_values(by="F1-micro")

Unnamed: 0,F1-micro,Exec time,p,q,walk_num,walk_len
8,0.942308,10.692874,0.5,0.5,40.0,5.0
20,0.955128,10.213931,2.0,1.0,40.0,5.0
9,0.955128,11.893926,0.5,0.5,80.0,5.0
13,0.955128,13.568689,1.0,0.5,80.0,5.0
15,0.955128,16.266213,1.0,0.5,80.0,10.0
0,0.961538,7.215413,1.0,1.0,40.0,5.0
3,0.961538,10.022111,1.0,1.0,80.0,10.0
16,0.961538,11.407092,1.0,2.0,40.0,5.0
21,0.967949,11.243125,2.0,1.0,80.0,5.0
2,0.967949,7.874896,1.0,1.0,40.0,10.0


In [12]:
# One row per n2v test: macro-F1 with its run time and settings, lowest score first
n2v_macro_rows = list(zip(f1_scores_macro, exec_time, p, q, walk_num, walk_len))
pd.DataFrame(
    n2v_macro_rows,
    columns=["F1-macro", "Exec time", "p", "q", "walk_num", "walk_len"],
).sort_values(by="F1-macro")

Unnamed: 0,F1-macro,Exec time,p,q,walk_num,walk_len
8,0.941619,10.692874,0.5,0.5,40.0,5.0
13,0.950222,13.568689,1.0,0.5,80.0,5.0
20,0.950548,10.213931,2.0,1.0,40.0,5.0
15,0.952724,16.266213,1.0,0.5,80.0,10.0
9,0.953976,11.893926,0.5,0.5,80.0,5.0
0,0.959019,7.215413,1.0,1.0,40.0,5.0
3,0.959082,10.022111,1.0,1.0,80.0,10.0
16,0.959428,11.407092,1.0,2.0,40.0,5.0
17,0.966285,12.942001,1.0,2.0,80.0,5.0
19,0.966672,16.335857,1.0,2.0,80.0,10.0


In [13]:
# One row per n2v test: weighted F1 with its run time and settings, lowest score first
n2v_weigh_rows = list(zip(f1_scores_weigh, exec_time, p, q, walk_num, walk_len))
pd.DataFrame(
    n2v_weigh_rows,
    columns=["F1-weigh", "Exec time", "p", "q", "walk_num", "walk_len"],
).sort_values(by="F1-weigh")

Unnamed: 0,F1-weigh,Exec time,p,q,walk_num,walk_len
8,0.942284,10.692874,0.5,0.5,40.0,5.0
15,0.955045,16.266213,1.0,0.5,80.0,10.0
13,0.955086,13.568689,1.0,0.5,80.0,5.0
9,0.95515,11.893926,0.5,0.5,80.0,5.0
20,0.955322,10.213931,2.0,1.0,40.0,5.0
3,0.960757,10.022111,1.0,1.0,80.0,10.0
0,0.96151,7.215413,1.0,1.0,40.0,5.0
16,0.961601,11.407092,1.0,2.0,40.0,5.0
17,0.967762,12.942001,1.0,2.0,80.0,5.0
19,0.967771,16.335857,1.0,2.0,80.0,10.0


### DW

In [14]:
### Parse the dw run log: number of tests plus one value per test for each setting
tests_num = 0
exec_time, walk_num, walk_len = [], [], []

# (substring that identifies the line, list collecting the value,
#  slice of the line holding the numeric text)
dw_fields = (
    ("walk_len:", walk_len, slice(10, -1)),
    ("num_walks:", walk_num, slice(10, -1)),
    ("Embedding", exec_time, slice(31, -2)),
)

with open("./dw/dw_info.txt", "r") as f:
    for line in f:
        # Every line mentioning "Test" counts as one test
        if "Test" in line:
            tests_num += 1
        # The checks are independent: a line is tested against every marker
        for marker, values, span in dw_fields:
            if marker in line:
                values.append(float(line[span]))

In [15]:
f1_scores = []
f1_scores_macro = []
f1_scores_weigh = []

# Loop-invariant inputs, built once instead of once per test.
node_ids = set(nodes)  # constant-time membership for the row filter below
# Targets: community of every node as found by the Leiden algorithm
y_data = [comms_dict.get(node) for node in nodes]

# NOTE(review): neither the forest nor the split is seeded, so scores vary between runs.
# start_time / end_time bracket the whole sweep; s_t times a single test.
start_time = time.time()
for i in range(tests_num):
    ### Random forest classifier creation with 70 trees (fresh model for every test)
    clf = RandomForestClassifier(n_estimators=70)
    s_t = time.time()

    # Input: embedding vectors of test i. Only rows whose 1-based position is a
    # node id still in the graph are kept (assumes row k belongs to node k + 1 — TODO confirm).
    emb_rows = pd.read_csv("./dw/dw_emb_vectors" + str(i) + ".csv", header=None, sep=";").values.tolist()
    X_data = [row for pos, row in enumerate(emb_rows) if (pos + 1) in node_ids]

    # Split the data into training set and test set (80% / 20%)
    X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.2)

    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    ### F1 metrics (micro, macro and weighted averages)
    f1_scores.append(metrics.f1_score(y_test, y_pred, average="micro"))
    f1_scores_macro.append(metrics.f1_score(y_test, y_pred, average="macro"))
    f1_scores_weigh.append(metrics.f1_score(y_test, y_pred, average="weighted"))
    print("Completed iteration in:", time.time() - s_t)

end_time = time.time()

Completed iteration in: 0.20232343673706055
Completed iteration in: 0.17332172393798828
Completed iteration in: 0.1839277744293213
Completed iteration in: 0.20901107788085938
Completed iteration in: 0.2070019245147705
Completed iteration in: 0.2218935489654541
Completed iteration in: 0.21371006965637207
Completed iteration in: 0.19402432441711426
Completed iteration in: 0.18353986740112305
Completed iteration in: 0.18134832382202148
Completed iteration in: 0.19866275787353516
Completed iteration in: 0.18774008750915527
Completed iteration in: 0.18398404121398926
Completed iteration in: 0.19134116172790527
Completed iteration in: 0.2015514373779297


In [16]:
# Summary statistics of the three F1 averages over all tests
df = pd.DataFrame(
    {
        "Micro": pd.Series(f1_scores).describe(),
        "Macro": pd.Series(f1_scores_macro).describe(),
        "Weigh": pd.Series(f1_scores_weigh).describe(),
    }
)
df

Unnamed: 0,Micro,Macro,Weigh
count,15.0,15.0,15.0
mean,0.963248,0.960106,0.96321
std,0.015789,0.017886,0.015776
min,0.942308,0.932762,0.94218
25%,0.951923,0.946916,0.951611
50%,0.961538,0.959917,0.961489
75%,0.974359,0.972567,0.974286
max,0.987179,0.987883,0.987179


In [17]:
# One row per dw test: micro-F1 with its run time and walk settings, lowest score first
dw_micro_rows = list(zip(f1_scores, exec_time, walk_num, walk_len))
pd.DataFrame(
    dw_micro_rows,
    columns=["F1-micro", "Exec time", "walk_num", "walk_len"],
).sort_values(by="F1-micro")

Unnamed: 0,F1-micro,Exec time,walk_num,walk_len
6,0.942308,3.092509,40.0,10.0
7,0.942308,6.780724,80.0,10.0
4,0.948718,1.075852,10.0,10.0
14,0.948718,10.251881,40.0,30.0
2,0.955128,1.407827,40.0,5.0
5,0.955128,1.798483,20.0,10.0
10,0.955128,7.233567,40.0,20.0
8,0.961538,1.912276,10.0,20.0
11,0.961538,13.178142,80.0,20.0
9,0.967949,3.54515,20.0,20.0


In [18]:
# One row per dw test: macro-F1 with its run time and walk settings, lowest score first
dw_macro_rows = list(zip(f1_scores_macro, exec_time, walk_num, walk_len))
pd.DataFrame(
    dw_macro_rows,
    columns=["F1-macro", "Exec time", "walk_num", "walk_len"],
).sort_values(by="F1-macro")

Unnamed: 0,F1-macro,Exec time,walk_num,walk_len
7,0.932762,6.780724,80.0,10.0
6,0.938282,3.092509,40.0,10.0
14,0.942271,10.251881,40.0,30.0
4,0.946447,1.075852,10.0,10.0
5,0.947384,1.798483,20.0,10.0
2,0.951048,1.407827,40.0,5.0
10,0.951268,7.233567,40.0,20.0
8,0.959917,1.912276,10.0,20.0
11,0.961425,13.178142,80.0,20.0
9,0.966541,3.54515,20.0,20.0


In [19]:
# One row per dw test: weighted F1 with its run time and walk settings, lowest score first
dw_weigh_rows = list(zip(f1_scores_weigh, exec_time, walk_num, walk_len))
pd.DataFrame(
    dw_weigh_rows,
    columns=["F1-weigh", "Exec time", "walk_num", "walk_len"],
).sort_values(by="F1-weigh")

Unnamed: 0,F1-weigh,Exec time,walk_num,walk_len
6,0.94218,3.092509,40.0,10.0
7,0.943051,6.780724,80.0,10.0
4,0.948213,1.075852,10.0,10.0
14,0.948534,10.251881,40.0,30.0
2,0.954688,1.407827,40.0,5.0
5,0.95485,1.798483,20.0,10.0
10,0.955445,7.233567,40.0,20.0
8,0.961489,1.912276,10.0,20.0
11,0.961662,13.178142,80.0,20.0
9,0.968037,3.54515,20.0,20.0


### MNMF

In [20]:
### Parse the mnmf run log: number of tests plus one value per test for each setting
tests_num = 0
exec_time, dim, it = [], [], []

# (substring that identifies the line, list collecting the value,
#  slice of the line holding the numeric text)
mnmf_fields = (
    ("dimensions", dim, slice(12, None)),
    ("iterations:", it, slice(12, None)),
    ("Embedding", exec_time, slice(31, -2)),
)

with open("./mnmf/mnmf_info.txt", "r") as f:
    for line in f:
        # Every line mentioning "Test" counts as one test
        if "Test" in line:
            tests_num += 1
        # The checks are independent: a line is tested against every marker
        for marker, values, span in mnmf_fields:
            if marker in line:
                values.append(float(line[span]))

In [21]:
f1_scores = []
f1_scores_macro = []
f1_scores_weigh = []

# Loop-invariant inputs, built once instead of once per test.
node_ids = set(nodes)  # constant-time membership for the row filter below
# Targets: community of every node as found by the Leiden algorithm
y_data = [comms_dict.get(node) for node in nodes]

# NOTE(review): neither the forest nor the split is seeded, so scores vary between runs.
# start_time / end_time bracket the whole sweep; s_t times a single test.
start_time = time.time()
for i in range(tests_num):
    ### Random forest classifier creation with 70 trees (fresh model for every test)
    clf = RandomForestClassifier(n_estimators=70)
    s_t = time.time()

    # Input: embedding vectors of test i. Only rows whose 1-based position is a
    # node id still in the graph are kept (assumes row k belongs to node k + 1 — TODO confirm).
    emb_rows = pd.read_csv("./mnmf/mnmf_emb_vectors" + str(i) + ".csv", header=None, sep=";").values.tolist()
    X_data = [row for pos, row in enumerate(emb_rows) if (pos + 1) in node_ids]

    # Split the data into training set and test set (80% / 20%)
    X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.2)

    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    ### F1 metrics (micro, macro and weighted averages)
    f1_scores.append(metrics.f1_score(y_test, y_pred, average="micro"))
    f1_scores_macro.append(metrics.f1_score(y_test, y_pred, average="macro"))
    f1_scores_weigh.append(metrics.f1_score(y_test, y_pred, average="weighted"))
    print("Completed iteration in:", time.time() - s_t)

end_time = time.time()

Completed iteration in: 0.11961245536804199
Completed iteration in: 0.09112215042114258
Completed iteration in: 0.1023705005645752
Completed iteration in: 0.11526751518249512
Completed iteration in: 0.11420488357543945
Completed iteration in: 0.1050107479095459
Completed iteration in: 0.15201783180236816
Completed iteration in: 0.12703251838684082
Completed iteration in: 0.16389012336730957
Completed iteration in: 0.14551973342895508


In [22]:
# Summary statistics of the three F1 averages over all tests
df = pd.DataFrame(
    {
        "Micro": pd.Series(f1_scores).describe(),
        "Macro": pd.Series(f1_scores_macro).describe(),
        "Weigh": pd.Series(f1_scores_weigh).describe(),
    }
)
df

Unnamed: 0,Micro,Macro,Weigh
count,10.0,10.0,10.0
mean,0.967949,0.967096,0.967985
std,0.011307,0.01171,0.011314
min,0.955128,0.953822,0.954817
25%,0.956731,0.955667,0.956855
50%,0.967949,0.967778,0.968105
75%,0.974359,0.974233,0.974322
max,0.987179,0.98688,0.9872


In [23]:
# One row per mnmf test: micro-F1 with run time, dimensions and iterations, lowest score first
mnmf_micro_rows = list(zip(f1_scores, exec_time, dim, it))
pd.DataFrame(
    mnmf_micro_rows,
    columns=["F1-micro", "Exec time", "Dimensions", "Iterations"],
).sort_values(by="F1-micro")

Unnamed: 0,F1-micro,Exec time,Dimensions,Iterations
1,0.955128,1.749397,8.0,200.0
2,0.955128,1.368036,16.0,100.0
8,0.955128,24.153916,128.0,100.0
5,0.961538,18.921276,32.0,200.0
4,0.967949,7.710691,32.0,100.0
9,0.967949,55.908975,128.0,200.0
0,0.974359,0.995836,8.0,100.0
7,0.974359,39.003183,64.0,200.0
3,0.980769,2.778379,16.0,200.0
6,0.987179,13.537538,64.0,100.0


In [24]:
# One row per mnmf test: macro-F1 with run time, dimensions and iterations, lowest score first
mnmf_macro_rows = list(zip(f1_scores_macro, exec_time, dim, it))
pd.DataFrame(
    mnmf_macro_rows,
    columns=["F1-macro", "Exec time", "Dimensions", "Iterations"],
).sort_values(by="F1-macro")

Unnamed: 0,F1-macro,Exec time,Dimensions,Iterations
2,0.953822,1.368036,16.0,100.0
1,0.954231,1.749397,8.0,200.0
8,0.955091,24.153916,128.0,100.0
5,0.957395,18.921276,32.0,200.0
4,0.967088,7.710691,32.0,100.0
9,0.968468,55.908975,128.0,200.0
0,0.973633,0.995836,8.0,100.0
7,0.974433,39.003183,64.0,200.0
3,0.979915,2.778379,16.0,200.0
6,0.98688,13.537538,64.0,100.0


In [25]:
# One row per mnmf test: weighted F1 with run time, dimensions and iterations, lowest score first
mnmf_weigh_rows = list(zip(f1_scores_weigh, exec_time, dim, it))
pd.DataFrame(
    mnmf_weigh_rows,
    columns=["F1-weigh", "Exec time", "Dimensions", "Iterations"],
).sort_values(by="F1-weigh")

Unnamed: 0,F1-weigh,Exec time,Dimensions,Iterations
2,0.954817,1.368036,16.0,100.0
1,0.955075,1.749397,8.0,200.0
8,0.955112,24.153916,128.0,100.0
5,0.962083,18.921276,32.0,200.0
9,0.968099,55.908975,128.0,200.0
4,0.96811,7.710691,32.0,100.0
0,0.974307,0.995836,8.0,100.0
7,0.974327,39.003183,64.0,200.0
3,0.980714,2.778379,16.0,200.0
6,0.9872,13.537538,64.0,100.0


### AVPRA

In [26]:
### Reading VLs from file
# NOTE: unpickling can execute code stored in the file — only load pickles from a trusted source.
obj = pd.read_pickle(filepath_or_buffer="./AVPRA_pred.pickled")

In [27]:
### Random forest classifier creation with 70 trees
clf = RandomForestClassifier(n_estimators=70)
start_time = time.time()
accuracies = []
f1_scores_macro = []
f1_scores_weigh = []

# Loop-invariant inputs, built once instead of once per result.
node_ids = set(nodes)  # constant-time membership for the row filter below
# Targets: community of every node as found by the Leiden algorithm
y_data = [comms_dict.get(node) for node in nodes]

# NOTE(review): neither the forest nor the split is seeded, so scores vary between runs.
for res in obj:
    s_time = time.time()
    # Input: the vectors stored in res[1]. Only entries whose 1-based position is a
    # node id still in the graph are kept (assumes entry k belongs to node k + 1 — TODO confirm).
    vectors = res[1]
    X_data = [vectors[pos] for pos in range(len(vectors)) if (pos + 1) in node_ids]

    # Split the data into training set and test set (80% / 20%)
    X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.2)

    # The same estimator object is refitted for every result
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    ### Accuracy and F1 metrics
    accuracies.append(metrics.accuracy_score(y_test, y_pred))
    f1_scores_macro.append(metrics.f1_score(y_test, y_pred, average="macro"))
    f1_scores_weigh.append(metrics.f1_score(y_test, y_pred, average="weighted"))

    print(f"Iteration completed in {time.time() - s_time}")
end_time = time.time()

Iteration completed in 0.10297822952270508
Iteration completed in 0.08941864967346191
Iteration completed in 0.1000511646270752
Iteration completed in 0.13392925262451172
Iteration completed in 0.1260991096496582
Iteration completed in 0.14834022521972656
Iteration completed in 0.13643264770507812
Iteration completed in 0.13856720924377441
Iteration completed in 0.11435890197753906
Iteration completed in 0.11973023414611816
Iteration completed in 0.10692238807678223
Iteration completed in 0.10859394073486328
Iteration completed in 0.11374878883361816
Iteration completed in 0.1078181266784668
Iteration completed in 0.12432718276977539
Iteration completed in 0.12973928451538086
Iteration completed in 0.1301584243774414
Iteration completed in 0.13368582725524902
Iteration completed in 0.12401056289672852
Iteration completed in 0.10871648788452148
Iteration completed in 0.10724472999572754


In [28]:
### Function that returns the 10 / 1 index of the maximum values of a list
def get10maxidx(l):
    """Return the positions of the (at most) ten largest values of ``l``.

    Positions are ordered from the largest value downwards; equal values are
    ordered by descending position. Fewer than ten positions are returned
    when ``l`` has fewer than ten elements.
    """
    ranked = sorted(range(len(l)), key=lambda pos: (l[pos], pos), reverse=True)
    return ranked[:10]
def getmaxidx(l):
    """Return the position of the largest value of ``l`` (the first one on ties).

    Raises ``ValueError`` when ``l`` is empty.
    """
    # max() keeps the first maximal candidate, matching list.index(max(l))
    return max(range(len(l)), key=l.__getitem__)

In [29]:
### 10MWL classification
clf = RandomForestClassifier(n_estimators=70)
start_time2 = time.time()
accuracies2 = []
f1_scores2_macro = []
f1_scores2_weigh = []

# Loop-invariant inputs, built once instead of once per result.
node_ids = set(nodes)  # constant-time membership for the row filter below
# Targets: community of every node as found by the Leiden algorithm
y_data = [comms_dict.get(node) for node in nodes]

# NOTE(review): neither the forest nor the split is seeded, so scores vary between runs.
for res in obj:
    # Input: for every kept entry of res[1], the positions of its ten largest values.
    # Entries are kept when their 1-based position is a node id still in the graph
    # (assumes entry k belongs to node k + 1 — TODO confirm).
    vectors = res[1]
    X_data = [get10maxidx(vectors[pos]) for pos in range(len(vectors)) if (pos + 1) in node_ids]

    # Split the data into training set and test set (80% / 20%)
    X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.2)

    # The same estimator object is refitted for every result
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    ### Accuracy and F1 metrics
    accuracies2.append(metrics.accuracy_score(y_test, y_pred))
    f1_scores2_macro.append(metrics.f1_score(y_test, y_pred, average="macro"))
    f1_scores2_weigh.append(metrics.f1_score(y_test, y_pred, average="weighted"))

end_time2 = time.time()

In [30]:
### MWL classification
clf = RandomForestClassifier(n_estimators=70)
start_time3 = time.time()
accuracies3 = []
f1_scores3_macro = []
f1_scores3_weigh = []

# Loop-invariant inputs, built once instead of once per result.
node_ids = set(nodes)  # constant-time membership for the row filter below
# Targets: community of every node as found by the Leiden algorithm
y_data = [comms_dict.get(node) for node in nodes]

# NOTE(review): neither the forest nor the split is seeded, so scores vary between runs.
for res in obj:
    # Input: for every kept entry of res[1], a single feature — the position of its
    # largest value. Entries are kept when their 1-based position is a node id still
    # in the graph (assumes entry k belongs to node k + 1 — TODO confirm).
    vectors = res[1]
    X_data = [[getmaxidx(vectors[pos])] for pos in range(len(vectors)) if (pos + 1) in node_ids]

    # Split the data into training set and test set (80% / 20%)
    X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.2)

    # The same estimator object is refitted for every result
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    ### Accuracy and F1 metrics
    accuracies3.append(metrics.accuracy_score(y_test, y_pred))
    f1_scores3_macro.append(metrics.f1_score(y_test, y_pred, average="macro"))
    f1_scores3_weigh.append(metrics.f1_score(y_test, y_pred, average="weighted"))

end_time3 = time.time()

In [31]:
import matplotlib

# Switch to the pgf backend and let LaTeX (pdflatex) typeset all figure text
matplotlib.use("pgf")
pgf_settings = {
    "pgf.texsystem": "pdflatex",
    "font.family": "serif",
    "text.usetex": True,
    "pgf.rcfonts": False,
}
matplotlib.rcParams.update(pgf_settings)

### Comparison macro

In [32]:
# Macro-F1 per iteration for the three AVPRA feature variants (full, 10MWL, MWL)
plt.figure(figsize=(10, 6))
l = [*range(10), *range(10, 32, 2)]  # x positions: 0..9, then every second value up to 30
for macro_scores, series_label in (
    (f1_scores_macro, "AVPRA F1-score-macro"),
    (f1_scores2_macro, "AVPRA 10MWL F1-score-macro"),
    (f1_scores3_macro, "AVPRA MWL F1-score-macro"),
):
    plt.plot(l, macro_scores, "o", label=series_label, markersize=10)

# Dashed vertical reference line at x = 5
plt.axvline(x=5, linestyle="--", label="Diametro")

plt.xlabel("Iterazione", fontsize=20)
plt.ylabel("F1-score", fontsize=20)
plt.legend(loc="right", prop={"size": 16})
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.ylim(0, 1)

plt.savefig("F1_AVPRA_all_macro.png", dpi=500)
plt.show()

In [33]:
# Best AVPRA macro-F1 and the x position (iteration) at which it first occurs
best_macro = max(f1_scores_macro)
iteration_axis = [*range(10), *range(10, 32, 2)]
best_macro, iteration_axis[f1_scores_macro.index(best_macro)]

(0.9808405483405483, 8)

In [34]:
# Best 10MWL macro-F1 and the x position (iteration) at which it first occurs
best_macro2 = max(f1_scores2_macro)
iteration_axis = [*range(10), *range(10, 32, 2)]
best_macro2, iteration_axis[f1_scores2_macro.index(best_macro2)]

(0.9698276657805346, 4)

In [35]:
# Best MWL macro-F1 and the x position (iteration) at which it first occurs
best_macro3 = max(f1_scores3_macro)
iteration_axis = [*range(10), *range(10, 32, 2)]
best_macro3, iteration_axis[f1_scores3_macro.index(best_macro3)]

(0.9601477577332302, 3)

### Comparison weighted

In [36]:
# Weighted F1 per iteration for the three AVPRA feature variants (full, 10MWL, MWL)
plt.figure(figsize=(10, 6))
l = [*range(10), *range(10, 32, 2)]  # x positions: 0..9, then every second value up to 30
for weighted_scores, series_label in (
    (f1_scores_weigh, "AVPRA F1-score-weighted"),
    (f1_scores2_weigh, "AVPRA 10MWL F1-score-weighted"),
    (f1_scores3_weigh, "AVPRA MWL F1-score-weighted"),
):
    plt.plot(l, weighted_scores, "o", label=series_label, markersize=10)

# Dashed vertical reference line at x = 5
plt.axvline(x=5, linestyle="--", label="Diametro")

plt.xlabel("Iterazione", fontsize=20)
plt.ylabel("F1-score", fontsize=20)
plt.legend(loc="right", prop={"size": 16})
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.ylim(0, 1)

plt.savefig("F1_AVPRA_all_weighted.png", dpi=500)
plt.show()

In [37]:
# Best AVPRA weighted F1 and the x position (iteration) at which it first occurs
best_weigh = max(f1_scores_weigh)
iteration_axis = [*range(10), *range(10, 32, 2)]
best_weigh, iteration_axis[f1_scores_weigh.index(best_weigh)]

(0.9808417056022066, 28)

In [38]:
# Best 10MWL weighted F1 and the x position (iteration) at which it first occurs
best_weigh2 = max(f1_scores2_weigh)
iteration_axis = [*range(10), *range(10, 32, 2)]
best_weigh2, iteration_axis[f1_scores2_weigh.index(best_weigh2)]

(0.9742302201318594, 4)

In [39]:
# Best MWL weighted F1 and the x position (iteration) at which it first occurs
best_weigh3 = max(f1_scores3_weigh)
iteration_axis = [*range(10), *range(10, 32, 2)]
best_weigh3, iteration_axis[f1_scores3_weigh.index(best_weigh3)]

(0.9613601887125381, 2)

In [40]:
# AVPRA accuracy and macro-F1 per iteration, drawn on the same axes
plt.figure(figsize=(10, 6))
l = [*range(10), *range(10, 32, 2)]  # x positions: 0..9, then every second value up to 30
for series, marker, series_label, extra_style in (
    (accuracies, "o", "AVPRA Accuratezza", {"markersize": 10}),
    (f1_scores_macro, "x", "AVPRA F1-score-macro", {"color": "blue", "markersize": 12}),
):
    plt.plot(l, series, marker, label=series_label, **extra_style)

# Dashed vertical reference line at x = 5
plt.axvline(x=5, linestyle="--", label="Diametro")

plt.xlabel("Iterazione", fontsize=20)
plt.ylabel("Accuratezza/F1-Score", fontsize=20)
plt.legend(loc="right", prop={"size": 16})
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.ylim(0, 1)

plt.savefig("F1_AVPRA_macro.png", dpi=500)
plt.show()

In [41]:
# AVPRA accuracy and weighted F1 per iteration, drawn on the same axes
plt.figure(figsize=(10, 6))
l = [*range(10), *range(10, 32, 2)]  # x positions: 0..9, then every second value up to 30
for series, marker, series_label, extra_style in (
    (accuracies, "o", "AVPRA Accuratezza", {"markersize": 10}),
    (f1_scores_weigh, "x", "AVPRA F1-score-weighted", {"color": "blue", "markersize": 12}),
):
    plt.plot(l, series, marker, label=series_label, **extra_style)

# Dashed vertical reference line at x = 5
plt.axvline(x=5, linestyle="--", label="Diametro")

plt.xlabel("Iterazione", fontsize=20)
plt.ylabel("Accuratezza/F1-Score", fontsize=20)
plt.legend(loc="right", prop={"size": 16})
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.ylim(0, 1)

plt.savefig("F1_AVPRA_weighted.png", dpi=500)
plt.show()

In [42]:
# Best AVPRA accuracy and the x position (taken from `l`) at which it first occurs
top_accuracy = max(accuracies)
top_accuracy, l[accuracies.index(top_accuracy)]

(0.9807692307692307, 8)

In [43]:
# Best 10MWL accuracy and the x position (taken from `l`) at which it first occurs
top_accuracy2 = max(accuracies2)
top_accuracy2, l[accuracies2.index(top_accuracy2)]

(0.9743589743589743, 4)

In [44]:
# Best MWL accuracy and the x position (taken from `l`) at which it first occurs
top_accuracy3 = max(accuracies3)
top_accuracy3, l[accuracies3.index(top_accuracy3)]

(0.9615384615384616, 2)

In [45]:
# Accuracy per iteration for the three AVPRA feature variants (full, 10MWL, MWL)
plt.figure(figsize=(10, 6))
l = [*range(10), *range(10, 32, 2)]  # x positions: 0..9, then every second value up to 30
for accuracy_series, series_label in (
    (accuracies, "AVPRA Accuratezza"),
    (accuracies2, "AVPRA 10MWL Accuratezza"),
    (accuracies3, "AVPRA MWL Accuratezza"),
):
    plt.plot(l, accuracy_series, "o", label=series_label, markersize=10)

# Dashed vertical reference line at x = 5
plt.axvline(x=5, linestyle="--", label="Diametro")

plt.xlabel("Iterazione", fontsize=20)
plt.ylabel("Accuratezza", fontsize=20)
plt.legend(loc="right", prop={"size": 16})
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.ylim(0, 1)

plt.savefig("F1_AVPRA_all_micro.png", dpi=500)
plt.show()

### Only F1 micro

In [46]:
# AVPRA accuracy per iteration, on its own
plt.figure(figsize=(10, 6))
l = [*range(10), *range(10, 32, 2)]  # x positions: 0..9, then every second value up to 30
plt.plot(
    l,
    accuracies,
    "o",
    markersize=10,
    label="AVPRA Accuratezza",
)

# Dashed vertical reference line at x = 5
plt.axvline(x=5, linestyle="--", label="Diametro")

plt.xlabel("Iterazione", fontsize=20)
plt.ylabel("Accuratezza", fontsize=20)
plt.legend(loc="right", prop={"size": 16})
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.ylim(0, 1)

plt.savefig("F1_AVPRA_micro.png", dpi=500)
plt.show()