In [1]:
### Import useful libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn import metrics
import networkx as nx
import time
from cdlib import algorithms
import random
import csv
%matplotlib inline

Note: to be able to use all crisp methods, you need to install some additional packages:  {'wurlitzer', 'graph_tool'}
Note: to be able to use all overlapping methods, you need to install some additional packages:  {'ASLPAw'}
Note: to be able to use all bipartite methods, you need to install some additional packages:  {'wurlitzer'}


In [2]:
# Parse the edge list once as a directed graph with integer node ids.
G = nx.read_edgelist("./EDGES_FILE.csv", create_using=nx.DiGraph, nodetype=int)
# Independent working copy that later cells prune. Copying the in-memory graph
# avoids parsing the file a second time and guarantees H starts out identical to G.
H = G.copy()

In [3]:
# Number of nodes in the graph as loaded
G.number_of_nodes()

956

In [4]:
# Remove from H every node that has a self-loop in G, remembering which were dropped.
# Assumes G and H are still two distinct graph objects when this cell runs
# (G is only read here, H is the one being modified).
nodes = []
for src, dst in G.edges():
    # Skip ordinary edges, and self-loops whose node was already removed.
    if src != dst or not H.has_node(src):
        continue
    nodes.append(src)
    H.remove_node(src)

In [5]:
# Number of nodes left after pruning
H.number_of_nodes()

765

In [6]:
# From here on G names the pruned graph (the same object as H, not a copy).
G = H
# Check whether the pruned directed graph is weakly connected.
nx.is_weakly_connected(G)

True

In [7]:
### Identifying Leiden communities
# Node ids in ascending order; later cells iterate over this list to build the labels.
nodes = sorted(G.nodes())
# Community detection through cdlib's Leiden wrapper.
# NOTE(review): no seed is passed here, so the partition may differ between
# runs — confirm whether that matters for the scores reported below.
comms = algorithms.leiden(G)

In [8]:
# cdlib maps each node to a list of community ids; keep only the first id
# so that every node in `nodes` gets exactly one label.
comms_dict = comms.to_node_community_map()
comms_dict_ok = {node: comms_dict[node][0] for node in nodes}
comms_dict = comms_dict_ok

### N2V

In [9]:
### Getting total tests number and the per-test settings/timings from the N2V log
tests_num = 0
exec_time = []
p = []
q = []
walk_len = []
walk_num = []
with open("./n2v/n2v_info.txt", "r") as f:
    # Stream the file line by line.
    for line in f:
        if "Test" in line:
            tests_num += 1
        if "Embedding" in line:
            # Fixed-column format: the number starts at column 31 and is followed by
            # one trailing character (presumably a unit suffix — confirm against the
            # script that writes this log). The newline is stripped explicitly so the
            # slice stays correct even if the last line of the file has no "\n".
            exec_time.append(float(line.rstrip("\n")[31:-1]))
        if "p:" in line:
            p.append(float(line[3:]))
        if "q:" in line:
            q.append(float(line[3:]))
        # float() ignores surrounding whitespace, so the trailing newline does not
        # have to be sliced off; a blind [:-1] would drop a digit on a final line
        # that is not newline-terminated.
        if "walk_len:" in line:
            walk_len.append(float(line[10:]))
        if "num_walks:" in line:
            walk_num.append(float(line[10:]))

In [10]:
f1_scores = []
f1_scores_macro = []
f1_scores_weigh = []

# Labels are identical for every test, so build them once: the community of each
# node, in the order of `nodes`.
y_data = [comms_dict.get(node) for node in nodes]
# Set copy of `nodes` for O(1) membership tests while filtering embedding rows.
kept_nodes = set(nodes)

# NOTE(review): neither the split nor the forest is seeded, so the scores change on
# every run; pass random_state to both if the tables below must be reproducible.
for i in range(tests_num):
    ### Random forest classifier creation with 70 trees
    clf = RandomForestClassifier(n_estimators=70)
    s_t = time.time()
    # Input: keep only the embedding rows that belong to retained nodes.
    # Assumes row k of the CSV holds the vector of node id k + 1, and that `nodes`
    # is in ascending order so rows and labels line up — TODO confirm against the
    # script that exports the embeddings.
    all_rows = pd.read_csv("./n2v/n2v_emb_vectors" + str(i) + ".csv", header=None, sep=";").values.tolist()
    X_data = [row for row_idx, row in enumerate(all_rows) if (row_idx + 1) in kept_nodes]

    # Split the data into training set and test set (80% / 20%)
    X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.2)

    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    ### F1 scores on the held-out set
    f1_scores.append(metrics.f1_score(y_test, y_pred, average="micro"))
    f1_scores_macro.append(metrics.f1_score(y_test, y_pred, average="macro"))
    f1_scores_weigh.append(metrics.f1_score(y_test, y_pred, average="weighted"))
    print("Completed iteration in:", time.time() - s_t)

Completed iteration in: 0.21053171157836914
Completed iteration in: 0.18483328819274902
Completed iteration in: 0.20623159408569336
Completed iteration in: 0.22490620613098145
Completed iteration in: 0.19811534881591797
Completed iteration in: 0.2781236171722412
Completed iteration in: 0.31473374366760254
Completed iteration in: 0.25914669036865234
Completed iteration in: 0.19150137901306152
Completed iteration in: 0.22289824485778809
Completed iteration in: 0.20046448707580566
Completed iteration in: 0.2336583137512207
Completed iteration in: 0.18496346473693848
Completed iteration in: 0.2163865566253662
Completed iteration in: 0.20549941062927246
Completed iteration in: 0.32935309410095215
Completed iteration in: 0.17831158638000488
Completed iteration in: 0.2236626148223877
Completed iteration in: 0.22511696815490723
Completed iteration in: 0.23627018928527832
Completed iteration in: 0.18973875045776367
Completed iteration in: 0.22016382217407227
Completed iteration in: 0.2243664264

In [11]:
# Summary statistics of each F1 variant over all tests, one column per variant
df = pd.DataFrame({
    "Micro": pd.Series(f1_scores).describe(),
    "Macro": pd.Series(f1_scores_macro).describe(),
    "Weigh": pd.Series(f1_scores_weigh).describe(),
})
df

Unnamed: 0,Micro,Macro,Weigh
count,24.0,24.0,24.0
mean,0.972222,0.971005,0.972268
std,0.01192,0.011986,0.01191
min,0.947712,0.951863,0.947809
25%,0.965686,0.960184,0.965541
50%,0.973856,0.973606,0.973924
75%,0.980392,0.979881,0.980409
max,0.993464,0.992449,0.993448


In [12]:
# Per-test F1-micro next to the run's timing and p/q/walk settings, lowest score first
pd.DataFrame(
    data=list(zip(f1_scores, exec_time, p, q, walk_num, walk_len)),
    columns=["F1-micro", "Exec time", "p", "q", "walk_num", "walk_len"],
).sort_values("F1-micro")

Unnamed: 0,F1-micro,Exec time,p,q,walk_num,walk_len
1,0.947712,7.993166,1.0,1.0,80.0,5.0
23,0.954248,14.734928,2.0,1.0,80.0,10.0
14,0.954248,13.128179,1.0,0.5,40.0,10.0
19,0.960784,16.335857,1.0,2.0,80.0,10.0
16,0.960784,11.407092,1.0,2.0,40.0,5.0
15,0.960784,16.266213,1.0,0.5,80.0,10.0
21,0.96732,11.243125,2.0,1.0,80.0,5.0
11,0.96732,17.349393,0.5,0.5,80.0,10.0
4,0.96732,10.246411,0.5,1.0,40.0,5.0
3,0.96732,10.022111,1.0,1.0,80.0,10.0


In [13]:
# Per-test F1-macro next to the run's timing and p/q/walk settings, lowest score first
pd.DataFrame(
    data=list(zip(f1_scores_macro, exec_time, p, q, walk_num, walk_len)),
    columns=["F1-macro", "Exec time", "p", "q", "walk_num", "walk_len"],
).sort_values("F1-macro")

Unnamed: 0,F1-macro,Exec time,p,q,walk_num,walk_len
1,0.951863,7.993166,1.0,1.0,80.0,5.0
23,0.953639,14.734928,2.0,1.0,80.0,10.0
14,0.953773,13.128179,1.0,0.5,40.0,10.0
16,0.956399,11.407092,1.0,2.0,40.0,5.0
15,0.957326,16.266213,1.0,0.5,80.0,10.0
19,0.958222,16.335857,1.0,2.0,80.0,10.0
3,0.960837,10.022111,1.0,1.0,80.0,10.0
11,0.963377,17.349393,0.5,0.5,80.0,10.0
7,0.964752,13.87576,0.5,1.0,80.0,10.0
21,0.967903,11.243125,2.0,1.0,80.0,5.0


In [14]:
# Per-test weighted F1 next to the run's timing and p/q/walk settings, lowest score first
pd.DataFrame(
    data=list(zip(f1_scores_weigh, exec_time, p, q, walk_num, walk_len)),
    columns=["F1-weigh", "Exec time", "p", "q", "walk_num", "walk_len"],
).sort_values("F1-weigh")

Unnamed: 0,F1-weigh,Exec time,p,q,walk_num,walk_len
1,0.947809,7.993166,1.0,1.0,80.0,5.0
14,0.954299,13.128179,1.0,0.5,40.0,10.0
23,0.954514,14.734928,2.0,1.0,80.0,10.0
15,0.960622,16.266213,1.0,0.5,80.0,10.0
16,0.960771,11.407092,1.0,2.0,40.0,5.0
19,0.960815,16.335857,1.0,2.0,80.0,10.0
7,0.967116,13.87576,0.5,1.0,80.0,10.0
11,0.967236,17.349393,0.5,0.5,80.0,10.0
3,0.967441,10.022111,1.0,1.0,80.0,10.0
4,0.967475,10.246411,0.5,1.0,40.0,5.0


### DW

In [15]:
### Getting total tests number and the per-test settings/timings from the DW log
tests_num = 0
exec_time = []
walk_num = []
walk_len = []
with open("./dw/dw_info.txt", "r") as f:
    # Stream the file line by line.
    for line in f:
        if "Test" in line:
            tests_num += 1
        # float() ignores surrounding whitespace, so the trailing newline does not
        # have to be sliced off; a blind [:-1] would drop a digit on a final line
        # that is not newline-terminated.
        if "walk_len:" in line:
            walk_len.append(float(line[10:]))
        if "num_walks:" in line:
            walk_num.append(float(line[10:]))
        if "Embedding" in line:
            # Fixed-column format: the number starts at column 31 and is followed by
            # one trailing character (presumably a unit suffix — confirm against the
            # script that writes this log). The newline is stripped explicitly so the
            # slice stays correct even if the last line of the file has no "\n".
            exec_time.append(float(line.rstrip("\n")[31:-1]))

In [16]:
f1_scores = []
f1_scores_macro = []
f1_scores_weigh = []

# Labels are identical for every test, so build them once: the community of each
# node, in the order of `nodes`.
y_data = [comms_dict.get(node) for node in nodes]
# Set copy of `nodes` for O(1) membership tests while filtering embedding rows.
kept_nodes = set(nodes)

# NOTE(review): neither the split nor the forest is seeded, so the scores change on
# every run; pass random_state to both if the tables below must be reproducible.
for i in range(tests_num):
    ### Random forest classifier creation with 70 trees
    clf = RandomForestClassifier(n_estimators=70)
    s_t = time.time()
    # Input: keep only the embedding rows that belong to retained nodes.
    # Assumes row k of the CSV holds the vector of node id k + 1, and that `nodes`
    # is in ascending order so rows and labels line up — TODO confirm against the
    # script that exports the embeddings.
    all_rows = pd.read_csv("./dw/dw_emb_vectors" + str(i) + ".csv", header=None, sep=";").values.tolist()
    X_data = [row for row_idx, row in enumerate(all_rows) if (row_idx + 1) in kept_nodes]

    # Split the data into training set and test set (80% / 20%)
    X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.2)

    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    ### F1 scores on the held-out set
    f1_scores.append(metrics.f1_score(y_test, y_pred, average="micro"))
    f1_scores_macro.append(metrics.f1_score(y_test, y_pred, average="macro"))
    f1_scores_weigh.append(metrics.f1_score(y_test, y_pred, average="weighted"))
    print("Completed iteration in:", time.time() - s_t)

Completed iteration in: 0.17334198951721191
Completed iteration in: 0.15100383758544922
Completed iteration in: 0.17093443870544434
Completed iteration in: 0.17643976211547852
Completed iteration in: 0.16496992111206055
Completed iteration in: 0.1593186855316162
Completed iteration in: 0.19255399703979492
Completed iteration in: 0.1787552833557129
Completed iteration in: 0.17047429084777832
Completed iteration in: 0.17928743362426758
Completed iteration in: 0.191359281539917
Completed iteration in: 0.17358040809631348
Completed iteration in: 0.17797231674194336
Completed iteration in: 0.1743934154510498
Completed iteration in: 0.18355107307434082


In [17]:
# Summary statistics of each F1 variant over all tests, one column per variant
df = pd.DataFrame({
    "Micro": pd.Series(f1_scores).describe(),
    "Macro": pd.Series(f1_scores_macro).describe(),
    "Weigh": pd.Series(f1_scores_weigh).describe(),
})
df

Unnamed: 0,Micro,Macro,Weigh
count,15.0,15.0,15.0
mean,0.975163,0.97322,0.975139
std,0.01164,0.012196,0.011672
min,0.954248,0.951302,0.954025
25%,0.970588,0.967312,0.970459
50%,0.980392,0.975178,0.980213
75%,0.980392,0.979858,0.980503
max,0.993464,0.990939,0.99343


In [18]:
# Per-test F1-micro next to the run's timing and walk settings, lowest score first
pd.DataFrame(
    data=list(zip(f1_scores, exec_time, walk_num, walk_len)),
    columns=["F1-micro", "Exec time", "walk_num", "walk_len"],
).sort_values("F1-micro")

Unnamed: 0,F1-micro,Exec time,walk_num,walk_len
2,0.954248,1.407827,40.0,5.0
14,0.954248,10.251881,40.0,30.0
7,0.960784,6.780724,80.0,10.0
10,0.96732,7.233567,40.0,20.0
0,0.973856,0.402893,10.0,5.0
3,0.973856,2.473622,80.0,5.0
13,0.973856,5.350335,20.0,30.0
1,0.980392,0.685927,20.0,5.0
4,0.980392,1.075852,10.0,10.0
5,0.980392,1.798483,20.0,10.0


In [19]:
# Per-test F1-macro next to the run's timing and walk settings, lowest score first
pd.DataFrame(
    data=list(zip(f1_scores_macro, exec_time, walk_num, walk_len)),
    columns=["F1-macro", "Exec time", "walk_num", "walk_len"],
).sort_values("F1-macro")

Unnamed: 0,F1-macro,Exec time,walk_num,walk_len
2,0.951302,1.407827,40.0,5.0
14,0.952415,10.251881,40.0,30.0
7,0.956753,6.780724,80.0,10.0
10,0.964369,7.233567,40.0,20.0
0,0.970255,0.402893,10.0,5.0
13,0.974168,5.350335,20.0,30.0
5,0.974731,1.798483,20.0,10.0
3,0.975178,2.473622,80.0,5.0
12,0.976753,2.839129,10.0,30.0
6,0.978394,3.092509,40.0,10.0


In [20]:
# Per-test weighted F1 next to the run's timing and walk settings, lowest score first
pd.DataFrame(
    data=list(zip(f1_scores_weigh, exec_time, walk_num, walk_len)),
    columns=["F1-weigh", "Exec time", "walk_num", "walk_len"],
).sort_values("F1-weigh")

Unnamed: 0,F1-weigh,Exec time,walk_num,walk_len
2,0.954025,1.407827,40.0,5.0
14,0.954203,10.251881,40.0,30.0
7,0.960831,6.780724,80.0,10.0
10,0.96706,7.233567,40.0,20.0
13,0.973858,5.350335,20.0,30.0
3,0.973934,2.473622,80.0,5.0
0,0.97408,0.402893,10.0,5.0
6,0.980213,3.092509,40.0,10.0
1,0.980276,0.685927,20.0,5.0
5,0.980338,1.798483,20.0,10.0


### MNMF

In [21]:
### Getting total tests number and the per-test settings/timings from the MNMF log
tests_num = 0
exec_time = []
dim = []
it = []
with open("./mnmf/mnmf_info.txt", "r") as f:
    # Stream the file line by line.
    for line in f:
        if "Test" in line:
            tests_num += 1
        if "dimensions" in line:
            dim.append(float(line[12:]))
        if "iterations:" in line:
            it.append(float(line[12:]))
        if "Embedding" in line:
            # Fixed-column format: the number starts at column 31 and is followed by
            # one trailing character (presumably a unit suffix — confirm against the
            # script that writes this log). The newline is stripped explicitly so the
            # slice stays correct even if the last line of the file has no "\n".
            exec_time.append(float(line.rstrip("\n")[31:-1]))

In [22]:
f1_scores = []
f1_scores_macro = []
f1_scores_weigh = []

# Labels are identical for every test, so build them once: the community of each
# node, in the order of `nodes`.
y_data = [comms_dict.get(node) for node in nodes]
# Set copy of `nodes` for O(1) membership tests while filtering embedding rows.
kept_nodes = set(nodes)

# NOTE(review): neither the split nor the forest is seeded, so the scores change on
# every run; pass random_state to both if the tables below must be reproducible.
for i in range(tests_num):
    ### Random forest classifier creation with 70 trees
    clf = RandomForestClassifier(n_estimators=70)
    s_t = time.time()
    # Input: keep only the embedding rows that belong to retained nodes.
    # Assumes row k of the CSV holds the vector of node id k + 1, and that `nodes`
    # is in ascending order so rows and labels line up — TODO confirm against the
    # script that exports the embeddings.
    all_rows = pd.read_csv("./mnmf/mnmf_emb_vectors" + str(i) + ".csv", header=None, sep=";").values.tolist()
    X_data = [row for row_idx, row in enumerate(all_rows) if (row_idx + 1) in kept_nodes]

    # Split the data into training set and test set (80% / 20%)
    X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.2)

    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    ### F1 scores on the held-out set
    f1_scores.append(metrics.f1_score(y_test, y_pred, average="micro"))
    f1_scores_macro.append(metrics.f1_score(y_test, y_pred, average="macro"))
    f1_scores_weigh.append(metrics.f1_score(y_test, y_pred, average="weighted"))
    print("Completed iteration in:", time.time() - s_t)

Completed iteration in: 0.12032055854797363
Completed iteration in: 0.08609390258789062
Completed iteration in: 0.10272741317749023
Completed iteration in: 0.10661673545837402
Completed iteration in: 0.1066741943359375
Completed iteration in: 0.10852956771850586
Completed iteration in: 0.12688612937927246
Completed iteration in: 0.1289222240447998
Completed iteration in: 0.14737439155578613
Completed iteration in: 0.14587640762329102


In [23]:
# Summary statistics of each F1 variant over all tests, one column per variant
df = pd.DataFrame({
    "Micro": pd.Series(f1_scores).describe(),
    "Macro": pd.Series(f1_scores_macro).describe(),
    "Weigh": pd.Series(f1_scores_weigh).describe(),
})
df

Unnamed: 0,Micro,Macro,Weigh
count,10.0,10.0,10.0
mean,0.972549,0.97193,0.9726
std,0.015031,0.014653,0.014964
min,0.947712,0.950437,0.948049
25%,0.964052,0.964359,0.963938
50%,0.973856,0.971854,0.973835
75%,0.978758,0.978669,0.978936
max,0.993464,0.993329,0.993469


In [24]:
# Per-test F1-micro next to the run's timing, dimensions and iterations, lowest score first
pd.DataFrame(
    data=list(zip(f1_scores, exec_time, dim, it)),
    columns=["F1-micro", "Exec time", "Dimensions", "Iterations"],
).sort_values("F1-micro")

Unnamed: 0,F1-micro,Exec time,Dimensions,Iterations
6,0.947712,13.537538,64.0,100.0
8,0.954248,24.153916,128.0,100.0
9,0.960784,55.908975,128.0,200.0
0,0.973856,0.995836,8.0,100.0
1,0.973856,1.749397,8.0,200.0
2,0.973856,1.368036,16.0,100.0
4,0.973856,7.710691,32.0,100.0
3,0.980392,2.778379,16.0,200.0
5,0.993464,18.921276,32.0,200.0
7,0.993464,39.003183,64.0,200.0


In [25]:
# Per-test F1-macro next to the run's timing, dimensions and iterations, lowest score first
pd.DataFrame(
    data=list(zip(f1_scores_macro, exec_time, dim, it)),
    columns=["F1-macro", "Exec time", "Dimensions", "Iterations"],
).sort_values("F1-macro")

Unnamed: 0,F1-macro,Exec time,Dimensions,Iterations
6,0.950437,13.537538,64.0,100.0
8,0.95198,24.153916,128.0,100.0
9,0.962633,55.908975,128.0,200.0
0,0.969537,0.995836,8.0,100.0
1,0.971593,1.749397,8.0,200.0
2,0.972115,1.368036,16.0,100.0
4,0.974488,7.710691,32.0,100.0
3,0.980063,2.778379,16.0,200.0
7,0.99313,39.003183,64.0,200.0
5,0.993329,18.921276,32.0,200.0


In [26]:
# Per-test weighted F1 next to the run's timing, dimensions and iterations, lowest score first
pd.DataFrame(
    data=list(zip(f1_scores_weigh, exec_time, dim, it)),
    columns=["F1-weigh", "Exec time", "Dimensions", "Iterations"],
).sort_values("F1-weigh")

Unnamed: 0,F1-weigh,Exec time,Dimensions,Iterations
6,0.948049,13.537538,64.0,100.0
8,0.954389,24.153916,128.0,100.0
9,0.96074,55.908975,128.0,200.0
0,0.973531,0.995836,8.0,100.0
4,0.973778,7.710691,32.0,100.0
1,0.973893,1.749397,8.0,200.0
2,0.974157,1.368036,16.0,100.0
3,0.98053,2.778379,16.0,200.0
5,0.993466,18.921276,32.0,200.0
7,0.993469,39.003183,64.0,200.0


### AVPRA

In [27]:
### Reading VLs from file
# Load the pickled AVPRA results; later cells iterate over `obj` and read
# element [1] of each entry as the per-node vectors.
# NOTE(review): unpickling can execute arbitrary code — only load this file
# if it comes from a trusted source.
obj = pd.read_pickle("./AVPRA_pred.pickled")

In [29]:
### Random forest classifier creation with 70 trees
# One estimator object is reused for every entry of `obj`; each fit() call below
# retrains it on that entry's training split.
clf = RandomForestClassifier(n_estimators=70)
start_time = time.time()
accuracies = []
f1_scores_macro = []
f1_scores_weigh = []

# Labels are identical for every entry, so build them once: the community of each
# node, in the order of `nodes`.
y_data = [comms_dict.get(node) for node in nodes]
# Set copy of `nodes` for O(1) membership tests while filtering vectors.
kept_nodes = set(nodes)

# NOTE(review): neither the split nor the forest is seeded, so the scores change on
# every run; pass random_state to both if the figures must be reproducible.
for res in obj:
    s_time = time.time()
    # Input: keep only the vectors that belong to retained nodes.
    # Assumes res[1][k] is the vector of node id k + 1, and that `nodes` is in
    # ascending order so vectors and labels line up — TODO confirm.
    vectors = res[1]
    X_data = [vectors[row_idx] for row_idx in range(len(vectors)) if (row_idx + 1) in kept_nodes]

    # Split the data into training set and test set (80% / 20%)
    X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.2)

    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    ### Accuracy and F1 scores on the held-out set
    accuracies.append(metrics.accuracy_score(y_test, y_pred))
    f1_scores_macro.append(metrics.f1_score(y_test, y_pred, average="macro"))
    f1_scores_weigh.append(metrics.f1_score(y_test, y_pred, average="weighted"))

    print(f"Iteration completed in {time.time() - s_time}")
end_time = time.time()

Iteration completed in 0.10130739212036133
Iteration completed in 0.09215307235717773
Iteration completed in 0.1050100326538086
Iteration completed in 0.11096310615539551
Iteration completed in 0.09684062004089355
Iteration completed in 0.09367656707763672
Iteration completed in 0.10701274871826172
Iteration completed in 0.0941615104675293
Iteration completed in 0.09344244003295898
Iteration completed in 0.11207342147827148
Iteration completed in 0.1121053695678711
Iteration completed in 0.15716552734375
Iteration completed in 0.14570927619934082
Iteration completed in 0.14441227912902832
Iteration completed in 0.1334245204925537
Iteration completed in 0.11550045013427734
Iteration completed in 0.10151410102844238
Iteration completed in 0.10288357734680176
Iteration completed in 0.11579203605651855
Iteration completed in 0.10947155952453613
Iteration completed in 0.10305190086364746


In [30]:
### Helpers returning the indices of the 10 largest values of a list, and the index of its single largest value
def get10maxidx(l):
    """Return the positions of the (up to) 10 largest values of `l`, largest first.

    Equal values are ordered by descending position. A list with fewer than
    10 elements yields one index per element.
    """
    # Sort (value, position) pairs in descending order and keep the positions.
    ranked = sorted(((value, pos) for pos, value in enumerate(l)), reverse=True)
    return [pos for _, pos in ranked[:10]]
def getmaxidx(l):
    """Return the position of the largest value of `l` (the first one on ties).

    Raises ValueError when `l` is empty.
    """
    # max() keeps the first maximal candidate, so ties resolve to the lowest position.
    return max(range(len(l)), key=lambda pos: l[pos])

In [32]:
### 10MWL classification
# Same pipeline as the full-vector classification, but each node is described only
# by the positions of the 10 largest entries of its vector (see get10maxidx).
clf = RandomForestClassifier(n_estimators=70)
start_time2 = time.time()
accuracies2 = []
f1_scores2_macro = []
f1_scores2_weigh = []

# Labels are identical for every entry, so build them once: the community of each
# node, in the order of `nodes`.
y_data = [comms_dict.get(node) for node in nodes]
# Set copy of `nodes` for O(1) membership tests while filtering vectors.
kept_nodes = set(nodes)

# NOTE(review): neither the split nor the forest is seeded, so the scores change on
# every run; pass random_state to both if the figures must be reproducible.
for res in obj:
    # Input: top-10 positions of every retained node's vector.
    # Assumes res[1][k] is the vector of node id k + 1, and that `nodes` is in
    # ascending order so features and labels line up — TODO confirm.
    # NOTE(review): a vector with fewer than 10 entries would give a shorter
    # feature row than the others — verify this cannot happen.
    vectors = res[1]
    X_data = [get10maxidx(vectors[row_idx]) for row_idx in range(len(vectors)) if (row_idx + 1) in kept_nodes]

    # Split the data into training set and test set (80% / 20%)
    X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.2)

    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    ### Accuracy and F1 scores on the held-out set
    accuracies2.append(metrics.accuracy_score(y_test, y_pred))
    f1_scores2_macro.append(metrics.f1_score(y_test, y_pred, average="macro"))
    f1_scores2_weigh.append(metrics.f1_score(y_test, y_pred, average="weighted"))

end_time2 = time.time()

In [33]:
### MWL classification
# Same pipeline as the full-vector classification, but each node is described only
# by the position of the largest entry of its vector (see getmaxidx).
clf = RandomForestClassifier(n_estimators=70)
start_time3 = time.time()
accuracies3 = []
f1_scores3_macro = []
f1_scores3_weigh = []

# Labels are identical for every entry, so build them once: the community of each
# node, in the order of `nodes`.
y_data = [comms_dict.get(node) for node in nodes]
# Set copy of `nodes` for O(1) membership tests while filtering vectors.
kept_nodes = set(nodes)

# NOTE(review): neither the split nor the forest is seeded, so the scores change on
# every run; pass random_state to both if the figures must be reproducible.
for res in obj:
    # Input: a single feature per retained node, the position of its largest entry.
    # Assumes res[1][k] is the vector of node id k + 1, and that `nodes` is in
    # ascending order so features and labels line up — TODO confirm.
    vectors = res[1]
    X_data = [[getmaxidx(vectors[row_idx])] for row_idx in range(len(vectors)) if (row_idx + 1) in kept_nodes]

    # Split the data into training set and test set (80% / 20%)
    X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.2)

    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    ### Accuracy and F1 scores on the held-out set
    accuracies3.append(metrics.accuracy_score(y_test, y_pred))
    f1_scores3_macro.append(metrics.f1_score(y_test, y_pred, average="macro"))
    f1_scores3_weigh.append(metrics.f1_score(y_test, y_pred, average="weighted"))

end_time3 = time.time()

In [34]:
# NOTE(review): this import sits mid-notebook; consider moving it to the import cell at the top.
import matplotlib
# Switch matplotlib to the PGF backend for the figures produced below.
# NOTE(review): this runs after pyplot was imported and `%matplotlib inline` was set,
# so plt.show() in the following cells presumably no longer renders inline — confirm.
matplotlib.use("pgf")
# LaTeX-related settings: pdflatex as the TeX system, serif fonts, text rendered
# through TeX, and no font setup taken from matplotlib's rc parameters.
matplotlib.rcParams.update({
    "pgf.texsystem": "pdflatex",
    'font.family': 'serif',
    'text.usetex': True,
    'pgf.rcfonts': False
})

### Comparison macro

In [35]:
# F1-macro of the three AVPRA feature variants, one marker per saved iteration
plt.figure(figsize=(10, 6))
# x positions: 0..9 in steps of 1, then 10..30 in steps of 2
l = [*range(0, 10), *range(10, 32, 2)]
for scores, series_label in (
    (f1_scores_macro, "AVPRA F1-score-macro"),
    (f1_scores2_macro, "AVPRA 10MWL F1-score-macro"),
    (f1_scores3_macro, "AVPRA MWL F1-score-macro"),
):
    plt.plot(l, scores, "o", markersize=10, label=series_label)

# Dashed vertical reference line at x = 5
plt.axvline(x=5, linestyle="--", label="Diametro")

plt.xlabel("Iterazione", fontsize=20)
plt.ylabel("F1-score", fontsize=20)
plt.legend(loc="right", prop={'size': 16})
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.ylim(0, 1)

plt.savefig("F1_AVPRA_all_macro.png", dpi=500)
plt.show()

In [36]:
# Best F1-macro of the full-vector classifier and the x position where it occurs
best_score = max(f1_scores_macro)
best_score, [*range(0, 10), *range(10, 32, 2)][f1_scores_macro.index(best_score)]

(0.9930735930735931, 6)

In [37]:
# Best F1-macro of the 10MWL classifier and the x position where it occurs
best_score = max(f1_scores2_macro)
best_score, [*range(0, 10), *range(10, 32, 2)][f1_scores2_macro.index(best_score)]

(0.9671057613428409, 1)

In [38]:
# Best F1-macro of the MWL classifier and the x position where it occurs
best_score = max(f1_scores3_macro)
best_score, [*range(0, 10), *range(10, 32, 2)][f1_scores3_macro.index(best_score)]

(0.966128915540488, 3)

### Comparison weighted

In [39]:
# Weighted F1 of the three AVPRA feature variants, one marker per saved iteration
plt.figure(figsize=(10, 6))
# x positions: 0..9 in steps of 1, then 10..30 in steps of 2
l = [*range(0, 10), *range(10, 32, 2)]
for scores, series_label in (
    (f1_scores_weigh, "AVPRA F1-score-weighted"),
    (f1_scores2_weigh, "AVPRA 10MWL F1-score-weighted"),
    (f1_scores3_weigh, "AVPRA MWL F1-score-weighted"),
):
    plt.plot(l, scores, "o", markersize=10, label=series_label)

# Dashed vertical reference line at x = 5
plt.axvline(x=5, linestyle="--", label="Diametro")

plt.xlabel("Iterazione", fontsize=20)
plt.ylabel("F1-score", fontsize=20)
plt.legend(loc="right", prop={'size': 16})
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.ylim(0, 1)

plt.savefig("F1_AVPRA_all_weighted.png", dpi=500)
plt.show()

In [40]:
# Best weighted F1 of the full-vector classifier and the x position where it occurs
best_score = max(f1_scores_weigh)
best_score, [*range(0, 10), *range(10, 32, 2)][f1_scores_weigh.index(best_score)]

(0.9934357581416404, 6)

In [41]:
# Best weighted F1 of the 10MWL classifier and the x position where it occurs
best_score = max(f1_scores2_weigh)
best_score, [*range(0, 10), *range(10, 32, 2)][f1_scores2_weigh.index(best_score)]

(0.9671995812902053, 1)

In [42]:
# Best weighted F1 of the MWL classifier and the x position where it occurs
best_score = max(f1_scores3_weigh)
best_score, [*range(0, 10), *range(10, 32, 2)][f1_scores3_weigh.index(best_score)]

(0.9670879023608674, 3)

In [43]:
# Accuracy and F1-macro of the full-vector AVPRA classifier on the same axes
plt.figure(figsize=(10, 6))
# x positions: 0..9 in steps of 1, then 10..30 in steps of 2
l = [*range(0, 10), *range(10, 32, 2)]
plt.plot(l, accuracies, "o", markersize=10, label="AVPRA Accuratezza")
plt.plot(l, f1_scores_macro, "x", markersize=12, color="blue", label="AVPRA F1-score-macro")

# Dashed vertical reference line at x = 5
plt.axvline(x=5, linestyle="--", label="Diametro")

plt.xlabel("Iterazione", fontsize=20)
plt.ylabel("Accuratezza/F1-Score", fontsize=20)
plt.legend(loc="right", prop={'size': 16})
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.ylim(0, 1)

plt.savefig("F1_AVPRA_macro.png", dpi=500)
plt.show()

In [44]:
# Accuracy and weighted F1 of the full-vector AVPRA classifier on the same axes
plt.figure(figsize=(10, 6))
# x positions: 0..9 in steps of 1, then 10..30 in steps of 2
l = [*range(0, 10), *range(10, 32, 2)]
plt.plot(l, accuracies, "o", markersize=10, label="AVPRA Accuratezza")
plt.plot(l, f1_scores_weigh, "x", markersize=12, color="blue", label="AVPRA F1-score-weighted")

# Dashed vertical reference line at x = 5
plt.axvline(x=5, linestyle="--", label="Diametro")

plt.xlabel("Iterazione", fontsize=20)
plt.ylabel("Accuratezza/F1-Score", fontsize=20)
plt.legend(loc="right", prop={'size': 16})
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.ylim(0, 1)

plt.savefig("F1_AVPRA_weighted.png", dpi=500)
plt.show()

In [45]:
# Best accuracy of the full-vector classifier and the x position where it occurs
# (`l` is the x-position list defined by the plotting cells above)
best_score = max(accuracies)
best_score, l[accuracies.index(best_score)]

(0.9934640522875817, 6)

In [46]:
# Best accuracy of the 10MWL classifier and the x position where it occurs
# (`l` is the x-position list defined by the plotting cells above)
best_score = max(accuracies2)
best_score, l[accuracies2.index(best_score)]

(0.9673202614379085, 1)

In [47]:
# Best accuracy of the MWL classifier and the x position where it occurs
# (`l` is the x-position list defined by the plotting cells above)
best_score = max(accuracies3)
best_score, l[accuracies3.index(best_score)]

(0.9673202614379085, 3)

In [48]:
# Accuracy of the three AVPRA feature variants, one marker per saved iteration
plt.figure(figsize=(10, 6))
# x positions: 0..9 in steps of 1, then 10..30 in steps of 2
l = [*range(0, 10), *range(10, 32, 2)]
for scores, series_label in (
    (accuracies, "AVPRA Accuratezza"),
    (accuracies2, "AVPRA 10MWL Accuratezza"),
    (accuracies3, "AVPRA MWL Accuratezza"),
):
    plt.plot(l, scores, "o", markersize=10, label=series_label)

# Dashed vertical reference line at x = 5
plt.axvline(x=5, linestyle="--", label="Diametro")

plt.xlabel("Iterazione", fontsize=20)
plt.ylabel("Accuratezza", fontsize=20)
plt.legend(loc="right", prop={'size': 16})
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.ylim(0, 1)

plt.savefig("F1_AVPRA_all_micro.png", dpi=500)
plt.show()

### Only F1 micro

In [49]:
# Accuracy of the full-vector AVPRA classifier alone, one marker per saved iteration
plt.figure(figsize=(10, 6))
# x positions: 0..9 in steps of 1, then 10..30 in steps of 2
l = [*range(0, 10), *range(10, 32, 2)]
plt.plot(l, accuracies, "o", markersize=10, label="AVPRA Accuratezza")

# Dashed vertical reference line at x = 5
plt.axvline(x=5, linestyle="--", label="Diametro")

plt.xlabel("Iterazione", fontsize=20)
plt.ylabel("Accuratezza", fontsize=20)
plt.legend(loc="right", prop={'size': 16})
plt.xticks(fontsize=16)
plt.yticks(fontsize=16)
plt.ylim(0, 1)

plt.savefig("F1_AVPRA_micro.png", dpi=500)
plt.show()