In [1]:
### Import useful libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn import metrics
import networkx as nx
import time
from cdlib import algorithms
import random
import csv
%matplotlib inline

Note: to be able to use all crisp methods, you need to install some additional packages:  {'wurlitzer', 'graph_tool'}
Note: to be able to use all overlapping methods, you need to install some additional packages:  {'ASLPAw'}
Note: to be able to use all bipartite methods, you need to install some additional packages:  {'wurlitzer'}


In [2]:
### Identifying Leiden communities
# Load the graph from the edge list. Later cells look nodes up with str(i),
# i.e. node labels are handled as strings.
network = nx.read_edgelist("./edges_norm.csv")
# NOTE(review): `nodes` is not read by any cell visible here — confirm before removing.
nodes = network.nodes()
# Partition the graph with cdlib's Leiden implementation.
# NOTE(review): no seed is passed here, so the partition presumably differs
# between runs — confirm if reproducibility matters.
comms = algorithms.leiden(network)

In [3]:
# Node label -> sequence of community ids; later cells take element [0] as the node's label.
comms_dict = comms.to_node_community_map()

In [4]:
# One community label per node, ordered by node id 1..N. Nodes are keyed by
# their string label and [0] takes the first community listed for the node.
comms_list = [comms_dict.get(str(node_id))[0] for node_id in range(1, len(comms_dict) + 1)]
# newline="" is what the csv module asks for on files handed to csv.writer;
# without it, platforms that translate "\n" write a blank line after each row.
with open("twitch_leid_comms.csv", "w", newline="") as f:
    writer = csv.writer(f, delimiter=";")
    # All labels go on a single ';'-separated row.
    writer.writerow(comms_list)

### DW

In [5]:
### Getting total tests number
tests_num = 0
exec_time = []
walk_num = []
walk_len = []
# Read the whole DW log first, then scan it once the file is closed.
with open("./Twitch_dw/twitch_dw_info.txt", "r") as f:
    lines = f.readlines()

for line in lines:
    # The checks are independent (no elif): one line may satisfy several of them.
    if "Test" in line:
        tests_num = tests_num + 1
    if "walk_length:" in line:
        # Fixed-offset parse: skip the first 13 characters, drop the last one.
        walk_len.append(float(line[13:-1]))
    if "num_walk:" in line:
        # Fixed-offset parse: skip the first 9 characters, drop the last one.
        walk_num.append(float(line[9:-1]))
    if "Embedding" in line:
        # Fixed-offset parse: skip the first 31 characters, drop the last two.
        exec_time.append(float(line[31:-2]))

In [6]:
f1_scores = []
f1_scores_macro = []
f1_scores_weigh = []

# The community labels are the same for every embedding file, so build them
# once instead of on each iteration. A distinct comprehension variable also
# avoids reusing the loop index name `i`.
y_data = [comms_dict.get(str(node_id))[0] for node_id in range(1, len(comms_dict) + 1)]

# Time the whole sweep: start before the loop, stop after it, as the AVPRA
# classification cells do.
start_time = time.time()
for i in range(tests_num):
    ### Random forest classifier creation with 70 trees
    clf = RandomForestClassifier(n_estimators=70)
    s_t = time.time()
    # Input: embedding vectors of test i, one ';'-separated row per node.
    X_data = pd.read_csv("./Twitch_dw/dw_emb_vectors" + str(i) + ".csv", header=None, sep=";").values.tolist()

    # Split the data into training set and test set (20% held out).
    # NOTE(review): no random_state is passed to the split or to the forest,
    # so the scores change from run to run.
    X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.2)

    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    ### F1 scores (micro / macro / weighted) on the held-out nodes
    f1_scores.append(metrics.f1_score(y_test, y_pred, average="micro"))
    f1_scores_macro.append(metrics.f1_score(y_test, y_pred, average="macro"))
    f1_scores_weigh.append(metrics.f1_score(y_test, y_pred, average="weighted"))
    print("Completed iteration in:", time.time() - s_t)

end_time = time.time()

Completed iteration in: 103.45653533935547
Completed iteration in: 98.68963623046875
Completed iteration in: 105.90389013290405
Completed iteration in: 97.12405157089233
Completed iteration in: 98.16132926940918
Completed iteration in: 99.29832911491394


In [7]:
# describe() statistics of each F1 variant over the DW tests, one column per variant.
df = pd.DataFrame(
    {
        "Micro": pd.Series(f1_scores).describe(),
        "Macro": pd.Series(f1_scores_macro).describe(),
        "Weigh": pd.Series(f1_scores_weigh).describe(),
    }
)
df

Unnamed: 0,Micro,Macro,Weigh
count,6.0,6.0,6.0
mean,0.843743,0.907477,0.843416
std,0.010358,0.010005,0.010491
min,0.823454,0.888398,0.822756
25%,0.845047,0.907594,0.84507
50%,0.846073,0.909769,0.845772
75%,0.848482,0.911812,0.8482
max,0.852869,0.917565,0.852353


In [8]:
# Micro-F1 of each DW test next to its run parameters, lowest score first.
micro_rows = list(zip(f1_scores, exec_time, walk_num, walk_len))
micro_table = pd.DataFrame(micro_rows, columns=["F1-micro", "Exec time", "walk_num", "walk_len"])
micro_table.sort_values(by="F1-micro")

Unnamed: 0,F1-micro,Exec time,walk_num,walk_len
0,0.823454,430.843513,10.0,5.0
2,0.844868,1684.428942,40.0,5.0
1,0.845582,852.987108,20.0,5.0
5,0.846563,4032.707988,40.0,10.0
3,0.849121,1034.079826,10.0,10.0
4,0.852869,2063.098696,20.0,10.0


In [9]:
# Macro-F1 of each DW test next to its run parameters, lowest score first.
macro_rows = list(zip(f1_scores_macro, exec_time, walk_num, walk_len))
macro_table = pd.DataFrame(macro_rows, columns=["F1-macro", "Exec time", "walk_num", "walk_len"])
macro_table.sort_values(by="F1-macro")

Unnamed: 0,F1-macro,Exec time,walk_num,walk_len
0,0.888398,430.843513,10.0,5.0
2,0.907267,1684.428942,40.0,5.0
3,0.908576,1034.079826,10.0,10.0
5,0.910962,4032.707988,40.0,10.0
1,0.912095,852.987108,20.0,5.0
4,0.917565,2063.098696,20.0,10.0


In [10]:
# Weighted-F1 of each DW test next to its run parameters, lowest score first.
weighted_rows = list(zip(f1_scores_weigh, exec_time, walk_num, walk_len))
weighted_table = pd.DataFrame(weighted_rows, columns=["F1-weigh", "Exec time", "walk_num", "walk_len"])
weighted_table.sort_values(by="F1-weigh")

Unnamed: 0,F1-weigh,Exec time,walk_num,walk_len
0,0.822756,430.843513,10.0,5.0
2,0.845003,1684.428942,40.0,5.0
1,0.84527,852.987108,20.0,5.0
5,0.846273,4032.707988,40.0,10.0
3,0.848843,1034.079826,10.0,10.0
4,0.852353,2063.098696,20.0,10.0


In [11]:
# Select the pgf backend and have matplotlib typeset text with LaTeX.
# NOTE(review): this runs after pyplot was imported with %matplotlib inline;
# confirm that plt.show() in the plotting cells still behaves as intended.
import matplotlib

matplotlib.use("pgf")
pgf_settings = {
    "pgf.texsystem": "pdflatex",
    "font.family": "serif",
    "text.usetex": True,
    "pgf.rcfonts": False,
}
matplotlib.rcParams.update(pgf_settings)

### AVPRA only lang

In [16]:
# Positions of the states to evaluate: 0..9, then every second one from 10 to 28.
l = [*range(10), *range(10, 30, 2)]

In [13]:
### Reading VLs from file
# NOTE(review): unpickling can execute arbitrary code — only load trusted files.
lang_only_logs = (
    "./Only_lang/log_trial_0_LPStates.pickled",
    "./Only_lang/log_trial_1_LPStates.pickled",
    "./Only_lang/log_trial_2_LPStates.pickled",
    "./Only_lang/log_trial_3_LPStates.pickled",
)
# Trial 0 is kept whole; each later trial contributes everything except its first entry.
obj = pd.read_pickle(lang_only_logs[0])
for log_path in lang_only_logs[1:]:
    obj = obj + pd.read_pickle(log_path)[1:]

In [17]:
### Random forest classifier creation with 70 trees
clf = RandomForestClassifier(n_estimators=70)
start_time = time.time()
accuracies = []
f1_scores = []
f1_scores_macro = []
f1_scores_weigh = []

# The community labels do not depend on the state being evaluated, so they
# are built once here instead of on every loop iteration.
y_data = [comms_dict.get(str(node_id))[0] for node_id in range(1, len(comms_dict) + 1)]

# Evaluate only the states whose positions are listed in `l`.
for res in [obj[i] for i in l]:
    s_t = time.time()
    # Input: element 1 of the state record is used as the feature rows.
    X_data = res[1]

    # Split the data into training set and test set (20% held out).
    # NOTE(review): no random_state is passed to the split or to the forest,
    # so the scores change from run to run.
    X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.2)

    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    ### Accuracy and F1 (micro / macro / weighted) on the held-out nodes
    accuracies.append(metrics.accuracy_score(y_test, y_pred))
    f1_scores.append(metrics.f1_score(y_test, y_pred, average="micro"))
    f1_scores_macro.append(metrics.f1_score(y_test, y_pred, average="macro"))
    f1_scores_weigh.append(metrics.f1_score(y_test, y_pred, average="weighted"))
    print("Completed iteration in:", time.time() - s_t)

end_time = time.time()

Completed iteration in: 4.329277753829956
Completed iteration in: 12.010716915130615
Completed iteration in: 42.89314007759094
Completed iteration in: 44.32717514038086
Completed iteration in: 43.00024127960205
Completed iteration in: 43.27811074256897
Completed iteration in: 41.33672046661377
Completed iteration in: 42.15221381187439
Completed iteration in: 40.751484870910645
Completed iteration in: 41.18913125991821
Completed iteration in: 39.27448868751526
Completed iteration in: 42.54739332199097
Completed iteration in: 42.346495389938354
Completed iteration in: 42.13615560531616
Completed iteration in: 42.61327886581421
Completed iteration in: 42.486907958984375
Completed iteration in: 43.279874086380005
Completed iteration in: 44.548882484436035
Completed iteration in: 45.312294006347656
Completed iteration in: 48.706483364105225


In [18]:
### Function that returns the 10 / 1 index of the maximum values of a list
def get10maxidx(l, k=10):
    """Return the indices of the ``k`` largest values of ``l``, largest first.

    Args:
        l: Sequence of mutually comparable values.
        k: Number of indices to return. Defaults to 10, the value that used
            to be hard-coded, so existing one-argument calls are unaffected.

    Returns:
        A list of at most ``k`` positions into ``l``, ordered by decreasing
        value. Equal values are ordered by decreasing index.
    """
    # Reverse-sorting (value, index) pairs puts larger values first and,
    # among equal values, larger indices first.
    ranked = sorted(zip(l, range(len(l))), reverse=True)
    return [position for _, position in ranked[:k]]
def getmaxidx(l):
    """Return the position of the first occurrence of the largest value in ``l``."""
    largest = max(l)
    return l.index(largest)

In [22]:
### 10MWL classification
clf = RandomForestClassifier(n_estimators=70)
start_time2 = time.time()
accuracies2 = []
f1_scores2_macro = []
f1_scores2_weigh = []

# One scalar community label per node. Taking [0] matches the full-vector
# classification cell and gives the classifier a flat label list instead of
# a list of one-element lists. Built once because it does not change per state.
y_data = [comms_dict.get(str(node_id))[0] for node_id in range(1, len(comms_dict) + 1)]

# Evaluate only the states whose positions are listed in `l`.
for res in [obj[i] for i in l]:
    # Input: for every row of res[1], the indices of its 10 largest entries.
    X_data = [get10maxidx(row) for row in res[1]]

    # Split the data into training set and test set (20% held out).
    # NOTE(review): no random_state is set, so the scores change from run to run.
    X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.2)

    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    ### Accuracy and F1 (macro / weighted) on the held-out nodes
    accuracies2.append(metrics.accuracy_score(y_test, y_pred))
    f1_scores2_macro.append(metrics.f1_score(y_test, y_pred, average="macro"))
    f1_scores2_weigh.append(metrics.f1_score(y_test, y_pred, average="weighted"))

end_time2 = time.time()

In [23]:
### MWL classification
clf = RandomForestClassifier(n_estimators=70)
start_time3 = time.time()
accuracies3 = []
f1_scores3_macro = []
f1_scores3_weigh = []

# One scalar community label per node. Taking [0] matches the full-vector
# classification cell and gives the classifier a flat label list instead of
# a list of one-element lists. Built once because it does not change per state.
y_data = [comms_dict.get(str(node_id))[0] for node_id in range(1, len(comms_dict) + 1)]

# Evaluate only the states whose positions are listed in `l`.
for res in [obj[i] for i in l]:
    # Input: a single feature per row of res[1], the index of its largest entry.
    X_data = [[getmaxidx(row)] for row in res[1]]

    # Split the data into training set and test set (20% held out).
    # NOTE(review): no random_state is set, so the scores change from run to run.
    X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.2)

    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    ### Accuracy and F1 (macro / weighted) on the held-out nodes
    accuracies3.append(metrics.accuracy_score(y_test, y_pred))
    f1_scores3_macro.append(metrics.f1_score(y_test, y_pred, average="macro"))
    f1_scores3_weigh.append(metrics.f1_score(y_test, y_pred, average="weighted"))

end_time3 = time.time()

### Comparison macro

In [24]:
# F1-macro of the three AVPRA feature variants at each sampled iteration
plt.figure(figsize=(10, 6))
l = [*range(10), *range(10, 30, 2)]
macro_series = (
    (f1_scores_macro, "AVPRA F1-score-macro"),
    (f1_scores2_macro, "AVPRA 10MWL F1-score-macro"),
    (f1_scores3_macro, "AVPRA MWL F1-score-macro"),
)
for scores, legend_label in macro_series:
    plt.plot(l, scores, "o", label=legend_label, markersize=10)

# Dashed vertical marker at x = 7, listed in the legend as "Diametro".
plt.axvline(x=7, label="Diametro", linestyle="--")

plt.xlabel("Iterazione", fontsize=20)
plt.ylabel("F1-score", fontsize=20)
plt.legend(loc="right", prop={"size": 16})
for set_ticks in (plt.xticks, plt.yticks):
    set_ticks(fontsize=16)
plt.ylim(0, 1)

plt.savefig("F1_twitch_onlyL_AVPRA_all_macro.png", dpi=500)
plt.show()

In [25]:
# Largest value in f1_scores_macro and the matching entry of the sampled-iteration list.
sampled_iterations = [*range(10), *range(10, 30, 2)]
top_score = max(f1_scores_macro)
top_score, sampled_iterations[f1_scores_macro.index(top_score)]

(0.9007562593481003, 7)

In [26]:
# Largest value in f1_scores2_macro and the matching entry of the sampled-iteration list.
sampled_iterations = [*range(10), *range(10, 30, 2)]
top_score = max(f1_scores2_macro)
top_score, sampled_iterations[f1_scores2_macro.index(top_score)]

(0.7765723030823866, 9)

In [27]:
# Largest value in f1_scores3_macro and the matching entry of the sampled-iteration list.
sampled_iterations = [*range(10), *range(10, 30, 2)]
top_score = max(f1_scores3_macro)
top_score, sampled_iterations[f1_scores3_macro.index(top_score)]

(0.6469858474934775, 0)

### Comparison weighted

In [28]:
# F1-weighted of the three AVPRA feature variants at each sampled iteration
plt.figure(figsize=(10, 6))
l = [*range(10), *range(10, 30, 2)]
weighted_series = (
    (f1_scores_weigh, "AVPRA F1-score-weighted"),
    (f1_scores2_weigh, "AVPRA 10MWL F1-score-weighted"),
    (f1_scores3_weigh, "AVPRA MWL F1-score-weighted"),
)
for scores, legend_label in weighted_series:
    plt.plot(l, scores, "o", label=legend_label, markersize=10)

# Dashed vertical marker at x = 7, listed in the legend as "Diametro".
plt.axvline(x=7, label="Diametro", linestyle="--")

plt.xlabel("Iterazione", fontsize=20)
plt.ylabel("F1-score", fontsize=20)
plt.legend(loc="right", prop={"size": 16})
for set_ticks in (plt.xticks, plt.yticks):
    set_ticks(fontsize=16)
plt.ylim(0, 1)

plt.savefig("F1_twitch_onlyL_AVPRA_all_weighted.png", dpi=500)
plt.show()

In [29]:
# Largest value in f1_scores_weigh and the matching entry of the sampled-iteration list.
sampled_iterations = [*range(10), *range(10, 30, 2)]
top_score = max(f1_scores_weigh)
top_score, sampled_iterations[f1_scores_weigh.index(top_score)]

(0.8248043798037514, 9)

In [30]:
# Largest value in f1_scores2_weigh and the matching entry of the sampled-iteration list.
sampled_iterations = [*range(10), *range(10, 30, 2)]
top_score = max(f1_scores2_weigh)
top_score, sampled_iterations[f1_scores2_weigh.index(top_score)]

(0.5877879135674272, 8)

In [31]:
# Largest value in f1_scores3_weigh and the matching entry of the sampled-iteration list.
sampled_iterations = [*range(10), *range(10, 30, 2)]
top_score = max(f1_scores3_weigh)
top_score, sampled_iterations[f1_scores3_weigh.index(top_score)]

(0.3085989340111455, 1)

In [32]:
# Accuracy and F1-macro of the full-vector AVPRA classification, overlaid
plt.figure(figsize=(10, 6))
l = [*range(10), *range(10, 30, 2)]
overlay = (
    (accuracies, "o", {"label": "AVPRA Accuratezza", "markersize": 10}),
    (f1_scores_macro, "x", {"label": "AVPRA F1-score-macro", "color": "blue", "markersize": 12}),
)
for values, marker, line_style in overlay:
    plt.plot(l, values, marker, **line_style)

# Dashed vertical marker at x = 7, listed in the legend as "Diametro".
plt.axvline(x=7, label="Diametro", linestyle="--")

plt.xlabel("Iterazione", fontsize=20)
plt.ylabel("Accuratezza/F1-Score", fontsize=20)
plt.legend(loc="right", prop={"size": 16})
for set_ticks in (plt.xticks, plt.yticks):
    set_ticks(fontsize=16)
plt.ylim(0, 1)

plt.savefig("F1_twitch_onlyL_AVPRA_macro.png", dpi=500)
plt.show()

In [33]:
# Accuracy and F1-weighted of the full-vector AVPRA classification, overlaid
plt.figure(figsize=(10, 6))
l = [*range(10), *range(10, 30, 2)]
overlay = (
    (accuracies, "o", {"label": "AVPRA Accuratezza", "markersize": 10}),
    (f1_scores_weigh, "x", {"label": "AVPRA F1-score-weighted", "color": "blue", "markersize": 12}),
)
for values, marker, line_style in overlay:
    plt.plot(l, values, marker, **line_style)

# Dashed vertical marker at x = 7, listed in the legend as "Diametro".
plt.axvline(x=7, label="Diametro", linestyle="--")

plt.xlabel("Iterazione", fontsize=20)
plt.ylabel("Accuratezza/F1-Score", fontsize=20)
plt.legend(loc="right", prop={"size": 16})
for set_ticks in (plt.xticks, plt.yticks):
    set_ticks(fontsize=16)
plt.ylim(0, 1)

plt.savefig("F1_twitch_onlyL_AVPRA_weighted.png", dpi=500)
plt.show()

In [34]:
# Largest value in accuracies and the entry of `l` at the same position.
best_accuracy = max(accuracies)
best_accuracy, l[accuracies.index(best_accuracy)]

(0.8261904053772715, 9)

In [35]:
# Largest value in accuracies2 and the entry of `l` at the same position.
best_accuracy = max(accuracies2)
best_accuracy, l[accuracies2.index(best_accuracy)]

(0.620527615025429, 8)

In [36]:
# Largest value in accuracies3 and the entry of `l` at the same position.
best_accuracy = max(accuracies3)
best_accuracy, l[accuracies3.index(best_accuracy)]

(0.42494720875591113, 0)

In [37]:
# Accuracy of the three AVPRA feature variants at each sampled iteration
plt.figure(figsize=(10, 6))
l = [*range(10), *range(10, 30, 2)]
accuracy_series = (
    (accuracies, "AVPRA Accuratezza"),
    (accuracies2, "AVPRA 10MWL Accuratezza"),
    (accuracies3, "AVPRA MWL Accuratezza"),
)
for scores, legend_label in accuracy_series:
    plt.plot(l, scores, "o", label=legend_label, markersize=10)

# Dashed vertical marker at x = 7, listed in the legend as "Diametro".
plt.axvline(x=7, label="Diametro", linestyle="--")

plt.xlabel("Iterazione", fontsize=20)
plt.ylabel("Accuratezza", fontsize=20)
plt.legend(loc="right", prop={"size": 16})
for set_ticks in (plt.xticks, plt.yticks):
    set_ticks(fontsize=16)
plt.ylim(0, 1)

plt.savefig("F1_twitch_onlyL_AVPRA_all_micro.png", dpi=500)
plt.show()

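### Only F1 micro (plotted from `accuracies`, which coincides with micro-averaged F1 for single-label multiclass predictions)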

In [38]:
# Accuracy of the full-vector AVPRA classification at each sampled iteration
plt.figure(figsize=(10, 6))
l = [*range(10), *range(10, 30, 2)]
accuracy_style = {"label": "AVPRA Accuratezza", "markersize": 10}
plt.plot(l, accuracies, "o", **accuracy_style)

# Dashed vertical marker at x = 7, listed in the legend as "Diametro".
plt.axvline(x=7, label="Diametro", linestyle="--")

plt.xlabel("Iterazione", fontsize=20)
plt.ylabel("Accuratezza", fontsize=20)
plt.legend(loc="right", prop={"size": 16})
for set_ticks in (plt.xticks, plt.yticks):
    set_ticks(fontsize=16)
plt.ylim(0, 1)

plt.savefig("F1_twitch_onlyL_AVPRA_micro.png", dpi=500)
plt.show()

### AVPRA all feat

In [39]:
# Drop the previously loaded states before the next set is read.
obj = list()

In [40]:
### Reading VLs from file
# NOTE(review): unpickling can execute arbitrary code — only load trusted files.
all_feat_logs = (
    "./All_feat/log_trial_0_LPStates.pickled",
    "./All_feat/log_trial_1_LPStates.pickled",
    "./All_feat/log_trial_2_LPStates.pickled",
    "./All_feat/log_trial_3_LPStates.pickled",
)
# All four trials are concatenated whole.
# NOTE(review): the Only_lang loading cell drops the first entry of trials
# 1..3 ([1:]) while this one does not — confirm which is intended, since it
# shifts which states the positions in `l` select.
obj = pd.read_pickle(all_feat_logs[0])
for log_path in all_feat_logs[1:]:
    obj = obj + pd.read_pickle(log_path)

In [41]:
### Random forest classifier creation with 70 trees
clf = RandomForestClassifier(n_estimators=70)
start_time = time.time()
accuracies = []
f1_scores = []
f1_scores_macro = []
f1_scores_weigh = []

# The community labels do not depend on the state being evaluated, so they
# are built once here instead of on every loop iteration.
y_data = [comms_dict.get(str(node_id))[0] for node_id in range(1, len(comms_dict) + 1)]

# Evaluate only the states whose positions are listed in `l`.
for res in [obj[i] for i in l]:
    s_t = time.time()
    # Input: element 1 of the state record is used as the feature rows.
    X_data = res[1]

    # Split the data into training set and test set (20% held out).
    # NOTE(review): no random_state is passed to the split or to the forest,
    # so the scores change from run to run.
    X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.2)

    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    ### Accuracy and F1 (micro / macro / weighted) on the held-out nodes
    accuracies.append(metrics.accuracy_score(y_test, y_pred))
    f1_scores.append(metrics.f1_score(y_test, y_pred, average="micro"))
    f1_scores_macro.append(metrics.f1_score(y_test, y_pred, average="macro"))
    f1_scores_weigh.append(metrics.f1_score(y_test, y_pred, average="weighted"))
    print("Completed iteration in:", time.time() - s_t)

end_time = time.time()

Completed iteration in: 6.1553966999053955
Completed iteration in: 22.845343589782715
Completed iteration in: 60.08186197280884
Completed iteration in: 58.33820295333862
Completed iteration in: 56.503132820129395
Completed iteration in: 55.852912187576294
Completed iteration in: 54.675307750701904
Completed iteration in: 55.014700174331665
Completed iteration in: 54.4135422706604
Completed iteration in: 53.29962348937988
Completed iteration in: 54.103456258773804
Completed iteration in: 50.8409206867218
Completed iteration in: 50.59055542945862
Completed iteration in: 48.99703502655029
Completed iteration in: 48.65815806388855
Completed iteration in: 49.86013913154602
Completed iteration in: 50.00357437133789
Completed iteration in: 52.967915296554565
Completed iteration in: 63.142024993896484
Completed iteration in: 79.33202886581421


In [42]:
### Function that returns the 10 / 1 index of the maximum values of a list
def get10maxidx(l, k=10):
    """Return the indices of the ``k`` largest values of ``l``, largest first.

    NOTE(review): a helper with this name is already defined earlier in the
    notebook; this cell re-binds it.

    Args:
        l: Sequence of mutually comparable values.
        k: Number of indices to return. Defaults to 10, the value that used
            to be hard-coded, so existing one-argument calls are unaffected.

    Returns:
        A list of at most ``k`` positions into ``l``, ordered by decreasing
        value. Equal values are ordered by decreasing index.
    """
    # Pair every value with its position; reverse-sorting the pairs ranks by
    # value first and, among equal values, by larger position first.
    pairs = [(value, position) for position, value in enumerate(l)]
    pairs.sort(reverse=True)
    return [position for _, position in pairs[:k]]
def getmaxidx(l):
    """Return the index at which the maximum of ``l`` first appears.

    NOTE(review): a helper with this name is already defined earlier in the
    notebook; this cell re-binds it.
    """
    peak = max(l)
    return l.index(peak)

In [43]:
### 10MWL classification
clf = RandomForestClassifier(n_estimators=70)
start_time2 = time.time()
accuracies2 = []
f1_scores2_macro = []
f1_scores2_weigh = []

# One scalar community label per node. Taking [0] matches the full-vector
# classification cell and gives the classifier a flat label list instead of
# a list of one-element lists. Built once because it does not change per state.
y_data = [comms_dict.get(str(node_id))[0] for node_id in range(1, len(comms_dict) + 1)]

# Evaluate only the states whose positions are listed in `l`.
for res in [obj[i] for i in l]:
    # Input: for every row of res[1], the indices of its 10 largest entries.
    X_data = [get10maxidx(row) for row in res[1]]

    # Split the data into training set and test set (20% held out).
    # NOTE(review): no random_state is set, so the scores change from run to run.
    X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.2)

    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    ### Accuracy and F1 (macro / weighted) on the held-out nodes
    accuracies2.append(metrics.accuracy_score(y_test, y_pred))
    f1_scores2_macro.append(metrics.f1_score(y_test, y_pred, average="macro"))
    f1_scores2_weigh.append(metrics.f1_score(y_test, y_pred, average="weighted"))

end_time2 = time.time()

In [44]:
### MWL classification
clf = RandomForestClassifier(n_estimators=70)
start_time3 = time.time()
accuracies3 = []
f1_scores3_macro = []
f1_scores3_weigh = []

# One scalar community label per node. Taking [0] matches the full-vector
# classification cell and gives the classifier a flat label list instead of
# a list of one-element lists. Built once because it does not change per state.
y_data = [comms_dict.get(str(node_id))[0] for node_id in range(1, len(comms_dict) + 1)]

# Evaluate only the states whose positions are listed in `l`.
for res in [obj[i] for i in l]:
    # Input: a single feature per row of res[1], the index of its largest entry.
    X_data = [[getmaxidx(row)] for row in res[1]]

    # Split the data into training set and test set (20% held out).
    # NOTE(review): no random_state is set, so the scores change from run to run.
    X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size=0.2)

    clf.fit(X_train, y_train)

    y_pred = clf.predict(X_test)

    ### Accuracy and F1 (macro / weighted) on the held-out nodes
    accuracies3.append(metrics.accuracy_score(y_test, y_pred))
    f1_scores3_macro.append(metrics.f1_score(y_test, y_pred, average="macro"))
    f1_scores3_weigh.append(metrics.f1_score(y_test, y_pred, average="weighted"))

end_time3 = time.time()

### Comparison macro

In [45]:
# F1-macro of the three AVPRA feature variants at each sampled iteration
plt.figure(figsize=(10, 6))
l = [*range(10), *range(10, 30, 2)]
macro_series = (
    (f1_scores_macro, "AVPRA F1-score-macro"),
    (f1_scores2_macro, "AVPRA 10MWL F1-score-macro"),
    (f1_scores3_macro, "AVPRA MWL F1-score-macro"),
)
for scores, legend_label in macro_series:
    plt.plot(l, scores, "o", label=legend_label, markersize=10)

# Dashed vertical marker at x = 7, listed in the legend as "Diametro".
plt.axvline(x=7, label="Diametro", linestyle="--")

plt.xlabel("Iterazione", fontsize=20)
plt.ylabel("F1-score", fontsize=20)
plt.legend(loc="right", prop={"size": 16})
for set_ticks in (plt.xticks, plt.yticks):
    set_ticks(fontsize=16)
plt.ylim(0, 1)

plt.savefig("F1_twitch_allF_AVPRA_all_macro.png", dpi=500)
plt.show()

In [46]:
# Largest value in f1_scores_macro and the matching entry of the sampled-iteration list.
sampled_iterations = [*range(10), *range(10, 30, 2)]
top_score = max(f1_scores_macro)
top_score, sampled_iterations[f1_scores_macro.index(top_score)]

(0.910530412856996, 5)

In [47]:
# Largest value in f1_scores2_macro and the matching entry of the sampled-iteration list.
sampled_iterations = [*range(10), *range(10, 30, 2)]
top_score = max(f1_scores2_macro)
top_score, sampled_iterations[f1_scores2_macro.index(top_score)]

(0.7604525287525417, 4)

In [48]:
# Largest value in f1_scores3_macro and the matching entry of the sampled-iteration list.
sampled_iterations = [*range(10), *range(10, 30, 2)]
top_score = max(f1_scores3_macro)
top_score, sampled_iterations[f1_scores3_macro.index(top_score)]

(0.6130360480995082, 0)

### Comparison weighted

In [49]:
# F1-weighted of the three AVPRA feature variants at each sampled iteration
plt.figure(figsize=(10, 6))
l = [*range(10), *range(10, 30, 2)]
weighted_series = (
    (f1_scores_weigh, "AVPRA F1-score-weighted"),
    (f1_scores2_weigh, "AVPRA 10MWL F1-score-weighted"),
    (f1_scores3_weigh, "AVPRA MWL F1-score-weighted"),
)
for scores, legend_label in weighted_series:
    plt.plot(l, scores, "o", label=legend_label, markersize=10)

# Dashed vertical marker at x = 7, listed in the legend as "Diametro".
plt.axvline(x=7, label="Diametro", linestyle="--")

plt.xlabel("Iterazione", fontsize=20)
plt.ylabel("F1-score", fontsize=20)
plt.legend(loc="right", prop={"size": 16})
for set_ticks in (plt.xticks, plt.yticks):
    set_ticks(fontsize=16)
plt.ylim(0, 1)

plt.savefig("F1_twitch_allF_AVPRA_all_weighted.png", dpi=500)
plt.show()

In [50]:
# Largest value in f1_scores_weigh and the matching entry of the sampled-iteration list.
sampled_iterations = [*range(10), *range(10, 30, 2)]
top_score = max(f1_scores_weigh)
top_score, sampled_iterations[f1_scores_weigh.index(top_score)]

(0.8414919037029881, 7)

In [51]:
# Largest value in f1_scores2_weigh and the matching entry of the sampled-iteration list.
sampled_iterations = [*range(10), *range(10, 30, 2)]
top_score = max(f1_scores2_weigh)
top_score, sampled_iterations[f1_scores2_weigh.index(top_score)]

(0.6152342968416407, 3)

In [52]:
# Largest value in f1_scores3_weigh and the matching entry of the sampled-iteration list.
sampled_iterations = [*range(10), *range(10, 30, 2)]
top_score = max(f1_scores3_weigh)
top_score, sampled_iterations[f1_scores3_weigh.index(top_score)]

(0.30751664716173754, 0)

In [53]:
# Accuracy and F1-macro of the full-vector AVPRA classification, overlaid
plt.figure(figsize=(10, 6))
l = [*range(10), *range(10, 30, 2)]
overlay = (
    (accuracies, "o", {"label": "AVPRA Accuratezza", "markersize": 10}),
    (f1_scores_macro, "x", {"label": "AVPRA F1-score-macro", "color": "blue", "markersize": 12}),
)
for values, marker, line_style in overlay:
    plt.plot(l, values, marker, **line_style)

# Dashed vertical marker at x = 7, listed in the legend as "Diametro".
plt.axvline(x=7, label="Diametro", linestyle="--")

plt.xlabel("Iterazione", fontsize=20)
plt.ylabel("Accuratezza/F1-Score", fontsize=20)
plt.legend(loc="right", prop={"size": 16})
for set_ticks in (plt.xticks, plt.yticks):
    set_ticks(fontsize=16)
plt.ylim(0, 1)

plt.savefig("F1_twitch_allF_AVPRA_macro.png", dpi=500)
plt.show()

In [54]:
# Accuracy and F1-weighted of the full-vector AVPRA classification, overlaid
plt.figure(figsize=(10, 6))
l = [*range(10), *range(10, 30, 2)]
overlay = (
    (accuracies, "o", {"label": "AVPRA Accuratezza", "markersize": 10}),
    (f1_scores_weigh, "x", {"label": "AVPRA F1-score-weighted", "color": "blue", "markersize": 12}),
)
for values, marker, line_style in overlay:
    plt.plot(l, values, marker, **line_style)

# Dashed vertical marker at x = 7, listed in the legend as "Diametro".
plt.axvline(x=7, label="Diametro", linestyle="--")

plt.xlabel("Iterazione", fontsize=20)
plt.ylabel("Accuratezza/F1-Score", fontsize=20)
plt.legend(loc="right", prop={"size": 16})
for set_ticks in (plt.xticks, plt.yticks):
    set_ticks(fontsize=16)
plt.ylim(0, 1)

plt.savefig("F1_twitch_allF_AVPRA_weighted.png", dpi=500)
plt.show()

In [55]:
# Largest value in accuracies and the entry of `l` at the same position.
best_accuracy = max(accuracies)
best_accuracy, l[accuracies.index(best_accuracy)]

(0.8429348957558814, 7)

In [56]:
# Largest value in accuracies2 and the entry of `l` at the same position.
best_accuracy = max(accuracies2)
best_accuracy, l[accuracies2.index(best_accuracy)]

(0.6241858251791929, 3)

In [57]:
# Largest value in accuracies3 and the entry of `l` at the same position.
best_accuracy = max(accuracies3)
best_accuracy, l[accuracies3.index(best_accuracy)]

(0.4268506676977069, 0)

In [58]:
# Accuracy of the three AVPRA feature variants at each sampled iteration
plt.figure(figsize=(10, 6))
l = [*range(10), *range(10, 30, 2)]
accuracy_series = (
    (accuracies, "AVPRA Accuratezza"),
    (accuracies2, "AVPRA 10MWL Accuratezza"),
    (accuracies3, "AVPRA MWL Accuratezza"),
)
for scores, legend_label in accuracy_series:
    plt.plot(l, scores, "o", label=legend_label, markersize=10)

# Dashed vertical marker at x = 7, listed in the legend as "Diametro".
plt.axvline(x=7, label="Diametro", linestyle="--")

plt.xlabel("Iterazione", fontsize=20)
plt.ylabel("Accuratezza", fontsize=20)
plt.legend(loc="right", prop={"size": 16})
for set_ticks in (plt.xticks, plt.yticks):
    set_ticks(fontsize=16)
plt.ylim(0, 1)

plt.savefig("F1_twitch_allF_AVPRA_all_micro.png", dpi=500)
plt.show()

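### Only F1 micro (plotted from `accuracies`, which coincides with micro-averaged F1 for single-label multiclass predictions)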

In [59]:
# Accuracy of the full-vector AVPRA classification at each sampled iteration
plt.figure(figsize=(10, 6))
l = [*range(10), *range(10, 30, 2)]
accuracy_style = {"label": "AVPRA Accuratezza", "markersize": 10}
plt.plot(l, accuracies, "o", **accuracy_style)

# Dashed vertical marker at x = 7, listed in the legend as "Diametro".
plt.axvline(x=7, label="Diametro", linestyle="--")

plt.xlabel("Iterazione", fontsize=20)
plt.ylabel("Accuratezza", fontsize=20)
plt.legend(loc="right", prop={"size": 16})
for set_ticks in (plt.xticks, plt.yticks):
    set_ticks(fontsize=16)
plt.ylim(0, 1)

plt.savefig("F1_twitch_allF_AVPRA_micro.png", dpi=500)
plt.show()