In [1]:
import json
import os
from itertools import product
from pathlib import Path

import networkx as nx
import numpy as np
import stellargraph as sg
from networkx.readwrite import json_graph
from node2vec import Node2Vec
from stellargraph.data import EdgeSplitter

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier 
#from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier, StackingClassifier, VotingClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import classification_report,confusion_matrix,roc_auc_score
from sklearn.metrics import precision_score, recall_score,f1_score

In [2]:
from sklearn.naive_bayes import GaussianNB 
from sklearn.neural_network import MLPClassifier
from sklearn.multioutput import MultiOutputClassifier


In [3]:
# Load the graph stored as node-link JSON in ppi-G.json, then dump its edges
# to a plain-text edge list with one space-separated "u v" pair per line.
# The input file is opened in a `with` block so the handle is closed promptly
# instead of being left to the garbage collector.
with open("ppi-G.json") as graph_file:
    G = json_graph.node_link_graph(json.load(graph_file))
edges = list(G.edges())
ppi_edge_file = "ppi_edge_list.txt"
with open(ppi_edge_file, 'w') as fp:
    fp.write('\n'.join('{} {}'.format(u, v) for u, v in edges))

In [4]:
# Build the dense label matrix `target` of shape (n, m) from ppi-class_map.json:
# row i holds the label vector stored under key str(i).
# Assumes the keys are exactly "0" .. "n-1" and every label vector has the same
# length as the one for node "0"; a missing key raises KeyError.
with open("ppi-class_map.json") as class_map_file:
    class_map = json.load(class_map_file)
n = len(class_map)
m = len(class_map['0'])
target = np.zeros((n, m))
for i in range(n):
    target[i] = np.array(class_map[str(i)])

In [5]:
# Build `train_labels`: one label row per node, ordered like G.nodes().
# NOTE(review): despite its name, `edge_labels_internal` is keyed by integer
# node id (it is indexed below with the ids yielded by G.nodes()).
with open("ppi-class_map.json") as labels_file:
    edge_labels_internal = {int(i): l for i, l in json.load(labels_file).items()}
train_ids = list(G.nodes())
train_labels = np.array([edge_labels_internal[i] for i in train_ids])
# Promote a 1-D label vector to shape (n, 1) so the labels are always 2-D.
if train_labels.ndim == 1:
    train_labels = np.expand_dims(train_labels, 1)

In [6]:
# NOTE(review): `split_size` is not referenced by any other visible cell —
# confirm whether it is still needed before keeping it.
split_size = 2

In [7]:
# Scratch directory handed to Node2Vec as `temp_folder` in the sweep cells.
# exist_ok=True makes the call idempotent and avoids the check-then-create race
# of testing os.path.exists() first.
directory = "tmp"
os.makedirs(directory, exist_ok=True)

In [9]:
# node2vec sweep over the embedding dimension (all other settings fixed).
# For every configuration: train node2vec embeddings (or reuse the cached .emb
# file), score a per-label random forest with 5-fold cross-validation, and
# append the mean micro-averaged metrics as one CSV row to the results file.
EMBEDDING_DIMS = [64, 128]
WALK_LENGTHS = [40]
NUM_WALKS = [10]
WORKERS = [4]
Ps = [1]
Qs = [1]
# product() visits the grid in the same order as the equivalent nested loops.
for EMBEDDING_DIM, WALK_LENGTH, NUM_WALK, WORKER, P, Q in product(
        EMBEDDING_DIMS, WALK_LENGTHS, NUM_WALKS, WORKERS, Ps, Qs):
    filename = "ppi_node2vec_full_embeddings_{}_{}_{}_{}_{}.emb".format(
        EMBEDDING_DIM, WALK_LENGTH, NUM_WALK, P, Q)
    print(filename)
    # Walk generation is expensive, so embeddings are cached on disk and only
    # recomputed when the file for this configuration does not exist yet.
    if not Path(filename).is_file():
        node2vec = Node2Vec(G, dimensions=EMBEDDING_DIM, walk_length=WALK_LENGTH,
                            num_walks=NUM_WALK, workers=WORKER, p=P, q=Q,
                            temp_folder="tmp/")
        model = node2vec.fit(window=10, min_count=1, batch_words=4)
        model.wv.save_word2vec_format(filename)

    # The first line of the saved file is skipped; in every remaining row,
    # column 0 is read as the node id and the other columns as its vector.
    data_emb = np.loadtxt(filename, skiprows=1)
    emb_dim = data_emb.shape[1] - 1
    node_ids = np.array(list(G.nodes()), dtype=int)
    num_nodes = len(node_ids)
    # Row i of `embedding` is the vector of node id i (zeros if the id is absent
    # from the file).
    embedding = np.zeros((num_nodes, emb_dim))
    embedding[data_emb[:, 0].astype(int)] = data_emb[:, 1:]

    # Rows of X follow G.nodes() order, whereas row i of `target` belongs to
    # node id i. Index `target` by node id so features and labels stay aligned
    # even when G.nodes() does not iterate in 0..n-1 order.
    X = embedding[node_ids]
    y = target[node_ids]

    # Fixed seed: reruns are reproducible and every configuration is evaluated
    # on the same folds.
    shuffle_rng = np.random.RandomState(1)
    s = shuffle_rng.permutation(X.shape[0])
    X2 = X[s]
    y2 = y[s]

    roc = []
    prec = []
    rec = []
    f1 = []
    kf = KFold(n_splits=5)
    for train_index, test_index in kf.split(X2):
        X_train2, X_test2 = X2[train_index], X2[test_index]
        y_train2, y_test2 = y2[train_index], y2[test_index]
        # One independent random forest per label column.
        forest = RandomForestClassifier(random_state=1, verbose=1, n_estimators=10)
        clf = MultiOutputClassifier(forest, n_jobs=-1)
        clf.fit(X_train2, y_train2)

        pred = clf.predict(X_test2)

        # NOTE(review): ROC AUC is computed from hard 0/1 predictions rather than
        # predicted scores; kept as-is so new rows stay comparable with the rows
        # already in the results file.
        roc.append(roc_auc_score(y_test2, pred, average='micro'))
        prec.append(precision_score(y_test2, pred, average='micro'))
        rec.append(recall_score(y_test2, pred, average='micro'))
        f1.append(f1_score(y_test2, pred, average='micro'))

    # CSV row: dim, walk length, num walks, p, q, then mean ROC AUC / precision /
    # recall / F1 over the folds.
    fields = [EMBEDDING_DIM, WALK_LENGTH, NUM_WALK, P, Q,
              np.mean(roc), np.mean(prec), np.mean(rec), np.mean(f1)]
    result = ",".join(str(value) for value in fields) + "\n"
    with open("result_multiclass_ppi_node2vec.txt", "a") as f:
        f.write(result)
                        
                        

ppi_node2vec_full_embeddings_64_40_10_1_1.emb


HBox(children=(IntProgress(value=0, description='Computing transition probabilities', max=56944, style=Progres…

Generating walks (CPU: 1):   0%|          | 0/3 [00:00<?, ?it/s]





Generating walks (CPU: 2):   0%|          | 0/3 [00:00<?, ?it/s][A

Generating walks (CPU: 3):   0%|          | 0/2 [00:00<?, ?it/s][A[A


Generating walks (CPU: 4):   0%|          | 0/2 [00:00<?, ?it/s][A[A[A


Generating walks (CPU: 4): 100%|██████████| 2/2 [28:32<00:00, 856.26s/it][A[A[A
Generating walks (CPU: 1):  67%|██████▋   | 2/3 [28:39<14:19, 859.75s/it][A

Generating walks (CPU: 4): 100%|██████████| 2/2 [52:32<00:00, 1576.10s/it][A[A
Generating walks (CPU: 3): 100%|██████████| 2/2 [52:35<00:00, 1577.74s/it]
Generating walks (CPU: 1): 100%|██████████| 3/3 [52:38<00:00, 1033.42s/it]
Generating walks (CPU: 2): 100%|██████████| 3/3 [52:37<00:00, 1033.20s/it][A
Generating walks (CPU: 1): 100%|██████████| 3/3 [1:00:13<00:00, 1204.39s/it]
Generating walks (CPU: 2): 100%|██████████| 3/3 [1:00:15<00:00, 1205.31s/it]


In [8]:
# node2vec sweep over the walk length (all other settings fixed).
# For every configuration: train node2vec embeddings (or reuse the cached .emb
# file), score a per-label random forest with 5-fold cross-validation, and
# append the mean micro-averaged metrics as one CSV row to the results file.
EMBEDDING_DIMS = [64]
WALK_LENGTHS = [10,100]
NUM_WALKS = [10]
WORKERS = [4]
Ps = [1]
Qs = [1]
# product() visits the grid in the same order as the equivalent nested loops.
for EMBEDDING_DIM, WALK_LENGTH, NUM_WALK, WORKER, P, Q in product(
        EMBEDDING_DIMS, WALK_LENGTHS, NUM_WALKS, WORKERS, Ps, Qs):
    filename = "ppi_node2vec_full_embeddings_{}_{}_{}_{}_{}.emb".format(
        EMBEDDING_DIM, WALK_LENGTH, NUM_WALK, P, Q)
    print(filename)
    # Walk generation is expensive, so embeddings are cached on disk and only
    # recomputed when the file for this configuration does not exist yet.
    if not Path(filename).is_file():
        node2vec = Node2Vec(G, dimensions=EMBEDDING_DIM, walk_length=WALK_LENGTH,
                            num_walks=NUM_WALK, workers=WORKER, p=P, q=Q,
                            temp_folder="tmp/")
        model = node2vec.fit(window=10, min_count=1, batch_words=4)
        model.wv.save_word2vec_format(filename)

    # The first line of the saved file is skipped; in every remaining row,
    # column 0 is read as the node id and the other columns as its vector.
    data_emb = np.loadtxt(filename, skiprows=1)
    emb_dim = data_emb.shape[1] - 1
    node_ids = np.array(list(G.nodes()), dtype=int)
    num_nodes = len(node_ids)
    # Row i of `embedding` is the vector of node id i (zeros if the id is absent
    # from the file).
    embedding = np.zeros((num_nodes, emb_dim))
    embedding[data_emb[:, 0].astype(int)] = data_emb[:, 1:]

    # Rows of X follow G.nodes() order, whereas row i of `target` belongs to
    # node id i. Index `target` by node id so features and labels stay aligned
    # even when G.nodes() does not iterate in 0..n-1 order.
    X = embedding[node_ids]
    y = target[node_ids]

    # Fixed seed: reruns are reproducible and every configuration is evaluated
    # on the same folds.
    shuffle_rng = np.random.RandomState(1)
    s = shuffle_rng.permutation(X.shape[0])
    X2 = X[s]
    y2 = y[s]

    roc = []
    prec = []
    rec = []
    f1 = []
    kf = KFold(n_splits=5)
    for train_index, test_index in kf.split(X2):
        X_train2, X_test2 = X2[train_index], X2[test_index]
        y_train2, y_test2 = y2[train_index], y2[test_index]
        # One independent random forest per label column.
        forest = RandomForestClassifier(random_state=1, verbose=1, n_estimators=10)
        clf = MultiOutputClassifier(forest, n_jobs=-1)
        clf.fit(X_train2, y_train2)

        pred = clf.predict(X_test2)

        # NOTE(review): ROC AUC is computed from hard 0/1 predictions rather than
        # predicted scores; kept as-is so new rows stay comparable with the rows
        # already in the results file.
        roc.append(roc_auc_score(y_test2, pred, average='micro'))
        prec.append(precision_score(y_test2, pred, average='micro'))
        rec.append(recall_score(y_test2, pred, average='micro'))
        f1.append(f1_score(y_test2, pred, average='micro'))

    # CSV row: dim, walk length, num walks, p, q, then mean ROC AUC / precision /
    # recall / F1 over the folds.
    fields = [EMBEDDING_DIM, WALK_LENGTH, NUM_WALK, P, Q,
              np.mean(roc), np.mean(prec), np.mean(rec), np.mean(f1)]
    result = ",".join(str(value) for value in fields) + "\n"
    with open("result_multiclass_ppi_node2vec.txt", "a") as f:
        f.write(result)
                        
                        

ppi_node2vec_full_embeddings_64_100_10_1_1.emb


HBox(children=(IntProgress(value=0, description='Computing transition probabilities', max=56944, style=Progres…

Generating walks (CPU: 1):   0%|          | 0/3 [00:00<?, ?it/s]
Generating walks (CPU: 2):   0%|          | 0/3 [00:00<?, ?it/s][A

Generating walks (CPU: 3):   0%|          | 0/2 [00:00<?, ?it/s][A[A







Generating walks (CPU: 4):   0%|          | 0/2 [00:00<?, ?it/s][A[A[A


Generating walks (CPU: 4): 100%|██████████| 2/2 [21:34<00:00, 647.09s/it][A[A[A

Generating walks (CPU: 1):  67%|██████▋   | 2/3 [22:19<11:09, 669.73s/it][A[A
Generating walks (CPU: 4): 100%|██████████| 2/2 [42:05<00:00, 1262.80s/it][A
Generating walks (CPU: 3): 100%|██████████| 2/2 [42:25<00:00, 1272.84s/it]
Generating walks (CPU: 1): 100%|██████████| 3/3 [42:59<00:00, 840.69s/it]
Generating walks (CPU: 2): 100%|██████████| 3/3 [53:58<00:00, 1079.42s/it][A
Generating walks (CPU: 1): 100%|██████████| 3/3 [54:02<00:00, 1080.87s/it]


In [10]:
# node2vec sweep over the number of walks per node (all other settings fixed).
# For every configuration: train node2vec embeddings (or reuse the cached .emb
# file), score a per-label random forest with 5-fold cross-validation, and
# append the mean micro-averaged metrics as one CSV row to the results file.
EMBEDDING_DIMS = [64]
WALK_LENGTHS = [40]
NUM_WALKS = [20,50]
WORKERS = [4]
Ps = [1]
Qs = [1]
# product() visits the grid in the same order as the equivalent nested loops.
for EMBEDDING_DIM, WALK_LENGTH, NUM_WALK, WORKER, P, Q in product(
        EMBEDDING_DIMS, WALK_LENGTHS, NUM_WALKS, WORKERS, Ps, Qs):
    filename = "ppi_node2vec_full_embeddings_{}_{}_{}_{}_{}.emb".format(
        EMBEDDING_DIM, WALK_LENGTH, NUM_WALK, P, Q)
    print(filename)
    # Walk generation is expensive, so embeddings are cached on disk and only
    # recomputed when the file for this configuration does not exist yet.
    if not Path(filename).is_file():
        node2vec = Node2Vec(G, dimensions=EMBEDDING_DIM, walk_length=WALK_LENGTH,
                            num_walks=NUM_WALK, workers=WORKER, p=P, q=Q,
                            temp_folder="tmp/")
        model = node2vec.fit(window=10, min_count=1, batch_words=4)
        model.wv.save_word2vec_format(filename)

    # The first line of the saved file is skipped; in every remaining row,
    # column 0 is read as the node id and the other columns as its vector.
    data_emb = np.loadtxt(filename, skiprows=1)
    emb_dim = data_emb.shape[1] - 1
    node_ids = np.array(list(G.nodes()), dtype=int)
    num_nodes = len(node_ids)
    # Row i of `embedding` is the vector of node id i (zeros if the id is absent
    # from the file).
    embedding = np.zeros((num_nodes, emb_dim))
    embedding[data_emb[:, 0].astype(int)] = data_emb[:, 1:]

    # Rows of X follow G.nodes() order, whereas row i of `target` belongs to
    # node id i. Index `target` by node id so features and labels stay aligned
    # even when G.nodes() does not iterate in 0..n-1 order.
    X = embedding[node_ids]
    y = target[node_ids]

    # Fixed seed: reruns are reproducible and every configuration is evaluated
    # on the same folds.
    shuffle_rng = np.random.RandomState(1)
    s = shuffle_rng.permutation(X.shape[0])
    X2 = X[s]
    y2 = y[s]

    roc = []
    prec = []
    rec = []
    f1 = []
    kf = KFold(n_splits=5)
    for train_index, test_index in kf.split(X2):
        X_train2, X_test2 = X2[train_index], X2[test_index]
        y_train2, y_test2 = y2[train_index], y2[test_index]
        # One independent random forest per label column.
        forest = RandomForestClassifier(random_state=1, verbose=1, n_estimators=10)
        clf = MultiOutputClassifier(forest, n_jobs=-1)
        clf.fit(X_train2, y_train2)

        pred = clf.predict(X_test2)

        # NOTE(review): ROC AUC is computed from hard 0/1 predictions rather than
        # predicted scores; kept as-is so new rows stay comparable with the rows
        # already in the results file.
        roc.append(roc_auc_score(y_test2, pred, average='micro'))
        prec.append(precision_score(y_test2, pred, average='micro'))
        rec.append(recall_score(y_test2, pred, average='micro'))
        f1.append(f1_score(y_test2, pred, average='micro'))

    # CSV row: dim, walk length, num walks, p, q, then mean ROC AUC / precision /
    # recall / F1 over the folds.
    fields = [EMBEDDING_DIM, WALK_LENGTH, NUM_WALK, P, Q,
              np.mean(roc), np.mean(prec), np.mean(rec), np.mean(f1)]
    result = ",".join(str(value) for value in fields) + "\n"
    with open("result_multiclass_ppi_node2vec.txt", "a") as f:
        f.write(result)
                        
                        

ppi_node2vec_full_embeddings_64_40_20_1_1.emb


HBox(children=(IntProgress(value=0, description='Computing transition probabilities', max=56944, style=Progres…

Generating walks (CPU: 1):   0%|          | 0/5 [00:00<?, ?it/s]
Generating walks (CPU: 2):   0%|          | 0/5 [00:00<?, ?it/s][A

Generating walks (CPU: 3):   0%|          | 0/5 [00:00<?, ?it/s][A[A







Generating walks (CPU: 4):   0%|          | 0/5 [00:00<?, ?it/s][A[A[A
Generating walks (CPU: 2):  40%|████      | 2/5 [08:28<12:43, 254.39s/it][A

Generating walks (CPU: 3):  40%|████      | 2/5 [08:32<12:49, 256.39s/it][A[A


Generating walks (CPU: 1):  40%|████      | 2/5 [08:36<12:54, 258.06s/it][A[A[A
Generating walks (CPU: 2):  60%|██████    | 3/5 [16:34<10:47, 323.80s/it][A

Generating walks (CPU: 3):  60%|██████    | 3/5 [16:43<10:53, 326.54s/it][A[A


Generating walks (CPU: 1):  60%|██████    | 3/5 [16:51<10:58, 329.28s/it][A[A[A
Generating walks (CPU: 2):  80%|████████  | 4/5 [24:38<06:11, 371.82s/it][A

Generating walks (CPU: 3):  80%|████████  | 4/5 [24:52<06:15, 375.38s/it][A[A


Generating walks (CPU: 1):  80%|████████  | 4/5 [25:05<06:18, 378.75s/it][A[A[A
Generating walks (CPU: 2): 100%|██████████| 5/5 [33:23<00:00, 417.77s/it][A

Generating walks (CPU: 3): 100%|██████████| 5/5 [33:43<00:00, 422.24s/it][A[A


Generating walks (CPU: 1): 100%|█████

ppi_node2vec_full_embeddings_64_40_50_1_1.emb


HBox(children=(IntProgress(value=0, description='Computing transition probabilities', max=56944, style=Progres…

Generating walks (CPU: 1):   0%|          | 0/13 [00:00<?, ?it/s]
Generating walks (CPU: 2):   0%|          | 0/13 [00:00<?, ?it/s][A

Generating walks (CPU: 3):   0%|          | 0/12 [00:00<?, ?it/s][A[A







Generating walks (CPU: 4):   0%|          | 0/12 [00:00<?, ?it/s][A[A[A

Generating walks (CPU: 3):  17%|█▋        | 2/12 [08:11<40:57, 245.76s/it][A[A


Generating walks (CPU: 4):  17%|█▋        | 2/12 [08:11<40:58, 245.86s/it][A[A[A
Generating walks (CPU: 1):  15%|█▌        | 2/13 [08:19<45:47, 249.80s/it][A

Generating walks (CPU: 3):  25%|██▌       | 3/12 [17:09<50:02, 333.56s/it][A[A


Generating walks (CPU: 4):  25%|██▌       | 3/12 [17:09<50:02, 333.58s/it][A[A[A
Generating walks (CPU: 1):  23%|██▎       | 3/13 [17:30<56:39, 340.00s/it][A

Generating walks (CPU: 3):  33%|███▎      | 4/12 [25:24<50:54, 381.78s/it][A[A


Generating walks (CPU: 4):  33%|███▎      | 4/12 [25:24<50:55, 381.90s/it][A[A[A
Generating walks (CPU: 1):  31%|███       | 4/13 [25:49<58:11, 387.89s/it][A

Generating walks (CPU: 3):  42%|████▏     | 5/12 [33:31<48:13, 413.36s/it][A[A


Generating walks (CPU: 4):  42%|████▏     | 5/12 [33:31<48:14, 413.53s/it][A[A[A
Generating walks (C

In [8]:
# node2vec sweep over the `p` parameter (all other settings fixed).
# For every configuration: train node2vec embeddings (or reuse the cached .emb
# file), score a per-label random forest with 5-fold cross-validation, and
# append the mean micro-averaged metrics as one CSV row to the results file.
EMBEDDING_DIMS = [64]
WALK_LENGTHS = [40]
NUM_WALKS = [10]
WORKERS = [4]
Ps = [0.2, 2]
Qs = [1]
# product() visits the grid in the same order as the equivalent nested loops.
for EMBEDDING_DIM, WALK_LENGTH, NUM_WALK, WORKER, P, Q in product(
        EMBEDDING_DIMS, WALK_LENGTHS, NUM_WALKS, WORKERS, Ps, Qs):
    filename = "ppi_node2vec_full_embeddings_{}_{}_{}_{}_{}.emb".format(
        EMBEDDING_DIM, WALK_LENGTH, NUM_WALK, P, Q)
    print(filename)
    # Walk generation is expensive, so embeddings are cached on disk and only
    # recomputed when the file for this configuration does not exist yet.
    if not Path(filename).is_file():
        node2vec = Node2Vec(G, dimensions=EMBEDDING_DIM, walk_length=WALK_LENGTH,
                            num_walks=NUM_WALK, workers=WORKER, p=P, q=Q,
                            temp_folder="tmp/")
        model = node2vec.fit(window=10, min_count=1, batch_words=4)
        model.wv.save_word2vec_format(filename)

    # The first line of the saved file is skipped; in every remaining row,
    # column 0 is read as the node id and the other columns as its vector.
    data_emb = np.loadtxt(filename, skiprows=1)
    emb_dim = data_emb.shape[1] - 1
    node_ids = np.array(list(G.nodes()), dtype=int)
    num_nodes = len(node_ids)
    # Row i of `embedding` is the vector of node id i (zeros if the id is absent
    # from the file).
    embedding = np.zeros((num_nodes, emb_dim))
    embedding[data_emb[:, 0].astype(int)] = data_emb[:, 1:]

    # Rows of X follow G.nodes() order, whereas row i of `target` belongs to
    # node id i. Index `target` by node id so features and labels stay aligned
    # even when G.nodes() does not iterate in 0..n-1 order.
    X = embedding[node_ids]
    y = target[node_ids]

    # Fixed seed: reruns are reproducible and every configuration is evaluated
    # on the same folds.
    shuffle_rng = np.random.RandomState(1)
    s = shuffle_rng.permutation(X.shape[0])
    X2 = X[s]
    y2 = y[s]

    roc = []
    prec = []
    rec = []
    f1 = []
    kf = KFold(n_splits=5)
    for train_index, test_index in kf.split(X2):
        X_train2, X_test2 = X2[train_index], X2[test_index]
        y_train2, y_test2 = y2[train_index], y2[test_index]
        # One independent random forest per label column.
        forest = RandomForestClassifier(random_state=1, verbose=1, n_estimators=10)
        clf = MultiOutputClassifier(forest, n_jobs=-1)
        clf.fit(X_train2, y_train2)

        pred = clf.predict(X_test2)

        # NOTE(review): ROC AUC is computed from hard 0/1 predictions rather than
        # predicted scores; kept as-is so new rows stay comparable with the rows
        # already in the results file.
        roc.append(roc_auc_score(y_test2, pred, average='micro'))
        prec.append(precision_score(y_test2, pred, average='micro'))
        rec.append(recall_score(y_test2, pred, average='micro'))
        f1.append(f1_score(y_test2, pred, average='micro'))

    # CSV row: dim, walk length, num walks, p, q, then mean ROC AUC / precision /
    # recall / F1 over the folds.
    fields = [EMBEDDING_DIM, WALK_LENGTH, NUM_WALK, P, Q,
              np.mean(roc), np.mean(prec), np.mean(rec), np.mean(f1)]
    result = ",".join(str(value) for value in fields) + "\n"
    with open("result_multiclass_ppi_node2vec.txt", "a") as f:
        f.write(result)
                        
                        
                        

ppi_node2vec_full_embeddings_64_40_10_0.2_1.emb


HBox(children=(IntProgress(value=0, description='Computing transition probabilities', max=56944, style=Progres…

Generating walks (CPU: 1):   0%|          | 0/3 [00:00<?, ?it/s]





Generating walks (CPU: 2):   0%|          | 0/3 [00:00<?, ?it/s][A

Generating walks (CPU: 3):   0%|          | 0/2 [00:00<?, ?it/s][A[A


Generating walks (CPU: 4):   0%|          | 0/2 [00:00<?, ?it/s][A[A[A


Generating walks (CPU: 4): 100%|██████████| 2/2 [09:22<00:00, 281.28s/it][A[A[A
Generating walks (CPU: 2):  67%|██████▋   | 2/3 [09:29<04:44, 284.92s/it][A

Generating walks (CPU: 4): 100%|██████████| 2/2 [18:39<00:00, 559.54s/it][A[A

Generating walks (CPU: 3): 100%|██████████| 2/2 [18:54<00:00, 567.30s/it][A
Generating walks (CPU: 2): 100%|██████████| 3/3 [22:43<00:00, 454.56s/it]
Generating walks (CPU: 1): 100%|██████████| 3/3 [22:45<00:00, 455.21s/it]


ppi_node2vec_full_embeddings_64_40_10_2_1.emb


HBox(children=(IntProgress(value=0, description='Computing transition probabilities', max=56944, style=Progres…

Generating walks (CPU: 1):   0%|          | 0/3 [00:00<?, ?it/s]
Generating walks (CPU: 2):   0%|          | 0/3 [00:00<?, ?it/s][A






Generating walks (CPU: 3):   0%|          | 0/2 [00:00<?, ?it/s][A[A


Generating walks (CPU: 4):   0%|          | 0/2 [00:00<?, ?it/s][A[A[A


Generating walks (CPU: 4): 100%|██████████| 2/2 [10:32<00:00, 316.07s/it][A[A[A
Generating walks (CPU: 2):  67%|██████▋   | 2/3 [10:33<05:16, 316.73s/it][A

Generating walks (CPU: 4): 100%|██████████| 2/2 [21:08<00:00, 634.19s/it][A[A

Generating walks (CPU: 3): 100%|██████████| 2/2 [21:10<00:00, 635.00s/it][A
Generating walks (CPU: 1): 100%|██████████| 3/3 [21:15<00:00, 415.08s/it]
Generating walks (CPU: 1): 100%|██████████| 3/3 [25:17<00:00, 505.95s/it]
Generating walks (CPU: 2): 100%|██████████| 3/3 [25:53<00:00, 517.86s/it]


In [9]:
# node2vec sweep over the `q` parameter (all other settings fixed).
# For every configuration: train node2vec embeddings (or reuse the cached .emb
# file), score a per-label random forest with 5-fold cross-validation, and
# append the mean micro-averaged metrics as one CSV row to the results file.
EMBEDDING_DIMS = [64]
WALK_LENGTHS = [40]
NUM_WALKS = [10]
WORKERS = [4]
Ps = [1]
Qs = [0.2, 2]

# product() visits the grid in the same order as the equivalent nested loops.
for EMBEDDING_DIM, WALK_LENGTH, NUM_WALK, WORKER, P, Q in product(
        EMBEDDING_DIMS, WALK_LENGTHS, NUM_WALKS, WORKERS, Ps, Qs):
    filename = "ppi_node2vec_full_embeddings_{}_{}_{}_{}_{}.emb".format(
        EMBEDDING_DIM, WALK_LENGTH, NUM_WALK, P, Q)
    print(filename)
    # Walk generation is expensive, so embeddings are cached on disk and only
    # recomputed when the file for this configuration does not exist yet.
    if not Path(filename).is_file():
        node2vec = Node2Vec(G, dimensions=EMBEDDING_DIM, walk_length=WALK_LENGTH,
                            num_walks=NUM_WALK, workers=WORKER, p=P, q=Q,
                            temp_folder="tmp/")
        model = node2vec.fit(window=10, min_count=1, batch_words=4)
        model.wv.save_word2vec_format(filename)

    # The first line of the saved file is skipped; in every remaining row,
    # column 0 is read as the node id and the other columns as its vector.
    data_emb = np.loadtxt(filename, skiprows=1)
    emb_dim = data_emb.shape[1] - 1
    node_ids = np.array(list(G.nodes()), dtype=int)
    num_nodes = len(node_ids)
    # Row i of `embedding` is the vector of node id i (zeros if the id is absent
    # from the file).
    embedding = np.zeros((num_nodes, emb_dim))
    embedding[data_emb[:, 0].astype(int)] = data_emb[:, 1:]

    # Rows of X follow G.nodes() order, whereas row i of `target` belongs to
    # node id i. Index `target` by node id so features and labels stay aligned
    # even when G.nodes() does not iterate in 0..n-1 order.
    X = embedding[node_ids]
    y = target[node_ids]

    # Fixed seed: reruns are reproducible and every configuration is evaluated
    # on the same folds.
    shuffle_rng = np.random.RandomState(1)
    s = shuffle_rng.permutation(X.shape[0])
    X2 = X[s]
    y2 = y[s]

    roc = []
    prec = []
    rec = []
    f1 = []
    kf = KFold(n_splits=5)
    for train_index, test_index in kf.split(X2):
        X_train2, X_test2 = X2[train_index], X2[test_index]
        y_train2, y_test2 = y2[train_index], y2[test_index]
        # One independent random forest per label column.
        forest = RandomForestClassifier(random_state=1, verbose=1, n_estimators=10)
        clf = MultiOutputClassifier(forest, n_jobs=-1)
        clf.fit(X_train2, y_train2)

        pred = clf.predict(X_test2)

        # NOTE(review): ROC AUC is computed from hard 0/1 predictions rather than
        # predicted scores; kept as-is so new rows stay comparable with the rows
        # already in the results file.
        roc.append(roc_auc_score(y_test2, pred, average='micro'))
        prec.append(precision_score(y_test2, pred, average='micro'))
        rec.append(recall_score(y_test2, pred, average='micro'))
        f1.append(f1_score(y_test2, pred, average='micro'))

    # CSV row: dim, walk length, num walks, p, q, then mean ROC AUC / precision /
    # recall / F1 over the folds.
    fields = [EMBEDDING_DIM, WALK_LENGTH, NUM_WALK, P, Q,
              np.mean(roc), np.mean(prec), np.mean(rec), np.mean(f1)]
    result = ",".join(str(value) for value in fields) + "\n"
    with open("result_multiclass_ppi_node2vec.txt", "a") as f:
        f.write(result)
                        
                        
                        

ppi_node2vec_full_embeddings_64_10_40_1_0.2.emb


HBox(children=(IntProgress(value=0, description='Computing transition probabilities', max=56944, style=Progres…

Generating walks (CPU: 1):   0%|          | 0/10 [00:00<?, ?it/s]
Generating walks (CPU: 2):   0%|          | 0/10 [00:00<?, ?it/s][A

Generating walks (CPU: 3):   0%|          | 0/10 [00:00<?, ?it/s][A[A







Generating walks (CPU: 4):   0%|          | 0/10 [00:00<?, ?it/s][A[A[A


Generating walks (CPU: 4):  20%|██        | 2/10 [02:13<08:53, 66.69s/it][A[A[A

Generating walks (CPU: 3):  20%|██        | 2/10 [02:14<08:58, 67.25s/it][A[A
Generating walks (CPU: 1):  20%|██        | 2/10 [02:16<09:07, 68.42s/it][A


Generating walks (CPU: 4):  30%|███       | 3/10 [04:11<09:34, 82.12s/it][A[A[A

Generating walks (CPU: 3):  30%|███       | 3/10 [04:13<09:39, 82.73s/it][A[A
Generating walks (CPU: 1):  30%|███       | 3/10 [04:18<09:49, 84.26s/it][A


Generating walks (CPU: 4):  40%|████      | 4/10 [06:25<09:46, 97.74s/it][A[A[A

Generating walks (CPU: 3):  40%|████      | 4/10 [06:28<09:50, 98.44s/it][A[A
Generating walks (CPU: 1):  40%|████      | 4/10 [06:35<10:01, 100.28s/it][A


Generating walks (CPU: 4):  50%|█████     | 5/10 [08:21<08:35, 103.18s/it][A[A[A

Generating walks (CPU: 3):  50%|█████     | 5/10 [08:24<08:39, 103.81s/it][A[A
Generating walks (CPU: 1):  

ppi_node2vec_full_embeddings_64_10_40_1_2.emb


HBox(children=(IntProgress(value=0, description='Computing transition probabilities', max=56944, style=Progres…

Generating walks (CPU: 1):   0%|          | 0/10 [00:00<?, ?it/s]
Generating walks (CPU: 2):   0%|          | 0/10 [00:00<?, ?it/s][A

Generating walks (CPU: 3):   0%|          | 0/10 [00:00<?, ?it/s][A[A







Generating walks (CPU: 4):   0%|          | 0/10 [00:00<?, ?it/s][A[A[A


Generating walks (CPU: 4):  20%|██        | 2/10 [01:53<07:32, 56.53s/it][A[A[A
Generating walks (CPU: 2):  20%|██        | 2/10 [01:55<07:40, 57.54s/it][A

Generating walks (CPU: 1):  20%|██        | 2/10 [01:56<07:44, 58.04s/it][A[A


Generating walks (CPU: 4):  30%|███       | 3/10 [03:45<08:32, 73.19s/it][A[A[A
Generating walks (CPU: 2):  30%|███       | 3/10 [03:48<08:40, 74.37s/it][A

Generating walks (CPU: 1):  30%|███       | 3/10 [03:50<08:44, 74.95s/it][A[A


Generating walks (CPU: 4):  40%|████      | 4/10 [05:59<09:09, 91.60s/it][A[A[A
Generating walks (CPU: 2):  40%|████      | 4/10 [06:05<09:17, 92.98s/it][A

Generating walks (CPU: 1):  40%|████      | 4/10 [06:08<09:22, 93.74s/it][A[A


Generating walks (CPU: 4):  50%|█████     | 5/10 [07:57<08:17, 99.42s/it][A[A[A
Generating walks (CPU: 2):  50%|█████     | 5/10 [08:03<08:23, 100.74s/it][A

Generating walks (CPU: 1):  50%|