In [1]:
import numpy as np
import networkx as nx

import json

In [2]:
# Load the graph from a tab-delimited edge list.
# NOTE(review): the file is named "ppi-walks" -- confirm it really contains the
# graph's edges and not random-walk co-occurrence pairs.
G = nx.read_edgelist('./datasets/PPI/ppi-walks.txt', delimiter='\t')

In [3]:
# Map every node ID (as a string key) to its rank when the IDs are ordered
# numerically; that rank is used below as the row index into X and y.
id2idx = {
    str(node_id): row
    for row, node_id in enumerate(sorted(int(node) for node in G.nodes()))
}

In [4]:
# Read the embedding file into the feature matrix X, one row per node, placed
# at the row index given by id2idx.
with open('./node2vec/emb/ppi.emb', 'r') as handle:
    # Header line: "<number of embeddings> <embedding dimension>".
    (n, d) = map(int, handle.readline().split())

    # Zero-initialise (rather than np.empty) so a node that is absent from the
    # file gets an all-zero vector instead of uninitialised memory.
    X = np.zeros((n, d), dtype=np.float32)

    # Iterate over every remaining line; blank lines are skipped instead of
    # silently ending the read early.
    for line in handle:
        fields = line.split()
        if not fields:
            continue
        # First field is the node ID, the rest are the embedding components.
        node_id, emb = fields[0], fields[1:]
        X[id2idx[node_id]] = [float(value) for value in emb]

In [5]:
# Load the per-node label vectors; only the JSON read needs the open file.
with open('./datasets/PPI/ppi-class_map.json', 'r') as handle:
    class_map = json.load(handle)

# Label matrix aligned row-for-row with X; its width is taken from the label
# vector stored under key '0'.
y = np.zeros((len(X), len(class_map['0'])), dtype=np.float32)

# The loop variable is deliberately not called `id`, so the builtin is not
# rebound at notebook scope.
for node in G.nodes():
    y[id2idx[node]] = class_map[node]

In [6]:
# The neighbor trick: add to each node's embedding a small, degree-weighted sum
# of its neighbors' original embeddings:
#     X'[u] = X[u] + sum_{v in N(u)} lamb * (deg(v) / deg_avg) * X[v]
# NOTE: this cell reads X and rebinds it at the end, so re-running it without
# first re-running the embedding-loading cell applies the trick a second time.
lamb = 0.01
deg_avg = sum(deg for (_, deg) in G.degree()) / G.number_of_nodes()

# A neighbor's weight depends only on that neighbor, so compute it once per
# node rather than once per edge endpoint inside the double loop.
neighbor_weight = {v: lamb * (deg / deg_avg) for (v, deg) in G.degree()}

Xprime = X.copy()
for u in G.nodes():
    row = Xprime[id2idx[u]]  # view into Xprime, so += updates it in place
    for v in G.neighbors(u):
        # Always read from the untouched X so updates do not compound.
        row += neighbor_weight[v] * X[id2idx[v]]

X = Xprime.copy()

In [7]:
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import KFold
from sklearn.metrics import f1_score

# 10-fold cross-validation of a one-vs-rest logistic regression on (X, y),
# collecting the macro-averaged F1 score of each fold in `f1`.
KFOLD_SEED = 0  # fixes the shuffled fold assignment so reruns give the same scores

clf = OneVsRestClassifier(LogisticRegression(penalty='l2'), n_jobs=-1)

kf = KFold(n_splits=10, shuffle=True, random_state=KFOLD_SEED)
f1 = []
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    # zero_division=0: a label with no predicted/true positives scores 0
    # instead of raising a warning.
    f1.append(f1_score(y_test, y_pred, average='macro', zero_division=0))
    print(f1[-1])

0.2187783377203484
0.20806608008689084
0.20993544708692208
0.2145199465019607
0.2150689995731843
0.2137190111578868
0.20991265263034128
0.2143544786408242
0.2113626350222458
0.20827349604071616


In [8]:
# Average of the per-fold macro-F1 scores collected in `f1`.
np.mean(f1)

0.21239910844613202