In [None]:
!pip install gensim==4.1.2

Collecting gensim==4.1.2
  Downloading gensim-4.1.2-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (24.1 MB)
[K     |████████████████████████████████| 24.1 MB 81.6 MB/s 
Installing collected packages: gensim
  Attempting uninstall: gensim
    Found existing installation: gensim 3.6.0
    Uninstalling gensim-3.6.0:
      Successfully uninstalled gensim-3.6.0
Successfully installed gensim-4.1.2


In [None]:
from node2vec import *
from gensim.models import KeyedVectors
import numpy as np
import os 
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
# NOTE(review): PYTHONHASHSEED is read by the interpreter at start-up, so
# assigning it here does not change string hashing in the already-running
# kernel; it is only inherited by subprocesses launched afterwards. If
# hash-dependent reproducibility is required, set it before starting the
# kernel — confirm whether downstream code relies on it.
os.environ['PYTHONHASHSEED']='123'
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import make_scorer, f1_score, plot_roc_curve, auc, accuracy_score, roc_auc_score, roc_curve, matthews_corrcoef
from sklearn.model_selection import cross_validate, StratifiedKFold
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

In [None]:
def to_unweighted(G):
    """Set the ``'weight'`` attribute of every edge of ``G`` to 1.

    The graph is modified in place (no copy is made) and the very same
    object is returned so the call can be used inline.
    """
    for endpoints in G.edges():
        source, target = endpoints[0], endpoints[1]
        # Overwrite (or create) the weight stored on this edge.
        G[source][target]['weight'] = 1
    return G

def run_walks(graph, p, q, num_walks=20, walk_length=80, seed=1):
    """Simulate ``Node2Vec`` random walks on an unweighted copy of a graph.

    Parameters
    ----------
    graph
        Edge-list source handed to ``read_graph`` (the notebook passes a
        file name). It is read with ``weighted=True`` and every edge weight
        is then reset to 1 by ``to_unweighted``.
    p, q
        Forwarded unchanged to ``Node2Vec``.
        NOTE(review): presumably the node2vec return / in-out parameters —
        confirm against the ``node2vec`` module.
    num_walks, walk_length
        Forwarded to ``simulate_walks``. Defaults (20 and 80) are the values
        that were previously hard-coded.
    seed
        Seed applied to both ``random`` and ``np.random`` before the walker
        is built. Default 1 matches the previous hard-coded value.

    Returns
    -------
    Whatever ``simulate_walks`` returns.
    """
    # Imported locally so the function does not depend on `random` leaking
    # into the namespace through `from node2vec import *`.
    import random

    unweighted_graph = to_unweighted(read_graph(graph, weighted=True))

    # Seed both generators right before building the walker so repeated calls
    # with the same arguments start from the same random state.
    random.seed(seed)
    np.random.seed(seed)

    walker = Node2Vec(unweighted_graph, p, q)
    walker.preprocess_transition_probs()
    return walker.simulate_walks(num_walks=num_walks, walk_length=walk_length)

def run_walks_hub(graph, h, num_walks=20, walk_length=80, seed=1):
    """Simulate ``HubsWalker`` random walks on a graph.

    Parameters
    ----------
    graph
        Edge-list source handed to ``read_graph`` (the notebook passes a
        file name). It is read with ``weighted=True``.
    h
        Forwarded unchanged to ``HubsWalker``.
        NOTE(review): semantics of ``h`` are defined in the ``node2vec``
        module and are not visible here — confirm.
    num_walks, walk_length
        Forwarded to ``simulate_walks``. Defaults (20 and 80) are the values
        that were previously hard-coded.
    seed
        Seed applied to both ``random`` and ``np.random`` before the walker
        is built. Default 1 matches the previous hard-coded value.

    Returns
    -------
    Whatever ``simulate_walks`` returns.
    """
    # Imported locally so the function does not depend on `random` leaking
    # into the namespace through `from node2vec import *`.
    import random

    # NOTE(review): unlike run_walks, the edge weights are NOT reset with
    # to_unweighted here, although the models built from these walks are
    # named "*_noscore". Confirm whether HubsWalker ignores weights or
    # whether this is an oversight.
    weighted_graph = read_graph(graph, weighted=True)

    # Seed both generators right before building the walker so repeated calls
    # with the same arguments start from the same random state.
    random.seed(seed)
    np.random.seed(seed)

    walker = HubsWalker(weighted_graph, h)
    walker.preprocess_transition_probs()
    return walker.simulate_walks(num_walks=num_walks, walk_length=walk_length)

def embeddings(walks, dimensions, output):
    """Build an embedding model from ``walks`` by delegating to ``learn_embeddings``.

    ``walks``, ``dimensions`` and ``output`` are passed through as the first,
    second and fifth positional arguments; the third and fourth are fixed at
    10 and 5.
    NOTE(review): the meaning of 10 and 5 is defined by ``learn_embeddings``
    in the ``node2vec`` module and is not visible here — confirm.

    Returns whatever ``learn_embeddings`` returns.
    """
    return learn_embeddings(walks, dimensions, 10, 5, output)

In [None]:
# Node2Vec walks with p = 0.25, q = 2, embedded in 512 dimensions.
model_p25_q200_noscore = embeddings(
    walks=run_walks('PPI_BP_edgelist_EXP.txt', p=0.25, q=2),
    dimensions=512,
    output='model_p25_q200_NoScore.txt',
)

100%|██████████| 2968/2968 [00:01<00:00, 1502.39it/s]
Walk 1/20: 100%|██████████| 2968/2968 [00:21<00:00, 139.71it/s]
Walk 2/20: 100%|██████████| 2968/2968 [00:21<00:00, 138.08it/s]
Walk 3/20: 100%|██████████| 2968/2968 [00:21<00:00, 137.35it/s]
Walk 4/20: 100%|██████████| 2968/2968 [00:21<00:00, 137.25it/s]
Walk 5/20: 100%|██████████| 2968/2968 [00:21<00:00, 137.16it/s]
Walk 6/20: 100%|██████████| 2968/2968 [00:21<00:00, 139.87it/s]
Walk 7/20: 100%|██████████| 2968/2968 [00:21<00:00, 139.93it/s]
Walk 8/20: 100%|██████████| 2968/2968 [00:21<00:00, 138.61it/s]
Walk 9/20: 100%|██████████| 2968/2968 [00:21<00:00, 138.77it/s]
Walk 10/20: 100%|██████████| 2968/2968 [00:21<00:00, 140.16it/s]
Walk 11/20: 100%|██████████| 2968/2968 [00:21<00:00, 138.71it/s]
Walk 12/20: 100%|██████████| 2968/2968 [00:21<00:00, 140.72it/s]
Walk 13/20: 100%|██████████| 2968/2968 [00:21<00:00, 139.88it/s]
Walk 14/20: 100%|██████████| 2968/2968 [00:21<00:00, 139.66it/s]
Walk 15/20: 100%|██████████| 2968/2968 [00:20

In [None]:
# Node2Vec walks with p = 0.5, q = 1, embedded in 512 dimensions.
model_p50_q100_noscore = embeddings(
    walks=run_walks('PPI_BP_edgelist_EXP.txt', p=0.5, q=1),
    dimensions=512,
    output='model_p50_q100_NoScore.txt',
)

100%|██████████| 2968/2968 [00:01<00:00, 1544.09it/s]
Walk 1/20: 100%|██████████| 2968/2968 [00:19<00:00, 154.01it/s]
Walk 2/20: 100%|██████████| 2968/2968 [00:19<00:00, 152.32it/s]
Walk 3/20: 100%|██████████| 2968/2968 [00:19<00:00, 155.79it/s]
Walk 4/20: 100%|██████████| 2968/2968 [00:19<00:00, 155.58it/s]
Walk 5/20: 100%|██████████| 2968/2968 [00:19<00:00, 153.15it/s]
Walk 6/20: 100%|██████████| 2968/2968 [00:19<00:00, 154.28it/s]
Walk 7/20: 100%|██████████| 2968/2968 [00:19<00:00, 150.51it/s]
Walk 8/20: 100%|██████████| 2968/2968 [00:19<00:00, 152.32it/s]
Walk 9/20: 100%|██████████| 2968/2968 [00:19<00:00, 154.74it/s]
Walk 10/20: 100%|██████████| 2968/2968 [00:19<00:00, 155.40it/s]
Walk 11/20: 100%|██████████| 2968/2968 [00:19<00:00, 153.12it/s]
Walk 12/20: 100%|██████████| 2968/2968 [00:18<00:00, 156.70it/s]
Walk 13/20: 100%|██████████| 2968/2968 [00:19<00:00, 155.82it/s]
Walk 14/20: 100%|██████████| 2968/2968 [00:19<00:00, 155.15it/s]
Walk 15/20: 100%|██████████| 2968/2968 [00:19

In [None]:
# Node2Vec walks with p = 2, q = 0.25, embedded in 512 dimensions.
model_p200_q25_noscore = embeddings(
    walks=run_walks('PPI_BP_edgelist_EXP.txt', p=2, q=0.25),
    dimensions=512,
    output='model_p200_q25_NoScore.txt',
)

In [None]:
# HubsWalker walks with h = 0.5, embedded in 512 dimensions.
model_h50_noscore = embeddings(
    walks=run_walks_hub('PPI_BP_edgelist_EXP.txt', h=0.5),
    dimensions=512,
    output='model_h50_noscore.txt',
)
# HubsWalker walks with h = 0.75, embedded in 512 dimensions.
model_h75_noscore = embeddings(
    walks=run_walks_hub('PPI_BP_edgelist_EXP.txt', h=0.75),
    dimensions=512,
    output='model_h75_noscore.txt',
)

100%|██████████| 2968/2968 [00:02<00:00, 1035.62it/s]
Walk 1/20: 100%|██████████| 2968/2968 [00:20<00:00, 147.26it/s]
Walk 2/20: 100%|██████████| 2968/2968 [00:20<00:00, 142.71it/s]
Walk 3/20: 100%|██████████| 2968/2968 [00:20<00:00, 144.94it/s]
Walk 4/20: 100%|██████████| 2968/2968 [00:20<00:00, 142.13it/s]
Walk 5/20: 100%|██████████| 2968/2968 [00:20<00:00, 143.45it/s]
Walk 6/20: 100%|██████████| 2968/2968 [00:20<00:00, 144.81it/s]
Walk 7/20: 100%|██████████| 2968/2968 [00:20<00:00, 143.89it/s]
Walk 8/20: 100%|██████████| 2968/2968 [00:20<00:00, 146.96it/s]
Walk 9/20: 100%|██████████| 2968/2968 [00:20<00:00, 141.64it/s]
Walk 10/20: 100%|██████████| 2968/2968 [00:20<00:00, 147.29it/s]
Walk 11/20: 100%|██████████| 2968/2968 [00:20<00:00, 145.97it/s]
Walk 12/20: 100%|██████████| 2968/2968 [00:20<00:00, 143.15it/s]
Walk 13/20: 100%|██████████| 2968/2968 [00:20<00:00, 144.94it/s]
Walk 14/20: 100%|██████████| 2968/2968 [00:21<00:00, 139.46it/s]
Walk 15/20: 100%|██████████| 2968/2968 [00:20

In [None]:
# HubsWalker walks with h = 4, embedded in 512 dimensions.
model_h400_noscore = embeddings(
    walks=run_walks_hub('PPI_BP_edgelist_EXP.txt', h=4),
    dimensions=512,
    output='model_h400_noscore.txt',
)
# HubsWalker walks with h = 8, embedded in 512 dimensions.
model_h800_noscore = embeddings(
    walks=run_walks_hub('PPI_BP_edgelist_EXP.txt', h=8),
    dimensions=512,
    output='model_h800_noscore.txt',
)