In [9]:
import sys
from pathlib import Path
sys.path.append(str(Path.cwd().resolve().parent))

import pandas as pd
import networkx as nx

from src.DeepWalk import  DeepWalk
from src.evaluate import evaluation

from torch_geometric.datasets import Planetoid

## Define Functions

## Read Datasets

In [2]:
# Root folder handed to Planetoid for both datasets, resolved against the
# current working directory of the kernel.
path = Path.cwd() / 'graph_dataset'

cora_datasets = Planetoid(root=path, name='Cora')
pubmed_datasets = Planetoid(root=path, name='PubMed')

In [3]:
def _planetoid_to_frames(datasets):
    """Convert the first graph of a Planetoid dataset into pandas/networkx objects.

    Parameters
    ----------
    datasets : indexable dataset whose element 0 exposes ``edge_index`` and ``y``
        tensors (as the Planetoid datasets loaded earlier in this notebook do).

    Returns
    -------
    (edge_df, label_df, graph)
        edge_df  : DataFrame with one ``source``/``target`` row per entry of
                   ``edge_index`` (transposed so that pairs are rows).
        label_df : DataFrame with a single ``label`` column taken from ``y``.
        graph    : undirected ``nx.Graph`` built from ``edge_df``.
    """
    # Index once; the original indexed the dataset separately for edges and labels.
    data = datasets[0]

    edge_df = pd.DataFrame(data.edge_index.numpy().T, columns=['source', 'target'])
    label_df = pd.DataFrame(data.y.numpy().T, columns=['label'])

    graph = nx.Graph()
    # Register every node id up front, in label-row order, so that
    # graph.nodes() iterates 0..N-1 and nodes without edges are not dropped.
    # Assumes node ids in edge_index are row positions into y — TODO confirm.
    graph.add_nodes_from(range(len(label_df)))
    # Reverse duplicates (u, v)/(v, u) collapse into a single undirected edge.
    graph.add_edges_from(zip(edge_df['source'], edge_df['target']))
    return edge_df, label_df, graph


cora_edge, cora_label, cora_graph = _planetoid_to_frames(cora_datasets)
print('Cora: Num_of_nodes -', cora_graph.number_of_nodes(), ', Num_of_edges -', cora_graph.number_of_edges())

pub_edge, pub_label, pub_graph = _planetoid_to_frames(pubmed_datasets)
print('PubMed: Num_of_nodes -', pub_graph.number_of_nodes(), ', Num_of_edges -', pub_graph.number_of_edges())


Cora: Num_of_nodes - 2708 , Num_of_edges - 5278
PubMed: Num_of_nodes - 19717 , Num_of_edges - 44324


## Cora Dataset

In [4]:
# Two DeepWalk models over the same Cora graph; only the walk `method` differs.
rw_deepwalk = DeepWalk(
    cora_graph,
    method='random_walk',
)
erw_deepwalk = DeepWalk(
    cora_graph,
    method='efficient_random_walk',
)

In [5]:
%%time
rw_deepwalk.random_walk(walk_len=20, num_walks=30)
rw_deepwalk.train(embed_size=64, window_size=5, epochs=10)


num_walks:  81240
Loss after epoch 0: 1895561.625
Loss after epoch 1: 1101357.875
Loss after epoch 2: 1081624.0
Loss after epoch 3: 1026770.0
Loss after epoch 4: 1023102.0
Loss after epoch 5: 1012534.0
Loss after epoch 6: 1008904.0
Loss after epoch 7: 950972.5
Loss after epoch 8: 929293.0
Loss after epoch 9: 929387.0
CPU times: user 38.5 s, sys: 228 ms, total: 38.7 s
Wall time: 13.8 s


In [6]:
%%time
erw_deepwalk.random_walk(walk_len=20, num_walks=30)
erw_deepwalk.train(embed_size=64, window_size=5, epochs=10)

num_walks:  10338
Loss after epoch 0: 846984.875
Loss after epoch 1: 219158.75
Loss after epoch 2: 161289.5
Loss after epoch 3: 152859.25
Loss after epoch 4: 147556.125
Loss after epoch 5: 148136.125
Loss after epoch 6: 146765.25
Loss after epoch 7: 143407.5
Loss after epoch 8: 143148.375
Loss after epoch 9: 138777.25
CPU times: user 5.83 s, sys: 39.1 ms, total: 5.87 s
Wall time: 2.5 s


In [7]:
# Score both Cora models with the same settings.
# 0.3 is passed positionally; presumably the held-out fraction — confirm
# against src.evaluate.
# NOTE(review): the recorded test F1 is identical for both models to ~15
# digits; worth checking how embeddings are paired with label rows.
print('Cora with random walk')
evaluation(
    rw_deepwalk,
    cora_label,
    0.3,
    metric='f1',
)

print('Cora with efficient random walk')
evaluation(
    erw_deepwalk,
    cora_label,
    0.3,
    metric='f1',
)

Cora with random walk
Training f1:  0.39313984168865435 Testing f1 0.29889298892988936
Cora with efficient random walk
Training f1:  0.3952506596306068 Testing f1 0.2988929889298893


## PubMed Dataset

In [8]:
# Rebind both model names to fresh DeepWalk instances over the PubMed graph;
# the objects previously bound to these names are no longer reachable through them.
rw_deepwalk = DeepWalk(
    pub_graph,
    method='random_walk',
)
erw_deepwalk = DeepWalk(
    pub_graph,
    method='efficient_random_walk',
)

In [9]:
%%time
rw_deepwalk.random_walk(walk_len=20, num_walks=20)
rw_deepwalk.train(embed_size=64, window_size=5, epochs=10)

num_walks:  394340
Loss after epoch 0: 8340368.5
Loss after epoch 1: 3780346.5
Loss after epoch 2: 3703453.0
Loss after epoch 3: 3254304.0
Loss after epoch 4: 3087558.0
Loss after epoch 5: 3041380.0
Loss after epoch 6: 3016232.0
Loss after epoch 7: 2980394.0
Loss after epoch 8: 2779744.0
Loss after epoch 9: 2054732.0
CPU times: user 3min 27s, sys: 854 ms, total: 3min 27s
Wall time: 1min 13s


In [10]:
%%time
erw_deepwalk.random_walk(walk_len=20, num_walks=20)
erw_deepwalk.train(embed_size=64, window_size=5, epochs=10)

num_walks:  78502
Loss after epoch 0: 4788352.0
Loss after epoch 1: 1137961.5
Loss after epoch 2: 935781.0
Loss after epoch 3: 880480.5
Loss after epoch 4: 827477.0
Loss after epoch 5: 751264.0
Loss after epoch 6: 735263.0
Loss after epoch 7: 729302.0
Loss after epoch 8: 722576.0
Loss after epoch 9: 714707.0
CPU times: user 48.9 s, sys: 250 ms, total: 49.2 s
Wall time: 28.3 s


In [11]:
# Score both PubMed models with the same settings.
# 0.3 is passed positionally; presumably the held-out fraction — confirm
# against src.evaluate.
print('PubMed with random walk')
evaluation(
    rw_deepwalk,
    pub_label,
    0.3,
    metric='f1',
)

print('PubMed with efficient random walk')
evaluation(
    erw_deepwalk,
    pub_label,
    0.3,
    metric='f1',
)

PubMed with random walk
Training f1:  0.550061589739874 Testing f1 0.3992224475997295
PubMed with efficient random walk
Training f1:  0.5434533729439895 Testing f1 0.4000338066260987
