In [None]:
from pathlib import Path
import pandas as pd
import networkx as nx

from src.DeepWalk import  DeepWalk
from src.evaluate import evaluation

from torch_geometric.datasets import Planetoid

: 

## Define Functions

## Read Datasets

In [2]:
# Root folder (under the current working directory) handed to Planetoid for both benchmarks.
path = Path.cwd().joinpath('graph_dataset')
cora_datasets = Planetoid(root=path, name='Cora')
pubmed_datasets = Planetoid(root=path, name='PubMed')

In [3]:
def build_graph(dataset, name):
    """Turn the first graph of ``dataset`` into pandas frames and a NetworkX graph.

    Parameters
    ----------
    dataset : indexable dataset whose element 0 exposes ``edge_index`` and ``y``
        tensors (here: the Planetoid datasets loaded above).
    name : str
        Label used as the prefix of the printed summary line.

    Returns
    -------
    tuple
        ``(edges, labels, graph)`` where ``edges`` has the columns
        ``source``/``target`` (one row per column of ``edge_index``), ``labels``
        has the single column ``label`` and ``graph`` is built from ``edges``
        with ``nx.from_pandas_edgelist``.
    """
    data = dataset[0]  # index once; both the edges and the labels come from this object
    # edge_index is transposed so that every row holds one (source, target) pair.
    edges = pd.DataFrame(data.edge_index.numpy().T, columns=['source', 'target'])
    # y maps onto the single 'label' column, so no transpose is needed here.
    labels = pd.DataFrame(data.y.numpy(), columns=['label'])
    graph = nx.from_pandas_edgelist(edges, source='source', target='target')
    print(f'{name}: Num_of_nodes -', graph.number_of_nodes(), ', Num_of_edges -', graph.number_of_edges())
    return edges, labels, graph


cora_edge, cora_label, cora_graph = build_graph(cora_datasets, 'Cora')
pub_edge, pub_label, pub_graph = build_graph(pubmed_datasets, 'PubMed')


Cora: Num_of_nodes - 2708 , Num_of_edges - 5278
PubMed: Num_of_nodes - 19717 , Num_of_edges - 44324


## Cora Dataset

In [4]:
# One DeepWalk model per walk strategy, both built on the same Cora graph.
rw_deepwalk, erw_deepwalk = (
    DeepWalk(cora_graph, method=strategy)
    for strategy in ('random_walk', 'efficient_random_walk')
)

In [5]:
%%time
# Timed cell: walk generation plus embedding training for the plain 'random_walk' model on Cora.
# The recorded run reports 81240 walks, i.e. 2708 nodes x num_walks=30.
rw_deepwalk.random_walk(walk_len=20, num_walks=30)
# The recorded run logs one "Loss after epoch" line for each of the 10 epochs.
# NOTE(review): embed_size / window_size are presumably the embedding dimension and the
# context window of the underlying model -- confirm in src.DeepWalk.
rw_deepwalk.train(embed_size=64, window_size=5, epochs=10)


num_walks:  81240
Loss after epoch 0: 1862587.5
Loss after epoch 1: 1103390.0
Loss after epoch 2: 1084705.5
Loss after epoch 3: 1023409.0
Loss after epoch 4: 1018460.0
Loss after epoch 5: 1016488.5
Loss after epoch 6: 1014094.0
Loss after epoch 7: 945643.5
Loss after epoch 8: 925762.0
Loss after epoch 9: 921778.0
CPU times: user 38.2 s, sys: 233 ms, total: 38.4 s
Wall time: 13.8 s


In [6]:
%%time
# Timed cell: same walk/train arguments as the plain random-walk cell, but on the
# 'efficient_random_walk' model, so the two timings are directly comparable.
# The recorded run reports 10256 walks for these arguments.
erw_deepwalk.random_walk(walk_len=20, num_walks=30)
# The recorded run logs one "Loss after epoch" line for each of the 10 epochs.
erw_deepwalk.train(embed_size=64, window_size=5, epochs=10)

num_walks:  10256
Loss after epoch 0: 846839.3125
Loss after epoch 1: 217447.9375
Loss after epoch 2: 160638.875
Loss after epoch 3: 151349.5
Loss after epoch 4: 148165.5
Loss after epoch 5: 146211.875
Loss after epoch 6: 144184.25
Loss after epoch 7: 144182.25
Loss after epoch 8: 141802.25
Loss after epoch 9: 138455.0
CPU times: user 5.68 s, sys: 48.4 ms, total: 5.73 s
Wall time: 2.48 s


In [7]:
# Score both Cora models with identical evaluation arguments, announcing each one first.
for caption, walker in (
    ('Cora with random walk', rw_deepwalk),
    ('Cora with efficient random walk', erw_deepwalk),
):
    print(caption)
    evaluation(walker, cora_label, 0.3, metric='f1')

Cora with random walk
Training f1:  0.3837467018469657 Testing f1 0.29815498154981546
Cora with efficient random walk
Training f1:  0.4001055408970976 Testing f1 0.29864698646986476


## PubMed Dataset

In [8]:
# One DeepWalk model per walk strategy, both built on the same PubMed graph.
# Note: this rebinds rw_deepwalk / erw_deepwalk, replacing the models created earlier.
rw_deepwalk, erw_deepwalk = (
    DeepWalk(pub_graph, method=strategy)
    for strategy in ('random_walk', 'efficient_random_walk')
)

In [9]:
%%time
# Timed cell: walk generation plus embedding training for the plain 'random_walk' model on PubMed.
# The recorded run reports 394340 walks, i.e. 19717 nodes x num_walks=20.
rw_deepwalk.random_walk(walk_len=20, num_walks=20)
# The recorded run logs one "Loss after epoch" line for each of the 10 epochs.
# NOTE(review): embed_size / window_size are presumably the embedding dimension and the
# context window of the underlying model -- confirm in src.DeepWalk.
rw_deepwalk.train(embed_size=64, window_size=5, epochs=10)

num_walks:  394340
Loss after epoch 0: 8345724.0
Loss after epoch 1: 3785126.0
Loss after epoch 2: 3704197.0
Loss after epoch 3: 3235751.0
Loss after epoch 4: 3067702.0
Loss after epoch 5: 3042934.0
Loss after epoch 6: 3012326.0
Loss after epoch 7: 2992820.0
Loss after epoch 8: 2782876.0
Loss after epoch 9: 2048484.0
CPU times: user 3min 26s, sys: 937 ms, total: 3min 27s
Wall time: 1min 13s


In [10]:
%%time
# Timed cell: same walk/train arguments as the plain random-walk PubMed cell, but on the
# 'efficient_random_walk' model, so the two timings are directly comparable.
# The recorded run reports 78375 walks for these arguments.
erw_deepwalk.random_walk(walk_len=20, num_walks=20)
# The recorded run logs one "Loss after epoch" line for each of the 10 epochs.
erw_deepwalk.train(embed_size=64, window_size=5, epochs=10)

num_walks:  78375
Loss after epoch 0: 4791145.0
Loss after epoch 1: 1132212.5
Loss after epoch 2: 930495.5
Loss after epoch 3: 873189.0
Loss after epoch 4: 825332.0
Loss after epoch 5: 746745.0
Loss after epoch 6: 734467.0
Loss after epoch 7: 715256.0
Loss after epoch 8: 718345.0
Loss after epoch 9: 714259.0
CPU times: user 48.7 s, sys: 228 ms, total: 48.9 s
Wall time: 28.4 s


In [11]:
# Score both PubMed models with identical evaluation arguments, announcing each one first.
for caption, walker in (
    ('PubMed with random walk', rw_deepwalk),
    ('PubMed with efficient random walk', erw_deepwalk),
):
    print(caption)
    evaluation(walker, pub_label, 0.3, metric='f1')

PubMed with random walk
Training f1:  0.5541627418303021 Testing f1 0.39827586206896554
PubMed with efficient random walk
Training f1:  0.5391783204115643 Testing f1 0.3982420554428668
