In [2]:
import pandas as pd
import networkx as nx
import numpy as np
import stellargraph as sg
from stellargraph import StellarGraph
from stellargraph.data import EdgeSplitter
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegressionCV

In [3]:
# Parse the GraphML export with networkx, then wrap the result in a
# StellarGraph so the random-walk utilities can operate on it.
graphml_file = '../lm-vol/LANL_train_NTLM_Pikachu_filter_v2.graphml'
G = nx.read_graphml(graphml_file)
graph = sg.StellarGraph.from_networkx(G)

In [4]:
# Show StellarGraph's own text summary of the loaded graph.
graph_summary = graph.info()
print(graph_summary)

StellarGraph: Undirected multigraph
 Nodes: 19540, Edges: 22586

 Node types:
  default: [19540]
    Features: none
    Edge types: default-default->default

 Edge types:
    default-default->default: [22586]
        Weights: all 1 (default)
        Features: none


In [5]:
from stellargraph.data import BiasedRandomWalk
from gensim.models import Word2Vec
from gensim.models.callbacks import CallbackAny2Vec

# init callback class
class callback(CallbackAny2Vec):
    """
    Callback that prints the loss added by each training epoch.

    After every epoch the value of ``model.get_latest_training_loss()`` is
    read and the value recorded at the previous epoch is subtracted from it,
    so each printed figure is the increase since the last epoch rather than
    the running total.

    Attributes are documented inline in ``__init__``.
    """
    def __init__(self):
        # Number of epochs reported so far; used as the label in the printout.
        self.epoch = 0
        # Loss reading from the previous epoch. Starting at 0.0 means the
        # first epoch needs no special case and the attribute always exists.
        self.loss_previous_step = 0.0

    def on_train_begin(self, model):
        """Reset the loss baseline when a training run starts.

        NOTE(review): gensim is understood to restart its running loss at
        the beginning of every ``train()`` call; without this reset, reusing
        one callback instance for a second run would print a bogus (negative)
        first delta. Confirm against the installed gensim version.
        """
        self.loss_previous_step = 0.0

    def on_epoch_end(self, model):
        """Print the loss accumulated during the epoch that just finished."""
        loss = model.get_latest_training_loss()
        print('Loss after epoch {}: {}'.format(self.epoch, loss - self.loss_previous_step))
        self.epoch += 1
        self.loss_previous_step = loss

In [6]:
# Hyper-parameters for walk generation and Word2Vec training.
# Walk parameters (forwarded to BiasedRandomWalk.run):
p = 1.0  # passed as p=
q = 1.0  # passed as q=
# Word2Vec parameters:
dimensions = 128  # passed as vector_size=
num_walks = 20  # passed to the walker as n=
walk_length = 10  # passed to the walker as length=
window_size = 5  # passed as window=
num_iter = 500  # passed as epochs=
workers = 32  # passed as workers=; NOTE(review): confirm the host has this many cores

In [7]:
# --- Random walks -----------------------------------------------------------
# Start num_walks walks (length=walk_length) from every node of the graph.
rw = BiasedRandomWalk(graph)
walks = rw.run(graph.nodes(), n=num_walks, length=walk_length, p=p, q=q)
print(f"Number of random walks for Graph: {len(walks)}")

# --- Word2Vec training ------------------------------------------------------
# The walks are fed to Word2Vec as the training corpus. compute_loss=True is
# what makes get_latest_training_loss() meaningful for the callback below.
model = Word2Vec(
    walks,
    vector_size=dimensions,
    window=window_size,
    min_count=0,      # keep every node, even ones that appear only once
    sg=0,             # per gensim docs: 0 selects CBOW rather than skip-gram
    hs=0,             # no hierarchical softmax ...
    negative=10,      # ... negative sampling with 10 noise samples instead
    cbow_mean=1,      # average (rather than sum) the context vectors
    workers=workers,
    epochs=num_iter,
    compute_loss=True,
    callbacks=[callback()]
)
# The original evaluated this mid-cell and discarded the result; print it so
# the value is actually visible.
print(f"Cumulative training loss: {model.get_latest_training_loss()}")

# --- Persist ----------------------------------------------------------------
# The epoch count in the file name is taken from num_iter so the name cannot
# drift from the configuration (identical to the old path when num_iter=500).
# NOTE(review): "NTML" here differs from "NTLM" in the input graph file name;
# left unchanged because renaming would move the artefacts.
model.save(f"../lm-vol/2_28_NTML_Pikachu_{num_iter}_epochs_word2vec.model")
model.wv.save(f"../lm-vol/2_28_NTML_Pikachu_{num_iter}_epochs_word2vec.wordvectors")

Number of random walks for Graph: 390800
Loss after epoch 0: 157203.421875
Loss after epoch 1: 46996.859375
Loss after epoch 2: 33212.453125
Loss after epoch 3: 30628.359375
Loss after epoch 4: 27854.4375
Loss after epoch 5: 27815.15625
Loss after epoch 6: 25569.0625
Loss after epoch 7: 27087.3125
Loss after epoch 8: 21750.21875
Loss after epoch 9: 21013.0
Loss after epoch 10: 19970.46875
Loss after epoch 11: 20627.34375
Loss after epoch 12: 22963.4375
Loss after epoch 13: 23945.4375
Loss after epoch 14: 22568.09375
Loss after epoch 15: 19648.75
Loss after epoch 16: 19510.0
Loss after epoch 17: 20241.125
Loss after epoch 18: 19119.0
Loss after epoch 19: 19936.0
Loss after epoch 20: 19944.8125
Loss after epoch 21: 19399.9375
Loss after epoch 22: 17196.5625
Loss after epoch 23: 20411.4375
Loss after epoch 24: 20217.5625
Loss after epoch 25: 18100.25
Loss after epoch 26: 19466.3125
Loss after epoch 27: 19595.0
Loss after epoch 28: 19753.6875
Loss after epoch 29: 19120.5
Loss after epoch 3

Loss after epoch 268: 15294.0
Loss after epoch 269: 13210.5
Loss after epoch 270: 17076.5
Loss after epoch 271: 15973.0
Loss after epoch 272: 15662.0
Loss after epoch 273: 16639.5
Loss after epoch 274: 18248.5
Loss after epoch 275: 14006.5
Loss after epoch 276: 14509.0
Loss after epoch 277: 14125.0
Loss after epoch 278: 15392.5
Loss after epoch 279: 16254.0
Loss after epoch 280: 14927.5
Loss after epoch 281: 12765.0
Loss after epoch 282: 15955.0
Loss after epoch 283: 16619.5
Loss after epoch 284: 14608.5
Loss after epoch 285: 15971.0
Loss after epoch 286: 14561.0
Loss after epoch 287: 14471.0
Loss after epoch 288: 16653.5
Loss after epoch 289: 15757.0
Loss after epoch 290: 13630.0
Loss after epoch 291: 16666.5
Loss after epoch 292: 17616.5
Loss after epoch 293: 16381.5
Loss after epoch 294: 16104.5
Loss after epoch 295: 14256.0
Loss after epoch 296: 15300.5
Loss after epoch 297: 14616.5
Loss after epoch 298: 15962.5
Loss after epoch 299: 15129.0
Loss after epoch 300: 14789.0
Loss after

In [None]:
# NOTE(review): this cell was never executed (In [None]) and its file names
# say "100_epochs", while the configuration cell sets num_iter = 500 and the
# training cell already saves the model. Confirm whether this cell is still
# needed before running it.
# The strings contain no placeholders, so they are plain literals, not f-strings.
model.save("../lm-vol/2_21_100_epochs_word2vec.model")
model.wv.save("../lm-vol/2_21_100_epochs_word2vec.wordvectors")