In [1]:
import numpy as np
from temporal_walk import TemporalWalk

## Open-ended random walks

In [2]:
# Configure the walk sampler for the open-ended run: num_walks is one million
# (matching the count reported by len(walks) further down) and len_walk is 100.
# NOTE(review): the exact meaning of picker_type='Uniform' and
# max_time_capacity=None is defined by the temporal_walk package -- confirm
# against its documentation.
temporal_walk = TemporalWalk(
    num_walks=1_000_000,
    len_walk=100,
    picker_type='Uniform',
    max_time_capacity=None
)

In [3]:
# NOTE(review): `test` has to resolve to a project-local module here, because
# the standard library's `test` package provides no `read_data`. The shared
# name makes this import depend on sys.path ordering; consider renaming.
from test import read_data

# Load the 'reddit' dataset; later cells iterate it as (u, i, t) triples.
edges_data = read_data('reddit')

In [4]:
# Hand all loaded edges to the sampler before any walks are requested.
temporal_walk.add_multiple_edges(edges_data)

In [5]:
# Sample the walks; the result is a sized, iterable collection of walks.
walks = temporal_walk.get_random_walks()

In [6]:
# Sanity check: compare the number of walks returned with the num_walks requested.
len(walks)

1000000

In [7]:
# Strip trailing zeros from every walk ('b' trims the back end only).
# NOTE(review): presumably the zeros are padding up to len_walk -- confirm that
# 0 is never a real node id, otherwise such a node at a walk's end is dropped.
trimmed_walks = [np.trim_zeros(walk, 'b') for walk in walks]

In [8]:
from gensim.models import Word2Vec

# Skip-gram Word2Vec model that will embed the node ids found in the walks.
# The model is only configured here; vocabulary and training come later.
word2vec_model = Word2Vec(
    vector_size=128,  # dimensionality of each embedding
    window=10,        # context window size
    min_count=1,      # keep every token, even ones seen only once
    sg=1,             # 1 selects skip-gram
    epochs=10,        # default number of training passes stored on the model
    workers=10,       # number of training worker threads
)

In [9]:
# Vocabulary scan: each walk is passed as a list of string tokens, so node ids
# are stringified first. The corpus is built inline and not kept afterwards.
word2vec_model.build_vocab(
    [list(map(str, walk)) for walk in trimmed_walks]
)

In [10]:
# Turn every trimmed walk into a "sentence" of string tokens.
string_walks = [[str(node) for node in walk] for walk in trimmed_walks]

# Reuse the sentence count recorded by build_vocab() and the epoch count set on
# the model instead of repeating literals, so this call cannot drift out of
# sync with the constructor or the vocabulary pass.
# train() returns a pair of word counts, which is displayed as the cell output.
word2vec_model.train(
    string_walks,
    total_examples=word2vec_model.corpus_count,
    epochs=word2vec_model.epochs,
)

(27697668, 28486720)

In [11]:
# Vocabulary size: how many distinct node tokens from the walks got an embedding.
len(word2vec_model.wv)

27777

In [12]:
# Collect every distinct endpoint over all edges; the third field of each
# triple is not needed for this count.
node_set = {
    endpoint
    for source, target, _ in edges_data
    for endpoint in (source, target)
}

In [13]:
# Number of distinct nodes in the edge list, to compare with the vocabulary
# size above: a smaller vocabulary means some nodes never appeared in a walk.
len(node_set)

27863

## Walks for specific unique nodes

In [14]:
# Fresh sampler for the per-node run, this time with num_walks=50.
# NOTE(review): this rebinds `temporal_walk`, so the sampler built earlier is
# no longer reachable under this name. Presumably num_walks applies per start
# node when walks are requested for specific nodes -- confirm against the
# temporal_walk documentation.
temporal_walk = TemporalWalk(
    num_walks=50,
    len_walk=100,
    picker_type='Uniform',
    max_time_capacity=None
)

In [15]:
# The new sampler starts empty, so the same edge list is loaded into it again.
temporal_walk.add_multiple_edges(edges_data)

In [16]:
# Request walks for every node id the sampler reports. The result is consumed
# below through .items(), i.e. as a mapping whose values are collections of walks.
# NOTE(review): the meaning of the 'Random' argument is defined by the
# temporal_walk package -- confirm against its documentation.
walks_for_nodes = temporal_walk.get_random_walks_for_nodes('Random', temporal_walk.get_node_ids())

In [17]:
# Flatten the per-node walks into one list, stripping trailing zeros from each
# walk ('b' trims the back end only).
# The mapping key is not needed, and the inner collection gets its own name
# inside the comprehension so that the `walks` list bound by an earlier cell is
# not overwritten as a side effect of running this cell.
trimmed_walks = [
    np.trim_zeros(walk, 'b')
    for _node, node_walks in walks_for_nodes.items()
    for walk in node_walks
]

In [18]:
# Word2Vec was already imported by an earlier cell; the repeated import lets
# this section run without the first Word2Vec cell.
from gensim.models import Word2Vec

# Same skip-gram configuration as the first model, rebuilt from scratch so the
# per-node walks are embedded independently of the earlier training run.
word2vec_model = Word2Vec(
    vector_size=128,  # dimensionality of each embedding
    epochs=10,  # default number of training passes stored on the model
    window=10,  # context window size
    min_count=1,  # keep every token, even ones seen only once
    sg=1,  # 1 selects skip-gram
    workers=10  # number of training worker threads
)

In [19]:
# Vocabulary scan over the per-node walks: each walk is passed as a list of
# string tokens. The corpus is built inline and not kept afterwards.
word2vec_model.build_vocab(
    [list(map(str, walk)) for walk in trimmed_walks]
)

In [20]:
# Turn every trimmed per-node walk into a "sentence" of string tokens.
string_walks = [[str(node) for node in walk] for walk in trimmed_walks]

# Reuse the sentence count recorded by build_vocab() and the epoch count set on
# the model instead of repeating literals, so this call cannot drift out of
# sync with the constructor or the vocabulary pass.
# train() returns a pair of word counts, which is displayed as the cell output.
word2vec_model.train(
    string_walks,
    total_examples=word2vec_model.corpus_count,
    epochs=word2vec_model.epochs,
)

(36303523, 36759830)

In [21]:
# Vocabulary size after training on the per-node walks.
len(word2vec_model.wv)

27863

In [22]:
# Distinct node count again, for direct comparison with the vocabulary size
# above; equal values mean every node appeared in at least one walk.
len(node_set)

27863