# Walking

In [None]:
import logging

import mowl

# Start the JVM before the mOWL dataset module is imported below.
# "20g" is the memory string handed to mOWL (presumably the JVM heap size;
# confirm against the mowl.init_jvm documentation).
mowl.init_jvm("20g")

from mowl.datasets.ppi_yeast import PPIYeastSlimDataset  # noqa: E402

# Set the root logger to INFO so the logging.info calls in later cells are shown.
logging.basicConfig(level=logging.INFO)


## Loading a dataset

In [None]:
# Instantiate PPIYeastSlimDataset (imported from mowl.datasets.ppi_yeast).
# Later cells read `ds.ontology` for the projection and call `ds.get_labels()`
# for the t-SNE plot.
ds = PPIYeastSlimDataset()

## Projecting the ontology
First, we need to obtain a graph from the ontology. The following code projects the ontology into a set of edges using the DL2Vec projection method:

In [None]:
from mowl.projection.factory import projector_factory

# Build the "dl2vec" projector through the factory and project the dataset
# ontology. The resulting `edges` are what the walking cells consume.
dl2vec_options = {"bidirectional_taxonomy": True}
projector = projector_factory("dl2vec", **dl2vec_options)
edges = projector.project(ds.ontology)

## Generate Random Walks

In [None]:
from mowl.walking.factory import walking_factory

# Walker settings, collected in one place so they are easy to tweak.
# NOTE(review): `alpha` is passed together with the "node2vec" method here,
# while the explicit Node2Vec constructor call later in this notebook takes
# `p` and `q` instead. Confirm the factory actually uses `alpha` for node2vec.
walk_settings = {
    "num_walks": 10,
    "walk_length": 10,
    "alpha": 0.1,
    "workers": 16,
}
walker = walking_factory("node2vec", **walk_settings)

# Run the walks over the projected graph. The next cell reads them back
# from `walker.outfile`.
walker.walk(edges)

## Learning embeddings with Word2Vec

Once the walks are generated, we will use them to learn embeddings using the Word2Vec model:

In [None]:
import gensim
from gensim.models.word2vec import LineSentence

# Stream the walks file produced by the walker. LineSentence treats each line
# of the file as one sentence of whitespace-separated tokens.
sentences = LineSentence(walker.outfile)

# Word2Vec hyper-parameters (see the gensim Word2Vec documentation).
w2v_params = {
    "sg": 1,  # 1 selects skip-gram training
    "min_count": 1,  # keep tokens that appear only once
    "vector_size": 100,
    "window": 10,
    "epochs": 10,
    "workers": 16,
}
model = gensim.models.Word2Vec(sentences, **w2v_params)

# Keep only the trained keyed vectors; the plotting cell consumes `vectors`.
vectors = model.wv


## Plotting TSNE representations
Once the embeddings are ready, we can use them for different tasks. Here we use the t-SNE method to obtain a two-dimensional visual representation of them:

In [None]:
from mowl.visualization.base import TSNE

# Labels come from the dataset and are handed to the plot together with the
# embedding vectors.
labels = ds.get_labels()

tsne = TSNE(vectors, labels)
# NOTE(review): the first argument is presumably the number of t-SNE
# iterations; confirm against mowl.visualization.base.TSNE.generate_points.
tsne.generate_points(5000, workers=8)
tsne.show()

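## Putting it all together and trying different walking methods
Now we can combine the steps above into helper functions and test them with the different walking methods available in mOWL. Note that the helpers used below (`getOntProjection`, `learnEmbeddingsWithWord2Vec` and `plotTSNE`) must be defined before running these cells.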

In [None]:
from mowl.walking.node2vec.model import Node2Vec as N2V
from mowl.walking.deepwalk.model import DeepWalk as DW
from mowl.walking.walkRdfAndOwl.model import WalkRDFAndOWL as WRO

In [None]:
# Rebinds `edges` and introduces `entities`, both of which the walking cells below use.
# NOTE(review): getOntProjection is not defined anywhere in the visible
# notebook; it must be defined (returning an `(edges, entities)` pair) before
# this cell can run.
edges, entities = getOntProjection() 

### DeepWalk

In [None]:
# Generate DeepWalk walks over `edges`, log how long the walking took, and
# learn embeddings from the resulting walks file.
import time  # `time` is not imported anywhere earlier in the notebook

logging.info("Walking..")
start = time.time()
walksFile = "data/walksDeepwalk"
# NOTE(review): this constructor takes `edges` and `walk()` takes no argument,
# unlike the `walker.walk(edges)` usage earlier in the notebook. Confirm the
# call shape against the installed mOWL version.
walker = DW(
    edges,
    100,  # num_walks
    100,  # walk_length
    0.1,  # alpha
    walksFile,  # file to write the walks
    workers=16,  # num_workers
)

walker.walk()
end = time.time()
logging.info("Walks generated in %f seconds", end - start)

# NOTE(review): learnEmbeddingsWithWord2Vec is not defined in the visible
# notebook; it is expected to return an `(embeddings, size)` pair.
dwEmbeddings, size = learnEmbeddingsWithWord2Vec(walksFile, entities)


In [None]:
# Presumably renders the t-SNE plot of the DeepWalk embeddings.
# NOTE(review): plotTSNE is not defined in the visible notebook and must be
# defined before this cell runs.
plotTSNE(dwEmbeddings, size)

### Node2Vec

In [None]:
# Generate Node2Vec walks over `edges`, log how long the walking took, and
# learn embeddings from the resulting walks file.
import time  # `time` is not imported anywhere earlier in the notebook

logging.info("Walking..")
start = time.time()
walksFile = "data/walksNode2Vec"

# NOTE(review): this constructor takes `edges` and `walk()` takes no argument,
# unlike the `walker.walk(edges)` usage earlier in the notebook. Confirm the
# call shape against the installed mOWL version.
walker = N2V(
    edges,
    100,  # num_walks
    100,  # walk_length
    10,  # p
    0.1,  # q
    walksFile,  # file to write the walks
    workers=16,  # num_workers
)

walker.walk()
end = time.time()
logging.info("Walks generated in %f seconds", end - start)

# NOTE(review): learnEmbeddingsWithWord2Vec is not defined in the visible
# notebook; it is expected to return an `(embeddings, size)` pair.
n2vEmbeddings, size = learnEmbeddingsWithWord2Vec(walksFile, entities)


In [None]:
# Presumably renders the t-SNE plot of the Node2Vec embeddings.
# NOTE(review): plotTSNE is not defined in the visible notebook and must be
# defined before this cell runs.
plotTSNE(n2vEmbeddings, size)

### Walking RDF and OWL

In [None]:
# Generate WalkRDFAndOWL walks over `edges`, log how long the walking took,
# and learn embeddings from the resulting walks file.
import time  # `time` is not imported anywhere earlier in the notebook

logging.info("Walking..")
start = time.time()
walksFile = "data/walksWalkRDFAndOWL"

# NOTE(review): this constructor takes `edges` and `walk()` takes no argument,
# unlike the `walker.walk(edges)` usage earlier in the notebook. Confirm the
# call shape against the installed mOWL version.
walker = WRO(
    edges,
    100,  # num_walks
    100,  # walk_length
    walksFile,  # file to write the walks
    workers=16,  # num_workers
)

walker.walk()
end = time.time()
logging.info("Walks generated in %f seconds", end - start)

# NOTE(review): learnEmbeddingsWithWord2Vec is not defined in the visible
# notebook; it is expected to return an `(embeddings, size)` pair.
wroEmbeddings, size = learnEmbeddingsWithWord2Vec(walksFile, entities)

In [None]:
# Presumably renders the t-SNE plot of the WalkRDFAndOWL embeddings.
# NOTE(review): plotTSNE is not defined in the visible notebook and must be
# defined before this cell runs.
plotTSNE(wroEmbeddings, size)