### Load data

In [None]:
import os
import pandas as pd

In [None]:
# Dataset directory: <current working directory>/data/homogeneous.
# os.path.realpath('') resolves the empty path, i.e. the working directory.
DATA_DIR = os.path.join(os.path.realpath(''), 'data', 'homogeneous')

# Load the three CSV tables of the dataset, in the same order as the names
# on the left-hand side.
p2p_df, terms_df, labels_df = (
    pd.read_csv(os.path.join(DATA_DIR, csv_name))
    for csv_name in ('p2p.csv', 'terms.csv', 'labels.csv')
)

In [None]:
from graph_ml.utility.graph_representation import adj_matrix_to_adj_list

In [None]:
# Plain NumPy views of the loaded tables: the full term table and the
# 'Label' column of the label table.
terms, labels = terms_df.to_numpy(), labels_df['Label'].to_numpy()

# Graph structure used by the random walker below. Judging by the helper's
# name, p2p.csv holds an adjacency matrix that is converted to an adjacency
# list here -- confirm against graph_ml.utility.graph_representation.
adj_list = adj_matrix_to_adj_list(p2p_df.to_numpy())

### Split data

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split

In [None]:
# Split node *indices* (not features) 80/20 so the same partition can be used
# to index any per-node array later on; the labels are split alongside them.
# random_state is fixed to keep the partition reproducible across runs.
# NOTE(review): labels is assumed to be one-dimensional (it is a single
# DataFrame column), so labels.shape[0] is the number of nodes.
(train_indices, test_indices,
 train_labels, test_labels) = train_test_split(
    np.arange(labels.shape[0]),
    labels,
    test_size=0.2,
    random_state=0,
)

### Calculate node representations using Node2Vec

In [None]:
from graph_ml.utility.random_walker import RandomWalker

In [None]:
# (p, q) pairs handed to RandomWalker, one walker per pair.
# NOTE(review): presumably these are the Node2Vec return / in-out
# parameters -- confirm against graph_ml.utility.random_walker.
walker_params = [(1.0, 1.0), (1.5, 0.5), (0.5, 1.5)]

# Walks produced by every walker are pooled into one flat list.
walks = []

for p, q in walker_params:
    walker = RandomWalker(adj_list, p, q)
    # Header line so the progress output of each walker can be told apart.
    print(f'{p=}, {q=}:')
    walks += walker.generate_walks(
        num_walks_per_node=2,
        walk_length=50,
        print_progress=True,
    )

In [None]:
from gensim.models import Word2Vec

In [None]:
# Word2Vec is fed the walks as "sentences" of string tokens, so every node
# id in every walk is converted to its string form.
str_walks = [list(map(str, walk)) for walk in walks]

# Skip-gram model (sg=1) over the walks; min_count=0 keeps every token that
# occurs in the corpus, however rarely.
# NOTE(review): no seed is passed and workers=2, so the embeddings are not
# reproducible between runs.
node2vec = Word2Vec(
    str_walks,
    vector_size=128,
    window=5,
    min_count=0,
    sg=1,
    workers=2,
)

# Stack the learned vectors in node-index order (row i <-> key str(i)).
# NOTE(review): assumes every index in range(len(adj_list)) occurs in at
# least one walk; a missing key would raise KeyError here.
node_features = node2vec.wv[[str(node_id) for node_id in range(len(adj_list))]]

### Classify nodes

In [None]:
import tensorflow as tf
from tensorflow.keras import layers as l
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.metrics import SparseCategoricalAccuracy

In [None]:
# Width of the output layer: one unit per distinct label value.
# NOTE(review): with a sparse categorical loss this assumes the labels are
# the integers 0 .. num_classes - 1 -- TODO confirm against labels.csv.
num_classes = len(set(labels))

# Small MLP on top of the node embeddings: two hidden Dense + PReLU stages
# followed by a Dense projection to the classes and a Softmax.
model = Sequential()
model.add(l.Dense(64))
model.add(l.PReLU())
model.add(l.Dense(32))
model.add(l.PReLU())
model.add(l.Dense(num_classes))
model.add(l.Softmax())

# The network already ends in a Softmax layer, so the loss is built with its
# default arguments.
model.compile(
    optimizer=Adam(1e-4),
    loss=SparseCategoricalCrossentropy(),
    metrics=[SparseCategoricalAccuracy(name="accuracy")],
)

# Train silently on the training nodes only; 20% of them are held out by
# Keras for validation. The returned history is bound to `_` and not used.
_ = model.fit(
    x=node_features[train_indices],
    y=labels[train_indices],
    batch_size=500,
    epochs=30,
    validation_split=0.2,
    verbose=0,
)

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Ground truth for the held-out nodes.
true_labels = labels[test_indices]

# Run the classifier on the held-out node embeddings in a single forward
# call and take the index of the largest output per row as the prediction.
predicted_labels = (
    model(node_features[test_indices])
    .numpy()
    .argmax(axis=1)
)

# Per-class precision / recall / F1 summary on the test split.
print(classification_report(true_labels, predicted_labels))