### Load data

In [None]:
import os
import pandas as pd

In [None]:
# Dataset directory, resolved relative to the current working directory
# (os.path.realpath('') expands the empty path against the cwd).
DATA_DIR = os.path.join(os.path.realpath(''), 'data', 'homogeneous')

# Load the three CSV tables in a fixed order and unpack them into their
# respective DataFrames.
p2p_df, terms_df, labels_df = (
    pd.read_csv(os.path.join(DATA_DIR, csv_name))
    for csv_name in ('p2p.csv', 'terms.csv', 'labels.csv')
)

In [None]:
from graph_ml.utility.graph_representation import adj_matrix_to_edges

In [None]:
# Turn the loaded tables into plain NumPy arrays for the model.
# NOTE(review): p2p_df is presumably a node-by-node adjacency matrix, given
# the helper it is passed to - confirm against the data files.
labels = labels_df.loc[:, 'Label'].to_numpy()
terms = terms_df.to_numpy()
edges = adj_matrix_to_edges(p2p_df.to_numpy())

### Split data

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split

In [None]:
# Split node *indices* (one per label) rather than feature rows: 20% of the
# nodes are held out for testing, with a fixed seed for reproducibility.
(train_indices, test_indices,
 train_labels, test_labels) = train_test_split(
    np.arange(len(labels)),
    labels,
    random_state=0,
    test_size=0.2,
)

### GAT Model

In [None]:
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras import layers as l

from graph_ml.utility.feed_forward_layer import FeedForwardLayer
from graph_ml.gnn.gat.gat_layer import MultiHeadGATLayer

In [None]:
class GATClassifier(Model):
    """Node classifier built from three stacked multi-head GAT layers.

    The node feature matrix and the edge structure are fixed at construction
    time. The model's input is a batch of node indices; its output is the
    softmax class distribution for those nodes.
    """

    def __init__(self, node_features, edges, num_classes):
        """Create the layer stack.

        Args:
            node_features: Two-dimensional array of per-node features; its
                first dimension is used as the node count.
            edges: Edge structure handed (as a ``tf.constant``) to every
                ``MultiHeadGATLayer``.
                NOTE(review): expected layout is whatever
                ``MultiHeadGATLayer`` requires - confirm against that layer.
            num_classes: Number of output units of the final dense layer.
        """
        super().__init__()

        edge_tensor = tf.constant(edges)

        # Stored as float32 so the whole graph can be fed through the layers
        # on every forward pass.
        self.node_features = tf.constant(node_features, dtype=tf.float32)

        # Unpacking enforces a rank-2 feature matrix.
        num_nodes, _ = node_features.shape
        adj_shape = (num_nodes, num_nodes)

        # NOTE(review): the list arguments presumably give layer widths -
        # confirm against FeedForwardLayer / MultiHeadGATLayer.
        self.preprocessing1 = FeedForwardLayer([512])

        self.gat1 = MultiHeadGATLayer(
            edge_tensor, adj_shape, hidden_units=[128, 96])
        self.gat2 = MultiHeadGATLayer(
            edge_tensor, adj_shape, hidden_units=[96, 64])
        self.gat3 = MultiHeadGATLayer(
            edge_tensor, adj_shape, hidden_units=[64, 32])

        self.postprocessing1 = FeedForwardLayer([64])

        self.logits1 = l.Dense(num_classes)
        self.softmax1 = l.Softmax()

    def call(self, indices):
        """Return class probabilities for the nodes selected by ``indices``.

        All nodes are passed through the preprocessing and GAT layers first;
        only afterwards are the requested rows gathered and classified.
        """
        hidden = self.preprocessing1(self.node_features)

        for gat_layer in (self.gat1, self.gat2, self.gat3):
            hidden = gat_layer(hidden)

        # Keep only the rows belonging to the requested nodes.
        selected = tf.gather(hidden, indices)

        # Softmax is applied here, so the output is probabilities, not logits.
        return self.softmax1(self.logits1(self.postprocessing1(selected)))

### Classify nodes

In [None]:
import warnings
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.metrics import SparseCategoricalAccuracy

In [None]:
# Silence every Python warning for the remainder of the session.
warnings.filterwarnings('ignore')

# One output unit per distinct label value.
num_classes = len(set(labels))

model = GATClassifier(terms, edges, num_classes)

# The model already ends in a softmax, which matches the loss's default of
# treating predictions as probabilities.
model.compile(
    optimizer=Adam(1e-4),
    loss=SparseCategoricalCrossentropy(),
    metrics=[SparseCategoricalAccuracy(name="accuracy")],
)

# Train on node indices; a fifth of the training nodes is held out for
# validation. The returned history object is discarded.
_ = model.fit(
    x=train_indices,
    y=labels[train_indices],
    batch_size=500,
    epochs=20,
    validation_split=0.2,
    verbose=0,
)

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Predict class probabilities for the held-out nodes and take the most
# probable class per node.
test_probabilities = model(test_indices).numpy()
predicted_labels = np.argmax(test_probabilities, axis=1)
true_labels = labels[test_indices]

# Per-class precision / recall / F1 on the test split.
print(classification_report(true_labels, predicted_labels))