### Load data

In [None]:
import os
import pandas as pd

In [None]:
# os.path.realpath('') resolves to the current working directory, so the
# dataset is expected under ./data/heterogeneous relative to where the
# notebook is run.
DATA_DIR = os.path.join(os.path.realpath(''), 'data', 'heterogeneous')

# Read the four CSV tables in a fixed order and bind them to their names:
# p2p.csv, p2a.csv, terms.csv and labels.csv.
p2p_df, p2a_df, terms_df, labels_df = (
    pd.read_csv(os.path.join(DATA_DIR, csv_name))
    for csv_name in ('p2p.csv', 'p2a.csv', 'terms.csv', 'labels.csv')
)

In [None]:
from graph_ml.utility.graph_representation import adj_matrix_to_edges

In [None]:
# The paper-author table is used in both orientations, so convert it to a
# NumPy array once and reuse it.
p2a_matrix = p2a_df.to_numpy()

# Turn each adjacency matrix into an edge list via the project helper.
p2p_edges = adj_matrix_to_edges(p2p_df.to_numpy())
p2a_edges = adj_matrix_to_edges(p2a_matrix)
# Same relation built from the transposed matrix.
a2p_edges = adj_matrix_to_edges(p2a_matrix.T)

# The terms table is passed to the model as paper features; the 'Label'
# column holds the classification targets.
terms = terms_df.to_numpy()
labels = labels_df['Label'].to_numpy()

### Split data

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split

In [None]:
# Split positional indices (0 .. len(labels) - 1) together with their labels:
# 20% of the entries are held out for testing, with a fixed seed so the split
# is reproducible.
all_indices = np.arange(len(labels))
train_indices, test_indices, train_labels, test_labels = train_test_split(
    all_indices,
    labels,
    test_size=0.2,
    random_state=0,
)

### Heterogeneous GAT Model

In [None]:
from itertools import zip_longest

import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras import layers as l
from tensorflow.keras import activations as a
from tensorflow.keras.models import Sequential

from graph_ml.utility.feed_forward_layer import FeedForwardLayer
from graph_ml.gnn.gat.gat_layer import MultiHeadGATLayer
from graph_ml.gnn.gat.hete_gat_multi_head import HeteGATMultiHead
from graph_ml.gnn.gat.hete_gat_layer import HeteGATLayer

In [None]:
class HeteGATClassifier(Model):
    """Keras model that classifies papers using paper and author graph relations.

    The whole graph (paper features, author embeddings and all edge lists) is
    stored on the model at construction time. A forward pass therefore only
    receives the indices of the papers whose class probabilities are wanted.

    NOTE(review): the exact behaviour of ``HeteGATMultiHead``, ``HeteGATLayer``,
    ``MultiHeadGATLayer`` and ``FeedForwardLayer`` is defined in ``graph_ml``
    and is not visible here; the descriptions below follow the names and
    arguments used in this class and should be confirmed against that package.
    """

    def __init__(self, p_features, n_papers, n_authors, p2p_edges, p2a_edges, a2p_edges, num_classes):
        """Build all layers and capture the graph as constant tensors.

        Args:
            p_features: Paper feature matrix; wrapped in ``tf.constant``.
            n_papers: Number of paper nodes.
            n_authors: Number of author nodes.
            p2p_edges: Paper-paper edge list.
            p2a_edges: Edge list used with a ``(n_papers, n_authors)``
                attention shape.
            a2p_edges: Edge list used with a ``(n_authors, n_papers)``
                attention shape.
            num_classes: Width of the final dense (logit) layer.
        """
        super().__init__()

        # Attention-matrix shapes for the three relation orientations.
        paper_paper_shape = (n_papers, n_papers)
        paper_author_shape = (n_papers, n_authors)
        author_paper_shape = (n_authors, n_papers)

        # Edge lists become constant tensors shared by the layers below.
        p2p_edge_tensor = tf.constant(p2p_edges)
        p2a_edge_tensor = tf.constant(p2a_edges)
        a2p_edge_tensor = tf.constant(a2p_edges)

        # Authors get a 128-dimensional embedding looked up by their index
        # (0 .. n_authors - 1).
        self.author_index = tf.range(n_authors)
        self.author_embedding = l.Embedding(n_authors, 128)

        # Papers are projected to 128 units by a dense layer, the same width
        # as the author embedding.
        self.paper_features = tf.constant(p_features)
        self.p_preprocessing1 = l.Dense(128)

        # Stage 1: update paper features from the paper-paper and
        # paper-author relations.
        paper_head_1 = HeteGATMultiHead(
            p2p_edge_tensor, attention_adj_matrix_shape=paper_paper_shape, hidden_units=[64, 64]
        )
        author_head_1 = HeteGATMultiHead(
            p2a_edge_tensor, attention_adj_matrix_shape=paper_author_shape, hidden_units=[64, 64]
        )
        self.pa2p_rgat1 = HeteGATLayer([paper_head_1, author_head_1])

        # Stage 2: recompute author features from the paper features only
        # (add_self_features=False).
        self.p2a_gat2 = MultiHeadGATLayer(
            a2p_edge_tensor,
            attention_adj_matrix_shape=author_paper_shape,
            hidden_units=[64, 32],
            add_self_features=False,
        )

        # Stage 3: second paper update, same structure as stage 1 but with a
        # single 32-unit entry in hidden_units.
        paper_head_3 = HeteGATMultiHead(
            p2p_edge_tensor, attention_adj_matrix_shape=paper_paper_shape, hidden_units=[32]
        )
        author_head_3 = HeteGATMultiHead(
            p2a_edge_tensor, attention_adj_matrix_shape=paper_author_shape, hidden_units=[32]
        )
        self.pa2p_rgat3 = HeteGATLayer([paper_head_3, author_head_3])

        # Classification head: feed-forward block, logits, softmax.
        self.postprocessing1 = FeedForwardLayer([64])
        self.logits1 = l.Dense(num_classes)
        self.softmax1 = l.Softmax()

    def call(self, indices):
        """Return softmax class probabilities for the papers at ``indices``.

        Every call runs the three graph stages over the stored paper features
        and author embeddings; ``indices`` only selects which rows of the
        final paper features are passed to the classification head.
        """
        papers = self.p_preprocessing1(self.paper_features)
        authors = self.author_embedding(self.author_index)

        # Each relation receives a pair of feature tensors: (papers, papers)
        # for paper-paper and (papers, authors) for paper-author.
        papers = self.pa2p_rgat1(((papers, papers), (papers, authors)))
        authors = self.p2a_gat2(papers)
        papers = self.pa2p_rgat3(((papers, papers), (papers, authors)))

        # Keep only the requested paper rows before classifying.
        selected = tf.gather(papers, indices)

        hidden = self.postprocessing1(selected)
        return self.softmax1(self.logits1(hidden))

### Classify nodes

In [None]:
import warnings
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.metrics import SparseCategoricalAccuracy

In [None]:
# Silence every Python warning from here on (installs a global 'ignore' filter).
warnings.filterwarnings('ignore')

# Number of distinct label values, and the node counts taken from the
# paper-author table (rows = papers, columns = authors).
num_classes = len(set(labels))
n_papers, n_authors = p2a_df.shape

model = HeteGATClassifier(terms, n_papers, n_authors, p2p_edges, p2a_edges, a2p_edges, num_classes)

# The model ends in a softmax, so the loss is used with its default settings
# (probabilities rather than logits).
optimizer = Adam(1e-4)
loss_fn = SparseCategoricalCrossentropy()
accuracy_metric = SparseCategoricalAccuracy(name="accuracy")
model.compile(optimizer=optimizer, loss=loss_fn, metrics=[accuracy_metric])

# The model input is the array of training node indices; 20% of them are held
# back by Keras for validation. The returned History object is discarded.
_ = model.fit(
    x=train_indices,
    y=labels[train_indices],
    batch_size=500,
    epochs=20,
    validation_split=0.2,
    verbose=0,
)

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Ground-truth classes of the held-out nodes.
true_labels = labels[test_indices]

# Run the model on the held-out node indices and take the most probable
# class in each row.
test_probabilities = model(test_indices).numpy()
predicted_labels = test_probabilities.argmax(axis=1)

report = classification_report(true_labels, predicted_labels)
print(report)