## Part 2: Introduction of GAT Architecture



In [1]:
# Install required packages.
import os
import torch
os.environ['TORCH'] = torch.__version__
print(torch.__version__)

# %pip install torch-scatter torch-sparse torch-cluster torch-spline-conv torch-geometric -f https://data.pyg.org/whl/torch-1.11.0+cu113.html

1.11.0


In [3]:
from scipy.io import loadmat
import pickle
import numpy as np
import matplotlib.pyplot as plt

In [4]:

prefix = 'data/'

data_file = loadmat(prefix +  'YelpChi.mat')

labels = data_file['label'].flatten()
features = data_file['features'].todense().A
# load the preprocessed adj_lists
with open(prefix + 'yelp_homo_adjlists.pickle', 'rb') as file:
    homogenous = pickle.load(file)
file.close()


In [5]:
# Prelim network EDA
import networkx as nx


In [6]:
#Data needs to be translated into torch tensors when using pytorch geometric
print(f'features dtype = {features.dtype}, \n' + 
      f'label dtype = {labels.dtype}')


features dtype = float64, 
label dtype = int64


In [7]:
# Same split of data into test and train
from sklearn.model_selection import train_test_split 
split = 0.2
indices = np.arange(len(features))

xtrain, xtest, ytrain, ytest, idxtrain, idxtest = train_test_split(features, 
                                                                   labels,indices, 
                                                                   stratify=labels,
                                                                   test_size = split, 
                                                                   random_state = 99)



In [9]:
# from google.colab import drive
# drive.mount('/content/drive')

In [10]:
g = nx.Graph(homogenous)
print(f'Number of nodes: {g.number_of_nodes()}')
print(f'Number of edges: {g.number_of_edges()}')
print(f'Average node degree: {len(g.edges) / len(g.nodes):.2f}')


Number of nodes: 45954
Number of edges: 3892933
Average node degree: 84.71


In [11]:
# Convert to torch geometric datatype 
from torch_geometric.utils.convert import from_networkx

data = from_networkx(g)
data.x = torch.tensor(features).float()
data.y = torch.tensor(labels)
data.num_node_features = data.x.shape[-1]
data.num_classes = 1 

A = set(range(len(labels)))
data.train_mask = torch.tensor([x in idxtrain for x in A])
data.test_mask = torch.tensor([x in idxtest for x in A])

In [12]:
import torch_geometric.transforms as T
from torch_geometric.transforms import NormalizeFeatures

transform = NormalizeFeatures()

data

Data(edge_index=[2, 7739912], num_nodes=45954, x=[45954, 32], y=[45954], num_node_features=32, num_classes=1, train_mask=[45954], test_mask=[45954])

In [None]:
from torch_geometric.nn import GCNConv, Linear
import torch.nn.functional as F


In [None]:
class GCN(torch.nn.Module):
    def __init__(self, hidden_layers = 64):
        super().__init__()
        torch.manual_seed(2022)
        self.conv1 = GCNConv(data.num_node_features, hidden_layers)
        self.conv2 = GCNConv(hidden_layers, 1)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return torch.sigmoid(x)


In [None]:
# def train(criterion):
#     model.train()
#     optimizer.zero_grad()
#     out = model(data).to_device()
#     #for discussion on why masks is applied here, see: https://stackoverflow.com/questions/69019682/training-mask-not-used-in-pytorch-geometric-when-inputting-data-to-train-model
#     loss = criterion(out[data.train_mask], data.y[data.train_mask].reshape(-1,1).float())
#     loss.backward() 
#     optimizer.step()
#     return float(loss)


In [None]:
from sklearn.metrics import roc_auc_score

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)
model = GCN()
model.to(device)
data.to(device)

lr = 0.01
epochs = 100

optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=5e-4)
criterion = torch.nn.BCELoss()

losses = []
for e in range(epochs):    
    model.train()
    optimizer.zero_grad()
    out = model(data)
    #for discussion on why masks is applied here, see: https://stackoverflow.com/questions/69019682/training-mask-not-used-in-pytorch-geometric-when-inputting-data-to-train-model
    #loss
    loss = criterion(out[data.train_mask], data.y[data.train_mask].reshape(-1,1).float())
    loss.backward()
    losses.append(loss.item())

    optimizer.step()

    #accuracy
    ypred = model(data).clone().cpu()
    pred = data.y[data.test_mask].clone().cpu().detach().numpy()
    true = ypred[data.test_mask].detach().numpy()
    acc = roc_auc_score(pred,true)

    print(f'Epoch {e} | Loss {loss:6.2f} | Accuracy = {100*acc:6.3f}% | # True Labels = {ypred.round().sum()}')



In [None]:
from sklearn.metrics import roc_curve
fpr, tpr, _ = roc_curve(pred,true)

plt.figure(1)
plt.plot([0, 1], [0, 1])
plt.plot(fpr, tpr)
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.show()

In [None]:
data.cpu()

In [None]:
#GATConv does not scale by design... 
#https://github.com/pyg-team/pytorch_geometric/issues/527
#Single dense graph so using NeighborLoader (where DataLoader is for multi-graph problems)
from torch_geometric.loader import NeighborLoader

batch_size = 128
loader = NeighborLoader(
    data,
    # Sample 1000 neighbors for each node for 2 iterations
    num_neighbors=[1000]*2,
    # Use a batch size for sampling training nodes
    batch_size=batch_size,
    input_nodes=data.train_mask,
)

sampled_data = next(iter(loader))
print(f'Checking that batch size is {batch_size}: {batch_size == sampled_data.batch_size}')
print(f'Percentage fraud in batch: {100*sampled_data.y.sum()/len(sampled_data.y):.4f}%')
sampled_data

In [None]:
#As we are using batched data, important to also have batchnorm in network

from torch_geometric.nn import GATConv
import torch.nn as nn

class GAT(torch.nn.Module):
    def __init__(self, hidden_layers=32, heads=1, dropout_p=0.0):
        super().__init__()
        torch.manual_seed(2022)
        self.conv1 = GATConv(data.num_node_features, hidden_layers, heads, dropout=dropout_p)
        self.bn1 = nn.BatchNorm1d(hidden_layers*heads)
        self.conv2 = GATConv(hidden_layers * heads, 1, dropout=dropout_p)

    def forward(self, data, dropout_p=0.0):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = self.bn1(x)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return torch.sigmoid(x)


In [None]:
lr = 0.01
epochs = 20

model = GAT(hidden_layers = 64,heads=2)
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=lr,weight_decay=5e-4)
criterion = torch.nn.BCELoss()

losses = []
for e in range(epochs):    
    epoch_loss = 0.
    for i, sampled_data in enumerate(loader):
        sampled_data.to(device)

        model.train()
        optimizer.zero_grad()
        out = model(sampled_data)
        loss = criterion(out[sampled_data.train_mask], sampled_data.y[sampled_data.train_mask].reshape(-1,1).float())
        loss.backward()
        epoch_loss += loss.item()

        optimizer.step()

        #accuracy
        ypred = model(sampled_data).clone().cpu()
        pred = sampled_data.y[sampled_data.test_mask].clone().cpu().detach().numpy()
        true = ypred[sampled_data.test_mask].detach().numpy()
        acc = roc_auc_score(pred,true)
    
    losses.append(epoch_loss/batch_size)

    print(f'Epoch {e} | Loss {epoch_loss:6.2f} | Accuracy = {100*acc:6.3f}% | # True Labels = {ypred.round().sum()}')


In [None]:
data.to(device)
ypred = model(data).clone().cpu()
pred = data.y[data.test_mask].clone().cpu().detach().numpy()
true = ypred[data.test_mask].detach().numpy()
acc = roc_auc_score(pred,true)
print(f"Final model accuracy for GAT is : {100*acc:.4f}%")

In [None]:

fpr2, tpr2, _ = roc_curve(pred,true)

plt.figure(1)
plt.plot([0, 1], [0, 1])
plt.plot(fpr, tpr)
plt.plot(fpr2, tpr2)
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.show()

In [None]:
# Additional parameters to change:
#  - number of epochs
#  - number of hidden layers
#  - number of heads
#  - dropout probability
#  - learning rate (optimizer)
#  - weight decay rate (optimizer)


### Advanced:

GATv2Conv —> https://arxiv.org/pdf/2105.14491.pdf

In [None]:
from torch_geometric.nn import GATv2Conv

class GATv2(torch.nn.Module):
    def __init__(self, hidden_layers=32, heads=1, dropout_p=0.0):
        super().__init__()
        torch.manual_seed(2022)
        self.conv1 = GATv2Conv(data.num_node_features, hidden_layers, heads, dropout=dropout_p)
        self.bn1 = nn.BatchNorm1d(hidden_layers*heads)
        self.conv2 = GATv2Conv(hidden_layers * heads, 1, dropout=dropout_p)

    def forward(self, data, dropout_p=0.0):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = self.bn1(x)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return torch.sigmoid(x)


In [None]:
lr = 0.01
epochs = 20

model = GATv2(hidden_layers = 64,heads=1)
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=lr,weight_decay=5e-4)


criterion = torch.nn.BCELoss()

losses = []
for e in range(epochs):    
    epoch_loss = 0.
    for i, sampled_data in enumerate(loader):
        sampled_data.to(device)

        model.train()
        optimizer.zero_grad()
        out = model(sampled_data)
        loss = criterion(out[sampled_data.train_mask], sampled_data.y[sampled_data.train_mask].reshape(-1,1).float())
        loss.backward()
        epoch_loss += loss.item()

        optimizer.step()

        #accuracy
        ypred = model(sampled_data).clone().cpu()
        pred = sampled_data.y[sampled_data.test_mask].clone().cpu().detach().numpy()
        true = ypred[sampled_data.test_mask].detach().numpy()
        acc = roc_auc_score(pred,true)
    
    losses.append(epoch_loss/batch_size)

    print(f'Epoch {e} | Loss {loss:6.2f} | Accuracy = {100*acc:6.3f}% | # True Labels = {ypred.round().sum()}')


In [None]:
data.to(device)
ypred = model(data).cpu()
pred = data.y[data.test_mask].clone().cpu().detach().numpy()
true = ypred[data.test_mask].detach().numpy()
acc = roc_auc_score(pred,true)
print(f"Final model accuracy for GATv2 is : {100*acc:.4f}%")