In [1]:
!pip install torch torchvision



In [2]:
import torch
print(torch.__version__)


2.1.0+cu121


In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

Using device: cuda


In [4]:
!pip install torch_geometric


Collecting torch_geometric
  Downloading torch_geometric-2.4.0-py3-none-any.whl (1.0 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.0 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.3/1.0 MB[0m [31m9.0 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━[0m [32m0.9/1.0 MB[0m [31m13.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch_geometric
Successfully installed torch_geometric-2.4.0


# **Load Dataset**

## **Load citeseer**

In [5]:
import torch
from torch_geometric.datasets import Planetoid



citeseer_dataset = Planetoid(root='', name='CiteSeer')

citeseer = citeseer_dataset[0]

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.test.index
Processing...
Done!


## **Load CoraFull**

In [6]:
from torch_geometric.datasets import CoraFull


root = './CoraFull'


CoraFull_dataset = CoraFull(root)

CoraFull_dataset.download()
CoraFull_dataset.process()

CoraFull =  CoraFull_dataset[0]



Downloading https://github.com/abojchevski/graph2gauss/raw/master/data/cora.npz
Processing...
Done!
Using existing file cora.npz


# **Split dataset**

## **split cora full**

In [7]:
from torch_geometric.transforms import RandomNodeSplit

transform = RandomNodeSplit(num_train_per_class=int(CoraFull.num_nodes * 0.7), num_val=int(CoraFull.num_nodes * 0.1), num_test=int(CoraFull.num_nodes * 0.2))
CoraFull = transform(CoraFull)

## **split citeseer**

In [8]:

transform = RandomNodeSplit(num_train_per_class=int(citeseer.num_nodes * 0.7), num_val=int(citeseer.num_nodes * 0.1), num_test=int(citeseer.num_nodes * 0.2))
citeseer = transform(citeseer)

# **important functions**

In [9]:
def train(model, optimizer, criterion, data):
    model.train()
    optimizer.zero_grad()
    data = data.to(device)
    out = model(data)
    loss = criterion(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    return loss.item()

def validate(model, criterion, data):
    model.eval()
    data = data.to(device)
    with torch.no_grad():
        out = model(data)
        val_loss = criterion(out[data.val_mask], data.y[data.val_mask])
    return val_loss.item()

def test(model, criterion, data):
    model.eval()
    data = data.to(device)
    with torch.no_grad():
        out = model(data)
        _, pred = torch.max(out, dim=1)
        correct = float(pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
        acc = correct / data.test_mask.sum().item()
    return acc


# **E) implement GAT V2**

## **Two layer GAT V2**

In [10]:

from torch_geometric.nn import GATv2Conv
import torch.optim as optim
import torch.nn.functional as F

class TwoLayerGAT_v2(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_heads, dropout=0.4, negative_slope=0.2, share_weights = False):
        super(TwoLayerGAT_v2, self).__init__()
        self.gat1 = GATv2Conv(input_dim, hidden_dim, heads=num_heads, dropout=dropout, negative_slope=negative_slope,share_weights = share_weights)
        self.gat2 = GATv2Conv(hidden_dim * num_heads, output_dim, heads=1, dropout=dropout, negative_slope=negative_slope,share_weights = share_weights)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.gat1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.gat2(x, edge_index)
        return F.log_softmax(x, dim=1)



### **Train and evaluate on Corafull dataset(two layer)**

In [11]:
torch.manual_seed(0)


hidden_dims = [16 , 32, 64, 128, 256]


best_validation_loss = float('inf')
best_num_heads = None


for hidden_dim in hidden_dims:

    model = TwoLayerGAT_v2(input_dim=CoraFull.num_node_features, hidden_dim= hidden_dim , output_dim=CoraFull_dataset.num_classes, num_heads = 4).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # Early stopping parameters
    patience = 5
    min_delta = 0.001
    patience_counter = 0
    best_loss = float('inf')

    # Train the model
    for epoch in range(200):
        loss = train(model, optimizer, criterion, CoraFull)
        val_loss = validate(model, criterion, CoraFull)

        # Check for improvement in validation loss
        if val_loss < best_loss - min_delta:
            best_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f'Early stopping at epoch {epoch} \n')
                break


        print(f'Hidden Dimension: {hidden_dim}, Epoch: {epoch}, Loss: {loss:.4f}, Validation Loss: {val_loss:.4f} ')

    # Evaluate the model
    validation_loss = validate(model, criterion, CoraFull)


    print(f' Hidden Dimension: {hidden_dim}, Validation Loss: {validation_loss:.4f} \n')
    # Check if this number of attention heads is the best so far
    if validation_loss < best_validation_loss:
        best_validation_loss = validation_loss
        best_hidden_dim = hidden_dim

print('Best Hidden Dimension: {}'.format(best_hidden_dim))

Hidden Dimension: 16, Epoch: 0, Loss: 4.2560, Validation Loss: 3.9831 
Hidden Dimension: 16, Epoch: 1, Loss: 3.9932, Validation Loss: 3.6448 
Hidden Dimension: 16, Epoch: 2, Loss: 3.7154, Validation Loss: 3.3450 
Hidden Dimension: 16, Epoch: 3, Loss: 3.4657, Validation Loss: 3.1165 
Hidden Dimension: 16, Epoch: 4, Loss: 3.2254, Validation Loss: 2.9030 
Hidden Dimension: 16, Epoch: 5, Loss: 3.0311, Validation Loss: 2.6771 
Hidden Dimension: 16, Epoch: 6, Loss: 2.8502, Validation Loss: 2.4674 
Hidden Dimension: 16, Epoch: 7, Loss: 2.6966, Validation Loss: 2.2883 
Hidden Dimension: 16, Epoch: 8, Loss: 2.5398, Validation Loss: 2.1233 
Hidden Dimension: 16, Epoch: 9, Loss: 2.4004, Validation Loss: 1.9723 
Hidden Dimension: 16, Epoch: 10, Loss: 2.2897, Validation Loss: 1.8488 
Hidden Dimension: 16, Epoch: 11, Loss: 2.1962, Validation Loss: 1.7390 
Hidden Dimension: 16, Epoch: 12, Loss: 2.1023, Validation Loss: 1.6407 
Hidden Dimension: 16, Epoch: 13, Loss: 2.0151, Validation Loss: 1.5652 
Hi

### **find best number of heads for  cora full dataset(two layer)**

In [12]:
torch.manual_seed(0)


num_heads_list = [1, 2, 4 , 8]

best_validation_loss = float('inf')
best_hidden_dims = None


for num_heads in num_heads_list:

    model = TwoLayerGAT_v2(input_dim= CoraFull.num_node_features ,hidden_dim=best_hidden_dim , output_dim= CoraFull_dataset.num_classes , num_heads = num_heads).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # Early stopping parameters
    patience = 5
    min_delta = 0.001
    patience_counter = 0
    best_loss = float('inf')

    # Train the model
    for epoch in range(200):
        loss = train(model, optimizer, criterion, CoraFull)
        val_loss = validate(model, criterion, CoraFull)

        # Check for improvement in validation loss
        if val_loss < best_loss - min_delta:
            best_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f'Early stopping at epoch {epoch} \n')
                break


        print(f'Attention Heads: {num_heads}, Epoch: {epoch}, Loss: {loss:.4f}, Validation Loss: {val_loss:.4f} ')

    # Evaluate the model
    validation_loss = validate(model, criterion, CoraFull)


    print(f' Attention Heads: {num_heads}, Validation Loss: {validation_loss:.4f} \n')
    # Check if this number of attention heads is the best so far
    if validation_loss < best_validation_loss:
        best_validation_loss = validation_loss
        best_num_heads = num_heads

print('Best Number of Attention Heads: {}'.format(best_num_heads))


Attention Heads: 1, Epoch: 0, Loss: 4.2536, Validation Loss: 3.8501 
Attention Heads: 1, Epoch: 1, Loss: 3.8546, Validation Loss: 3.3241 
Attention Heads: 1, Epoch: 2, Loss: 3.5424, Validation Loss: 3.1489 
Attention Heads: 1, Epoch: 3, Loss: 3.2346, Validation Loss: 2.9019 
Attention Heads: 1, Epoch: 4, Loss: 3.0175, Validation Loss: 2.5663 
Attention Heads: 1, Epoch: 5, Loss: 2.7556, Validation Loss: 2.2926 
Attention Heads: 1, Epoch: 6, Loss: 2.5127, Validation Loss: 2.1193 
Attention Heads: 1, Epoch: 7, Loss: 2.3613, Validation Loss: 1.9525 
Attention Heads: 1, Epoch: 8, Loss: 2.2367, Validation Loss: 1.7938 
Attention Heads: 1, Epoch: 9, Loss: 2.0961, Validation Loss: 1.6670 
Attention Heads: 1, Epoch: 10, Loss: 2.0242, Validation Loss: 1.5792 
Attention Heads: 1, Epoch: 11, Loss: 1.9283, Validation Loss: 1.5054 
Attention Heads: 1, Epoch: 12, Loss: 1.8287, Validation Loss: 1.4307 
Attention Heads: 1, Epoch: 13, Loss: 1.7753, Validation Loss: 1.3643 
Attention Heads: 1, Epoch: 14,

In [13]:
torch.manual_seed(0)

# Train the best model
model = TwoLayerGAT_v2(input_dim=CoraFull.num_node_features, hidden_dim=best_hidden_dim , output_dim=CoraFull_dataset.num_classes, num_heads=best_num_heads).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

# Early stopping parameters
patience = 5
min_delta = 0.001
patience_counter = 0
best_loss = float('inf')

# Train the best model
for epoch in range(200):
    loss = train(model, optimizer, criterion, CoraFull)
    val_loss = validate(model, criterion, CoraFull)

    # Check for improvement in validation loss
    if val_loss < best_loss - min_delta:
        best_loss = val_loss
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f'Early stopping at epoch {epoch} \n')
            break

# Evaluate the model
test_accuracy = test(model, criterion, CoraFull)
print(f'Test Accuracy(two layer) on CoraFull dataset: {test_accuracy:.4f}')

Early stopping at epoch 28 

Test Accuracy(two layer) on CoraFull dataset: 0.7079


### **Train and evaluate on Citeseer dataset(two layer)**

In [14]:
torch.manual_seed(0)


hidden_dims = [16 , 32, 64, 128, 256]


best_validation_loss = float('inf')
best_num_heads = None


for hidden_dim in hidden_dims:

    model = TwoLayerGAT_v2(input_dim=citeseer.num_node_features, hidden_dim= hidden_dim , output_dim=citeseer_dataset.num_classes, num_heads = 4).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # Early stopping parameters
    patience = 5
    min_delta = 0.001
    patience_counter = 0
    best_loss = float('inf')

    # Train the model
    for epoch in range(200):
        loss = train(model, optimizer, criterion, citeseer)
        val_loss = validate(model, criterion, citeseer)

        # Check for improvement in validation loss
        if val_loss < best_loss - min_delta:
            best_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f'Early stopping at epoch {epoch} \n')
                break


        print(f'Hidden Dimension: {hidden_dim}, Epoch: {epoch}, Loss: {loss:.4f}, Validation Loss: {val_loss:.4f} ')

    # Evaluate the model
    validation_loss = validate(model, criterion, citeseer)


    print(f' Hidden Dimension: {hidden_dim}, Validation Loss: {validation_loss:.4f} \n')
    # Check if this number of attention heads is the best so far
    if validation_loss < best_validation_loss:
        best_validation_loss = validation_loss
        best_hidden_dim = hidden_dim

print('Best Hidden Dimension: {}'.format(best_hidden_dim))

Hidden Dimension: 16, Epoch: 0, Loss: 1.8110, Validation Loss: 1.5092 
Hidden Dimension: 16, Epoch: 1, Loss: 1.5265, Validation Loss: 1.2310 
Hidden Dimension: 16, Epoch: 2, Loss: 1.2586, Validation Loss: 1.0299 
Hidden Dimension: 16, Epoch: 3, Loss: 1.0816, Validation Loss: 0.9145 
Hidden Dimension: 16, Epoch: 4, Loss: 0.9599, Validation Loss: 0.8585 
Hidden Dimension: 16, Epoch: 5, Loss: 0.9299, Validation Loss: 0.8292 
Hidden Dimension: 16, Epoch: 6, Loss: 0.8509, Validation Loss: 0.8144 
Hidden Dimension: 16, Epoch: 7, Loss: 0.8310, Validation Loss: 0.8067 
Hidden Dimension: 16, Epoch: 8, Loss: 0.8012, Validation Loss: 0.8058 
Hidden Dimension: 16, Epoch: 9, Loss: 0.7678, Validation Loss: 0.8075 
Hidden Dimension: 16, Epoch: 10, Loss: 0.7184, Validation Loss: 0.8122 
Hidden Dimension: 16, Epoch: 11, Loss: 0.6863, Validation Loss: 0.8195 
Early stopping at epoch 12 

 Hidden Dimension: 16, Validation Loss: 0.8310 

Hidden Dimension: 32, Epoch: 0, Loss: 1.7975, Validation Loss: 1.416

### **find best number of heads for citeseer dataset (two layar )**

In [15]:
torch.manual_seed(0)


num_heads_list = [1, 2, 4 , 8]


best_validation_loss = float('inf')
best_hidden_dims = None


for num_heads in num_heads_list:

    model = TwoLayerGAT_v2(input_dim=citeseer.num_node_features,hidden_dim= best_hidden_dim ,output_dim= citeseer_dataset.num_classes , num_heads = num_heads).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # Early stopping parameters
    patience = 5
    min_delta = 0.001
    patience_counter = 0
    best_loss = float('inf')

    # Train the model
    for epoch in range(200):
        loss = train(model, optimizer, criterion, citeseer)
        val_loss = validate(model, criterion, citeseer)

        # Check for improvement in validation loss
        if val_loss < best_loss - min_delta:
            best_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f'Early stopping at epoch {epoch} \n')
                break


        print(f'Attention Heads: {num_heads}, Epoch: {epoch}, Loss: {loss:.4f}, Validation Loss: {val_loss:.4f} ')

    # Evaluate the model
    validation_loss = validate(model, criterion, citeseer)


    print(f' Attention Heads: {num_heads}, Validation Loss: {validation_loss:.4f} \n')
    # Check if this number of attention heads is the best so far
    if validation_loss < best_validation_loss:
        best_validation_loss = validation_loss
        best_num_heads = num_heads

print('Best Number of Attention Heads: {}'.format(best_num_heads))


Attention Heads: 1, Epoch: 0, Loss: 1.8322, Validation Loss: 1.6865 
Attention Heads: 1, Epoch: 1, Loss: 1.6909, Validation Loss: 1.5543 
Attention Heads: 1, Epoch: 2, Loss: 1.5944, Validation Loss: 1.4297 
Attention Heads: 1, Epoch: 3, Loss: 1.4490, Validation Loss: 1.3211 
Attention Heads: 1, Epoch: 4, Loss: 1.3460, Validation Loss: 1.2365 
Attention Heads: 1, Epoch: 5, Loss: 1.2650, Validation Loss: 1.1626 
Attention Heads: 1, Epoch: 6, Loss: 1.2073, Validation Loss: 1.1019 
Attention Heads: 1, Epoch: 7, Loss: 1.1911, Validation Loss: 1.0559 
Attention Heads: 1, Epoch: 8, Loss: 1.1235, Validation Loss: 1.0173 
Attention Heads: 1, Epoch: 9, Loss: 1.1003, Validation Loss: 0.9857 
Attention Heads: 1, Epoch: 10, Loss: 1.0428, Validation Loss: 0.9587 
Attention Heads: 1, Epoch: 11, Loss: 1.0688, Validation Loss: 0.9366 
Attention Heads: 1, Epoch: 12, Loss: 1.0114, Validation Loss: 0.9197 
Attention Heads: 1, Epoch: 13, Loss: 0.9962, Validation Loss: 0.9069 
Attention Heads: 1, Epoch: 14,

In [16]:
torch.manual_seed(0)

# Train the best model
model = TwoLayerGAT_v2(input_dim=citeseer.num_node_features,hidden_dim=best_hidden_dim , output_dim=citeseer_dataset.num_classes, num_heads=best_num_heads).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

# Early stopping parameters
patience = 5
min_delta = 0.001
patience_counter = 0
best_loss = float('inf')

# Train the best model
for epoch in range(200):
    loss = train(model, optimizer, criterion, citeseer)
    val_loss = validate(model, criterion, citeseer)

    # Check for improvement in validation loss
    if val_loss < best_loss - min_delta:
        best_loss = val_loss
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f'Early stopping at epoch {epoch} \n')
            break

# Evaluate the model
test_accuracy = test(model, criterion, citeseer)
print(f'Test Accuracy(two layer) on citeseer dataset: {test_accuracy:.4f}')

Early stopping at epoch 12 

Test Accuracy(two layer) on citeseer dataset: 0.7579


## **One layer GAT V2**

In [17]:
from torch_geometric.nn import GATv2Conv
import torch.nn as nn
import torch.nn.functional as F

class OneLayerGAT_v2(nn.Module):
    def __init__(self, input_dim, output_dim, num_heads, dropout=0.6, negative_slope=0.2 , share_weights = False):
        super(OneLayerGAT_v2, self).__init__()
        self.gat1 = GATv2Conv(input_dim, output_dim, heads=num_heads, dropout=dropout, negative_slope=negative_slope ,share_weights = share_weights)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.gat1(x, edge_index)
        return x


### **Train and evaluate on Cora full dataset(One layer)**

### **find best number of heads for  cora full dataset(one layer)**

In [18]:
torch.manual_seed(0)


num_heads_list = [1, 2, 4 , 8]

best_validation_loss = float('inf')
best_hidden_dims = None


for num_heads in num_heads_list:

    model = OneLayerGAT_v2(input_dim= CoraFull.num_node_features , output_dim= CoraFull_dataset.num_classes , num_heads = num_heads).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # Early stopping parameters
    patience = 5
    min_delta = 0.001
    patience_counter = 0
    best_loss = float('inf')

    # Train the model
    for epoch in range(200):
        loss = train(model, optimizer, criterion, CoraFull)
        val_loss = validate(model, criterion, CoraFull)

        # Check for improvement in validation loss
        if val_loss < best_loss - min_delta:
            best_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f'Early stopping at epoch {epoch} \n')
                break


        print(f'Attention Heads: {num_heads}, Epoch: {epoch}, Loss: {loss:.4f}, Validation Loss: {val_loss:.4f} ')

    # Evaluate the model
    validation_loss = validate(model, criterion, CoraFull)


    print(f' Attention Heads: {num_heads}, Validation Loss: {validation_loss:.4f} \n')
    # Check if this number of attention heads is the best so far
    if validation_loss < best_validation_loss:
        best_validation_loss = validation_loss
        best_num_heads = num_heads

print('Best Number of Attention Heads: {}'.format(best_num_heads))


Attention Heads: 1, Epoch: 0, Loss: 4.2528, Validation Loss: 3.8383 
Attention Heads: 1, Epoch: 1, Loss: 3.8225, Validation Loss: 3.4846 
Attention Heads: 1, Epoch: 2, Loss: 3.4909, Validation Loss: 3.1567 
Attention Heads: 1, Epoch: 3, Loss: 3.2231, Validation Loss: 2.8651 
Attention Heads: 1, Epoch: 4, Loss: 2.9754, Validation Loss: 2.6331 
Attention Heads: 1, Epoch: 5, Loss: 2.7408, Validation Loss: 2.4521 
Attention Heads: 1, Epoch: 6, Loss: 2.5664, Validation Loss: 2.3041 
Attention Heads: 1, Epoch: 7, Loss: 2.4458, Validation Loss: 2.1787 
Attention Heads: 1, Epoch: 8, Loss: 2.3273, Validation Loss: 2.0708 
Attention Heads: 1, Epoch: 9, Loss: 2.2164, Validation Loss: 1.9751 
Attention Heads: 1, Epoch: 10, Loss: 2.1577, Validation Loss: 1.8934 
Attention Heads: 1, Epoch: 11, Loss: 2.0681, Validation Loss: 1.8237 
Attention Heads: 1, Epoch: 12, Loss: 2.0119, Validation Loss: 1.7665 
Attention Heads: 1, Epoch: 13, Loss: 1.9641, Validation Loss: 1.7165 
Attention Heads: 1, Epoch: 14,

In [19]:
torch.manual_seed(0)

# Train the best model
model = OneLayerGAT_v2(input_dim=CoraFull.num_node_features, output_dim=CoraFull_dataset.num_classes, num_heads=best_num_heads).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

# Early stopping parameters
patience = 5
min_delta = 0.001
patience_counter = 0
best_loss = float('inf')

# Train the best model
for epoch in range(200):
    loss = train(model, optimizer, criterion, CoraFull)
    val_loss = validate(model, criterion, CoraFull)

    # Check for improvement in validation loss
    if val_loss < best_loss - min_delta:
        best_loss = val_loss
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f'Early stopping at epoch {epoch} \n')
            break

# Evaluate the model
test_accuracy = test(model, criterion, CoraFull)
print(f'Test Accuracy(one layer) on CoraFull dataset: {test_accuracy:.4f}')

Early stopping at epoch 85 

Test Accuracy(one layer) on CoraFull dataset: 0.6998


### **Train and evaluate on Citeseer dataset(One layer)**

### **find best number of heads for  citeseer dataset(one layer)**

In [20]:
torch.manual_seed(0)


num_heads_list = [1, 2, 4 , 8]

best_validation_loss = float('inf')
best_hidden_dims = None


for num_heads in num_heads_list:

    model = OneLayerGAT_v2(input_dim= citeseer.num_node_features , output_dim= citeseer_dataset.num_classes , num_heads = num_heads).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # Early stopping parameters
    patience = 5
    min_delta = 0.001
    patience_counter = 0
    best_loss = float('inf')

    # Train the model
    for epoch in range(200):
        loss = train(model, optimizer, criterion, citeseer)
        val_loss = validate(model, criterion, citeseer)

        # Check for improvement in validation loss
        if val_loss < best_loss - min_delta:
            best_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f'Early stopping at epoch {epoch} \n')
                break


        print(f'Attention Heads: {num_heads}, Epoch: {epoch}, Loss: {loss:.4f}, Validation Loss: {val_loss:.4f} ')

    # Evaluate the model
    validation_loss = validate(model, criterion, citeseer)


    print(f' Attention Heads: {num_heads}, Validation Loss: {validation_loss:.4f} \n')
    # Check if this number of attention heads is the best so far
    if validation_loss < best_validation_loss:
        best_validation_loss = validation_loss
        best_num_heads = num_heads

print('Best Number of Attention Heads: {}'.format(best_num_heads))


Attention Heads: 1, Epoch: 0, Loss: 1.7895, Validation Loss: 1.6489 
Attention Heads: 1, Epoch: 1, Loss: 1.6214, Validation Loss: 1.5264 
Attention Heads: 1, Epoch: 2, Loss: 1.4957, Validation Loss: 1.4191 
Attention Heads: 1, Epoch: 3, Loss: 1.3919, Validation Loss: 1.3242 
Attention Heads: 1, Epoch: 4, Loss: 1.2975, Validation Loss: 1.2424 
Attention Heads: 1, Epoch: 5, Loss: 1.2049, Validation Loss: 1.1723 
Attention Heads: 1, Epoch: 6, Loss: 1.1517, Validation Loss: 1.1125 
Attention Heads: 1, Epoch: 7, Loss: 1.0823, Validation Loss: 1.0620 
Attention Heads: 1, Epoch: 8, Loss: 1.0646, Validation Loss: 1.0201 
Attention Heads: 1, Epoch: 9, Loss: 1.0139, Validation Loss: 0.9853 
Attention Heads: 1, Epoch: 10, Loss: 0.9657, Validation Loss: 0.9558 
Attention Heads: 1, Epoch: 11, Loss: 0.9426, Validation Loss: 0.9308 
Attention Heads: 1, Epoch: 12, Loss: 0.9243, Validation Loss: 0.9090 
Attention Heads: 1, Epoch: 13, Loss: 0.9134, Validation Loss: 0.8906 
Attention Heads: 1, Epoch: 14,

In [21]:
torch.manual_seed(0)

# Train the best model
model = OneLayerGAT_v2(input_dim=citeseer.num_node_features, output_dim=citeseer_dataset.num_classes, num_heads=best_num_heads).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

# Early stopping parameters
patience = 5
min_delta = 0.001
patience_counter = 0
best_loss = float('inf')

# Train the best model
for epoch in range(200):
    loss = train(model, optimizer, criterion, citeseer)
    val_loss = validate(model, criterion, citeseer)

    # Check for improvement in validation loss
    if val_loss < best_loss - min_delta:
        best_loss = val_loss
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f'Early stopping at epoch {epoch} \n')
            break

# Evaluate the model
test_accuracy = test(model, criterion, citeseer)
print(f'Test Accuracy(one layer) on citeseer dataset: {test_accuracy:.4f}')

Early stopping at epoch 53 

Test Accuracy(one layer) on citeseer dataset: 0.7414


## **Three layer GAT V2**

In [22]:
from torch_geometric.nn import GATv2Conv
import torch.nn as nn
import torch.nn.functional as F

class ThreeLayerGAT_v2(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_heads, dropout=0.6, negative_slope=0.2 , share_weights = False):
        super(ThreeLayerGAT_v2, self).__init__()
        self.gat1 = GATv2Conv(input_dim, hidden_dim, heads=num_heads, dropout=dropout, concat=True, negative_slope=negative_slope ,share_weights = share_weights)
        self.gat2 = GATv2Conv(hidden_dim * num_heads, hidden_dim, heads=num_heads, dropout=dropout, concat=True, negative_slope=negative_slope ,share_weights = share_weights)
        self.gat3 = GATv2Conv(hidden_dim * num_heads, output_dim, heads=1, dropout=dropout, negative_slope=negative_slope ,share_weights = share_weights)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.gat1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.gat2(x, edge_index)
        x = F.relu(x)
        x = self.gat3(x, edge_index)
        return F.log_softmax(x, dim=1)



### **Train and evaluate on Corafull dataset(three layer)**

In [23]:
torch.manual_seed(0)


hidden_dims = [16 , 32, 64, 128, 256]

best_validation_loss = float('inf')
best_hidden_dims = None


for hidden_dim in hidden_dims:

    model = ThreeLayerGAT_v2(input_dim=CoraFull.num_node_features, hidden_dim= hidden_dim , output_dim=CoraFull_dataset.num_classes , num_heads = 4).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # Early stopping parameters
    patience = 5
    min_delta = 0.001
    patience_counter = 0
    best_loss = float('inf')

    # Train the model
    for epoch in range(200):
        loss = train(model, optimizer, criterion, CoraFull)
        val_loss = validate(model, criterion, CoraFull)

        # Check for improvement in validation loss
        if val_loss < best_loss - min_delta:
            best_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f'Early stopping at epoch {epoch} \n')
                break

        print(f'Hidden Dimension: {hidden_dim}, Epoch: {epoch}, Loss: {loss:.4f}, Validation Loss: {val_loss:.4f} ')

    # Evaluate the model
    validation_loss = validate(model, criterion, CoraFull)


    print(f' Hidden Dimension: {hidden_dim}, Validation Loss: {validation_loss:.4f} \n')
    # Check if this number of attention heads is the best so far
    if validation_loss < best_validation_loss:
        best_validation_loss = validation_loss
        best_hidden_dim = hidden_dim

print('Best Hidden Dimension: {}'.format(best_hidden_dim))

Hidden Dimension: 16, Epoch: 0, Loss: 4.2539, Validation Loss: 4.1139 
Hidden Dimension: 16, Epoch: 1, Loss: 4.1296, Validation Loss: 3.8635 
Hidden Dimension: 16, Epoch: 2, Loss: 3.9587, Validation Loss: 3.6388 
Hidden Dimension: 16, Epoch: 3, Loss: 3.8032, Validation Loss: 3.4437 
Hidden Dimension: 16, Epoch: 4, Loss: 3.6337, Validation Loss: 3.2344 
Hidden Dimension: 16, Epoch: 5, Loss: 3.4698, Validation Loss: 3.0136 
Hidden Dimension: 16, Epoch: 6, Loss: 3.2952, Validation Loss: 2.7770 
Hidden Dimension: 16, Epoch: 7, Loss: 3.1333, Validation Loss: 2.5796 
Hidden Dimension: 16, Epoch: 8, Loss: 2.9944, Validation Loss: 2.4109 
Hidden Dimension: 16, Epoch: 9, Loss: 2.9150, Validation Loss: 2.2829 
Hidden Dimension: 16, Epoch: 10, Loss: 2.7563, Validation Loss: 2.1444 
Hidden Dimension: 16, Epoch: 11, Loss: 2.6510, Validation Loss: 1.9985 
Hidden Dimension: 16, Epoch: 12, Loss: 2.5571, Validation Loss: 1.8635 
Hidden Dimension: 16, Epoch: 13, Loss: 2.4879, Validation Loss: 1.7655 
Hi

### **find best number of heads for  cora full dataset(three layer)**

In [24]:
torch.manual_seed(0)


num_heads_list = [1, 2, 4 , 8]

best_validation_loss = float('inf')
best_hidden_dims = None


for num_heads in num_heads_list:

    model = ThreeLayerGAT_v2(input_dim= CoraFull.num_node_features ,hidden_dim=best_hidden_dim , output_dim= CoraFull_dataset.num_classes , num_heads = num_heads).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # Early stopping parameters
    patience = 5
    min_delta = 0.001
    patience_counter = 0
    best_loss = float('inf')

    # Train the model
    for epoch in range(200):
        loss = train(model, optimizer, criterion, CoraFull)
        val_loss = validate(model, criterion, CoraFull)

        # Check for improvement in validation loss
        if val_loss < best_loss - min_delta:
            best_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f'Early stopping at epoch {epoch} \n')
                break


        print(f'Attention Heads: {num_heads}, Epoch: {epoch}, Loss: {loss:.4f}, Validation Loss: {val_loss:.4f} ')

    # Evaluate the model
    validation_loss = validate(model, criterion, CoraFull)


    print(f' Attention Heads: {num_heads}, Validation Loss: {validation_loss:.4f} \n')
    # Check if this number of attention heads is the best so far
    if validation_loss < best_validation_loss:
        best_validation_loss = validation_loss
        best_num_heads = num_heads

print('Best Number of Attention Heads: {}'.format(best_num_heads))


Attention Heads: 1, Epoch: 0, Loss: 4.2597, Validation Loss: 4.1553 
Attention Heads: 1, Epoch: 1, Loss: 4.1931, Validation Loss: 4.0958 
Attention Heads: 1, Epoch: 2, Loss: 4.1233, Validation Loss: 3.9845 
Attention Heads: 1, Epoch: 3, Loss: 4.0446, Validation Loss: 3.8314 
Attention Heads: 1, Epoch: 4, Loss: 3.9572, Validation Loss: 3.6874 
Attention Heads: 1, Epoch: 5, Loss: 3.9724, Validation Loss: 3.6211 
Attention Heads: 1, Epoch: 6, Loss: 3.8283, Validation Loss: 3.5761 
Attention Heads: 1, Epoch: 7, Loss: 3.7966, Validation Loss: 3.5179 
Attention Heads: 1, Epoch: 8, Loss: 3.7632, Validation Loss: 3.4421 
Attention Heads: 1, Epoch: 9, Loss: 3.6884, Validation Loss: 3.3614 
Attention Heads: 1, Epoch: 10, Loss: 3.6250, Validation Loss: 3.2808 
Attention Heads: 1, Epoch: 11, Loss: 3.5758, Validation Loss: 3.2174 
Attention Heads: 1, Epoch: 12, Loss: 3.5227, Validation Loss: 3.1698 
Attention Heads: 1, Epoch: 13, Loss: 3.4461, Validation Loss: 3.1151 
Attention Heads: 1, Epoch: 14,

In [25]:
torch.manual_seed(0)

# Train the best model
model = ThreeLayerGAT_v2(input_dim=CoraFull.num_node_features, hidden_dim=best_hidden_dim , output_dim=CoraFull_dataset.num_classes, num_heads=best_num_heads).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

# Early stopping parameters
patience = 5
min_delta = 0.001
patience_counter = 0
best_loss = float('inf')

# Train the best model
for epoch in range(200):
    loss = train(model, optimizer, criterion, CoraFull)
    val_loss = validate(model, criterion, CoraFull)

    # Check for improvement in validation loss
    if val_loss < best_loss - min_delta:
        best_loss = val_loss
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f'Early stopping at epoch {epoch} \n')
            break

# Evaluate the model
test_accuracy = test(model, criterion, CoraFull)
print(f'Test Accuracy(three layer) on CoraFull dataset: {test_accuracy:.4f}')

Early stopping at epoch 50 

Test Accuracy(three layer) on CoraFull dataset: 0.6789


### **Train and evaluate on Citeseer dataset(three layer)**

In [26]:
torch.manual_seed(0)


hidden_dims = [16 , 32, 64, 128, 256]

best_validation_loss = float('inf')
best_hidden_dims = None


for hidden_dim in hidden_dims:

    model = ThreeLayerGAT_v2(input_dim=citeseer.num_node_features, hidden_dim= hidden_dim , output_dim= citeseer_dataset.num_classes , num_heads = 4).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # Early stopping parameters
    patience = 5
    min_delta = 0.001
    patience_counter = 0
    best_loss = float('inf')

    # Train the model
    for epoch in range(200):
        loss = train(model, optimizer, criterion, citeseer)
        val_loss = validate(model, criterion, citeseer)

        # Check for improvement in validation loss
        if val_loss < best_loss - min_delta:
            best_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f'Early stopping at epoch {epoch} \n')
                break

        # Print the loss
        print(f'Hidden Dimension: {hidden_dim}, Epoch: {epoch}, Loss: {loss:.4f}, Validation Loss: {val_loss:.4f} ')

    # Evaluate the model
    validation_loss = validate(model, criterion, citeseer)


    print(f' Hidden Dimension: {hidden_dim}, Validation Loss: {validation_loss:.4f} \n')

    # Check if this number of attention heads is the best so far
    if validation_loss < best_validation_loss:
        best_validation_loss = validation_loss
        best_hidden_dim = hidden_dim

print('Best Hidden Dimension: {}'.format(best_hidden_dim))

Hidden Dimension: 16, Epoch: 0, Loss: 1.7866, Validation Loss: 1.6352 
Hidden Dimension: 16, Epoch: 1, Loss: 1.6530, Validation Loss: 1.4322 
Hidden Dimension: 16, Epoch: 2, Loss: 1.4975, Validation Loss: 1.2163 
Hidden Dimension: 16, Epoch: 3, Loss: 1.3310, Validation Loss: 1.0526 
Hidden Dimension: 16, Epoch: 4, Loss: 1.2642, Validation Loss: 0.9695 
Hidden Dimension: 16, Epoch: 5, Loss: 1.1919, Validation Loss: 0.9299 
Hidden Dimension: 16, Epoch: 6, Loss: 1.1570, Validation Loss: 0.9215 
Hidden Dimension: 16, Epoch: 7, Loss: 1.1268, Validation Loss: 0.9208 
Hidden Dimension: 16, Epoch: 8, Loss: 1.1033, Validation Loss: 0.9187 
Hidden Dimension: 16, Epoch: 9, Loss: 1.1043, Validation Loss: 0.9128 
Hidden Dimension: 16, Epoch: 10, Loss: 1.0620, Validation Loss: 0.9091 
Hidden Dimension: 16, Epoch: 11, Loss: 1.0512, Validation Loss: 0.9052 
Hidden Dimension: 16, Epoch: 12, Loss: 1.0243, Validation Loss: 0.9054 
Hidden Dimension: 16, Epoch: 13, Loss: 1.0000, Validation Loss: 0.9148 
Hi

### **find best number of heads for  citeseer dataset(three layer)**

In [27]:
torch.manual_seed(0)


num_heads_list = [1, 2, 4 , 8]

best_validation_loss = float('inf')
best_hidden_dims = None


for num_heads in num_heads_list:

    model = ThreeLayerGAT_v2(input_dim= citeseer.num_node_features ,hidden_dim=best_hidden_dim , output_dim= citeseer_dataset.num_classes , num_heads = num_heads).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # Early stopping parameters
    patience = 5
    min_delta = 0.001
    patience_counter = 0
    best_loss = float('inf')

    # Train the model
    for epoch in range(200):
        loss = train(model, optimizer, criterion, citeseer)
        val_loss = validate(model, criterion, citeseer)

        # Check for improvement in validation loss
        if val_loss < best_loss - min_delta:
            best_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f'Early stopping at epoch {epoch} \n')
                break


        print(f'Attention Heads: {num_heads}, Epoch: {epoch}, Loss: {loss:.4f}, Validation Loss: {val_loss:.4f} ')

    # Evaluate the model
    validation_loss = validate(model, criterion, citeseer)


    print(f' Attention Heads: {num_heads}, Validation Loss: {validation_loss:.4f} \n')
    # Check if this number of attention heads is the best so far
    if validation_loss < best_validation_loss:
        best_validation_loss = validation_loss
        best_num_heads = num_heads

print('Best Number of Attention Heads: {}'.format(best_num_heads))


Attention Heads: 1, Epoch: 0, Loss: 1.8107, Validation Loss: 1.6966 
Attention Heads: 1, Epoch: 1, Loss: 1.7179, Validation Loss: 1.5796 
Attention Heads: 1, Epoch: 2, Loss: 1.6169, Validation Loss: 1.4568 
Attention Heads: 1, Epoch: 3, Loss: 1.5220, Validation Loss: 1.3314 
Attention Heads: 1, Epoch: 4, Loss: 1.4583, Validation Loss: 1.2249 
Attention Heads: 1, Epoch: 5, Loss: 1.4576, Validation Loss: 1.1446 
Attention Heads: 1, Epoch: 6, Loss: 1.3632, Validation Loss: 1.0893 
Attention Heads: 1, Epoch: 7, Loss: 1.3762, Validation Loss: 1.0514 
Attention Heads: 1, Epoch: 8, Loss: 1.3484, Validation Loss: 1.0279 
Attention Heads: 1, Epoch: 9, Loss: 1.2946, Validation Loss: 1.0098 
Attention Heads: 1, Epoch: 10, Loss: 1.2634, Validation Loss: 0.9864 
Attention Heads: 1, Epoch: 11, Loss: 1.2650, Validation Loss: 0.9606 
Attention Heads: 1, Epoch: 12, Loss: 1.2452, Validation Loss: 0.9374 
Attention Heads: 1, Epoch: 13, Loss: 1.2247, Validation Loss: 0.9233 
Attention Heads: 1, Epoch: 14,

In [28]:
torch.manual_seed(0)

# Train the best model
model = ThreeLayerGAT_v2(input_dim=citeseer.num_node_features, hidden_dim=best_hidden_dim , output_dim=citeseer_dataset.num_classes, num_heads=best_num_heads).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

# Early stopping parameters
patience = 5
min_delta = 0.001
patience_counter = 0
best_loss = float('inf')

# Train the best model
for epoch in range(200):
    loss = train(model, optimizer, criterion, citeseer)
    val_loss = validate(model, criterion, citeseer)

    # Check for improvement in validation loss
    if val_loss < best_loss - min_delta:
        best_loss = val_loss
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f'Early stopping at epoch {epoch} \n')
            break

# Evaluate the model
test_accuracy = test(model, criterion, citeseer)
print(f'Test Accuracy(three layer) on citeseer dataset: {test_accuracy:.4f}')

Early stopping at epoch 20 

Test Accuracy(three layer) on citeseer dataset: 0.7323
