In [1]:
!pip install torch torchvision



In [2]:
import torch
print(torch.__version__)


2.1.0+cu121


In [3]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

Using device: cuda


In [4]:
!pip install torch_geometric


Collecting torch_geometric
  Downloading torch_geometric-2.4.0-py3-none-any.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch_geometric
Successfully installed torch_geometric-2.4.0


# **Load Dataset**

## **Load citeseer**

In [5]:
import torch
from torch_geometric.datasets import Planetoid



citeseer_dataset = Planetoid(root='', name='CiteSeer')

citeseer = citeseer_dataset[0]

Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.x
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.tx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.allx
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.y
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ty
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.ally
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.graph
Downloading https://github.com/kimiyoung/planetoid/raw/master/data/ind.citeseer.test.index
Processing...
Done!


## **Load CoraFull**

In [6]:
from torch_geometric.datasets import CoraFull


root = './CoraFull'


CoraFull_dataset = CoraFull(root)

CoraFull_dataset.download()
CoraFull_dataset.process()

CoraFull =  CoraFull_dataset[0]



Downloading https://github.com/abojchevski/graph2gauss/raw/master/data/cora.npz
Processing...
Done!
Using existing file cora.npz


# **Split dataset**

## **split cora full**

In [7]:
from torch_geometric.transforms import RandomNodeSplit

transform = RandomNodeSplit(num_train_per_class=int(CoraFull.num_nodes * 0.7), num_val=int(CoraFull.num_nodes * 0.1), num_test=int(CoraFull.num_nodes * 0.2))
CoraFull = transform(CoraFull)

## **split citeseer**

In [8]:

transform = RandomNodeSplit(num_train_per_class=int(citeseer.num_nodes * 0.7), num_val=int(citeseer.num_nodes * 0.1), num_test=int(citeseer.num_nodes * 0.2))
citeseer = transform(citeseer)

# **important functions**

In [9]:
def train(model, optimizer, criterion, data):
    model.train()
    optimizer.zero_grad()
    data = data.to(device)
    out = model(data)
    loss = criterion(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    return loss.item()

def validate(model, criterion, data):
    model.eval()
    data = data.to(device)
    with torch.no_grad():
        out = model(data)
        val_loss = criterion(out[data.val_mask], data.y[data.val_mask])
    return val_loss.item()

def test(model, criterion, data):
    model.eval()
    data = data.to(device)
    with torch.no_grad():
        out = model(data)
        _, pred = torch.max(out, dim=1)
        correct = float(pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
        acc = correct / data.test_mask.sum().item()
    return acc


# **D) implement GAT**

## **Two layer GAT**

In [10]:

from torch_geometric.nn import GATConv
import torch.optim as optim
import torch.nn.functional as F

class TwoLayerGAT(torch.nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_heads, dropout=0.4, negative_slope=0.2):
        super(TwoLayerGAT, self).__init__()
        self.gat1 = GATConv(input_dim, hidden_dim, heads=num_heads, dropout=dropout, negative_slope=negative_slope)
        self.gat2 = GATConv(hidden_dim * num_heads, output_dim, heads=1, dropout=dropout, negative_slope=negative_slope)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.gat1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.gat2(x, edge_index)
        return F.log_softmax(x, dim=1)



### **Train and evaluate on Corafull dataset(two layer)**

In [11]:
torch.manual_seed(0)


hidden_dims = [16 , 32, 64, 128, 256]


best_validation_loss = float('inf')
best_num_heads = None


for hidden_dim in hidden_dims:

    model = TwoLayerGAT(input_dim=CoraFull.num_node_features, hidden_dim= hidden_dim , output_dim=CoraFull_dataset.num_classes, num_heads = 4).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # Early stopping parameters
    patience = 5
    min_delta = 0.001
    patience_counter = 0
    best_loss = float('inf')

    # Train the model
    for epoch in range(200):
        loss = train(model, optimizer, criterion, CoraFull)
        val_loss = validate(model, criterion, CoraFull)

        # Check for improvement in validation loss
        if val_loss < best_loss - min_delta:
            best_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f'Early stopping at epoch {epoch} \n')
                break


        print(f'Hidden Dimension: {hidden_dim}, Epoch: {epoch}, Loss: {loss:.4f}, Validation Loss: {val_loss:.4f} ')

    # Evaluate the model
    validation_loss = validate(model, criterion, CoraFull)


    print(f' Hidden Dimension: {hidden_dim}, Validation Loss: {validation_loss:.4f} \n')
    # Check if this number of attention heads is the best so far
    if validation_loss < best_validation_loss:
        best_validation_loss = validation_loss
        best_hidden_dim = hidden_dim

print('Best Hidden Dimension: {}'.format(best_hidden_dim))

Hidden Dimension: 16, Epoch: 0, Loss: 4.2540, Validation Loss: 4.0407 
Hidden Dimension: 16, Epoch: 1, Loss: 4.0489, Validation Loss: 3.7696 
Hidden Dimension: 16, Epoch: 2, Loss: 3.8117, Validation Loss: 3.4700 
Hidden Dimension: 16, Epoch: 3, Loss: 3.5546, Validation Loss: 3.1713 
Hidden Dimension: 16, Epoch: 4, Loss: 3.3187, Validation Loss: 2.9201 
Hidden Dimension: 16, Epoch: 5, Loss: 3.0925, Validation Loss: 2.6899 
Hidden Dimension: 16, Epoch: 6, Loss: 2.8957, Validation Loss: 2.4650 
Hidden Dimension: 16, Epoch: 7, Loss: 2.7306, Validation Loss: 2.2754 
Hidden Dimension: 16, Epoch: 8, Loss: 2.5764, Validation Loss: 2.1198 
Hidden Dimension: 16, Epoch: 9, Loss: 2.4124, Validation Loss: 1.9802 
Hidden Dimension: 16, Epoch: 10, Loss: 2.2947, Validation Loss: 1.8614 
Hidden Dimension: 16, Epoch: 11, Loss: 2.1900, Validation Loss: 1.7619 
Hidden Dimension: 16, Epoch: 12, Loss: 2.1035, Validation Loss: 1.6785 
Hidden Dimension: 16, Epoch: 13, Loss: 2.0177, Validation Loss: 1.6061 
Hi

### **find best number of heads for  cora full dataset(two layer)**

In [12]:
torch.manual_seed(0)


num_heads_list = [1, 2, 4 , 8]

best_validation_loss = float('inf')
best_hidden_dims = None


for num_heads in num_heads_list:

    model = TwoLayerGAT(input_dim= CoraFull.num_node_features ,hidden_dim=best_hidden_dim , output_dim= CoraFull_dataset.num_classes , num_heads = num_heads).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # Early stopping parameters
    patience = 5
    min_delta = 0.001
    patience_counter = 0
    best_loss = float('inf')

    # Train the model
    for epoch in range(200):
        loss = train(model, optimizer, criterion, CoraFull)
        val_loss = validate(model, criterion, CoraFull)

        # Check for improvement in validation loss
        if val_loss < best_loss - min_delta:
            best_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f'Early stopping at epoch {epoch} \n')
                break


        print(f'Attention Heads: {num_heads}, Epoch: {epoch}, Loss: {loss:.4f}, Validation Loss: {val_loss:.4f} ')

    # Evaluate the model
    validation_loss = validate(model, criterion, CoraFull)


    print(f' Attention Heads: {num_heads}, Validation Loss: {validation_loss:.4f} \n')
    # Check if this number of attention heads is the best so far
    if validation_loss < best_validation_loss:
        best_validation_loss = validation_loss
        best_num_heads = num_heads

print('Best Number of Attention Heads: {}'.format(best_num_heads))


Attention Heads: 1, Epoch: 0, Loss: 4.2531, Validation Loss: 4.0613 
Attention Heads: 1, Epoch: 1, Loss: 4.0674, Validation Loss: 3.7771 
Attention Heads: 1, Epoch: 2, Loss: 3.8324, Validation Loss: 3.4404 
Attention Heads: 1, Epoch: 3, Loss: 3.5733, Validation Loss: 3.1708 
Attention Heads: 1, Epoch: 4, Loss: 3.3280, Validation Loss: 2.8764 
Attention Heads: 1, Epoch: 5, Loss: 3.1077, Validation Loss: 2.6519 
Attention Heads: 1, Epoch: 6, Loss: 2.9314, Validation Loss: 2.4978 
Attention Heads: 1, Epoch: 7, Loss: 2.7960, Validation Loss: 2.3297 
Attention Heads: 1, Epoch: 8, Loss: 2.6227, Validation Loss: 2.1623 
Attention Heads: 1, Epoch: 9, Loss: 2.5126, Validation Loss: 2.0351 
Attention Heads: 1, Epoch: 10, Loss: 2.4262, Validation Loss: 1.9448 
Attention Heads: 1, Epoch: 11, Loss: 2.3045, Validation Loss: 1.8551 
Attention Heads: 1, Epoch: 12, Loss: 2.2161, Validation Loss: 1.7679 
Attention Heads: 1, Epoch: 13, Loss: 2.1475, Validation Loss: 1.6957 
Attention Heads: 1, Epoch: 14,

In [13]:
torch.manual_seed(0)

# Train the best model
model = TwoLayerGAT(input_dim=CoraFull.num_node_features, hidden_dim=best_hidden_dim , output_dim=CoraFull_dataset.num_classes, num_heads=best_num_heads).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

# Early stopping parameters
patience = 5
min_delta = 0.001
patience_counter = 0
best_loss = float('inf')

# Train the best model
for epoch in range(200):
    loss = train(model, optimizer, criterion, CoraFull)
    val_loss = validate(model, criterion, CoraFull)

    # Check for improvement in validation loss
    if val_loss < best_loss - min_delta:
        best_loss = val_loss
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f'Early stopping at epoch {epoch} \n')
            break

# Evaluate the model
test_accuracy = test(model, criterion, CoraFull)
print(f'Test Accuracy(two layer) on CoraFull dataset: {test_accuracy:.4f}')

Early stopping at epoch 42 

Test Accuracy(two layer) on CoraFull dataset: 0.7135


### **Train and evaluate on Citeseer dataset(two layer)**

In [14]:
torch.manual_seed(0)


hidden_dims = [16 , 32, 64, 128, 256]


best_validation_loss = float('inf')
best_num_heads = None


for hidden_dim in hidden_dims:

    model = TwoLayerGAT(input_dim=citeseer.num_node_features, hidden_dim= hidden_dim , output_dim=citeseer_dataset.num_classes, num_heads = 4).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # Early stopping parameters
    patience = 5
    min_delta = 0.001
    patience_counter = 0
    best_loss = float('inf')

    # Train the model
    for epoch in range(200):
        loss = train(model, optimizer, criterion, citeseer)
        val_loss = validate(model, criterion, citeseer)

        # Check for improvement in validation loss
        if val_loss < best_loss - min_delta:
            best_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f'Early stopping at epoch {epoch} \n')
                break


        print(f'Hidden Dimension: {hidden_dim}, Epoch: {epoch}, Loss: {loss:.4f}, Validation Loss: {val_loss:.4f} ')

    # Evaluate the model
    validation_loss = validate(model, criterion, citeseer)


    print(f' Hidden Dimension: {hidden_dim}, Validation Loss: {validation_loss:.4f} \n')
    # Check if this number of attention heads is the best so far
    if validation_loss < best_validation_loss:
        best_validation_loss = validation_loss
        best_hidden_dim = hidden_dim

print('Best Hidden Dimension: {}'.format(best_hidden_dim))

Hidden Dimension: 16, Epoch: 0, Loss: 1.7958, Validation Loss: 1.5136 
Hidden Dimension: 16, Epoch: 1, Loss: 1.5056, Validation Loss: 1.2191 
Hidden Dimension: 16, Epoch: 2, Loss: 1.2490, Validation Loss: 0.9789 
Hidden Dimension: 16, Epoch: 3, Loss: 1.0566, Validation Loss: 0.8304 
Hidden Dimension: 16, Epoch: 4, Loss: 0.9464, Validation Loss: 0.7518 
Hidden Dimension: 16, Epoch: 5, Loss: 0.8797, Validation Loss: 0.7183 
Hidden Dimension: 16, Epoch: 6, Loss: 0.8223, Validation Loss: 0.6991 
Hidden Dimension: 16, Epoch: 7, Loss: 0.8397, Validation Loss: 0.6819 
Hidden Dimension: 16, Epoch: 8, Loss: 0.7556, Validation Loss: 0.6690 
Hidden Dimension: 16, Epoch: 9, Loss: 0.7404, Validation Loss: 0.6636 
Hidden Dimension: 16, Epoch: 10, Loss: 0.7110, Validation Loss: 0.6641 
Hidden Dimension: 16, Epoch: 11, Loss: 0.7004, Validation Loss: 0.6689 
Hidden Dimension: 16, Epoch: 12, Loss: 0.6380, Validation Loss: 0.6772 
Hidden Dimension: 16, Epoch: 13, Loss: 0.6418, Validation Loss: 0.6878 
Ea

### **find best number of heads for citeseer dataset (two layar )**

In [15]:
torch.manual_seed(0)


num_heads_list = [1, 2, 4 , 8]


best_validation_loss = float('inf')
best_hidden_dims = None


for num_heads in num_heads_list:

    model = TwoLayerGAT(input_dim=citeseer.num_node_features,hidden_dim= best_hidden_dim ,output_dim= citeseer_dataset.num_classes , num_heads = num_heads).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # Early stopping parameters
    patience = 5
    min_delta = 0.001
    patience_counter = 0
    best_loss = float('inf')

    # Train the model
    for epoch in range(200):
        loss = train(model, optimizer, criterion, citeseer)
        val_loss = validate(model, criterion, citeseer)

        # Check for improvement in validation loss
        if val_loss < best_loss - min_delta:
            best_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f'Early stopping at epoch {epoch} \n')
                break


        print(f'Attention Heads: {num_heads}, Epoch: {epoch}, Loss: {loss:.4f}, Validation Loss: {val_loss:.4f} ')

    # Evaluate the model
    validation_loss = validate(model, criterion, citeseer)


    print(f' Attention Heads: {num_heads}, Validation Loss: {validation_loss:.4f} \n')
    # Check if this number of attention heads is the best so far
    if validation_loss < best_validation_loss:
        best_validation_loss = validation_loss
        best_num_heads = num_heads

print('Best Number of Attention Heads: {}'.format(best_num_heads))


Attention Heads: 1, Epoch: 0, Loss: 1.7975, Validation Loss: 1.5140 
Attention Heads: 1, Epoch: 1, Loss: 1.5095, Validation Loss: 1.2177 
Attention Heads: 1, Epoch: 2, Loss: 1.2745, Validation Loss: 0.9838 
Attention Heads: 1, Epoch: 3, Loss: 1.0791, Validation Loss: 0.8397 
Attention Heads: 1, Epoch: 4, Loss: 0.9827, Validation Loss: 0.7696 
Attention Heads: 1, Epoch: 5, Loss: 0.9399, Validation Loss: 0.7370 
Attention Heads: 1, Epoch: 6, Loss: 0.8748, Validation Loss: 0.7079 
Attention Heads: 1, Epoch: 7, Loss: 0.8925, Validation Loss: 0.6891 
Attention Heads: 1, Epoch: 8, Loss: 0.8142, Validation Loss: 0.6740 
Attention Heads: 1, Epoch: 9, Loss: 0.7901, Validation Loss: 0.6657 
Attention Heads: 1, Epoch: 10, Loss: 0.7812, Validation Loss: 0.6645 
Attention Heads: 1, Epoch: 11, Loss: 0.7518, Validation Loss: 0.6712 
Attention Heads: 1, Epoch: 12, Loss: 0.7374, Validation Loss: 0.6794 
Attention Heads: 1, Epoch: 13, Loss: 0.7298, Validation Loss: 0.6913 
Attention Heads: 1, Epoch: 14,

In [16]:
torch.manual_seed(0)

# Train the best model
model = TwoLayerGAT(input_dim=citeseer.num_node_features,hidden_dim=best_hidden_dim , output_dim=citeseer_dataset.num_classes, num_heads=best_num_heads).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

# Early stopping parameters
patience = 5
min_delta = 0.001
patience_counter = 0
best_loss = float('inf')

# Train the best model
for epoch in range(200):
    loss = train(model, optimizer, criterion, citeseer)
    val_loss = validate(model, criterion, citeseer)

    # Check for improvement in validation loss
    if val_loss < best_loss - min_delta:
        best_loss = val_loss
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f'Early stopping at epoch {epoch} \n')
            break

# Evaluate the model
test_accuracy = test(model, criterion, citeseer)
print(f'Test Accuracy(two layer) on citeseer dataset: {test_accuracy:.4f}')

Early stopping at epoch 9 

Test Accuracy(two layer) on citeseer dataset: 0.7308


## **One layer GAT**

In [17]:
from torch_geometric.nn import GATConv
import torch.nn as nn
import torch.nn.functional as F

class OneLayerGAT(nn.Module):
    def __init__(self, input_dim, output_dim, num_heads, dropout=0.6, negative_slope=0.2):
        super(OneLayerGAT, self).__init__()
        self.gat1 = GATConv(input_dim, output_dim, heads=num_heads, dropout=dropout, negative_slope=negative_slope)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.gat1(x, edge_index)
        return x


### **Train and evaluate on Cora full dataset(One layer)**

### **find best number of heads for  cora full dataset(one layer)**

In [18]:
torch.manual_seed(0)


num_heads_list = [1, 2, 4 , 8]

best_validation_loss = float('inf')
best_hidden_dims = None


for num_heads in num_heads_list:

    model = OneLayerGAT(input_dim= CoraFull.num_node_features , output_dim= CoraFull_dataset.num_classes , num_heads = num_heads).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # Early stopping parameters
    patience = 5
    min_delta = 0.001
    patience_counter = 0
    best_loss = float('inf')

    # Train the model
    for epoch in range(200):
        loss = train(model, optimizer, criterion, CoraFull)
        val_loss = validate(model, criterion, CoraFull)

        # Check for improvement in validation loss
        if val_loss < best_loss - min_delta:
            best_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f'Early stopping at epoch {epoch} \n')
                break


        print(f'Attention Heads: {num_heads}, Epoch: {epoch}, Loss: {loss:.4f}, Validation Loss: {val_loss:.4f} ')

    # Evaluate the model
    validation_loss = validate(model, criterion, CoraFull)


    print(f' Attention Heads: {num_heads}, Validation Loss: {validation_loss:.4f} \n')
    # Check if this number of attention heads is the best so far
    if validation_loss < best_validation_loss:
        best_validation_loss = validation_loss
        best_num_heads = num_heads

print('Best Number of Attention Heads: {}'.format(best_num_heads))


Attention Heads: 1, Epoch: 0, Loss: 4.2543, Validation Loss: 3.8281 
Attention Heads: 1, Epoch: 1, Loss: 3.8231, Validation Loss: 3.4111 
Attention Heads: 1, Epoch: 2, Loss: 3.5078, Validation Loss: 3.0978 
Attention Heads: 1, Epoch: 3, Loss: 3.2361, Validation Loss: 2.8806 
Attention Heads: 1, Epoch: 4, Loss: 3.0056, Validation Loss: 2.7098 
Attention Heads: 1, Epoch: 5, Loss: 2.8281, Validation Loss: 2.5213 
Attention Heads: 1, Epoch: 6, Loss: 2.6634, Validation Loss: 2.3686 
Attention Heads: 1, Epoch: 7, Loss: 2.5811, Validation Loss: 2.2856 
Attention Heads: 1, Epoch: 8, Loss: 2.4504, Validation Loss: 2.2229 
Attention Heads: 1, Epoch: 9, Loss: 2.3693, Validation Loss: 2.1461 
Attention Heads: 1, Epoch: 10, Loss: 2.2879, Validation Loss: 2.0680 
Attention Heads: 1, Epoch: 11, Loss: 2.2164, Validation Loss: 1.9896 
Attention Heads: 1, Epoch: 12, Loss: 2.1435, Validation Loss: 1.9212 
Attention Heads: 1, Epoch: 13, Loss: 2.0866, Validation Loss: 1.8679 
Attention Heads: 1, Epoch: 14,

In [19]:
torch.manual_seed(0)

# Train the best model
model = OneLayerGAT(input_dim=CoraFull.num_node_features, output_dim=CoraFull_dataset.num_classes, num_heads=best_num_heads).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

# Early stopping parameters
patience = 5
min_delta = 0.001
patience_counter = 0
best_loss = float('inf')

# Train the best model
for epoch in range(200):
    loss = train(model, optimizer, criterion, CoraFull)
    val_loss = validate(model, criterion, CoraFull)

    # Check for improvement in validation loss
    if val_loss < best_loss - min_delta:
        best_loss = val_loss
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f'Early stopping at epoch {epoch} \n')
            break

# Evaluate the model
test_accuracy = test(model, criterion, CoraFull)
print(f'Test Accuracy(one layer) on CoraFull dataset: {test_accuracy:.4f}')

Early stopping at epoch 72 

Test Accuracy(one layer) on CoraFull dataset: 0.7016


### **Train and evaluate on Citeseer dataset(One layer)**

### **find best number of heads for  citeseer dataset(one layer)**

In [20]:
torch.manual_seed(0)


num_heads_list = [1, 2, 4 , 8]

best_validation_loss = float('inf')
best_hidden_dims = None


for num_heads in num_heads_list:

    model = OneLayerGAT(input_dim= citeseer.num_node_features , output_dim= citeseer_dataset.num_classes , num_heads = num_heads).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # Early stopping parameters
    patience = 5
    min_delta = 0.001
    patience_counter = 0
    best_loss = float('inf')

    # Train the model
    for epoch in range(200):
        loss = train(model, optimizer, criterion, citeseer)
        val_loss = validate(model, criterion, citeseer)

        # Check for improvement in validation loss
        if val_loss < best_loss - min_delta:
            best_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f'Early stopping at epoch {epoch} \n')
                break


        print(f'Attention Heads: {num_heads}, Epoch: {epoch}, Loss: {loss:.4f}, Validation Loss: {val_loss:.4f} ')

    # Evaluate the model
    validation_loss = validate(model, criterion, citeseer)


    print(f' Attention Heads: {num_heads}, Validation Loss: {validation_loss:.4f} \n')
    # Check if this number of attention heads is the best so far
    if validation_loss < best_validation_loss:
        best_validation_loss = validation_loss
        best_num_heads = num_heads

print('Best Number of Attention Heads: {}'.format(best_num_heads))


Attention Heads: 1, Epoch: 0, Loss: 1.8000, Validation Loss: 1.6394 
Attention Heads: 1, Epoch: 1, Loss: 1.6309, Validation Loss: 1.5077 
Attention Heads: 1, Epoch: 2, Loss: 1.4966, Validation Loss: 1.3924 
Attention Heads: 1, Epoch: 3, Loss: 1.4007, Validation Loss: 1.2932 
Attention Heads: 1, Epoch: 4, Loss: 1.3165, Validation Loss: 1.2080 
Attention Heads: 1, Epoch: 5, Loss: 1.2217, Validation Loss: 1.1350 
Attention Heads: 1, Epoch: 6, Loss: 1.1657, Validation Loss: 1.0727 
Attention Heads: 1, Epoch: 7, Loss: 1.0983, Validation Loss: 1.0191 
Attention Heads: 1, Epoch: 8, Loss: 1.0814, Validation Loss: 0.9750 
Attention Heads: 1, Epoch: 9, Loss: 1.0268, Validation Loss: 0.9376 
Attention Heads: 1, Epoch: 10, Loss: 0.9895, Validation Loss: 0.9056 
Attention Heads: 1, Epoch: 11, Loss: 0.9721, Validation Loss: 0.8789 
Attention Heads: 1, Epoch: 12, Loss: 0.9386, Validation Loss: 0.8562 
Attention Heads: 1, Epoch: 13, Loss: 0.9355, Validation Loss: 0.8364 
Attention Heads: 1, Epoch: 14,

In [21]:
torch.manual_seed(0)

# Train the best model
model = OneLayerGAT(input_dim=citeseer.num_node_features, output_dim=citeseer_dataset.num_classes, num_heads=best_num_heads).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

# Early stopping parameters
patience = 5
min_delta = 0.001
patience_counter = 0
best_loss = float('inf')

# Train the best model
for epoch in range(200):
    loss = train(model, optimizer, criterion, citeseer)
    val_loss = validate(model, criterion, citeseer)

    # Check for improvement in validation loss
    if val_loss < best_loss - min_delta:
        best_loss = val_loss
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f'Early stopping at epoch {epoch} \n')
            break

# Evaluate the model
test_accuracy = test(model, criterion, citeseer)
print(f'Test Accuracy(one layer) on citeseer dataset: {test_accuracy:.4f}')

Early stopping at epoch 38 

Test Accuracy(one layer) on citeseer dataset: 0.7368


## **Three layer GAT**

In [22]:
from torch_geometric.nn import GATConv
import torch.nn as nn
import torch.nn.functional as F

class ThreeLayerGAT(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_heads, dropout=0.6, negative_slope=0.2):
        super(ThreeLayerGAT, self).__init__()
        self.gat1 = GATConv(input_dim, hidden_dim, heads=num_heads, dropout=dropout, concat=True, negative_slope=negative_slope)
        self.gat2 = GATConv(hidden_dim * num_heads, hidden_dim, heads=num_heads, dropout=dropout, concat=True, negative_slope=negative_slope)
        self.gat3 = GATConv(hidden_dim * num_heads, output_dim, heads=1, dropout=dropout, negative_slope=negative_slope)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index
        x = self.gat1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.gat2(x, edge_index)
        x = F.relu(x)
        x = self.gat3(x, edge_index)
        return F.log_softmax(x, dim=1)



### **Train and evaluate on Corafull dataset(three layer)**

In [23]:
torch.manual_seed(0)


hidden_dims = [16 , 32, 64, 128, 256]

best_validation_loss = float('inf')
best_hidden_dims = None


for hidden_dim in hidden_dims:

    model = ThreeLayerGAT(input_dim=CoraFull.num_node_features, hidden_dim= hidden_dim , output_dim=CoraFull_dataset.num_classes , num_heads = 4).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # Early stopping parameters
    patience = 5
    min_delta = 0.001
    patience_counter = 0
    best_loss = float('inf')

    # Train the model
    for epoch in range(200):
        loss = train(model, optimizer, criterion, CoraFull)
        val_loss = validate(model, criterion, CoraFull)

        # Check for improvement in validation loss
        if val_loss < best_loss - min_delta:
            best_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f'Early stopping at epoch {epoch} \n')
                break

        print(f'Hidden Dimension: {hidden_dim}, Epoch: {epoch}, Loss: {loss:.4f}, Validation Loss: {val_loss:.4f} ')

    # Evaluate the model
    validation_loss = validate(model, criterion, CoraFull)


    print(f' Hidden Dimension: {hidden_dim}, Validation Loss: {validation_loss:.4f} \n')
    # Check if this number of attention heads is the best so far
    if validation_loss < best_validation_loss:
        best_validation_loss = validation_loss
        best_hidden_dim = hidden_dim

print('Best Hidden Dimension: {}'.format(best_hidden_dim))

Hidden Dimension: 16, Epoch: 0, Loss: 4.2492, Validation Loss: 4.1232 
Hidden Dimension: 16, Epoch: 1, Loss: 4.1367, Validation Loss: 3.8983 
Hidden Dimension: 16, Epoch: 2, Loss: 3.9832, Validation Loss: 3.7161 
Hidden Dimension: 16, Epoch: 3, Loss: 3.8327, Validation Loss: 3.5031 
Hidden Dimension: 16, Epoch: 4, Loss: 3.6808, Validation Loss: 3.3070 
Hidden Dimension: 16, Epoch: 5, Loss: 3.5263, Validation Loss: 3.1088 
Hidden Dimension: 16, Epoch: 6, Loss: 3.3883, Validation Loss: 2.9292 
Hidden Dimension: 16, Epoch: 7, Loss: 3.2353, Validation Loss: 2.7312 
Hidden Dimension: 16, Epoch: 8, Loss: 3.1209, Validation Loss: 2.5419 
Hidden Dimension: 16, Epoch: 9, Loss: 2.9720, Validation Loss: 2.3824 
Hidden Dimension: 16, Epoch: 10, Loss: 2.8504, Validation Loss: 2.2484 
Hidden Dimension: 16, Epoch: 11, Loss: 2.7538, Validation Loss: 2.1331 
Hidden Dimension: 16, Epoch: 12, Loss: 2.6495, Validation Loss: 2.0222 
Hidden Dimension: 16, Epoch: 13, Loss: 2.5806, Validation Loss: 1.9203 
Hi

### **find best number of heads for  cora full dataset(three layer)**

In [24]:
torch.manual_seed(0)


num_heads_list = [1, 2, 4 , 8]

best_validation_loss = float('inf')
best_hidden_dims = None


for num_heads in num_heads_list:

    model = ThreeLayerGAT(input_dim= CoraFull.num_node_features ,hidden_dim=best_hidden_dim , output_dim= CoraFull_dataset.num_classes , num_heads = num_heads).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # Early stopping parameters
    patience = 5
    min_delta = 0.001
    patience_counter = 0
    best_loss = float('inf')

    # Train the model
    for epoch in range(200):
        loss = train(model, optimizer, criterion, CoraFull)
        val_loss = validate(model, criterion, CoraFull)

        # Check for improvement in validation loss
        if val_loss < best_loss - min_delta:
            best_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f'Early stopping at epoch {epoch} \n')
                break


        print(f'Attention Heads: {num_heads}, Epoch: {epoch}, Loss: {loss:.4f}, Validation Loss: {val_loss:.4f} ')

    # Evaluate the model
    validation_loss = validate(model, criterion, CoraFull)


    print(f' Attention Heads: {num_heads}, Validation Loss: {validation_loss:.4f} \n')
    # Check if this number of attention heads is the best so far
    if validation_loss < best_validation_loss:
        best_validation_loss = validation_loss
        best_num_heads = num_heads

print('Best Number of Attention Heads: {}'.format(best_num_heads))


Attention Heads: 1, Epoch: 0, Loss: 4.2486, Validation Loss: 4.2194 
Attention Heads: 1, Epoch: 1, Loss: 4.2197, Validation Loss: 4.1653 
Attention Heads: 1, Epoch: 2, Loss: 4.4370, Validation Loss: 4.1783 
Attention Heads: 1, Epoch: 3, Loss: 4.1675, Validation Loss: 4.1733 
Attention Heads: 1, Epoch: 4, Loss: 4.1700, Validation Loss: 4.1534 
Attention Heads: 1, Epoch: 5, Loss: 4.1492, Validation Loss: 4.1245 
Attention Heads: 1, Epoch: 6, Loss: 4.1242, Validation Loss: 4.0885 
Attention Heads: 1, Epoch: 7, Loss: 4.0988, Validation Loss: 4.0500 
Attention Heads: 1, Epoch: 8, Loss: 4.0838, Validation Loss: 4.0142 
Attention Heads: 1, Epoch: 9, Loss: 4.0452, Validation Loss: 3.9817 
Attention Heads: 1, Epoch: 10, Loss: 3.9956, Validation Loss: 3.9483 
Attention Heads: 1, Epoch: 11, Loss: 3.9907, Validation Loss: 3.9172 
Attention Heads: 1, Epoch: 12, Loss: 3.9648, Validation Loss: 3.8854 
Attention Heads: 1, Epoch: 13, Loss: 3.9318, Validation Loss: 3.8514 
Attention Heads: 1, Epoch: 14,

In [25]:
torch.manual_seed(0)

# Train the best model
model = ThreeLayerGAT(input_dim=CoraFull.num_node_features, hidden_dim=best_hidden_dim , output_dim=CoraFull_dataset.num_classes, num_heads=best_num_heads).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

# Early stopping parameters
patience = 5
min_delta = 0.001
patience_counter = 0
best_loss = float('inf')

# Train the best model
for epoch in range(200):
    loss = train(model, optimizer, criterion, CoraFull)
    val_loss = validate(model, criterion, CoraFull)

    # Check for improvement in validation loss
    if val_loss < best_loss - min_delta:
        best_loss = val_loss
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f'Early stopping at epoch {epoch} \n')
            break

# Evaluate the model
test_accuracy = test(model, criterion, CoraFull)
print(f'Test Accuracy(three layer) on CoraFull dataset: {test_accuracy:.4f}')

Early stopping at epoch 42 

Test Accuracy(three layer) on CoraFull dataset: 0.6930


### **Train and evaluate on Citeseer dataset(three layer)**

In [26]:
torch.manual_seed(0)


hidden_dims = [16 , 32, 64, 128, 256]

best_validation_loss = float('inf')
best_hidden_dims = None


for hidden_dim in hidden_dims:

    model = ThreeLayerGAT(input_dim=citeseer.num_node_features, hidden_dim= hidden_dim , output_dim= citeseer_dataset.num_classes , num_heads = 4).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # Early stopping parameters
    patience = 5
    min_delta = 0.001
    patience_counter = 0
    best_loss = float('inf')

    # Train the model
    for epoch in range(200):
        loss = train(model, optimizer, criterion, citeseer)
        val_loss = validate(model, criterion, citeseer)

        # Check for improvement in validation loss
        if val_loss < best_loss - min_delta:
            best_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f'Early stopping at epoch {epoch} \n')
                break

        # Print the loss
        print(f'Hidden Dimension: {hidden_dim}, Epoch: {epoch}, Loss: {loss:.4f}, Validation Loss: {val_loss:.4f} ')

    # Evaluate the model
    validation_loss = validate(model, criterion, citeseer)


    print(f' Hidden Dimension: {hidden_dim}, Validation Loss: {validation_loss:.4f} \n')

    # Check if this number of attention heads is the best so far
    if validation_loss < best_validation_loss:
        best_validation_loss = validation_loss
        best_hidden_dim = hidden_dim

print('Best Hidden Dimension: {}'.format(best_hidden_dim))

Hidden Dimension: 16, Epoch: 0, Loss: 1.7945, Validation Loss: 1.6741 
Hidden Dimension: 16, Epoch: 1, Loss: 1.6708, Validation Loss: 1.4569 
Hidden Dimension: 16, Epoch: 2, Loss: 1.5316, Validation Loss: 1.2061 
Hidden Dimension: 16, Epoch: 3, Loss: 1.3840, Validation Loss: 1.0382 
Hidden Dimension: 16, Epoch: 4, Loss: 1.2975, Validation Loss: 0.9378 
Hidden Dimension: 16, Epoch: 5, Loss: 1.2030, Validation Loss: 0.8562 
Hidden Dimension: 16, Epoch: 6, Loss: 1.1838, Validation Loss: 0.8105 
Hidden Dimension: 16, Epoch: 7, Loss: 1.1351, Validation Loss: 0.7824 
Hidden Dimension: 16, Epoch: 8, Loss: 1.1033, Validation Loss: 0.7408 
Hidden Dimension: 16, Epoch: 9, Loss: 1.0629, Validation Loss: 0.7045 
Hidden Dimension: 16, Epoch: 10, Loss: 1.0370, Validation Loss: 0.6816 
Hidden Dimension: 16, Epoch: 11, Loss: 1.0257, Validation Loss: 0.6693 
Hidden Dimension: 16, Epoch: 12, Loss: 0.9782, Validation Loss: 0.6632 
Hidden Dimension: 16, Epoch: 13, Loss: 0.9589, Validation Loss: 0.6582 
Hi

### **find best number of heads for  citeseer dataset(three layer)**

In [27]:
torch.manual_seed(0)


num_heads_list = [1, 2, 4 , 8]

best_validation_loss = float('inf')
best_hidden_dims = None


for num_heads in num_heads_list:

    model = ThreeLayerGAT(input_dim= citeseer.num_node_features ,hidden_dim=best_hidden_dim , output_dim= citeseer_dataset.num_classes , num_heads = num_heads).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # Early stopping parameters
    patience = 5
    min_delta = 0.001
    patience_counter = 0
    best_loss = float('inf')

    # Train the model
    for epoch in range(200):
        loss = train(model, optimizer, criterion, citeseer)
        val_loss = validate(model, criterion, citeseer)

        # Check for improvement in validation loss
        if val_loss < best_loss - min_delta:
            best_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f'Early stopping at epoch {epoch} \n')
                break


        print(f'Attention Heads: {num_heads}, Epoch: {epoch}, Loss: {loss:.4f}, Validation Loss: {val_loss:.4f} ')

    # Evaluate the model
    validation_loss = validate(model, criterion, citeseer)


    print(f' Attention Heads: {num_heads}, Validation Loss: {validation_loss:.4f} \n')
    # Check if this number of attention heads is the best so far
    if validation_loss < best_validation_loss:
        best_validation_loss = validation_loss
        best_num_heads = num_heads

print('Best Number of Attention Heads: {}'.format(best_num_heads))


Attention Heads: 1, Epoch: 0, Loss: 1.8028, Validation Loss: 1.7497 
Attention Heads: 1, Epoch: 1, Loss: 1.7451, Validation Loss: 1.6756 
Attention Heads: 1, Epoch: 2, Loss: 1.6858, Validation Loss: 1.5962 
Attention Heads: 1, Epoch: 3, Loss: 1.6271, Validation Loss: 1.5263 
Attention Heads: 1, Epoch: 4, Loss: 1.5838, Validation Loss: 1.4708 
Attention Heads: 1, Epoch: 5, Loss: 1.5500, Validation Loss: 1.4239 
Attention Heads: 1, Epoch: 6, Loss: 1.5342, Validation Loss: 1.3979 
Attention Heads: 1, Epoch: 7, Loss: 1.5499, Validation Loss: 1.3815 
Attention Heads: 1, Epoch: 8, Loss: 1.5139, Validation Loss: 1.3633 
Attention Heads: 1, Epoch: 9, Loss: 1.4819, Validation Loss: 1.3372 
Attention Heads: 1, Epoch: 10, Loss: 1.4422, Validation Loss: 1.3033 
Attention Heads: 1, Epoch: 11, Loss: 1.4320, Validation Loss: 1.2625 
Attention Heads: 1, Epoch: 12, Loss: 1.4567, Validation Loss: 1.2194 
Attention Heads: 1, Epoch: 13, Loss: 1.4000, Validation Loss: 1.1769 
Attention Heads: 1, Epoch: 14,

In [28]:
torch.manual_seed(0)

# Train the best model
model = ThreeLayerGAT(input_dim=citeseer.num_node_features, hidden_dim=best_hidden_dim , output_dim=citeseer_dataset.num_classes, num_heads=best_num_heads).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

# Early stopping parameters
patience = 5
min_delta = 0.001
patience_counter = 0
best_loss = float('inf')

# Train the best model
for epoch in range(200):
    loss = train(model, optimizer, criterion, citeseer)
    val_loss = validate(model, criterion, citeseer)

    # Check for improvement in validation loss
    if val_loss < best_loss - min_delta:
        best_loss = val_loss
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= patience:
            print(f'Early stopping at epoch {epoch} \n')
            break

# Evaluate the model
test_accuracy = test(model, criterion, citeseer)
print(f'Test Accuracy(three layer) on citeseer dataset: {test_accuracy:.4f}')

Early stopping at epoch 34 

Test Accuracy(three layer) on citeseer dataset: 0.7308
