# Train GAT models for OGBN datasets

In [1]:
import torch
import torch.nn.functional as F

def train(model, optimizer, data, epochs):
    model.train()
    criterion = torch.nn.CrossEntropyLoss()

    for _ in range(epochs):
        optimizer.zero_grad()
        out = model(data.x, data.edge_index.to(device))
        loss = criterion(out[data.train_mask], data.y[data.train_mask])
        acc = (out[data.train_mask].argmax(dim=1) == data.y[data.train_mask]).sum().item() / data.train_mask.sum().item()
        loss.backward()
        optimizer.step()

    return model, loss, acc

def train_batch(model, optimizer, train_loader, epochs):
    model.train()

    total_loss = total_correct = 0
    for _ in range(epochs):
        for batch_size, n_id, adjs in train_loader:
            # `adjs` holds a list of `(edge_index, e_id, size)` tuples.
            adjs = [adj.to(device) for adj in adjs]

            optimizer.zero_grad()
            out = model(x[n_id], adjs)
            loss = F.nll_loss(out, y[n_id[:batch_size]])
            loss.backward()
            optimizer.step()

            total_loss += float(loss)
            total_correct += int(out.argmax(dim=-1).eq(y[n_id[:batch_size]]).sum())

    loss = total_loss / len(train_loader)
    approx_acc = total_correct / train_idx.size(0)

    return model, loss, approx_acc


@torch.no_grad()
def test(model, data):
    model.eval()
    criterion = torch.nn.CrossEntropyLoss()
    x, edge_index, y = data.x, data.edge_index, data.y
    out = model(x, edge_index)
    loss = criterion(out[data.test_mask], y[data.test_mask])
    acc = (out[data.test_mask].argmax(dim=1) == y[data.test_mask]).sum().item() / data.test_mask.sum().item()
    return loss, acc

### OBGN-products

In [4]:
import os.path as osp

import torch
from ogb.nodeproppred import Evaluator, PygNodePropPredDataset

from torch_geometric.loader import NeighborSampler

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
root = osp.join(osp.dirname(osp.realpath('[Dataset]OGBN.ipynb')), '..', 'data', 'products')
dataset = PygNodePropPredDataset('ogbn-products', root)
split_idx = dataset.get_idx_split()
evaluator = Evaluator(name='ogbn-products')
data = dataset[0]

x = data.x.to(device)
y = data.y.squeeze().to(device)

train_idx = split_idx['train']
train_loader = NeighborSampler(data.edge_index, node_idx=train_idx,
                               sizes=[10, 10, 10], batch_size=512,
                               shuffle=True, num_workers=12)
subgraph_loader = NeighborSampler(data.edge_index, node_idx=None, sizes=[-1],
                                  batch_size=1024, shuffle=False,
                                  num_workers=12)

out_channels = data.y.max().item() + 1

In [11]:
# Train GAT models on the BA-Shapes dataset
from models import GAT_L2_intervention, GAT_L3_intervention

# Define several GAT models with 1, 2, 4, 8 attention heads to be used for 'data.pt', and move them to the GPU device (if available)
model1_L2 = GAT_L2_intervention(in_channels=data.num_node_features, hidden_channels=64, out_channels=out_channels, heads=1)
model2_L2 = GAT_L2_intervention(in_channels=data.num_node_features, hidden_channels=64, out_channels=out_channels, heads=2)
model4_L2 = GAT_L2_intervention(in_channels=data.num_node_features, hidden_channels=64, out_channels=out_channels, heads=4)
model8_L2 = GAT_L2_intervention(in_channels=data.num_node_features, hidden_channels=64, out_channels=out_channels, heads=8)
model1_L3 = GAT_L3_intervention(in_channels=data.num_node_features, hidden_channels=64, out_channels=out_channels, heads=1)
model2_L3 = GAT_L3_intervention(in_channels=data.num_node_features, hidden_channels=64, out_channels=out_channels, heads=2)
model4_L3 = GAT_L3_intervention(in_channels=data.num_node_features, hidden_channels=64, out_channels=out_channels, heads=4)
model8_L3 = GAT_L3_intervention(in_channels=data.num_node_features, hidden_channels=64, out_channels=out_channels, heads=8)

# Move the models to the GPU device (if available)
model1_L2 = model1_L2.to(device)
model2_L2 = model2_L2.to(device)
model4_L2 = model4_L2.to(device)
model8_L2 = model8_L2.to(device)
model1_L3 = model1_L3.to(device)
model2_L3 = model2_L3.to(device)
model4_L3 = model4_L3.to(device)
model8_L3 = model8_L3.to(device)

In [12]:
# Define the number of epochs
epochs = 60
# Define the learning rate
lr = 0.001
# Prepare the optimizer
optimizer1_L2 = torch.optim.Adam(model1_L2.parameters(), lr=lr, weight_decay=0)
optimizer2_L2 = torch.optim.Adam(model2_L2.parameters(), lr=lr, weight_decay=0)
optimizer4_L2 = torch.optim.Adam(model4_L2.parameters(), lr=lr, weight_decay=0)
optimizer8_L2 = torch.optim.Adam(model8_L2.parameters(), lr=lr, weight_decay=0)
optimizer1_L3 = torch.optim.Adam(model1_L3.parameters(), lr=lr, weight_decay=0)
optimizer2_L3 = torch.optim.Adam(model2_L3.parameters(), lr=lr, weight_decay=0)
optimizer4_L3 = torch.optim.Adam(model4_L3.parameters(), lr=lr, weight_decay=0)
optimizer8_L3 = torch.optim.Adam(model8_L3.parameters(), lr=lr, weight_decay=0)

# Train the models
model1_L2, loss1_L2, acc1_L2 = train_batch(model=model1_L2, train_loader=train_loader, optimizer=optimizer1_L2, epochs=epochs)
model2_L2, loss2_L2, acc2_L2 = train_batch(model=model2_L2, train_loader=train_loader, optimizer=optimizer2_L2, epochs=epochs)
model4_L2, loss4_L2, acc4_L2 = train_batch(model=model4_L2, train_loader=train_loader, optimizer=optimizer4_L2, epochs=epochs)
model8_L2, loss8_L2, acc8_L2 = train_batch(model=model8_L2, train_loader=train_loader, optimizer=optimizer8_L2, epochs=epochs)
model1_L3, loss1_L3, acc1_L3 = train_batch(model=model1_L3, train_loader=train_loader, optimizer=optimizer1_L3, epochs=epochs)
model2_L3, loss2_L3, acc2_L3 = train_batch(model=model2_L3, train_loader=train_loader, optimizer=optimizer2_L3, epochs=epochs)
model4_L3, loss4_L3, acc4_L3 = train_batch(model=model4_L3, train_loader=train_loader, optimizer=optimizer4_L3, epochs=epochs)
model8_L3, loss8_L3, acc8_L3 = train_batch(model=model8_L3, train_loader=train_loader, optimizer=optimizer8_L3, epochs=epochs)

# Save the model locally
torch.save(model1_L2, '/workspace/models/GAT_Products_2L1H.pt')
torch.save(model2_L2, '/workspace/models/GAT_Products_2L2H.pt')
torch.save(model4_L2, '/workspace/models/GAT_Products_2L4H.pt')
torch.save(model8_L2, '/workspace/models/GAT_Products_2L8H.pt')

torch.save(model1_L3, '/workspace/models/GAT_Products_3L1H.pt')
torch.save(model2_L3, '/workspace/models/GAT_Products_3L2H.pt')
torch.save(model4_L3, '/workspace/models/GAT_Products_3L4H.pt')
torch.save(model8_L3, '/workspace/models/GAT_Products_3L8H.pt')

In [10]:
loss1_L2, acc1_L2

NameError: name 'loss1_L2' is not defined

In [3]:
@torch.no_grad()
def test(model, data):
    model.eval()

    out = model.inference(x)

    y_true = y.cpu().unsqueeze(-1)
    y_pred = out.argmax(dim=-1, keepdim=True)

    train_acc = evaluator.eval({
        'y_true': y_true[split_idx['train']],
        'y_pred': y_pred[split_idx['train']],
    })['acc']
    val_acc = evaluator.eval({
        'y_true': y_true[split_idx['valid']],
        'y_pred': y_pred[split_idx['valid']],
    })['acc']
    test_acc = evaluator.eval({
        'y_true': y_true[split_idx['test']],
        'y_pred': y_pred[split_idx['test']],
    })['acc']

    return train_acc, val_acc, test_acc


# Test the models
test_loss1_L2, test_acc1_L2 = test(model=model1_L2, data=data)
test_loss2_L2, test_acc2_L2 = test(model=model2_L2, data=data)
test_loss4_L2, test_acc4_L2 = test(model=model4_L2, data=data)
test_loss8_L2, test_acc8_L2 = test(model=model8_L2, data=data)
test_loss1_L3, test_acc1_L3 = test(model=model1_L3, data=data)
test_loss2_L3, test_acc2_L3 = test(model=model2_L3, data=data)
test_loss4_L3, test_acc4_L3 = test(model=model4_L3, data=data)
test_loss8_L3, test_acc8_L3 = test(model=model8_L3, data=data)

# Print the results
print(f"Model: GAT_Cora_2L1H, Loss: {loss1_L2:.4f}, Train Accuracy: {acc1_L2:.4f}, Test Loss: {test_loss1_L2:.4f}, Test Accuracy: {test_acc1_L2:.4f}")
print(f"Model: GAT_Cora_2L2H, Loss: {loss2_L2:.4f}, Train Accuracy: {acc2_L2:.4f}, Test Loss: {test_loss2_L2:.4f}, Test Accuracy: {test_acc2_L2:.4f}")
print(f"Model: GAT_Cora_2L4H, Loss: {loss4_L2:.4f}, Train Accuracy: {acc4_L2:.4f}, Test Loss: {test_loss4_L2:.4f}, Test Accuracy: {test_acc4_L2:.4f}")
print(f"Model: GAT_Cora_2L8H, Loss: {loss8_L2:.4f}, Train Accuracy: {acc8_L2:.4f}, Test Loss: {test_loss8_L2:.4f}, Test Accuracy: {test_acc8_L2:.4f}")
print(f"Model: GAT_Cora_3L1H, Loss: {loss1_L3:.4f}, Train Accuracy: {acc1_L3:.4f}, Test Loss: {test_loss1_L3:.4f}, Test Accuracy: {test_acc1_L3:.4f}")
print(f"Model: GAT_Cora_3L2H, Loss: {loss2_L3:.4f}, Train Accuracy: {acc2_L3:.4f}, Test Loss: {test_loss2_L3:.4f}, Test Accuracy: {test_acc2_L3:.4f}")
print(f"Model: GAT_Cora_3L4H, Loss: {loss4_L3:.4f}, Train Accuracy: {acc4_L3:.4f}, Test Loss: {test_loss4_L3:.4f}, Test Accuracy: {test_acc4_L3:.4f}")
print(f"Model: GAT_Cora_3L8H, Loss: {loss8_L3:.4f}, Train Accuracy: {acc8_L3:.4f}, Test Loss: {test_loss8_L3:.4f}, Test Accuracy: {test_acc8_L3:.4f}")

Model: GAT_Cora_2L1H, Loss: 1.7192, Train Accuracy: 0.9057, Test Loss: 1.7484, Test Accuracy: 0.8327
Model: GAT_Cora_2L2H, Loss: 1.6044, Train Accuracy: 0.9029, Test Loss: 1.6541, Test Accuracy: 0.8327
Model: GAT_Cora_2L4H, Loss: 1.3822, Train Accuracy: 0.9100, Test Loss: 1.4581, Test Accuracy: 0.8416
Model: GAT_Cora_2L8H, Loss: 1.0693, Train Accuracy: 0.9000, Test Loss: 1.1965, Test Accuracy: 0.8416
Model: GAT_Cora_3L1H, Loss: 1.3797, Train Accuracy: 0.8800, Test Loss: 1.4372, Test Accuracy: 0.8098
Model: GAT_Cora_3L2H, Loss: 0.8098, Train Accuracy: 0.8957, Test Loss: 0.9117, Test Accuracy: 0.8342
Model: GAT_Cora_3L4H, Loss: 0.3302, Train Accuracy: 0.9243, Test Loss: 0.5271, Test Accuracy: 0.8466
Model: GAT_Cora_3L8H, Loss: 0.1487, Train Accuracy: 0.9571, Test Loss: 0.5025, Test Accuracy: 0.8476


In [4]:
# Save the model locally
torch.save(model1_L2, '/workspace/GAT_Cora_2L1H.pt')
torch.save(model2_L2, '/workspace/GAT_Cora_2L2H.pt')
torch.save(model4_L2, '/workspace/GAT_Cora_2L4H.pt')
torch.save(model8_L2, '/workspace/GAT_Cora_2L8H.pt')

torch.save(model1_L3, '/workspace/GAT_Cora_3L1H.pt')
torch.save(model2_L3, '/workspace/GAT_Cora_3L2H.pt')
torch.save(model4_L3, '/workspace/GAT_Cora_3L4H.pt')
torch.save(model8_L3, '/workspace/GAT_Cora_3L8H.pt')

### Citeseer

In [3]:
import os.path as osp
import torch
import torch.nn.functional as F
import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

dataset = 'CiteSeer'
transform = T.Compose([T.NormalizeFeatures(),
                    T.RandomNodeSplit(split='test_rest', 
                                    num_train_per_class=100,
                                    num_val=0)])
path = osp.join(osp.dirname(osp.realpath('/workspace/[Dataset]Citation.ipynb')), '..', 'data', dataset)
dataset = Planetoid(path, dataset, transform=transform)
data = dataset[0].to(device)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Print some dataset statistics
print(f"Num. nodes: {data.num_nodes} | Num. edges: {data.num_edges} | Num. classes: {data.y.max() + 1} | Num. features: {data.num_features} | Num. train.: {data.train_mask.sum()} | Num. test: {data.test_mask.sum()}")
# Save to local
torch.save(data, "/workspace/Citeseer.pt")

Num. nodes: 3327 | Num. edges: 9104 | Num. classes: 6 | Num. features: 3703 | Num. train.: 600 | Num. test: 2727


In [4]:
# Train GAT models on the BA-Shapes dataset
from models import GAT_L2_intervention, GAT_L3_intervention

out_channels = data.y.max().item() + 1

# Define several GAT models with 1, 2, 4, 8 attention heads to be used for 'data.pt', and move them to the GPU device (if available)
model1_L2 = GAT_L2_intervention(in_channels=data.num_node_features, hidden_channels=64, out_channels=out_channels, heads=1)
model2_L2 = GAT_L2_intervention(in_channels=data.num_node_features, hidden_channels=64, out_channels=out_channels, heads=2)
model4_L2 = GAT_L2_intervention(in_channels=data.num_node_features, hidden_channels=64, out_channels=out_channels, heads=4)
model8_L2 = GAT_L2_intervention(in_channels=data.num_node_features, hidden_channels=64, out_channels=out_channels, heads=8)
model1_L3 = GAT_L3_intervention(in_channels=data.num_node_features, hidden_channels=64, out_channels=out_channels, heads=1)
model2_L3 = GAT_L3_intervention(in_channels=data.num_node_features, hidden_channels=64, out_channels=out_channels, heads=2)
model4_L3 = GAT_L3_intervention(in_channels=data.num_node_features, hidden_channels=64, out_channels=out_channels, heads=4)
model8_L3 = GAT_L3_intervention(in_channels=data.num_node_features, hidden_channels=64, out_channels=out_channels, heads=8)

# Move the models to the GPU device (if available)
model1_L2 = model1_L2.to(device)
model2_L2 = model2_L2.to(device)
model4_L2 = model4_L2.to(device)
model8_L2 = model8_L2.to(device)
model1_L3 = model1_L3.to(device)
model2_L3 = model2_L3.to(device)
model4_L3 = model4_L3.to(device)
model8_L3 = model8_L3.to(device)

"""
Now we can train all the models and compare their performance.
Keep the number of epochs and the learning rate the same for all the models.
"""

# Define the number of epochs
epochs = 100
# Define the learning rate
lr = 0.001
# Prepare the optimizer
optimizer1_L2 = torch.optim.Adam(model1_L2.parameters(), lr=lr, weight_decay=0)
optimizer2_L2 = torch.optim.Adam(model2_L2.parameters(), lr=lr, weight_decay=0)
optimizer4_L2 = torch.optim.Adam(model4_L2.parameters(), lr=lr, weight_decay=0)
optimizer8_L2 = torch.optim.Adam(model8_L2.parameters(), lr=lr, weight_decay=0)
optimizer1_L3 = torch.optim.Adam(model1_L3.parameters(), lr=lr, weight_decay=0)
optimizer2_L3 = torch.optim.Adam(model2_L3.parameters(), lr=lr, weight_decay=0)
optimizer4_L3 = torch.optim.Adam(model4_L3.parameters(), lr=lr, weight_decay=0)
optimizer8_L3 = torch.optim.Adam(model8_L3.parameters(), lr=lr, weight_decay=0)

# Train the models
model1_L2, loss1_L2, acc1_L2 = train(model=model1_L2, data=data, optimizer=optimizer1_L2, epochs=epochs)
model2_L2, loss2_L2, acc2_L2 = train(model=model2_L2, data=data, optimizer=optimizer2_L2, epochs=epochs)
model4_L2, loss4_L2, acc4_L2 = train(model=model4_L2, data=data, optimizer=optimizer4_L2, epochs=epochs)
model8_L2, loss8_L2, acc8_L2 = train(model=model8_L2, data=data, optimizer=optimizer8_L2, epochs=epochs)
model1_L3, loss1_L3, acc1_L3 = train(model=model1_L3, data=data, optimizer=optimizer1_L3, epochs=epochs)
model2_L3, loss2_L3, acc2_L3 = train(model=model2_L3, data=data, optimizer=optimizer2_L3, epochs=epochs)
model4_L3, loss4_L3, acc4_L3 = train(model=model4_L3, data=data, optimizer=optimizer4_L3, epochs=epochs)
model8_L3, loss8_L3, acc8_L3 = train(model=model8_L3, data=data, optimizer=optimizer8_L3, epochs=epochs)

# Test the models
test_loss1_L2, test_acc1_L2 = test(model=model1_L2, data=data)
test_loss2_L2, test_acc2_L2 = test(model=model2_L2, data=data)
test_loss4_L2, test_acc4_L2 = test(model=model4_L2, data=data)
test_loss8_L2, test_acc8_L2 = test(model=model8_L2, data=data)
test_loss1_L3, test_acc1_L3 = test(model=model1_L3, data=data)
test_loss2_L3, test_acc2_L3 = test(model=model2_L3, data=data)
test_loss4_L3, test_acc4_L3 = test(model=model4_L3, data=data)
test_loss8_L3, test_acc8_L3 = test(model=model8_L3, data=data)

# Print the results
print(f"Model: GAT_Citeseer_2L1H, Loss: {loss1_L2:.4f}, Train Accuracy: {acc1_L2:.4f}, Test Loss: {test_loss1_L2:.4f}, Test Accuracy: {test_acc1_L2:.4f}")
print(f"Model: GAT_Citeseer_2L2H, Loss: {loss2_L2:.4f}, Train Accuracy: {acc2_L2:.4f}, Test Loss: {test_loss2_L2:.4f}, Test Accuracy: {test_acc2_L2:.4f}")
print(f"Model: GAT_Citeseer_2L4H, Loss: {loss4_L2:.4f}, Train Accuracy: {acc4_L2:.4f}, Test Loss: {test_loss4_L2:.4f}, Test Accuracy: {test_acc4_L2:.4f}")
print(f"Model: GAT_Citeseer_2L8H, Loss: {loss8_L2:.4f}, Train Accuracy: {acc8_L2:.4f}, Test Loss: {test_loss8_L2:.4f}, Test Accuracy: {test_acc8_L2:.4f}")
print(f"Model: GAT_Citeseer_3L1H, Loss: {loss1_L3:.4f}, Train Accuracy: {acc1_L3:.4f}, Test Loss: {test_loss1_L3:.4f}, Test Accuracy: {test_acc1_L3:.4f}")
print(f"Model: GAT_Citeseer_3L2H, Loss: {loss2_L3:.4f}, Train Accuracy: {acc2_L3:.4f}, Test Loss: {test_loss2_L3:.4f}, Test Accuracy: {test_acc2_L3:.4f}")
print(f"Model: GAT_Citeseer_3L4H, Loss: {loss4_L3:.4f}, Train Accuracy: {acc4_L3:.4f}, Test Loss: {test_loss4_L3:.4f}, Test Accuracy: {test_acc4_L3:.4f}")
print(f"Model: GAT_Citeseer_3L8H, Loss: {loss8_L3:.4f}, Train Accuracy: {acc8_L3:.4f}, Test Loss: {test_loss8_L3:.4f}, Test Accuracy: {test_acc8_L3:.4f}")

Model: GAT_Citeseer_2L1H, Loss: 1.3092, Train Accuracy: 0.8817, Test Loss: 1.4677, Test Accuracy: 0.7334
Model: GAT_Citeseer_2L2H, Loss: 1.0237, Train Accuracy: 0.8883, Test Loss: 1.2675, Test Accuracy: 0.7294
Model: GAT_Citeseer_2L4H, Loss: 0.6952, Train Accuracy: 0.8983, Test Loss: 1.0480, Test Accuracy: 0.7319
Model: GAT_Citeseer_2L8H, Loss: 0.3951, Train Accuracy: 0.9250, Test Loss: 0.8965, Test Accuracy: 0.7261
Model: GAT_Citeseer_3L1H, Loss: 0.5238, Train Accuracy: 0.8783, Test Loss: 0.9247, Test Accuracy: 0.7173
Model: GAT_Citeseer_3L2H, Loss: 0.2218, Train Accuracy: 0.9350, Test Loss: 0.9078, Test Accuracy: 0.7140
Model: GAT_Citeseer_3L4H, Loss: 0.0866, Train Accuracy: 0.9783, Test Loss: 1.1514, Test Accuracy: 0.6799
Model: GAT_Citeseer_3L8H, Loss: 0.0330, Train Accuracy: 0.9933, Test Loss: 1.5214, Test Accuracy: 0.6733


In [5]:
# Save the model locally
torch.save(model1_L2, '/workspace/GAT_Citeseer_2L1H.pt')
torch.save(model2_L2, '/workspace/GAT_Citeseer_2L2H.pt')
torch.save(model4_L2, '/workspace/GAT_Citeseer_2L4H.pt')
torch.save(model8_L2, '/workspace/GAT_Citeseer_2L8H.pt')

torch.save(model1_L3, '/workspace/GAT_Citeseer_3L1H.pt')
torch.save(model2_L3, '/workspace/GAT_Citeseer_3L2H.pt')
torch.save(model4_L3, '/workspace/GAT_Citeseer_3L4H.pt')
torch.save(model8_L3, '/workspace/GAT_Citeseer_3L8H.pt')

### Pubmed

In [6]:
import os.path as osp
import torch
import torch.nn.functional as F
import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid

dataset = 'PubMed'
transform = T.Compose([T.NormalizeFeatures(),
                    T.RandomNodeSplit(split='test_rest', 
                                    num_train_per_class=100,
                                    num_val=0)])
path = osp.join(osp.dirname(osp.realpath('/workspace/[Dataset]Citation.ipynb')), '..', 'data', dataset)
dataset = Planetoid(path, dataset, transform=transform)
data = dataset[0].to(device)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Print some dataset statistics
print(f"Num. nodes: {data.num_nodes} | Num. edges: {data.num_edges} | Num. classes: {data.y.max() + 1} | Num. features: {data.num_features} | Num. train.: {data.train_mask.sum()} | Num. test: {data.test_mask.sum()}")
# Save to local
torch.save(data, "/workspace/Pubmed.pt")

Num. nodes: 19717 | Num. edges: 88648 | Num. classes: 3 | Num. features: 500 | Num. train.: 300 | Num. test: 19417


In [7]:
# Train GAT models on the BA-Shapes dataset
from models import GAT_L2_intervention, GAT_L3_intervention

out_channels = data.y.max().item() + 1

# Define several GAT models with 1, 2, 4, 8 attention heads to be used for 'data.pt', and move them to the GPU device (if available)
model1_L2 = GAT_L2_intervention(in_channels=data.num_node_features, hidden_channels=64, out_channels=out_channels, heads=1)
model2_L2 = GAT_L2_intervention(in_channels=data.num_node_features, hidden_channels=64, out_channels=out_channels, heads=2)
model4_L2 = GAT_L2_intervention(in_channels=data.num_node_features, hidden_channels=64, out_channels=out_channels, heads=4)
model8_L2 = GAT_L2_intervention(in_channels=data.num_node_features, hidden_channels=64, out_channels=out_channels, heads=8)
model1_L3 = GAT_L3_intervention(in_channels=data.num_node_features, hidden_channels=64, out_channels=out_channels, heads=1)
model2_L3 = GAT_L3_intervention(in_channels=data.num_node_features, hidden_channels=64, out_channels=out_channels, heads=2)
model4_L3 = GAT_L3_intervention(in_channels=data.num_node_features, hidden_channels=64, out_channels=out_channels, heads=4)
model8_L3 = GAT_L3_intervention(in_channels=data.num_node_features, hidden_channels=64, out_channels=out_channels, heads=8)

# Move the models to the GPU device (if available)
model1_L2 = model1_L2.to(device)
model2_L2 = model2_L2.to(device)
model4_L2 = model4_L2.to(device)
model8_L2 = model8_L2.to(device)
model1_L3 = model1_L3.to(device)
model2_L3 = model2_L3.to(device)
model4_L3 = model4_L3.to(device)
model8_L3 = model8_L3.to(device)

"""
Now we can train all the models and compare their performance.
Keep the number of epochs and the learning rate the same for all the models.
"""
# Define the number of epochs
epochs = 100
# Define the learning rate
lr = 0.001
# Prepare the optimizer
optimizer1_L2 = torch.optim.Adam(model1_L2.parameters(), lr=lr, weight_decay=0)
optimizer2_L2 = torch.optim.Adam(model2_L2.parameters(), lr=lr, weight_decay=0)
optimizer4_L2 = torch.optim.Adam(model4_L2.parameters(), lr=lr, weight_decay=0)
optimizer8_L2 = torch.optim.Adam(model8_L2.parameters(), lr=lr, weight_decay=0)
optimizer1_L3 = torch.optim.Adam(model1_L3.parameters(), lr=lr, weight_decay=0)
optimizer2_L3 = torch.optim.Adam(model2_L3.parameters(), lr=lr, weight_decay=0)
optimizer4_L3 = torch.optim.Adam(model4_L3.parameters(), lr=lr, weight_decay=0)
optimizer8_L3 = torch.optim.Adam(model8_L3.parameters(), lr=lr, weight_decay=0)

# Train the models
model1_L2, loss1_L2, acc1_L2 = train(model=model1_L2, data=data, optimizer=optimizer1_L2, epochs=epochs)
model2_L2, loss2_L2, acc2_L2 = train(model=model2_L2, data=data, optimizer=optimizer2_L2, epochs=epochs)
model4_L2, loss4_L2, acc4_L2 = train(model=model4_L2, data=data, optimizer=optimizer4_L2, epochs=epochs)
model8_L2, loss8_L2, acc8_L2 = train(model=model8_L2, data=data, optimizer=optimizer8_L2, epochs=epochs)
model1_L3, loss1_L3, acc1_L3 = train(model=model1_L3, data=data, optimizer=optimizer1_L3, epochs=epochs)
model2_L3, loss2_L3, acc2_L3 = train(model=model2_L3, data=data, optimizer=optimizer2_L3, epochs=epochs)
model4_L3, loss4_L3, acc4_L3 = train(model=model4_L3, data=data, optimizer=optimizer4_L3, epochs=epochs)
model8_L3, loss8_L3, acc8_L3 = train(model=model8_L3, data=data, optimizer=optimizer8_L3, epochs=epochs)

# Test the models
test_loss1_L2, test_acc1_L2 = test(model=model1_L2, data=data)
test_loss2_L2, test_acc2_L2 = test(model=model2_L2, data=data)
test_loss4_L2, test_acc4_L2 = test(model=model4_L2, data=data)
test_loss8_L2, test_acc8_L2 = test(model=model8_L2, data=data)
test_loss1_L3, test_acc1_L3 = test(model=model1_L3, data=data)
test_loss2_L3, test_acc2_L3 = test(model=model2_L3, data=data)
test_loss4_L3, test_acc4_L3 = test(model=model4_L3, data=data)
test_loss8_L3, test_acc8_L3 = test(model=model8_L3, data=data)

# Print the results
print(f"Model: GAT_Pubmed_2L1H, Loss: {loss1_L2:.4f}, Train Accuracy: {acc1_L2:.4f}, Test Loss: {test_loss1_L2:.4f}, Test Accuracy: {test_acc1_L2:.4f}")
print(f"Model: GAT_Pubmed_2L2H, Loss: {loss2_L2:.4f}, Train Accuracy: {acc2_L2:.4f}, Test Loss: {test_loss2_L2:.4f}, Test Accuracy: {test_acc2_L2:.4f}")
print(f"Model: GAT_Pubmed_2L4H, Loss: {loss4_L2:.4f}, Train Accuracy: {acc4_L2:.4f}, Test Loss: {test_loss4_L2:.4f}, Test Accuracy: {test_acc4_L2:.4f}")
print(f"Model: GAT_Pubmed_2L8H, Loss: {loss8_L2:.4f}, Train Accuracy: {acc8_L2:.4f}, Test Loss: {test_loss8_L2:.4f}, Test Accuracy: {test_acc8_L2:.4f}")
print(f"Model: GAT_Pubmed_3L1H, Loss: {loss1_L3:.4f}, Train Accuracy: {acc1_L3:.4f}, Test Loss: {test_loss1_L3:.4f}, Test Accuracy: {test_acc1_L3:.4f}")
print(f"Model: GAT_Pubmed_3L2H, Loss: {loss2_L3:.4f}, Train Accuracy: {acc2_L3:.4f}, Test Loss: {test_loss2_L3:.4f}, Test Accuracy: {test_acc2_L3:.4f}")
print(f"Model: GAT_Pubmed_3L4H, Loss: {loss4_L3:.4f}, Train Accuracy: {acc4_L3:.4f}, Test Loss: {test_loss4_L3:.4f}, Test Accuracy: {test_acc4_L3:.4f}")
print(f"Model: GAT_Pubmed_3L8H, Loss: {loss8_L3:.4f}, Train Accuracy: {acc8_L3:.4f}, Test Loss: {test_loss8_L3:.4f}, Test Accuracy: {test_acc8_L3:.4f}")

Model: GAT_Pubmed_2L1H, Loss: 0.7602, Train Accuracy: 0.8100, Test Loss: 0.8184, Test Accuracy: 0.7316
Model: GAT_Pubmed_2L2H, Loss: 0.5968, Train Accuracy: 0.8333, Test Loss: 0.6898, Test Accuracy: 0.7536
Model: GAT_Pubmed_2L4H, Loss: 0.4766, Train Accuracy: 0.8500, Test Loss: 0.5984, Test Accuracy: 0.7695
Model: GAT_Pubmed_2L8H, Loss: 0.3535, Train Accuracy: 0.8933, Test Loss: 0.5215, Test Accuracy: 0.7948
Model: GAT_Pubmed_3L1H, Loss: 0.3548, Train Accuracy: 0.8900, Test Loss: 0.5218, Test Accuracy: 0.7921
Model: GAT_Pubmed_3L2H, Loss: 0.2183, Train Accuracy: 0.9300, Test Loss: 0.5265, Test Accuracy: 0.7970
Model: GAT_Pubmed_3L4H, Loss: 0.0702, Train Accuracy: 0.9800, Test Loss: 0.7013, Test Accuracy: 0.7751
Model: GAT_Pubmed_3L8H, Loss: 0.0224, Train Accuracy: 0.9967, Test Loss: 1.1359, Test Accuracy: 0.7474


In [8]:
# Save the model locally
torch.save(model1_L2, '/workspace/GAT_Pubmed_2L1H.pt')
torch.save(model2_L2, '/workspace/GAT_Pubmed_2L2H.pt')
torch.save(model4_L2, '/workspace/GAT_Pubmed_2L4H.pt')
torch.save(model8_L2, '/workspace/GAT_Pubmed_2L8H.pt')

torch.save(model1_L3, '/workspace/GAT_Pubmed_3L1H.pt')
torch.save(model2_L3, '/workspace/GAT_Pubmed_3L2H.pt')
torch.save(model4_L3, '/workspace/GAT_Pubmed_3L4H.pt')
torch.save(model8_L3, '/workspace/GAT_Pubmed_3L8H.pt')