In [None]:
!pip install torch torchvision



In [None]:
import torch
# Show the installed PyTorch version so the recorded results can be tied to it.
print(torch.__version__)


2.1.0+cu121


In [None]:
# Pick the compute device used by the rest of the notebook: the GPU when CUDA
# is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device:', device)

Using device: cuda


In [None]:
!pip install torch_geometric




# **Load Dataset**

## **Load citeseer**

In [None]:
import torch
from torch_geometric.datasets import Planetoid



# Build the CiteSeer Planetoid dataset. The root is the empty string, so the
# dataset files are presumably placed relative to the current working
# directory -- TODO confirm where they land.
citeseer_dataset = Planetoid(name='CiteSeer', root='')

# Element 0 of the dataset is the graph object used as `citeseer` below.
citeseer = citeseer_dataset[0]

## **Load CoraFull**

In [None]:
from torch_geometric.datasets import CoraFull


# Directory that holds the CoraFull files.
root = './CoraFull'

# Constructing the dataset already downloads and processes the data when the
# files are missing, so no explicit download()/process() calls are made here:
# calling them by hand only repeats that work on every run of the cell.
CoraFull_dataset = CoraFull(root)

# NOTE: this rebinds the name `CoraFull` from the imported dataset class to the
# graph object at index 0. Later cells rely on `CoraFull` being the graph; the
# class is only available again after the import line of this cell is re-run.
CoraFull = CoraFull_dataset[0]



Using existing file cora.npz


Number of nodes: 19793
Number of edges: 126842
Number of features: 8710


# **Split dataset**

## **Split CoraFull**

In [None]:
from torch_geometric.transforms import RandomNodeSplit

# Seed the RNG so the random split (and therefore every reported number) is the
# same on each Restart & Run All.
torch.manual_seed(0)

# 10% validation / 20% test; with split='train_rest' every remaining node
# (about 70%) becomes a training node. `num_train_per_class` is not passed
# because that option is only used by the 'test_rest' and 'random' split modes.
transform = RandomNodeSplit(
    split='train_rest',
    num_val=int(CoraFull.num_nodes * 0.1),
    num_test=int(CoraFull.num_nodes * 0.2),
)
CoraFull = transform(CoraFull)

## **Split CiteSeer**

In [None]:

# Seed the RNG so the random split (and therefore every reported number) is the
# same on each Restart & Run All.
torch.manual_seed(0)

# 10% validation / 20% test; with split='train_rest' every remaining node
# (about 70%) becomes a training node. `num_train_per_class` is not passed
# because that option is only used by the 'test_rest' and 'random' split modes.
transform = RandomNodeSplit(
    split='train_rest',
    num_val=int(citeseer.num_nodes * 0.1),
    num_test=int(citeseer.num_nodes * 0.2),
)
citeseer = transform(citeseer)

# **Training and evaluation helpers**

In [None]:
def train(model, optimizer, criterion, data):
    """Run one full-batch optimisation step and return the training loss.

    Args:
        model: Module called as ``model(data)``; its output is indexed with
            ``data.train_mask``.
        optimizer: Optimizer whose gradients are cleared before, and stepped
            after, the backward pass.
        criterion: Loss callable invoked as ``criterion(scores, targets)``.
        data: Graph object providing ``to()``, ``y`` and ``train_mask``.

    Returns:
        float: Loss computed on the entries selected by ``data.train_mask``.
    """
    model.train()
    optimizer.zero_grad()

    # Follow the model's own device instead of a notebook-level `device`
    # global, so the helper does not depend on another cell having been run.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        data = data.to(first_param.device)

    out = model(data)
    mask = data.train_mask
    loss = criterion(out[mask], data.y[mask])
    loss.backward()
    optimizer.step()
    return loss.item()

def validate(model, criterion, data):
    """Compute the loss on the validation nodes without updating the model.

    Args:
        model: Module called as ``model(data)``; its output is indexed with
            ``data.val_mask``.
        criterion: Loss callable invoked as ``criterion(scores, targets)``.
        data: Graph object providing ``to()``, ``y`` and ``val_mask``.

    Returns:
        float: Loss computed on the entries selected by ``data.val_mask``.
    """
    model.eval()

    # Follow the model's own device instead of a notebook-level `device`
    # global, so the helper does not depend on another cell having been run.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        data = data.to(first_param.device)

    with torch.no_grad():
        out = model(data)
        mask = data.val_mask
        val_loss = criterion(out[mask], data.y[mask])
    return val_loss.item()

def test(model, criterion, data):
    model.eval()
    data = data.to(device)
    with torch.no_grad():
        out = model(data)
        _, pred = torch.max(out, dim=1)
        correct = float(pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
        acc = correct / data.test_mask.sum().item()
    return acc


# **C) implement GCN**

## **Two layer GCN**

In [None]:
from torch_geometric.nn import GCNConv
import torch.optim as optim
import torch.nn.functional as F


class TwoLayerGCN(torch.nn.Module):
    """Two stacked ``GCNConv`` layers with ReLU and dropout in between.

    Args:
        input_dim: Input size of the first convolution.
        hidden_dim: Output size of the first and input size of the second
            convolution.
        output_dim: Output size of the second convolution.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, output_dim)

    def forward(self, data):
        """Return log-softmax scores (over dim 1) computed from ``data.x`` and ``data.edge_index``."""
        edge_index = data.edge_index
        hidden = F.relu(self.conv1(data.x, edge_index))
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, training=self.training)
        scores = self.conv2(hidden, edge_index)
        # NOTE(review): the training cells pass this output to
        # torch.nn.CrossEntropyLoss, which applies log-softmax itself, so this
        # call looks redundant -- consider returning raw scores or using NLLLoss.
        return F.log_softmax(scores, dim=1)




### **Train and evaluate on Cora Full dataset(two layer)**

In [None]:
# Candidate hidden-layer widths for the two-layer GCN on CoraFull.
hidden_dims = [16 , 32, 64, 128, 256]

best_validation_loss = float('inf')
best_hidden_dim = None

for hidden_dim in hidden_dims:
    # Reseed for every candidate rather than once before the loop: each width
    # then starts from the same RNG state no matter how many random draws the
    # earlier candidates consumed, and a later retrain of the winner after
    # torch.manual_seed(0) starts from the same state as the run scored here.
    torch.manual_seed(0)

    model = TwoLayerGCN(
        input_dim=CoraFull.num_node_features,
        hidden_dim=hidden_dim,
        output_dim=CoraFull_dataset.num_classes,
    ).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # Early stopping: stop after `patience` consecutive epochs whose validation
    # loss does not beat the best one seen so far by more than `min_delta`.
    patience = 5
    min_delta = 0.001
    patience_counter = 0
    best_loss = float('inf')

    for epoch in range(200):
        loss = train(model, optimizer, criterion, CoraFull)
        val_loss = validate(model, criterion, CoraFull)

        if val_loss < best_loss - min_delta:
            best_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f'Early stopping at epoch {epoch} \n')
                break

        # Only reached while training continues; the stopping epoch is not logged.
        print(f'Hidden Dimension: {hidden_dim}, Epoch: {epoch}, Loss: {loss:.4f}, Validation Loss: {val_loss:.4f} ')

    # Score this candidate by the validation loss of its final weights.
    validation_loss = validate(model, criterion, CoraFull)
    print(f' Hidden Dimension: {hidden_dim}, Validation Loss: {validation_loss:.4f} \n')

    # Keep the width with the lowest final validation loss.
    if validation_loss < best_validation_loss:
        best_validation_loss = validation_loss
        best_hidden_dim = hidden_dim

print('Best Hidden Dimension: {}'.format(best_hidden_dim))

Hidden Dimension: 16, Epoch: 0, Loss: 4.2510, Validation Loss: 4.1893 
Hidden Dimension: 16, Epoch: 1, Loss: 4.1878, Validation Loss: 4.1109 
Hidden Dimension: 16, Epoch: 2, Loss: 4.1086, Validation Loss: 4.0264 
Hidden Dimension: 16, Epoch: 3, Loss: 4.0230, Validation Loss: 3.9365 
Hidden Dimension: 16, Epoch: 4, Loss: 3.9406, Validation Loss: 3.8462 
Hidden Dimension: 16, Epoch: 5, Loss: 3.8539, Validation Loss: 3.7512 
Hidden Dimension: 16, Epoch: 6, Loss: 3.7559, Validation Loss: 3.6538 
Hidden Dimension: 16, Epoch: 7, Loss: 3.6736, Validation Loss: 3.5583 
Hidden Dimension: 16, Epoch: 8, Loss: 3.5760, Validation Loss: 3.4645 
Hidden Dimension: 16, Epoch: 9, Loss: 3.4843, Validation Loss: 3.3767 
Hidden Dimension: 16, Epoch: 10, Loss: 3.3994, Validation Loss: 3.2912 
Hidden Dimension: 16, Epoch: 11, Loss: 3.3233, Validation Loss: 3.2064 
Hidden Dimension: 16, Epoch: 12, Loss: 3.2396, Validation Loss: 3.1203 
Hidden Dimension: 16, Epoch: 13, Loss: 3.1640, Validation Loss: 3.0341 
Hi

In [None]:
torch.manual_seed(0)

# Rebuild the two-layer GCN with the hidden width stored in `best_hidden_dim`.
model = TwoLayerGCN(
    input_dim=CoraFull.num_node_features,
    hidden_dim=best_hidden_dim,
    output_dim=CoraFull_dataset.num_classes,
).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

# Early-stopping state: tolerate `patience` consecutive epochs without the
# validation loss improving by more than `min_delta`.
patience, min_delta = 5, 0.001
patience_counter, best_loss = 0, float('inf')

for epoch in range(200):
    loss = train(model, optimizer, criterion, CoraFull)
    val_loss = validate(model, criterion, CoraFull)

    if val_loss < best_loss - min_delta:
        # Improvement: remember it and restart the patience count.
        best_loss, patience_counter = val_loss, 0
        continue

    patience_counter += 1
    if patience_counter >= patience:
        print(f'Early stopping at epoch {epoch} \n')
        break

# Report accuracy on the held-out test nodes.
test_accuracy = test(model, criterion, CoraFull)
print(f'Test Accuracy(two layer) on CoraFull dataset: {test_accuracy:.4f}')

Early stopping at epoch 64 

Test Accuracy(two layer) on CoraFull dataset: 0.7155


### **Train and evaluate on Citeseer dataset(two layer)**

In [None]:
# Candidate hidden-layer widths for the two-layer GCN on CiteSeer.
hidden_dims = [16 , 32, 64, 128, 256]

best_validation_loss = float('inf')
best_hidden_dim = None

for hidden_dim in hidden_dims:
    # Reseed for every candidate rather than once before the loop: each width
    # then starts from the same RNG state no matter how many random draws the
    # earlier candidates consumed, and a later retrain of the winner after
    # torch.manual_seed(0) starts from the same state as the run scored here.
    torch.manual_seed(0)

    model = TwoLayerGCN(
        input_dim=citeseer.num_node_features,
        hidden_dim=hidden_dim,
        output_dim=citeseer_dataset.num_classes,
    ).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # Early stopping: stop after `patience` consecutive epochs whose validation
    # loss does not beat the best one seen so far by more than `min_delta`.
    patience = 5
    min_delta = 0.001
    patience_counter = 0
    best_loss = float('inf')

    for epoch in range(200):
        loss = train(model, optimizer, criterion, citeseer)
        val_loss = validate(model, criterion, citeseer)

        if val_loss < best_loss - min_delta:
            best_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f'Early stopping at epoch {epoch} \n')
                break

        # Only reached while training continues; the stopping epoch is not logged.
        print(f'Hidden Dimension: {hidden_dim}, Epoch: {epoch}, Loss: {loss:.4f}, Validation Loss: {val_loss:.4f} ')

    # Score this candidate by the validation loss of its final weights.
    validation_loss = validate(model, criterion, citeseer)
    print(f' Hidden Dimension: {hidden_dim}, Validation Loss: {validation_loss:.4f} \n')

    # Keep the width with the lowest final validation loss.
    if validation_loss < best_validation_loss:
        best_validation_loss = validation_loss
        best_hidden_dim = hidden_dim

print('Best Hidden Dimension: {}'.format(best_hidden_dim))

Hidden Dimension: 16, Epoch: 0, Loss: 1.7921, Validation Loss: 1.6630 
Hidden Dimension: 16, Epoch: 1, Loss: 1.6456, Validation Loss: 1.4801 
Hidden Dimension: 16, Epoch: 2, Loss: 1.4645, Validation Loss: 1.3123 
Hidden Dimension: 16, Epoch: 3, Loss: 1.3012, Validation Loss: 1.1731 
Hidden Dimension: 16, Epoch: 4, Loss: 1.1676, Validation Loss: 1.0566 
Hidden Dimension: 16, Epoch: 5, Loss: 1.0694, Validation Loss: 0.9591 
Hidden Dimension: 16, Epoch: 6, Loss: 0.9681, Validation Loss: 0.8816 
Hidden Dimension: 16, Epoch: 7, Loss: 0.8860, Validation Loss: 0.8208 
Hidden Dimension: 16, Epoch: 8, Loss: 0.8387, Validation Loss: 0.7731 
Hidden Dimension: 16, Epoch: 9, Loss: 0.7628, Validation Loss: 0.7368 
Hidden Dimension: 16, Epoch: 10, Loss: 0.7325, Validation Loss: 0.7093 
Hidden Dimension: 16, Epoch: 11, Loss: 0.6878, Validation Loss: 0.6878 
Hidden Dimension: 16, Epoch: 12, Loss: 0.6601, Validation Loss: 0.6718 
Hidden Dimension: 16, Epoch: 13, Loss: 0.6370, Validation Loss: 0.6597 
Hi

In [None]:
torch.manual_seed(0)

# Rebuild the two-layer GCN with the hidden width stored in `best_hidden_dim`.
model = TwoLayerGCN(
    input_dim=citeseer.num_node_features,
    hidden_dim=best_hidden_dim,
    output_dim=citeseer_dataset.num_classes,
).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

# Early-stopping state: tolerate `patience` consecutive epochs without the
# validation loss improving by more than `min_delta`.
patience, min_delta = 5, 0.001
patience_counter, best_loss = 0, float('inf')

for epoch in range(200):
    loss = train(model, optimizer, criterion, citeseer)
    val_loss = validate(model, criterion, citeseer)

    if val_loss < best_loss - min_delta:
        # Improvement: remember it and restart the patience count.
        best_loss, patience_counter = val_loss, 0
        continue

    patience_counter += 1
    if patience_counter >= patience:
        print(f'Early stopping at epoch {epoch} \n')
        break

# Report accuracy on the held-out test nodes.
test_accuracy = test(model, criterion, citeseer)
print(f'Test Accuracy(two layer) on citeseer dataset: {test_accuracy:.4f}')

Early stopping at epoch 16 

Test Accuracy(two layer) on citeseer dataset: 0.7639


## **One layer GCN**

In [None]:
import torch
from torch_geometric.nn import GCNConv

class OneLayerGCN(torch.nn.Module):
    """A single ``GCNConv`` layer mapping node features straight to class scores.

    Args:
        input_dim: Input size of the convolution.
        output_dim: Output size of the convolution.
    """

    def __init__(self, input_dim, output_dim):
        super().__init__()
        self.conv = GCNConv(input_dim, output_dim)

    def forward(self, data):
        """Return the convolution output for ``data.x`` and ``data.edge_index`` unchanged."""
        return self.conv(data.x, data.edge_index)


### **Train and evaluate on Cora Full dataset(One layer)**

In [None]:
torch.manual_seed(0)

# The one-layer GCN has no hidden width to tune, so it is trained directly.
model = OneLayerGCN(
    input_dim=CoraFull.num_node_features,
    output_dim=CoraFull_dataset.num_classes,
).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

# Early-stopping state: tolerate `patience` consecutive epochs without the
# validation loss improving by more than `min_delta`.
patience, min_delta = 5, 0.001
patience_counter, best_loss = 0, float('inf')

for epoch in range(200):
    loss = train(model, optimizer, criterion, CoraFull)
    val_loss = validate(model, criterion, CoraFull)

    if val_loss < best_loss - min_delta:
        # Improvement: remember it and restart the patience count.
        best_loss, patience_counter = val_loss, 0
        continue

    patience_counter += 1
    if patience_counter >= patience:
        print(f'Early stopping at epoch {epoch} \n')
        break

# Report accuracy on the held-out test nodes.
test_accuracy = test(model, criterion, CoraFull)
print(f'Test Accuracy(one layer) on Cora full dataset: {test_accuracy:.4f}')


Test Accuracy(one layer) on Cora full dataset: 0.7009


### **Train and evaluate on Citeseer dataset(One layer)**

In [None]:
torch.manual_seed(0)

# The one-layer GCN has no hidden width to tune, so it is trained directly.
model = OneLayerGCN(
    input_dim=citeseer.num_node_features,
    output_dim=citeseer_dataset.num_classes,
).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

# Early-stopping state: tolerate `patience` consecutive epochs without the
# validation loss improving by more than `min_delta`.
patience, min_delta = 5, 0.001
patience_counter, best_loss = 0, float('inf')

for epoch in range(200):
    loss = train(model, optimizer, criterion, citeseer)
    val_loss = validate(model, criterion, citeseer)

    if val_loss < best_loss - min_delta:
        # Improvement: remember it and restart the patience count.
        best_loss, patience_counter = val_loss, 0
        continue

    patience_counter += 1
    if patience_counter >= patience:
        print(f'Early stopping at epoch {epoch} \n')
        break

# Report accuracy on the held-out test nodes.
test_accuracy = test(model, criterion, citeseer)
print(f'Test Accuracy(one layer) on citeseer dataset: {test_accuracy:.4f}')


Early stopping at epoch 34 

Test Accuracy(one layer) on citeseer dataset: 0.7639


## **Three layer GCN**

In [None]:
class ThreeLayerGCN(torch.nn.Module):
    """Three stacked ``GCNConv`` layers; ReLU and dropout follow the first two.

    Args:
        input_dim: Input size of the first convolution.
        hidden_dim: Width shared by both hidden representations.
        output_dim: Output size of the last convolution.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.conv1 = GCNConv(input_dim, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, hidden_dim)
        self.conv3 = GCNConv(hidden_dim, output_dim)

    def forward(self, data):
        """Return log-softmax scores (over dim 1) computed from ``data.x`` and ``data.edge_index``."""
        edge_index = data.edge_index
        hidden = data.x
        # Both hidden layers share the same recipe: convolution -> ReLU -> dropout
        # (dropout is only active while the module is in training mode).
        for conv in (self.conv1, self.conv2):
            hidden = F.relu(conv(hidden, edge_index))
            hidden = F.dropout(hidden, training=self.training)
        scores = self.conv3(hidden, edge_index)
        # NOTE(review): the training cells pass this output to
        # torch.nn.CrossEntropyLoss, which applies log-softmax itself, so this
        # call looks redundant -- consider returning raw scores or using NLLLoss.
        return F.log_softmax(scores, dim=1)


### **Train and evaluate on Cora Full dataset(three layer)**

In [None]:
# Candidate hidden widths for the three-layer GCN on CoraFull; a single value
# is used for both hidden layers.
hidden_dims = [16 , 32, 64, 128, 256]

best_validation_loss = float('inf')
best_hidden_dims = None

for hidden_dim in hidden_dims:
    # Reseed for every candidate rather than once before the loop: each width
    # then starts from the same RNG state no matter how many random draws the
    # earlier candidates consumed, and a later retrain of the winner after
    # torch.manual_seed(0) starts from the same state as the run scored here.
    torch.manual_seed(0)

    model = ThreeLayerGCN(
        input_dim=CoraFull.num_node_features,
        hidden_dim=hidden_dim,
        output_dim=CoraFull_dataset.num_classes,
    ).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # Early stopping: stop after `patience` consecutive epochs whose validation
    # loss does not beat the best one seen so far by more than `min_delta`.
    patience = 5
    min_delta = 0.001
    patience_counter = 0
    best_loss = float('inf')

    for epoch in range(200):
        loss = train(model, optimizer, criterion, CoraFull)
        val_loss = validate(model, criterion, CoraFull)

        if val_loss < best_loss - min_delta:
            best_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f'Early stopping at epoch {epoch}\n')
                break

        # Only reached while training continues; the stopping epoch is not logged.
        print(f'Hidden Dimensions: {hidden_dim}, Epoch: {epoch}, Loss: {loss:.4f}, Validation Loss: {val_loss:.4f}')

    # Score this candidate by the validation loss of its final weights.
    validation_loss = validate(model, criterion, CoraFull)
    print(f'Hidden Dimensions: {hidden_dim}, Validation Loss: {validation_loss:.4f}\n')

    # Keep the width with the lowest final validation loss.
    if validation_loss < best_validation_loss:
        best_validation_loss = validation_loss
        best_hidden_dims = hidden_dim

print('Best Hidden Dimensions: {}'.format(best_hidden_dims))


Hidden Dimensions: 16, Epoch: 0, Loss: 4.2503, Validation Loss: 4.2309
Hidden Dimensions: 16, Epoch: 1, Loss: 4.2306, Validation Loss: 4.2003
Hidden Dimensions: 16, Epoch: 2, Loss: 4.1997, Validation Loss: 4.1544
Hidden Dimensions: 16, Epoch: 3, Loss: 4.1583, Validation Loss: 4.0947
Hidden Dimensions: 16, Epoch: 4, Loss: 4.0993, Validation Loss: 4.0239
Hidden Dimensions: 16, Epoch: 5, Loss: 4.0387, Validation Loss: 3.9429
Hidden Dimensions: 16, Epoch: 6, Loss: 3.9652, Validation Loss: 3.8526
Hidden Dimensions: 16, Epoch: 7, Loss: 3.8880, Validation Loss: 3.7596
Hidden Dimensions: 16, Epoch: 8, Loss: 3.8060, Validation Loss: 3.6671
Hidden Dimensions: 16, Epoch: 9, Loss: 3.7370, Validation Loss: 3.5743
Hidden Dimensions: 16, Epoch: 10, Loss: 3.6582, Validation Loss: 3.4855
Hidden Dimensions: 16, Epoch: 11, Loss: 3.5801, Validation Loss: 3.4031
Hidden Dimensions: 16, Epoch: 12, Loss: 3.4986, Validation Loss: 3.3256
Hidden Dimensions: 16, Epoch: 13, Loss: 3.4343, Validation Loss: 3.2507
Hi

In [None]:
torch.manual_seed(0)

# Rebuild the three-layer GCN with the hidden width stored in `best_hidden_dims`.
model = ThreeLayerGCN(
    input_dim=CoraFull.num_node_features,
    hidden_dim=best_hidden_dims,
    output_dim=CoraFull_dataset.num_classes,
).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

# Early-stopping state: tolerate `patience` consecutive epochs without the
# validation loss improving by more than `min_delta`.
patience, min_delta = 5, 0.001
patience_counter, best_loss = 0, float('inf')

for epoch in range(200):
    loss = train(model, optimizer, criterion, CoraFull)
    val_loss = validate(model, criterion, CoraFull)

    if val_loss < best_loss - min_delta:
        # Improvement: remember it and restart the patience count.
        best_loss, patience_counter = val_loss, 0
        continue

    patience_counter += 1
    if patience_counter >= patience:
        print(f'Early stopping at epoch {epoch} \n')
        break

# Report accuracy on the held-out test nodes.
test_accuracy = test(model, criterion, CoraFull)
print(f'Test Accuracy(three layer) on CoraFull dataset: {test_accuracy:.4f}')


Early stopping at epoch 50 

Test Accuracy(three layer) on CoraFull dataset: 0.6945


### **Train and evaluate on Citeseer dataset(three layer)**

In [None]:
# Candidate hidden widths for the three-layer GCN on CiteSeer; a single value
# is used for both hidden layers.
hidden_dims = [16 , 32, 64, 128, 256]

best_validation_loss = float('inf')
best_hidden_dims = None

for hidden_dim in hidden_dims:
    # Reseed for every candidate rather than once before the loop: each width
    # then starts from the same RNG state no matter how many random draws the
    # earlier candidates consumed, and a later retrain of the winner after
    # torch.manual_seed(0) starts from the same state as the run scored here.
    torch.manual_seed(0)

    model = ThreeLayerGCN(
        input_dim=citeseer.num_node_features,
        hidden_dim=hidden_dim,
        output_dim=citeseer_dataset.num_classes,
    ).to(device)
    optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    criterion = torch.nn.CrossEntropyLoss()

    # Early stopping: stop after `patience` consecutive epochs whose validation
    # loss does not beat the best one seen so far by more than `min_delta`.
    patience = 5
    min_delta = 0.001
    patience_counter = 0
    best_loss = float('inf')

    for epoch in range(200):
        loss = train(model, optimizer, criterion, citeseer)
        val_loss = validate(model, criterion, citeseer)

        if val_loss < best_loss - min_delta:
            best_loss = val_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f'Early stopping at epoch {epoch}\n')
                break

        # Only reached while training continues; the stopping epoch is not logged.
        print(f'Hidden Dimensions: {hidden_dim}, Epoch: {epoch}, Loss: {loss:.4f}, Validation Loss: {val_loss:.4f}')

    # Score this candidate by the validation loss of its final weights.
    validation_loss = validate(model, criterion, citeseer)
    print(f'Hidden Dimensions: {hidden_dim}, Validation Loss: {validation_loss:.4f}\n')

    # Keep the width with the lowest final validation loss.
    if validation_loss < best_validation_loss:
        best_validation_loss = validation_loss
        best_hidden_dims = hidden_dim

print('Best Hidden Dimensions: {}'.format(best_hidden_dims))


Hidden Dimensions: 16, Epoch: 0, Loss: 1.7916, Validation Loss: 1.7399
Hidden Dimensions: 16, Epoch: 1, Loss: 1.7428, Validation Loss: 1.6435
Hidden Dimensions: 16, Epoch: 2, Loss: 1.6698, Validation Loss: 1.5339
Hidden Dimensions: 16, Epoch: 3, Loss: 1.5826, Validation Loss: 1.4149
Hidden Dimensions: 16, Epoch: 4, Loss: 1.4737, Validation Loss: 1.2955
Hidden Dimensions: 16, Epoch: 5, Loss: 1.4110, Validation Loss: 1.1868
Hidden Dimensions: 16, Epoch: 6, Loss: 1.3235, Validation Loss: 1.0931
Hidden Dimensions: 16, Epoch: 7, Loss: 1.2612, Validation Loss: 1.0134
Hidden Dimensions: 16, Epoch: 8, Loss: 1.1914, Validation Loss: 0.9434
Hidden Dimensions: 16, Epoch: 9, Loss: 1.1229, Validation Loss: 0.8783
Hidden Dimensions: 16, Epoch: 10, Loss: 1.0479, Validation Loss: 0.8179
Hidden Dimensions: 16, Epoch: 11, Loss: 0.9921, Validation Loss: 0.7660
Hidden Dimensions: 16, Epoch: 12, Loss: 0.9448, Validation Loss: 0.7246
Hidden Dimensions: 16, Epoch: 13, Loss: 0.8934, Validation Loss: 0.6928
Hi

In [None]:
torch.manual_seed(0)

# Rebuild the three-layer GCN with the hidden width stored in `best_hidden_dims`.
model = ThreeLayerGCN(
    input_dim=citeseer.num_node_features,
    hidden_dim=best_hidden_dims,
    output_dim=citeseer_dataset.num_classes,
).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
criterion = torch.nn.CrossEntropyLoss()

# Early-stopping state: tolerate `patience` consecutive epochs without the
# validation loss improving by more than `min_delta`.
patience, min_delta = 5, 0.001
patience_counter, best_loss = 0, float('inf')

for epoch in range(200):
    loss = train(model, optimizer, criterion, citeseer)
    val_loss = validate(model, criterion, citeseer)

    if val_loss < best_loss - min_delta:
        # Improvement: remember it and restart the patience count.
        best_loss, patience_counter = val_loss, 0
        continue

    patience_counter += 1
    if patience_counter >= patience:
        print(f'Early stopping at epoch {epoch} \n')
        break

# Report accuracy on the held-out test nodes.
test_accuracy = test(model, criterion, citeseer)
print(f'Test Accuracy(three layer) on citeseer dataset: {test_accuracy:.4f}')


Early stopping at epoch 25 

Test Accuracy(three layer) on citeseer dataset: 0.7669
