In [None]:
!pip install torch_geometric

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch_geometric.nn as graphnn
from sklearn.metrics import f1_score
from torch_geometric.loader import DataLoader
from torch_geometric.utils import scatter
from sklearn.metrics import f1_score, accuracy_score
from sklearn.model_selection import KFold
from itertools import product

In [3]:
from torch_geometric.datasets import TUDataset

dataset_en = TUDataset(root='', name='ENZYMES',use_node_attr = True)
dataset_rd = TUDataset(root='', name='REDDIT-BINARY')
dataset_pr = TUDataset(root='', name='PROTEINS')

Downloading https://www.chrsmrrs.com/graphkerneldatasets/ENZYMES.zip
Processing...
Done!
Downloading https://www.chrsmrrs.com/graphkerneldatasets/REDDIT-BINARY.zip
Processing...
Done!
Downloading https://www.chrsmrrs.com/graphkerneldatasets/PROTEINS.zip
Processing...
Done!


In [4]:
print(len(dataset_en))
print(len(dataset_rd))
print(len(dataset_pr))

print(dataset_en.num_classes)
print(dataset_rd.num_classes)
print(dataset_pr.num_classes)

print(dataset_en.num_node_features)
print(dataset_rd.num_node_features)
print(dataset_pr.num_node_features)

600
2000
1113
6
2
2
21
0
3


In [5]:
from torch_geometric.loader import DataLoader
train_en = dataset_en[:int(len(dataset_en)*0.8)]
val_en = dataset_en[int(len(dataset_en)*0.8):int(len(dataset_en)*0.9)]
test_en = dataset_en[int(len(dataset_en)*0.9):]

train_en_loader = DataLoader(train_en, batch_size=32, shuffle=True)
val_en_loader = DataLoader(val_en, batch_size=32, shuffle=False)
test_en_loader = DataLoader(test_en, batch_size=32, shuffle=False)

In [25]:
class MLP1(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(MLP1, self).__init__()

        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

    def forward(self, data):
        x = self.fc1(data.x)
        x = graphnn.global_add_pool(x, data.batch)
        x = self.fc2(x)
        return x

In [26]:
def train(model, loss_fcn, device, optimizer, max_epochs, train_dataloader, val_dataloader, patience=30):
    best_val_score = 0
    patience_counter = 0
    metrics_history = {'train_loss': [], 'val_loss': [], 'f1_micro': [], 'f1_macro': [], 'accuracy': [], 'best_score':[]}

    for epoch in range(max_epochs):
        model.train()
        train_losses = []
        for batch in train_dataloader:
            if batch.x is None:
                raise ValueError("Node features are missing. Ensure data.x is correctly set.")
            batch = batch.to(device)
            optimizer.zero_grad()
            logits = model(batch)
            loss = loss_fcn(logits, batch.y)
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())

        val_loss = evaluate_loss(model, loss_fcn, device, val_dataloader)
        f1_micro, f1_macro, accuracy = evaluate_metrics(model, device, val_dataloader)

        # Save metrics
        metrics_history['train_loss'].append(np.mean(train_losses))
        metrics_history['val_loss'].append(val_loss)
        metrics_history['f1_micro'].append(f1_micro)
        metrics_history['f1_macro'].append(f1_macro)
        metrics_history['accuracy'].append(accuracy)

        print(f"Epoch {epoch+1}, Train Loss: {np.mean(train_losses):.4f}, Val Loss: {val_loss:.4f}, F1 Micro: {f1_micro:.4f}, F1 Macro: {f1_macro:.4f}, Accuracy: {accuracy:.4f}")

        # Early stopping logic using f1_micro score
        if f1_micro > best_val_score:
            best_val_score = f1_micro
            metrics_history['best_score'] = best_val_score
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping triggered")
                break

    return metrics_history

In [27]:
def evaluate_loss(model, loss_fcn, device, dataloader):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for batch in dataloader:
            batch = batch.to(device)
            outputs = model(batch)
            loss = loss_fcn(outputs, batch.y)
            total_loss += loss.item()
    return total_loss / len(dataloader)

In [28]:
def evaluate_metrics(model, device, dataloader):
    model.eval()
    total_preds = []
    total_targets = []

    with torch.no_grad():
        for batch in dataloader:
            batch = batch.to(device)
            outputs = model(batch)
            _, predicted = torch.max(outputs, 1)
            total_preds.extend(predicted.view(-1).cpu().numpy())
            total_targets.extend(batch.y.view(-1).cpu().numpy())

    f1_micro = f1_score(total_targets, total_preds, average='micro')
    f1_macro = f1_score(total_targets, total_preds, average='macro')
    accuracy = accuracy_score(total_targets, total_preds)
    return f1_micro, f1_macro, accuracy

In [29]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("\nDevice: ", device)

### Max number of epochs
max_epochs = 500
n_features = dataset_en.num_node_features
n_classes = dataset_en.num_classes


Device:  cuda


In [30]:
baseline = MLP1(
    input_size=n_features, hidden_size=256, output_size=n_classes
).to(device)

### DEFINE LOSS FUNCTION
loss_fcn = nn.CrossEntropyLoss()
### DEFINE OPTIMIZER
optimizer = torch.optim.Adam(baseline.parameters(), lr=0.005)

### TRAIN THE MODEL
metrics_history=train(
    baseline,
    loss_fcn,
    device,
    optimizer,
    max_epochs,
    train_en_loader,
    val_en_loader,
)

Epoch 1, Train Loss: 222.7368, Val Loss: 956.3276, F1 Micro: 0.0000, F1 Macro: 0.0000, Accuracy: 0.0000
Epoch 2, Train Loss: 97.3466, Val Loss: 833.8297, F1 Micro: 0.0000, F1 Macro: 0.0000, Accuracy: 0.0000
Epoch 3, Train Loss: 56.3178, Val Loss: 782.7109, F1 Micro: 0.0000, F1 Macro: 0.0000, Accuracy: 0.0000
Epoch 4, Train Loss: 58.0872, Val Loss: 847.0623, F1 Micro: 0.0000, F1 Macro: 0.0000, Accuracy: 0.0000
Epoch 5, Train Loss: 54.6008, Val Loss: 755.0434, F1 Micro: 0.0000, F1 Macro: 0.0000, Accuracy: 0.0000
Epoch 6, Train Loss: 23.8182, Val Loss: 677.7715, F1 Micro: 0.0000, F1 Macro: 0.0000, Accuracy: 0.0000
Epoch 7, Train Loss: 33.5637, Val Loss: 657.1805, F1 Micro: 0.0000, F1 Macro: 0.0000, Accuracy: 0.0000
Epoch 8, Train Loss: 39.5702, Val Loss: 639.0893, F1 Micro: 0.1500, F1 Macro: 0.1364, Accuracy: 0.1500
Epoch 9, Train Loss: 27.9771, Val Loss: 581.0397, F1 Micro: 0.0000, F1 Macro: 0.0000, Accuracy: 0.0000
Epoch 10, Train Loss: 15.2881, Val Loss: 552.8737, F1 Micro: 0.1000, F1 

In [31]:
def plot_metrics(metrics_history):
    epochs = range(1, len(metrics_history['train_loss']) + 1)

    plt.figure(figsize=(14, 10))

    plt.subplot(2, 2, 1)
    plt.plot(epochs, metrics_history['train_loss'], label='Train Loss')
    plt.plot(epochs, metrics_history['val_loss'], label='Validation Loss')
    plt.title('Training and Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.subplot(2, 2, 2)
    plt.plot(epochs, metrics_history['f1_micro'], label='F1 Score (Micro)')
    plt.plot(epochs, metrics_history['f1_macro'], label='F1 Score (Macro)')
    plt.title('F1 Scores')
    plt.xlabel('Epochs')
    plt.ylabel('F1 Score')
    plt.legend()

    plt.subplot(2, 2, 3)
    plt.plot(epochs, metrics_history['accuracy'], label='Accuracy')
    plt.title('Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.tight_layout()
    plt.show()

In [32]:
from sklearn.model_selection import KFold
from itertools import product

# Outer k-fold cross-validation setup
outer_k_folds = 5
inner_k_folds = 5
num_epochs = 200

# Possible hyperparameters to tune
learning_rates = [0.01, 0.001]
batch_sizes = [8, 16]
patiences = [10, 50]

# Set list to store the evaluation metrics
f1_micro_test_list = []
f1_macro_test_list = []
accuracy_test_list = []

# Prepare the outer k-fold cross-validation
outer_kf = KFold(n_splits=outer_k_folds, shuffle=True, random_state=42)

# Loop over each fold for the outer k-fold
for fold, (train_val_idx, test_idx) in enumerate(outer_kf.split(dataset_en)):
    print(f"Outer FOLD {fold}")
    print("--------------------------------")

    # Split dataset into train_val and test for the current outer fold
    train_val_dataset = dataset_en[train_val_idx]
    test_dataset = dataset_en[test_idx]

    # Initialize the best hyperparameter set and its performance score
    best_hyperparams = None
    best_score = 0

    # Inner k-fold cross-validation for hyperparameter tuning
    inner_kf = KFold(n_splits=inner_k_folds, shuffle=True, random_state=42)

    # Create all combinations of hyperparameters
    all_params = list(product(learning_rates, batch_sizes, patiences))

    # Loop over all combinations of hyperparameters
    for params in all_params:
        lr, batch_size, patience = params
        inner_scores = []

        # Perform inner k-fold cross-validation
        for inner_fold, (inner_train_idx, inner_val_idx) in enumerate(inner_kf.split(train_val_dataset)):
            print(f"Inner FOLD {inner_fold}")
            print(f"Hyperparameters: LR={lr}, Batch Size={batch_size}, Patience={patience}")

            # Split dataset into inner train and validation sets
            inner_train_dataset = train_val_dataset[inner_train_idx]
            inner_val_dataset = train_val_dataset[inner_val_idx]

            # Define train and validation dataloaders for the current inner fold
            inner_train_loader = DataLoader(inner_train_dataset, batch_size=batch_size, shuffle=True)
            inner_val_loader = DataLoader(inner_val_dataset, batch_size=batch_size, shuffle=False)

            # Initialize model and optimizer for the current inner fold
            model = MLP1(
                input_size=dataset_en.num_node_features,
                hidden_size=256,
                output_size=dataset_en.num_classes
            ).to(device)

            optimizer = torch.optim.Adam(model.parameters(), lr=lr)
            loss_fcn = torch.nn.CrossEntropyLoss()

            # Train the model for the current inner fold
            inner_metrics = train(model, loss_fcn, device, optimizer, num_epochs, inner_train_loader, inner_val_loader, patience)

            # Evaluate model performance, e.g., using validation F1 score
            # Save the model performance score for the current hyperparameter combination
            inner_scores.append(inner_metrics['best_score'])

        # Calculate the average performance over all inner folds for the current hyperparameter set
        average_score = np.mean(inner_scores)
        print(f"Average Score for hyperparameters {params}: {average_score}")

        # If the current hyperparameters outperform the previous ones, update the best_hyperparams
        if average_score > best_score:
            best_hyperparams = params
            best_score = average_score

    print(f"Best hyperparameters for Outer FOLD {fold}: {best_hyperparams} with score {best_score}")

    # Now retrain the model on the full train_val_dataset with the best_hyperparams

    # Extract best hyperparameters
    best_lr, best_batch_size, best_patience = best_hyperparams

    # DataLoader for the combined training and validation set
    train_val_loader = DataLoader(train_val_dataset, batch_size=best_batch_size, shuffle=True)

    # DataLoader for the test set
    test_loader = DataLoader(test_dataset, batch_size=best_batch_size, shuffle=False)

    # Initialize the model with the best hyperparameters
    model = MLP1(
        input_size=dataset_en.num_node_features,
        hidden_size=256,
        output_size=dataset_en.num_classes
    ).to(device)

    # Initialize the optimizer with the best learning rate
    optimizer = torch.optim.Adam(model.parameters(), lr=best_lr)

    # Loss function
    loss_fcn = torch.nn.CrossEntropyLoss()

    # Retrain the model on the full train_val_dataset
    retrained_metrics = train(
        model,
        loss_fcn,
        device,
        optimizer,
        num_epochs,
        train_val_loader,
        test_loader,  # We're using the test_loader here to monitor the performance, but we do not use this for making decisions
        best_patience
    )

    # After retraining, evaluate on the test set
    f1_micro_test, f1_macro_test, accuracy_test = evaluate_metrics(model, device, test_loader)
    print(f"Test set evaluation - F1 Micro: {f1_micro_test:.4f}, F1 Macro: {f1_macro_test:.4f}, Accuracy: {accuracy_test:.4f}")
    f1_micro_test_list.append(f1_micro_test)
    f1_macro_test_list.append(f1_macro_test)
    accuracy_test_list.append(accuracy_test)
    # Optionally, save your retrained model
    torch.save(model.state_dict(), f'Basic_model_fold_{fold}.pth')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 60, Train Loss: 11.1851, Val Loss: 11.5204, F1 Micro: 0.1875, F1 Macro: 0.1142, Accuracy: 0.1875
Epoch 61, Train Loss: 7.4297, Val Loss: 7.5818, F1 Micro: 0.2396, F1 Macro: 0.1544, Accuracy: 0.2396
Early stopping triggered
Inner FOLD 1
Hyperparameters: LR=0.01, Batch Size=16, Patience=50
Epoch 1, Train Loss: 615.2097, Val Loss: 199.2527, F1 Micro: 0.2188, F1 Macro: 0.1458, Accuracy: 0.2188
Epoch 2, Train Loss: 95.9572, Val Loss: 31.0099, F1 Micro: 0.1667, F1 Macro: 0.1186, Accuracy: 0.1667
Epoch 3, Train Loss: 21.6182, Val Loss: 13.5147, F1 Micro: 0.1667, F1 Macro: 0.1095, Accuracy: 0.1667
Epoch 4, Train Loss: 11.4158, Val Loss: 16.6105, F1 Micro: 0.1250, F1 Macro: 0.0858, Accuracy: 0.1250
Epoch 5, Train Loss: 15.7774, Val Loss: 20.5447, F1 Micro: 0.1979, F1 Macro: 0.1423, Accuracy: 0.1979
Epoch 6, Train Loss: 10.0722, Val Loss: 8.1791, F1 Micro: 0.2708, F1 Macro: 0.2134, Accuracy: 0.2708
Epoch 7, Train Loss: 8.5989

In [33]:
print(np.mean(f1_micro_test_list))
print(np.mean(f1_macro_test_list))
print(np.mean(accuracy_test_list))

0.25166666666666665
0.18534248801769299
0.25166666666666665


In [34]:
# Initialize a dictionary to store metrics for different models
models_evaluation_metrics = {}

# Example model identifiers
model_names = ['BasicGraphModel', 'GraphSAGEModel', 'GINModel']

# Initialize metric dictionaries for each model
for model_name in model_names:
    models_evaluation_metrics[model_name] = {'f1_micro': [], 'f1_macro': [], 'accuracy': []}

def update_model_metrics(model_name, f1_micro, f1_macro, accuracy):
    models_evaluation_metrics[model_name]['f1_micro'].append(f1_micro)
    models_evaluation_metrics[model_name]['f1_macro'].append(f1_macro)
    models_evaluation_metrics[model_name]['accuracy'].append(accuracy)

update_model_metrics('BasicGraphModel', f1_micro_test_list, f1_macro_test_list, accuracy_test_list)

print(models_evaluation_metrics)

{'BasicGraphModel': {'f1_micro': [[0.3, 0.24166666666666667, 0.18333333333333332, 0.275, 0.25833333333333336]], 'f1_macro': [[0.24020232640922298, 0.12173005938051677, 0.1502883461554861, 0.19894665720752677, 0.2155450509357123]], 'accuracy': [[0.3, 0.24166666666666667, 0.18333333333333332, 0.275, 0.25833333333333336]]}, 'GraphSAGEModel': {'f1_micro': [], 'f1_macro': [], 'accuracy': []}, 'GINModel': {'f1_micro': [], 'f1_macro': [], 'accuracy': []}}


# Do the same thing to the dataset REDDIT-BINARY

In [None]:
# The dataset Reddit-Binary has no node_features, so we use node_degree as its feature
import torch
from torch_geometric.datasets import TUDataset
from torch_geometric.transforms import BaseTransform
from torch_geometric.utils import degree

class AddDegreeFeature(BaseTransform):
    def __call__(self, data):
        deg = degree(data.edge_index[0], dtype=torch.float)
        data.x = deg.unsqueeze(-1)  # Make it a 2D tensor [num_nodes, 1]
        return data

# Load your dataset and apply the transformation
dataset_rd = TUDataset(root='/tmp/REDDIT-BINARY', name='REDDIT-BINARY', transform=AddDegreeFeature())

# Now verify by printing the features of the first few graphs
for i, data in enumerate(dataset_rd):
    if i >= 5:  # Check the first 5 graphs
        break
    print(data.x)

In [37]:
from torch.utils.data import Subset

# Outer k-fold cross-validation setup
outer_k_folds = 5
inner_k_folds = 5
num_epochs = 200

# Possible hyperparameters to tune
learning_rates = [0.01, 0.001]
batch_sizes = [8, 16]
patiences = [10, 50]

# Set list to store the evaluation metrics
f1_micro_test_list = []
f1_macro_test_list = []
accuracy_test_list = []

# Prepare the outer k-fold cross-validation
outer_kf = KFold(n_splits=outer_k_folds, shuffle=True, random_state=42)

# Loop over each fold for the outer k-fold
for fold, (train_val_idx, test_idx) in enumerate(outer_kf.split(dataset_rd)):
    print(f"Outer FOLD {fold}")
    print("--------------------------------")

    # Split dataset into train_val and test for the current outer fold
    train_val_subset = Subset(dataset_rd, train_val_idx)
    test_subset = Subset(dataset_rd, test_idx)

    # Initialize the best hyperparameter set and its performance score
    best_hyperparams = None
    best_score = 0

    # Inner k-fold cross-validation for hyperparameter tuning
    inner_kf = KFold(n_splits=inner_k_folds, shuffle=True, random_state=42)

    # Create all combinations of hyperparameters
    all_params = list(product(learning_rates, batch_sizes, patiences))

    # Loop over all combinations of hyperparameters
    for params in all_params:
        lr, batch_size, patience = params
        inner_scores = []

        # Perform inner k-fold cross-validation
        for inner_fold, (inner_train_idx, inner_val_idx) in enumerate(inner_kf.split(train_val_dataset)):
            print(f"Inner FOLD {inner_fold}")
            print(f"Hyperparameters: LR={lr}, Batch Size={batch_size}, Patience={patience}")

            # Split dataset into inner train and validation sets
            inner_train_subset = Subset(train_val_subset, inner_train_idx)
            inner_val_subset = Subset(train_val_subset, inner_val_idx)

            # Define train and validation dataloaders for the current inner fold
            inner_train_loader = DataLoader(inner_train_subset, batch_size=batch_size, shuffle=True)
            inner_val_loader = DataLoader(inner_val_subset, batch_size=batch_size, shuffle=False)

            # Initialize model and optimizer for the current inner fold
            model = MLP1(
                input_size=1,
                hidden_size=256,
                output_size=dataset_rd.num_classes
            ).to(device)

            optimizer = torch.optim.Adam(model.parameters(), lr=lr)
            loss_fcn = torch.nn.CrossEntropyLoss()

            # Train the model for the current inner fold
            inner_metrics = train(model, loss_fcn, device, optimizer, num_epochs, inner_train_loader, inner_val_loader, patience)

            # Evaluate model performance, e.g., using validation F1 score
            # Save the model performance score for the current hyperparameter combination
            inner_scores.append(inner_metrics['best_score'])

        # Calculate the average performance over all inner folds for the current hyperparameter set
        average_score = np.mean(inner_scores)
        print(f"Average Score for hyperparameters {params}: {average_score}")

        # If the current hyperparameters outperform the previous ones, update the best_hyperparams
        if average_score > best_score:
            best_hyperparams = params
            best_score = average_score

    print(f"Best hyperparameters for Outer FOLD {fold}: {best_hyperparams} with score {best_score}")

    # Now retrain the model on the full train_val_dataset with the best_hyperparams

    # Extract best hyperparameters
    best_lr, best_batch_size, best_patience = best_hyperparams

    # DataLoader for the combined training and validation set
    train_val_loader = DataLoader(train_val_subset, batch_size=best_batch_size, shuffle=True)

    # DataLoader for the test set
    test_loader = DataLoader(test_subset, batch_size=best_batch_size, shuffle=False)

    # Initialize the model with the best hyperparameters
    model = MLP1(
        input_size=1,
        hidden_size=256,
        output_size=dataset_rd.num_classes
    ).to(device)

    # Initialize the optimizer with the best learning rate
    optimizer = torch.optim.Adam(model.parameters(), lr=best_lr)

    # Loss function
    loss_fcn = torch.nn.CrossEntropyLoss()

    # Retrain the model on the full train_val_dataset
    retrained_metrics = train(
        model,
        loss_fcn,
        device,
        optimizer,
        num_epochs,
        train_val_loader,
        test_loader,  # We're using the test_loader here to monitor the performance, but we do not use this for making decisions
        best_patience
    )

    # After retraining, evaluate on the test set
    f1_micro_test, f1_macro_test, accuracy_test = evaluate_metrics(model, device, test_loader)
    print(f"Test set evaluation - F1 Micro: {f1_micro_test:.4f}, F1 Macro: {f1_macro_test:.4f}, Accuracy: {accuracy_test:.4f}")
    f1_micro_test_list.append(f1_micro_test)
    f1_macro_test_list.append(f1_macro_test)
    accuracy_test_list.append(accuracy_test)
    # Optionally, save your retrained model
    torch.save(model.state_dict(), f'rd_Basic_model_fold_{fold}.pth')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 28, Train Loss: 38.3186, Val Loss: 48.7488, F1 Micro: 0.2083, F1 Macro: 0.1724, Accuracy: 0.2083
Epoch 29, Train Loss: 17.0715, Val Loss: 10.5695, F1 Micro: 0.3125, F1 Macro: 0.3050, Accuracy: 0.3125
Epoch 30, Train Loss: 26.4205, Val Loss: 8.3602, F1 Micro: 0.7708, F1 Macro: 0.5107, Accuracy: 0.7708
Epoch 31, Train Loss: 40.2417, Val Loss: 8.9798, F1 Micro: 0.4167, F1 Macro: 0.4167, Accuracy: 0.4167
Epoch 32, Train Loss: 31.3814, Val Loss: 5.7499, F1 Micro: 0.6146, F1 Macro: 0.5697, Accuracy: 0.6146
Epoch 33, Train Loss: 21.5324, Val Loss: 8.5518, F1 Micro: 0.4792, F1 Macro: 0.4771, Accuracy: 0.4792
Epoch 34, Train Loss: 19.7031, Val Loss: 38.1296, F1 Micro: 0.7917, F1 Macro: 0.4419, Accuracy: 0.7917
Epoch 35, Train Loss: 14.9647, Val Loss: 12.2819, F1 Micro: 0.7812, F1 Macro: 0.5171, Accuracy: 0.7812
Epoch 36, Train Loss: 21.8412, Val Loss: 26.8941, F1 Micro: 0.2188, F1 Macro: 0.1869, Accuracy: 0.2188
Epoch 37, Tr

In [38]:
print(np.mean(f1_micro_test_list))
print(np.mean(f1_macro_test_list))
print(np.mean(accuracy_test_list))

0.603
0.5491640114702554
0.603


In [39]:
# Initialize a dictionary to store metrics for different models
models_evaluation_metrics = {}

# Example model identifiers
model_names = ['BasicGraphModel', 'GraphSAGEModel', 'GINModel']

# Initialize metric dictionaries for each model
for model_name in model_names:
    models_evaluation_metrics[model_name] = {'f1_micro': [], 'f1_macro': [], 'accuracy': []}

def update_model_metrics(model_name, f1_micro, f1_macro, accuracy):
    models_evaluation_metrics[model_name]['f1_micro'].append(f1_micro)
    models_evaluation_metrics[model_name]['f1_macro'].append(f1_macro)
    models_evaluation_metrics[model_name]['accuracy'].append(accuracy)

update_model_metrics('BasicGraphModel', f1_micro_test_list, f1_macro_test_list, accuracy_test_list)

print(models_evaluation_metrics)


{'BasicGraphModel': {'f1_micro': [[0.735, 0.5825, 0.7075, 0.4475, 0.5425]], 'f1_macro': [[0.7271135825352693, 0.561768931516555, 0.7018633540372672, 0.30915371329879104, 0.44592047596339446]], 'accuracy': [[0.735, 0.5825, 0.7075, 0.4475, 0.5425]]}, 'GraphSAGEModel': {'f1_micro': [], 'f1_macro': [], 'accuracy': []}, 'GINModel': {'f1_micro': [], 'f1_macro': [], 'accuracy': []}}


# Do the same thing with Protein Dataset

In [41]:
class MLP2(nn.Module):
    def __init__(self, input_size, output_size):
        super(MLP2, self).__init__()

        self.fc = nn.Linear(input_size, output_size)
        self.relu = nn.ReLU()

    def forward(self, data):
        x = graphnn.global_add_pool(data.x, data.batch)
        x = self.fc(x)
        x = self.relu(x)

        return x

In [43]:
# Outer k-fold cross-validation setup
outer_k_folds = 5
inner_k_folds = 5
num_epochs = 200

# Possible hyperparameters to tune
learning_rates = [0.01, 0.001]
batch_sizes = [8, 16]
patiences = [10, 50]

# Set list to store the evaluation metrics
f1_micro_test_list = []
f1_macro_test_list = []
accuracy_test_list = []

# Prepare the outer k-fold cross-validation
outer_kf = KFold(n_splits=outer_k_folds, shuffle=True, random_state=42)

# Loop over each fold for the outer k-fold
for fold, (train_val_idx, test_idx) in enumerate(outer_kf.split(dataset_pr)):
    print(f"Outer FOLD {fold}")
    print("--------------------------------")

    # Split dataset into train_val and test for the current outer fold
    train_val_dataset = dataset_pr[train_val_idx]
    test_dataset = dataset_pr[test_idx]

    # Initialize the best hyperparameter set and its performance score
    best_hyperparams = None
    best_score = 0

    # Inner k-fold cross-validation for hyperparameter tuning
    inner_kf = KFold(n_splits=inner_k_folds, shuffle=True, random_state=42)

    # Create all combinations of hyperparameters
    all_params = list(product(learning_rates, batch_sizes, patiences))

    # Loop over all combinations of hyperparameters
    for params in all_params:
        lr, batch_size, patience = params
        inner_scores = []

        # Perform inner k-fold cross-validation
        for inner_fold, (inner_train_idx, inner_val_idx) in enumerate(inner_kf.split(train_val_dataset)):
            print(f"Inner FOLD {inner_fold}")
            print(f"Hyperparameters: LR={lr}, Batch Size={batch_size}, Patience={patience}")

            # Split dataset into inner train and validation sets
            inner_train_dataset = train_val_dataset[inner_train_idx]
            inner_val_dataset = train_val_dataset[inner_val_idx]

            # Define train and validation dataloaders for the current inner fold
            inner_train_loader = DataLoader(inner_train_dataset, batch_size=batch_size, shuffle=True)
            inner_val_loader = DataLoader(inner_val_dataset, batch_size=batch_size, shuffle=False)

            # Initialize model and optimizer for the current inner fold
            model = MLP2(
                input_size=dataset_pr.num_node_features,
                output_size=dataset_pr.num_classes
            ).to(device)

            optimizer = torch.optim.Adam(model.parameters(), lr=lr)
            loss_fcn = torch.nn.CrossEntropyLoss()

            # Train the model for the current inner fold
            inner_metrics = train(model, loss_fcn, device, optimizer, num_epochs, inner_train_loader, inner_val_loader, patience)

            # Evaluate model performance, e.g., using validation F1 score
            # Save the model performance score for the current hyperparameter combination
            inner_scores.append(inner_metrics['best_score'])

        # Calculate the average performance over all inner folds for the current hyperparameter set
        average_score = np.mean(inner_scores)
        print(f"Average Score for hyperparameters {params}: {average_score}")

        # If the current hyperparameters outperform the previous ones, update the best_hyperparams
        if average_score > best_score:
            best_hyperparams = params
            best_score = average_score

    print(f"Best hyperparameters for Outer FOLD {fold}: {best_hyperparams} with score {best_score}")

    # Now retrain the model on the full train_val_dataset with the best_hyperparams

    # Extract best hyperparameters
    best_lr, best_batch_size, best_patience = best_hyperparams

    # DataLoader for the combined training and validation set
    train_val_loader = DataLoader(train_val_dataset, batch_size=best_batch_size, shuffle=True)

    # DataLoader for the test set
    test_loader = DataLoader(test_dataset, batch_size=best_batch_size, shuffle=False)

    # Initialize the model with the best hyperparameters
    model = MLP2(
        input_size=dataset_pr.num_node_features,
        output_size=dataset_pr.num_classes
    ).to(device)

    # Initialize the optimizer with the best learning rate
    optimizer = torch.optim.Adam(model.parameters(), lr=best_lr)

    # Loss function
    loss_fcn = torch.nn.CrossEntropyLoss()

    # Retrain the model on the full train_val_dataset
    retrained_metrics = train(
        model,
        loss_fcn,
        device,
        optimizer,
        num_epochs,
        train_val_loader,
        test_loader,  # We're using the test_loader here to monitor the performance, but we do not use this for making decisions
        best_patience
    )

    # After retraining, evaluate on the test set
    f1_micro_test, f1_macro_test, accuracy_test = evaluate_metrics(model, device, test_loader)
    print(f"Test set evaluation - F1 Micro: {f1_micro_test:.4f}, F1 Macro: {f1_macro_test:.4f}, Accuracy: {accuracy_test:.4f}")
    f1_micro_test_list.append(f1_micro_test)
    f1_macro_test_list.append(f1_macro_test)
    accuracy_test_list.append(accuracy_test)
    # Optionally, save your retrained model
    torch.save(model.state_dict(), f'Basic_model_fold_{fold}.pth')

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 7, Train Loss: 0.7261, Val Loss: 0.7657, F1 Micro: 0.6910, F1 Macro: 0.5056, Accuracy: 0.6910
Epoch 8, Train Loss: 0.7215, Val Loss: 0.7590, F1 Micro: 0.6910, F1 Macro: 0.5056, Accuracy: 0.6910
Epoch 9, Train Loss: 0.7181, Val Loss: 0.7488, F1 Micro: 0.6910, F1 Macro: 0.5056, Accuracy: 0.6910
Epoch 10, Train Loss: 0.7125, Val Loss: 0.7429, F1 Micro: 0.6854, F1 Macro: 0.5021, Accuracy: 0.6854
Epoch 11, Train Loss: 0.7081, Val Loss: 0.7365, F1 Micro: 0.6854, F1 Macro: 0.5021, Accuracy: 0.6854
Epoch 12, Train Loss: 0.7042, Val Loss: 0.7297, F1 Micro: 0.6854, F1 Macro: 0.5021, Accuracy: 0.6854
Early stopping triggered
Inner FOLD 2
Hyperparameters: LR=0.001, Batch Size=16, Patience=10
Epoch 1, Train Loss: 4.3945, Val Loss: 3.7242, F1 Micro: 0.5899, F1 Macro: 0.3710, Accuracy: 0.5899
Epoch 2, Train Loss: 3.8500, Val Loss: 3.1755, F1 Micro: 0.5899, F1 Macro: 0.3710, Accuracy: 0.5899
Epoch 3, Train Loss: 3.2504, Val Loss: 2

In [44]:
print(np.mean(f1_micro_test_list))
print(np.mean(f1_macro_test_list))
print(np.mean(accuracy_test_list))

0.6936775340362784
0.6447894038088363
0.6936775340362784


In [45]:
# Initialize a dictionary to store metrics for different models
models_evaluation_metrics = {}

# Example model identifiers
model_names = ['BasicGraphModel', 'GraphSAGEModel', 'GINModel']

# Initialize metric dictionaries for each model
for model_name in model_names:
    models_evaluation_metrics[model_name] = {'f1_micro': [], 'f1_macro': [], 'accuracy': []}

def update_model_metrics(model_name, f1_micro, f1_macro, accuracy):
    models_evaluation_metrics[model_name]['f1_micro'].append(f1_micro)
    models_evaluation_metrics[model_name]['f1_macro'].append(f1_macro)
    models_evaluation_metrics[model_name]['accuracy'].append(accuracy)

update_model_metrics('BasicGraphModel', f1_micro_test_list, f1_macro_test_list, accuracy_test_list)

print(models_evaluation_metrics)

{'BasicGraphModel': {'f1_micro': [[0.6905829596412556, 0.7219730941704036, 0.6053811659192825, 0.7432432432432431, 0.7072072072072072]], 'f1_macro': [[0.6474510253179058, 0.5684769038701623, 0.6030744336569579, 0.7332349530937072, 0.6717097031054488]], 'accuracy': [[0.6905829596412556, 0.7219730941704036, 0.6053811659192825, 0.7432432432432432, 0.7072072072072072]]}, 'GraphSAGEModel': {'f1_micro': [], 'f1_macro': [], 'accuracy': []}, 'GINModel': {'f1_micro': [], 'f1_macro': [], 'accuracy': []}}
