In [2]:
!pip install torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.5.2-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch_geometric
Successfully installed torch_geometric-2.5.2


In [3]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch_geometric.nn as graphnn
import pandas as pd
from sklearn.metrics import f1_score
from torch_geometric.loader import DataLoader
from torch_geometric.utils import scatter
from sklearn.metrics import f1_score, accuracy_score
from sklearn.model_selection import KFold
from itertools import product


In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("\nDevice: ", device)



Device:  cuda


In [5]:
from torch_geometric.datasets import TUDataset

dataset_en = TUDataset(root='', name='ENZYMES',use_node_attr = True)
dataset_rd = TUDataset(root='', name='REDDIT-BINARY')
dataset_pr = TUDataset(root='', name='PROTEINS')


Downloading https://www.chrsmrrs.com/graphkerneldatasets/ENZYMES.zip
Processing...
Done!
Downloading https://www.chrsmrrs.com/graphkerneldatasets/REDDIT-BINARY.zip
Processing...
Done!
Downloading https://www.chrsmrrs.com/graphkerneldatasets/PROTEINS.zip
Processing...
Done!


In [6]:
print(len(dataset_en))
print(len(dataset_rd))
print(len(dataset_pr))

print(dataset_en.num_classes)
print(dataset_rd.num_classes)
print(dataset_pr.num_classes)

print(dataset_en.num_node_features)
print(dataset_rd.num_node_features)
print(dataset_pr.num_node_features)


600
2000
1113
6
2
2
21
0
3


# try with a basic GNN model

In [7]:
class BasicGraphModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size,dropout_rate=0.5):
        super().__init__()

        self.graphconv1 = graphnn.GCNConv(input_size, hidden_size)
        self.graphconv2 = graphnn.GCNConv(hidden_size, hidden_size)
        self.graphconv3 = graphnn.GCNConv(hidden_size, output_size)
        self.dropout = nn.Dropout(dropout_rate)

        self.elu = nn.ELU()

    def forward(self, data):
        x = self.graphconv1(data.x, data.edge_index)
        x = self.elu(x)
        x = self.dropout(x)
        x = self.graphconv2(x, data.edge_index)
        x = self.elu(x)
        x = self.dropout(x)
        x = self.graphconv3(x, data.edge_index)
        x = graphnn.global_mean_pool(x, data.batch)

        return x

#Define evaluate and train function

In [8]:
from torch_geometric.loader import DataLoader
train_en = dataset_en[:int(len(dataset_en)*0.8)]
val_en = dataset_en[int(len(dataset_en)*0.8):int(len(dataset_en)*0.9)]
test_en = dataset_en[int(len(dataset_en)*0.9):]

train_en_loader = DataLoader(train_en, batch_size=32, shuffle=True)
val_en_loader = DataLoader(val_en, batch_size=32, shuffle=False)
test_en_loader = DataLoader(test_en, batch_size=32, shuffle=False)

In [9]:
def train(model, loss_fcn, device, optimizer, max_epochs, train_dataloader, val_dataloader, patience=30):
    best_val_score = 0
    patience_counter = 0
    metrics_history = {'train_loss': [], 'val_loss': [], 'f1_micro': [], 'f1_macro': [], 'accuracy': [], 'best_score':[]}

    for epoch in range(max_epochs):
        model.train()
        train_losses = []
        for batch in train_dataloader:
            if batch.x is None:
                raise ValueError("Node features are missing. Ensure data.x is correctly set.")
            batch = batch.to(device)
            optimizer.zero_grad()
            logits = model(batch)
            loss = loss_fcn(logits, batch.y)
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())

        val_loss = evaluate_loss(model, loss_fcn, device, val_dataloader)
        f1_micro, f1_macro, accuracy = evaluate_metrics(model, device, val_dataloader)

        # Save metrics
        metrics_history['train_loss'].append(np.mean(train_losses))
        metrics_history['val_loss'].append(val_loss)
        metrics_history['f1_micro'].append(f1_micro)
        metrics_history['f1_macro'].append(f1_macro)
        metrics_history['accuracy'].append(accuracy)

        print(f"Epoch {epoch+1}, Train Loss: {np.mean(train_losses):.4f}, Val Loss: {val_loss:.4f}, F1 Micro: {f1_micro:.4f}, F1 Macro: {f1_macro:.4f}, Accuracy: {accuracy:.4f}")

        # Early stopping logic using f1_micro score
        if f1_micro > best_val_score:
            best_val_score = f1_micro
            metrics_history['best_score'] = best_val_score
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping triggered")
                break

    return metrics_history

In [10]:
def evaluate_loss(model, loss_fcn, device, dataloader):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for batch in dataloader:
            batch = batch.to(device)
            outputs = model(batch)
            loss = loss_fcn(outputs, batch.y)
            total_loss += loss.item()
    return total_loss / len(dataloader)

In [11]:
def evaluate_metrics(model, device, dataloader):
    model.eval()
    total_preds = []
    total_targets = []

    with torch.no_grad():
        for batch in dataloader:
            batch = batch.to(device)
            outputs = model(batch)
            _, predicted = torch.max(outputs, 1)
            total_preds.extend(predicted.view(-1).cpu().numpy())
            total_targets.extend(batch.y.view(-1).cpu().numpy())

    f1_micro = f1_score(total_targets, total_preds, average='micro')
    f1_macro = f1_score(total_targets, total_preds, average='macro')
    accuracy = accuracy_score(total_targets, total_preds)
    return f1_micro, f1_macro, accuracy

In [16]:

### Max number of epochs
max_epochs = 300
n_features = dataset_en.num_node_features
n_classes = dataset_en.num_classes
### DEFINE THE MODEL
basic_model = BasicGraphModel(
    input_size=n_features, hidden_size=256, output_size=n_classes
).to(device)

### DEFINE LOSS FUNCTION
loss_fcn = nn.CrossEntropyLoss()
### DEFINE OPTIMIZER
optimizer = torch.optim.Adam(basic_model.parameters(), lr=0.001)

### TRAIN THE MODEL
metrics_history=train(
    basic_model,
    loss_fcn,
    device,
    optimizer,
    max_epochs,
    train_en_loader,
    val_en_loader,
    patience=100
)

Epoch 1, Train Loss: 3.0010, Val Loss: 24.1678, F1 Micro: 0.0000, F1 Macro: 0.0000, Accuracy: 0.0000
Epoch 2, Train Loss: 2.2168, Val Loss: 23.8126, F1 Micro: 0.3000, F1 Macro: 0.2000, Accuracy: 0.3000
Epoch 3, Train Loss: 2.1167, Val Loss: 21.7633, F1 Micro: 0.3000, F1 Macro: 0.1043, Accuracy: 0.3000
Epoch 4, Train Loss: 1.9451, Val Loss: 19.5818, F1 Micro: 0.3000, F1 Macro: 0.1304, Accuracy: 0.3000
Epoch 5, Train Loss: 1.9856, Val Loss: 17.9018, F1 Micro: 0.0833, F1 Macro: 0.0833, Accuracy: 0.0833
Epoch 6, Train Loss: 1.8673, Val Loss: 16.2364, F1 Micro: 0.0000, F1 Macro: 0.0000, Accuracy: 0.0000
Epoch 7, Train Loss: 1.7605, Val Loss: 15.1460, F1 Micro: 0.0000, F1 Macro: 0.0000, Accuracy: 0.0000
Epoch 8, Train Loss: 1.7038, Val Loss: 14.0629, F1 Micro: 0.0167, F1 Macro: 0.0160, Accuracy: 0.0167
Epoch 9, Train Loss: 1.6442, Val Loss: 12.9626, F1 Micro: 0.2833, F1 Macro: 0.0958, Accuracy: 0.2833
Epoch 10, Train Loss: 1.6621, Val Loss: 11.9540, F1 Micro: 0.1167, F1 Macro: 0.0778, Accura

# Try the k-folder

In [12]:
def plot_metrics(metrics_history):
    epochs = range(1, len(metrics_history['train_loss']) + 1)

    plt.figure(figsize=(14, 10))

    plt.subplot(2, 2, 1)
    plt.plot(epochs, metrics_history['train_loss'], label='Train Loss')
    plt.plot(epochs, metrics_history['val_loss'], label='Validation Loss')
    plt.title('Training and Validation Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()

    plt.subplot(2, 2, 2)
    plt.plot(epochs, metrics_history['f1_micro'], label='F1 Score (Micro)')
    plt.plot(epochs, metrics_history['f1_macro'], label='F1 Score (Macro)')
    plt.title('F1 Scores')
    plt.xlabel('Epochs')
    plt.ylabel('F1 Score')
    plt.legend()

    plt.subplot(2, 2, 3)
    plt.plot(epochs, metrics_history['accuracy'], label='Accuracy')
    plt.title('Accuracy')
    plt.xlabel('Epochs')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.tight_layout()
    plt.show()

In [16]:

# Outer k-fold cross-validation setup
outer_k_folds = 5
inner_k_folds = 5
num_epochs = 200

# Possible hyperparameters to tune
learning_rates = [0.01, 0.001]
batch_sizes = [8, 16]
patiences = [10, 50]

# Set list to store the evaluation metrics
f1_micro_test_list = []
f1_macro_test_list = []
accuracy_test_list = []

# Prepare the outer k-fold cross-validation
outer_kf = KFold(n_splits=outer_k_folds, shuffle=True, random_state=42)

# Loop over each fold for the outer k-fold
for fold, (train_val_idx, test_idx) in enumerate(outer_kf.split(dataset_en)):
    print(f"Outer FOLD {fold}")
    print("--------------------------------")

    # Split dataset into train_val and test for the current outer fold
    train_val_dataset = dataset_en[train_val_idx]
    test_dataset = dataset_en[test_idx]

    # Initialize the best hyperparameter set and its performance score
    best_hyperparams = None
    best_score = 0

    # Inner k-fold cross-validation for hyperparameter tuning
    inner_kf = KFold(n_splits=inner_k_folds, shuffle=True, random_state=42)

    # Create all combinations of hyperparameters
    all_params = list(product(learning_rates, batch_sizes, patiences))

    # Loop over all combinations of hyperparameters
    for params in all_params:
        lr, batch_size, patience = params
        inner_scores = []

        # Perform inner k-fold cross-validation
        for inner_fold, (inner_train_idx, inner_val_idx) in enumerate(inner_kf.split(train_val_dataset)):
            print(f"Inner FOLD {inner_fold}")
            print(f"Hyperparameters: LR={lr}, Batch Size={batch_size}, Patience={patience}")

            # Split dataset into inner train and validation sets
            inner_train_dataset = train_val_dataset[inner_train_idx]
            inner_val_dataset = train_val_dataset[inner_val_idx]

            # Define train and validation dataloaders for the current inner fold
            inner_train_loader = DataLoader(inner_train_dataset, batch_size=batch_size, shuffle=True)
            inner_val_loader = DataLoader(inner_val_dataset, batch_size=batch_size, shuffle=False)

            # Initialize model and optimizer for the current inner fold
            model = BasicGraphModel(
                input_size=dataset_en.num_node_features,
                hidden_size=256,
                output_size=dataset_en.num_classes,
                dropout_rate=0.5
            ).to(device)

            optimizer = torch.optim.Adam(model.parameters(), lr=lr)
            loss_fcn = torch.nn.CrossEntropyLoss()

            # Train the model for the current inner fold
            inner_metrics = train(model, loss_fcn, device, optimizer, num_epochs, inner_train_loader, inner_val_loader, patience)

            # Evaluate model performance, e.g., using validation F1 score
            # Save the model performance score for the current hyperparameter combination
            inner_scores.append(inner_metrics['best_score'])

        # Calculate the average performance over all inner folds for the current hyperparameter set
        average_score = np.mean(inner_scores)
        print(f"Average Score for hyperparameters {params}: {average_score}")

        # If the current hyperparameters outperform the previous ones, update the best_hyperparams
        if average_score > best_score:
            best_hyperparams = params
            best_score = average_score

    print(f"Best hyperparameters for Outer FOLD {fold}: {best_hyperparams} with score {best_score}")

    # Now retrain the model on the full train_val_dataset with the best_hyperparams

    # Extract best hyperparameters
    best_lr, best_batch_size, best_patience = best_hyperparams

    # DataLoader for the combined training and validation set
    train_val_loader = DataLoader(train_val_dataset, batch_size=best_batch_size, shuffle=True)

    # DataLoader for the test set
    test_loader = DataLoader(test_dataset, batch_size=best_batch_size, shuffle=False)

    # Initialize the model with the best hyperparameters
    model = BasicGraphModel(
        input_size=dataset_en.num_node_features,
        hidden_size=256,
        output_size=dataset_en.num_classes,
        dropout_rate=0.5  # You could also tune the dropout rate if you wanted
    ).to(device)

    # Initialize the optimizer with the best learning rate
    optimizer = torch.optim.Adam(model.parameters(), lr=best_lr)

    # Loss function
    loss_fcn = torch.nn.CrossEntropyLoss()

    # Retrain the model on the full train_val_dataset
    retrained_metrics = train(
        model,
        loss_fcn,
        device,
        optimizer,
        num_epochs,
        train_val_loader,
        test_loader,  # We're using the test_loader here to monitor the performance, but we do not use this for making decisions
        best_patience
    )

    # After retraining, evaluate on the test set
    f1_micro_test, f1_macro_test, accuracy_test = evaluate_metrics(model, device, test_loader)
    print(f"Test set evaluation - F1 Micro: {f1_micro_test:.4f}, F1 Macro: {f1_macro_test:.4f}, Accuracy: {accuracy_test:.4f}")
    f1_micro_test_list.append(f1_micro_test)
    f1_macro_test_list.append(f1_macro_test)
    accuracy_test_list.append(accuracy_test)
    # Optionally, save your retrained model
    torch.save(model.state_dict(), f'Basic_model_fold_{fold}.pth')




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 183, Train Loss: 0.7700, Val Loss: 1.6199, F1 Micro: 0.5000, F1 Macro: 0.4958, Accuracy: 0.5000
Epoch 184, Train Loss: 0.7443, Val Loss: 1.3067, F1 Micro: 0.5938, F1 Macro: 0.5931, Accuracy: 0.5938
Epoch 185, Train Loss: 0.6439, Val Loss: 1.3061, F1 Micro: 0.6250, F1 Macro: 0.6276, Accuracy: 0.6250
Epoch 186, Train Loss: 0.6463, Val Loss: 1.3514, F1 Micro: 0.6042, F1 Macro: 0.6010, Accuracy: 0.6042
Epoch 187, Train Loss: 0.6730, Val Loss: 1.4169, F1 Micro: 0.6354, F1 Macro: 0.6385, Accuracy: 0.6354
Epoch 188, Train Loss: 0.6975, Val Loss: 1.3984, F1 Micro: 0.6250, F1 Macro: 0.6355, Accuracy: 0.6250
Epoch 189, Train Loss: 0.6944, Val Loss: 1.4154, F1 Micro: 0.6250, F1 Macro: 0.6263, Accuracy: 0.6250
Epoch 190, Train Loss: 0.6498, Val Loss: 1.3965, F1 Micro: 0.6042, F1 Macro: 0.6001, Accuracy: 0.6042
Epoch 191, Train Loss: 0.6610, Val Loss: 1.3900, F1 Micro: 0.5833, F1 Macro: 0.5760, Accuracy: 0.5833
Epoch 192, Train 

In [39]:
print(np.mean(f1_micro_test_list))
print(np.mean(f1_macro_test_list))
print(np.mean(accuracy_test_list))

0.5716666666666667
0.5658546749965786
0.5716666666666667


In [24]:
# Initialize a dictionary to store metrics for different models
models_evaluation_metrics = {}

# Example model identifiers
model_names = ['BasicGraphModel', 'GraphSAGEModel', 'GINModel']

# Initialize metric dictionaries for each model
for model_name in model_names:
    models_evaluation_metrics[model_name] = {'f1_micro': [], 'f1_macro': [], 'accuracy': []}

def update_model_metrics(model_name, f1_micro, f1_macro, accuracy):
    models_evaluation_metrics[model_name]['f1_micro'].append(f1_micro)
    models_evaluation_metrics[model_name]['f1_macro'].append(f1_macro)
    models_evaluation_metrics[model_name]['accuracy'].append(accuracy)

#update_model_metrics('BasicGraphModel', f1_micro_test_list, f1_macro_test_list, accuracy_test_list)

print(models_evaluation_metrics)


{'BasicGraphModel': {'f1_micro': [], 'f1_macro': [], 'accuracy': []}, 'GraphSAGEModel': {'f1_micro': [], 'f1_macro': [], 'accuracy': []}, 'GINModel': {'f1_micro': [], 'f1_macro': [], 'accuracy': []}}


In [41]:
models_evaluation_metrics

{'BasicGraphModel': {'f1_micro': [[0.5416666666666666,
    0.575,
    0.575,
    0.6083333333333333,
    0.5583333333333333]],
  'f1_macro': [[0.5216981751875497,
    0.5734944050949523,
    0.5641898378231431,
    0.6101596293233924,
    0.5597313275538554]],
  'accuracy': [[0.5416666666666666,
    0.575,
    0.575,
    0.6083333333333333,
    0.5583333333333333]]},
 'GraphSAGEModel': {'f1_micro': [], 'f1_macro': [], 'accuracy': []},
 'GINModel': {'f1_micro': [], 'f1_macro': [], 'accuracy': []}}

# Try the model GraphSAGE

In [20]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import SAGEConv, global_mean_pool

class GraphSAGEModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=2, dropout_rate=0.5):
        super(GraphSAGEModel, self).__init__()
        self.num_layers = num_layers
        self.convs = nn.ModuleList()
        self.bns = nn.ModuleList()
        self.dropout_rate = dropout_rate

        # Input layer
        self.convs.append(SAGEConv(input_dim, hidden_dim))
        self.bns.append(nn.BatchNorm1d(hidden_dim))

        # Hidden layers
        for i in range(num_layers - 2):
            self.convs.append(SAGEConv(hidden_dim, hidden_dim))
            self.bns.append(nn.BatchNorm1d(hidden_dim))

        # Output layer
        self.convs.append(SAGEConv(hidden_dim, output_dim))

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        # Go through the layers
        for i in range(self.num_layers - 1):
            x = self.convs[i](x, edge_index)
            x = self.bns[i](x)
            x = F.relu(x)
            x = F.dropout(x, p=self.dropout_rate, training=self.training)

        # Output layer
        x = self.convs[-1](x, edge_index)

        # Apply global mean pooling to get graph-level output
        x = global_mean_pool(x, data.batch)

        return x


In [21]:
# Outer k-fold cross-validation setup
outer_k_folds = 5
inner_k_folds = 5
num_epochs = 200

# Possible hyperparameters to tune
learning_rates = [0.01, 0.001]
batch_sizes = [8, 16]
patiences = [10, 50]

# Set list to store the evaluation metrics
f1_micro_test_list2 = []
f1_macro_test_list2 = []
accuracy_test_list2 = []

# Prepare the outer k-fold cross-validation
outer_kf = KFold(n_splits=outer_k_folds, shuffle=True, random_state=42)

# Loop over each fold for the outer k-fold
for fold, (train_val_idx, test_idx) in enumerate(outer_kf.split(dataset_en)):
    print(f"Outer FOLD {fold}")
    print("--------------------------------")

    # Split dataset into train_val and test for the current outer fold
    train_val_dataset = dataset_en[train_val_idx]
    test_dataset = dataset_en[test_idx]

    # Initialize the best hyperparameter set and its performance score
    best_hyperparams = None
    best_score = 0

    # Inner k-fold cross-validation for hyperparameter tuning
    inner_kf = KFold(n_splits=inner_k_folds, shuffle=True, random_state=42)

    # Create all combinations of hyperparameters
    all_params = list(product(learning_rates, batch_sizes, patiences))

    # Loop over all combinations of hyperparameters
    for params in all_params:
        lr, batch_size, patience = params
        inner_scores = []

        # Perform inner k-fold cross-validation
        for inner_fold, (inner_train_idx, inner_val_idx) in enumerate(inner_kf.split(train_val_dataset)):
            print(f"Inner FOLD {inner_fold}")
            print(f"Hyperparameters: LR={lr}, Batch Size={batch_size}, Patience={patience}")

            # Split dataset into inner train and validation sets
            inner_train_dataset = train_val_dataset[inner_train_idx]
            inner_val_dataset = train_val_dataset[inner_val_idx]

            # Define train and validation dataloaders for the current inner fold
            inner_train_loader = DataLoader(inner_train_dataset, batch_size=batch_size, shuffle=True)
            inner_val_loader = DataLoader(inner_val_dataset, batch_size=batch_size, shuffle=False)

            # Initialize model and optimizer for the current inner fold
            model = GraphSAGEModel(
                input_dim=dataset_en.num_node_features,
                hidden_dim=256,
                output_dim=dataset_en.num_classes,
                num_layers=2,
                dropout_rate=0.5
            ).to(device)

            optimizer = torch.optim.Adam(model.parameters(), lr=lr)
            loss_fcn = torch.nn.CrossEntropyLoss()

            # Train the model for the current inner fold
            inner_metrics = train(model, loss_fcn, device, optimizer, num_epochs, inner_train_loader, inner_val_loader, patience)

            # Evaluate model performance, e.g., using validation F1 score
            # Save the model performance score for the current hyperparameter combination
            inner_scores.append(inner_metrics['best_score'])

        # Calculate the average performance over all inner folds for the current hyperparameter set
        average_score = np.mean(inner_scores)
        print(f"Average Score for hyperparameters {params}: {average_score}")

        # If the current hyperparameters outperform the previous ones, update the best_hyperparams
        if average_score > best_score:
            best_hyperparams = params
            best_score = average_score

    print(f"Best hyperparameters for Outer FOLD {fold}: {best_hyperparams} with score {best_score}")

    # Now retrain the model on the full train_val_dataset with the best_hyperparams

    # Extract best hyperparameters
    best_lr, best_batch_size, best_patience = best_hyperparams

    # DataLoader for the combined training and validation set
    train_val_loader = DataLoader(train_val_dataset, batch_size=best_batch_size, shuffle=True)

    # DataLoader for the test set
    test_loader = DataLoader(test_dataset, batch_size=best_batch_size, shuffle=False)

    # Initialize the model with the best hyperparameters
    model = GraphSAGEModel(
                input_dim=dataset_en.num_node_features,
                hidden_dim=256,
                output_dim=dataset_en.num_classes,
                num_layers=2,
                dropout_rate=0.5
            ).to(device)
    # Initialize the optimizer with the best learning rate
    optimizer = torch.optim.Adam(model.parameters(), lr=best_lr)

    # Loss function
    loss_fcn = torch.nn.CrossEntropyLoss()

    # Retrain the model on the full train_val_dataset
    retrained_metrics = train(
        model,
        loss_fcn,
        device,
        optimizer,
        num_epochs,
        train_val_loader,
        test_loader,  # We're using the test_loader here to monitor the performance, but we do not use this for making decisions
        best_patience
    )

    # After retraining, evaluate on the test set
    f1_micro_test, f1_macro_test, accuracy_test = evaluate_metrics(model, device, test_loader)
    print(f"Test set evaluation - F1 Micro: {f1_micro_test:.4f}, F1 Macro: {f1_macro_test:.4f}, Accuracy: {accuracy_test:.4f}")
    f1_micro_test_list2.append(f1_micro_test)
    f1_macro_test_list2.append(f1_macro_test)
    accuracy_test_list2.append(accuracy_test)
    # Optionally, save your retrained model
    torch.save(model.state_dict(), f'GSAGE_fold_{fold}.pth')



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 43, Train Loss: 1.2831, Val Loss: 2.1009, F1 Micro: 0.2292, F1 Macro: 0.2175, Accuracy: 0.2292
Epoch 44, Train Loss: 1.2897, Val Loss: 1.6548, F1 Micro: 0.3854, F1 Macro: 0.3455, Accuracy: 0.3854
Epoch 45, Train Loss: 1.2679, Val Loss: 1.6538, F1 Micro: 0.3438, F1 Macro: 0.3437, Accuracy: 0.3438
Epoch 46, Train Loss: 1.2413, Val Loss: 1.5623, F1 Micro: 0.4271, F1 Macro: 0.3948, Accuracy: 0.4271
Epoch 47, Train Loss: 1.2798, Val Loss: 1.6896, F1 Micro: 0.3854, F1 Macro: 0.3738, Accuracy: 0.3854
Epoch 48, Train Loss: 1.2218, Val Loss: 1.7280, F1 Micro: 0.3854, F1 Macro: 0.3005, Accuracy: 0.3854
Epoch 49, Train Loss: 1.2482, Val Loss: 1.8296, F1 Micro: 0.3542, F1 Macro: 0.2860, Accuracy: 0.3542
Epoch 50, Train Loss: 1.2382, Val Loss: 1.5301, F1 Micro: 0.3854, F1 Macro: 0.3441, Accuracy: 0.3854
Epoch 51, Train Loss: 1.2042, Val Loss: 1.6051, F1 Micro: 0.3333, F1 Macro: 0.3003, Accuracy: 0.3333
Epoch 52, Train Loss: 1.20

In [25]:
models_evaluation_metrics['GraphSAGEModel']['f1_micro_test_list2']=0
models_evaluation_metrics['GraphSAGEModel']['f1_macro_test_list2']=0
models_evaluation_metrics['GraphSAGEModel']['accuracy_test_list2']=0

update_model_metrics('GraphSAGEModel', f1_micro_test_list2, f1_macro_test_list2, accuracy_test_list2)
print(models_evaluation_metrics)


{'BasicGraphModel': {'f1_micro': [], 'f1_macro': [], 'accuracy': []}, 'GraphSAGEModel': {'f1_micro': [[0.5416666666666666, 0.5916666666666667, 0.55, 0.625, 0.625]], 'f1_macro': [[0.5416629853341376, 0.5733081992486917, 0.5063247285356819, 0.6092964760197358, 0.6286759471141622]], 'accuracy': [[0.5416666666666666, 0.5916666666666667, 0.55, 0.625, 0.625]], 'f1_micro_test_list2': 0, 'f1_macro_test_list2': 0, 'accuracy_test_list2': 0}, 'GINModel': {'f1_micro': [], 'f1_macro': [], 'accuracy': []}}


# Try the model GIN

In [25]:
from torch_geometric.nn import GINConv, global_mean_pool, BatchNorm

class GINModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, num_layers=2, dropout_rate=0.5):
        super(GINModel, self).__init__()
        self.num_layers = num_layers
        self.convs = nn.ModuleList()
        self.bns = nn.ModuleList()
        self.dropout_rate = dropout_rate

        # MLP for GINConv
        mlp = lambda input_dim, output_dim: nn.Sequential(
            nn.Linear(input_dim, output_dim),
            nn.ReLU(),
            nn.Linear(output_dim, output_dim)
        )

        # Input layer
        self.convs.append(GINConv(mlp(input_dim, hidden_dim)))
        self.bns.append(BatchNorm(hidden_dim))

        # Hidden layers
        for _ in range(num_layers - 2):
            self.convs.append(GINConv(mlp(hidden_dim, hidden_dim)))
            self.bns.append(BatchNorm(hidden_dim))

        # Output layer
        self.convs.append(GINConv(mlp(hidden_dim, output_dim)))
        # Note: Batch normalization is not applied after the last GINConv layer before global pooling

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        # Go through the layers
        for i in range(self.num_layers - 1):
            x = self.convs[i](x, edge_index)
            x = self.bns[i](x)
            x = F.relu(x)
            x = F.dropout(x, p=self.dropout_rate, training=self.training)

        # Output layer
        x = self.convs[-1](x, edge_index)

        # Apply global mean pooling to get graph-level output
        x = global_mean_pool(x, data.batch)

        return x


In [46]:
# Outer k-fold cross-validation setup
outer_k_folds = 5
inner_k_folds = 5
num_epochs = 200

# Possible hyperparameters to tune
learning_rates = [0.01, 0.001]
batch_sizes = [8, 16]
patiences = [10, 50]

# Set list to store the evaluation metrics
f1_micro_test_list3 = []
f1_macro_test_list3 = []
accuracy_test_list3 = []

# Prepare the outer k-fold cross-validation
outer_kf = KFold(n_splits=outer_k_folds, shuffle=True, random_state=42)

# Loop over each fold for the outer k-fold
for fold, (train_val_idx, test_idx) in enumerate(outer_kf.split(dataset_en)):
    print(f"Outer FOLD {fold}")
    print("--------------------------------")

    # Split dataset into train_val and test for the current outer fold
    train_val_dataset = dataset_en[train_val_idx]
    test_dataset = dataset_en[test_idx]

    # Initialize the best hyperparameter set and its performance score
    best_hyperparams = None
    best_score = 0

    # Inner k-fold cross-validation for hyperparameter tuning
    inner_kf = KFold(n_splits=inner_k_folds, shuffle=True, random_state=42)

    # Create all combinations of hyperparameters
    all_params = list(product(learning_rates, batch_sizes, patiences))

    # Loop over all combinations of hyperparameters
    for params in all_params:
        lr, batch_size, patience = params
        inner_scores = []

        # Perform inner k-fold cross-validation
        for inner_fold, (inner_train_idx, inner_val_idx) in enumerate(inner_kf.split(train_val_dataset)):
            print(f"Inner FOLD {inner_fold}")
            print(f"Hyperparameters: LR={lr}, Batch Size={batch_size}, Patience={patience}")

            # Split dataset into inner train and validation sets
            inner_train_dataset = train_val_dataset[inner_train_idx]
            inner_val_dataset = train_val_dataset[inner_val_idx]

            # Define train and validation dataloaders for the current inner fold
            inner_train_loader = DataLoader(inner_train_dataset, batch_size=batch_size, shuffle=True)
            inner_val_loader = DataLoader(inner_val_dataset, batch_size=batch_size, shuffle=False)

            # Initialize model and optimizer for the current inner fold
            model = GINModel(
                input_dim=dataset_en.num_node_features,
                hidden_dim=256,
                output_dim=dataset_en.num_classes,
                dropout_rate=0.5
            ).to(device)

            optimizer = torch.optim.Adam(model.parameters(), lr=lr)
            loss_fcn = torch.nn.CrossEntropyLoss()

            # Train the model for the current inner fold
            inner_metrics = train(model, loss_fcn, device, optimizer, num_epochs, inner_train_loader, inner_val_loader, patience)

            # Evaluate model performance, e.g., using validation F1 score
            # Save the model performance score for the current hyperparameter combination
            inner_scores.append(inner_metrics['best_score'])

        # Calculate the average performance over all inner folds for the current hyperparameter set
        average_score = np.mean(inner_scores)
        print(f"Average Score for hyperparameters {params}: {average_score}")

        # If the current hyperparameters outperform the previous ones, update the best_hyperparams
        if average_score > best_score:
            best_hyperparams = params
            best_score = average_score

    print(f"Best hyperparameters for Outer FOLD {fold}: {best_hyperparams} with score {best_score}")

    # Now retrain the model on the full train_val_dataset with the best_hyperparams

    # Extract best hyperparameters
    best_lr, best_batch_size, best_patience = best_hyperparams

    # DataLoader for the combined training and validation set
    train_val_loader = DataLoader(train_val_dataset, batch_size=best_batch_size, shuffle=True)

    # DataLoader for the test set
    test_loader = DataLoader(test_dataset, batch_size=best_batch_size, shuffle=False)

    # Initialize the model with the best hyperparameters
    model = GINModel(
                input_dim=dataset_en.num_node_features,
                hidden_dim=256,
                output_dim=dataset_en.num_classes,
                num_layers=2,
                dropout_rate=0.5
            ).to(device)
    # Initialize the optimizer with the best learning rate
    optimizer = torch.optim.Adam(model.parameters(), lr=best_lr)

    # Loss function
    loss_fcn = torch.nn.CrossEntropyLoss()

    # Retrain the model on the full train_val_dataset
    retrained_metrics = train(
        model,
        loss_fcn,
        device,
        optimizer,
        num_epochs,
        train_val_loader,
        test_loader,  # We're using the test_loader here to monitor the performance, but we do not use this for making decisions
        best_patience
    )

    # After retraining, evaluate on the test set
    f1_micro_test, f1_macro_test, accuracy_test = evaluate_metrics(model, device, test_loader)
    print(f"Test set evaluation - F1 Micro: {f1_micro_test:.4f}, F1 Macro: {f1_macro_test:.4f}, Accuracy: {accuracy_test:.4f}")
    f1_micro_test_list3.append(f1_micro_test)
    f1_macro_test_list3.append(f1_macro_test)
    accuracy_test_list3.append(accuracy_test)
    # Optionally, save your retrained model
    torch.save(model.state_dict(), f'GSAGE_fold_{fold}.pth')



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 16, Train Loss: 1.6826, Val Loss: 1.8837, F1 Micro: 0.2292, F1 Macro: 0.1600, Accuracy: 0.2292
Epoch 17, Train Loss: 1.7126, Val Loss: 1.9081, F1 Micro: 0.1875, F1 Macro: 0.1276, Accuracy: 0.1875
Epoch 18, Train Loss: 1.6811, Val Loss: 1.7543, F1 Micro: 0.2396, F1 Macro: 0.1730, Accuracy: 0.2396
Epoch 19, Train Loss: 1.6941, Val Loss: 1.8801, F1 Micro: 0.2292, F1 Macro: 0.1606, Accuracy: 0.2292
Epoch 20, Train Loss: 1.6783, Val Loss: 1.7680, F1 Micro: 0.2292, F1 Macro: 0.1643, Accuracy: 0.2292
Epoch 21, Train Loss: 1.6840, Val Loss: 1.8256, F1 Micro: 0.2292, F1 Macro: 0.1582, Accuracy: 0.2292
Epoch 22, Train Loss: 1.6784, Val Loss: 1.8337, F1 Micro: 0.2812, F1 Macro: 0.2079, Accuracy: 0.2812
Epoch 23, Train Loss: 1.6827, Val Loss: 1.8336, F1 Micro: 0.2188, F1 Macro: 0.1459, Accuracy: 0.2188
Epoch 24, Train Loss: 1.6845, Val Loss: 1.8093, F1 Micro: 0.2292, F1 Macro: 0.1645, Accuracy: 0.2292
Epoch 25, Train Loss: 1.66

In [47]:
#models_evaluation_metrics['GINModel']['f1_micro_test_list2']=0
#models_evaluation_metrics['GINModel']['f1_macro_test_list2']=0
#models_evaluation_metrics['GINModel']['accuracy_test_list2']=0

update_model_metrics('GINModel', f1_micro_test_list3, f1_macro_test_list3, accuracy_test_list3)
print(models_evaluation_metrics)


{'BasicGraphModel': {'f1_micro': [[0.5416666666666666, 0.575, 0.575, 0.6083333333333333, 0.5583333333333333]], 'f1_macro': [[0.5216981751875497, 0.5734944050949523, 0.5641898378231431, 0.6101596293233924, 0.5597313275538554]], 'accuracy': [[0.5416666666666666, 0.575, 0.575, 0.6083333333333333, 0.5583333333333333]]}, 'GraphSAGEModel': {'f1_micro': [[0.5916666666666667, 0.6, 0.6333333333333333, 0.6583333333333333, 0.5083333333333333]], 'f1_macro': [[0.6040470090032383, 0.5977082905775605, 0.6285090585090586, 0.6539447051206633, 0.5127822164358357]], 'accuracy': [[0.5916666666666667, 0.6, 0.6333333333333333, 0.6583333333333333, 0.5083333333333333]]}, 'GINModel': {'f1_micro': [[0.5916666666666667, 0.4166666666666667, 0.5166666666666667, 0.55, 0.5166666666666667]], 'f1_macro': [[0.5938997746174508, 0.4003812470564388, 0.5118082202875694, 0.5469489826498971, 0.5208997141658859]], 'accuracy': [[0.5916666666666667, 0.4166666666666667, 0.5166666666666667, 0.55, 0.5166666666666667]]}}


In [48]:
models_evaluation_metrics

{'BasicGraphModel': {'f1_micro': [[0.5416666666666666,
    0.575,
    0.575,
    0.6083333333333333,
    0.5583333333333333]],
  'f1_macro': [[0.5216981751875497,
    0.5734944050949523,
    0.5641898378231431,
    0.6101596293233924,
    0.5597313275538554]],
  'accuracy': [[0.5416666666666666,
    0.575,
    0.575,
    0.6083333333333333,
    0.5583333333333333]]},
 'GraphSAGEModel': {'f1_micro': [[0.5916666666666667,
    0.6,
    0.6333333333333333,
    0.6583333333333333,
    0.5083333333333333]],
  'f1_macro': [[0.6040470090032383,
    0.5977082905775605,
    0.6285090585090586,
    0.6539447051206633,
    0.5127822164358357]],
  'accuracy': [[0.5916666666666667,
    0.6,
    0.6333333333333333,
    0.6583333333333333,
    0.5083333333333333]]},
 'GINModel': {'f1_micro': [[0.5916666666666667,
    0.4166666666666667,
    0.5166666666666667,
    0.55,
    0.5166666666666667]],
  'f1_macro': [[0.5938997746174508,
    0.4003812470564388,
    0.5118082202875694,
    0.5469489826498971,

In [13]:
import pandas as pd
data = {
    'BasicGraphModel': {'f1_micro': [[0.5416666666666666, 0.575, 0.575, 0.6083333333333333, 0.5583333333333333]],
    'f1_macro': [[0.5216981751875497, 0.5734944050949523, 0.5641898378231431, 0.6101596293233924, 0.5597313275538554]],
    'accuracy': [[0.5416666666666666, 0.575, 0.575, 0.6083333333333333, 0.5583333333333333]]},
    'GraphSAGEModel': {'f1_micro': [[0.5416666666666666, 0.5916666666666667, 0.55, 0.625, 0.625]],
    'f1_macro': [[0.5416629853341376, 0.5733081992486917, 0.5063247285356819, 0.6092964760197358, 0.6286759471141622]],
    'accuracy': [[0.5416666666666666, 0.5916666666666667, 0.55, 0.625, 0.625]]},
    'GINModel': {'f1_micro': [[0.5916666666666667, 0.4166666666666667, 0.5166666666666667, 0.55, 0.5166666666666667]],
    'f1_macro': [[0.5938997746174508, 0.4003812470564388, 0.5118082202875694, 0.5469489826498971, 0.5208997141658859]],
    'accuracy': [[0.5916666666666667, 0.4166666666666667, 0.5166666666666667, 0.55, 0.5166666666666667]]}
}

# Convert the nested structure to a flat structure suitable for DataFrame
records = []
for model_name, metrics in data.items():
    for metric_name, metric_values in metrics.items():
        for values in metric_values:  # metric_values is a list of lists
            record = {"Model": model_name, "Metric": metric_name}
            # Assuming a fixed number of folds, e.g., 5
            for fold_index, value in enumerate(values, start=1):
                record[f"Fold{fold_index}"] = value
            records.append(record)

# Create DataFrame
df = pd.DataFrame(records)
df['Mean'] = df.mean(axis=1)

print(df)


             Model    Metric     Fold1     Fold2     Fold3     Fold4  \
0  BasicGraphModel  f1_micro  0.541667  0.575000  0.575000  0.608333   
1  BasicGraphModel  f1_macro  0.521698  0.573494  0.564190  0.610160   
2  BasicGraphModel  accuracy  0.541667  0.575000  0.575000  0.608333   
3   GraphSAGEModel  f1_micro  0.541667  0.591667  0.550000  0.625000   
4   GraphSAGEModel  f1_macro  0.541663  0.573308  0.506325  0.609296   
5   GraphSAGEModel  accuracy  0.541667  0.591667  0.550000  0.625000   
6         GINModel  f1_micro  0.591667  0.416667  0.516667  0.550000   
7         GINModel  f1_macro  0.593900  0.400381  0.511808  0.546949   
8         GINModel  accuracy  0.591667  0.416667  0.516667  0.550000   

      Fold5      Mean  
0  0.558333  0.571667  
1  0.559731  0.565855  
2  0.558333  0.571667  
3  0.625000  0.586667  
4  0.628676  0.571854  
5  0.625000  0.586667  
6  0.516667  0.518333  
7  0.520900  0.514788  
8  0.516667  0.518333  


  df['Mean'] = df.mean(axis=1)


In [14]:
df

Unnamed: 0,Model,Metric,Fold1,Fold2,Fold3,Fold4,Fold5,Mean
0,BasicGraphModel,f1_micro,0.541667,0.575,0.575,0.608333,0.558333,0.571667
1,BasicGraphModel,f1_macro,0.521698,0.573494,0.56419,0.61016,0.559731,0.565855
2,BasicGraphModel,accuracy,0.541667,0.575,0.575,0.608333,0.558333,0.571667
3,GraphSAGEModel,f1_micro,0.541667,0.591667,0.55,0.625,0.625,0.586667
4,GraphSAGEModel,f1_macro,0.541663,0.573308,0.506325,0.609296,0.628676,0.571854
5,GraphSAGEModel,accuracy,0.541667,0.591667,0.55,0.625,0.625,0.586667
6,GINModel,f1_micro,0.591667,0.416667,0.516667,0.55,0.516667,0.518333
7,GINModel,f1_macro,0.5939,0.400381,0.511808,0.546949,0.5209,0.514788
8,GINModel,accuracy,0.591667,0.416667,0.516667,0.55,0.516667,0.518333


# Do the same thing to the dataset REDDIT-BINARY



In [None]:
# The dataset Reddit-Binary has no node_features, so we use node_degree as its feature
import torch
from torch_geometric.datasets import TUDataset
from torch_geometric.transforms import BaseTransform
from torch_geometric.utils import degree

class AddDegreeFeature(BaseTransform):
    def __call__(self, data):
        deg = degree(data.edge_index[0], dtype=torch.float)
        data.x = deg.unsqueeze(-1)  # Make it a 2D tensor [num_nodes, 1]
        return data

# Load your dataset and apply the transformation
dataset_rd = TUDataset(root='/tmp/REDDIT-BINARY', name='REDDIT-BINARY', transform=AddDegreeFeature())

# Now verify by printing the features of the first few graphs
for i, data in enumerate(dataset_rd):
    if i >= 5:  # Check the first 5 graphs
        break
    print(data.x)


# Basic model for reddit

In [None]:
from torch.utils.data import Subset

# Outer k-fold cross-validation setup
outer_k_folds = 5
inner_k_folds = 5
num_epochs = 200

# Possible hyperparameters to tune
learning_rates = [0.01, 0.001]
batch_sizes = [8, 16]
patiences = [10, 50]

# Set list to store the evaluation metrics
f1_micro_test_list = []
f1_macro_test_list = []
accuracy_test_list = []

# Prepare the outer k-fold cross-validation
outer_kf = KFold(n_splits=outer_k_folds, shuffle=True, random_state=42)

# Loop over each fold for the outer k-fold
for fold, (train_val_idx, test_idx) in enumerate(outer_kf.split(dataset_rd)):
    print(f"Outer FOLD {fold}")
    print("--------------------------------")

    # Split dataset into train_val and test for the current outer fold
    train_val_subset = Subset(dataset_rd, train_val_idx)
    test_subset = Subset(dataset_rd, test_idx)

    # Initialize the best hyperparameter set and its performance score
    best_hyperparams = None
    best_score = 0

    # Inner k-fold cross-validation for hyperparameter tuning
    inner_kf = KFold(n_splits=inner_k_folds, shuffle=True, random_state=42)

    # Create all combinations of hyperparameters
    all_params = list(product(learning_rates, batch_sizes, patiences))

    # Loop over all combinations of hyperparameters
    for params in all_params:
        lr, batch_size, patience = params
        inner_scores = []

        # Perform inner k-fold cross-validation
        for inner_fold, (inner_train_idx, inner_val_idx) in enumerate(inner_kf.split(train_val_dataset)):
            print(f"Inner FOLD {inner_fold}")
            print(f"Hyperparameters: LR={lr}, Batch Size={batch_size}, Patience={patience}")

            # Split dataset into inner train and validation sets
            inner_train_subset = Subset(train_val_subset, inner_train_idx)
            inner_val_subset = Subset(train_val_subset, inner_val_idx)

            # Define train and validation dataloaders for the current inner fold
            inner_train_loader = DataLoader(inner_train_subset, batch_size=batch_size, shuffle=True)
            inner_val_loader = DataLoader(inner_val_subset, batch_size=batch_size, shuffle=False)

            # Initialize model and optimizer for the current inner fold
            model = BasicGraphModel(
                input_size=1,
                hidden_size=256,
                output_size=dataset_rd.num_classes,
                dropout_rate=0.5
            ).to(device)

            optimizer = torch.optim.Adam(model.parameters(), lr=lr)
            loss_fcn = torch.nn.CrossEntropyLoss()

            # Train the model for the current inner fold
            inner_metrics = train(model, loss_fcn, device, optimizer, num_epochs, inner_train_loader, inner_val_loader, patience)

            # Evaluate model performance, e.g., using validation F1 score
            # Save the model performance score for the current hyperparameter combination
            inner_scores.append(inner_metrics['best_score'])

        # Calculate the average performance over all inner folds for the current hyperparameter set
        average_score = np.mean(inner_scores)
        print(f"Average Score for hyperparameters {params}: {average_score}")

        # If the current hyperparameters outperform the previous ones, update the best_hyperparams
        if average_score > best_score:
            best_hyperparams = params
            best_score = average_score

    print(f"Best hyperparameters for Outer FOLD {fold}: {best_hyperparams} with score {best_score}")

    # Now retrain the model on the full train_val_dataset with the best_hyperparams

    # Extract best hyperparameters
    best_lr, best_batch_size, best_patience = best_hyperparams

    # DataLoader for the combined training and validation set
    train_val_loader = DataLoader(train_val_subset, batch_size=best_batch_size, shuffle=True)

    # DataLoader for the test set
    test_loader = DataLoader(test_subset, batch_size=best_batch_size, shuffle=False)

    # Initialize the model with the best hyperparameters
    model = BasicGraphModel(
        input_size=1,
        hidden_size=256,
        output_size=dataset_rd.num_classes,
        dropout_rate=0.5  # You could also tune the dropout rate if you wanted
    ).to(device)

    # Initialize the optimizer with the best learning rate
    optimizer = torch.optim.Adam(model.parameters(), lr=best_lr)

    # Loss function
    loss_fcn = torch.nn.CrossEntropyLoss()

    # Retrain the model on the full train_val_dataset
    retrained_metrics = train(
        model,
        loss_fcn,
        device,
        optimizer,
        num_epochs,
        train_val_loader,
        test_loader,  # We're using the test_loader here to monitor the performance, but we do not use this for making decisions
        best_patience
    )

    # After retraining, evaluate on the test set
    f1_micro_test, f1_macro_test, accuracy_test = evaluate_metrics(model, device, test_loader)
    print(f"Test set evaluation - F1 Micro: {f1_micro_test:.4f}, F1 Macro: {f1_macro_test:.4f}, Accuracy: {accuracy_test:.4f}")
    f1_micro_test_list.append(f1_micro_test)
    f1_macro_test_list.append(f1_macro_test)
    accuracy_test_list.append(accuracy_test)
    # Optionally, save your retrained model
    torch.save(model.state_dict(), f'rd_Basic_model_fold_{fold}.pth')




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 154, Train Loss: 0.5199, Val Loss: 0.4386, F1 Micro: 0.7719, F1 Macro: 0.7718, Accuracy: 0.7719
Epoch 155, Train Loss: 0.4836, Val Loss: 0.4516, F1 Micro: 0.8156, F1 Macro: 0.8143, Accuracy: 0.8156
Epoch 156, Train Loss: 0.4814, Val Loss: 0.3971, F1 Micro: 0.7906, F1 Macro: 0.7864, Accuracy: 0.7906
Epoch 157, Train Loss: 0.4927, Val Loss: 0.4502, F1 Micro: 0.7500, F1 Macro: 0.7333, Accuracy: 0.7500
Epoch 158, Train Loss: 0.5153, Val Loss: 0.4213, F1 Micro: 0.7625, F1 Macro: 0.7475, Accuracy: 0.7625
Epoch 159, Train Loss: 0.5047, Val Loss: 0.4833, F1 Micro: 0.7312, F1 Macro: 0.7047, Accuracy: 0.7312
Epoch 160, Train Loss: 0.5170, Val Loss: 0.5018, F1 Micro: 0.7531, F1 Macro: 0.7362, Accuracy: 0.7531
Epoch 161, Train Loss: 0.4849, Val Loss: 0.4317, F1 Micro: 0.7781, F1 Macro: 0.7728, Accuracy: 0.7781
Epoch 162, Train Loss: 0.4976, Val Loss: 0.6837, F1 Micro: 0.5062, F1 Macro: 0.3974, Accuracy: 0.5062
Epoch 163, Train 

In [100]:
# Initialize a dictionary to store metrics for different models
models_evaluation_metrics_rd = {}

# Example model identifiers
model_names = ['BasicGraphModel', 'GraphSAGEModel', 'GINModel']

# Initialize metric dictionaries for each model
for model_name in model_names:
    models_evaluation_metrics[model_name] = {'f1_micro': [], 'f1_macro': [], 'accuracy': []}

def update_model_metrics(model_name, f1_micro, f1_macro, accuracy):
    models_evaluation_metrics[model_name]['f1_micro'].append(f1_micro)
    models_evaluation_metrics[model_name]['f1_macro'].append(f1_macro)
    models_evaluation_metrics[model_name]['accuracy'].append(accuracy)

update_model_metrics('BasicGraphModel', f1_micro_test_list, f1_macro_test_list, accuracy_test_list)

print(models_evaluation_metrics)


{'BasicGraphModel': {'f1_micro': [[0.6975, 0.8000000000000002, 0.6975, 0.7175, 0.8025]], 'f1_macro': [[0.6892954581929065, 0.7999199679871949, 0.6882226760028085, 0.7162069679979406, 0.8001909567438713]], 'accuracy': [[0.6975, 0.8, 0.6975, 0.7175, 0.8025]]}, 'GraphSAGEModel': {'f1_micro': [], 'f1_macro': [], 'accuracy': []}, 'GINModel': {'f1_micro': [], 'f1_macro': [], 'accuracy': []}}


#GraphSAGE model for reddit

In [19]:
from torch.utils.data import Subset

# Outer k-fold cross-validation setup
outer_k_folds = 5
inner_k_folds = 5
num_epochs = 200

# Possible hyperparameters to tune
learning_rates = [0.01, 0.001]
batch_sizes = [8, 16]
patiences = [10, 50]

# Set list to store the evaluation metrics
f1_micro_test_list = []
f1_macro_test_list = []
accuracy_test_list = []

# Prepare the outer k-fold cross-validation
outer_kf = KFold(n_splits=outer_k_folds, shuffle=True, random_state=42)

# Loop over each fold for the outer k-fold
for fold, (train_val_idx, test_idx) in enumerate(outer_kf.split(dataset_rd)):
    print(f"Outer FOLD {fold}")
    print("--------------------------------")

    # Split dataset into train_val and test for the current outer fold
    train_val_subset = Subset(dataset_rd, train_val_idx)
    test_subset = Subset(dataset_rd, test_idx)

    # Initialize the best hyperparameter set and its performance score
    best_hyperparams = None
    best_score = 0

    # Inner k-fold cross-validation for hyperparameter tuning
    inner_kf = KFold(n_splits=inner_k_folds, shuffle=True, random_state=42)

    # Create all combinations of hyperparameters
    all_params = list(product(learning_rates, batch_sizes, patiences))

    # Loop over all combinations of hyperparameters
    for params in all_params:
        lr, batch_size, patience = params
        inner_scores = []

        # Perform inner k-fold cross-validation
        for inner_fold, (inner_train_idx, inner_val_idx) in enumerate(inner_kf.split(train_val_dataset)):
            print(f"Inner FOLD {inner_fold}")
            print(f"Hyperparameters: LR={lr}, Batch Size={batch_size}, Patience={patience}")

            # Split dataset into inner train and validation sets
            inner_train_subset = Subset(train_val_subset, inner_train_idx)
            inner_val_subset = Subset(train_val_subset, inner_val_idx)

            # Define train and validation dataloaders for the current inner fold
            inner_train_loader = DataLoader(inner_train_subset, batch_size=batch_size, shuffle=True)
            inner_val_loader = DataLoader(inner_val_subset, batch_size=batch_size, shuffle=False)

            # Initialize model and optimizer for the current inner fold
            model = GraphSAGEModel(
                input_size=1,
                hidden_size=256,
                output_size=dataset_rd.num_classes,
                dropout_rate=0.5
            ).to(device)

            optimizer = torch.optim.Adam(model.parameters(), lr=lr)
            loss_fcn = torch.nn.CrossEntropyLoss()

            # Train the model for the current inner fold
            inner_metrics = train(model, loss_fcn, device, optimizer, num_epochs, inner_train_loader, inner_val_loader, patience)

            # Evaluate model performance, e.g., using validation F1 score
            # Save the model performance score for the current hyperparameter combination
            inner_scores.append(inner_metrics['best_score'])

        # Calculate the average performance over all inner folds for the current hyperparameter set
        average_score = np.mean(inner_scores)
        print(f"Average Score for hyperparameters {params}: {average_score}")

        # If the current hyperparameters outperform the previous ones, update the best_hyperparams
        if average_score > best_score:
            best_hyperparams = params
            best_score = average_score

    print(f"Best hyperparameters for Outer FOLD {fold}: {best_hyperparams} with score {best_score}")

    # Now retrain the model on the full train_val_dataset with the best_hyperparams

    # Extract best hyperparameters
    best_lr, best_batch_size, best_patience = best_hyperparams

    # DataLoader for the combined training and validation set
    train_val_loader = DataLoader(train_val_subset, batch_size=best_batch_size, shuffle=True)

    # DataLoader for the test set
    test_loader = DataLoader(test_subset, batch_size=best_batch_size, shuffle=False)

    # Initialize the model with the best hyperparameters
    model = GraphSAGEModel(
        input_size=1,
        hidden_size=256,
        output_size=dataset_rd.num_classes,
        dropout_rate=0.5  # You could also tune the dropout rate if you wanted
    ).to(device)

    # Initialize the optimizer with the best learning rate
    optimizer = torch.optim.Adam(model.parameters(), lr=best_lr)

    # Loss function
    loss_fcn = torch.nn.CrossEntropyLoss()

    # Retrain the model on the full train_val_dataset
    retrained_metrics = train(
        model,
        loss_fcn,
        device,
        optimizer,
        num_epochs,
        train_val_loader,
        test_loader,  # We're using the test_loader here to monitor the performance, but we do not use this for making decisions
        best_patience
    )

    # After retraining, evaluate on the test set
    f1_micro_test, f1_macro_test, accuracy_test = evaluate_metrics(model, device, test_loader)
    print(f"Test set evaluation - F1 Micro: {f1_micro_test:.4f}, F1 Macro: {f1_macro_test:.4f}, Accuracy: {accuracy_test:.4f}")
    f1_micro_test_list.append(f1_micro_test)
    f1_macro_test_list.append(f1_macro_test)
    accuracy_test_list.append(accuracy_test)
    # Optionally, save your retrained model
    torch.save(model.state_dict(), f'rd_Basic_model_fold_{fold}.pth')




Outer FOLD 0
--------------------------------
Inner FOLD 0
Hyperparameters: LR=0.01, Batch Size=8, Patience=10


NameError: name 'GraphSAGEModel' is not defined

# do the same to dataset proteins

First is the basic model

In [None]:

# Outer k-fold cross-validation setup
outer_k_folds = 5
inner_k_folds = 5
num_epochs = 200

# Possible hyperparameters to tune
learning_rates = [0.01, 0.001]
batch_sizes = [8, 16]
patiences = [10, 50]

# Set list to store the evaluation metrics
f1_micro_test_list = []
f1_macro_test_list = []
accuracy_test_list = []

# Prepare the outer k-fold cross-validation
outer_kf = KFold(n_splits=outer_k_folds, shuffle=True, random_state=42)

# Loop over each fold for the outer k-fold
for fold, (train_val_idx, test_idx) in enumerate(outer_kf.split(dataset_pr)):
    print(f"Outer FOLD {fold}")
    print("--------------------------------")

    # Split dataset into train_val and test for the current outer fold
    train_val_dataset = dataset_pr[train_val_idx]
    test_dataset = dataset_pr[test_idx]

    # Initialize the best hyperparameter set and its performance score
    best_hyperparams = None
    best_score = 0

    # Inner k-fold cross-validation for hyperparameter tuning
    inner_kf = KFold(n_splits=inner_k_folds, shuffle=True, random_state=42)

    # Create all combinations of hyperparameters
    all_params = list(product(learning_rates, batch_sizes, patiences))

    # Loop over all combinations of hyperparameters
    for params in all_params:
        lr, batch_size, patience = params
        inner_scores = []

        # Perform inner k-fold cross-validation
        for inner_fold, (inner_train_idx, inner_val_idx) in enumerate(inner_kf.split(train_val_dataset)):
            print(f"Inner FOLD {inner_fold}")
            print(f"Hyperparameters: LR={lr}, Batch Size={batch_size}, Patience={patience}")

            # Split dataset into inner train and validation sets
            inner_train_dataset = train_val_dataset[inner_train_idx]
            inner_val_dataset = train_val_dataset[inner_val_idx]

            # Define train and validation dataloaders for the current inner fold
            inner_train_loader = DataLoader(inner_train_dataset, batch_size=batch_size, shuffle=True)
            inner_val_loader = DataLoader(inner_val_dataset, batch_size=batch_size, shuffle=False)

            # Initialize model and optimizer for the current inner fold
            model = BasicGraphModel(
                input_size=dataset_pr.num_node_features,
                hidden_size=256,
                output_size=dataset_pr.num_classes,
                dropout_rate=0.5
            ).to(device)

            optimizer = torch.optim.Adam(model.parameters(), lr=lr)
            loss_fcn = torch.nn.CrossEntropyLoss()

            # Train the model for the current inner fold
            inner_metrics = train(model, loss_fcn, device, optimizer, num_epochs, inner_train_loader, inner_val_loader, patience)

            # Evaluate model performance, e.g., using validation F1 score
            # Save the model performance score for the current hyperparameter combination
            inner_scores.append(inner_metrics['best_score'])

        # Calculate the average performance over all inner folds for the current hyperparameter set
        average_score = np.mean(inner_scores)
        print(f"Average Score for hyperparameters {params}: {average_score}")

        # If the current hyperparameters outperform the previous ones, update the best_hyperparams
        if average_score > best_score:
            best_hyperparams = params
            best_score = average_score

    print(f"Best hyperparameters for Outer FOLD {fold}: {best_hyperparams} with score {best_score}")

    # Now retrain the model on the full train_val_dataset with the best_hyperparams

    # Extract best hyperparameters
    best_lr, best_batch_size, best_patience = best_hyperparams

    # DataLoader for the combined training and validation set
    train_val_loader = DataLoader(train_val_dataset, batch_size=best_batch_size, shuffle=True)

    # DataLoader for the test set
    test_loader = DataLoader(test_dataset, batch_size=best_batch_size, shuffle=False)

    # Initialize the model with the best hyperparameters
    model = BasicGraphModel(
        input_size=dataset_pr.num_node_features,
        hidden_size=256,
        output_size=dataset_pr.num_classes,
        dropout_rate=0.5  # You could also tune the dropout rate if you wanted
    ).to(device)

    # Initialize the optimizer with the best learning rate
    optimizer = torch.optim.Adam(model.parameters(), lr=best_lr)

    # Loss function
    loss_fcn = torch.nn.CrossEntropyLoss()

    # Retrain the model on the full train_val_dataset
    retrained_metrics = train(
        model,
        loss_fcn,
        device,
        optimizer,
        num_epochs,
        train_val_loader,
        test_loader,  # We're using the test_loader here to monitor the performance, but we do not use this for making decisions
        best_patience
    )

    # After retraining, evaluate on the test set
    f1_micro_test, f1_macro_test, accuracy_test = evaluate_metrics(model, device, test_loader)
    print(f"Test set evaluation - F1 Micro: {f1_micro_test:.4f}, F1 Macro: {f1_macro_test:.4f}, Accuracy: {accuracy_test:.4f}")
    f1_micro_test_list.append(f1_micro_test)
    f1_macro_test_list.append(f1_macro_test)
    accuracy_test_list.append(accuracy_test)
    # Optionally, save your retrained model
    torch.save(model.state_dict(), f'Basic_model_fold_{fold}.pth')




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 18, Train Loss: 0.6718, Val Loss: 0.7041, F1 Micro: 0.6760, F1 Macro: 0.6155, Accuracy: 0.6760
Epoch 19, Train Loss: 0.7001, Val Loss: 0.6313, F1 Micro: 0.6983, F1 Macro: 0.6206, Accuracy: 0.6983
Epoch 20, Train Loss: 0.7277, Val Loss: 0.6248, F1 Micro: 0.6816, F1 Macro: 0.5672, Accuracy: 0.6816
Epoch 21, Train Loss: 0.7355, Val Loss: 0.6599, F1 Micro: 0.6704, F1 Macro: 0.6287, Accuracy: 0.6704
Epoch 22, Train Loss: 0.6875, Val Loss: 0.6288, F1 Micro: 0.6592, F1 Macro: 0.5742, Accuracy: 0.6592
Epoch 23, Train Loss: 0.6564, Val Loss: 0.6468, F1 Micro: 0.6648, F1 Macro: 0.5979, Accuracy: 0.6648
Epoch 24, Train Loss: 0.6741, Val Loss: 0.6497, F1 Micro: 0.6872, F1 Macro: 0.5961, Accuracy: 0.6872
Epoch 25, Train Loss: 0.6648, Val Loss: 0.6701, F1 Micro: 0.6257, F1 Macro: 0.5747, Accuracy: 0.6257
Epoch 26, Train Loss: 0.6652, Val Loss: 0.6210, F1 Micro: 0.6704, F1 Macro: 0.5449, Accuracy: 0.6704
Epoch 27, Train Loss: 0.66

In [17]:
# Initialize a dictionary to store metrics for different models
models_evaluation_metrics_pr = {}

# Example model identifiers
model_names = ['BasicGraphModel', 'GraphSAGEModel', 'GINModel']

# Initialize metric dictionaries for each model
for model_name in model_names:
    models_evaluation_metrics_pr[model_name] = {'f1_micro': [], 'f1_macro': [], 'accuracy': []}

def update_model_metrics_pr(model_name, f1_micro, f1_macro, accuracy):
    models_evaluation_metrics_pr[model_name]['f1_micro'].append(f1_micro)
    models_evaluation_metrics_pr[model_name]['f1_macro'].append(f1_macro)
    models_evaluation_metrics_pr[model_name]['accuracy'].append(accuracy)

update_model_metrics_pr('BasicGraphModel', f1_micro_test_list, f1_macro_test_list, accuracy_test_list)

print(models_evaluation_metrics_pr)


{'BasicGraphModel': {'f1_micro': [[0.672645739910314, 0.7713004484304933, 0.57847533632287, 0.6441441441441441, 0.6576576576576577]], 'f1_macro': [[0.6368645296571416, 0.7291175419792784, 0.5760113268608413, 0.586319141408185, 0.6266265380189431]], 'accuracy': [[0.672645739910314, 0.7713004484304933, 0.57847533632287, 0.6441441441441441, 0.6576576576576577]]}, 'GraphSAGEModel': {'f1_micro': [], 'f1_macro': [], 'accuracy': []}, 'GINModel': {'f1_micro': [], 'f1_macro': [], 'accuracy': []}}


GraphSAGE model for protein

In [22]:

# Outer k-fold cross-validation setup
outer_k_folds = 5
inner_k_folds = 5
num_epochs = 200

# Possible hyperparameters to tune
learning_rates = [0.01, 0.001]
batch_sizes = [8, 16]
patiences = [10, 50]

# Set list to store the evaluation metrics
f1_micro_test_list = []
f1_macro_test_list = []
accuracy_test_list = []

# Prepare the outer k-fold cross-validation
outer_kf = KFold(n_splits=outer_k_folds, shuffle=True, random_state=42)

# Loop over each fold for the outer k-fold
for fold, (train_val_idx, test_idx) in enumerate(outer_kf.split(dataset_pr)):
    print(f"Outer FOLD {fold}")
    print("--------------------------------")

    # Split dataset into train_val and test for the current outer fold
    train_val_dataset = dataset_pr[train_val_idx]
    test_dataset = dataset_pr[test_idx]

    # Initialize the best hyperparameter set and its performance score
    best_hyperparams = None
    best_score = 0

    # Inner k-fold cross-validation for hyperparameter tuning
    inner_kf = KFold(n_splits=inner_k_folds, shuffle=True, random_state=42)

    # Create all combinations of hyperparameters
    all_params = list(product(learning_rates, batch_sizes, patiences))

    # Loop over all combinations of hyperparameters
    for params in all_params:
        lr, batch_size, patience = params
        inner_scores = []

        # Perform inner k-fold cross-validation
        for inner_fold, (inner_train_idx, inner_val_idx) in enumerate(inner_kf.split(train_val_dataset)):
            print(f"Inner FOLD {inner_fold}")
            print(f"Hyperparameters: LR={lr}, Batch Size={batch_size}, Patience={patience}")

            # Split dataset into inner train and validation sets
            inner_train_dataset = train_val_dataset[inner_train_idx]
            inner_val_dataset = train_val_dataset[inner_val_idx]

            # Define train and validation dataloaders for the current inner fold
            inner_train_loader = DataLoader(inner_train_dataset, batch_size=batch_size, shuffle=True)
            inner_val_loader = DataLoader(inner_val_dataset, batch_size=batch_size, shuffle=False)

            # Initialize model and optimizer for the current inner fold
            model = GraphSAGEModel(
                input_dim=dataset_pr.num_node_features,
                hidden_dim=256,
                output_dim=dataset_pr.num_classes,
                dropout_rate=0.5
            ).to(device)

            optimizer = torch.optim.Adam(model.parameters(), lr=lr)
            loss_fcn = torch.nn.CrossEntropyLoss()

            # Train the model for the current inner fold
            inner_metrics = train(model, loss_fcn, device, optimizer, num_epochs, inner_train_loader, inner_val_loader, patience)

            # Evaluate model performance, e.g., using validation F1 score
            # Save the model performance score for the current hyperparameter combination
            inner_scores.append(inner_metrics['best_score'])

        # Calculate the average performance over all inner folds for the current hyperparameter set
        average_score = np.mean(inner_scores)
        print(f"Average Score for hyperparameters {params}: {average_score}")

        # If the current hyperparameters outperform the previous ones, update the best_hyperparams
        if average_score > best_score:
            best_hyperparams = params
            best_score = average_score

    print(f"Best hyperparameters for Outer FOLD {fold}: {best_hyperparams} with score {best_score}")

    # Now retrain the model on the full train_val_dataset with the best_hyperparams

    # Extract best hyperparameters
    best_lr, best_batch_size, best_patience = best_hyperparams

    # DataLoader for the combined training and validation set
    train_val_loader = DataLoader(train_val_dataset, batch_size=best_batch_size, shuffle=True)

    # DataLoader for the test set
    test_loader = DataLoader(test_dataset, batch_size=best_batch_size, shuffle=False)

    # Initialize the model with the best hyperparameters
    model = GraphSAGEModel(
        input_dim=dataset_pr.num_node_features,
        hidden_dim=256,
        output_dim=dataset_pr.num_classes,
        dropout_rate=0.5  # You could also tune the dropout rate if you wanted
    ).to(device)

    # Initialize the optimizer with the best learning rate
    optimizer = torch.optim.Adam(model.parameters(), lr=best_lr)

    # Loss function
    loss_fcn = torch.nn.CrossEntropyLoss()

    # Retrain the model on the full train_val_dataset
    retrained_metrics = train(
        model,
        loss_fcn,
        device,
        optimizer,
        num_epochs,
        train_val_loader,
        test_loader,  # We're using the test_loader here to monitor the performance, but we do not use this for making decisions
        best_patience
    )

    # After retraining, evaluate on the test set
    f1_micro_test, f1_macro_test, accuracy_test = evaluate_metrics(model, device, test_loader)
    print(f"Test set evaluation - F1 Micro: {f1_micro_test:.4f}, F1 Macro: {f1_macro_test:.4f}, Accuracy: {accuracy_test:.4f}")
    f1_micro_test_list.append(f1_micro_test)
    f1_macro_test_list.append(f1_macro_test)
    accuracy_test_list.append(accuracy_test)
    # Optionally, save your retrained model
    torch.save(model.state_dict(), f'Basic_model_fold_{fold}.pth')




[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 21, Train Loss: 0.6243, Val Loss: 0.6680, F1 Micro: 0.7079, F1 Macro: 0.6613, Accuracy: 0.7079
Epoch 22, Train Loss: 0.6374, Val Loss: 0.7012, F1 Micro: 0.6461, F1 Macro: 0.5281, Accuracy: 0.6461
Epoch 23, Train Loss: 0.6557, Val Loss: 0.6565, F1 Micro: 0.7079, F1 Macro: 0.6704, Accuracy: 0.7079
Epoch 24, Train Loss: 0.6335, Val Loss: 0.6724, F1 Micro: 0.6180, F1 Macro: 0.6131, Accuracy: 0.6180
Epoch 25, Train Loss: 0.6190, Val Loss: 0.6737, F1 Micro: 0.6573, F1 Macro: 0.6267, Accuracy: 0.6573
Epoch 26, Train Loss: 0.6250, Val Loss: 0.7082, F1 Micro: 0.6404, F1 Macro: 0.5170, Accuracy: 0.6404
Epoch 27, Train Loss: 0.6323, Val Loss: 0.6705, F1 Micro: 0.7135, F1 Macro: 0.6725, Accuracy: 0.7135
Epoch 28, Train Loss: 0.6234, Val Loss: 0.6620, F1 Micro: 0.6910, F1 Macro: 0.6499, Accuracy: 0.6910
Epoch 29, Train Loss: 0.6289, Val Loss: 0.6616, F1 Micro: 0.6573, F1 Macro: 0.6361, Accuracy: 0.6573
Epoch 30, Train Loss: 0.64

In [23]:
update_model_metrics_pr('GraphSAGEModel', f1_micro_test_list, f1_macro_test_list, accuracy_test_list)

print(models_evaluation_metrics_pr)


{'BasicGraphModel': {'f1_micro': [[0.672645739910314, 0.7713004484304933, 0.57847533632287, 0.6441441441441441, 0.6576576576576577]], 'f1_macro': [[0.6368645296571416, 0.7291175419792784, 0.5760113268608413, 0.586319141408185, 0.6266265380189431]], 'accuracy': [[0.672645739910314, 0.7713004484304933, 0.57847533632287, 0.6441441441441441, 0.6576576576576577]]}, 'GraphSAGEModel': {'f1_micro': [[0.6457399103139013, 0.7668161434977578, 0.6681614349775785, 0.6846846846846847, 0.7027027027027027]], 'f1_macro': [[0.6094224587074603, 0.7003927242662258, 0.6639377647442164, 0.6615853658536586, 0.6719211822660098]], 'accuracy': [[0.6457399103139013, 0.7668161434977578, 0.6681614349775785, 0.6846846846846847, 0.7027027027027027]]}, 'GINModel': {'f1_micro': [], 'f1_macro': [], 'accuracy': []}}


In [33]:
# Outer k-fold cross-validation setup
outer_k_folds = 5
inner_k_folds = 5
num_epochs = 200

# Possible hyperparameters to tune
learning_rates = [0.01, 0.001]
batch_sizes = [8, 16]
patiences = [10, 50]

# Set list to store the evaluation metrics
f1_micro_test_list3 = []
f1_macro_test_list3 = []
accuracy_test_list3 = []

# Prepare the outer k-fold cross-validation
outer_kf = KFold(n_splits=outer_k_folds, shuffle=True, random_state=42)

# Loop over each fold for the outer k-fold
for fold, (train_val_idx, test_idx) in enumerate(outer_kf.split(dataset_pr)):
    print(f"Outer FOLD {fold}")
    print("--------------------------------")

    # Split dataset into train_val and test for the current outer fold
    train_val_dataset = dataset_pr[train_val_idx]
    test_dataset = dataset_pr[test_idx]

    # Initialize the best hyperparameter set and its performance score
    best_hyperparams = None
    best_score = 0

    # Inner k-fold cross-validation for hyperparameter tuning
    inner_kf = KFold(n_splits=inner_k_folds, shuffle=True, random_state=42)

    # Create all combinations of hyperparameters
    all_params = list(product(learning_rates, batch_sizes, patiences))

    # Loop over all combinations of hyperparameters
    for params in all_params:
        lr, batch_size, patience = params
        inner_scores = []

        # Perform inner k-fold cross-validation
        for inner_fold, (inner_train_idx, inner_val_idx) in enumerate(inner_kf.split(train_val_dataset)):
            print(f"Inner FOLD {inner_fold}")
            print(f"Hyperparameters: LR={lr}, Batch Size={batch_size}, Patience={patience}")

            # Split dataset into inner train and validation sets
            inner_train_dataset = train_val_dataset[inner_train_idx]
            inner_val_dataset = train_val_dataset[inner_val_idx]

            # Define train and validation dataloaders for the current inner fold
            inner_train_loader = DataLoader(inner_train_dataset, batch_size=batch_size, shuffle=True)
            inner_val_loader = DataLoader(inner_val_dataset, batch_size=batch_size, shuffle=False)

            # Initialize model and optimizer for the current inner fold
            model = GINModel(
                input_dim=dataset_pr.num_node_features,
                hidden_dim=256,
                output_dim=dataset_pr.num_classes,
                dropout_rate=0.5
            ).to(device)

            optimizer = torch.optim.Adam(model.parameters(), lr=lr)
            loss_fcn = torch.nn.CrossEntropyLoss()

            # Train the model for the current inner fold
            inner_metrics = train(model, loss_fcn, device, optimizer, num_epochs, inner_train_loader, inner_val_loader, patience)

            # Evaluate model performance, e.g., using validation F1 score
            # Save the model performance score for the current hyperparameter combination
            inner_scores.append(inner_metrics['best_score'])

        # Calculate the average performance over all inner folds for the current hyperparameter set
        average_score = np.mean(inner_scores)
        print(f"Average Score for hyperparameters {params}: {average_score}")

        # If the current hyperparameters outperform the previous ones, update the best_hyperparams
        if average_score > best_score:
            best_hyperparams = params
            best_score = average_score

    print(f"Best hyperparameters for Outer FOLD {fold}: {best_hyperparams} with score {best_score}")

    # Now retrain the model on the full train_val_dataset with the best_hyperparams

    # Extract best hyperparameters
    best_lr, best_batch_size, best_patience = best_hyperparams

    # DataLoader for the combined training and validation set
    train_val_loader = DataLoader(train_val_dataset, batch_size=best_batch_size, shuffle=True)

    # DataLoader for the test set
    test_loader = DataLoader(test_dataset, batch_size=best_batch_size, shuffle=False)

    # Initialize the model with the best hyperparameters
    model = GINModel(
                input_dim=dataset_pr.num_node_features,
                hidden_dim=256,
                output_dim=dataset_pr.num_classes,
                num_layers=2,
                dropout_rate=0.5
            ).to(device)
    # Initialize the optimizer with the best learning rate
    optimizer = torch.optim.Adam(model.parameters(), lr=best_lr)

    # Loss function
    loss_fcn = torch.nn.CrossEntropyLoss()

    # Retrain the model on the full train_val_dataset
    retrained_metrics = train(
        model,
        loss_fcn,
        device,
        optimizer,
        num_epochs,
        train_val_loader,
        test_loader,  # We're using the test_loader here to monitor the performance, but we do not use this for making decisions
        best_patience
    )

    # After retraining, evaluate on the test set
    f1_micro_test, f1_macro_test, accuracy_test = evaluate_metrics(model, device, test_loader)
    print(f"Test set evaluation - F1 Micro: {f1_micro_test:.4f}, F1 Macro: {f1_macro_test:.4f}, Accuracy: {accuracy_test:.4f}")
    f1_micro_test_list3.append(f1_micro_test)
    f1_macro_test_list3.append(f1_macro_test)
    accuracy_test_list3.append(accuracy_test)
    # Optionally, save your retrained model
    torch.save(model.state_dict(), f'GSAGE_fold_pr{fold}.pth')



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Epoch 14, Train Loss: 0.6347, Val Loss: 0.5959, F1 Micro: 0.6966, F1 Macro: 0.6198, Accuracy: 0.6966
Epoch 15, Train Loss: 0.6224, Val Loss: 0.5937, F1 Micro: 0.6854, F1 Macro: 0.6106, Accuracy: 0.6854
Epoch 16, Train Loss: 0.6274, Val Loss: 0.6044, F1 Micro: 0.6966, F1 Macro: 0.6373, Accuracy: 0.6966
Epoch 17, Train Loss: 0.6313, Val Loss: 0.6504, F1 Micro: 0.6517, F1 Macro: 0.5579, Accuracy: 0.6517
Epoch 18, Train Loss: 0.6222, Val Loss: 0.6157, F1 Micro: 0.6966, F1 Macro: 0.6332, Accuracy: 0.6966
Epoch 19, Train Loss: 0.6265, Val Loss: 0.5919, F1 Micro: 0.6798, F1 Macro: 0.6339, Accuracy: 0.6798
Epoch 20, Train Loss: 0.6256, Val Loss: 0.5996, F1 Micro: 0.7247, F1 Macro: 0.7059, Accuracy: 0.7247
Epoch 21, Train Loss: 0.6263, Val Loss: 0.5921, F1 Micro: 0.7191, F1 Macro: 0.6831, Accuracy: 0.7191
Epoch 22, Train Loss: 0.6239, Val Loss: 0.6003, F1 Micro: 0.7191, F1 Macro: 0.6906, Accuracy: 0.7191
Epoch 23, Train Loss: 0.61

In [34]:
update_model_metrics_pr('GINModel', f1_micro_test_list3, f1_macro_test_list3, accuracy_test_list3)

print(models_evaluation_metrics_pr)


{'BasicGraphModel': {'f1_micro': [[0.672645739910314, 0.7713004484304933, 0.57847533632287, 0.6441441441441441, 0.6576576576576577]], 'f1_macro': [[0.6368645296571416, 0.7291175419792784, 0.5760113268608413, 0.586319141408185, 0.6266265380189431]], 'accuracy': [[0.672645739910314, 0.7713004484304933, 0.57847533632287, 0.6441441441441441, 0.6576576576576577]]}, 'GraphSAGEModel': {'f1_micro': [[0.6457399103139013, 0.7668161434977578, 0.6681614349775785, 0.6846846846846847, 0.7027027027027027]], 'f1_macro': [[0.6094224587074603, 0.7003927242662258, 0.6639377647442164, 0.6615853658536586, 0.6719211822660098]], 'accuracy': [[0.6457399103139013, 0.7668161434977578, 0.6681614349775785, 0.6846846846846847, 0.7027027027027027]]}, 'GINModel': {'f1_micro': [[0.6322869955156951, 0.6860986547085202, 0.6547085201793722, 0.6981981981981982, 0.6891891891891891]], 'f1_macro': [[0.5933552748621241, 0.40691489361702127, 0.6443823133478306, 0.6852662984828286, 0.6580421922089519]], 'accuracy': [[0.6322869

In [11]:
data = {
    'BasicGraphModel': {'f1_micro': [[0.672645739910314, 0.7713004484304933, 0.57847533632287, 0.6441441441441441, 0.6576576576576577]],
                     'f1_macro': [[0.6368645296571416, 0.7291175419792784, 0.5760113268608413, 0.586319141408185, 0.6266265380189431]],
                     'accuracy': [[0.672645739910314, 0.7713004484304933, 0.57847533632287, 0.6441441441441441, 0.6576576576576577]]},
 'GraphSAGEModel': {'f1_micro': [[0.6457399103139013, 0.7668161434977578, 0.6681614349775785, 0.6846846846846847, 0.7027027027027027]],
                    'f1_macro': [[0.6094224587074603, 0.7003927242662258, 0.6639377647442164, 0.6615853658536586, 0.6719211822660098]],
                    'accuracy': [[0.6457399103139013, 0.7668161434977578, 0.6681614349775785, 0.6846846846846847, 0.7027027027027027]]},
 'GINModel': {'f1_micro': [[0.6322869955156951, 0.6860986547085202, 0.6547085201793722, 0.6981981981981982, 0.6891891891891891]],
              'f1_macro': [[0.5933552748621241, 0.40691489361702127, 0.6443823133478306, 0.6852662984828286, 0.6580421922089519]],
              'accuracy': [[0.6322869955156951, 0.6860986547085202, 0.6547085201793722, 0.6981981981981982, 0.6891891891891891]]}}


# Convert the nested structure to a flat structure suitable for DataFrame
records = []
for model_name, metrics in data.items():
    for metric_name, metric_values in metrics.items():
        for values in metric_values:  # metric_values is a list of lists
            record = {"Model": model_name, "Metric": metric_name}
            # Assuming a fixed number of folds, e.g., 5
            for fold_index, value in enumerate(values, start=1):
                record[f"Fold{fold_index}"] = value
            records.append(record)

# Create DataFrame
df = pd.DataFrame(records)
df['Mean'] = df.mean(axis=1)



  df['Mean'] = df.mean(axis=1)


In [12]:
df

Unnamed: 0,Model,Metric,Fold1,Fold2,Fold3,Fold4,Fold5,Mean
0,BasicGraphModel,f1_micro,0.672646,0.7713,0.578475,0.644144,0.657658,0.664845
1,BasicGraphModel,f1_macro,0.636865,0.729118,0.576011,0.586319,0.626627,0.630988
2,BasicGraphModel,accuracy,0.672646,0.7713,0.578475,0.644144,0.657658,0.664845
3,GraphSAGEModel,f1_micro,0.64574,0.766816,0.668161,0.684685,0.702703,0.693621
4,GraphSAGEModel,f1_macro,0.609422,0.700393,0.663938,0.661585,0.671921,0.661452
5,GraphSAGEModel,accuracy,0.64574,0.766816,0.668161,0.684685,0.702703,0.693621
6,GINModel,f1_micro,0.632287,0.686099,0.654709,0.698198,0.689189,0.672096
7,GINModel,f1_macro,0.593355,0.406915,0.644382,0.685266,0.658042,0.597592
8,GINModel,accuracy,0.632287,0.686099,0.654709,0.698198,0.689189,0.672096
