In [2]:
# src/data_utils.py

import torch
from torch_geometric.datasets import ZINC
from torch_geometric.loader import DataLoader
import torch.nn.functional as F
import torch.nn as nn


def get_zinc_dataset(root='../data/ZINC', batch_size=64, subset=True):
    """
    Loads the ZINC dataset from the specified root directory.
    
    Args:
        root (str): Path to the dataset folder.
        batch_size (int): Batch size for DataLoader.
    
    Returns:
        (DataLoader, DataLoader, DataLoader): train, val, and test loaders.
    """
    train_dataset = ZINC(root, split='train', subset=subset)
    val_dataset = ZINC(root, split='val', subset=subset)
    test_dataset = ZINC(root, split='test', subset=subset)
    
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    return train_loader, val_loader, test_loader



def get_activation_fn(name):
    name = name.lower()
    if name == 'relu':
        return F.relu
    elif name == 'leakyrelu':
        return F.leaky_relu
    elif name == 'elu':
        return F.elu
    else:
        raise ValueError(f"Unsupported activation: {name}")


def get_activation_module(name):
    """
    For usage in nn.Sequential, we need an nn.Module (e.g. nn.ReLU).
    """
    name = name.lower()
    if name == 'relu':
        return nn.ReLU()
    elif name == 'leakyrelu':
        return nn.LeakyReLU(negative_slope=0.2)
    elif name == 'elu':
        return nn.ELU()
    else:
        raise ValueError(f"Unsupported activation module: {name}")


# src/model.py

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import (
    GCNConv, 
    GINConv, 
    GATConv, 
    SAGEConv,
    GINEConv,
    global_mean_pool,
    global_max_pool,
    GlobalAttention
)

###############################################################################
# Pooling Helpers
###############################################################################
def get_pooling_fn(pool_type, hidden_dim):
    pool_type = pool_type.lower()
    if pool_type == 'mean':
        return global_mean_pool
    elif pool_type == 'max':
        return global_max_pool
    elif pool_type == 'attention':
        gate_nn = nn.Sequential(nn.Linear(hidden_dim, 1))
        return GlobalAttention(gate_nn)
    else:
        raise ValueError(f"Unsupported pooling type: {pool_type}")


###############################################################################
# 1. GCN Model (no edge features)
###############################################################################
class GCNModel(nn.Module):
    """
    A flexible 2-layer GCN supporting dropout, batch norm, residual, etc.
    This model ignores edge_attr entirely.
    """
    def __init__(
        self, 
        in_channels, 
        hidden_dim=64, 
        out_channels=1,
        dropout=0.0,
        activation='relu',
        pool='mean',
        residual=False,
        batch_norm=False
    ):
        super(GCNModel, self).__init__()
        
        self.residual = residual
        self.batch_norm = batch_norm

        self.conv1 = GCNConv(in_channels, hidden_dim)
        self.conv2 = GCNConv(hidden_dim, hidden_dim)
        
        if self.batch_norm:
            self.bn1 = nn.BatchNorm1d(hidden_dim)
            self.bn2 = nn.BatchNorm1d(hidden_dim)

        self.dropout_layer = nn.Dropout(dropout) if dropout > 0 else nn.Identity()
        self.activation_fn = get_activation_fn(activation)
        self.pool = get_pooling_fn(pool, hidden_dim)
        self.lin = nn.Linear(hidden_dim, out_channels)

    def forward(self, x, edge_index, batch, edge_attr=None):
        # We ignore edge_attr for standard GCN
        x_in = x
        x = self.conv1(x, edge_index)
        x = self.activation_fn(x)
        if self.batch_norm:
            x = self.bn1(x)
        x = self.dropout_layer(x)
        if self.residual and x.shape == x_in.shape:
            x = x + x_in

        x_in = x
        x = self.conv2(x, edge_index)
        x = self.activation_fn(x)
        if self.batch_norm:
            x = self.bn2(x)
        x = self.dropout_layer(x)
        if self.residual and x.shape == x_in.shape:
            x = x + x_in
        
        x = self.pool(x, batch)
        x = self.lin(x)
        return x


###############################################################################
# 2. GIN Model (no edge features)
###############################################################################
class GINModel(nn.Module):
    """
    Standard 2-layer GIN ignoring edge_attr.
    """
    def __init__(
        self,
        in_channels,
        hidden_dim=64,
        out_channels=1,
        dropout=0.0,
        activation='relu',
        pool='mean',
        residual=False,
        batch_norm=False
    ):
        super(GINModel, self).__init__()
        self.residual = residual
        self.batch_norm = batch_norm
        self.activation_fn = get_activation_fn(activation)
        self.dropout_layer = nn.Dropout(dropout) if dropout > 0 else nn.Identity()

        self.mlp1 = nn.Sequential(
            nn.Linear(in_channels, hidden_dim),
            get_activation_module(activation),
            nn.Linear(hidden_dim, hidden_dim)
        )
        self.conv1 = GINConv(self.mlp1)

        if self.batch_norm:
            self.bn1 = nn.BatchNorm1d(hidden_dim)
        
        self.mlp2 = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            get_activation_module(activation),
            nn.Linear(hidden_dim, hidden_dim)
        )
        self.conv2 = GINConv(self.mlp2)

        if self.batch_norm:
            self.bn2 = nn.BatchNorm1d(hidden_dim)

        self.pool = get_pooling_fn(pool, hidden_dim)
        self.lin = nn.Linear(hidden_dim, out_channels)

    def forward(self, x, edge_index, batch, edge_attr=None):
        # We ignore edge_attr for standard GIN
        x_in = x
        x = self.conv1(x, edge_index)
        if self.batch_norm:
            x = self.bn1(x)
        x = self.dropout_layer(x)
        if self.residual and x.shape == x_in.shape:
            x = x + x_in

        x_in = x
        x = self.conv2(x, edge_index)
        if self.batch_norm:
            x = self.bn2(x)
        x = self.dropout_layer(x)
        if self.residual and x.shape == x_in.shape:
            x = x + x_in

        x = self.pool(x, batch)
        x = self.lin(x)
        return x


###############################################################################
# 3. GINE Model (USES edge features)
###############################################################################
class GINEModel(nn.Module):
    """
    GINE variant that can incorporate edge_attr (e.g., bond types).
    """
    def __init__(
        self,
        in_channels,
        hidden_dim=64,
        out_channels=1,
        dropout=0.0,
        activation='relu',
        pool='mean',
        residual=False,
        batch_norm=False,
        edge_dim=None  # dimension of edge_attr, if known
    ):
        super(GINEModel, self).__init__()
        self.residual = residual
        self.batch_norm = batch_norm
        self.edge_dim = edge_dim

        self.activation_fn = get_activation_fn(activation)
        self.dropout_layer = nn.Dropout(dropout) if dropout > 0 else nn.Identity()

        # MLP for first GINEConv
        self.mlp1 = nn.Sequential(
            nn.Linear(in_channels, hidden_dim),
            get_activation_module(activation),
            nn.Linear(hidden_dim, hidden_dim)
        )
        self.conv1 = GINEConv(nn=self.mlp1, edge_dim=edge_dim if edge_dim else 0)

        if self.batch_norm:
            self.bn1 = nn.BatchNorm1d(hidden_dim)
        
        # MLP for second GINEConv
        self.mlp2 = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            get_activation_module(activation),
            nn.Linear(hidden_dim, hidden_dim)
        )
        self.conv2 = GINEConv(nn=self.mlp2, edge_dim=edge_dim if edge_dim else 0)

        if self.batch_norm:
            self.bn2 = nn.BatchNorm1d(hidden_dim)

        self.pool = get_pooling_fn(pool, hidden_dim)
        self.lin = nn.Linear(hidden_dim, out_channels)

    def forward(self, x, edge_index, batch, edge_attr=None):
        # Fix dimension if edge_attr is 1D
        if edge_attr is not None and edge_attr.dim() == 1:
            edge_attr = edge_attr.unsqueeze(-1)
        
        # 1st GINEConv
        x_in = x
        x = self.conv1(x, edge_index, edge_attr)
        if self.batch_norm:
            x = self.bn1(x)
        x = self.dropout_layer(x)
        if self.residual and x.shape == x_in.shape:
            x = x + x_in

        # 2nd GINEConv
        x_in = x
        x = self.conv2(x, edge_index, edge_attr)
        if self.batch_norm:
            x = self.bn2(x)
        x = self.dropout_layer(x)
        if self.residual and x.shape == x_in.shape:
            x = x + x_in

        x = self.pool(x, batch)
        x = self.lin(x)
        return x


###############################################################################
# 4. GAT Model (no edge features)
###############################################################################
class GATModel(nn.Module):
    """
    A flexible 2-layer GAT supporting dropout, batch norm, residual, etc.
    Ignores edge_attr unless you implement a custom attention mechanism.
    """
    def __init__(
        self, 
        in_channels, 
        hidden_dim=64, 
        out_channels=1, 
        heads=4,
        dropout=0.0, 
        activation='relu', 
        pool='mean', 
        residual=False, 
        batch_norm=False
    ):
        super(GATModel, self).__init__()
        
        self.residual = residual
        self.batch_norm = batch_norm
        
        self.activation_fn = get_activation_fn(activation)
        self.dropout_layer = nn.Dropout(dropout) if dropout > 0 else nn.Identity()

        self.conv1 = GATConv(in_channels, hidden_dim, heads=heads, concat=True)
        self.conv2 = GATConv(hidden_dim * heads, hidden_dim, heads=1, concat=True)

        if self.batch_norm:
            self.bn1 = nn.BatchNorm1d(hidden_dim * heads)
            self.bn2 = nn.BatchNorm1d(hidden_dim)

        self.pool = get_pooling_fn(pool, hidden_dim)
        self.lin = nn.Linear(hidden_dim, out_channels)

    def forward(self, x, edge_index, batch, edge_attr=None):
        x_in = x
        x = self.conv1(x, edge_index)
        x = self.activation_fn(x)
        if self.batch_norm:
            x = self.bn1(x)
        x = self.dropout_layer(x)
        if self.residual and x.shape == x_in.shape:
            x = x + x_in

        x_in = x
        x = self.conv2(x, edge_index)
        x = self.activation_fn(x)
        if self.batch_norm:
            x = self.bn2(x)
        x = self.dropout_layer(x)
        if self.residual and x.shape == x_in.shape:
            x = x + x_in

        x = self.pool(x, batch)
        x = self.lin(x)
        return x


###############################################################################
# 5. GraphSAGE Model (no edge features)
###############################################################################
class SAGEModel(nn.Module):
    """
    A flexible 2-layer GraphSAGE supporting dropout, batch norm, residual, etc.
    Ignores edge_attr unless you implement a custom aggregator.
    """
    def __init__(
        self,
        in_channels,
        hidden_dim=64,
        out_channels=1,
        dropout=0.0,
        activation='relu',
        pool='mean',
        residual=False,
        batch_norm=False
    ):
        super(SAGEModel, self).__init__()
        
        self.residual = residual
        self.batch_norm = batch_norm
        
        self.activation_fn = get_activation_fn(activation)
        self.dropout_layer = nn.Dropout(dropout) if dropout > 0 else nn.Identity()

        self.conv1 = SAGEConv(in_channels, hidden_dim)
        self.conv2 = SAGEConv(hidden_dim, hidden_dim)

        if self.batch_norm:
            self.bn1 = nn.BatchNorm1d(hidden_dim)
            self.bn2 = nn.BatchNorm1d(hidden_dim)

        self.pool = get_pooling_fn(pool, hidden_dim)
        self.lin = nn.Linear(hidden_dim, out_channels)

    def forward(self, x, edge_index, batch, edge_attr=None):
        x_in = x
        x = self.conv1(x, edge_index)
        x = self.activation_fn(x)
        if self.batch_norm:
            x = self.bn1(x)
        x = self.dropout_layer(x)
        if self.residual and x.shape == x_in.shape:
            x = x + x_in

        x_in = x
        x = self.conv2(x, edge_index)
        x = self.activation_fn(x)
        if self.batch_norm:
            x = self.bn2(x)
        x = self.dropout_layer(x)
        if self.residual and x.shape == x_in.shape:
            x = x + x_in

        x = self.pool(x, batch)
        x = self.lin(x)
        return x


###############################################################################
# 6. Graph Transformer (no edge features by default)
###############################################################################
class GraphTransformer(nn.Module):
    """
    Placeholder for a Graph Transformer approach.
    Currently does not incorporate edge_attr in attention.
    """
    def __init__(self, in_channels, hidden_dim=64, out_channels=1, num_heads=4):
        super(GraphTransformer, self).__init__()
        self.encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model=hidden_dim, nhead=num_heads),
            num_layers=2
        )
        self.lin_in = nn.Linear(in_channels, hidden_dim)
        self.lin_out = nn.Linear(hidden_dim, out_channels)

    def forward(self, x, edge_index, batch, edge_attr=None):
        # We ignore edge_index and edge_attr in this placeholder
        x = self.lin_in(x)
        x = x.unsqueeze(1)           # [num_nodes, 1, hidden_dim]
        x = x.permute(0, 1, 2)       # [num_nodes, 1, hidden_dim]
        x = self.encoder(x)          # [num_nodes, 1, hidden_dim]
        x = x.squeeze(1)             # [num_nodes, hidden_dim]
        x = global_mean_pool(x, batch)
        x = self.lin_out(x)
        return x


###############################################################################
# 7. Factory Method
###############################################################################
def get_model(
    model_name, 
    in_channels, 
    hidden_dim=64, 
    out_channels=1,
    dropout=0.0,
    activation='relu',
    pool='mean',
    residual=False,
    batch_norm=False,
    heads=4,    # used for GAT/Transformer
    edge_dim=None  # used for GINE
):
    """
    Returns an instance of the requested model by name.
    - 'gcn', 'gin', 'gat', 'sage', 'transformer' ignore edge_attr
    - 'gine' uses edge_attr
    """
    model_name = model_name.lower()

    if model_name == 'gcn':
        return GCNModel(
            in_channels, 
            hidden_dim, 
            out_channels,
            dropout=dropout,
            activation=activation,
            pool=pool,
            residual=residual,
            batch_norm=batch_norm
        )
    elif model_name == 'gin':
        return GINModel(
            in_channels,
            hidden_dim,
            out_channels,
            dropout=dropout,
            activation=activation,
            pool=pool,
            residual=residual,
            batch_norm=batch_norm
        )
    elif model_name == 'gine':
        return GINEModel(
            in_channels,
            hidden_dim,
            out_channels,
            dropout=dropout,
            activation=activation,
            pool=pool,
            residual=residual,
            batch_norm=batch_norm,
            edge_dim=edge_dim
        )
    elif model_name == 'gat':
        return GATModel(
            in_channels=in_channels,
            hidden_dim=hidden_dim,
            out_channels=out_channels,
            heads=heads,
            dropout=dropout,
            activation=activation,
            pool=pool,
            residual=residual,
            batch_norm=batch_norm
        )
    elif model_name == 'sage':
        return SAGEModel(
            in_channels=in_channels,
            hidden_dim=hidden_dim,
            out_channels=out_channels,
            dropout=dropout,
            activation=activation,
            pool=pool,
            residual=residual,
            batch_norm=batch_norm
        )
    elif model_name == 'transformer':
        return GraphTransformer(
            in_channels,
            hidden_dim,
            out_channels,
            num_heads=heads
        )
    else:
        raise ValueError(f"Unknown model name: {model_name}.")


import sys
import os

# If needed:
# sys.path.append(os.path.abspath("../src"))


import torch
import torch.nn as nn
import time
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, r2_score

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)


###############################################################################
# 1. LOAD DATA ONCE FOR ALL MODELS
###############################################################################
batch_size = 64
train_loader, val_loader, test_loader = get_zinc_dataset(
    root='../data/ZINC',
    batch_size=batch_size,
    subset=True
)

sample_batch = next(iter(train_loader))
in_channels = sample_batch.x.size(-1)
print(f"Inferred in_channels: {in_channels}")


###############################################################################
# 2. UTILITY FUNCTION TO RUN EXPERIMENT
###############################################################################
def run_experiment(variation_name, model_params, epochs=25, lr=0.001):
    """
    Train and evaluate a model given by model_params, for 'epochs' epochs,
    using MSE loss + Adam at LR=lr. Returns (model, run_info).
    """
    model = get_model(**model_params).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.MSELoss()

    # We'll store results here for logging/analysis
    run_info = {
        'variation': variation_name,
        'epoch': [],
        'train_loss': [],
        'val_loss': [],
        'epoch_time': [],
        # 'num_params': sum(p.numel() for p in model.parameters() if p.requires_grad)
    }
    
    # Training Loop
    for epoch in range(1, epochs + 1):
        start_time = time.time()
        
        train_loss = train_one_epoch(model, train_loader, optimizer, criterion, device)
        val_loss = evaluate(model, val_loader, criterion, device)
        
        epoch_time = time.time() - start_time
        
        run_info['epoch'].append(epoch)
        run_info['train_loss'].append(train_loss)
        run_info['val_loss'].append(val_loss)
        run_info['epoch_time'].append(epoch_time)
        
        if epoch % 5 == 0:
            print(f"[{variation_name}] Epoch {epoch}/{epochs} | "
                  f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | "
                  f"Time: {epoch_time:.2f}s")

    # Final Test
    test_loss = evaluate(model, test_loader, criterion, device)
    run_info['test_loss'] = test_loss

    # Additional metrics
    preds, targets = predict(model, test_loader, device)
    preds_np, targets_np = preds.numpy(), targets.numpy()
    run_info['test_mae'] = mean_absolute_error(targets_np, preds_np)
    run_info['test_r2']  = r2_score(targets_np, preds_np)

    print(f"[{variation_name}] FINAL TEST | "
          f"MSE: {test_loss:.4f} | MAE: {run_info['test_mae']:.4f} | R^2: {run_info['test_r2']:.4f}")

    return model, run_info

# src/train.py

import torch
import torch.nn.functional as F
import numpy as np

def train_one_epoch(model, dataloader, optimizer, criterion, device):
    model.train()
    total_loss = 0
    for batch_data in dataloader:
        batch_data = batch_data.to(device)
        optimizer.zero_grad()

        # Pass edge_attr to the model
        out = model(
            x=batch_data.x.float(),
            edge_index=batch_data.edge_index,
            batch=batch_data.batch,
            edge_attr=batch_data.edge_attr.float()  # <-- Added
        ).squeeze(-1)

        loss = criterion(out, batch_data.y.float())
        loss.backward()
        optimizer.step()

        total_loss += loss.item() * batch_data.num_graphs

    return total_loss / len(dataloader.dataset)

@torch.no_grad()
def evaluate(model, dataloader, criterion, device):
    model.eval()
    total_loss = 0
    for batch_data in dataloader:
        batch_data = batch_data.to(device)

        out = model(
            x=batch_data.x.float(),
            edge_index=batch_data.edge_index,
            batch=batch_data.batch,
            edge_attr=batch_data.edge_attr.float()  # <-- Added
        ).squeeze(-1)

        loss = criterion(out, batch_data.y.float())
        total_loss += loss.item() * batch_data.num_graphs

    return total_loss / len(dataloader.dataset)

def predict(model, dataloader, device):
    """
    Return predictions and targets for analysis (plotting, etc).
    """
    model.eval()
    all_preds, all_targets = [], []
    for batch_data in dataloader:
        batch_data = batch_data.to(device)

        out = model(
            x=batch_data.x.float(),
            edge_index=batch_data.edge_index,
            batch=batch_data.batch,
            edge_attr=batch_data.edge_attr.float()  # <-- Added
        ).squeeze(-1)

        all_preds.append(out.detach().cpu())
        all_targets.append(batch_data.y.cpu())

    return torch.cat(all_preds), torch.cat(all_targets)




Using device: cpu
Inferred in_channels: 1


In [3]:
### Variation 2: Dropout + LeakyReLU
gine_v2_params = {
    'model_name': 'gine',
    'in_channels': in_channels,
    'hidden_dim': 64,
    'out_channels': 1,
    'dropout': 0.3,
    'activation': 'leakyrelu',
    'pool': 'mean',
    'residual': False,
    'batch_norm': False
}

gine_v2_model, gine_v2_info = run_experiment(
    variation_name="GINE_V2_Dropout_LeakyReLU",
    model_params=gine_v2_params,
    epochs=3000,
    lr=0.001
)
torch.save(gine_v2_model.state_dict(), os.path.join(save_dir, "gine_v2_model.pt"))
torch.save(gine_v2_info,              os.path.join(save_dir, "gine_v2_info.pt"))

[GINE_V2_Dropout_LeakyReLU] Epoch 5/3000 | Train Loss: 2.4782 | Val Loss: 2.4598 | Time: 0.87s
[GINE_V2_Dropout_LeakyReLU] Epoch 10/3000 | Train Loss: 2.3808 | Val Loss: 2.4162 | Time: 0.78s
[GINE_V2_Dropout_LeakyReLU] Epoch 15/3000 | Train Loss: 2.2669 | Val Loss: 2.6009 | Time: 0.93s
[GINE_V2_Dropout_LeakyReLU] Epoch 20/3000 | Train Loss: 2.1741 | Val Loss: 2.2120 | Time: 1.02s
[GINE_V2_Dropout_LeakyReLU] Epoch 25/3000 | Train Loss: 2.0724 | Val Loss: 2.0570 | Time: 1.08s
[GINE_V2_Dropout_LeakyReLU] Epoch 30/3000 | Train Loss: 2.0011 | Val Loss: 2.0009 | Time: 1.07s
[GINE_V2_Dropout_LeakyReLU] Epoch 35/3000 | Train Loss: 1.9120 | Val Loss: 1.8161 | Time: 1.19s
[GINE_V2_Dropout_LeakyReLU] Epoch 40/3000 | Train Loss: 1.9117 | Val Loss: 1.7921 | Time: 1.50s
[GINE_V2_Dropout_LeakyReLU] Epoch 45/3000 | Train Loss: 1.8958 | Val Loss: 2.1936 | Time: 0.95s
[GINE_V2_Dropout_LeakyReLU] Epoch 50/3000 | Train Loss: 1.8443 | Val Loss: 1.8012 | Time: 0.87s
[GINE_V2_Dropout_LeakyReLU] Epoch 55/3000

NameError: name 'save_dir' is not defined

In [4]:
import torch
import os

save_dir = "../data/experiments_"
os.makedirs(save_dir, exist_ok=True)

torch.save(gine_v2_model.state_dict(), os.path.join(save_dir, "gine_v2_model.pt"))
torch.save(gine_v2_info,              os.path.join(save_dir, "gine_v2_info.pt"))