In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/torch-geometric-2-6-1/torch_geometric-2.6.1-py3-none-any.whl
/kaggle/input/neurips-open-polymer-prediction-2025/sample_submission.csv
/kaggle/input/neurips-open-polymer-prediction-2025/train.csv
/kaggle/input/neurips-open-polymer-prediction-2025/test.csv
/kaggle/input/neurips-open-polymer-prediction-2025/train_supplement/dataset2.csv
/kaggle/input/neurips-open-polymer-prediction-2025/train_supplement/dataset4.csv
/kaggle/input/neurips-open-polymer-prediction-2025/train_supplement/dataset1.csv
/kaggle/input/neurips-open-polymer-prediction-2025/train_supplement/dataset3.csv
/kaggle/input/rdkit-2025-3-3-cp311/rdkit-2025.3.3-cp311-cp311-manylinux_2_28_x86_64.whl


In [2]:
# Train/test split
import pandas as pd
from sklearn.model_selection import train_test_split

csv_path = '/kaggle/input/neurips-open-polymer-prediction-2025/train.csv'
test_df = pd.read_csv('/kaggle/input/neurips-open-polymer-prediction-2025/test.csv')
sample_df = pd.read_csv('/kaggle/input/neurips-open-polymer-prediction-2025/sample_submission.csv')
train_df = pd.read_csv(csv_path)

In [3]:
!pip install /kaggle/input/rdkit-2025-3-3-cp311/rdkit-2025.3.3-cp311-cp311-manylinux_2_28_x86_64.whl

Processing /kaggle/input/rdkit-2025-3-3-cp311/rdkit-2025.3.3-cp311-cp311-manylinux_2_28_x86_64.whl
Installing collected packages: rdkit
Successfully installed rdkit-2025.3.3


In [4]:
!pip install /kaggle/input/torch-geometric-2-6-1/torch_geometric-2.6.1-py3-none-any.whl

Processing /kaggle/input/torch-geometric-2-6-1/torch_geometric-2.6.1-py3-none-any.whl
Installing collected packages: torch-geometric
Successfully installed torch-geometric-2.6.1


In [5]:
# Credit: https://www.kaggle.com/code/abdulrahmanqaten/neurips-gnns-solve-deep-learning

In [6]:
import torch
from torch_geometric.data import Data
from rdkit import Chem
import numpy as np

# A dictionary to map atom symbols to integer indices
ATOM_MAP = {
    'C': 0, 'N': 1, 'O': 2, 'F': 3, 'P': 4, 'S': 5, 'Cl': 6, 'Br': 7, 'I': 8,
    'H': 9, # Include Hydrogen
    # Add other atoms if they appear in the dataset
}

In [7]:
from rdkit import Chem
from torch_geometric.data import Data
import torch
from rdkit import RDLogger
RDLogger.DisableLog('rdApp.*')  # disables all RDKit warnings

def smiles_to_graph(smiles_str: str, y_val=None):
    """
    Converts a SMILES string into a PyTorch Geometric Data object with enriched features.
    Ensures all atoms have the same feature length regardless of whether they are in ATOM_MAP.
    """
    mol = Chem.MolFromSmiles(smiles_str)
    if mol is None:
        return None

    node_features = []
    for atom in mol.GetAtoms():
        # ----- Atom type one-hot -----
        features = [0] * len(ATOM_MAP)
        if atom.GetSymbol() in ATOM_MAP:
            features[ATOM_MAP[atom.GetSymbol()]] = 1
            
        # ----- Base numeric features -----
        features.extend([
            atom.GetAtomicNum(),
            atom.GetDegree(),                     # instead of GetTotalDegree()
            atom.GetFormalCharge(),
            atom.GetTotalNumHs(includeNeighbors=True),  # instead of GetTotalNumHs()
            int(atom.GetIsAromatic())
        ])

        # ----- Hybridization one-hot -----
        hyb = atom.GetHybridization()
        features.extend([int(hyb == h) for h in [
            Chem.rdchem.HybridizationType.SP,
            Chem.rdchem.HybridizationType.SP2,
            Chem.rdchem.HybridizationType.SP3,
            Chem.rdchem.HybridizationType.SP3D,
            Chem.rdchem.HybridizationType.SP3D2
        ]])

        # ----- Chirality one-hot -----
        chiral_tag = atom.GetChiralTag()
        features.extend([int(chiral_tag == t) for t in [
            Chem.rdchem.ChiralType.CHI_UNSPECIFIED,
            Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CW,
            Chem.rdchem.ChiralType.CHI_TETRAHEDRAL_CCW,
            Chem.rdchem.ChiralType.CHI_OTHER
        ]])

        # ----- Ring membership & implicit valence -----
        features.append(int(atom.IsInRing()))
        features.append(atom.GetImplicitValence())

        node_features.append(features)

    x = torch.tensor(node_features, dtype=torch.float)

    # ----- Edge features -----
    edge_indices, edge_attrs = [], []
    for bond in mol.GetBonds():
        i, j = bond.GetBeginAtomIdx(), bond.GetEndAtomIdx()
        edge_indices.extend([(i, j), (j, i)])

        edge_feat = [
            bond.GetBondTypeAsDouble(),    # bond type
            int(bond.GetIsConjugated()),   # conjugation
            int(bond.IsInRing()),          # ring membership
            bond.GetStereo()               # stereo (as float)
        ]
        edge_attrs.extend([edge_feat, edge_feat])

    edge_index = torch.tensor(edge_indices, dtype=torch.long).t().contiguous()
    edge_attr = torch.tensor(edge_attrs, dtype=torch.float)

    # ----- Target -----
    if y_val is not None:
        y_tensor = torch.tensor([[y_val]], dtype=torch.float)
        return Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y_tensor, smiles=smiles_str)
    else:
        return Data(x=x, edge_index=edge_index, edge_attr=edge_attr, smiles=smiles_str)


In [8]:
TARGET = 'Tg' # We will start by training a model for Tg only
from tqdm import tqdm

# Create a clean DataFrame for this target
target_df = train_df[['SMILES', TARGET]].dropna()

print(f"Processing {len(target_df)} molecules for target '{TARGET}'...")
# Create a list of graph objects
data_list = [smiles_to_graph(row['SMILES'], row[TARGET]) for _, row in tqdm(target_df.iterrows(), total=len(target_df))]
data_list = [d for d in data_list if d is not None] # Filter out any failed conversions

print(f"Successfully created {len(data_list)} graph objects.")

# Split data into training and validation sets
train_data, val_data = train_test_split(data_list, test_size=0.15, random_state=42)

print("\nDataset and DataLoaders are ready!")

Processing 511 molecules for target 'Tg'...


100%|██████████| 511/511 [00:00<00:00, 952.90it/s]

Successfully created 511 graph objects.

Dataset and DataLoaders are ready!





In [9]:
# Brief demo of smiles to graph
example_smiles = 'CCO'
graph = smiles_to_graph(example_smiles)

print("--- Graph Conversion Successful! ---")
print("\nSMILES String:")
print(example_smiles)
print("\nGenerated Graph Object:")
print(graph)

# You can inspect the different parts of the graph object
print("\nNode Features (x):")
print(graph.x)
print(f"Shape: {graph.x.shape}") # Should be [num_atoms, num_node_features]

print("\nEdge Index (edge_index):")
print(graph.edge_index)
print(f"Shape: {graph.edge_index.shape}") # Should be [2, num_bonds * 2]

print("\nEdge Attributes (edge_attr):")
print(graph.edge_attr)
print(f"Shape: {graph.edge_attr.shape}") # Should be [num_bonds * 2, num_edge_features]

--- Graph Conversion Successful! ---

SMILES String:
CCO

Generated Graph Object:
Data(x=[3, 26], edge_index=[2, 4], edge_attr=[4, 4], smiles='CCO')

Node Features (x):
tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 6., 1., 0., 3., 0., 0., 0., 1.,
         0., 0., 1., 0., 0., 0., 0., 3.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 6., 2., 0., 2., 0., 0., 0., 1.,
         0., 0., 1., 0., 0., 0., 0., 2.],
        [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 8., 1., 0., 1., 0., 0., 0., 1.,
         0., 0., 1., 0., 0., 0., 0., 1.]])
Shape: torch.Size([3, 26])

Edge Index (edge_index):
tensor([[0, 1, 1, 2],
        [1, 0, 2, 1]])
Shape: torch.Size([2, 4])

Edge Attributes (edge_attr):
tensor([[1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.],
        [1., 0., 0., 0.]])
Shape: torch.Size([4, 4])


In [10]:
from torch_geometric.nn import GINConv, global_mean_pool
import torch.nn.functional as F
import torch.nn as nn

class GINModel(torch.nn.Module):
    def __init__(self, num_node_features, hidden_channels, num_layers, dropout):
        super(GINModel, self).__init__()
        torch.manual_seed(42)

        self.layers = torch.nn.ModuleList()
        in_channels = num_node_features
        out_channels = hidden_channels

        # First layer
        nn1 = nn.Sequential(
            nn.Linear(in_channels, out_channels),
            nn.ReLU(),
            nn.Linear(out_channels, out_channels),
        )
        self.layers.append(GINConv(nn1))

        # Hidden layers
        for _ in range(num_layers - 1):
            nnk = nn.Sequential(
                nn.Linear(out_channels, out_channels),
                nn.ReLU(),
                nn.Linear(out_channels, out_channels),
            )
            self.layers.append(GINConv(nnk))
        
        # Output layer
        self.lin = nn.Linear(out_channels, 1)
        self.dropout = dropout

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch

        for conv in self.layers:
            x = conv(x, edge_index)
            x = F.relu(x)
            x = F.dropout(x, p=self.dropout, training=self.training)

        x = global_mean_pool(x, batch)
        return self.lin(x)

# Get the number of features from our dataset to initialize the model
num_features = data_list[0].num_node_features
model = GINModel(num_node_features=num_features, hidden_channels=64, num_layers=3, dropout=0.25)
print("GNN Model Architecture:")
print(model)

GNN Model Architecture:
GINModel(
  (layers): ModuleList(
    (0): GINConv(nn=Sequential(
      (0): Linear(in_features=26, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=64, bias=True)
    ))
    (1-2): 2 x GINConv(nn=Sequential(
      (0): Linear(in_features=64, out_features=64, bias=True)
      (1): ReLU()
      (2): Linear(in_features=64, out_features=64, bias=True)
    ))
  )
  (lin): Linear(in_features=64, out_features=1, bias=True)
)


In [11]:
from torch_geometric.nn import GATConv

class GATModel(torch.nn.Module):
    def __init__(self, num_node_features, hidden_channels, num_layers=2, heads=4, dropout=0.25):
        super().__init__()
        self.convs = torch.nn.ModuleList()
        in_ch = num_node_features
        for i in range(num_layers):
            self.convs.append(GATConv(in_ch, hidden_channels, heads=heads))
            in_ch = hidden_channels * heads
        
        self.dropout = dropout
        
        # Projection head: Linear + LayerNorm + activation + Linear
        self.fc1 = torch.nn.Linear(in_ch, hidden_channels)
        self.norm = torch.nn.LayerNorm(hidden_channels)
        self.fc2 = torch.nn.Linear(hidden_channels, 1)

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch
        
        for conv in self.convs:
            x = F.elu(conv(x, edge_index))
            x = F.dropout(x, p=self.dropout, training=self.training)
        
        x = global_mean_pool(x, batch)
        
        # Projection head
        x = self.fc1(x)
        x = self.norm(x)
        x = F.relu(x)
        x = F.dropout(x, p=self.dropout, training=self.training)
        x = self.fc2(x)
        
        return x

In [12]:
from torch_geometric.nn import GCNConv

class GCNModel(torch.nn.Module):
    def __init__(self, num_node_features, hidden_channels, num_layers, dropout):
        super(GCNModel, self).__init__()
        torch.manual_seed(42)

        self.layers = torch.nn.ModuleList()
        in_channels = num_node_features
        out_channels = hidden_channels

        # First layer
        self.layers.append(GCNConv(in_channels, out_channels))

        # Hidden layers
        for i in range(num_layers - 1):
            self.layers.append(GCNConv(out_channels, out_channels))
        
        # Output layer
        self.lin = torch.nn.Linear(out_channels, 1)
        self.dropout = dropout

    def forward(self, data):
        x, edge_index, batch = data.x, data.edge_index, data.batch

        for conv in self.layers:
            x = conv(x, edge_index)
            x = F.relu(x)
            x = F.dropout(x, p=self.dropout, training=self.training)

        x = global_mean_pool(x, batch)
        return self.lin(x)

# Get the number of features from our dataset to initialize the model
num_features = data_list[0].num_node_features
model = GCNModel(num_node_features=num_features, hidden_channels=64, num_layers=3, dropout=0.25)
print("GCN Model Architecture:")
print(model)

GCN Model Architecture:
GCNModel(
  (layers): ModuleList(
    (0): GCNConv(26, 64)
    (1-2): 2 x GCNConv(64, 64)
  )
  (lin): Linear(in_features=64, out_features=1, bias=True)
)


In [13]:
TARGET_VARIABLES = ["Tg", "FFV", "Tc", "Density", "Rg"]
N_SPLITS = 5
RANDOM_STATE = 42
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

In [14]:
import optuna
from torch_geometric.loader import DataLoader
import torch.nn.functional as F
import numpy as np

def gnn_objective(trial, data_list, num_features, device=DEVICE, model="gcn"):
    # -----------------------------
    # Suggest hyperparameters
    # -----------------------------
    hidden_channels = trial.suggest_categorical("hidden_channels", [64, 128, 256])
    num_layers = trial.suggest_int("num_layers", 2, 3)
    dropout = trial.suggest_float("dropout", 0.1, 0.3)
    lr = trial.suggest_float("lr", 1e-4, 1e-2, log=True)
    weight_decay = trial.suggest_float("weight_decay", 1e-6, 1e-3, log=True)

    # -----------------------------
    # K-fold cross-validation
    # -----------------------------
    kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=RANDOM_STATE)
    val_losses = []

    for fold, (train_idx, val_idx) in enumerate(kf.split(data_list)):
        train_data = [data_list[i] for i in train_idx]
        val_data = [data_list[i] for i in val_idx]

        train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
        val_loader = DataLoader(val_data, batch_size=32)

        if model == "gin":      
            model = GINModel(
                num_node_features=num_features,
                hidden_channels=hidden_channels,
                num_layers=num_layers,
                dropout=dropout
            ).to(device)
        elif model == "gcn":
            model = GCNModel(
                num_node_features=num_features,
                hidden_channels=hidden_channels,
                num_layers=num_layers,
                dropout=dropout
            ).to(device)
        else:
            model = GATModel(
                num_node_features=num_features,
                hidden_channels=hidden_channels,
                num_layers=num_layers,
                dropout=dropout
            ).to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
        criterion = torch.nn.MSELoss()

        # Train only a few epochs for hyperparameter search
        for epoch in range(10):
            model.train()
            for batch in train_loader:
                batch = batch.to(device)
                optimizer.zero_grad()
                out = model(batch).view(-1)
                loss = criterion(out, batch.y.view(-1).float())
                loss.backward()
                optimizer.step()

        # Validation
        model.eval()
        fold_val_loss = 0
        with torch.no_grad():
            for batch in val_loader:
                batch = batch.to(device)
                out = model(batch).view(-1)
                fold_val_loss += criterion(out, batch.y.view(-1).float()).item()
        fold_val_loss /= len(val_loader)
        val_losses.append(fold_val_loss)

        # Pruning
        trial.report(fold_val_loss, fold)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()

    return np.mean(val_losses)

In [15]:
import optuna
import xgboost as xgb
import numpy as np
from sklearn.model_selection import cross_val_score, KFold

def meta_objective(trial, X_meta, y_meta):
    # Search space
    params = {
        "n_estimators": trial.suggest_int("n_estimators", 100, 1000),
        "max_depth": trial.suggest_int("max_depth", 2, 6),
        "learning_rate": trial.suggest_float("learning_rate", 1e-3, 0.3, log=True),
        "subsample": trial.suggest_float("subsample", 0.6, 1.0),
        "colsample_bytree": trial.suggest_float("colsample_bytree", 0.6, 1.0),
        "reg_lambda": trial.suggest_float("reg_lambda", 1e-3, 10.0, log=True),
        "reg_alpha": trial.suggest_float("reg_alpha", 1e-3, 10.0, log=True),
        "n_jobs": -1,
        "random_state": 42,
    }

    model = xgb.XGBRegressor(**params)

    # 5-fold CV on meta dataset
    kf = KFold(n_splits=5, shuffle=True, random_state=42)
    scores = cross_val_score(model, X_meta, y_meta, cv=kf, scoring="neg_mean_squared_error")

    # Optuna minimizes, so return positive RMSE
    rmse = np.mean(np.sqrt(-scores))
    return rmse

# --- Run optimization ---
def tune_meta_model(X_meta, y_meta, n_trials=50):
    study = optuna.create_study(direction="minimize")
    study.optimize(lambda trial: meta_objective(trial, X_meta, y_meta), n_trials=n_trials, n_jobs=-1)
    print("Best params:", study.best_params)
    print("Best RMSE:", study.best_value)

    # Train final model with best params
    best_model = xgb.XGBRegressor(**study.best_params, n_jobs=-1, random_state=42)
    best_model.fit(X_meta, y_meta)

    return best_model, study.best_params

In [16]:
import pandas as pd
import numpy as np
import warnings
import random
import torch
from torch import nn
from tqdm.auto import tqdm
import gc

from rdkit import Chem
from rdkit.Chem import Descriptors, AllChem
from rdkit.ML.Descriptors import MoleculeDescriptors

import xgboost as xgb
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler

from dask.distributed import Client, LocalCluster

cluster = LocalCluster(n_workers=4, threads_per_worker=1)
client = Client(cluster)
print(client)

gnn_predictions_df = pd.DataFrame({'id': test_df['id']})
test_graphs = [smiles_to_graph(s) for s in test_df['SMILES']]; test_graphs = [g for g in test_graphs if g is not None]
test_loader = DataLoader(test_graphs, batch_size=128, shuffle=False)
num_features = smiles_to_graph(train_df['SMILES'][0]).num_node_features

for target in TARGET_VARIABLES:
    print(f"  Training GNN for: {target}...")
    target_df = train_df[['SMILES', target]].dropna()
    scaler = StandardScaler()
    target_values = target_df[target].values.reshape(-1, 1)
    target_values_scaled = scaler.fit_transform(target_values).flatten()

    data_list = [
        smiles_to_graph(row.SMILES, y_scaled)
        for (row, y_scaled) in zip(target_df.itertuples(index=False), target_values_scaled)
    ]

    # ----- Optuna hyperparameter tuning -----
    def optimize_model(model_type):
        study = optuna.create_study(direction="minimize", study_name=f"{model_type}_{target}", sampler=optuna.samplers.TPESampler())
        study.optimize(lambda trial: gnn_objective(trial, data_list, num_features, model=model_type), n_trials=30, n_jobs=1)
        return model_type, study.best_trial.params

    futures = client.map(optimize_model, ["gcn", "gin", "gat"])
    results = client.gather(futures)
    best_params = {model_type: params for model_type, params in results}
    print(f"Best hyperparameters: {best_params}")

    # ----- KFold training and stacking -----
    test_preds_folds = np.zeros(len(test_df))
    oof_preds = []               # will hold (n_val_fold, 3) per fold
    y_meta_scaled_parts = []     # will hold (n_val_fold,) per fold using val_idx mapping
    test_preds_folds_list = []   # will hold (n_test, 3) per fold
    kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=42)
    
    for fold, (train_idx, val_idx) in enumerate(kf.split(data_list)):
        print(f"Fold {fold+1}/{N_SPLITS}")
        train_data = [data_list[i] for i in train_idx]
        val_data   = [data_list[i] for i in val_idx]
    
        train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
        val_loader   = DataLoader(val_data, batch_size=128, shuffle=False)
    
        # instantiate models with best params (example uses keys used earlier)
        model_gcn = GCNModel(num_node_features=num_features,
                             hidden_channels=best_params["gcn"]["hidden_channels"],
                             num_layers=best_params["gcn"]["num_layers"],
                             dropout=best_params["gcn"]["dropout"]).to(DEVICE)
        model_gin = GINModel(num_node_features=num_features,
                             hidden_channels=best_params["gin"]["hidden_channels"],
                             num_layers=best_params["gin"]["num_layers"],
                             dropout=best_params["gin"]["dropout"]).to(DEVICE)
        model_gat = GATModel(num_node_features=num_features,
                             hidden_channels=best_params["gat"]["hidden_channels"],
                             num_layers=best_params["gat"]["num_layers"],
                             dropout=best_params["gat"]["dropout"]).to(DEVICE)
    
        opt_gcn = torch.optim.Adam(model_gcn.parameters(), lr=best_params["gcn"]["lr"], weight_decay=best_params["gcn"]["weight_decay"])
        opt_gin = torch.optim.Adam(model_gin.parameters(), lr=best_params["gin"]["lr"], weight_decay=best_params["gin"]["weight_decay"])
        opt_gat = torch.optim.Adam(model_gat.parameters(), lr=best_params["gat"]["lr"], weight_decay=best_params["gat"]["weight_decay"])
    
        criterion = torch.nn.MSELoss()
    
        # training loop
        for epoch in range(100):
            model_gcn.train(); model_gin.train(); model_gat.train()
            for batch in train_loader:
                # parallel streams (optional)
                s1 = torch.cuda.Stream(); s2 = torch.cuda.Stream(); s3 = torch.cuda.Stream()
                with torch.cuda.stream(s1):
                    d1 = batch.to(DEVICE)
                    opt_gcn.zero_grad()
                    loss_gcn = criterion(model_gcn(d1), d1.y)
                    loss_gcn.backward()
                    opt_gcn.step()
                with torch.cuda.stream(s2):
                    d2 = batch.to(DEVICE)
                    opt_gin.zero_grad()
                    loss_gin = criterion(model_gin(d2), d2.y)
                    loss_gin.backward()
                    opt_gin.step()
                with torch.cuda.stream(s3):
                    d3 = batch.to(DEVICE)
                    opt_gat.zero_grad()
                    loss_gat = criterion(model_gat(d3), d3.y)
                    loss_gat.backward()
                    opt_gat.step()
                torch.cuda.synchronize()
    
        # ---- Generate OOF preds for this fold (keep SCALED-space) ----
        model_gcn.eval(); model_gin.eval(); model_gat.eval()
        fold_val_rows = []   # will append (batch_size, 3) arrays
        with torch.no_grad():
            for batch in val_loader:
                batch = batch.to(DEVICE)
                p_gcn = model_gcn(batch).cpu().numpy().reshape(-1)   # (b,)
                p_gin = model_gin(batch).cpu().numpy().reshape(-1)
                p_gat = model_gat(batch).cpu().numpy().reshape(-1)
    
                stacked = np.stack([p_gcn, p_gin, p_gat], axis=1)   # (b,3)
                fold_val_rows.append(stacked)
    
        fold_val_arr = np.vstack(fold_val_rows)   # (n_val_fold, 3)
        oof_preds.append(fold_val_arr)
    
        # collect true scaled targets for this validation fold via val_idx mapping
        # target_values_scaled must be numpy array aligned with data_list
        y_fold_scaled = target_values_scaled[val_idx]     # (n_val_fold,)
        y_meta_scaled_parts.append(y_fold_scaled)
    
        # ---- Test predictions for this fold (collect scaled preds) ----
        fold_test_rows = []
        with torch.no_grad():
            for batch in test_loader:
                batch = batch.to(DEVICE)
                p_gcn = model_gcn(batch).cpu().numpy().reshape(-1)
                p_gin = model_gin(batch).cpu().numpy().reshape(-1)
                p_gat = model_gat(batch).cpu().numpy().reshape(-1)
    
                stacked_test = np.stack([p_gcn, p_gin, p_gat], axis=1)  # (batch_test,3)
                fold_test_rows.append(stacked_test)
    
        fold_test_arr = np.vstack(fold_test_rows)   # (n_test, 3)
        test_preds_folds_list.append(fold_test_arr)
    
        # cleanup
        del model_gcn, model_gin, model_gat
        torch.cuda.empty_cache(); gc.collect()
    
    # ---- after all folds: build meta training set ----
    X_meta = np.vstack(oof_preds)
    y_meta_scaled = np.concatenate(y_meta_scaled_parts) # (n_train_total,)
    
    fold_weights = []
    for fold_preds, y_val_fold in zip(oof_preds, y_meta_scaled_parts):
        # Compute fold RMSE or correlation
        rmse = np.sqrt(np.mean((fold_preds.mean(axis=1) - y_val_fold)**2))
        fold_weights.append(1 / rmse)  # better folds get higher weight
    fold_weights = np.array(fold_weights) / np.sum(fold_weights)
    
    # weighted average of test predictions across folds
    X_test_meta = np.average(np.stack(test_preds_folds_list, axis=0), axis=0, weights=fold_weights)
    
    # Sanity checks
    # print("SHAPES: X_meta", X_meta.shape, "y_meta_scaled", y_meta_scaled.shape, "X_test_meta", X_test_meta.shape)
    # print("RANGES: X_meta min/max", X_meta.min(), X_meta.max(), "y_meta_scaled min/max", y_meta_scaled.min(), y_meta_scaled.max())
    # print("SAMPLE: X_meta[0:5]\n", X_meta[:5])
    # print("SAMPLE: y_meta_scaled[0:5]\n", y_meta_scaled[:5])
    
    # ---- Fit XGBoost meta-model on SCALED space ----
    meta_model, best_params = tune_meta_model(X_meta, y_meta_scaled, n_trials=100)
    
    # ---- Meta predictions on test set (scaled -> unscale once) ----
    y_test_meta_scaled = meta_model.predict(X_test_meta)          # shape (n_test,)
    y_test_meta = scaler.inverse_transform(y_test_meta_scaled.reshape(-1,1)).flatten()
    
    # store
    gnn_predictions_df[target] = y_test_meta

print("--- Stacked Predictions ready. ---")

<Client: 'tcp://127.0.0.1:46205' processes=4 threads=4, memory=29.00 GiB>
  Training GNN for: Tg...


[I 2025-09-15 17:46:11,891] A new study created in memory with name: gin_Tg
[I 2025-09-15 17:46:13,171] A new study created in memory with name: gcn_Tg
[I 2025-09-15 17:46:13,342] A new study created in memory with name: gat_Tg
[I 2025-09-15 17:46:21,967] Trial 0 finished with value: 0.5045575559139251 and parameters: {'hidden_channels': 64, 'num_layers': 2, 'dropout': 0.10912335311048704, 'lr': 0.002414088288836432, 'weight_decay': 1.169407586680422e-06}. Best is trial 0 with value: 0.5045575559139251.
[I 2025-09-15 17:46:22,670] Trial 0 finished with value: 0.5523644208908081 and parameters: {'hidden_channels': 64, 'num_layers': 3, 'dropout': 0.18517428504412814, 'lr': 0.0001816369988281371, 'weight_decay': 4.671988574476392e-06}. Best is trial 0 with value: 0.5523644208908081.
[I 2025-09-15 17:46:24,882] Trial 0 finished with value: 0.6670994147658348 and parameters: {'hidden_channels': 256, 'num_layers': 3, 'dropout': 0.12985410348987314, 'lr': 0.003712667730626587, 'weight_decay':

Best hyperparameters: {'gcn': {'hidden_channels': 256, 'num_layers': 2, 'dropout': 0.12541183142357903, 'lr': 0.0035045879172940935, 'weight_decay': 1.4300981942232255e-05}, 'gin': {'hidden_channels': 256, 'num_layers': 2, 'dropout': 0.2625427366051246, 'lr': 0.0007946107674521004, 'weight_decay': 6.56580584206386e-05}, 'gat': {'hidden_channels': 64, 'num_layers': 2, 'dropout': 0.24720557961241477, 'lr': 0.0029478232355022774, 'weight_decay': 1.6245404700466167e-05}}
Fold 1/5
Fold 2/5
Fold 3/5
Fold 4/5
Fold 5/5


[I 2025-09-15 17:50:36,676] A new study created in memory with name: no-name-b7574a84-f7a6-4f2c-97f4-9b751989dc89
[I 2025-09-15 17:50:42,845] Trial 1 finished with value: 0.6176631999180604 and parameters: {'n_estimators': 652, 'max_depth': 3, 'learning_rate': 0.004583978219913395, 'subsample': 0.9118354445849913, 'colsample_bytree': 0.9800746075764528, 'reg_lambda': 0.012240362346755004, 'reg_alpha': 2.6322013356339338}. Best is trial 1 with value: 0.6176631999180604.
[I 2025-09-15 17:50:50,283] Trial 3 finished with value: 0.6567630465379264 and parameters: {'n_estimators': 778, 'max_depth': 6, 'learning_rate': 0.0020981097817964166, 'subsample': 0.7965820934404526, 'colsample_bytree': 0.688853759830993, 'reg_lambda': 0.1502778417163731, 'reg_alpha': 0.0014566092005623705}. Best is trial 1 with value: 0.6176631999180604.
[I 2025-09-15 17:50:50,809] Trial 2 finished with value: 0.7020357346863219 and parameters: {'n_estimators': 994, 'max_depth': 5, 'learning_rate': 0.0157860020593704

Best params: {'n_estimators': 634, 'max_depth': 3, 'learning_rate': 0.08828857245837879, 'subsample': 0.8673048082337482, 'colsample_bytree': 0.7608576332212862, 'reg_lambda': 0.07112139641033376, 'reg_alpha': 8.37844142793409}
Best RMSE: 0.6164194968348695
  Training GNN for: FFV...


This may cause some slowdown.
Consider loading the data with Dask directly
 or using futures or delayed objects to embed the data into the graph without repetition.
See also https://docs.dask.org/en/stable/best-practices.html#load-data-with-dask for more information.
[I 2025-09-15 17:53:42,094] A new study created in memory with name: gat_FFV
[I 2025-09-15 17:53:43,382] A new study created in memory with name: gin_FFV
[I 2025-09-15 17:53:49,406] A new study created in memory with name: gcn_FFV
[I 2025-09-15 17:55:43,078] Trial 0 finished with value: 0.4602962155911056 and parameters: {'hidden_channels': 128, 'num_layers': 2, 'dropout': 0.22438759821605908, 'lr': 0.0001060464016937747, 'weight_decay': 0.00012998416679678264}. Best is trial 0 with value: 0.4602962155911056.
[I 2025-09-15 17:56:53,346] Trial 0 finished with value: 0.36008419201455333 and parameters: {'hidden_channels': 256, 'num_layers': 3, 'dropout': 0.23565002610501143, 'lr': 0.00034817592783432304, 'weight_decay': 3.98

Best hyperparameters: {'gcn': {'hidden_channels': 128, 'num_layers': 3, 'dropout': 0.11679280251926744, 'lr': 0.001787949458405508, 'weight_decay': 0.00011714481897539817}, 'gin': {'hidden_channels': 256, 'num_layers': 2, 'dropout': 0.23125514578595177, 'lr': 0.0010990016902223164, 'weight_decay': 4.833474640925903e-06}, 'gat': {'hidden_channels': 128, 'num_layers': 2, 'dropout': 0.12490582349710022, 'lr': 0.002445905988473261, 'weight_decay': 2.783087672650774e-06}}
Fold 1/5
Fold 2/5
Fold 3/5
Fold 4/5
Fold 5/5


[I 2025-09-15 18:52:49,574] A new study created in memory with name: no-name-23ae4ded-c9f9-4944-bf74-10a7ae7c0298
[I 2025-09-15 18:52:52,872] Trial 0 finished with value: 0.828162314869779 and parameters: {'n_estimators': 188, 'max_depth': 5, 'learning_rate': 0.0012460718247151452, 'subsample': 0.9438602797340495, 'colsample_bytree': 0.9147297316207184, 'reg_lambda': 0.5238166426398095, 'reg_alpha': 0.002338930400321933}. Best is trial 0 with value: 0.828162314869779.
[I 2025-09-15 18:52:56,443] Trial 3 finished with value: 0.6390690606730691 and parameters: {'n_estimators': 623, 'max_depth': 3, 'learning_rate': 0.001004190097286555, 'subsample': 0.7586810330615613, 'colsample_bytree': 0.823435942907226, 'reg_lambda': 0.20381488991232566, 'reg_alpha': 0.023802504759916413}. Best is trial 3 with value: 0.6390690606730691.
[I 2025-09-15 18:52:57,508] Trial 1 finished with value: 0.4996326917220698 and parameters: {'n_estimators': 957, 'max_depth': 2, 'learning_rate': 0.001400236764003413

Best params: {'n_estimators': 999, 'max_depth': 3, 'learning_rate': 0.1563090595567522, 'subsample': 0.9870310630559629, 'colsample_bytree': 0.6530784963251083, 'reg_lambda': 0.1266101246684921, 'reg_alpha': 6.942026640730367}
Best RMSE: 0.3640635470247028
  Training GNN for: Tc...


[I 2025-09-15 18:55:12,271] A new study created in memory with name: gcn_Tc
[I 2025-09-15 18:55:12,692] A new study created in memory with name: gin_Tc
[I 2025-09-15 18:55:12,922] A new study created in memory with name: gat_Tc
[I 2025-09-15 18:55:23,443] Trial 0 finished with value: 0.31797125369310375 and parameters: {'hidden_channels': 128, 'num_layers': 2, 'dropout': 0.2275817118108675, 'lr': 0.0002503513133441385, 'weight_decay': 0.00011264530701827172}. Best is trial 0 with value: 0.31797125369310375.
[I 2025-09-15 18:55:25,885] Trial 0 finished with value: 0.27080231994390486 and parameters: {'hidden_channels': 256, 'num_layers': 3, 'dropout': 0.1649028610460527, 'lr': 0.0015251127785694864, 'weight_decay': 3.262707952325129e-05}. Best is trial 0 with value: 0.27080231994390486.
[I 2025-09-15 18:55:26,947] Trial 0 finished with value: 0.27051384568214415 and parameters: {'hidden_channels': 64, 'num_layers': 3, 'dropout': 0.2425933074687296, 'lr': 0.0032687292409152103, 'weight_d

Best hyperparameters: {'gcn': {'hidden_channels': 64, 'num_layers': 2, 'dropout': 0.2179666411041727, 'lr': 0.006099609131146011, 'weight_decay': 7.482005684078252e-05}, 'gin': {'hidden_channels': 128, 'num_layers': 2, 'dropout': 0.20653869116462303, 'lr': 0.0034874994893391246, 'weight_decay': 1.8487886143460044e-05}, 'gat': {'hidden_channels': 64, 'num_layers': 3, 'dropout': 0.27668952053398344, 'lr': 0.0044529550858888696, 'weight_decay': 0.00013055929892724367}}
Fold 1/5
Fold 2/5
Fold 3/5
Fold 4/5
Fold 5/5


[I 2025-09-15 19:01:00,931] A new study created in memory with name: no-name-2cc8b7f3-7ea7-4b08-8578-45684ed92d02
[I 2025-09-15 19:01:04,665] Trial 1 finished with value: 0.7560876247032177 and parameters: {'n_estimators': 252, 'max_depth': 5, 'learning_rate': 0.001481652435007036, 'subsample': 0.6089930358010694, 'colsample_bytree': 0.7692406215831444, 'reg_lambda': 0.008633101804285712, 'reg_alpha': 0.18925941999467807}. Best is trial 1 with value: 0.7560876247032177.
[I 2025-09-15 19:01:05,705] Trial 3 finished with value: 0.43120961455662854 and parameters: {'n_estimators': 693, 'max_depth': 3, 'learning_rate': 0.1945741648506156, 'subsample': 0.8263277055538116, 'colsample_bytree': 0.878820004842094, 'reg_lambda': 1.0247039738844084, 'reg_alpha': 6.050183439143163}. Best is trial 3 with value: 0.43120961455662854.
[I 2025-09-15 19:01:07,195] Trial 5 finished with value: 0.42861930268364573 and parameters: {'n_estimators': 204, 'max_depth': 2, 'learning_rate': 0.03251389149569565, 

Best params: {'n_estimators': 751, 'max_depth': 2, 'learning_rate': 0.007411466019299459, 'subsample': 0.7349146742710068, 'colsample_bytree': 0.8361328540141292, 'reg_lambda': 3.686992169584631, 'reg_alpha': 0.03863238973896931}
Best RMSE: 0.4272074983919564
  Training GNN for: Density...


[I 2025-09-15 19:02:58,859] A new study created in memory with name: gcn_Density
[I 2025-09-15 19:02:59,257] A new study created in memory with name: gin_Density
[I 2025-09-15 19:02:59,567] A new study created in memory with name: gat_Density
[I 2025-09-15 19:03:07,938] Trial 0 finished with value: 0.18099453914910554 and parameters: {'hidden_channels': 256, 'num_layers': 2, 'dropout': 0.14183739794044875, 'lr': 0.0033336679735298954, 'weight_decay': 5.186828789467003e-05}. Best is trial 0 with value: 0.18099453914910554.
[I 2025-09-15 19:03:08,422] Trial 0 finished with value: 0.1737805999815464 and parameters: {'hidden_channels': 64, 'num_layers': 2, 'dropout': 0.22434703978568155, 'lr': 0.007566520271330583, 'weight_decay': 3.148538085644157e-06}. Best is trial 0 with value: 0.1737805999815464.
[I 2025-09-15 19:03:08,942] Trial 0 finished with value: 0.20048832893371582 and parameters: {'hidden_channels': 128, 'num_layers': 2, 'dropout': 0.22995168033918925, 'lr': 0.0002053679728335

Best hyperparameters: {'gcn': {'hidden_channels': 256, 'num_layers': 3, 'dropout': 0.18715668928257673, 'lr': 0.0006410492765130201, 'weight_decay': 1.313776948172849e-05}, 'gin': {'hidden_channels': 64, 'num_layers': 2, 'dropout': 0.22434703978568155, 'lr': 0.007566520271330583, 'weight_decay': 3.148538085644157e-06}, 'gat': {'hidden_channels': 256, 'num_layers': 2, 'dropout': 0.2108909984570863, 'lr': 0.0014395350230181874, 'weight_decay': 0.00040331735791108677}}
Fold 1/5
Fold 2/5
Fold 3/5
Fold 4/5
Fold 5/5


[I 2025-09-15 19:08:06,700] A new study created in memory with name: no-name-b0caf6e4-7bf5-4ffa-b6fe-2a7947e9678f
[I 2025-09-15 19:08:09,472] Trial 0 finished with value: 0.409750337432777 and parameters: {'n_estimators': 302, 'max_depth': 3, 'learning_rate': 0.01734391927458548, 'subsample': 0.9871993112958859, 'colsample_bytree': 0.7130747767384222, 'reg_lambda': 2.1962605763026852, 'reg_alpha': 0.3133662679514646}. Best is trial 0 with value: 0.409750337432777.
[I 2025-09-15 19:08:09,908] Trial 3 finished with value: 0.7687367993275407 and parameters: {'n_estimators': 270, 'max_depth': 4, 'learning_rate': 0.0012264410487518086, 'subsample': 0.8205876667245243, 'colsample_bytree': 0.6773448825163344, 'reg_lambda': 0.0013325762702313954, 'reg_alpha': 0.17056122414360075}. Best is trial 0 with value: 0.409750337432777.
[I 2025-09-15 19:08:12,507] Trial 4 finished with value: 0.4355588258899816 and parameters: {'n_estimators': 218, 'max_depth': 5, 'learning_rate': 0.08449754452427337, '

Best params: {'n_estimators': 642, 'max_depth': 3, 'learning_rate': 0.03873472151533968, 'subsample': 0.6644013132542179, 'colsample_bytree': 0.9604655878794391, 'reg_lambda': 1.3433775074014986, 'reg_alpha': 1.4641714880467074}
Best RMSE: 0.39559336201461714
  Training GNN for: Rg...


[I 2025-09-15 19:10:29,802] A new study created in memory with name: gcn_Rg
[I 2025-09-15 19:10:30,188] A new study created in memory with name: gin_Rg
[I 2025-09-15 19:10:30,547] A new study created in memory with name: gat_Rg
[I 2025-09-15 19:10:40,919] Trial 0 finished with value: 0.9546008169651031 and parameters: {'hidden_channels': 64, 'num_layers': 3, 'dropout': 0.22093209493005694, 'lr': 0.0020148459731505, 'weight_decay': 1.253973371836434e-05}. Best is trial 0 with value: 0.9546008169651031.
[I 2025-09-15 19:10:41,696] Trial 0 finished with value: 0.921439790725708 and parameters: {'hidden_channels': 256, 'num_layers': 3, 'dropout': 0.14223777491431294, 'lr': 0.0003102066496192895, 'weight_decay': 1.8765011054364e-05}. Best is trial 0 with value: 0.921439790725708.
[I 2025-09-15 19:10:42,287] Trial 0 finished with value: 0.9980017900466919 and parameters: {'hidden_channels': 64, 'num_layers': 3, 'dropout': 0.23388653104780324, 'lr': 0.0019430307705072356, 'weight_decay': 0.00

Best hyperparameters: {'gcn': {'hidden_channels': 64, 'num_layers': 2, 'dropout': 0.10517147734610861, 'lr': 0.002256055127273549, 'weight_decay': 3.090979127295896e-06}, 'gin': {'hidden_channels': 256, 'num_layers': 3, 'dropout': 0.14223777491431294, 'lr': 0.0003102066496192895, 'weight_decay': 1.8765011054364e-05}, 'gat': {'hidden_channels': 64, 'num_layers': 2, 'dropout': 0.2135044962664066, 'lr': 0.0015306764028193136, 'weight_decay': 7.306159640674101e-05}}
Fold 1/5
Fold 2/5
Fold 3/5
Fold 4/5
Fold 5/5


[I 2025-09-15 19:15:39,378] A new study created in memory with name: no-name-220780c1-c187-464a-ae79-ce5261bc698c
[I 2025-09-15 19:15:45,952] Trial 3 finished with value: 0.5987858990405059 and parameters: {'n_estimators': 725, 'max_depth': 3, 'learning_rate': 0.02488295525580746, 'subsample': 0.790478539510701, 'colsample_bytree': 0.7544758897025344, 'reg_lambda': 0.004951159464010164, 'reg_alpha': 0.06874464501478043}. Best is trial 3 with value: 0.5987858990405059.
[I 2025-09-15 19:15:46,093] Trial 0 finished with value: 0.659682869548211 and parameters: {'n_estimators': 734, 'max_depth': 3, 'learning_rate': 0.19426337571156943, 'subsample': 0.960391087907287, 'colsample_bytree': 0.7444493216505175, 'reg_lambda': 0.017217947879838436, 'reg_alpha': 0.5055673243294694}. Best is trial 3 with value: 0.5987858990405059.
[I 2025-09-15 19:15:47,484] Trial 2 finished with value: 0.6505160064787427 and parameters: {'n_estimators': 891, 'max_depth': 3, 'learning_rate': 0.15925765032448558, 's

Best params: {'n_estimators': 139, 'max_depth': 2, 'learning_rate': 0.024344643643268066, 'subsample': 0.6609273811229834, 'colsample_bytree': 0.8210674208189493, 'reg_lambda': 0.0012642243710924877, 'reg_alpha': 1.478803356008023}
Best RMSE: 0.5553111195950124
--- Stacked Predictions ready. ---


In [17]:
# Make a submission
final_submission = gnn_predictions_df[sample_df.columns]
final_submission.to_csv('submission.csv', index=False)
print(final_submission.head())

           id          Tg       FFV        Tc   Density         Rg
0  1109053969  106.069267  0.372842  0.198057  1.249553  20.982969
1  1422188626  170.020294  0.376108  0.239984  1.072581  21.188452
2  2032016830  116.134842  0.356908  0.237751  1.085719  20.905579
