## Imports ##

In [6]:
import re
from pathlib import Path
from itertools import product
from topobench.nn.backbones.graph.gat_v4 import GATv4
from topobench.nn.readouts.ftd_readout import FTDReadOut
from topobench.nn.encoders.all_cell_encoder import AllCellFeatureEncoder
from torch_geometric.nn import GAT, GCN, global_mean_pool
import yaml
import hydra
from hydra import compose, initialize
from hydra.utils import instantiate
from topobench.utils.config_resolvers import (
    get_default_transform,
    get_monitor_metric,
    get_monitor_mode,
    infer_in_channels,
    infer_num_cell_dimensions,
    get_flattened_feature_matrix_dim,
    
)
from hydra.core.global_hydra import GlobalHydra  




# Getting Number of runs per experiment #

In [4]:

def count_runs_in_block(lines):
    params = {}
    for line in lines:
        if "=" not in line or line.strip().startswith("python") or "--multirun" in line or line.strip().endswith("&"):
            continue
        key, val = line.split("=", 1)
        key = key.strip()
        val = val.strip().strip("\\")
        
        # Special case: multiple escaped bracketed items (e.g. \[...\],\[...\],...)
        if "\\[" in val and "\\]" in val:
            # Find all escaped bracketed expressions
            matches = re.findall(r'(\[.*?\\\])', val)
            cleaned = [m.replace("\\", "") for m in matches]
            params[key] = cleaned
        elif "," in val:
            params[key] = val.split(",")
        else:
            params[key] = [val]
    # Cartesian product of all parameter options
    total = 1
    for v in params.values():
        total *= len(v)
    return total


def parse_hydra_sh(filepath):
    with open(filepath, "r") as f:
        lines = [line.strip() for line in f.readlines()]
    
    blocks = []
    current = []
    for line in lines:
        if line.startswith("python -m"):
            if current:
                blocks.append(current)
                current = []
        current.append(line)
    if current:
        blocks.append(current)

    results = []
    for i, block in enumerate(blocks):
        run_count = count_runs_in_block(block)
        results.append((i + 1, run_count))
    
    for block_num, count in results:
        print(f"Block {block_num}: {count} runs")
    print(f"Total: {sum(count for _, count in results)} runs")
    return results

In [5]:
results = parse_hydra_sh("/home/lcornelis/code/TopoProteo/topoproteo_experiments.sh") 


Block 1: 11520 runs
Block 2: 11520 runs
Block 3: 11520 runs
Block 4: 11520 runs
Block 5: 11520 runs
Block 6: 11520 runs
Block 7: 720 runs
Block 8: 720 runs
Total: 70560 runs


# Getting Number of Parameters Per Model #

In [87]:
gat_v4_hidden_channels= [[8, 16], [64, 128]]
gat_v4_heads= [[2, 2], [4, 4]]
fc_out_channels_gatv4 = [1,8]

# gat_num_layers= [2, 4]  # only for GAT and GCN
# gat_hidden_channels= [8, 16]
# gat_heads= [2, 4]
# fc_out_channels_gat = [8,16]

gat_num_layers = [2]                     # Fix to 2
gat_hidden_channels = [4, 8]             # Lower than before
gat_heads = [2]                          # Avoid high head count
fc_out_channels_gat = [4]   

# gcn_num_layers= [2, 4] 
# gcn_hidden_channels= [8, 16]
# fc_out_channels_gcn = [8,16]

gcn_num_layers = [2]
gcn_hidden_channels = [4]
fc_out_channels_gcn = [4] 
fc_dims= [128,64,32],[256,128,64]
graph_encoder_dims= 256, 128

readout_graph_encoder_dims = [512,256],[256,128] #[512,256,128]
readout_fc_dims= [128,64,32], [512,512,256,128], [1024, 1024,512,256],
readout_fc_acts = ['relu', 'tanh']


In [70]:
if GlobalHydra().is_initialized():
    GlobalHydra().clear()

initialize(config_path="../TopoProteo/configs", job_name="job")

def load_config_and_model(model_name, hidden_channels, heads, out_channels, fc_dim, graph_encoder_dim):
    cfg = compose(
            config_name="run.yaml",
            overrides=[
                f"model={model_name}",
                f"dataset=graph/FTD",

                # Feature Encoder
                f"model.feature_encoder.out_channels={out_channels}",

                # Backbone
                f"model.backbone.hidden_channels={hidden_channels}",
                f"model.backbone.heads={heads}",

                # Readout
                f"model.readout.fc_dim={fc_dim}",
                f"model.readout.graph_encoder_dim={graph_encoder_dim}",
                # f"model.readout.fc_act={fc_act}",
            ],
            return_hydra_config=True
        )
    model = hydra.utils.instantiate(
                cfg.model,
                evaluator=cfg.evaluator,
                optimizer=cfg.optimizer,
                loss=cfg.loss,
            )
    return model 


The version_base parameter is not specified.
Please specify a compatability version level, or None.
Will assume defaults for version 1.1
  initialize(config_path="../TopoProteo/configs", job_name="job")


## GAT-v4 ##

In [71]:
total_params_list_combined = []

# Iterate over GATv4-specific parameter combinations
for feature_encoder_out_channels in fc_out_channels_gatv4:
    for hidden_channels in gat_v4_hidden_channels:
        for heads in gat_v4_heads:
            for fc_dim in fc_dims:
                for graph_encoder_dim in graph_encoder_dims:
                    try:
                        # Load model with current config
                        model = load_config_and_model(
                            model_name="graph/gatv4",
                            hidden_channels=hidden_channels,
                            heads=heads,
                            out_channels=feature_encoder_out_channels,
                            fc_dim=fc_dim,
                            graph_encoder_dim=graph_encoder_dim,
                        )
                        # Count parameters
                        total_params = sum(p.numel() for p in model.parameters())
                        total_params_list_combined.append(total_params)

                    except Exception as e:
                        print(f"⚠️ Skipped combo due to error: {e}")
                        continue

# Summary
print("Max number of parameters: ", max(total_params_list_combined))
print("Min number of parameters: ", min(total_params_list_combined))
print("Total configurations tested: ", len(total_params_list_combined))

Max number of parameters:  6053558
Min number of parameters:  2876637
Total configurations tested:  32


## GAT ##

In [72]:
def load_config_and_model_gat(model_name, hidden_channels, heads, out_channels, num_layers, fc_dim, graph_encoder_dim):
    cfg = compose(
        config_name="run.yaml",
        overrides=[
            f"model={model_name}",
            f"dataset=graph/FTD",

            # Feature Encoder
            f"model.feature_encoder.out_channels={out_channels}",

            # Backbone (GAT-specific)
            f"model.backbone.hidden_channels={hidden_channels}",
            f"model.backbone.heads={heads}",
            f"model.backbone.num_layers={num_layers}",
            # Readout
            f"model.readout.fc_dim={fc_dim}",
            f"model.readout.graph_encoder_dim={graph_encoder_dim}",
            # f"model.readout.fc_act={fc_act}",
        ],
        return_hydra_config=True
    )
    model = hydra.utils.instantiate(
        cfg.model,
        evaluator=cfg.evaluator,
        optimizer=cfg.optimizer,
        loss=cfg.loss,
    )
    return model

In [73]:
total_params_list_combined = []

# Iterate over GAT parameter combinations
for feature_encoder_out_channels in fc_out_channels_gat:
    for num_layers in gat_num_layers:
        for hidden_channels in gat_hidden_channels:
            for heads in gat_heads:
                for fc_dim in fc_dims:
                    for graph_encoder_dim in graph_encoder_dims:
                        try:
                            model = load_config_and_model_gat(
                                model_name="graph/gat",
                                hidden_channels=hidden_channels,
                                heads=heads,
                                out_channels=feature_encoder_out_channels,
                                num_layers=num_layers,
                                fc_dim=fc_dim,
                                graph_encoder_dim=graph_encoder_dim,
                            )
                            total_params = sum(p.numel() for p in model.parameters())
                            total_params_list_combined.append(total_params)
                        except Exception as e:
                            print(f"⚠️ Skipped combo due to error: {e}")
                            continue

# Summary
print("Max number of parameters:", max(total_params_list_combined))
print("Min number of parameters:", min(total_params_list_combined))
print("Total configurations tested:", len(total_params_list_combined))


Max number of parameters: 7618074
Min number of parameters: 3796986
Total configurations tested: 8


## GCN ##

In [88]:
def load_config_and_model_gcn(model_name, num_layers, hidden_channels, out_channels, fc_dim, graph_encoder_dim):
    cfg = compose(
        config_name="run.yaml",
        overrides=[
            f"model={model_name}",
            f"dataset=graph/FTD",

            # Feature Encoder
            f"model.feature_encoder.out_channels={out_channels}",

            # Backbone (GCN-specific)
            f"model.backbone.num_layers={num_layers}",
            f"model.backbone.hidden_channels={hidden_channels}",

            # Readout
            f"model.readout.fc_dim={fc_dim}",
            f"model.readout.graph_encoder_dim={graph_encoder_dim}",
            # f"model.readout.fc_act={fc_act}",
        ],
        return_hydra_config=True
    )
    model = hydra.utils.instantiate(
        cfg.model,
        evaluator=cfg.evaluator,
        optimizer=cfg.optimizer,
        loss=cfg.loss,
    )
    return model

In [89]:
total_params_list_combined = []

for out_channels in fc_out_channels_gcn:
    for num_layers in gcn_num_layers:
        for hidden_channels in gcn_hidden_channels:
            for fc_dim in fc_dims:
                for graph_encoder_dim in graph_encoder_dims:
                    try:
                        print(hidden_channels)
                        model = load_config_and_model_gcn(
                            model_name="graph/gcn",
                            num_layers=num_layers,
                            hidden_channels=hidden_channels,
                            out_channels=out_channels,
                            fc_dim=fc_dim,
                            graph_encoder_dim=graph_encoder_dim,
                        )
                        total_params = sum(p.numel() for p in model.parameters())
                        total_params_list_combined.append(total_params)
                    except Exception as e:
                        print(f"⚠️ Skipped combo due to error: {e}")
                        continue

# Summary
print("Max number of parameters:", max(total_params_list_combined))
print("Min number of parameters:", min(total_params_list_combined))
print("Total configurations tested:", len(total_params_list_combined))

4
4
4
4
Max number of parameters: 7617858
Min number of parameters: 3796930
Total configurations tested: 4


## MLP ##

In [90]:
def load_readout_model(fc_dim, graph_encoder_dim, fc_act):
    cfg = compose(
        config_name="run.yaml",
        overrides=[
            "model=graph/mlp",
            "dataset=graph/FTD",
            f"model.readout.fc_dim={fc_dim}",
            f"model.readout.graph_encoder_dim={graph_encoder_dim}",
            f"model.readout.fc_act={fc_act}",
        ],
        return_hydra_config=True,
    )
    model = hydra.utils.instantiate(
        cfg.model,
        evaluator=cfg.evaluator,
        optimizer=cfg.optimizer,
        loss=cfg.loss,
    )
    return model

In [91]:
total_params_list_combined = []

# Loop over all combos
for fc_dim in readout_fc_dims:
    for graph_encoder_dim in readout_graph_encoder_dims:
        for fc_act in readout_fc_acts:
            try:
                model = load_readout_model(fc_dim, graph_encoder_dim, fc_act)
                total_params = sum(p.numel() for p in model.parameters())
                total_params_list_combined.append(total_params)
            except Exception as e:
                print(f"⚠️ Skipped combo due to error: {e}")
                continue

# Output results
print("Max number of parameters:", max(total_params_list_combined))
print("Min number of parameters:", min(total_params_list_combined))
print("Total configurations tested:", len(total_params_list_combined))

Max number of parameters: 6022110
Min number of parameters: 1950558
Total configurations tested: 12
