## Imports ##

In [40]:
import re
from pathlib import Path
from itertools import product
from topobench.nn.backbones.graph.gat_v4 import GATv4
from topobench.nn.readouts.ftd_readout import FTDReadOut
from topobench.nn.encoders.all_cell_encoder import AllCellFeatureEncoder
from torch_geometric.nn import GAT, GCN, global_mean_pool
import yaml
from hydra import compose, initialize
from hydra import compose, initialize
from hydra.utils import instantiate


# Getting Number of runs per experiment #

In [42]:

def count_runs_in_block(lines):
    """Count how many runs one Hydra multirun command block launches.

    Every ``key=value`` override line contributes a list of options; the
    result is the size of the Cartesian product of those lists.

    Args:
        lines: Iterable of strings, one per line of a single command block.

    Returns:
        int: Product of the number of options of every override. A block
        without any override yields 1.
    """
    params = {}
    for line in lines:
        stripped = line.strip()
        # Only "key=value" override lines take part in the sweep; the
        # launcher line, the --multirun flag and backgrounded ("&") lines
        # are ignored.
        if (
            "=" not in line
            or stripped.startswith("python")
            or "--multirun" in line
            or stripped.endswith("&")
        ):
            continue

        key, val = line.split("=", 1)
        key = key.strip()
        # Remove only the trailing line-continuation backslash. Stripping
        # backslashes from both ends would also eat the escape of a leading
        # "\[", so a single escaped list (e.g. "\[8,16\]") would miss the
        # bracket branch below and be split on its inner commas.
        val = val.strip().rstrip("\\").strip()

        if "\\[" in val and "\\]" in val:
            # One option per escaped bracketed expression: \[...\],\[...\]
            matches = re.findall(r'(\[.*?\\\])', val)
            params[key] = [m.replace("\\", "") for m in matches]
        elif "," in val:
            params[key] = val.split(",")
        else:
            params[key] = [val]

    # Cartesian product of all parameter options
    total = 1
    for options in params.values():
        total *= len(options)
    return total


def parse_hydra_sh(filepath):
    """Report the number of runs launched by each command in a shell script.

    The script is split into blocks, each starting at a line that begins
    with ``python -m``. Every block is passed to ``count_runs_in_block`` and
    a per-block and total summary is printed.

    Args:
        filepath: Path of the shell script to analyse.

    Returns:
        list[tuple[int, int]]: ``(block_number, run_count)`` pairs, with
        block numbers starting at 1.
    """
    with open(filepath, "r") as f:
        lines = [line.strip() for line in f]

    blocks = []
    # `current` stays None until the first command line is seen, so that a
    # preamble (shebang, comments, blank lines) is not reported as a block
    # of its own with a spurious run count.
    current = None
    for line in lines:
        if line.startswith("python -m"):
            if current:
                blocks.append(current)
            current = []
        if current is not None:
            current.append(line)
    if current:
        blocks.append(current)

    results = [
        (block_num, count_runs_in_block(block))
        for block_num, block in enumerate(blocks, start=1)
    ]

    for block_num, count in results:
        print(f"Block {block_num}: {count} runs")
    print(f"Total: {sum(count for _, count in results)} runs")
    return results

In [43]:
# Count the runs launched by every command block of the experiments script.
experiments_script = "/home/lcornelis/code/TopoProteo/topoproteo_experiments.sh"
results = parse_hydra_sh(experiments_script)


Block 1: 11520 runs
Block 2: 11520 runs
Block 3: 11520 runs
Block 4: 11520 runs
Block 5: 11520 runs
Block 6: 11520 runs
Block 7: 720 runs
Block 8: 720 runs
Total: 70560 runs


# Getting Number of Parameters Per Model #

In [36]:
# ---- Hyper-parameter grids used by the parameter-count cells ----

# GAT-v4 backbone
gat_v4_hidden_channels = [[8, 16], [64, 128]]
gat_v4_heads = [[2, 2], [4, 4]]
fc_out_channels_gatv4 = [1, 8]

# GAT backbone (num_layers applies to GAT and GCN only)
gat_num_layers = [2, 4]
gat_hidden_channels = [8, 16]
gat_heads = [2, 4]
fc_out_channels_gat = [8, 16]

# GCN backbone
gcn_num_layers = [2, 4]
gcn_hidden_channels = [8, 16]
fc_out_channels_gcn = [8, 16]

# Readout
readout_graph_encoder_dims = [256, 128]
readout_fc_dims = ([128, 64, 32], [256, 128, 64])
readout_fc_acts = ['relu', 'tanh']

# ---- Config file locations ----
FTD_config = "/home/lcornelis/code/TopoProteo/configs/dataset/graph/FTD.yaml"
gatv4config = "/home/lcornelis/code/TopoProteo/configs/model/graph/gatv4.yaml"
gatconfig = "/home/lcornelis/code/TopoProteo/configs/model/graph/gat.yaml"
gcnconfig = "/home/lcornelis/code/TopoProteo/configs/model/graph/gcn.yaml"
mlpconfig = "/home/lcornelis/code/TopoProteo/configs/model/graph/mlp.yaml"  # TODO (author): fix this

# ---- Dataset parameters ----
with open(FTD_config, "r") as f:
    config = yaml.safe_load(f)
dataset_parameters = config["parameters"]
num_nodes = config["loader"]["parameters"]["num_nodes"]
task_level = dataset_parameters["task_level"]
out_channels = dataset_parameters["num_classes"]

# ---- Readout parameters shared by all models (taken from the GAT-v4 config) ----
with open(gatv4config, "r") as f:
    config_gatv4 = yaml.safe_load(f)

# NOTE(review): yaml.safe_load does not resolve Hydra/OmegaConf "${...}"
# interpolations; if the YAML uses them, the values below are raw strings.
readout = config_gatv4["readout"]
readout_which_layer, fc_dropout, feature_encoder_dim, hidden_dim = (
    readout[key]
    for key in ("which_layer", "fc_dropout", "feature_encoder_dim", "hidden_dim")
)


In [37]:
def load_config_and_model(model_name, dataset_name, adj_thresh):
    """Compose the Hydra run config for a model/dataset pair and build the model.

    Args:
        model_name: Value for the ``model`` override, e.g. ``"graph/gatv4"``.
        dataset_name: Value for the ``dataset`` override, e.g. ``"graph/FTD"``.
        adj_thresh: Value assigned to ``dataset.loader.parameters.adj_thresh``.

    Returns:
        The object instantiated from ``cfg.model``, built with the evaluator,
        optimizer and loss sections of the composed config.

    Note:
        ``compose`` needs an initialised Hydra context (see ``initialize``);
        this function does not set one up itself.
    """
    cfg = compose(
        config_name="run.yaml",
        overrides=[
            f"model={model_name}",
            f"dataset={dataset_name}",
            f"dataset.loader.parameters.adj_thresh={adj_thresh}",
        ],
        return_hydra_config=True,
    )
    # Use the `instantiate` imported at the top of the notebook: the bare
    # `hydra` package name is never imported, so `hydra.utils.instantiate`
    # raised NameError.
    model = instantiate(
        cfg.model,
        evaluator=cfg.evaluator,
        optimizer=cfg.optimizer,
        loss=cfg.loss,
    )
    return model


## GAT-v4 ##

In [None]:
# Count the parameters of the GAT-v4 model built through Hydra for every
# combination of the swept hyper-parameters.
total_params_list_combined = []

# NOTE(review): the swept values are not yet passed to `compose` as
# overrides, so every iteration currently builds the same default model.
# TODO: add the matching `model.*` overrides once the config keys are confirmed.
gatv4_sweep = product(
    fc_out_channels_gatv4,       # feature_encoder_out_channels
    gat_v4_hidden_channels,      # hidden_channels
    gat_v4_heads,                # heads
    fc_out_channels_gatv4,       # in_channels
    readout_fc_dims,             # fc_dim
    readout_graph_encoder_dims,  # graph_encoder_dim
    readout_fc_acts,             # fc_act
)

for (
    feature_encoder_out_channels,
    hidden_channels,
    heads,
    in_channels,
    fc_dim,
    graph_encoder_dim,
    fc_act,
) in gatv4_sweep:
    cfg = compose(
        config_name="run.yaml",
        overrides=[
            "model=graph/gatv4",
            "dataset=graph/FTD",
            "dataset.loader.parameters.adj_thresh=0.5",
        ],
        return_hydra_config=True,
    )
    # `instantiate` is imported at the top of the notebook; the bare `hydra`
    # name is not, so `hydra.utils.instantiate` raised NameError.
    model = instantiate(
        cfg.model,
        evaluator=cfg.evaluator,
        optimizer=cfg.optimizer,
        loss=cfg.loss,
    )
    # Count the model built in this iteration rather than objects that are
    # only defined by a later cell.
    # NOTE(review): assumes the instantiated object exposes `.parameters()`
    # (i.e. is a torch module) — confirm.
    total_params_combined = sum(p.numel() for p in model.parameters())
    total_params_list_combined.append(total_params_combined)

print("Max number of parameters: ", max(total_params_list_combined))
print("Min number of parameters: ", min(total_params_list_combined))

In [33]:
# Build the feature encoder, GAT-v4 backbone and readout by hand for every
# hyper-parameter combination and record the summed parameter count.
# NOTE(review): the recorded run of this cell ends in
#   TypeError: empty(): argument 'size' ... "type must be tuple of ints,but got str"
# i.e. some size argument reaches torch as a string. The values read from the
# YAML files via yaml.safe_load are candidates (possibly an unresolved
# "${...}" interpolation) — TODO confirm which argument is the string.

# Backbone section of the GAT-v4 model config
backbone = config_gatv4["backbone"]
# Fixed (non-swept) backbone settings
which_layer = backbone["which_layer"]
dropout = backbone["dropout"]
act = backbone["act"]
use_layer_norm = backbone["use_layer_norm"]
weight_initializer = backbone["weight_initializer"]

# Compute fc_input_dim
# NOTE(review): fc_input_dim is not used anywhere else in this cell.
fc_input_dim = num_nodes * len(which_layer)

# One entry per (encoder, backbone, readout) combination
total_params_list_combined = []

for feature_encoder_out_channels in fc_out_channels_gatv4:
    feature_encoder_model = AllCellFeatureEncoder(
        in_channels=[1],  # NOTE(review): hard-coded input width — confirm
        out_channels=feature_encoder_out_channels,
    )
    for hidden_channels in gat_v4_hidden_channels:
        for heads in gat_v4_heads:
            # NOTE(review): in_channels is swept over the same list as the
            # encoder's out_channels but independently of it — confirm whether
            # it should equal feature_encoder_out_channels instead.
            for in_channels in fc_out_channels_gatv4:
                gat_v4_model = GATv4(
                    in_channels= in_channels, 
                    hidden_channels=hidden_channels,
                    out_channels=1,
                    heads=heads,
                    dropout=dropout,
                    act=act,
                    which_layer=which_layer,
                    use_layer_norm=use_layer_norm,
                    num_nodes=num_nodes,
                    weight_initializer=weight_initializer,
                )
                
                for fc_dim in readout_fc_dims:
                    for graph_encoder_dim in readout_graph_encoder_dims:
                        for fc_act in readout_fc_acts:
                            readout_model = FTDReadOut(
                                num_nodes=num_nodes,
                                hidden_dim=hidden_dim, # NOTE(review): taken from the readout YAML section; the other cells hard-code 10 — confirm
                                which_layer=readout_which_layer,  
                                fc_dim=fc_dim,
                                fc_dropout=fc_dropout,
                                fc_act=fc_act,
                                out_channels=out_channels,
                                use_feature_encoder=True,
                                feature_encoder_dim=feature_encoder_dim,
                                graph_encoder_dim=graph_encoder_dim,
                                task_level=task_level,
                            )
                            # Total = encoder + backbone + readout parameters
                            total_params_combined = (
                                sum(p.numel() for p in feature_encoder_model.parameters()) +
                                sum(p.numel() for p in gat_v4_model.parameters()) +
                                sum(p.numel() for p in readout_model.parameters())
                            )
                            total_params_list_combined.append(total_params_combined)

print("Max number of parameters: ", max(total_params_list_combined))
print("Min number of parameters: ", min(total_params_list_combined))

TypeError: empty(): argument 'size' failed to unpack the object at pos 2 with error "type must be tuple of ints,but got str"

## GAT ##

In [None]:
# Count the parameters of feature encoder + GAT backbone + readout for every
# combination of the swept hyper-parameters.
with open(gatconfig, "r") as f:
    config_gat = yaml.safe_load(f)
# Read the backbone section of the GAT model config that was just loaded;
# `config` is the FTD dataset config and is not the source of these settings.
backbone = config_gat["backbone"]
dropout = backbone["dropout"]
act = backbone["act"]
# num_layers, hidden_channels and heads are swept below, so their config
# defaults are not read here.

total_params_list_combined = []

for feature_encoder_out_channels in fc_out_channels_gat:
    feature_encoder_model = AllCellFeatureEncoder(
        in_channels=[1],  # Adjust if needed
        out_channels=feature_encoder_out_channels,
    )
    # The encoder is shared by all inner combinations: count it once.
    encoder_params = sum(p.numel() for p in feature_encoder_model.parameters())

    for num_layers, hidden_channels, heads in product(
        gat_num_layers, gat_hidden_channels, gat_heads
    ):
        # NOTE(review): in_channels is fixed to 1 although the encoder emits
        # feature_encoder_out_channels channels — confirm.
        gat_model = GAT(
            in_channels=1,  # Based on encoded output
            num_layers=num_layers,
            hidden_channels=hidden_channels,
            out_channels=1,
            heads=heads,
            dropout=dropout,
            act=act,
        )
        backbone_params = sum(p.numel() for p in gat_model.parameters())

        for fc_dim, graph_encoder_dim, fc_act in product(
            readout_fc_dims, readout_graph_encoder_dims, readout_fc_acts
        ):
            readout_model = FTDReadOut(
                num_nodes=num_nodes,
                hidden_dim=10,  # NOTE(review): hard-coded — confirm
                which_layer=readout_which_layer,
                fc_dim=fc_dim,
                fc_dropout=fc_dropout,
                fc_act=fc_act,
                out_channels=1,
                use_feature_encoder=True,
                feature_encoder_dim=feature_encoder_out_channels,
                graph_encoder_dim=graph_encoder_dim,
                task_level=task_level,
            )

            total_params_combined = (
                encoder_params
                + backbone_params
                + sum(p.numel() for p in readout_model.parameters())
            )
            total_params_list_combined.append(total_params_combined)

print("Max number of parameters: ", max(total_params_list_combined))
print("Min number of parameters: ", min(total_params_list_combined))


## GCN ##

In [None]:
# Count the parameters of GCN backbone + readout for every combination of the
# swept hyper-parameters.
with open(gcnconfig, "r") as f:
    config_gcn = yaml.safe_load(f)
# Read the backbone section of the GCN model config that was just loaded;
# `config` is the FTD dataset config and is not the source of these settings.
backbone = config_gcn["backbone"]
dropout = backbone["dropout"]
act = backbone["act"]
# num_layers and hidden_channels are swept below, so their config defaults
# are not read here.

fc_input_dim = (num_nodes * 2) - 1
total_params_list_combined = []

for num_layers, hidden_channels in product(gcn_num_layers, gcn_hidden_channels):
    gcn_model = GCN(
        in_channels=1,
        num_layers=num_layers,
        hidden_channels=hidden_channels,
        out_channels=1,
        dropout=dropout,
        act=act,
    )
    # The backbone is shared by all readout combinations: count it once.
    backbone_params = sum(p.numel() for p in gcn_model.parameters())

    # Sweep the readout over the grids from the configuration cell, as the
    # GAT cell does. The previous *_choices names were not defined in this
    # notebook and graph_encoder_dim relied on a variable leaked from another
    # cell. fc_dropout is the single value read from the readout config.
    for fc_dim, graph_encoder_dim, fc_act in product(
        readout_fc_dims, readout_graph_encoder_dims, readout_fc_acts
    ):
        # NOTE(review): fc_input_dim is passed here but not in the GAT cells —
        # confirm it matches the current FTDReadOut signature.
        readout_model = FTDReadOut(
            num_nodes=num_nodes,
            which_layer=readout_which_layer,
            fc_dim=fc_dim,
            fc_dropout=fc_dropout,
            fc_act=fc_act,
            out_channels=1,
            fc_input_dim=fc_input_dim,
            use_feature_encoder=True,
            feature_encoder_dim=feature_encoder_dim,
            graph_encoder_dim=graph_encoder_dim,
            hidden_dim=10,  # NOTE(review): hard-coded — confirm
            task_level=task_level,
        )
        total_params_combined = backbone_params + sum(
            p.numel() for p in readout_model.parameters()
        )
        total_params_list_combined.append(total_params_combined)

print("Max number of parameters: ", max(total_params_list_combined))
print("Min number of parameters: ", min(total_params_list_combined))

## MLP ##

In [None]:
# Count the parameters of the readout-only (MLP) model for every combination
# of the swept readout hyper-parameters.
# The former `dropout = config.dropout` line is dropped: `config` is a plain
# dict (attribute access raises AttributeError) and the value was never used.
fc_input_dim = (num_nodes * 2) - 1
total_params_list_combined = []

# Sweep over the readout grids from the configuration cell. The previous
# sweep lists (mlp_channel_lists, dropout_choices, fc_act_choices) were not
# defined in this notebook and graph_encoder_dim relied on a variable leaked
# from another cell.
# NOTE(review): readout_fc_dims stands in for mlp_channel_lists — confirm that
# this is the intended MLP layer grid.
for fc_dim, graph_encoder_dim, fc_act in product(
    readout_fc_dims, readout_graph_encoder_dims, readout_fc_acts
):
    # NOTE(review): fc_input_dim is passed here but not in the GAT cells —
    # confirm it matches the current FTDReadOut signature.
    readout_model = FTDReadOut(
        num_nodes=num_nodes,
        which_layer=readout_which_layer,
        fc_dim=fc_dim,
        fc_dropout=fc_dropout,
        fc_act=fc_act,
        out_channels=1,
        fc_input_dim=fc_input_dim,
        use_feature_encoder=True,
        feature_encoder_dim=feature_encoder_dim,
        graph_encoder_dim=graph_encoder_dim,
        hidden_dim=10,  # NOTE(review): hard-coded — confirm
        task_level=task_level,
    )
    total_params_combined = sum(p.numel() for p in readout_model.parameters())
    total_params_list_combined.append(total_params_combined)

print("Max number of parameters: ", max(total_params_list_combined))
print("Min number of parameters: ", min(total_params_list_combined))