## Imports ##

In [2]:
import re
from pathlib import Path
from itertools import product
from topobench.nn.backbones.graph.gat_v4 import GATv4
from topobench.nn.readouts.ftd_readout import FTDReadOut
from torch_geometric.nn import GAT, GCN, global_mean_pool
import yaml


# Getting Number of runs per experiment #

In [3]:

def count_runs_in_block(lines):
    params = {}
    for line in lines:
        if "=" not in line or line.strip().startswith("python") or "--multirun" in line or line.strip().endswith("&"):
            continue
        key, val = line.split("=", 1)
        key = key.strip()
        val = val.strip().strip("\\")
        
        # Special case: multiple escaped bracketed items (e.g. \[...\],\[...\],...)
        if "\\[" in val and "\\]" in val:
            # Find all escaped bracketed expressions
            matches = re.findall(r'(\[.*?\\\])', val)
            cleaned = [m.replace("\\", "") for m in matches]
            params[key] = cleaned
        elif "," in val:
            params[key] = val.split(",")
        else:
            params[key] = [val]
    # Cartesian product of all parameter options
    total = 1
    for v in params.values():
        total *= len(v)
    return total


def parse_hydra_sh(filepath):
    with open(filepath, "r") as f:
        lines = [line.strip() for line in f.readlines()]
    
    blocks = []
    current = []
    for line in lines:
        if line.startswith("python -m"):
            if current:
                blocks.append(current)
                current = []
        current.append(line)
    if current:
        blocks.append(current)

    results = []
    for i, block in enumerate(blocks):
        run_count = count_runs_in_block(block)
        results.append((i + 1, run_count))
    
    for block_num, count in results:
        print(f"Block {block_num}: {count} runs")
    print(f"Total: {sum(count for _, count in results)} runs")
    return results

In [6]:
results = parse_hydra_sh("/home/gbg141/TopoProteo/topoproteo_experiments.sh") 

Block 1: 46080 runs
Total: 46080 runs


# Getting Number of Parameters Per Model #

In [None]:
fc_dim_choices = [[128, 256, 256, 64], [64, 128, 128, 32]]
fc_dropout_choices = [0.1]
fc_act_choices = ['tanh', 'elu']

gat_v4_hidden_channels= [[8, 16], [64, 128]]
gat_v4_heads= [[2, 2], [4, 4]]

gat_num_layers= [2, 4, 6]  # only for GAT and GCN
gat_hidden_channels= [8, 32, 64, 128]
gat_heads= [2, 4]

gcn_num_layers= [2, 4, 6]  # only for GAT and GCN
gcn_hidden_channels= [8, 32, 128]
[512, 128, 64, 32]
mlp_channel_lists= [[128, 256, 256, 64], [64, 128, 128, 32],[416, 256, 128, 32]] #[512, 256, 128, 32]]
dropout_choices= [0.1]

# Dataset Parameters 
FTD_config = "/home/lcornelis/code/TopoProteo/configs/dataset/graph/FTD.yaml"
gatv4config = "/home/lcornelis/code/TopoProteo/configs/model/graph/gatv4.yaml"
gatconfig = "/home/lcornelis/code/TopoProteo/configs/model/graph/gat.yaml"
gcnconfig = "/home/lcornelis/code/TopoProteo/configs/model/graph/gcn.yaml"
mlpconfig = "/home/lcornelis/code/TopoProteo/configs/model/graph/mlp.yaml" #fix this 

with open(FTD_config, "r") as f:
    config = yaml.safe_load(f)
# Navigate to the nested value
num_nodes = config["loader"]["parameters"]["num_nodes"]

#shared readout parameters

with open(gatv4config, "r") as f:
    config_gatv4 = yaml.safe_load(f)

readout = config_gatv4["readout"]
readout_which_layer = readout["which_layer"]
fc_dim_choices = [readout["fc_dim"]]  # wrap in list to iterate if not already
fc_dropout_choices = [readout["fc_dropout"]]
fc_act_choices = [readout["fc_act"]]
fc_input_dim = readout["fc_input_dim"]
feature_encoder_dim = readout["feature_encoder_dim"]
graph_encoder_dim = readout["graph_encoder_dim"]




## GAT-v4 ##

In [12]:
# Get values from config
backbone = config_gatv4["backbone"]
# Extract values backbone
which_layer = backbone["which_layer"]
dropout = backbone["dropout"]
act = backbone["act"]
use_layer_norm = backbone["use_layer_norm"]
weight_initializer = backbone["weight_initializer"]

# Compute fc_input_dim
fc_input_dim = num_nodes * len(which_layer)

total_params_list_combined = []
for hidden_channels in gat_v4_hidden_channels:
    for heads in gat_v4_heads:
        gat_v4_model = GATv4(
            in_channels=1, 
            hidden_channels=hidden_channels,
            out_channels=1,
            heads=heads, 
            dropout=dropout,
            act=act,
            which_layer=which_layer,
            use_layer_norm=use_layer_norm,
            num_nodes=num_nodes,
            weight_initializer=weight_initializer,
        )
        for fc_dim in fc_dim_choices:
            for fc_dropout in fc_dropout_choices:
                for fc_act in fc_act_choices:
                    readout_model = FTDReadOut(
                        num_nodes=num_nodes,
                        which_layer=readout_which_layer,
                        fc_dim=fc_dim,
                        fc_dropout=fc_dropout,
                        fc_act=fc_act,
                        out_channels=1,
                        fc_input_dim=fc_input_dim,
                        use_feature_encoder=True,
                        feature_encoder_dim=feature_encoder_dim,
                        graph_encoder_dim=graph_encoder_dim,
                        hidden_dim=10, #SOS
                        task_level=task_level, #SOS
                    )
                    total_params_combined = sum(p.numel() for p in gat_v4_model.parameters()) + sum(p.numel() for p in readout_model.parameters())
                    total_params_list_combined.append(total_params_combined)

print("Max number of parameters: ", max(total_params_list_combined))
print("Min number of parameters: ", min(total_params_list_combined))

NameError: name 'task_level' is not defined

## GAT ##

In [None]:
with open(gatconfig, "r") as f:
    config_gat = yaml.safe_load(f)
backbone = config["backbone"]
dropout = backbone["dropout"]
act = backbone["act"]
num_layers = backbone["num_layers"]
hidden_channels = backbone["hidden_channels"]
heads = backbone["heads"]

# Assumes num_nodes is defined elsewhere
fc_input_dim = (num_nodes * 2) - 1

fc_input_dim = (num_nodes * 2) - 1
total_params_list_combined = []
for num_layers in gat_num_layers:
    for hidden_channels in gat_hidden_channels:
        for heads in gat_heads:
            gat_model = GAT(
                in_channels=1,
                num_layers=num_layers,
                hidden_channels=hidden_channels,
                out_channels=1,
                heads=heads,
                dropout=dropout,
                act=act,
            )
            for fc_dim in fc_dim_choices:
                for fc_dropout in fc_dropout_choices:
                    for fc_act in fc_act_choices:
                        readout_model = FTDReadOut(
                        num_nodes=num_nodes,
                        which_layer=readout_which_layer,
                        fc_dim=fc_dim,
                        fc_dropout=fc_dropout,
                        fc_act=fc_act,
                        out_channels=1,
                        fc_input_dim=fc_input_dim,
                        use_feature_encoder=True,
                        feature_encoder_dim=feature_encoder_dim,
                        graph_encoder_dim=graph_encoder_dim,
                        hidden_dim=10, #SOS
                        task_level=task_level, #SOS
                    )
                        total_params_combined = sum(p.numel() for p in gat_model.parameters()) + sum(p.numel() for p in readout_model.parameters())
                        total_params_list_combined.append(total_params_combined)

print("Max number of parameters: ", max(total_params_list_combined))
print("Min number of parameters: ", min(total_params_list_combined))

## GCN ##

In [None]:
with open(gcnconfig, "r") as f:
    config_gcn = yaml.safe_load(f)
backbone = config["backbone"]
dropout = backbone["dropout"]
act = backbone["act"]
num_layers = backbone["num_layers"]
hidden_channels = backbone["hidden_channels"]

fc_input_dim = (num_nodes * 2) - 1
total_params_list_combined = []
for num_layers in gcn_num_layers:
    for hidden_channels in gcn_hidden_channels:
        gcn_model = GCN(
            in_channels=1,
            num_layers=num_layers,
            hidden_channels=hidden_channels,
            out_channels=1,
            dropout=dropout,
            act=act,
        )
        for fc_dim in fc_dim_choices:
            for fc_dropout in fc_dropout_choices:
                for fc_act in fc_act_choices:
                    readout_model = FTDReadOut(
                        num_nodes=num_nodes,
                        which_layer=readout_which_layer,
                        fc_dim=fc_dim,
                        fc_dropout=fc_dropout,
                        fc_act=fc_act,
                        out_channels=1,
                        fc_input_dim=fc_input_dim,
                        use_feature_encoder=True,
                        feature_encoder_dim=feature_encoder_dim,
                        graph_encoder_dim=graph_encoder_dim,
                        hidden_dim=10, #SOS
                        task_level=task_level, #SOS
                    )
                    total_params_combined = sum(p.numel() for p in gcn_model.parameters()) + sum(p.numel() for p in readout_model.parameters())
                    total_params_list_combined.append(total_params_combined)

print("Max number of parameters: ", max(total_params_list_combined))
print("Min number of parameters: ", min(total_params_list_combined))

## MLP ##

In [None]:
dropout = config.dropout
fc_input_dim = (num_nodes * 2) -1
total_params_list_combined = []
for fc_dim in mlp_channel_lists:
    for fc_dropout in dropout_choices:
        for fc_act in fc_act_choices:
            readout_model = FTDReadOut(
                        num_nodes=num_nodes,
                        which_layer=readout_which_layer,
                        fc_dim=fc_dim,
                        fc_dropout=fc_dropout,
                        fc_act=fc_act,
                        out_channels=1,
                        fc_input_dim=fc_input_dim,
                        use_feature_encoder=True,
                        feature_encoder_dim=feature_encoder_dim,
                        graph_encoder_dim=graph_encoder_dim,
                        hidden_dim=10, #SOS
                        task_level=task_level, #SOS
                    )
            total_params_combined = sum(p.numel() for p in readout_model.parameters())
            total_params_list_combined.append(total_params_combined)
    

print("Max number of parameters: ", max(total_params_list_combined))
print("Min number of parameters: ", min(total_params_list_combined))