In [1]:
import itertools
import os
from pathlib import Path

import pandas as pd

In [4]:
# Number of target classes for each supported dataset name. The generation
# loop looks the dataset up here and writes the value into `share.dim_out`
# of the rendered config.
NUM_CLASSES = {"IMDB-MULTI": 3}

In [5]:
# YAML configuration template for a single GraphGPS training run.
#
# The text is rendered with `str.format`, so every `{name}` in it is a
# placeholder that must be supplied as a keyword argument:
#   out_dir, wandb_proj_name, ds, ds_download_dir, task_type, batch_size,
#   loss_fn, num_layers, num_heads, dim_hidden (fills both `gt.dim_hidden`
#   and `gnn.dim_inner`, which the inline YAML comments say must match),
#   attn_dropout, num_classes, seed.
# Because of `str.format`, any literal `{` or `}` added to the template has
# to be doubled (`{{` / `}}`).
#
# All other keys are fixed for every generated run. The string starts with a
# newline and has no trailing newline, so the rendered text does too.
#
# NOTE(review): `metric_best` is hard-coded to `auto` below, yet the
#   generation loop passes `metric_best=...` (and `has_edges=...`) to
#   `format()`. The template has no matching placeholders, so those values
#   are silently ignored -- confirm whether `{metric_best}` was intended.
# NOTE(review): `metric_agg: argmin` is combined with a classification task;
#   if the tracked metric is one where higher is better this presumably
#   should be `argmax` -- verify against how the training code uses it.
# NOTE(review): `entity: <WANDB_USERNAME>` looks like a manual placeholder,
#   not a format field -- it is written to the config verbatim.
template_config = '''
out_dir: {out_dir}
metric_best: auto
metric_agg: argmin
device: 'cuda:0'
wandb:
  use: True
  project: {wandb_proj_name}
  entity: <WANDB_USERNAME>
dataset:
  format: PyG
  name: {ds}
  dir: {ds_download_dir}
  onehot: True
  target_name: None
  task: graph
  task_type: {task_type}
  transductive: False
  node_encoder: True
  node_encoder_name: LinearNode+RWSE
  node_encoder_num_types: 28
  node_encoder_bn: False
  edge_encoder: True
  edge_encoder_name: DummyEdge
  edge_encoder_bn: False
posenc_RWSE:
  enable: True
  kernel:
    times_func: range(1,21)
  model: Linear
  dim_pe: 28
  raw_norm_type: BatchNorm
train:
  mode: custom
  batch_size: {batch_size}
  eval_period: 1
  ckpt_period: 100
model:
  type: GPSModel
  loss_fun: {loss_fn}
  graph_pooling: mean
  edge_decoding: dot
gt:
  layer_type: CustomGatedGCN+Transformer
  layers: {num_layers}
  n_heads: {num_heads}
  dim_hidden: {dim_hidden}  # `gt.dim_hidden` must match `gnn.dim_inner`
  dropout: 0.0
  attn_dropout: {attn_dropout}
  layer_norm: False
  batch_norm: True
gnn:
  head: default
  layers_pre_mp: 0
  layers_post_mp: 3  # Not used when `gnn.head: san_graph`
  dim_inner: {dim_hidden}  # `gt.dim_hidden` must match `gnn.dim_inner`
  batchnorm: True
  act: gelu
  dropout: 0.0
optim:
  clip_grad_norm: True
  optimizer: adamW
  weight_decay: 1e-10
  base_lr: 0.0005
  max_epoch: 500
  scheduler: reduce_on_plateau
  reduce_factor: 0.5
  schedule_patience: 15
  min_lr: 1e-5
  early_stopping_patience: 30
share:
  dim_out: {num_classes}
seed: {seed}'''

In [6]:
# Generate one YAML config and one launcher shell script per point of the
# hyper-parameter grid.
#
# Everything that does not depend on the grid point (paths, W&B project,
# directory creation) is set up once here instead of in the innermost loop.
CONFIG_PATH = '/home/david/Projects_SEPT2024/source_code_for_repo/graphgps_scripts/training_configs_IMDB_MULTI'
SCRIPT_PATH = '/home/david/Projects_SEPT2024/source_code_for_repo/graphgps_scripts/training_scripts_IMDB_MULTI'
MAIN_SCRIPT = '/home/david/Projects_SEPT2024/source_code_for_repo/github/graphgps_graph/main.py'
DATASET_DOWNLOAD_ROOT = '/media/david/Media/PyG_DATASET_DOWNLOAD_DIRECTORY'
OUTPUT_ROOT = '/media/david/Media/out_graphgps_github_ready'
wandb_proj_name = 'GPS+try_gitub_ready'

# Hyper-parameter grid; the last list varies fastest, matching the order in
# which nested `for` loops over these lists would visit the combinations.
SEEDS = [0]
DIM_HIDDEN_VALUES = [256]
NUM_LAYERS_VALUES = [6]
NUM_HEADS_VALUES = [16]
ATTN_DROPOUT_VALUES = [0.1, 0.5]

Path(CONFIG_PATH).mkdir(exist_ok=True, parents=True)
Path(SCRIPT_PATH).mkdir(exist_ok=True, parents=True)

for ds in ['IMDB-MULTI']:
    # Per-dataset settings.
    batch_size = 32
    has_edges = True

    loss_fn = 'cross_entropy'
    metric_best = 'mcc'

    task_type = 'classification_multi'

    num_classes = NUM_CLASSES[ds]
    dl_dir = f'{DATASET_DOWNLOAD_ROOT}/{ds}'

    for seed, dim_hidden, num_layers, num_heads, attn_dropout in itertools.product(
        SEEDS, DIM_HIDDEN_VALUES, NUM_LAYERS_VALUES, NUM_HEADS_VALUES, ATTN_DROPOUT_VALUES
    ):
        # Single source of truth for the run's file stem, shared by the
        # config file, the script file and the `--cfg` argument.
        run_name = f'{ds}_{seed}_{dim_hidden}_{num_layers}_{num_heads}_{attn_dropout}'
        out_dir = f'{OUTPUT_ROOT}/{ds}/{seed}/{dim_hidden}/{num_layers}/{num_heads}/{attn_dropout}'

        config_file = Path(CONFIG_PATH) / f'{run_name}.yaml'
        script_file = Path(SCRIPT_PATH) / f'{run_name}.sh'

        # `str.format` ignores keyword arguments that have no placeholder in
        # the template, so passing a superset of the fields is harmless.
        conf = template_config.format(
            ds=ds,
            seed=seed,
            out_dir=out_dir,
            loss_fn=loss_fn,
            dim_hidden=dim_hidden,
            num_layers=num_layers,
            num_heads=num_heads,
            metric_best=metric_best,
            task_type=task_type,
            ds_download_dir=dl_dir,
            wandb_proj_name=wandb_proj_name,
            batch_size=batch_size,
            has_edges=has_edges,
            num_classes=num_classes,
            attn_dropout=attn_dropout,
        )
        config_file.write_text(conf)

        # Terminate the command with a newline so the script is a well-formed
        # text file and several scripts can be concatenated safely.
        script = f'python {MAIN_SCRIPT} --cfg {config_file}\n'
        script_file.write_text(script)