In [2]:
import pandas as pd
import os
from pathlib import Path

In [7]:
# YAML configuration template for the GW-only training runs.
#
# The text is filled in with `str.format`; it expects these keyword arguments:
#   out_dir, wandb_proj_name, ds_download_dir, target, hq_or_lq, ind_or_trans,
#   batch_size, num_layers, num_heads, dim_hidden, seed
# `dim_hidden` is substituted in three places (gt.dim_hidden, gnn.dim_inner and
# share.dim_in), which keeps those widths identical in every generated file.
#
# This variant sets max_epoch 500 with the reduce_on_plateau scheduler,
# early_stopping_patience 15 and ckpt_period 100.
#
# Everything between the triple quotes is emitted verbatim: the `#` remarks
# inside the literal end up in the generated YAML, they are not Python comments.
# Literal `{` / `}` characters would have to be doubled to survive `str.format`.
#
# NOTE(review): the name `template_config` is reassigned by later cells, so this
# cell has to be (re-)run immediately before the generator cell that uses it.
template_config = '''
out_dir: {out_dir}
metric_best: auto
metric_agg: argmin
device: 'cuda:0'
wandb:
  use: True
  project: {wandb_proj_name}
  entity: <WANDB_USERNAME>
dataset:
  format: PyG
  name: QM9-TL
  dir: {ds_download_dir}
  onehot: True
  add_3d: False
  target_name: {target}
  hq_or_lq: {hq_or_lq}
  inductive_or_transductive: {ind_or_trans}
  task: graph
  task_type: regression
  transductive: False
  node_encoder: True
  node_encoder_name: LinearNode+RWSE
  node_encoder_num_types: 28
  node_encoder_bn: False
  edge_encoder: True
  edge_encoder_name: LinearEdge
  edge_encoder_bn: False
posenc_RWSE:
  enable: True
  kernel:
    times_func: range(1,21)
  model: Linear
  dim_pe: 28
  raw_norm_type: BatchNorm
train:
  mode: custom
  batch_size: {batch_size}
  eval_period: 1
  ckpt_period: 100
model:
  type: GPSModel
  loss_fun: l1
  graph_pooling: mean
  edge_decoding: dot
gt:
  layer_type: CustomGatedGCN+Transformer
  layers: {num_layers}
  n_heads: {num_heads}
  dim_hidden: {dim_hidden}  # `gt.dim_hidden` must match `gnn.dim_inner`
  dropout: 0.0
  attn_dropout: 0.1
  layer_norm: True
  batch_norm: False
gnn:
  head: default
  layers_pre_mp: 0
  layers_post_mp: 3  # Not used when `gnn.head: san_graph`
  dim_inner: {dim_hidden}  # `gt.dim_hidden` must match `gnn.dim_inner`
  batchnorm: True
  act: gelu
  dropout: 0.0
optim:
  clip_grad_norm: True
  optimizer: adamW
  weight_decay: 1e-10
  base_lr: 0.0005
  max_epoch: 500
  scheduler: reduce_on_plateau
  reduce_factor: 0.5
  schedule_patience: 7
  min_lr: 1e-5
  early_stopping_patience: 15
share:
  dim_in: {dim_hidden}
  dim_out: 1
seed: {seed}'''

In [4]:
# Write one YAML config and one launcher script for every point of the
# GW-only hyperparameter grid (targets x hidden width x depth x heads x seed).
CONFIG_PATH = '/home/david/Projects_SEPT2024/source_code_for_repo/graphgps_scripts/training_configs_GW_only'
SCRIPT_PATH = '/home/david/Projects_SEPT2024/source_code_for_repo/graphgps_scripts/training_scripts_GW_only'

# Neither destination folder depends on the grid point, so create them up front.
for folder in (CONFIG_PATH, SCRIPT_PATH):
    Path(folder).mkdir(exist_ok=True, parents=True)

dl_dir = '/media/david/Media/PyG_DATASET_DOWNLOAD_DIRECTORY/data_QM9_TF_3D_PyG/with_edge_index/'
wandb_proj_name = 'GPS-TF-try-github-ready'

# Same iteration order as five nested loops, outermost variable listed first.
grid = [
    (target, dim_hidden, num_layers, num_heads, seed)
    for target in ['homo_gw', 'lumo_gw']
    for dim_hidden in [128, 256, 512]
    for num_layers in [4, 6, 8, 10]
    for num_heads in [8, 16]
    for seed in [0]
]

for target, dim_hidden, num_layers, num_heads, seed in grid:
    # Shared stem of the .yaml / .sh file names for this grid point.
    run_name = f'{target}_{seed}_{dim_hidden}_{num_layers}_{num_heads}'
    out_dir = f'/media/david/Media/out_graphgps_github_ready/transfer_learning/{target}/{seed}/{dim_hidden}/{num_layers}/{num_heads}'

    # `ind_or_trans=None` is rendered by str.format as the text "None".
    conf = template_config.format(
        out_dir=out_dir,
        wandb_proj_name=wandb_proj_name,
        ds_download_dir=dl_dir,
        target=target,
        hq_or_lq="hq",
        ind_or_trans=None,
        batch_size=128,
        num_layers=num_layers,
        num_heads=num_heads,
        dim_hidden=dim_hidden,
        seed=seed,
    )
    Path(CONFIG_PATH, f'{run_name}.yaml').write_text(conf)

    # One-line launcher that points main.py at the config written above.
    script = f'python /home/david/Projects_SEPT2024/source_code_for_repo/github/transfer_learning/graphgps_3d/main.py --cfg {CONFIG_PATH}/{run_name}.yaml'
    Path(SCRIPT_PATH, f'{run_name}.sh').write_text(script)

In [10]:
# YAML configuration template for the DFT training runs.
#
# The text is filled in with `str.format`; it expects these keyword arguments:
#   out_dir, wandb_proj_name, ds_download_dir, target, hq_or_lq, ind_or_trans,
#   batch_size, num_layers, num_heads, dim_hidden, seed
# `dim_hidden` is substituted in three places (gt.dim_hidden, gnn.dim_inner and
# share.dim_in), which keeps those widths identical in every generated file.
#
# This variant sets max_epoch 151 and ckpt_period 25, and its `optim` section
# contains no scheduler or early-stopping keys.
#
# Everything between the triple quotes is emitted verbatim: the `#` remarks
# inside the literal end up in the generated YAML, they are not Python comments.
# Literal `{` / `}` characters would have to be doubled to survive `str.format`.
#
# NOTE(review): this assignment replaces any earlier value of `template_config`,
# so this cell has to be (re-)run immediately before the generator cell that uses it.
template_config = '''
out_dir: {out_dir}
metric_best: auto
metric_agg: argmin
device: 'cuda:0'
wandb:
  use: True
  project: {wandb_proj_name}
  entity: <WANDB_USERNAME>
dataset:
  format: PyG
  name: QM9-TL
  dir: {ds_download_dir}
  onehot: True
  add_3d: False
  target_name: {target}
  hq_or_lq: {hq_or_lq}
  inductive_or_transductive: {ind_or_trans}
  task: graph
  task_type: regression
  transductive: False
  node_encoder: True
  node_encoder_name: LinearNode+RWSE
  node_encoder_num_types: 28
  node_encoder_bn: False
  edge_encoder: True
  edge_encoder_name: LinearEdge
  edge_encoder_bn: False
posenc_RWSE:
  enable: True
  kernel:
    times_func: range(1,21)
  model: Linear
  dim_pe: 28
  raw_norm_type: BatchNorm
train:
  mode: custom
  batch_size: {batch_size}
  eval_period: 1
  ckpt_period: 25
model:
  type: GPSModel
  loss_fun: l1
  graph_pooling: mean
  edge_decoding: dot
gt:
  layer_type: CustomGatedGCN+Transformer
  layers: {num_layers}
  n_heads: {num_heads}
  dim_hidden: {dim_hidden}  # `gt.dim_hidden` must match `gnn.dim_inner`
  dropout: 0.0
  attn_dropout: 0.1
  layer_norm: True
  batch_norm: False
gnn:
  head: default
  layers_pre_mp: 0
  layers_post_mp: 3  # Not used when `gnn.head: san_graph`
  dim_inner: {dim_hidden}  # `gt.dim_hidden` must match `gnn.dim_inner`
  batchnorm: True
  act: gelu
  dropout: 0.0
optim:
  clip_grad_norm: True
  optimizer: adamW
  weight_decay: 1e-10
  base_lr: 0.0005
  max_epoch: 151
share:
  dim_in: {dim_hidden}
  dim_out: 1
seed: {seed}'''

In [11]:
# Write one YAML config and one launcher script for every DFT run:
# each DFT target is trained in both the transductive and the inductive setting.
CONFIG_PATH = '/home/david/Projects_SEPT2024/source_code_for_repo/graphgps_scripts/training_configs_DFT'
SCRIPT_PATH = '/home/david/Projects_SEPT2024/source_code_for_repo/graphgps_scripts/training_scripts_DFT'

# Neither destination folder depends on the grid point, so create them up front.
for folder in (CONFIG_PATH, SCRIPT_PATH):
    Path(folder).mkdir(exist_ok=True, parents=True)

dl_dir = '/media/david/Media/PyG_DATASET_DOWNLOAD_DIRECTORY/data_QM9_TF_3D_PyG/with_edge_index/'
wandb_proj_name = 'GPS-3D+DFT+SCPUS'

# Same iteration order as six nested loops, outermost variable listed first.
grid = [
    (target, dim_hidden, num_layers, num_heads, ind_or_trans, seed)
    for target in ['homo_dft', 'lumo_dft']
    for dim_hidden in [256]
    for num_layers in [8]
    for num_heads in [16]
    for ind_or_trans in ['transductive', 'inductive']
    for seed in [0]
]

for target, dim_hidden, num_layers, num_heads, ind_or_trans, seed in grid:
    # Shared stem of the .yaml / .sh file names for this grid point.
    run_name = f'{target}_{ind_or_trans}_{seed}_{dim_hidden}_{num_layers}_{num_heads}'
    out_dir = f'/media/david/Media/out_graphgps_github_ready/transfer_learning/{target}/{ind_or_trans}/{seed}/{dim_hidden}/{num_layers}/{num_heads}'

    conf = template_config.format(
        out_dir=out_dir,
        wandb_proj_name=wandb_proj_name,
        ds_download_dir=dl_dir,
        target=target,
        hq_or_lq="lq",
        ind_or_trans=ind_or_trans,
        batch_size=128,
        num_layers=num_layers,
        num_heads=num_heads,
        dim_hidden=dim_hidden,
        seed=seed,
    )
    Path(CONFIG_PATH, f'{run_name}.yaml').write_text(conf)

    # One-line launcher that points main.py at the config written above.
    script = f'python /home/david/Projects_SEPT2024/source_code_for_repo/github/transfer_learning/graphgps_3d/main.py --cfg {CONFIG_PATH}/{run_name}.yaml'
    Path(SCRIPT_PATH, f'{run_name}.sh').write_text(script)

# Retrain DFT to GW

In [14]:
# YAML configuration template for the "DFT to GW" retraining runs, which start
# from a previously saved checkpoint.
#
# The text is filled in with `str.format`; it expects these keyword arguments:
#   out_dir, wandb_proj_name, ds_download_dir, target, hq_or_lq, ind_or_trans,
#   batch_size, num_layers, num_heads, dim_hidden, ckpt_dir, seed
# `dim_hidden` is substituted in three places (gt.dim_hidden, gnn.dim_inner and
# share.dim_in), which keeps those widths identical in every generated file.
#
# Compared with the templates defined earlier in this notebook it adds a
# `pretrained` section (dir = {ckpt_dir}, reset_prediction_head False,
# freeze_main False) and sets ckpt_period 1; the optimiser block uses
# max_epoch 500 with reduce_on_plateau and early_stopping_patience 15.
#
# Everything between the triple quotes is emitted verbatim: the `#` remarks
# inside the literal end up in the generated YAML, they are not Python comments.
# Literal `{` / `}` characters would have to be doubled to survive `str.format`.
#
# NOTE(review): this assignment replaces any earlier value of `template_config`,
# so this cell has to be (re-)run immediately before the generator cell that uses it.
template_config = '''
out_dir: {out_dir}
metric_best: auto
metric_agg: argmin
device: 'cuda:0'
wandb:
  use: True
  project: {wandb_proj_name}
  entity: <WANDB_USERNAME>
dataset:
  format: PyG
  name: QM9-TL
  dir: {ds_download_dir}
  onehot: True
  add_3d: False
  target_name: {target}
  hq_or_lq: {hq_or_lq}
  inductive_or_transductive: {ind_or_trans}
  task: graph
  task_type: regression
  transductive: False
  node_encoder: True
  node_encoder_name: LinearNode+RWSE
  node_encoder_num_types: 28
  node_encoder_bn: False
  edge_encoder: True
  edge_encoder_name: LinearEdge
  edge_encoder_bn: False
posenc_RWSE:
  enable: True
  kernel:
    times_func: range(1,21)
  model: Linear
  dim_pe: 28
  raw_norm_type: BatchNorm
train:
  mode: custom
  batch_size: {batch_size}
  eval_period: 1
  ckpt_period: 1
model:
  type: GPSModel
  loss_fun: l1
  graph_pooling: mean
  edge_decoding: dot
gt:
  layer_type: CustomGatedGCN+Transformer
  layers: {num_layers}
  n_heads: {num_heads}
  dim_hidden: {dim_hidden}  # `gt.dim_hidden` must match `gnn.dim_inner`
  dropout: 0.0
  attn_dropout: 0.1
  layer_norm: True
  batch_norm: False
gnn:
  head: default
  layers_pre_mp: 0
  layers_post_mp: 3  # Not used when `gnn.head: san_graph`
  dim_inner: {dim_hidden}  # `gt.dim_hidden` must match `gnn.dim_inner`
  batchnorm: True
  act: gelu
  dropout: 0.0
optim:
  clip_grad_norm: True
  optimizer: adamW
  weight_decay: 1e-10
  base_lr: 0.0005
  max_epoch: 500
  scheduler: reduce_on_plateau
  reduce_factor: 0.5
  schedule_patience: 7
  min_lr: 1e-5
  early_stopping_patience: 15
share:
  dim_in: {dim_hidden}
  dim_out: 1
pretrained:
  dir: {ckpt_dir}
  reset_prediction_head: False
  freeze_main: False
seed: {seed}'''

In [17]:
# Write one YAML config and one launcher script for every "DFT -> GW" retraining
# run: a model pre-trained on a DFT target is trained further on the matching GW
# target, once per transductive / inductive setting.
CONFIG_PATH = '/home/david/Projects_SEPT2024/source_code_for_repo/graphgps_scripts/training_configs_DFT_to_GW'
SCRIPT_PATH = '/home/david/Projects_SEPT2024/source_code_for_repo/graphgps_scripts/training_scripts_DFT_to_GW'

# Root under which the DFT runs store their results; checkpoints are looked up
# below it.
PRETRAIN_ROOT = '/media/david/Media/out_graphgps_github_ready/transfer_learning'

# Neither destination folder depends on the grid point, so create them up front.
for folder in (CONFIG_PATH, SCRIPT_PATH):
    Path(folder).mkdir(exist_ok=True, parents=True)

dl_dir = '/media/david/Media/PyG_DATASET_DOWNLOAD_DIRECTORY/data_QM9_TF_3D_PyG/with_edge_index/'
wandb_proj_name = 'GPS-TF-try-github-ready'

# Same iteration order as six nested loops, outermost variable listed first.
grid = [
    (target, dim_hidden, num_layers, num_heads, ind_or_trans, seed)
    for target in ['homo_gw']
    for dim_hidden in [256]
    for num_layers in [8]
    for num_heads in [16]
    for ind_or_trans in ['transductive', 'inductive']
    for seed in [0]
]

for target, dim_hidden, num_layers, num_heads, ind_or_trans, seed in grid:
    # Shared stem of the .yaml / .sh file names for this grid point.
    run_name = f'{target}_{ind_or_trans}_{seed}_{dim_hidden}_{num_layers}_{num_heads}'
    out_dir = f'{PRETRAIN_ROOT}/DFT_to_GW/{target}/{ind_or_trans}/{seed}/{dim_hidden}/{num_layers}/{num_heads}'

    # Checkpoint of the matching DFT run.  This used to be a fixed path to the
    # homo_dft *inductive* run, so the transductive config silently loaded
    # inductive weights.  It is now derived from the loop variables; for the
    # inductive homo_gw case the resulting path is unchanged.
    # NOTE(review): assumes the DFT runs were written to
    # <out_dir>/<config file stem>/ as laid out by the DFT generator cell - confirm.
    if not target.endswith('_gw'):
        raise ValueError(f'cannot derive the DFT pre-training target from {target!r}')
    dft_target = target[:-len('_gw')] + '_dft'
    dft_run_name = f'{dft_target}_{ind_or_trans}_{seed}_{dim_hidden}_{num_layers}_{num_heads}'
    ckpt_dir = f'{PRETRAIN_ROOT}/{dft_target}/{ind_or_trans}/{seed}/{dim_hidden}/{num_layers}/{num_heads}/{dft_run_name}/'

    conf = template_config.format(
        out_dir=out_dir,
        wandb_proj_name=wandb_proj_name,
        ds_download_dir=dl_dir,
        target=target,
        hq_or_lq="hq",
        ind_or_trans=ind_or_trans,
        batch_size=128,
        num_layers=num_layers,
        num_heads=num_heads,
        dim_hidden=dim_hidden,
        ckpt_dir=ckpt_dir,
        seed=seed,
    )
    Path(CONFIG_PATH, f'{run_name}.yaml').write_text(conf)

    # One-line launcher that points main.py at the config written above.
    script = f'python /home/david/Projects_SEPT2024/source_code_for_repo/github/transfer_learning/graphgps_3d/main.py --cfg {CONFIG_PATH}/{run_name}.yaml'
    Path(SCRIPT_PATH, f'{run_name}.sh').write_text(script)