In [None]:
import yaml

In [None]:
def generate_yaml_config(batch_size=32, num_runs=3, save_dir="./your/path",
                         pretrain_lr=0.0005, finetune_lr=0.0001, data_dir="./your/data/path",
                         data_name="###", split_type="random",
                         train_path=None,
                         val_path=None,
                         test_path=None,
                         task_type="regression",
                         loss_func="MAE", # there are more loss functions (MAE/RMSE/MSE)
                         checkpoint="./checkpoint/zinc-gps_best.pt", # download the pre-trained model from the MolMCL github
                         output_path=None,
                         epochs=100,
                         device="cuda",
                         seed=42):
    """Build a nested configuration dict and write it to ``output_path`` as YAML.

    The arguments are copied into a fixed dict layout with the top-level
    sections ``model``, ``optim``, ``prompt_optim`` and ``dataset``; every
    value that is not a parameter of this function is a hard-coded constant
    in the body below. The dict is serialized with ``yaml.dump`` in block
    style, keeping the key order used here.

    Args:
        batch_size: Written to the top-level ``batch_size`` key.
        num_runs: Written to the top-level ``num_run`` key (note the singular
            key name).
        save_dir: Written to the top-level ``save_dir`` key.
        pretrain_lr: Written to ``optim.pretrain_lr``.
        finetune_lr: Written to ``optim.finetune_lr``.
        data_dir: Written to ``dataset.data_dir``.
        data_name: Written to ``dataset.data_name``.
        split_type: Written to ``dataset.split_type``.
        train_path: Written to ``dataset.custom_train_path``.
        val_path: Written to ``dataset.custom_val_path``.
        test_path: Written to ``dataset.custom_test_path``.
        task_type: Written to ``dataset.task``.
        loss_func: Written to ``dataset.loss_func``.
        checkpoint: Written to ``model.checkpoint``.
        output_path: Destination of the YAML file. Required in practice even
            though it defaults to ``None`` for signature compatibility.
            Missing parent directories are created.
        epochs: Written to the top-level ``epochs`` key.
        device: Written to the top-level ``device`` key.
        seed: Written to the top-level ``seed`` key.

    Returns:
        None. The function writes the file and prints a confirmation line.

    Raises:
        TypeError: If ``output_path`` is ``None``.
        OSError: If the directory or file cannot be created or written.
    """
    # Function-scope import so this cell does not depend on the import cell
    # providing anything beyond `yaml`.
    import os

    # Fail fast, before any filesystem work, with a message that names the
    # offending argument instead of the opaque error raised by open(None).
    if output_path is None:
        raise TypeError(
            "generate_yaml_config() requires output_path: pass the file path "
            "where the YAML configuration should be written"
        )

    config = {
        "batch_size": batch_size,
        "epochs": epochs,
        "device": device,
        "seed": seed,
        "num_run": num_runs,
        "verbose": True,
        "split_seed": 0,
        "save_dir": save_dir,
        "model": {
            "backbone": "gps",
            "num_layer": 5,
            "emb_dim": 300,
            "heads": 6,
            "layernorm": True,
            "dropout_ratio": 0,
            "attn_dropout_ratio": 0.3,
            "temperature": 0.5,
            "use_prompt": True,
            "normalize": False,
            "checkpoint": checkpoint
        },
        "optim": {
            "prompt_lr": 0.0005,
            "pretrain_lr": pretrain_lr,
            "finetune_lr": finetune_lr,
            "decay": 1e-6,
            "gradient_clip": 5,
            "scheduler": "cos_anneal"
        },
        "prompt_optim": {
            "skip_bo": True,
            "inits": [0.0000, 0.0000, 0.0000]
        },
        "dataset": {
            "data_dir": data_dir,
            "data_name": data_name,
            "split_type": split_type,
            "custom_train_path": train_path,
            "custom_val_path": val_path,
            "custom_test_path": test_path,
            "num_workers": 0,
            "feat_type": "super_rich",
            "task": task_type,
            "loss_func": loss_func
        }
    }

    # Create the destination directory so a fresh path does not fail with
    # FileNotFoundError. Only path-like values are handled; anything else
    # open() accepts (e.g. an integer file descriptor) is passed through.
    if isinstance(output_path, (str, bytes, os.PathLike)):
        parent_dir = os.path.dirname(os.path.abspath(output_path))
        os.makedirs(parent_dir, exist_ok=True)

    with open(output_path, "w") as file:
        # sort_keys=False keeps the section order defined above.
        yaml.dump(config, file, default_flow_style=False, sort_keys=False)

    print(f"YAML configuration file saved to {output_path}")

In [None]:
# Generate the YAML file
# generate_yaml_config(save_dir="path/save/model", # path to save the models
#                      num_runs=5, # number of independent models
#                      batch_size=256, 
#                      data_name='data',
#                      split_type='customized', # random/customized/scaffold
#                      train_path='your/train_path', # csv format
#                      val_path='your/val_path', # csv format
#                      test_path='your/test_path', # csv format
#                      output_path='path/to/save/config') # path to save the configuration file