This notebook generates a list of Python command-line invocations so that I can test the runtime of the OpenFold model on the SidechainNet dataset.

In [1]:
# Reference copy of the complete command, i.e. the required arguments plus
# every optional flag written out by hand. The next cell rebinds `base_cmd`
# to the shorter prefix that the helpers below extend.
base_cmd = (
    "/scr/openfold/train_openfold.py "
    "/scr/alphafold_data/pdb_mmcif/pdb_files_for_scnmin/ "
    "/scr/scn_roda2/ /scr/alphafold_data/pdb_mmcif/mmcif_files_for_scn/ "
    "/scr/experiments/221212/out1 "
    "2021-10-10 "
    "--gpus=1 "
    "--checkpoint_every_epoch "
    "--obsolete_pdbs_file_path=/scr/alphafold_data/pdb_mmcif/obsolete.dat "
    "--config_preset=finetuning_sidechainnet "
    "--wandb "
    "--wandb_project=finetune-openfold-01 "
    "--wandb_entity=jonathanking "
    "--log_every_n_steps=1 "
    "--resume_from_ckpt=/scr/openfold/openfold/resources/openfold_params/initial_training.pt "
    "--resume_model_weights_only=True "
    "--train_epoch_len=6 "
    "--max_epochs=1 "
    "--debug "
    "--use_openmm=True "
    "--add_struct_metrics "
    "--openmm_weight=1e-7 "
    "--openmm_activation=None "
    "--use_scn_pdb_names "
    "--write_pdbs "
    "--write_pdbs_every_n_steps=3 "
    "--seed=0 "
    "--template_release_dates_cache_path=mmcif_cache.json "
    "--deepspeed_config_path=deepspeed_config.json "
    "--train_chain_data_cache_path=chain_data_cache.json "
    "--benchmark "
    "--num_workers=1 "
    "--precision=bf16 "
    "--alignment_index_path=/scr/dbs/super.index "
    "--wandb_notes=\"debugging CLI\" "
    "--experiment=debug"
)

In [2]:
# Common prefix shared by every generated command: the script path, its
# positional arguments, and the flags that stay fixed across runs. The
# per-run flags are appended to this string by the helpers defined below.
base_cmd = (
    "/scr/openfold/train_openfold.py "
    "/scr/alphafold_data/pdb_mmcif/pdb_files_for_scnmin/ "
    "/scr/scn_roda2/ /scr/alphafold_data/pdb_mmcif/mmcif_files_for_scn/ "
    "/scr/experiments/221212/out1 "
    "2021-10-10 "
    "--gpus=1 "
    "--checkpoint_every_epoch "
    "--obsolete_pdbs_file_path=/scr/alphafold_data/pdb_mmcif/obsolete.dat "
    "--config_preset=finetuning_sidechainnet "
    "--wandb "
    "--wandb_project=finetune-openfold-01 "
    "--wandb_entity=jonathanking "
    "--log_every_n_steps=1 "
    "--resume_from_ckpt=/scr/openfold/openfold/resources/openfold_params/initial_training.pt "
    "--resume_model_weights_only=True "
    "--train_epoch_len=30 "
    "--max_epochs=1 "
    "--debug "
    "--use_openmm=True "
    "--add_struct_metrics "
    "--openmm_weight=1e-7 "
    "--openmm_activation=None "
    "--use_scn_pdb_names "
    "--write_pdbs "
    "--write_pdbs_every_n_steps=3 "
    # The leading space below is kept on purpose so the resulting string is
    # character-for-character the same as before (two spaces at this joint).
    " --train_chain_data_cache_path=chain_data_cache.json"
)

In [3]:
def get_cmd(
    use_template_release_dates_cache,
    use_deepspeed_config,
    use_benchmark,
    num_workers,
    precision,
    use_alignment_index,
    wandb_notes,
    experiment):
    """Return the module-level ``base_cmd`` extended with the requested flags.

    Args:
        use_template_release_dates_cache: if truthy, append
            ``--template_release_dates_cache_path=mmcif_cache.json``.
        use_deepspeed_config: if truthy, append
            ``--deepspeed_config_path=deepspeed_config.json``.
        use_benchmark: if truthy, append ``--benchmark``.
        num_workers: value for ``--num_workers``. ``0`` is emitted as
            ``--num_workers=0``; ``None``/``False`` omit the flag.
        precision: value for ``--precision``; omitted when falsy.
        use_alignment_index: if truthy, append
            ``--alignment_index_path=/scr/dbs/super.index``.
        wandb_notes: text for ``--wandb_notes`` (wrapped in double quotes);
            omitted when falsy.
        experiment: value for ``--experiment``; omitted when falsy.

    Returns:
        The full command as a single space-separated string.
    """
    parts = [base_cmd]
    if use_template_release_dates_cache:
        parts.append("--template_release_dates_cache_path=mmcif_cache.json")
    if use_deepspeed_config:
        parts.append("--deepspeed_config_path=deepspeed_config.json")
    if use_benchmark:
        parts.append("--benchmark")
    # A plain truthiness test would silently drop an explicit 0, so accept a
    # numeric zero while still treating None/False as "flag not requested".
    if num_workers or (num_workers == 0 and num_workers is not False):
        parts.append(f"--num_workers={num_workers}")
    if precision:
        parts.append(f"--precision={precision}")
    if use_alignment_index:
        parts.append("--alignment_index_path=/scr/dbs/super.index")
    if wandb_notes:
        parts.append(f"--wandb_notes=\"{wandb_notes}\"")
    if experiment:
        parts.append(f"--experiment={experiment}")
    return " ".join(parts)
   

# "--template_release_dates_cache_path=mmcif_cache.json \
# --deepspeed_config_path=deepspeed_config.json \
# --train_chain_data_cache_path=chain_data_cache.json \
# --benchmark \
# --num_workers=1 \
# --precision=bf16 \
# --alignment_index_path=/scr/dbs/super.index \
# --wandb_notes=\"debugging CLI\" \
# --experiment=debug"

In [3]:
class CommandGenerator:
    """Builds and collects command lines that extend a common base command.

    Each call to ``make_cmd`` appends the selected flags to ``base_cmd``,
    stores the resulting string in ``cmds`` and advances ``cur_experiment``,
    which is used to number the ``--experiment`` names.
    """

    def __init__(self, base_cmd):
        """Store the command prefix and start with no commands recorded."""
        self.base_cmd = base_cmd
        self.cmds = []
        self.cur_experiment = 0

    @staticmethod
    def _is_given(value):
        """Return True for truthy values and for a numeric zero.

        ``None``, ``False`` and other empty values still mean "omit the
        flag"; only an explicit 0 is additionally treated as a real value.
        """
        return bool(value) or (value == 0 and value is not False)

    @staticmethod
    def _escape_for_double_quotes(text):
        """Backslash-escape the characters that stay special inside "..." in bash."""
        escaped = str(text)
        # The backslash must be handled first so the escapes added for the
        # other characters are not escaped a second time.
        for special in ("\\", "\"", "$", "`"):
            escaped = escaped.replace(special, "\\" + special)
        return escaped

    def make_cmd(self,
        use_template_release_dates_cache,
        use_deepspeed_config,
        use_benchmark,
        num_workers,
        precision,
        use_alignment_index,
        wandb_notes,
        suffix,
        verbose=False,
        seed=3):
        """Build one command, record it in ``self.cmds`` and bump the counter.

        Args:
            use_template_release_dates_cache: if truthy, append
                ``--template_release_dates_cache_path=mmcif_cache.json``.
            use_deepspeed_config: if truthy, append
                ``--deepspeed_config_path=deepspeed_config.json``.
            use_benchmark: if truthy, append ``--benchmark``.
            num_workers: value for ``--num_workers``. ``0`` is emitted;
                ``None``/``False`` omit the flag.
            precision: value for ``--precision``; omitted when falsy.
            use_alignment_index: if truthy, append
                ``--alignment_index_path=/scr/dbs/super.index``.
            wandb_notes: text for ``--wandb_notes``. It is wrapped in double
                quotes, with embedded ``"``, ``$``, backtick and backslash
                escaped so the generated line stays a single shell argument.
            suffix: if truthy, append
                ``--experiment=speed-test-<NN>-<suffix>`` where ``NN`` is the
                current two-digit experiment counter.
            verbose: if truthy, print the command after recording it.
            seed: value for ``--seed``. ``0`` is emitted; ``None``/``False``
                omit the flag.

        Returns:
            None. The command is available as the last element of ``cmds``.
        """
        parts = [self.base_cmd]
        if use_template_release_dates_cache:
            parts.append("--template_release_dates_cache_path=mmcif_cache.json")
        if use_deepspeed_config:
            parts.append("--deepspeed_config_path=deepspeed_config.json")
        if use_benchmark:
            parts.append("--benchmark")
        if self._is_given(num_workers):
            parts.append(f"--num_workers={num_workers}")
        if precision:
            parts.append(f"--precision={precision}")
        if use_alignment_index:
            parts.append("--alignment_index_path=/scr/dbs/super.index")
        if wandb_notes:
            quoted_notes = self._escape_for_double_quotes(wandb_notes)
            parts.append(f"--wandb_notes=\"{quoted_notes}\"")
        if suffix:
            parts.append(f"--experiment=speed-test-{self.cur_experiment:02d}-{suffix}")
        # seed=0 is a valid seed, so it must not be dropped by a truthiness test.
        if self._is_given(seed):
            parts.append(f"--seed={seed}")
        cmd = " ".join(parts)

        # The counter advances on every call, including calls without a suffix.
        self.cur_experiment += 1
        self.cmds.append(cmd)
        if verbose:
            print(cmd)

    def save_cmds(self, file):
        """Write a bash script to ``file`` with one recorded command per line."""
        with open(file, "w") as f:
            f.write("#!/bin/bash \n")
            # Terminate every command, including the last one, with a newline
            # so line-oriented tools see the final command as a complete line.
            f.writelines(cmd + "\n" for cmd in self.cmds)

    def clear(self):
        """Forget the recorded commands.

        ``cur_experiment`` is left untouched, so experiment numbering carries
        on from where it was rather than restarting at 0.
        """
        self.cmds = []
    

## Baselines

In [4]:
# Single generator instance used by all cells below; every command it builds starts from base_cmd.
CG = CommandGenerator(base_cmd=base_cmd)

In [11]:

# Options shared by every baseline run; only --num_workers varies between them.
baseline_opts = dict(
    use_template_release_dates_cache=False,
    use_deepspeed_config=True,
    use_benchmark=False,
    precision="bf16",
    use_alignment_index=False,
    seed=4)

# Plain baseline with a single worker.
CG.make_cmd(
    num_workers=1,
    wandb_notes="Baseline. No template cache, 1 worker, deepspeed:bf16, no alignment index, no benchmark",
    suffix="baseline",
    **baseline_opts)

# Worker-count sweep. The notes and the experiment suffix are both derived
# from the worker count so the labels cannot drift away from the value that
# is actually passed (the 14-worker run used to be labelled "8workers").
for n_workers in (12, 16, 14):
    CG.make_cmd(
        num_workers=n_workers,
        wandb_notes=f"Baseline + {n_workers} workers. No template cache, {n_workers} workers, deepspeed:bf16, no alignment index, no benchmark",
        suffix=f"{n_workers}workers",
        **baseline_opts)

## Adding caches

In [12]:
# Options held fixed for the runs in this cell; each run toggles some
# combination of the template cache, --benchmark and the alignment index.
single_worker_opts = dict(
    use_deepspeed_config=True,
    num_workers=1,
    precision="bf16",
    seed=4)

# Template release-date cache only. The note now says "Yes template cache",
# matching the flag that is actually enabled for this run.
CG.make_cmd(
    use_template_release_dates_cache=True,
    use_benchmark=False,
    use_alignment_index=False,
    wandb_notes="Baseline + template cache. Yes template cache, 1 worker, deepspeed:bf16, no alignment index, no benchmark",
    suffix="tempcache",
    **single_worker_opts)

# --benchmark only.
CG.make_cmd(
    use_template_release_dates_cache=False,
    use_benchmark=True,
    use_alignment_index=False,
    wandb_notes="Baseline + benchmark. No template cache, 1 worker, deepspeed:bf16, no alignment index, yes benchmark",
    suffix="benchmark",
    **single_worker_opts)

# Alignment index only.
CG.make_cmd(
    use_template_release_dates_cache=False,
    use_benchmark=False,
    use_alignment_index=True,
    wandb_notes="Baseline + alignment index. No template cache, 1 worker, deepspeed:bf16, yes alignment index, no benchmark",
    suffix="alnindex",
    **single_worker_opts)

# All three enabled together.
CG.make_cmd(
    use_template_release_dates_cache=True,
    use_benchmark=True,
    use_alignment_index=True,
    wandb_notes="Baseline + template cache + benchmark + alignment index. 1 worker, deepspeed:bf16",
    suffix="tempcache+benchmark+alnindex",
    **single_worker_opts)

In [13]:
# Two runs with every optional speed-up enabled and 12 workers; they differ
# only in the --precision value.
# NOTE(review): `seed` is not passed here, so make_cmd's default seed is used
# rather than the seed=4 given in the cells above -- confirm this is intended.
all_on_12_worker_opts = dict(
    use_template_release_dates_cache=True,
    use_deepspeed_config=True,
    use_benchmark=True,
    num_workers=12,
    use_alignment_index=True)

CG.make_cmd(
    precision="bf16",
    wandb_notes="Baseline + template cache + benchmark + alignment index. 12 worker, deepspeed:bf16",
    suffix="12worker+tempcache+benchmark+alnindex",
    **all_on_12_worker_opts)
CG.make_cmd(
    precision="16",
    wandb_notes="Baseline + template cache + benchmark + alignment index. 12 worker, deepspeed:16",
    suffix="12worker+tempcache+benchmark+alnindex+16precision",
    **all_on_12_worker_opts)

In [5]:
# Baseline settings with --benchmark switched on and 16 workers.
# NOTE(review): `seed` is not passed, so make_cmd's default seed applies here
# instead of the seed=4 used by the earlier baseline cells -- confirm intended.
benchmark_16_worker_opts = dict(
    num_workers=16,
    use_benchmark=True,
    use_deepspeed_config=True,
    precision="bf16",
    use_template_release_dates_cache=False,
    use_alignment_index=False,
    wandb_notes="Baseline + benchmark. 16 workers, deepspeed:bf16",
    suffix="baseline+16worker+benchmark")
CG.make_cmd(**benchmark_16_worker_opts)

In [6]:
# Write every command collected so far into a bash script in the working directory.
CG.save_cmds(file="speed-test-commands-set04.sh")

In [68]:
# Shell escape: mark the generated script as executable so it can be run directly.
!chmod +x speed-test-commands-set04.sh

In [62]:
# Empty CG.cmds so later make_cmd calls start from an empty list; clear() leaves
# CG.cur_experiment unchanged, so experiment numbering continues from its current value.
CG.clear()