<a href="https://colab.research.google.com/github/bramyeon/af2-gamma-eval/blob/main/colab/command.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# DSLAB Synthetic Protein Structure Evaluation System v0.3.3: <b>Command Builder</b>

Developed by [Bryan Nathanael Wijaya](mailto:bramyeon@gmail.com) and [Luiz Felipe Vecchietti](mailto:lfelipesv@gmail.com)  
Contact us for inquiries or bug reports 🙌

```
python main.py [-h] [--input_dir INPUT_DIR] [--output_dir OUTPUT_DIR] [--token TOKEN] [--contigs CONTIGS] [--ddg_pre]
               [--mpnn] [--msa MSA] [--af] [--ddg_fin] [--ddg_reference DDG_REFERENCE] [--ddg_quiet] [--ddg_filter_pre]
               [--ddg_filter_fin] [--ddg_max_pre DDG_MAX_PRE] [--ddg_max_fin DDG_MAX_FIN] [--mpnn_mode MPNN_MODE]
               [--mpnn_num MPNN_NUM] [--mpnn_temp MPNN_TEMP] [--mpnn_fr] [--msa_quiet] [--af_offline] [--af_init]
               [--af_multimer] [--af_custemp] [--af_recycle AF_RECYCLE] [--af_tolerance AF_TOLERANCE] [--af_seed AF_SEED]
               [--af_quiet] [--af_keep_all] [--af_filter] [--af_model AF_MODEL] [--af_plddt AF_PLDDT] [--af_pae AF_PAE]
               [--af_ipae AF_IPAE] [--af_no_plddt] [--af_no_pae] [--af_no_ipae] [--af_all] [--af_best_avg] [--af_sep_csv]
```

In [None]:
#@title Arguments for I/O directories and structure specifications

#@markdown | Argument                          | Type  | Description                                   | Default Value | Example Value |
#@markdown | :-------------------------------- | :---: | :-------------------------------------------- | :-----------: | :-----------: |
#@markdown | `-h`, `--help`                    |       | show the help message and exit                |               |               |
#@markdown | `--input_dir INPUT_DIR`           | str   | input directory                               | input         | data/foo      |
#@markdown | `--output_dir OUTPUT_DIR`         | str   | output directory                              | output        | data/boo      |
#@markdown | `--token TOKEN`                   | str   | output_dir token in case MSA is pre-searched  | None          | 20240401172937 |
#@markdown | `--contigs CONTIGS`               | str   | specification of the structure                | None          | A1-113:B1-26/6-6/B33-106 |
#@markdown
#@markdown <br>

# Form fields for the I/O locations and the structure specification.
# `token` and `contigs` start out blank.
input_dir = "input" #@param {type:"string"}
output_dir = "output" #@param {type:"string"}
token = "" #@param {type:"string"}
contigs = "" #@param {type:"string"}

# Start a fresh option table keyed by argument name (without the leading
# dashes); the cells below add their own entries to it.
args = {
    'input_dir': input_dir,
    'output_dir': output_dir,
    'token': token,
    'contigs': contigs,
}

In [None]:
#@title Arguments for service selection

#@markdown | Argument                          | Type  | Description                                              |
#@markdown | :-------------------------------- | :---: | :------------------------------------------------------- |
#@markdown | `--ddg_pre`                       | flag  | use PyRosetta to calculate ddG of preliminary structure  |
#@markdown | `--mpnn`                          | flag  | use ProteinMPNN to design sequence                       |
#@markdown | `--msa`                           | flag  | pre-search MSA (before AF2 in offline environment)       |
#@markdown | `--af`                            | flag  | use AF2 to predict structure                             |
#@markdown | `--ddg_fin`                       | flag  | use PyRosetta to calculate ddG of final structure        |
#@markdown
#@markdown <br>

# One checkbox per pipeline stage; each is off unless ticked in the form.
ddg_pre = False #@param {type:"boolean"}
mpnn = False #@param {type:"boolean"}
msa = False #@param {type:"boolean"}
af = False #@param {type:"boolean"}
ddg_fin = False #@param {type:"boolean"}

# Record the stage switches in the shared option table, in pipeline order.
args.update({
    'ddg_pre': ddg_pre,
    'mpnn': mpnn,
    'msa': msa,
    'af': af,
    'ddg_fin': ddg_fin,
})

In [None]:
#@title Arguments for ddG calculation with PyRosetta

#@markdown | Argument                          | Type  | Description                                   | Default Value | Example Value |
#@markdown | :-------------------------------- | :---: | :-------------------------------------------- | :-----------: | :-----------: |
#@markdown | `--ddg_reference DDG_REFERENCE`   | str   | reference structure for the designs           | None          | data/ref.pdb  |
#@markdown | `--ddg_quiet`                     | flag  | print only warning and error messages         |               |               |
#@markdown | `--ddg_filter_pre`                | flag  | filter preliminary candidates based on ddG    |               |               |
#@markdown | `--ddg_filter_fin`                | flag  | filter final candidates based on ddG          |               |               |
#@markdown | `--ddg_max_pre DDG_MAX_PRE`       | float | <b>MAX</b>imum ddG value to pass the preliminary filter | -30.0    | 10.5     |
#@markdown | `--ddg_max_fin DDG_MAX_FIN`       | float | <b>MAX</b>imum ddG value to pass the final filter       | 2000.0   | 2023.5   |
#@markdown
#@markdown <br>

# ddG options only matter when at least one ddG stage was selected above.
if ddg_pre or ddg_fin:
    ddg_reference = "" #@param {type:"string"}
    ddg_quiet = False #@param {type:"boolean"}
    ddg_filter_pre = False #@param {type:"boolean"}
    ddg_filter_fin = False #@param {type:"boolean"}
    ddg_max_pre = -30.0 #@param {type:"number"}
    ddg_max_fin = 2000.0 #@param {type:"number"}

    # Options shared by the preliminary and the final ddG stage.
    args.update({'ddg_reference': ddg_reference, 'ddg_quiet': ddg_quiet})

    # Preliminary stage: the threshold is recorded only when its filter is on.
    if ddg_pre:
        args['ddg_filter_pre'] = ddg_filter_pre
    if ddg_pre and ddg_filter_pre:
        args['ddg_max_pre'] = ddg_max_pre

    # Final stage: same pattern as the preliminary stage.
    if ddg_fin:
        args['ddg_filter_fin'] = ddg_filter_fin
    if ddg_fin and ddg_filter_fin:
        args['ddg_max_fin'] = ddg_max_fin

In [None]:
#@title Arguments for sequence design with ProteinMPNN (or MPNN-FR)

#@markdown | Argument                          | Type  | Description                                   | Default Value | Example Value |
#@markdown | :-------------------------------- | :---: | :-------------------------------------------- | :-----------: | :-----------: |
#@markdown | `--mpnn_mode MPNN_MODE`           | str   | ProteinMPNN mode: partial/binder/fixbb        | partial       | binder        |
#@markdown | `--mpnn_num MPNN_NUM`             | int   | number of sequence designs per structure      | 8             | 3             |
#@markdown | `--mpnn_temp MPNN_TEMP`           | float | MPNN sampling temperature                     | 0.1           | 0.01          |
#@markdown | `--mpnn_fr`                       | flag  | use Rosetta Fast Relax                        |               |               |
#@markdown
#@markdown <br>

# ProteinMPNN options are collected only when the `mpnn` stage is selected.
if mpnn:
    mpnn_mode = "partial" #@param {type:"string"}
    mpnn_num = 8 #@param {type:"number"}
    mpnn_temp = 0.1 #@param {type:"number"}
    mpnn_fr = False #@param {type:"boolean"}

    # Copy all four form values into the shared option table.
    args.update({
        'mpnn_mode': mpnn_mode,
        'mpnn_num': mpnn_num,
        'mpnn_temp': mpnn_temp,
        'mpnn_fr': mpnn_fr,
    })

In [None]:
#@title Arguments for MSA pre-search before running AF2 Gamma in an internet-free environment

#@markdown Note that the `--af_custemp` flag here must be set if you want to run AF2 Gamma with a custom template later on.

#@markdown | Argument                          | Type  | Description                                   | Default Value | Example Value |
#@markdown | :-------------------------------- | :---: | :-------------------------------------------- | :-----------: | :-----------: |
#@markdown | `--msa_quiet`                     | flag  | print only error messages                     |               |               |
#@markdown | `--af_custemp`                    | flag  | use custom template for AF2 prediction        |               |               |
#@markdown
#@markdown <br>

# MSA pre-search options are collected only when the `msa` stage is selected.
if msa:
    msa_quiet = False #@param {type:"boolean"}
    af_custemp = False #@param {type:"boolean"}

    # Both values go into the shared option table under their argument names.
    args.update({'msa_quiet': msa_quiet, 'af_custemp': af_custemp})

In [None]:
#@title Arguments for structure prediction with AlphaFold2 Gamma

#@markdown | Argument                          | Type  | Description                                   | Default Value | Example Value |
#@markdown | :-------------------------------- | :---: | :-------------------------------------------- | :-----------: | :-----------: |
#@markdown | `--af_offline`                    | flag  | run AF2 without internet (MSA search skipped) |               |               |
#@markdown | `--af_init`                       | flag  | use initial guess                             |               |               |
#@markdown | `--af_multimer`                   | flag  | use AF2 Multimer                              |               |               |
#@markdown | `--af_custemp`                    | flag  | use custom template for AF2 prediction        |               |               |
#@markdown | `--af_recycle AF_RECYCLE`         | int   | number of AF2 recycles                        | 20            | 3             |
#@markdown | `--af_tolerance AF_TOLERANCE`     | float | recycle early stop tolerance                  | 0.5           | 1.0           |
#@markdown | `--af_seed AF_SEED`               | int   | random model seed                             | 0             | 2023          |
#@markdown | `--af_quiet`                      | flag  | print only error messages                     |               |               |
#@markdown | `--af_keep_all`                   | flag  | keep all the AF2 results and details (not recommended) |               |               |
#@markdown | `--af_filter`                     | flag  | filter with one model before using all models |               |               |
#@markdown | `--af_all`                        | flag  | use all 5 AF2 models to validate              |               |               |
#@markdown
#@markdown <br>

# AlphaFold2 options are collected only when the `af` stage is selected.
if af:
    af_offline = False #@param {type:"boolean"}
    af_init = False #@param {type:"boolean"}
    af_multimer = False #@param {type:"boolean"}
    af_custemp = False #@param {type:"boolean"}
    af_recycle = 20 #@param {type:"number"}
    af_tolerance = 0.5 #@param {type:"number"}
    af_seed = 0 #@param {type:"number"}
    af_quiet = False #@param {type:"boolean"}
    af_keep_all = False #@param {type:"boolean"}
    af_filter = False #@param {type:"boolean"}
    af_all = False #@param {type:"boolean"}

    # General AF2 settings, recorded in the same order as the form fields.
    args.update({
        'af_offline': af_offline,
        'af_init': af_init,
        'af_multimer': af_multimer,
        'af_custemp': af_custemp,
        'af_recycle': af_recycle,
        'af_tolerance': af_tolerance,
        'af_seed': af_seed,
        'af_quiet': af_quiet,
        'af_keep_all': af_keep_all,
        'af_filter': af_filter,
        'af_all': af_all,
    })

#@markdown <br>
#@markdown
#@markdown | Argument for one-model AF2        | Type  | Description                                   | Default Value | Example Value |
#@markdown | :-------------------------------- | :---: | :-------------------------------------------- | :-----------: | :-----------: |
#@markdown | `--af_model AF_MODEL`             | int   | model number used for filtering/validation    | 1             | 5             |
#@markdown | `--af_plddt AF_PLDDT`             | float | <b>MIN</b>imum pLDDT value to pass the filter | 0.9           | 0.8           |
#@markdown | `--af_pae AF_PAE`                 | float | <b>MAX</b>imum pAE value to pass the filter   | 5.0           | 22.5          |
#@markdown | `--af_ipae AF_IPAE`               | float | <b>MAX</b>imum i-pAE value to pass the filter | 5.0           | 16.7          |
#@markdown | `--af_no_plddt`                   | flag  | do not use pLDDT to filter                    |               |               |
#@markdown | `--af_no_pae`                     | flag  | do not use pAE to filter                      |               |               |
#@markdown | `--af_no_ipae`                    | flag  | do not use i-pAE to filter                    |               |               |
#@markdown
#@markdown <br>

    # One-model filter settings apply only when `af_filter` is ticked.
    if af_filter:
        af_model = "1" #@param [1, 2, 3, 4, 5]
        af_plddt = 0.9 #@param {type:"number"}
        af_pae = 5.0 #@param {type:"number"}
        af_ipae = 5.0 #@param {type:"number"}
        af_no_plddt = False #@param {type:"boolean"}
        af_no_pae = False #@param {type:"boolean"}
        af_no_ipae = False #@param {type:"boolean"}

        # The dropdown yields a string, so convert the model number to int.
        args['af_model'] = int(af_model)

        # A threshold is recorded only if its metric has not been disabled
        # through the matching `af_no_*` checkbox.
        args.update({
            option: threshold
            for option, threshold, disabled in (
                ('af_plddt', af_plddt, af_no_plddt),
                ('af_pae', af_pae, af_no_pae),
                ('af_ipae', af_ipae, af_no_ipae),
            )
            if not disabled
        })

        # The disable switches themselves are always recorded.
        args.update({
            'af_no_plddt': af_no_plddt,
            'af_no_pae': af_no_pae,
            'af_no_ipae': af_no_ipae,
        })

#@markdown <br>
#@markdown
#@markdown | Argument for five-model AF2       | Type  | Description                                            |
#@markdown | :-------------------------------- | :---: | :----------------------------------------------------- |
#@markdown | `--af_best_avg`                   | flag  | save CSV files for the best and average results        |
#@markdown | `--af_sep_csv`                    | flag  | save a separate CSV file for each model                |
#@markdown
#@markdown <br>

    # Five-model reporting settings apply only when `af_all` is ticked.
    if af_all:
        af_best_avg = False #@param {type:"boolean"}
        af_sep_csv = False #@param {type:"boolean"}

        args.update({'af_best_avg': af_best_avg, 'af_sep_csv': af_sep_csv})

In [None]:
#@title (Optional) Additional settings

# Optional wrappers around the generated command. These values are not stored
# in `args`; the command-building cell reads them directly.

#@markdown Do you want to specify the cuda device ID to use?
# When `specify_cuda` is ticked, the command is prefixed with
# `CUDA_VISIBLE_DEVICES=<cuda_id>`.
specify_cuda = False #@param {type:"boolean"}
cuda_id = "0" #@param [0, 1, 2, 3, 4, 5]
# The dropdown yields a string; convert it to an int.
cuda_id = int(cuda_id)

#@markdown Do you want to use `nohup` and run in the background?
# When `nohup` is ticked, the command is wrapped in `nohup ... &`;
# `log_path` is the file its output is redirected to.
nohup = False #@param {type:"boolean"}
log_path = '' #@param {type:"string"}

In [None]:
#@title The following is your command! Copy it and run the validation script on the supported environment. Good luck🤞

# Assemble the `python main.py ...` command line from the options collected in
# `args` by the cells above, optionally wrap it with `nohup` and a
# `CUDA_VISIBLE_DEVICES` prefix, and print it for copy-pasting into a shell.
import shlex

cmd = ["python main.py"]
for key, value in args.items():
    # bool must be tested before int/float because bool is a subclass of int.
    if isinstance(value, bool):
        # Flags are emitted bare, and only when switched on.
        if value:
            cmd.append(key)

    elif isinstance(value, str):
        # Blank (or whitespace-only) strings mean "not set": omit the option.
        value = value.strip()
        if value:
            # Quote the value so paths containing spaces or shell
            # metacharacters survive being pasted into a shell. Values made of
            # ordinary path characters are left unchanged by shlex.quote.
            cmd.append(f"{key} {shlex.quote(value)}")

    elif isinstance(value, (int, float)):
        cmd.append(f"{key} {value}")

    else:
        print(f"WARNING: {key} cannot be {type(value)}. This will be set to its default value intrinsically.")

# Every entry after the first starts with an option name, so joining on " --"
# gives each option its leading dashes.
cmd = ' --'.join(cmd)

if nohup:
    log_file = log_path.strip()
    if log_file:
        cmd = f"nohup {cmd} > {shlex.quote(log_file)} &"
    else:
        # No log file given: `> &` with an empty target is a shell syntax
        # error, so drop the redirect and let nohup fall back to its default
        # output file (nohup.out).
        cmd = f"nohup {cmd} &"

if specify_cuda:
    # An environment assignment placed before the command applies to that
    # command only.
    cmd = f"CUDA_VISIBLE_DEVICES={cuda_id} {cmd}"

print(cmd)

python main.py --input_dir input --output_dir output
