# Test GOLD parameter sets using the CSD Python API

One advantage of GOLD is that it is highly configurable, so docking protocols can be optimized for particular systems. However, the sheer number of options can make that optimization process difficult. With the Docking API it is easy to test various combinations of options programmatically, which makes the optimization task much more feasible.

This Notebook illustrates how to modify various aspects of the GOLD configuration. Please refer to the GOLD and Docking API documentation for details of the various options available.

#### GOLD docs
* [User Guide](https://www.ccdc.cam.ac.uk/support-and-resources/ccdcresources/GOLD_User_Guide.pdf)
* [Conf file](https://www.ccdc.cam.ac.uk/support-and-resources/ccdcresources/GOLD_conf_file_user_guide.pdf)

#### Docking API docs
* [Descriptive](https://downloads.ccdc.cam.ac.uk/documentation/API/descriptive_docs/docking.html)
* [Module API](https://downloads.ccdc.cam.ac.uk/documentation/API/modules/docking_api.html)

In [None]:
import logging
import sys
import os
import shutil
from pathlib import Path
from platform import platform
import time
import subprocess

In [None]:
import pandas as pd

In [None]:
import ccdc
from ccdc.io import MoleculeReader, EntryReader, EntryWriter
from ccdc.docking import Docker

### Config

The directory containing the input files for these dockings; directory must exist...

In [None]:
# Made absolute so that paths derived from it remain valid after the notebook
# later changes the working directory into the run directory with os.chdir().
input_dir = Path(r'input_files').absolute()

Protein target and a native ligand (used to define binding site); files must exist...

In [None]:
# Both target files are expected in the 'target' sub-directory of input_dir
target_dir = input_dir / 'target'

protein_file = target_dir / 'protein.mol2'  # handed to settings.add_protein_file()
ligand_file  = target_dir / 'ligand.mol2'   # its first molecule is used to define the binding site

Molecules to dock; file must exist...

In [None]:
# Handed to settings.add_ligand_file() as the set of molecules to dock
input_file = input_dir / 'input.sdf'

The directory where individual docking run directories will be created; it will be created itself if necessary...

In [None]:
# Relative to the working directory the notebook is started in.
# NOTE: the final os.chdir('../..') in this notebook relies on this being a
# single-component relative path (run directories are created directly beneath it).
runs_dir = Path(r'parameter_tests')

Input options...

In [None]:
# Binding site radius; N.B. the native ligand will be used to define the binding site (see above).
# Passed to settings.BindingSiteFromLigand() together with the protein and the native ligand.
# NOTE(review): presumably in Angstroms - confirm against the Docking API documentation.

radius = 6.0

In [None]:
# Number of GA runs (https://downloads.ccdc.cam.ac.uk/documentation/API/_modules/ccdc/docking.html#Docker.Settings.add_ligand_file)...
# Passed as the second argument to settings.add_ligand_file().

n_ga_runs = 10  # API default is 1, GOLD/Hermes default is 10

In [None]:
# Fitness function (options are 'goldscore', 'chemscore', 'asp' and 'plp', with the default being 'goldscore')...
# Assigned to settings.fitness_function when the docking is configured.

fitness_function = 'goldscore'

Output options...

In [None]:
# Output dir; assigned to settings.output_directory.
# NOTE(review): relative path, so presumably resolved inside the run directory
# (the notebook changes into it before docking) - confirm.

output_directory = 'output'

In [None]:
# Output format; assigned to settings.output_format and also used as the
# extension of the concatenated output file name.

output_format = 'sdf'  # 'mol2'

In [None]:
# Concatenated output file to be used; set to None to write individual solution files (with autogenerated names)...
# The extension follows output_format so the file name always matches the format requested.

output_file = f'docked.{output_format}'

In [None]:
# Discard side-chain torsion and lone-pair info...
# When True, saving of protein torsions and lone pairs is switched off in the settings.

reduce_size_of_output_files = False 

In [None]:
# Keep only the best N poses for a ligand; set to None to use default...
# None means the corresponding setter is never called, leaving GOLD's own default in place.

n_solutions = None  # 3

In [None]:
# Save N top-ranked ligands only; set to None to use default...
# None means the corresponding setter is never called, leaving GOLD's own default in place.

n_ligands = None

In [None]:
# Save only ligands that reach a minimum fitness; set to None to use default...
# None means the corresponding setters are never called, leaving GOLD's own default in place.

fitness_threshold = None  # 50

### Initialization

In [None]:
# Notebook-wide logger that writes timestamped messages to the console...
logger = logging.getLogger(__name__)

# Attach the handler only once. logging.getLogger() returns the same logger object
# on every call, so re-running this cell would otherwise add another handler each
# time and every message would then be printed repeatedly.
if not logger.handlers:
    handler = logging.StreamHandler()
    handler.setFormatter(logging.Formatter('[%(asctime)s %(levelname)-7s] %(message)s', datefmt='%y-%m-%d %H:%M:%S'))
    logger.addHandler(handler)

logger.setLevel(logging.INFO)

In [None]:
# Record the environment this run was made in (platform, Python, CSD and licensing set-up).
# The string must be an f-string, otherwise the {...} placeholders are logged literally.
logger.info(f"""
Platform:                     {platform()}

Python exe:                   {sys.executable}
Python version:               {'.'.join(str(x) for x in sys.version_info[:3])}

CSD version:                  {ccdc.io.csd_version()}
CSD directory:                {ccdc.io.csd_directory()}
API version:                  {ccdc.__version__}

CSDHOME:                      {os.environ.get('CSDHOME', 'Not set')}
CCDC_LICENSING_CONFIGURATION: {os.environ.get('CCDC_LICENSING_CONFIGURATION', 'Not set')}
""")

Check that all required files and directories exist...

In [None]:
# Stop straight away, with a message naming the culprit, if any required input is missing...

for directory in (input_dir, target_dir):
    assert directory.exists(), f"Error! Required directory '{directory}' not found."

for file in (protein_file, ligand_file, input_file):
    assert file.exists(), f"Error! Required file '{file}' not found."

Create a new runs directory, if necessary...

In [None]:
# exist_ok=True makes this a no-op when the directory is already there from an earlier run.
# Only the last path component is created; any parent directory must already exist.
runs_dir.mkdir(exist_ok=True)

Create a new directory for this run...

In [None]:
# Earlier runs are the entries directly under runs_dir whose names are exactly three digits...
run_dirs = sorted(runs_dir.glob('[0-9][0-9][0-9]'))

# ...and this run gets the number after the highest one found (001 when there are none yet)
last_run_number = int(run_dirs[-1].stem) if run_dirs else 0

run_dir = runs_dir / f"{last_run_number + 1:03d}"

In [None]:
# exist_ok=False: raise rather than silently reuse a run directory that already exists
run_dir.mkdir(exist_ok=False)

In [None]:
# Report through the notebook's own configured logger. The module-level logging.info()
# goes to the root logger, which this notebook never configures, so at the root's
# default WARNING level the message would not be shown.
logger.info(f"Run dir: {run_dir}")

In [None]:
# Work inside the run directory from here on. Relative paths used after this point
# are resolved against it, which is why the input paths were made absolute.
os.chdir(run_dir)

### Configure docking

We can configure all aspects of a GOLD docking _via_ the [Docker.Settings](https://downloads.ccdc.cam.ac.uk/documentation/API/modules/docking_api.html#ccdc.docking.Docker.Settings) object.

In [None]:
# The Docker object runs the docking; its settings object collects every option set in this notebook
docker = Docker()

settings = docker.settings

Basic setup...

In [None]:
# Protein to dock ligands into (the Path is converted with str() before being handed to the API)...

settings.add_protein_file(str(protein_file))

In [None]:
# Define the binding site around the native ligand...

# The first (here, only) protein registered in the settings is the docking target
target_protein = settings.proteins[0]

# Only the first molecule in the ligand file is used
native_ligand = MoleculeReader(str(ligand_file))[0]

settings.binding_site = settings.BindingSiteFromLigand(target_protein, native_ligand, radius)

In [None]:
# Molecules to dock; the second argument is the number of GA runs configured above...

settings.add_ligand_file(str(input_file), n_ga_runs)

In [None]:
# Fitness function; takes the name chosen in the input options...

settings.fitness_function = fitness_function

In [None]:
# Output format; takes the value chosen in the output options...

settings.output_format = output_format

# Output dir; takes the value chosen in the output options...

settings.output_directory = output_directory

`SAVE_OPTIONS`



Use concatenated output file or individual files?

Note that we request that individual file output is deleted if concatenated output is selected.

In [None]:
# Choose between one concatenated output file and one file per solution...
if not output_file:

    # Nothing to configure: individual files are what GOLD writes when no output file is set
    print("Will write individual output files.")

else:

    # Request concatenated output...
    settings.output_file = output_file

    # ...and ask for the individual solution files to be deleted, so only the concatenated file remains
    settings._settings.set_delete_all_solutions(True)

    print("Will write only a concatenated output file.")

Reduce size of output files...

In [None]:
if reduce_size_of_output_files:
    
    # These options are set through the underscore-prefixed (non-public) _settings object

    # Do not save protein torsion info in the output...
    settings._settings.set_save_protein_torsions(False)

    # ...nor lone-pair info
    settings._settings.set_save_lone_pairs(False)

Number of solutions per ligand...

_N.B._ From the [gold_conf](https://www.ccdc.cam.ac.uk/support-and-resources/ccdcresources/409d221af167455c8bac9e81dadabc34.pdf) docs:
"In order for `clean_up_option save_top_n_solutions` to take effect the options `clean_up_option delete_empty_directories` and `clean_up_option delete_redundant_log_files` also need to be set."

In [None]:
# Left untouched (GOLD default) when n_solutions is None
if n_solutions is not None:
    
    settings._settings.set_save_top_n_solutions(n_solutions)

    # Per the gold.conf documentation, save_top_n_solutions only takes effect when
    # these two clean-up options are set as well
    settings._settings.set_delete_empty_directories(True)

    settings._settings.set_delete_redundant_log_files(True)

Total number of (top-ranked) ligands to save...

In [None]:
# Left untouched (GOLD default) when n_ligands is None
if n_ligands is not None:
    
    settings._settings.set_save_best_n_ligands(n_ligands)

Fitness score threshold...

In [None]:
# Left untouched (GOLD default) when fitness_threshold is None
if fitness_threshold is not None:

    settings._settings.set_fitness_threshold(fitness_threshold)

    # NOTE(review): presumably this flag is what activates the threshold set above - confirm
    settings._settings.set_save_fitness_better_than(True)

`WRITE_OPTIONS`

[Write Options](https://downloads.ccdc.cam.ac.uk/documentation/API/descriptive_docs/docking.html?highlight=dummy#output-file-write-options) control how much output is written.

In [None]:
# settings._settings.set_no_asp_mol2_files(True)

# settings._settings.set_no_bestranking_lst_file(True)

# settings._settings.set_no_fit_pts_files(True)

# settings._settings.set_no_gold_err_file(True)

# settings._settings.set_no_gold_ligand_mol2_file(True)

# settings._settings.set_no_gold_log_file(True)

# settings._settings.set_no_gold_protein_mol2_file(True)

# settings._settings.set_no_gold_soln_ligand_mol2_files(True)

# settings._settings.set_no_lgfname_file(True)

# settings._settings.set_no_link_files(True)

# settings._settings.set_no_log_files(True)

# settings._settings.set_no_pid_file(True)

# settings._settings.set_no_plp_mol2_files(True)

# settings._settings.set_no_rnk_files(True)

# settings._settings.set_no_seed_log_file(True)

### Run GOLD

_N.B._  Writing the conf file (by any method) also causes a cavity-definition file to be written, which is simply a (renamed) copy of the native ligand file.

In [None]:
%%time

# Write the configuration to gold.conf and run GOLD in foreground mode
results = docker.dock(file_name='gold.conf', mode='foreground')  # See note above about cavity file

# A non-zero return code is treated as a failed docking run
assert results.return_code == 0, "Error! GOLD did not run successfully."

# Report through the notebook's own configured logger. The module-level logging.info()
# goes to the root logger, which this notebook never configures, so at the root's
# default WARNING level the message would not be shown.
logger.info(f"GOLD run completed in dir: {run_dir}.")

Change back to original directory...

In [None]:
# NOTE: going up exactly two levels assumes the working directory is still the run
# directory and that it sits two levels below the starting directory, i.e. that
# runs_dir is a single-component relative path. Adjust this if runs_dir is changed.
os.chdir('../..')