# Docking with Autodock Vina

In this notebook, we dock all 300 ligand PDBQT files against the prepared protein PDBQT file using AutoDock Vina.

The script:
- produces up to 9 poses per molecule (`--num_modes 9`),
- logs the results for each pose, per molecule, in the `vina_docking_output` folder,
- provides a summary of the docking results, with the best binding affinity per molecule, in `vina_summary.csv`

In [1]:
#conda install vina
import os
import re
import csv
import subprocess
from typing import List, Optional, Tuple

### First we define a couple of helper functions

In [2]:
def parse_gpf(gpf_file: str):
    """
    Parse an AutoDock .gpf file to extract the grid center and box size (in Å).

    Three keywords are recognised (case-insensitively): ``npts``,
    ``gridcenter`` and ``spacing``. All other lines are ignored.

    - If all three ``npts`` values are present, size = npts * spacing per axis;
      otherwise a 20 Å cube is used.
    - If ``spacing`` is absent, 0.375 is used.
    - If ``gridcenter`` is absent, the center stays at the origin.

    Args:
        gpf_file: Path to .gpf file.
    Returns:
        center: Dict with 'x', 'y', 'z' keys for box center.
        size: Dict with 'x', 'y', 'z' keys for box size.
    Raises:
        ValueError: If the file says ``gridcenter auto`` (no explicit
            coordinates to hand to Vina) or a numeric field cannot be parsed.
        OSError: If the file cannot be opened.
    """
    center = {'x': 0.0, 'y': 0.0, 'z': 0.0}
    npts = {'x': None, 'y': None, 'z': None}
    spacing = None

    with open(gpf_file, 'r') as fh:
        for line in fh:
            line = line.strip()
            if not line:
                continue
            parts = line.split()
            keyword = parts[0].lower()
            if keyword == 'npts' and len(parts) >= 4:
                npts['x'], npts['y'], npts['z'] = map(int, parts[1:4])
            elif keyword == 'gridcenter':
                # 'gridcenter auto' carries no coordinates. Failing loudly here
                # is better than docking around a wrong (0, 0, 0) center or
                # dying on an opaque float('auto') conversion error.
                if len(parts) >= 2 and parts[1].lower() == 'auto':
                    raise ValueError(
                        f"{gpf_file}: 'gridcenter auto' is not supported; "
                        "specify explicit x y z coordinates"
                    )
                if len(parts) >= 4:
                    center['x'], center['y'], center['z'] = map(float, parts[1:4])
            elif keyword == 'spacing' and len(parts) >= 2:
                spacing = float(parts[1])

    if spacing is None:
        spacing = 0.375  # fallback default used by AutoDock4

    # If npts present, compute box size
    if None not in npts.values():
        size = {k: npts[k] * spacing for k in ['x', 'y', 'z']}
    else:
        # fallback box size (small default) if gpf doesn't have npts
        size = {'x': 20.0, 'y': 20.0, 'z': 20.0}

    return center, size

def _captured_output_as_text(captured) -> str:
    """Normalise captured subprocess output (None, bytes or str) to str."""
    if captured is None:
        return ""
    if isinstance(captured, bytes):
        return captured.decode(errors="replace")
    return captured


def run_vina_and_capture(vina_exec: str,
                         receptor: str,
                         ligand: str,
                         center: dict,
                         size: dict,
                         out_pdbqt: str,
                         timeout: Optional[int] = None) -> Tuple[bool, str]:
    """
    Run vina and capture stdout/stderr. Return (success_flag, combined_output_text).

    Vina is invoked with ``--exhaustiveness 16`` and ``--num_modes 9``; stderr
    is merged into stdout so the returned text holds everything Vina printed.

    Args:
        vina_exec: Path to vina executable.
        receptor: Path to receptor PDBQT file.
        ligand: Path to ligand PDBQT file.
        center: Dict with 'x', 'y', 'z' keys for box center.
        size: Dict with 'x', 'y', 'z' keys for box size.
        out_pdbqt: Path to output PDBQT file for docked poses.
        timeout: Optional timeout in seconds.
    Returns:
        Tuple of (success_flag, output_text). On a non-zero exit or a timeout
        the flag is False and the text ends with a bracketed marker line.
    Raises:
        OSError: If the executable cannot be started (e.g. not found); this is
            not converted into a failure tuple.
    """
    cmd = [
        vina_exec,
        "--receptor", receptor,
        "--ligand", ligand,
        "--center_x", str(center['x']),
        "--center_y", str(center['y']),
        "--center_z", str(center['z']),
        "--size_x", str(size['x']),
        "--size_y", str(size['y']),
        "--size_z", str(size['z']),
        "--out", out_pdbqt,
        "--exhaustiveness", "16",
        "--num_modes", "9"
    ]

    try:
        proc = subprocess.run(cmd,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT,
                              text=True,
                              check=True,
                              timeout=timeout)
    except subprocess.CalledProcessError as e:
        # command returned non-zero; capture whatever output exists
        out = _captured_output_as_text(e.stdout)
        return False, out + f"\n[Vina exited with return code {e.returncode}]"
    except subprocess.TimeoutExpired as e:
        # On timeout the captured output may be None (nothing read yet) or
        # bytes even though text=True was requested, so normalise it before
        # concatenating; otherwise this handler itself raises TypeError.
        out = _captured_output_as_text(e.stdout)
        return False, out + "\n[Vina timed out]"

    return True, proc.stdout

def parse_affinities_from_vina_output(text: str) -> List[float]:
    """
    Extract binding affinities (kcal/mol) from captured Vina output.

    Several heuristics are tried in order and the result of the first one
    that finds anything is returned, in order of appearance in the text:
     1. per line mentioning 'kcal': a decimal number right before 'kcal',
        else a decimal number after 'Affinity' (optionally ':' or '=')
     2. mode-table rows such as '   1   -7.8   0.000'
     3. 'affinity = -7.8' style patterns anywhere in the text
     4. last resort: every negative decimal number in the text (noisy)
    Args:
        text: Vina output text.
    Returns:
        List of extracted affinity floats; empty if nothing was found or
        the text is empty.
    """
    if not text:
        return []

    rows = text.splitlines()

    # Heuristic 1: lines that mention the unit explicitly.
    found = []
    for row in rows:
        if 'kcal' not in row.lower():
            continue
        hit = (
            re.search(r'(-?\d+\.\d+)\s*(?:kcal)', row, flags=re.IGNORECASE)
            # e.g. 'Affinity: -7.8 (kcal/mol)', where the number is not adjacent to the unit
            or re.search(r'Affinity[:=]?\s*(-?\d+\.\d+)', row, flags=re.IGNORECASE)
        )
        if hit:
            found.append(float(hit.group(1)))
    if found:
        return found

    # Heuristic 2: rows of the mode table (mode index followed by the affinity).
    table_hits = (re.match(r'^\s*\d+\s+(-?\d+\.\d+)', row) for row in rows)
    found = [float(hit.group(1)) for hit in table_hits if hit]
    if found:
        return found

    # Heuristic 3: 'affinity = <number>' anywhere in the whole text.
    found = [
        float(hit.group(1))
        for hit in re.finditer(r'affinity\s*[=:]?\s*(-?\d+\.\d+)', text, flags=re.IGNORECASE)
    ]
    if found:
        return found

    # Heuristic 4: any negative decimal at all (may pick up unrelated numbers).
    return [float(token) for token in re.findall(r'(-\d+\.\d+)', text)]

In [3]:
### Main function

def main():
    """
    Dock every ligand PDBQT in the ligand folder against the receptor with Vina.

    The search box is read from the receptor's .gpf file. For each ligand the
    docked poses and the captured Vina output are written into
    ``vina_docking_output``. A per-ligand summary is then written to
    ``vina_summary.csv`` and a one-line-per-ligand status to ``vina_log.txt``,
    both in the current working directory.
    """
    vina_executable = "vina"  # or full path to vina
    receptor = "vina_protein_prep_files/final_1h1q_protein.pdbqt"
    gpf_file = "vina_protein_prep_files/final_1h1q_protein.gpf"
    ligand_folder = "vina_docking_files_for_300ligands"
    out_folder = "vina_docking_output"
    os.makedirs(out_folder, exist_ok=True)

    center, size = parse_gpf(gpf_file)
    print(f"Using grid center: {center}")
    print(f"Using box size (Å): {size}")

    ligand_files = sorted([
        os.path.join(ligand_folder, f)
        for f in os.listdir(ligand_folder)
        if f.endswith(".pdbqt")
    ])

    summary_rows = []
    log_lines = []

    for ligand in ligand_files:
        ligand_basename = os.path.splitext(os.path.basename(ligand))[0]
        out_pdbqt = os.path.join(out_folder, f"{ligand_basename}_out.pdbqt")
        log_path = os.path.join(out_folder, f"{ligand_basename}.log")

        print(f"Docking {ligand_basename} ...", flush=True)
        success, out_text = run_vina_and_capture(vina_executable, receptor, ligand, center, size, out_pdbqt)
        # write captured output to log file
        with open(log_path, "w") as lofh:
            lofh.write(out_text)

        # Only parse affinities from a successful run. The parser falls back
        # to "any negative decimal", so numbers inside an error message would
        # otherwise be recorded as a bogus best affinity for a failed ligand.
        affinities = parse_affinities_from_vina_output(out_text) if success else []
        # Vina affinities are negative; best = most negative (min)
        best_aff = min(affinities) if affinities else None

        summary_rows.append({
            "ligand": ligand_basename,
            "best_affinity_kcal_per_mol": best_aff if best_aff is not None else "",
            "out_pdbqt": out_pdbqt if success else "",
            "log_path": log_path,
            "success": success
        })
        log_lines.append(f"{ligand_basename} : {'Success' if success else 'Failed'} ; best_aff={best_aff}")

    # write CSV summary
    csv_path = "vina_summary.csv"
    with open(csv_path, "w", newline="") as csvfh:
        fieldnames = ["ligand", "best_affinity_kcal_per_mol", "out_pdbqt", "log_path", "success"]
        writer = csv.DictWriter(csvfh, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(summary_rows)

    # write plain text log
    with open("vina_log.txt", "w") as f:
        f.write("\n".join(log_lines))

    print("Done. Outputs in:", out_folder)
    print("Summary CSV:", csv_path)
    print("Plain summary:", "vina_log.txt")

In [4]:
# Entry point: run the whole docking workflow when this code is executed
# directly (as a script or as a notebook cell), but not when it is imported.
if __name__ == "__main__":
    main()

Using grid center: {'x': 6.145, 'y': 44.176, 'z': 50.828}
Using box size (Å): {'x': 15.75, 'y': 15.75, 'z': 15.75}
Docking ligand_100 ...
Docking ligand_1019 ...
Docking ligand_1020 ...
Docking ligand_1023 ...
Docking ligand_1027 ...
Docking ligand_1035 ...
Docking ligand_1046 ...
Docking ligand_1049 ...
Docking ligand_1051 ...
Docking ligand_1053 ...
Docking ligand_106 ...
Docking ligand_1069 ...
Docking ligand_108 ...
Docking ligand_1088 ...
Docking ligand_110 ...
Docking ligand_1103 ...
Docking ligand_1104 ...
Docking ligand_1110 ...
Docking ligand_1116 ...
Docking ligand_1119 ...
Docking ligand_1146 ...
Docking ligand_1147 ...
Docking ligand_115 ...
Docking ligand_1150 ...
Docking ligand_116 ...
Docking ligand_1163 ...
Docking ligand_1165 ...
Docking ligand_1169 ...
Docking ligand_1173 ...
Docking ligand_1175 ...
Docking ligand_1177 ...
Docking ligand_1178 ...
Docking ligand_118 ...
Docking ligand_1181 ...
Docking ligand_1182 ...
Docking ligand_1192 ...
Docking ligand_1193 ...
Dock