# In [None]:  (cell marker left over from the notebook export; commented out so the file parses as Python)
"""
pdb_to_lightdock.ipynb
-------------------
Runs LightDock for a receptor-ligand pair.

Input PDBs: you pass them on the command line, e.g.
    python pdb_to_lightdock.py examples/ppinsight_data/input_files/2UUY_rec.pdb \
                               examples/ppinsight_data/input_files/2UUY_lig.pdb

Output folders:
    examples/ppinsight_data/output_files/lightdock_runs/<receptor>_vs_<ligand>/
"""

import glob     # to find files using wildcards
import os        # for paths and directories
import subprocess  # to run LightDock command-line tools
import sys       # to read command-line arguments
import shutil    # to copy files


def run_command(cmd, cwd=None):
    """
    Echo a command to stdout, then execute it and wait for it to finish.

    Parameters:
      cmd : list of strings forming the argument vector,
            e.g. ["lightdock3.py", "setup.json", "100"]
      cwd : directory the child process is started in
            (None → the child inherits the current working directory)

    Raises subprocess.CalledProcessError when the command exits with a
    non-zero status (because check=True is passed to subprocess.run).
    """
    # Echo first so the user can follow what the pipeline is doing.
    echoed = " ".join(cmd)
    print(">>", echoed)

    # The argument list is handed over as-is; no shell is involved.
    subprocess.run(cmd, check=True, cwd=cwd)


def make_output_dir(receptor_pdb, ligand_pdb,
                    base_root="examples/ppinsight_data/output_files",
                    method="lightdock_runs"):
    """
    Ensure the per-run output folder exists and return its path.

    Resulting layout:
        <base_root>/
            <method>/
                <receptor_name>_vs_<ligand_name>/

    Parameters:
      receptor_pdb, ligand_pdb : input PDB paths; only their file names
                                 (without directory and extension) are used
      base_root                : top-level output folder shared by all methods
      method                   : subfolder for this tool (LightDock)

    Returns the path of the run folder, e.g.
        examples/ppinsight_data/output_files/lightdock_runs/2UUY_rec_vs_2UUY_lig

    Folders that already exist are reused; nothing is deleted.
    """
    # Create the shared root first, then the per-tool folder below it.
    method_dir = os.path.join(base_root, method)
    for folder in (base_root, method_dir):
        os.makedirs(folder, exist_ok=True)

    # Strip directory and extension: "some/dir/2UUY_rec.pdb" -> "2UUY_rec"
    stems = [
        os.path.splitext(os.path.basename(pdb_path))[0]
        for pdb_path in (receptor_pdb, ligand_pdb)
    ]

    # Run folder is named "<receptor>_vs_<ligand>", e.g. "2UUY_rec_vs_2UUY_lig"
    run_dir = os.path.join(method_dir, "_vs_".join(stems))
    os.makedirs(run_dir, exist_ok=True)

    return run_dir


def lightdock_pipeline(receptor_pdb, ligand_pdb,
                       working_dir,
                       swarms=None, glowworms=None,
                       steps=100, swarm_list=None, cores=1):
    """
    Runs the full LightDock workflow:

    1) Setup  (lightdock3_setup.py -> creates setup.json and initial files)
    2) Simulation (lightdock3.py)
    3) Model generation (lgd_generate_conformations.py)

    Parameters:
      receptor_pdb, ligand_pdb : input PDB file paths
      working_dir              : where all LightDock files will be written
                                 (created if missing; may be relative)
      swarms, glowworms        : docking parameters (None → LightDock defaults)
      steps                    : number of LightDock steps (tutorial uses 100)
      swarm_list               : which swarms to run (e.g. [0]);
                                 None/empty → no "-l" option is passed
      cores                    : number of CPU cores for the simulation

    Models are generated for one swarm only: the first entry of swarm_list,
    or swarm 0 when no list is given.

    Raises subprocess.CalledProcessError (via run_command) if any of the
    LightDock tools exits with a non-zero status.
    """

    # Every command below runs with cwd=working_dir, so any path handed to
    # the tools must not depend on the caller's current directory. A relative
    # working_dir would otherwise be resolved a second time by the child
    # process (relative to working_dir itself) and the files would not be found.
    working_dir = os.path.abspath(working_dir)
    os.makedirs(working_dir, exist_ok=True)

    # Build paths to the copies of the PDB files INSIDE the working directory.
    # Example:
    #   working_dir = .../output_files/lightdock_runs/2UUY_rec_vs_2UUY_lig/
    #   rec = .../2UUY_rec.pdb (inside that directory)
    #   lig = .../2UUY_lig.pdb
    rec = os.path.join(working_dir, os.path.basename(receptor_pdb))
    lig = os.path.join(working_dir, os.path.basename(ligand_pdb))

    # Copy the original PDB files into the working directory.
    # An input that already lives there needs no copy (shutil.copy would
    # raise SameFileError for it).
    for source, staged in ((receptor_pdb, rec), (ligand_pdb, lig)):
        try:
            shutil.copy(source, staged)
        except shutil.SameFileError:
            pass

    # -----------------------------
    # 1) LightDock setup
    # -----------------------------
    # lightdock3_setup.py prepares LightDock input files.
    # Flags:
    #   --noxt : remove terminal OXT atoms
    #   --noh  : remove hydrogens
    #   --now  : remove water molecules
    #   -anm   : enable ANM (normal modes) backbone flexibility
    # These are recommended preprocessing steps.
    cmd = ["lightdock3_setup.py", rec, lig, "--noxt", "--noh", "--now", "-anm"]

    # If user passed a custom number of swarms, add "-s <value>"
    if swarms:
        cmd += ["-s", str(swarms)]

    # If user passed a custom number of glowworms, add "-g <value>"
    if glowworms:
        cmd += ["-g", str(glowworms)]

    # Run the setup command inside the working directory
    run_command(cmd, cwd=working_dir)

    # -----------------------------
    # 2) Run LightDock simulation
    # -----------------------------
    # "setup.json" is created by the previous step in working_dir.
    # Here we tell LightDock how many steps to run.
    cmd = ["lightdock3.py", "setup.json", str(steps), "-c", str(cores)]

    # If a list of swarms is provided, pass it using "-l"
    # e.g., swarm_list = [0] means "only run swarm 0"
    swarm_ids = list(swarm_list) if swarm_list else []
    if swarm_ids:
        cmd += ["-l"] + [str(swarm_id) for swarm_id in swarm_ids]

    # Run the simulation
    run_command(cmd, cwd=working_dir)

    # -----------------------------
    # 3) Generate docked models
    # -----------------------------
    # After the simulation, LightDock produces files like:
    #   swarm_0/gso_100.out (if steps=100)
    #
    # Here we choose:
    #   - the first swarm that was actually simulated (swarm 0 when no
    #     explicit list was given), so the .out file is guaranteed to belong
    #     to a swarm that was run
    #   - last step (gso_<steps>.out)
    swarm = swarm_ids[0] if swarm_ids else 0
    swarm_dir = os.path.join(working_dir, f"swarm_{swarm}")
    out_file = os.path.join(swarm_dir, f"gso_{steps}.out")

    # Number of models to generate:
    # if glowworms is set, use that; otherwise use 200 as a standard default.
    num_models = glowworms if glowworms else 200

    # Command to generate the PDB conformations from the .out file
    cmd = [
        "lgd_generate_conformations.py",
        rec,          # receptor PDB path (inside working_dir)
        lig,          # ligand PDB path (inside working_dir)
        out_file,     # LightDock output with swarm positions
        str(num_models),  # how many models to generate
    ]

    # Run model generation
    run_command(cmd, cwd=working_dir)

    # Print where the final docked models are located
    print("\nDone.")
    print("Docked models saved in:", swarm_dir)


def _remove_stale_outputs(directory):
    """Delete LightDock artifacts that a previous run left in *directory*."""
    # Escape the directory so glob metacharacters in its name are taken literally.
    pattern_root = glob.escape(directory)

    items = glob.glob(os.path.join(pattern_root, "lightdock*"))  # files/folders starting with "lightdock"
    items += glob.glob(os.path.join(pattern_root, "swarm_*"))    # files/folders starting with "swarm_"
    items += [os.path.join(directory, name) for name in ("setup.json", "init")]

    for path in items:
        if os.path.isdir(path):
            # Best-effort removal of a directory and its contents.
            shutil.rmtree(path, ignore_errors=True)
        elif os.path.isfile(path):
            os.remove(path)
        # Entries that do not exist (e.g. no "init" yet) are simply skipped.


if __name__ == "__main__":

    # sys.argv is the list of command-line arguments:
    #   sys.argv[0] is the script name (pdb_to_lightdock.py)
    #   sys.argv[1] is the receptor PDB path
    #   sys.argv[2] is the ligand PDB path
    # Validate them BEFORE touching the file system, so that a bare
    # invocation only prints the usage text and deletes nothing.
    if len(sys.argv) < 3:
        print("Usage: python pdb_to_lightdock.py receptor.pdb ligand.pdb")
        print("Example:")
        print("  python pdb_to_lightdock.py \\")
        print("      examples/ppinsight_data/input_files/2UUY_rec.pdb \\")
        print("      examples/ppinsight_data/input_files/2UUY_lig.pdb")
        sys.exit(1)

    # Read receptor and ligand PDB paths from the command line
    receptor = sys.argv[1]
    ligand = sys.argv[2]

    # Create a descriptive output directory for this receptor-ligand pair.
    # This will be:
    #   examples/ppinsight_data/output_files/lightdock_runs/<rec_vs_lig>/
    workdir = make_output_dir(receptor, ligand, method="lightdock_runs")

    # The pipeline runs every LightDock tool with cwd=workdir, so that is
    # where leftovers of an earlier run of the same pair would be found.
    # Only that folder is cleaned; the directory the script was started
    # from is left untouched.
    _remove_stale_outputs(workdir)

    # Run the LightDock pipeline with mostly default settings
    lightdock_pipeline(
        receptor,
        ligand,
        working_dir=workdir,
        swarms=None,      # None → use LightDock default number of swarms
        glowworms=None,   # None → LightDock default (usually 200)
        steps=100,        # number of steps; 100 is the tutorial default
        swarm_list=[0],   # only run swarm 0
        cores=1           # number of CPU cores; 1 is simplest
    )
