#!/usr/bin/env python3
"""
pdb_to_haddock.py
-----------------
Prepare a HADDOCK3 run directory and a tutorial-style config (.cfg).

This script DOES NOT run HADDOCK.
It only organizes input files and writes a config file the user can edit or run manually.

This mirrors the structure of pdb_to_lightdock.py:
  1) Build an organized run directory
  2) Stage required files (PDBs, restraints)
  3) Write a readable config with sensible defaults
"""

import argparse
import shutil
from pathlib import Path

# Output layout shared with the companion preparation scripts: a common
# output root with one subfolder per docking method.
BASE_ROOT = Path("examples") / "ppinsight_data" / "output_files"
METHOD = "haddock_runs"


# ---------------------------------------------------------
# Utility function for consistent command output (lightdock style)
# ---------------------------------------------------------
def info(msg):
    """Print *msg* to standard output, prefixed with the ``[INFO]`` tag."""
    line = "[INFO] {}".format(msg)
    print(line)


# ---------------------------------------------------------
# Create run directory:
# examples/.../output_files/haddock_runs/<rec_vs_lig>/
#
# Using explicit structure makes multiple runs easy to browse.
# We do NOT delete old folders automatically (safe default).
# ---------------------------------------------------------
def make_run_dir(rec, lig, runname):
    """Create the folder tree for one receptor/ligand pair.

    The run folder is ``BASE_ROOT/METHOD/<receptor stem>_vs_<ligand stem>``
    and contains a ``data`` subfolder. Folders that already exist are reused;
    nothing is deleted.

    Args:
        rec: Path to the receptor file; only its stem is used, for naming.
        lig: Path to the ligand file; only its stem is used, for naming.
        runname: Accepted for the caller's convenience; not used here.

    Returns:
        A ``(run_dir, data_dir)`` tuple of ``Path`` objects.
    """
    method_root = BASE_ROOT / METHOD

    # Only the output root may need missing ancestors created; every level
    # below it is made one step at a time.
    BASE_ROOT.mkdir(parents=True, exist_ok=True)
    method_root.mkdir(exist_ok=True)

    # The folder name spells out which pair was docked.
    pair_label = "_vs_".join((Path(rec).stem, Path(lig).stem))
    run_dir = method_root / pair_label
    # HADDOCK examples keep all PDB and restraint files in a "data" subfolder.
    data_dir = run_dir / "data"
    for folder in (run_dir, data_dir):
        folder.mkdir(exist_ok=True)

    info(f"Created run folder: {run_dir}")
    return run_dir, data_dir


# ---------------------------------------------------------
# Copy input files into run/data/
# This ensures HADDOCK config can reference local paths
# (prevents long absolute paths cluttering files).
# ---------------------------------------------------------
def copy_inputs(data_dir, rec, lig, ambig):
    """Stage the receptor, ligand and optional restraints file in *data_dir*.

    Every file is copied under its own base name, so all inputs end up side
    by side in one folder.

    Args:
        data_dir: ``Path`` of the existing folder that receives the copies.
        rec: Path to the receptor file.
        lig: Path to the ligand file.
        ambig: Path to the ambiguous-restraints file, or a falsy value
            (``None``/``""``) when there is none.

    Returns:
        ``(rec_dst, lig_dst, ambig_dst)`` destination paths; ``ambig_dst`` is
        ``None`` when no restraints file was given.

    Raises:
        ValueError: If two *different* input files share a base name, since
            the second copy would overwrite the first. Nothing is copied in
            that case.
    """
    sources = [rec, lig]
    if ambig:
        sources.append(ambig)

    # All inputs land in one folder under their base names. Two distinct
    # files with the same name (e.g. a/model.pdb and b/model.pdb) would
    # silently clobber each other, so refuse before touching the disk.
    # Passing the very same file twice (homodimer docking) stays allowed:
    # both copies have identical content.
    claimed = {}
    for src in sources:
        src_path = Path(src)
        origin = src_path.resolve()
        if claimed.setdefault(src_path.name, origin) != origin:
            raise ValueError(
                f"Input files share the name '{src_path.name}' and would "
                f"overwrite each other in {data_dir}: "
                f"{claimed[src_path.name]} and {origin}. "
                "Rename one of them before staging."
            )

    targets = [data_dir / Path(src).name for src in sources]
    for src, dst in zip(sources, targets):
        try:
            shutil.copy(src, dst)
        except shutil.SameFileError:
            # The input already is the staged copy (e.g. the script was
            # pointed at files inside data/); nothing to do.
            pass

    rec_dst, lig_dst = targets[0], targets[1]
    ambig_dst = targets[2] if ambig else None

    info("Copied input structures into data/ folder")
    return rec_dst, lig_dst, ambig_dst


# ---------------------------------------------------------
# Write a tutorial-style HADDOCK configuration (.cfg)
# with minimal parameters.
#
# IMPORTANT:
#   The layout, section names, and indentation match
#   the official tutorial examples.
#
#   The defaults here are NOT optimal —
#   they are sensible and readable.
# ---------------------------------------------------------
def write_cfg(cfg_path, runname, mode, ncores, rec_rel, lig_rel, ambig_rel):
    """Write a tutorial-style HADDOCK3 config file to *cfg_path*.

    Args:
        cfg_path: ``Path`` of the ``.cfg`` file to create or overwrite.
        runname: Value written as the config's ``run_dir``.
        mode: Value written as the config's ``mode``.
        ncores: Value written as the config's ``ncores``.
        rec_rel: Receptor path, written as the first ``molecules`` entry.
        lig_rel: Ligand path, written as the second ``molecules`` entry.
        ambig_rel: Restraints path written as ``ambig_fname`` in the
            ``rigidbody``, ``flexref`` and ``emref`` sections; may be ``""``.
    """
    # The earlier template set autohis = false and pinned histidine
    # protonation to fixed residue numbers (75, 76, 15). Those numbers only
    # make sense for one specific pair of structures, so for arbitrary
    # inputs the config now asks HADDOCK to assign the states itself.
    text = f"""# ====================================================================
# Protein-protein docking example (auto-generated)

# directory in which the scoring will be done
run_dir = "{runname}"

# execution mode
mode = "{mode}"
ncores = {ncores}

# molecules to be docked
molecules =  [
    "{rec_rel}",
    "{lig_rel}"
    ]

# ====================================================================
# Parameters for each stage are defined below, prefer full paths
# ====================================================================
[topoaa]
# histidine protonation states are assigned automatically; to set them by
# hand use autohis = false plus [topoaa.molN] entries (nhisd/hisd_1, nhise/hise_1)
autohis = true

[rigidbody]
tolerance = 20
ambig_fname = "{ambig_rel}"
sampling = 20

[caprieval]
reference_fname = ""

[seletop]
select = 5

[flexref]
tolerance = 20
ambig_fname = "{ambig_rel}"

[emref]
tolerance = 20
ambig_fname = "{ambig_rel}"

[clustfcc]
min_population = 1

[seletopclusts]
top_models = 4

# ====================================================================
"""
    # Write UTF-8 explicitly so the file does not depend on the platform's
    # locale encoding when names contain non-ASCII characters.
    cfg_path.write_text(text.strip() + "\n", encoding="utf-8")
    info(f"Wrote config file: {cfg_path}")


# ---------------------------------------------------------
# Pipeline
# ---------------------------------------------------------
def haddock_pipeline(rec, lig, runname, mode, ncores, ambig):
    """Prepare a complete HADDOCK run folder for one receptor/ligand pair.

    Runs the three preparation steps in order: create the run folder, copy
    the inputs into its ``data`` subfolder, and write ``<runname>.cfg`` into
    the run folder. A short summary is printed at the end. HADDOCK itself is
    not started.

    Args:
        rec: Path to the receptor file.
        lig: Path to the ligand file.
        runname: Name used for the config's ``run_dir`` and the ``.cfg`` file.
        mode: Execution mode written into the config.
        ncores: Core count written into the config.
        ambig: Path to the ambiguous-restraints file, or ``None``.
    """
    # Step 1: folder tree
    run_dir, data_dir = make_run_dir(rec, lig, runname)

    # Step 2: stage inputs
    staged = copy_inputs(data_dir, rec, lig, ambig)

    # The config refers to staged files relative to the run folder, as the
    # tutorials do; a missing restraints file becomes an empty string.
    rec_rel, lig_rel, ambig_rel = [
        ("data/" + item.name) if item else "" for item in staged
    ]

    # Step 3: config file
    cfg_path = run_dir / "{}.cfg".format(runname)
    write_cfg(cfg_path, runname, mode, ncores, rec_rel, lig_rel, ambig_rel)

    # Closing summary for the user
    print("\nDone.")
    print("Files staged in:", data_dir)
    print("Config file:", cfg_path)
    print("You can now run HADDOCK manually using this configuration.\n")


# ---------------------------------------------------------
# CLI
# ---------------------------------------------------------
def main(argv=None):
    """Parse the command line, validate the inputs and run the pipeline.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Invalid input (a missing file, a non-positive core count) is reported
    through ``parser.error``, which prints a usage message and exits with
    status 2.
    """
    parser = argparse.ArgumentParser(
        description="Prepare HADDOCK3 config and input files (tutorial style)"
    )

    parser.add_argument("receptor", help="Path to receptor PDB file")
    parser.add_argument("ligand", help="Path to ligand PDB file")
    parser.add_argument("--runname", default="run1", help="Config run_dir name")
    parser.add_argument("--mode", default="local", help="HADDOCK execution mode")
    parser.add_argument("--ncores", type=int, default=4, help="CPU cores")
    parser.add_argument("--ambig", default=None, help="Path to ambiguous restraints (.tbl)")

    args = parser.parse_args(argv)

    # Check the inputs before doing any work: the pipeline creates the run
    # folders first and copies afterwards, so a bad path would otherwise
    # surface as a traceback with empty folders left behind.
    inputs = [("receptor", args.receptor), ("ligand", args.ligand)]
    if args.ambig:
        inputs.append(("--ambig", args.ambig))
    for label, path in inputs:
        if not Path(path).is_file():
            parser.error(f"{label} file not found: {path}")

    if args.ncores < 1:
        parser.error(f"--ncores must be a positive integer (got {args.ncores})")

    haddock_pipeline(
        args.receptor,
        args.ligand,
        args.runname,
        args.mode,
        args.ncores,
        args.ambig,
    )


if __name__ == "__main__":
    main()
