In [None]:
# Terminal bootstrap for the molecular docking environment.
# Run these commands in a bash terminal (not in a Python kernel). They create the
# project directory, write environment.yml, build the 'docking311' micromamba
# environment, register it as a Jupyter kernel, and print a few sanity checks.


# --- 1. Project Setup ---
echo "Setting up project directory..."
# Create the project directory under HOME for persistence, then make it the
# working directory so environment.yml below is written inside the project.
mkdir -p ~/projects/cytochrome-ligand
cd ~/projects/cytochrome-ligand


# --- 2. Define Environment with environment.yml ---
echo "Creating environment.yml..."
# Write environment.yml for Python 3.11 with all dependencies.
# 'autodocktools-prepare' is deliberately left out (it conflicts with Python 3.11);
# 'meeko', installed through pip, is used for PDBQT preparation instead.
# The quoted heredoc delimiter ('YML') keeps the shell from expanding anything
# inside the YAML body.
# NOTE(review): the 'insilichem' channel is listed, but it is not obvious which
# remaining package needs it — confirm before keeping it.
cat > environment.yml <<'YML'
name: docking311
channels:
 - conda-forge
 - bioconda
 - insilichem
dependencies:
 - python=3.11
 - ipykernel
 - numpy
 - pandas
 - scipy
 - biopython
 - rdkit
 - mdanalysis
 - autodock-vina
 - openbabel=3.1.1
 # - autodocktools-prepare   # REMOVED: Conflicts with Python 3.11. Use 'meeko' instead.
 - py3Dmol
 - matplotlib
 - ipywidgets>=8
 - prolif
 - pip
 - pip:
     - meeko               # This is your Python 3 replacement for preparation scripts
     - rcsbsearchapi
     - pdb2pqr
YML


# --- 3. Micromamba Initialization & Environment Creation ---
echo "Initializing micromamba shell hook..."
# Load the micromamba shell hook into the current bash session so that
# 'micromamba activate' works. This must be repeated in every new terminal.
# Prerequisite: the micromamba binary exists at $HOME/micromamba/bin/micromamba.
eval "$($HOME/micromamba/bin/micromamba shell hook -s bash)"


echo "Creating and activating 'docking311' conda environment from environment.yml..."
# Create the environment directly from the YAML file so that all channels and
# dependencies are resolved in a single solve.
# NOTE(review): presumably this fails if 'docking311' already exists — confirm
# and remove the old environment first when re-running the bootstrap.
micromamba env create -y -f environment.yml


# Activate the newly created environment so the 'python' and 'vina' calls below
# resolve inside it.
micromamba activate docking311


# --- 4. Install Jupyter Kernel ---
echo "Installing Jupyter kernel..."
# Register this environment as a user-level Jupyter kernel named 'docking311'
# (shown in the UI as "Python (docking311)").
python -m ipykernel install --user --name docking311 --display-name "Python (docking311)"


# --- 5. Verification Checks ---
echo "Running verification checks..."
# Show which Python interpreter is active and its version.
echo "Python path and version:"
which python; python -V


# prepare_receptor4.py and prepare_ligand4.py belong to autodocktools-prepare,
# which is not installed; PDBQT preparation is done with 'meeko' instead.
# NOTE(review): the two echo lines below only print a reminder — they do not
# actually test that meeko is importable.
echo "Verification for PDBQT preparation tools:"
echo "You will use 'meeko' functions directly in Python for preparing files."


# Print the Vina executable path and version, or a fallback message if either
# the lookup or the version call fails.
echo "AutoDock Vina path and version:"
which vina && vina --version || echo "Vina not found or not executable."


echo "Setup complete. You can now activate the environment using 'micromamba activate docking311'."
echo "Remember to use 'meeko' for preparing your receptor and ligand PDBQT files."

In [None]:
# At the start of every new terminal session, run these commands to enter the
# project directory and activate the docking environment.
# The directory must be the one created by the bootstrap step
# (~/projects/cytochrome-ligand); the previous path pointed at a directory
# that the bootstrap never creates.
cd ~/projects/cytochrome-ligand
eval "$($HOME/micromamba/bin/micromamba shell hook -s bash)"
micromamba activate docking311

In [None]:
# 1) Imports + tool discovery (keeps PATH clean and predictable)
# Cell 1 — Imports & PATH
from pathlib import Path
import os, sys, shutil, subprocess, re

# Directory that holds the running kernel's interpreter; the environment's
# command-line tools are expected to sit next to it.
ENV_BIN = Path(sys.executable).parent

# Put ENV_BIN first on PATH. Any earlier copy of ENV_BIN (and empty entries) is
# dropped first, so re-running this cell leaves PATH unchanged instead of
# prepending the same directory again on every execution.
_other_path_entries = [
    entry
    for entry in os.environ.get("PATH", "").split(os.pathsep)
    if entry and entry != str(ENV_BIN)
]
os.environ["PATH"] = os.pathsep.join([str(ENV_BIN), *_other_path_entries])

# CLI discovery: fall back to the expected location inside ENV_BIN when the
# tool is not found on PATH. The prepare_* scripts have no fallback and are
# simply None when absent.
VINA = shutil.which("vina") or str(ENV_BIN / "vina")
OBABEL = shutil.which("obabel") or str(ENV_BIN / "obabel")
PREP_REC4 = shutil.which("prepare_receptor4.py")  # MGLTools
PREP_REC  = shutil.which("prepare_receptor")      # ADFR
PREP_LIG4 = shutil.which("prepare_ligand4.py")    # MGLTools

print("ENV_BIN          :", ENV_BIN)
print("vina             :", VINA)
print("obabel           :", OBABEL)
print("prepare_receptor4:", PREP_REC4)
print("prepare_receptor :", PREP_REC)
print("prepare_ligand4  :", PREP_LIG4)

# Hard error if Vina missing (can't dock)
assert Path(VINA).exists(), "AutoDock Vina not found in this kernel/env."

# Python libs
import MDAnalysis as mda
from rdkit import Chem
from rdkit.Chem import AllChem
import py3Dmol

In [None]:
# Rebuild azoxystrobin from a TRUSTED SMILES, then make PDBQT.
# Background: the earlier .sdf file was wrong (it contained no oxygen atoms), so
# the ligand is regenerated here from SMILES instead of being read from that file.
from rdkit import Chem
from rdkit.Chem import AllChem
from pathlib import Path
import subprocess, shutil, sys

# <<< PASTE a correct azoxystrobin SMILES here >>>
smiles = r"CO/C=C(\C1=CC=CC=C1OC2=NC=NC(=C2)OC3=CC=CC=C3C#N)/C(=O)OC"
m = Chem.MolFromSmiles(smiles); assert m, "Bad SMILES"
m = Chem.AddHs(m)

# Embed a 3D conformer with a fixed seed so the starting geometry is the same on
# every run. EmbedMolecule returns the conformer id, or -1 when embedding fails;
# stop here in that case instead of failing later inside the optimiser.
embed_params = AllChem.ETKDGv3()
embed_params.randomSeed = 42
conf_id = AllChem.EmbedMolecule(m, embed_params)
assert conf_id >= 0, "RDKit could not embed a 3D conformer for the ligand"

# UFFOptimizeMolecule returns 0 when the minimisation converged.
uff_status = AllChem.UFFOptimizeMolecule(m, maxIters=500)
if uff_status != 0:
    print("WARNING: UFF optimisation did not converge (status", uff_status, ")")

# Write a clean SDF (keeps your original filename untouched). The writer is closed
# explicitly so the file is fully flushed before the external tool reads it.
sdf_writer = Chem.SDWriter("azoxystrobin_fixed.sdf")
try:
    sdf_writer.write(m)
finally:
    sdf_writer.close()

# Make PDBQT with the Meeko CLI, using Gasteiger charges. Prefer the installed
# script; otherwise run the module with the current interpreter.
# NOTE(review): '-a' is presumably Meeko's flexible-amides switch, and the
# 'meeko.mcli.prepare_ligand' module path is not verified here — confirm both
# against the installed Meeko version.
mk = shutil.which("mk_prepare_ligand.py")
launcher = [mk] if mk else [sys.executable, "-m", "meeko.mcli.prepare_ligand"]
cmd = launcher + ["-i", "azoxystrobin_fixed.sdf", "-o", "azoxystrobin.pdbqt", "-a",
                  "--charge_model", "gasteiger"]
print("$", " ".join(cmd))
subprocess.run(cmd, check=True)

# QC – AD4 types and torsions must look right (expect OA present).
# The atom type is the last whitespace-separated token of each ATOM/HETATM record.
txt = Path("azoxystrobin.pdbqt").read_text().splitlines()
types = sorted({ln.split()[-1] for ln in txt if ln.startswith(("ATOM","HETATM"))})
tors  = next((ln for ln in txt if ln.startswith("TORSDOF")), "TORSDOF not found")
print("Ligand AD4 types:", types)
print(tors)

In [None]:
# QC the SDF: elements, 3D, functional groups
from rdkit import Chem
from rdkit.Chem import Descriptors, rdMolDescriptors
from collections import Counter

sdf_path = "azoxystrobin_fixed.sdf"

# Read without sanitising so that a chemically broken record can still be inspected.
sup = Chem.SDMolSupplier(sdf_path, removeHs=False, sanitize=False)
assert sup and sup[0], "Cannot read azoxystrobin_fixed.sdf"
m_raw = sup[0]

# Work on a copy; if sanitisation fails, fall back to a fresh unsanitised copy
# and keep going so the remaining checks can still run.
m = Chem.Mol(m_raw)
sanit_ok = True
try:
    Chem.SanitizeMol(m)
except Exception as e:
    sanit_ok = False
    m = Chem.Mol(m_raw)  # minimal copy for inspection
    print("⚠️ RDKit sanitize failed:", e)

# Element tally (the oxygen count is called out separately)
syms = [a.GetSymbol() for a in m.GetAtoms()]
element_counts = Counter(syms)
print("Element counts:", element_counts)
print("Oxygen count  :", element_counts["O"])

# A conformer must exist and be flagged as 3D
n_confs = m.GetNumConformers()
has3d = (n_confs > 0 and m.GetConformer().Is3D())
print("Has 3D coords :", has3d, "| nConfs:", n_confs)

# Simple ester / carbonyl motifs, matched by SMARTS
pat_ester   = Chem.MolFromSmarts("[CX3](=O)[OX2H0,O-]")
pat_carbonyl= Chem.MolFromSmarts("[CX3]=O")
for motif_label, motif_pattern in (
    ("Ester present :", pat_ester),
    ("C=O present   :", pat_carbonyl),
):
    print(motif_label, bool(m.HasSubstructMatch(motif_pattern)))

# Formula & MW (rough sanity); skipped with a message if RDKit cannot compute them
try:
    print("Formula       :", rdMolDescriptors.CalcMolFormula(m))
    print("MolWt         :", round(Descriptors.MolWt(m), 2))
except Exception as e:
    print("Formula/MW skipped:", e)

print("Sanitize OK   :", sanit_ok)

In [None]:
# Ligand QC (quick)
# Collect the AD4 atom types (last token of each ATOM/HETATM record) and the first
# TORSDOF line from the ligand PDBQT, then require both acceptor types.
from pathlib import Path, PurePath
txt = Path("azoxystrobin.pdbqt").read_text().splitlines()

atom_records = [ln for ln in txt if ln.startswith(("ATOM","HETATM"))]
types = sorted(set(rec.split()[-1] for rec in atom_records))

torsdof_lines = [ln for ln in txt if ln.startswith("TORSDOF")]
tors = torsdof_lines[0] if torsdof_lines else "TORSDOF not found"

print("Ligand AD4 types:", types)
print(tors)
assert not ("OA" not in types or "NA" not in types), "Expected acceptor types (OA/NA) missing"

In [None]:
# QC-LIG (final): file health + key lines
from pathlib import Path
p = Path("azoxystrobin.pdbqt"); assert p.exists(), "azoxystrobin.pdbqt missing"
lines = p.read_text().splitlines()

# basic structure: a ROOT block and a TORSDOF line must both be present
assert any(l.startswith("ROOT") for l in lines), "No ROOT block"
assert any(l.startswith("TORSDOF") for l in lines), "TORSDOF line missing"

# Parse the atom type once per ATOM/HETATM record (last whitespace-separated
# token). Every count below is derived from this one list, so the HD count can
# no longer disagree with the type set (for example because of trailing spaces).
atom_types = [l.split()[-1] for l in lines if l.startswith(("ATOM","HETATM"))]

# counts
n_atoms = len(atom_types)
n_hd    = atom_types.count("HD")
print(f"PDBQT atoms: {n_atoms} | H-atoms(HD): {n_hd}")

# types sanity: both acceptor types are required
types = sorted(set(atom_types))
for t in ("NA","OA"):
    assert t in types, f"Expected {t} not found"
print("Types OK:", types)

In [None]:
# CHECKPOINT
# QC-1: basic file integrity and receptor contents
from pathlib import Path
import MDAnalysis as mda
from rdkit import Chem

# --- 1. Load Protein File ---
# Prefer 1sqb.pdb; fall back to 1sqb.cif when the PDB file is absent.
# NOTE(review): confirm the installed MDAnalysis can actually read the .cif
# fallback — this cell only checks that the file exists.
inp = Path("1sqb.pdb") if Path("1sqb.pdb").exists() else Path("1sqb.cif")
assert inp.exists(), "Need 1sqb.pdb or 1sqb.cif for receptor QC"
u_all = mda.Universe(str(inp))

# Print basic structural information from the protein universe
print("Chains:", {seg.segid or 'UNK' for seg in u_all.segments})
print("Residues:", u_all.atoms.n_residues, "| Atoms:", u_all.atoms.n_atoms)

# --- 2. Cofactor Check ---
# Ensure the cofactor(s) we need exist in the raw structure.
sel_cofactors = u_all.select_atoms("resname HEM HEME FE FES SF4")
sel_heme_group = u_all.select_atoms("resname HEM HEME")  # heme residues only
# Use the wildcard 'FE*' rather than the exact name 'FE', matching the pattern the
# receptor-cleaning cells use for FES iron ('resname FES and name FE*'). An exact
# 'FE' match would count heme iron only and under-report the cofactor iron total.
sel_fe_atoms = u_all.select_atoms("name FE* and resname HEM HEME FES SF4")

print(f"Total cofactor atoms (HEM/HEME/Fe/FES/SF4): {sel_cofactors.n_atoms}")
print(f"HEM/HEME residues found: {len(sel_heme_group.residues)} | Fe atoms in cofactors: {sel_fe_atoms.n_atoms}")

# --- 3. Native Inhibitor Check (Optional) ---
# Confirm native inhibitor present (useful for RMSD later if doing re-docking)
num_azo_residues = len(u_all.select_atoms("resname AZO").residues)
print(f"AZO (Azoxystrobin) residues found in protein: {num_azo_residues}")

# --- 4. Ligand SDF Readability Check ---
# The rebuilt ligand file must be parseable by RDKit with hydrogens retained.
mol_supplier = Chem.SDMolSupplier("azoxystrobin_fixed.sdf", removeHs=False)
assert mol_supplier and mol_supplier[0], "Cannot read azoxystrobin_fixed.sdf"
ligand_mol = mol_supplier[0]
print(f"Ligand from SDF: atoms {ligand_mol.GetNumAtoms()} | formal charge {Chem.GetFormalCharge(ligand_mol)}")

In [None]:
# Show the kernel's current working directory; every relative file path used in
# this notebook is resolved against it.
import os
print(os.path.abspath(os.getcwd()))

In [None]:
# List the entries of the current working directory, one per line.
# os.listdir returns names in arbitrary order, so sort them to get a stable,
# easy-to-scan listing that is the same on every run.
import os
for entry_name in sorted(os.listdir('.')):
    print(entry_name)

In [None]:
# 3) Clean receptor (protein + heme; AZO removed). FES optional.
# Cell 3 — Clean receptor for Qo-site docking
from pathlib import Path
import MDAnalysis as mda

# 'inp' is the raw structure path set by the QC-1 checkpoint cell. When running
# this cell on its own, define it first, e.g. inp = Path("1sqb.pdb").

# Destination of the cleaned structure
RECEPTOR_CLEAN_PDB = Path("receptor_clean.pdb")

# Load the raw structure and keep only protein atoms plus heme / iron-sulfur
# cofactors. Everything else (waters, ions, the native AZO ligand, ...) is
# dropped simply by not being selected.
u = mda.Universe(str(inp))
sel = u.select_atoms("protein or resname HEM HEME FES SF4")

sel.write(str(RECEPTOR_CLEAN_PDB))
print(f"Wrote cleaned receptor to: {RECEPTOR_CLEAN_PDB}")

# --- Sanity Check on the Cleaned Receptor ---
# Re-read the file that was just written and report what it contains.
print("\n--- Sanity Check on Cleaned Receptor ---")
uc = mda.Universe(str(RECEPTOR_CLEAN_PDB))

print("Chains in cleaned receptor:", {seg.segid or 'UNK' for seg in uc.segments})
print("Residues in cleaned receptor:", uc.atoms.n_residues, "| Atoms:", uc.atoms.n_atoms)

# Residue-level counts per cofactor type
for residue_label, residue_query in (
    ("HEM/HEME residues in cleaned file:", "resname HEM HEME"),
    ("FES residues in cleaned file     :", "resname FES"),
    ("SF4 residues in cleaned file     :", "resname SF4"),
):
    print(residue_label, len(uc.select_atoms(residue_query).residues))

# Atom-level counts for the metal centres
for atom_label, atom_query in (
    ("Heme Fe atoms in cleaned file    :", "resname HEM HEME and name FE"),
    ("Rieske Fe atoms in cleaned file  :", "resname FES and name FE*"),
    ("Rieske S atoms in cleaned file   :", "resname FES and name S*"),
):
    print(atom_label, len(uc.select_atoms(atom_query)))

# The native inhibitor must be gone from the cleaned file
num_azo_residues_clean = len(uc.select_atoms("resname AZO").residues)
print(f"AZO (Azoxystrobin) residues in cleaned file: {num_azo_residues_clean} (Expected 0)")
assert num_azo_residues_clean == 0, "AZO was not removed from the cleaned receptor!"

In [None]:
# 3) Clean receptor (protein + heme; AZO removed). FES optional.
# Cell 3 — Clean receptor for Qo-site docking (chain C only)
from pathlib import Path
import MDAnalysis as mda

# 'inp' normally comes from the QC-1 checkpoint cell. If that cell has not been
# run in this kernel, derive it here with the same rule so this cell also works
# on a fresh kernel instead of failing with a NameError.
if "inp" not in globals():
    inp = Path("1sqb.pdb") if Path("1sqb.pdb").exists() else Path("1sqb.cif")
assert Path(inp).exists(), "Need 1sqb.pdb or 1sqb.cif to build the cleaned receptor"

# Define the path for the cleaned receptor PDB file
RECEPTOR_CLEAN_PDB = Path("receptor_clean.pdb")

# Load the full raw protein universe from the input file
u = mda.Universe(str(inp))

# Keep the protein of segment C (Cytochrome b) plus any HEM/HEME/FES/SF4 that also
# belongs to segment C. Waters, ions, other chains and the native AZO ligand are
# dropped by not being selected. 'segid' is the MDAnalysis keyword used here in
# place of 'chain'.
sel = u.select_atoms("(protein and segid C) or (resname HEM HEME FES SF4 and segid C)")

# Stop with a clear message if nothing matched; otherwise the failure would only
# show up later as an unreadable or meaningless receptor file.
assert sel.n_atoms > 0, (
    "Selection for segid C is empty — check the segment IDs in the input structure"
)

# Write the selected atoms (cleaned protein + cofactors) to a new PDB file
sel.write(str(RECEPTOR_CLEAN_PDB))
print(f"Wrote cleaned receptor to: {RECEPTOR_CLEAN_PDB}")

# --- Sanity Check on the Cleaned Receptor ---
print("\n--- Sanity Check on Cleaned Receptor ---")
# Load the newly written cleaned receptor PDB file into a new universe
uc = mda.Universe(str(RECEPTOR_CLEAN_PDB))

# Print counts of essential components to confirm cleaning was successful
print("Chains in cleaned receptor:", {seg.segid or 'UNK' for seg in uc.segments})
print("Residues in cleaned receptor:", uc.atoms.n_residues, "| Atoms:", uc.atoms.n_atoms)

print("HEM/HEME residues in cleaned file:", len(uc.select_atoms("resname HEM HEME").residues))
print("FES residues in cleaned file     :", len(uc.select_atoms("resname FES").residues))
print("SF4 residues in cleaned file     :", len(uc.select_atoms("resname SF4").residues))

print("Heme Fe atoms in cleaned file    :", len(uc.select_atoms("resname HEM HEME and name FE")))
print("Rieske Fe atoms in cleaned file  :", len(uc.select_atoms("resname FES and name FE*")))
print("Rieske S atoms in cleaned file   :", len(uc.select_atoms("resname FES and name S*")))

# Verify AZO is removed (should be 0)
num_azo_residues_clean = len(uc.select_atoms("resname AZO").residues)
print(f"AZO (Azoxystrobin) residues in cleaned file: {num_azo_residues_clean} (Expected 0)")
assert num_azo_residues_clean == 0, "AZO was not removed from the cleaned receptor!"

In [None]:
# Run in a terminal (not in the Python kernel): add ProDy from conda-forge to the
# existing 'docking311' environment without prompting for confirmation.
# NOTE(review): presumably needed by Meeko's receptor preparation — confirm.
micromamba install --yes --name docking311 --channel conda-forge prody

In [None]:
# 4) Prepare Receptor to PDBQT
# Cell 4 — Convert cleaned receptor to PDBQT
from pathlib import Path
import subprocess, shutil, sys

# Input and output file paths. Meeko is given the base name "receptor" and asked
# to write a PDBQT, so the file expected afterwards is receptor.pdbqt.
RECEPTOR_CLEAN_PDB = Path("receptor_clean.pdb")
RECEPTOR_PDBQT = Path("receptor.pdbqt")

assert RECEPTOR_CLEAN_PDB.exists(), f"Cleaned receptor PDB not found: {RECEPTOR_CLEAN_PDB}"

# Prefer the installed Meeko script; otherwise run the module with the current
# interpreter.
# NOTE(review): the 'meeko.mcli.prepare_receptor' module path and the CLI flags
# depend on the installed Meeko version — confirm them if the command fails.
mk_rec = shutil.which("mk_prepare_receptor.py")
launcher = [mk_rec] if mk_rec else [sys.executable, "-m", "meeko.mcli.prepare_receptor"]
cmd = launcher + ["-i", str(RECEPTOR_CLEAN_PDB), "-o", "receptor", "--write_pdbqt"]

print(f"$ {' '.join(cmd)}")
print(f"Current working directory: {Path.cwd()}")

# Run the conversion, capturing output so it can be shown on success and failure.
try:
    result = subprocess.run(cmd, check=True, capture_output=True, text=True)
except subprocess.CalledProcessError as e:
    print(f"Error preparing receptor: {e}")
    print(f"STDOUT: {e.stdout}")
    print(f"STDERR: {e.stderr}")
    raise  # Re-raise the exception after printing details

print("\n--- mk_prepare_receptor.py STDOUT ---")
print(result.stdout)
print("\n--- mk_prepare_receptor.py STDERR ---")
print(result.stderr)

# --- Sanity Check: Verify the generated PDBQT file ---
print("\n--- Sanity Check on Receptor PDBQT ---")

if not RECEPTOR_PDBQT.exists():
    print(f"ERROR: Receptor PDBQT file '{RECEPTOR_PDBQT}' was NOT created.")
    raise AssertionError(f"Receptor PDBQT file not found: {RECEPTOR_PDBQT}")
if RECEPTOR_PDBQT.stat().st_size == 0:
    print(f"WARNING: Receptor PDBQT file '{RECEPTOR_PDBQT}' was created but is EMPTY.")
    raise AssertionError(f"Receptor PDBQT file is empty: {RECEPTOR_PDBQT}")

# Report success only now that the output file is known to exist and be non-empty;
# a zero exit status alone does not prove the file was written.
print(f"Successfully converted {RECEPTOR_CLEAN_PDB} to {RECEPTOR_PDBQT}")
print(f"Receptor PDBQT file '{RECEPTOR_PDBQT}' exists and is not empty.")

pdbqt_lines = RECEPTOR_PDBQT.read_text().splitlines()

# Select coordinate records by their record prefix. A plain substring test would
# also accept unrelated lines (such as remarks) that merely mention "ATOM".
atom_records = [l for l in pdbqt_lines if l.startswith(("ATOM", "HETATM"))]
assert atom_records, "No ATOM/HETATM records found in PDBQT"

# Count atoms and collect AD4 types (last whitespace-separated token per record)
n_atoms_pdbqt = len(atom_records)
types_pdbqt = sorted({l.split()[-1] for l in atom_records})

print(f"Atoms in receptor PDBQT: {n_atoms_pdbqt}")
print(f"Unique AD4 types in receptor PDBQT: {types_pdbqt}")

# Types that must be present for protein + heme. Sulfur and oxygen appear as the
# AD4 types 'SA' and 'OA'; the heme iron appears as 'Fe' in Meeko's output.
expected_protein_types = {'C', 'N', 'A', 'NA', 'OA', 'SA'}
expected_heme_types = {'Fe'}

# Report every missing type at once rather than stopping at the first one.
found_types = set(types_pdbqt)
missing_protein_types = sorted(expected_protein_types - found_types)
missing_heme_types = sorted(expected_heme_types - found_types)
assert not missing_protein_types, (
    f"Expected protein type(s) {missing_protein_types} missing from receptor PDBQT"
)
assert not missing_heme_types, (
    f"Expected heme type(s) {missing_heme_types} missing from receptor PDBQT"
)

print("Receptor PDBQT is ready for docking!")

In [None]:
# 5) Define Docking Grid Box
# Cell 5 — Specify the search space for Vina

# IMPORTANT: The box center must be specific to the Qo-site of your
# Cytochrome b (Chain C) in 1SQB. The values assigned further down are the ones
# chosen for this run; replace them if you target a different site or structure.
# Obtain such coordinates by inspecting the binding site in a molecular viewer,
# or by calculating the center of relevant residues/cofactors.

# Example: Using MDAnalysis to find center of HEME in Chain C
# (You can run this in a separate cell to get suggestions for center_x/y/z)
# import MDAnalysis as mda
# uc_receptor = mda.Universe("receptor_clean.pdb") # Load your cleaned receptor
# # For 1SQB Chain C, there are two HEM groups. You might want to center between them or on a specific one.
# # Let's aim for a central point near both.
# # Find the average center of both HEME groups (C:381 and C:382 from previous output)
# # Check your PDBQT file or the original PDB to verify residue numbers if unsure.
# heme_381 = uc_receptor.select_atoms("resname HEM and resid 381 and segid C")
# heme_382 = uc_receptor.select_atoms("resname HEM and resid 382 and segid C")
#
# if heme_381 and heme_382:
#     # Calculate the center between the two heme groups
#     center_coords = (heme_381.center_of_geometry() + heme_382.center_of_geometry()) / 2
# elif heme_381:
#     center_coords = heme_381.center_of_geometry()
# elif heme_382:
#     center_coords = heme_382.center_of_geometry()
# else:
#     # Fallback or error if no heme found (should not happen after previous steps)
#     print("Warning: No HEME residues found in receptor_clean.pdb for centering.")
#     # You might manually set a center or use a broader search.
#     center_coords = [0.0, 0.0, 0.0] # Placeholder, replace with actual
#
# print(f"Suggested Docking Box Center (from HEME): X={center_coords[0]:.3f} Y={center_coords[1]:.3f} Z={center_coords[2]:.3f}")


# Search-box center (Å) chosen for this docking run.
BOX_CENTER_X, BOX_CENTER_Y, BOX_CENTER_Z = 70.03, 55.83, 166.59

# Search-box edge lengths (Å): a cube, within the 20-30 Å per dimension that is
# typical for a binding pocket.
BOX_SIZE_X = BOX_SIZE_Y = BOX_SIZE_Z = 26.0

print(f"Docking Box Center: ({BOX_CENTER_X:.3f}, {BOX_CENTER_Y:.3f}, {BOX_CENTER_Z:.3f})")
print(f"Docking Box Size: ({BOX_SIZE_X:.1f}, {BOX_SIZE_Y:.1f}, {BOX_SIZE_Z:.1f})")

# You can also use Py3Dmol to visualize the box and receptor (advanced, requires more code)
# import py3Dmol
# view = py3Dmol.view(width=800, height=600)
# view.addModel(open("receptor_clean.pdb").read(), "pdb")
# view.setStyle({'cartoon': {'color': 'spectrum'}})
# view.addSphere({'center': {'x': BOX_CENTER_X, 'y': BOX_CENTER_Y, 'z': BOX_CENTER_Z}, 'radius': 1.0, 'color': 'red'})
# # You might need to add a box visualization function
# view.zoomTo()
# view.show()

In [None]:
# 6) Run AutoDock Vina
# Cell 6 — Execute the docking simulation
from pathlib import Path
import subprocess, sys

# VINA comes from the imports / tool-discovery cell; it must point at a real file.
assert VINA and Path(VINA).exists(), "AutoDock Vina executable (VINA) not found!"

# Input files (already prepared in previous steps)
RECEPTOR_PDBQT = Path("receptor.pdbqt")
LIGAND_PDBQT = Path("azoxystrobin.pdbqt")

assert RECEPTOR_PDBQT.exists(), f"Receptor PDBQT not found: {RECEPTOR_PDBQT}"
assert LIGAND_PDBQT.exists(), f"Ligand PDBQT not found: {LIGAND_PDBQT}"

# Output files
DOCKED_OUTPUT_PDBQT = Path("docked_azoxystrobin_cytochrome_b.pdbqt")  # Specific name for this run
VINA_LOG_FILE = Path("vina_log_cytochrome_b.txt")  # Specific name for this run

# Fixed random seed so repeated runs of this cell give the same poses and scores.
VINA_SEED = 42

# Docking box: BOX_CENTER_* and BOX_SIZE_* come from the 'define docking box' cell.
# When running this cell on its own, define them first (e.g. BOX_CENTER_X = 70.03,
# BOX_CENTER_Y = 55.83, BOX_CENTER_Z = 166.59, BOX_SIZE_X/Y/Z = 26.0).

# Build the Vina command. No '--log' option is passed: Vina 1.2.x no longer
# accepts it, so the captured standard output is saved to VINA_LOG_FILE below,
# which works the same way on both old and new Vina releases.
vina_cmd = [
    str(VINA),                          # Path to the vina executable
    "--receptor", str(RECEPTOR_PDBQT),  # Input receptor PDBQT file
    "--ligand", str(LIGAND_PDBQT),      # Input ligand PDBQT file
    "--center_x", str(BOX_CENTER_X),    # X coordinate of the docking box center
    "--center_y", str(BOX_CENTER_Y),    # Y coordinate of the docking box center
    "--center_z", str(BOX_CENTER_Z),    # Z coordinate of the docking box center
    "--size_x", str(BOX_SIZE_X),        # X dimension of the docking box
    "--size_y", str(BOX_SIZE_Y),        # Y dimension of the docking box
    "--size_z", str(BOX_SIZE_Z),        # Z dimension of the docking box
    "--out", str(DOCKED_OUTPUT_PDBQT),  # Output PDBQT file containing docked poses
    "--seed", str(VINA_SEED),           # Explicit seed for reproducible results
    "--cpu", "4",                       # Number of CPU cores (adjust for your system)
    "--exhaustiveness", "8",            # Search thoroughness (higher = slower)
    "--num_modes", "9",                 # Maximum number of binding modes to generate
]

print(f"$ {' '.join(vina_cmd)}")  # Print the command for verification

# Run Vina, capturing its output so it can be shown and saved.
try:
    result = subprocess.run(vina_cmd, check=True, capture_output=True, text=True)
except subprocess.CalledProcessError as e:
    print(f"Error running AutoDock Vina: {e}")
    print(f"STDOUT: {e.stdout}")
    print(f"STDERR: {e.stderr}")
    raise  # Re-raise the exception to indicate failure
except FileNotFoundError:
    print("Error: Vina executable not found. Make sure it's in your PATH or the correct location.")
    raise

# Persist Vina's full standard output (including the score table) as the run log;
# the log-inspection cell reads this file.
VINA_LOG_FILE.write_text(result.stdout)

print("AutoDock Vina finished successfully for Cytochrome b!")
print("\n--- Vina Output Log (first 20 lines from stdout) ---")
print("\n".join(result.stdout.splitlines()[:20]))
print(f"\nDocking results written to: {DOCKED_OUTPUT_PDBQT}")
print(f"Vina log written to: {VINA_LOG_FILE}")

In [None]:
# 7) Inspect Vina Log File
# Cell 7 — Print the saved Vina log between a header and a footer line

from pathlib import Path

VINA_LOG_FILE = Path("vina_log_cytochrome_b.txt")

assert VINA_LOG_FILE.exists(), f"Vina log file not found: {VINA_LOG_FILE}"

# Read the whole log first, then emit header, body and footer in one call.
log_content = VINA_LOG_FILE.read_text()
print(
    f"--- Contents of {VINA_LOG_FILE} ---",
    log_content,
    f"--- End of {VINA_LOG_FILE} ---",
    sep="\n",
)

In [None]:
# BREAK, download files
import shutil
import os
from pathlib import Path

def create_zip_archive(output_filename="all_files_archive", source_dir=".", format="zip"):
    """
    Create a compressed archive of a directory without archiving the archive itself.

    The archive is first built in a temporary staging directory and then moved to
    its final location. Building it directly inside the directory being archived
    (the usual case here: the notebook's working directory) would let the
    half-written archive, or an archive left by an earlier run, end up inside the
    new archive.

    Args:
        output_filename (str): Name of the output archive without extension; a
                               relative name is resolved against the current
                               working directory. Defaults to "all_files_archive".
        source_dir (str): Directory to archive. Defaults to the current directory ".".
        format (str): The archive format ('zip', 'tar', 'gztar', 'bztar', 'xztar').
                      Defaults to 'zip'.

    Returns:
        str: The full path to the created archive file, or None if an error occurs
             (the error is printed rather than raised).
    """
    import tempfile  # local import: only this function needs it

    # Extension shutil.make_archive appends for each built-in format; used to find
    # an archive left behind by a previous run.
    known_suffixes = {
        "zip": ".zip",
        "tar": ".tar",
        "gztar": ".tar.gz",
        "bztar": ".tar.bz2",
        "xztar": ".tar.xz",
    }
    try:
        source_path = Path(source_dir).resolve()
        target_base = Path(output_filename).resolve()

        # Remove a previous archive of the same name first; it would be overwritten
        # anyway, and deleting it keeps it out of the new archive's contents.
        suffix = known_suffixes.get(format)
        if suffix is not None:
            previous_archive = target_base.parent / (target_base.name + suffix)
            if previous_archive.is_file():
                previous_archive.unlink()

        with tempfile.TemporaryDirectory() as staging_dir:
            staged_archive = shutil.make_archive(
                str(Path(staging_dir) / target_base.name), format, root_dir=str(source_path)
            )
            target_base.parent.mkdir(parents=True, exist_ok=True)
            archive_path = shutil.move(
                staged_archive, str(target_base.parent / Path(staged_archive).name)
            )

        print(f"Successfully created archive: {archive_path}")
        print(f"You can now download '{Path(archive_path).name}' from your file browser.")
        return archive_path
    except Exception as e:
        # Documented contract: report the problem and return None instead of raising.
        print(f"Error creating archive: {e}")
        return None

# --- Usage ---
# Archive the current working directory, naming the archive after the directory
# itself (e.g. a directory 'cytochrome-ligand' gives 'cytochrome-ligand_files.zip').
current_directory_name = Path.cwd().name
archive_name = f"{current_directory_name}_files"
created_archive = create_zip_archive(archive_name, os.getcwd())

# For a fixed archive name instead:
# created_archive = create_zip_archive(output_filename="my_project_archive", source_dir=os.getcwd())

In [None]:
# 8) Visualize Best Docked Pose
# Cell 8 — Display the docked pose interactively in the notebook

import py3Dmol
from pathlib import Path

# Receptor and docked-ligand files produced by the earlier cells
RECEPTOR_PDBQT = Path("receptor.pdbqt")
DOCKED_OUTPUT_PDBQT = Path("docked_azoxystrobin_cytochrome_b.pdbqt")


def _first_pose_block(pdbqt_text):
    """Return the text up to and including the first ENDMDL record.

    If the text contains no ENDMDL record (a single-pose file), all of it is
    returned. The result always ends with a newline.
    """
    kept_records = []
    for record in pdbqt_text.splitlines():
        kept_records.append(record)
        if record.startswith("ENDMDL"):
            break
    return "\n".join(kept_records) + "\n"


# Assert that the required files exist
assert RECEPTOR_PDBQT.exists(), f"Receptor PDBQT not found: {RECEPTOR_PDBQT}"
assert DOCKED_OUTPUT_PDBQT.exists(), f"Docked ligand PDBQT not found: {DOCKED_OUTPUT_PDBQT}"

print(f"Visualizing receptor: {RECEPTOR_PDBQT}")
print(f"Visualizing best docked ligand pose from: {DOCKED_OUTPUT_PDBQT}")

# Create a new py3Dmol view
view = py3Dmol.view(width=800, height=600)  # Adjust dimensions as needed

# Model 0: the receptor (protein + heme), drawn as a cartoon
receptor_data = RECEPTOR_PDBQT.read_text()
view.addModel(receptor_data, 'pdbqt')
view.setStyle({'cartoon': {'color': 'spectrum'}})

# Model 1: the best docked pose. The first block of the docked file is taken as
# the best-affinity pose and extracted explicitly, so exactly one pose is drawn
# regardless of how the viewer treats multi-model input.
docked_data_full = DOCKED_OUTPUT_PDBQT.read_text()
best_pose_data = _first_pose_block(docked_data_full)
view.addModel(best_pose_data, 'pdbqt')
view.setStyle({ 'model': 1 }, {'stick': {'colorscheme': 'carbon', 'radius': 0.2}})

# Fit the camera to everything that was loaded
view.zoomTo()

# NOTE(review): rotation, pan and zoom work without this call. 3Dmol's
# enableContextMenu is documented as taking (selection, flag), so passing a
# single True probably does not do what was intended — confirm or remove.
view.enableContextMenu(True)
view.setBackgroundColor('0xeeeeee')  # Light grey background

# Render the view
view.show()

print("\nInteractive visualization loaded. You can rotate, pan, and zoom the molecule.")
print("The protein is shown as a cartoon, and the best-scoring azoxystrobin pose as sticks.")