# MD Simulation - Mahkota Dewa Study

**Complex:** Luteolin + PDE5A

**Platform:** Kaggle (GPU)

**Duration:** 50 ns, with a checkpoint saved every 10 ns

---

In [None]:
# Central run configuration: every tunable of the pipeline lives here.
CONFIG = {
    "complex_name": "Luteolin_PDE5A",
    "compound_name": "Luteolin",
    "target_name": "PDE5A",
    "pdb_id": "1TBF",
    "total_time_ns": 50,
    "checkpoint_interval_ns": 10,
    "temperature_k": 310,
    "timestep_fs": 2,
    "nvt_time_ps": 100,
    "npt_time_ps": 100,
    "forcefield": "amber99sb-ildn",
    "water_model": "tip3p",
    "resume": False,
    "resume_from_ns": 0,
}

# Fail fast on settings that would otherwise silently shorten or break the run.
for _key in ("total_time_ns", "checkpoint_interval_ns", "timestep_fs"):
    if CONFIG[_key] <= 0:
        raise ValueError(f"CONFIG['{_key}'] must be positive, got {CONFIG[_key]!r}")
if CONFIG['total_time_ns'] % CONFIG['checkpoint_interval_ns'] != 0:
    # Floor division below would drop the trailing partial segment without notice.
    raise ValueError(
        "CONFIG['total_time_ns'] must be a whole multiple of "
        "CONFIG['checkpoint_interval_ns']"
    )

# 1 ns = 1e6 fs; round() guards against float truncation (e.g. 2499999.9999 -> 2499999).
NSTEPS_TOTAL = round(CONFIG['total_time_ns'] * 1e6 / CONFIG['timestep_fs'])
NSTEPS_PER_SEGMENT = round(CONFIG['checkpoint_interval_ns'] * 1e6 / CONFIG['timestep_fs'])
# int() keeps the segment count usable in range() even if the times are given as floats.
NUM_SEGMENTS = int(CONFIG['total_time_ns'] // CONFIG['checkpoint_interval_ns'])
# Time step converted from fs to ps (written into the .mdp files as `dt`).
dt = CONFIG['timestep_fs'] / 1000

print(f"Complex: {CONFIG['complex_name']}")
print(f"Total time: {CONFIG['total_time_ns']} ns")
print(f"Checkpoints: every {CONFIG['checkpoint_interval_ns']} ns ({NUM_SEGMENTS} segments)")

In [None]:
%%bash
# Stop on the first failing command. Without this the cell's exit status is
# that of `head`, so a failed GROMACS install would go unnoticed.
set -e
# A stale or unreachable package mirror should not block the install attempt.
apt-get update -qq || echo "WARNING: apt-get update reported errors; continuing" >&2
apt-get install -qq gromacs
# Confirm that the gmx binary is actually on PATH.
command -v gmx > /dev/null
gmx --version | head -3

In [None]:
!pip install -q acpype MDAnalysis matplotlib numpy pandas
print("Dependencies installed!")

In [None]:
import os
import shutil
from pathlib import Path

# Per-complex scratch area plus the shared export directory.
WORK_DIR = Path(f"/kaggle/working/{CONFIG['complex_name']}")
OUTPUT_DIR = Path("/kaggle/working/output")

# One sub-directory per pipeline stage; creating them is safe to repeat.
STAGE_DIRS = ("input", "topol", "em", "nvt", "npt", "md", "analysis", "checkpoints")
for stage_name in STAGE_DIRS:
    stage_path = WORK_DIR / stage_name
    stage_path.mkdir(parents=True, exist_ok=True)

OUTPUT_DIR.mkdir(exist_ok=True)

# Later cells use paths relative to WORK_DIR, so make it the current directory.
os.chdir(WORK_DIR)
print(f"Working directory: {WORK_DIR}")

In [None]:
# Copy the input structures from the attached dataset - UPDATE DATASET NAME!
DATASET_DIR = Path("/kaggle/input/luteolin-pde5a-input")

if not DATASET_DIR.exists():
    # Nothing is copied in this case; the topology cells need these files.
    print(f"Dataset not found: {DATASET_DIR}")
    print("Please add the dataset to this notebook")
else:
    input_dir = WORK_DIR / "input"
    for pdb_path in DATASET_DIR.glob("*.pdb"):
        shutil.copy(pdb_path, input_dir)
        print(f"Copied: {pdb_path.name}")

In [None]:
%%bash
# Abort on the first error so the success message below is only printed
# when pdb2gmx really produced the topology.
set -e
# Relative path: assumes the notebook's current directory is the complex work dir.
cd topol
# Force field and water model are given on the command line; the piped "1"
# is kept from the original in case pdb2gmx still asks for a selection.
echo "1" | gmx pdb2gmx -f ../input/PDE5A_1TBF.pdb -o protein.gro -p topol.top -i posre.itp -ff amber99sb-ildn -water tip3p -ignh
echo "Protein topology generated!"

In [None]:
# Build the ligand topology with acpype inside the topol/ directory.
os.chdir(WORK_DIR / "topol")
# -b LIG sets the base name used for the output folder and files (LIG.acpype/LIG_GMX.*);
# -c bcc and -a gaff2 select the charge method and atom type set passed to acpype.
# NOTE(review): the docked PDB presumably needs explicit hydrogens and a correct
# net charge for acpype to succeed - confirm against the input file.
!acpype -i ../input/Luteolin_docked.pdb -b LIG -c bcc -a gaff2
# Rename the GROMACS outputs to the names the following cells read.
!mv LIG.acpype/LIG_GMX.gro ligand.gro
!mv LIG.acpype/LIG_GMX.itp ligand.itp
os.chdir(WORK_DIR)
# NOTE(review): this message is printed even if acpype or mv failed.
print("Ligand topology generated!")

In [None]:
import re

# Merge protein and ligand coordinates into complex.gro and register the
# ligand in topol.top. Safe to re-run: the topology is only patched once.
os.chdir(WORK_DIR / "topol")

with open("protein.gro", "r") as f:
    protein_lines = f.readlines()
with open("ligand.gro", "r") as f:
    ligand_lines = f.readlines()

# .gro layout: title line, atom-count line, atom records, box line.
protein_atoms = protein_lines[2:-1]
ligand_atoms = ligand_lines[2:-1]
box = protein_lines[-1]  # keep the protein's box line for the merged file
total_atoms = len(protein_atoms) + len(ligand_atoms)

with open("complex.gro", "w") as f:
    f.write(f"{CONFIG['complex_name']} complex\n")
    f.write(f" {total_atoms}\n")
    f.writelines(protein_atoms)
    f.writelines(ligand_atoms)
    f.write(box)

print(f"Complex created: {total_atoms} atoms")

LIGAND_INCLUDE = '#include "ligand.itp"'

with open("topol.top", "r") as f:
    topol = f.read()

if LIGAND_INCLUDE not in topol:
    # The ligand .itp is expected to define its own [ atomtypes ], which GROMACS
    # only accepts before the first [ moleculetype ]. It is therefore included
    # directly after the force-field include, not in front of [ system ].
    ff_include = re.search(
        r'^#include\s+"[^"\n]*forcefield\.itp"[^\n]*\n', topol, flags=re.MULTILINE
    )
    if ff_include is None:
        os.chdir(WORK_DIR)
        raise RuntimeError(
            "Could not find the force-field #include in topol.top; "
            "ligand.itp was not added"
        )
    topol = (
        topol[:ff_include.end()]
        + "\n" + LIGAND_INCLUDE + "\n"
        + topol[ff_include.end():]
    )

# Add the ligand to [ molecules ] (the last section of the file) exactly once;
# the order matches complex.gro: protein first, ligand second.
if not re.search(r'^LIG\s+1\s*$', topol, flags=re.MULTILINE):
    if not topol.endswith("\n"):
        topol += "\n"
    topol += "LIG     1\n"

with open("topol.top", "w") as f:
    f.write(topol)

print("Topology updated!")
os.chdir(WORK_DIR)

In [None]:
%%bash
# Abort on the first error so "System solvated!" is only printed on success.
set -e
cd topol
# Centre the complex in a dodecahedral box with 1.2 nm to the box edge.
gmx editconf -f complex.gro -o box.gro -c -d 1.2 -bt dodecahedron
# Fill the box with water; -p lets solvate update topol.top accordingly.
gmx solvate -cp box.gro -cs spc216.gro -o solvated.gro -p topol.top
echo "System solvated!"

In [None]:
# Minimal steepest-descent parameter set, only used to build ions.tpr for genion.
ions_mdp = """integrator=steep
emtol=1000.0
emstep=0.01
nsteps=50000
nstlist=1
cutoff-scheme=Verlet
ns_type=grid
coulombtype=cutoff
rcoulomb=1.0
rvdw=1.0
pbc=xyz
"""
ions_mdp_path = WORK_DIR / "topol" / "ions.mdp"
with ions_mdp_path.open("w") as mdp_file:
    mdp_file.write(ions_mdp)

In [None]:
%%bash
# Abort on the first error so "System neutralized!" is only printed on success.
set -e
cd topol
gmx grompp -f ions.mdp -c solvated.gro -p topol.top -o ions.tpr -maxwarn 5
# Replace solvent molecules (group "SOL") with NA/CL ions: neutralize the
# system and add salt at the concentration given by -conc.
echo "SOL" | gmx genion -s ions.tpr -o system.gro -p topol.top -pname NA -nname CL -neutral -conc 0.15
echo "System neutralized!"

In [None]:
# Steepest-descent energy minimization parameters (PME electrostatics).
em_mdp = """integrator=steep
emtol=1000.0
emstep=0.01
nsteps=50000
nstlist=1
cutoff-scheme=Verlet
ns_type=grid
coulombtype=PME
rcoulomb=1.0
rvdw=1.0
pbc=xyz
"""
em_mdp_path = WORK_DIR / "em" / "em.mdp"
with em_mdp_path.open("w") as mdp_file:
    mdp_file.write(em_mdp)

In [None]:
%%bash
# Abort on the first error so the completion message is only printed when
# both grompp and mdrun succeeded.
set -e
gmx grompp -f em/em.mdp -c topol/system.gro -p topol/topol.top -o em/em.tpr -maxwarn 5
gmx mdrun -v -deffnm em/em
echo "Energy minimization complete!"

In [None]:
# NVT equilibration length: ps -> fs, then divided by the time step in fs.
nvt_time_fs = CONFIG['nvt_time_ps'] * 1000
nvt_steps = int(nvt_time_fs / CONFIG['timestep_fs'])

# Position-restrained (-DPOSRES) constant-volume run with fresh velocities.
nvt_mdp = f"""define=-DPOSRES
integrator=md
nsteps={nvt_steps}
dt={dt}
nstxout=5000
nstvout=5000
nstenergy=5000
nstlog=5000
continuation=no
constraint_algorithm=lincs
constraints=h-bonds
lincs_iter=1
lincs_order=4
cutoff-scheme=Verlet
ns_type=grid
nstlist=10
rcoulomb=1.0
rvdw=1.0
coulombtype=PME
pme_order=4
fourierspacing=0.16
tcoupl=V-rescale
tc-grps=Protein Non-Protein
tau_t=0.1 0.1
ref_t={CONFIG['temperature_k']} {CONFIG['temperature_k']}
pcoupl=no
pbc=xyz
DispCorr=EnerPres
gen_vel=yes
gen_temp={CONFIG['temperature_k']}
gen_seed=-1
"""
nvt_mdp_path = WORK_DIR / "nvt" / "nvt.mdp"
with nvt_mdp_path.open("w") as mdp_file:
    mdp_file.write(nvt_mdp)

In [None]:
%%bash
# Abort on the first error so the completion message is only printed when
# both grompp and mdrun succeeded.
set -e
# -r supplies the reference coordinates for the position restraints.
gmx grompp -f nvt/nvt.mdp -c em/em.gro -r em/em.gro -p topol/topol.top -o nvt/nvt.tpr -maxwarn 5
gmx mdrun -v -deffnm nvt/nvt
echo "NVT equilibration complete!"

In [None]:
# NPT equilibration length: ps -> fs, then divided by the time step in fs.
npt_time_fs = CONFIG['npt_time_ps'] * 1000
npt_steps = int(npt_time_fs / CONFIG['timestep_fs'])

# Position-restrained constant-pressure run continuing from the NVT state.
npt_mdp = f"""define=-DPOSRES
integrator=md
nsteps={npt_steps}
dt={dt}
nstxout=5000
nstvout=5000
nstenergy=5000
nstlog=5000
continuation=yes
constraint_algorithm=lincs
constraints=h-bonds
lincs_iter=1
lincs_order=4
cutoff-scheme=Verlet
ns_type=grid
nstlist=10
rcoulomb=1.0
rvdw=1.0
coulombtype=PME
pme_order=4
fourierspacing=0.16
tcoupl=V-rescale
tc-grps=Protein Non-Protein
tau_t=0.1 0.1
ref_t={CONFIG['temperature_k']} {CONFIG['temperature_k']}
pcoupl=Parrinello-Rahman
pcoupltype=isotropic
tau_p=2.0
ref_p=1.0
compressibility=4.5e-5
refcoord_scaling=com
pbc=xyz
DispCorr=EnerPres
gen_vel=no
"""
npt_mdp_path = WORK_DIR / "npt" / "npt.mdp"
with npt_mdp_path.open("w") as mdp_file:
    mdp_file.write(npt_mdp)

In [None]:
%%bash
# Abort on the first error so the completion message is only printed when
# both grompp and mdrun succeeded.
set -e
# -t passes the NVT checkpoint so the run continues from that state.
gmx grompp -f npt/npt.mdp -c nvt/nvt.gro -r nvt/nvt.gro -t nvt/nvt.cpt -p topol/topol.top -o npt/npt.tpr -maxwarn 5
gmx mdrun -v -deffnm npt/npt
echo "NPT equilibration complete!"

In [None]:
# Production parameters. nsteps covers ONE checkpoint segment, not the whole run.
# Only compressed (xtc) coordinates are written; full-precision output is off.
md_mdp = f"""integrator=md
nsteps={NSTEPS_PER_SEGMENT}
dt={dt}
nstxout=0
nstvout=0
nstxout-compressed=5000
nstenergy=5000
nstlog=5000
continuation=yes
constraint_algorithm=lincs
constraints=h-bonds
lincs_iter=1
lincs_order=4
cutoff-scheme=Verlet
ns_type=grid
nstlist=10
rcoulomb=1.0
rvdw=1.0
coulombtype=PME
pme_order=4
fourierspacing=0.16
tcoupl=V-rescale
tc-grps=Protein Non-Protein
tau_t=0.1 0.1
ref_t={CONFIG['temperature_k']} {CONFIG['temperature_k']}
pcoupl=Parrinello-Rahman
pcoupltype=isotropic
tau_p=2.0
ref_p=1.0
compressibility=4.5e-5
pbc=xyz
DispCorr=EnerPres
gen_vel=no
"""
md_mdp_path = WORK_DIR / "md" / "md.mdp"
with md_mdp_path.open("w") as mdp_file:
    mdp_file.write(md_mdp)
print("Production MDP created")

In [None]:
import subprocess
import time
from datetime import datetime

def run_md_segment(segment_num, resume=False):
    """Run one production segment and snapshot its outputs.

    The first segment of a fresh run is prepared with ``gmx grompp`` from the
    NPT state. Every other call continues from ``md/md.cpt``. Because
    ``md.mdp`` only holds the step count of a single segment, the run input
    file is first extended with ``gmx convert-tpr -until`` so that mdrun has
    further steps to integrate; without this the continuation would stop at
    once with the run already finished.

    Args:
        segment_num: Zero-based index of the segment to run.
        resume: True when continuing from an existing checkpoint instead of
            starting the production run from scratch.

    Returns:
        True if the segment ran and its checkpoint was saved, False if any
        GROMACS step failed or a required input file was missing.
    """
    start_ns = segment_num * CONFIG['checkpoint_interval_ns']
    end_ns = (segment_num + 1) * CONFIG['checkpoint_interval_ns']

    print(f"\n{'='*60}")
    print(f"Segment {segment_num + 1}/{NUM_SEGMENTS}: {start_ns}-{end_ns} ns")
    print(f"Started: {datetime.now().strftime('%H:%M:%S')}")
    print(f"{'='*60}")

    # All GROMACS paths below are relative to the work directory.
    os.chdir(WORK_DIR)

    if segment_num == 0 and not resume:
        result = subprocess.run(["gmx", "grompp", "-f", "md/md.mdp", "-c", "npt/npt.gro",
            "-t", "npt/npt.cpt", "-p", "topol/topol.top", "-o", "md/md.tpr", "-maxwarn", "5"],
            capture_output=True, text=True)
        if result.returncode != 0:
            print(f"grompp failed: {result.stderr}")
            return False
        mdrun_cmd = ["gmx", "mdrun", "-deffnm", "md/md", "-v"]
    else:
        for required in ("md/md.tpr", "md/md.cpt"):
            if not (WORK_DIR / required).exists():
                print(f"Cannot continue: missing {required}")
                return False

        # Set the end time of the run to this segment's end (in ps). -until is
        # absolute, so repeating it for the same segment is harmless.
        extended_tpr = "md/md_extended.tpr"
        result = subprocess.run(["gmx", "convert-tpr", "-s", "md/md.tpr",
            "-until", str(end_ns * 1000), "-o", extended_tpr],
            capture_output=True, text=True)
        if result.returncode != 0 or not os.path.exists(extended_tpr):
            print(f"convert-tpr failed: {result.stderr}")
            return False
        # Keep the md/md.tpr name so -deffnm and the analysis cells still find it.
        os.replace(extended_tpr, "md/md.tpr")
        mdrun_cmd = ["gmx", "mdrun", "-deffnm", "md/md", "-cpi", "md/md.cpt", "-append", "-v"]

    mdrun_result = subprocess.run(mdrun_cmd)
    if mdrun_result.returncode != 0:
        print(f"mdrun failed with exit code {mdrun_result.returncode}")
        return False

    checkpoint_name = f"checkpoint_{end_ns}ns"
    checkpoint_dir = WORK_DIR / "checkpoints" / checkpoint_name
    checkpoint_dir.mkdir(parents=True, exist_ok=True)

    # Snapshot whichever production outputs exist at this point.
    for ext in [".cpt", ".gro", ".edr", ".log", ".xtc"]:
        src = WORK_DIR / "md" / f"md{ext}"
        if src.exists():
            shutil.copy(src, checkpoint_dir / f"md{ext}")

    # Mirror the snapshot into the output directory.
    output_checkpoint = OUTPUT_DIR / f"{CONFIG['complex_name']}_{checkpoint_name}"
    shutil.copytree(checkpoint_dir, output_checkpoint, dirs_exist_ok=True)

    print(f"\nCheckpoint saved: {checkpoint_name}")
    return True

def save_final_results():
    """Copy the md, analysis and topol folders that exist into OUTPUT_DIR/<complex_name>_final."""
    final_dir = OUTPUT_DIR / f"{CONFIG['complex_name']}_final"
    final_dir.mkdir(exist_ok=True)

    candidate_dirs = (WORK_DIR / name for name in ("md", "analysis", "topol"))
    for source in candidate_dirs:
        if not source.exists():
            continue
        shutil.copytree(source, final_dir / source.name, dirs_exist_ok=True)

    print(f"\nFinal results saved to: {final_dir}")

In [None]:
# RUN PRODUCTION MD
# Runs the remaining segments in order and stops at the first failure.
start_segment = 0
if CONFIG['resume']:
    start_segment = CONFIG['resume_from_ns'] // CONFIG['checkpoint_interval_ns']
    print(f"Resuming from segment {start_segment + 1}")

total_start = time.time()
all_segments_ok = True

for segment in range(start_segment, NUM_SEGMENTS):
    segment_start = time.time()
    # Only the very first segment of a fresh run starts without a checkpoint.
    success = run_md_segment(segment, resume=(segment > start_segment or CONFIG['resume']))
    segment_time = time.time() - segment_start
    print(f"Segment time: {segment_time/60:.1f} minutes")
    if not success:
        print(f"Segment {segment + 1} failed!")
        all_segments_ok = False
        break

total_time = time.time() - total_start
print(f"\n{'='*60}")
# Report completion only when every segment actually succeeded.
if all_segments_ok:
    print("PRODUCTION MD COMPLETE!")
else:
    print("PRODUCTION MD STOPPED EARLY - see the errors above")
print(f"Total time: {total_time/3600:.2f} hours")
print(f"{'='*60}")

# Export whatever exists, so partial results are kept after a failure too.
save_final_results()

In [None]:
%%bash
# Abort on the first error so "Analysis complete!" is only printed when every
# analysis tool succeeded.
set -e
cd md
# The piped numbers answer the interactive index-group prompts of each tool.
# rms is written with time in ns (-tu ns); gyrate keeps its default time unit.
echo "4 4" | gmx rms -s md.tpr -f md.xtc -o ../analysis/rmsd_backbone.xvg -tu ns
echo "4" | gmx rmsf -s md.tpr -f md.xtc -o ../analysis/rmsf.xvg -res
echo "1" | gmx gyrate -s md.tpr -f md.xtc -o ../analysis/gyrate.xvg
echo "Analysis complete!"

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def parse_xvg(filename):
    """Read the numeric rows of an .xvg file into a NumPy array.

    Header and metadata lines (starting with ``#`` or ``@``), data-set
    separator lines (starting with ``&``) and blank lines are skipped, also
    when they are preceded by whitespace.

    Args:
        filename: Path of the .xvg file to read.

    Returns:
        A 2-D ``numpy.ndarray`` with one row per data line and one column per
        whitespace-separated value; an empty array if there is no data.

    Raises:
        ValueError: If a data line contains a non-numeric token.
    """
    rows = []
    with open(filename, 'r') as handle:
        for raw_line in handle:
            line = raw_line.strip()
            if not line or line.startswith(('#', '@', '&')):
                continue
            rows.append([float(token) for token in line.split()])
    return np.array(rows)

# Three-panel summary figure: backbone RMSD, per-residue RMSF, radius of gyration.
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
# The title takes the run length from CONFIG so it stays correct when it changes.
fig.suptitle(f"{CONFIG['complex_name']} - MD Analysis ({CONFIG['total_time_ns']} ns)", fontsize=14)

rmsd = parse_xvg('analysis/rmsd_backbone.xvg')
axes[0].plot(rmsd[:, 0], rmsd[:, 1], color='#2E86AB')
axes[0].set_xlabel('Time (ns)')
axes[0].set_ylabel('RMSD (nm)')
axes[0].set_title('Backbone RMSD')
axes[0].grid(True, alpha=0.3)

rmsf = parse_xvg('analysis/rmsf.xvg')
axes[1].plot(rmsf[:, 0], rmsf[:, 1], color='#2E86AB')
axes[1].set_xlabel('Residue')
axes[1].set_ylabel('RMSF (nm)')
axes[1].set_title('RMSF per Residue')
axes[1].grid(True, alpha=0.3)

gyrate = parse_xvg('analysis/gyrate.xvg')
# The time column is divided by 1000 to plot it in ns (it is not written with -tu ns).
axes[2].plot(gyrate[:, 0]/1000, gyrate[:, 1], color='#F18F01')
axes[2].set_xlabel('Time (ns)')
axes[2].set_ylabel('Rg (nm)')
axes[2].set_title('Radius of Gyration')
axes[2].grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(f'analysis/{CONFIG["complex_name"]}_analysis.png', dpi=300)
plt.show()

# Averages use at most the last 5000 rows; shorter series are used in full.
print(f"\nAnalysis Summary:")
print(f"  Avg Backbone RMSD: {rmsd[-5000:, 1].mean():.3f} +/- {rmsd[-5000:, 1].std():.3f} nm")
print(f"  Avg Rg: {gyrate[-5000:, 1].mean():.3f} nm")

In [None]:
# Publish the analysis folder next to the other exported results.
analysis_export_dir = OUTPUT_DIR / f"{CONFIG['complex_name']}_analysis"
shutil.copytree(WORK_DIR / "analysis", analysis_export_dir, dirs_exist_ok=True)
print(f"Analysis saved to {OUTPUT_DIR}")