# MD Simulation Test - CPU Only (Steps 1-8)

Test instalasi dan setup topology menggunakan **ACPYPE** untuk proper GROMACS ligand topology.

**Jalankan dengan Accelerator: None**

---

In [None]:
# Notebook-wide configuration: the complex name and the directories derived from it.
COMPLEX_NAME = '264THM_PPARG'
WORK_DIR = '/kaggle/working/' + COMPLEX_NAME   # root folder for every file of this run
TOPOL_DIR = WORK_DIR + '/topol'                # topology / system-building files
print(f'Testing: {COMPLEX_NAME}')

## Step 1: Install GROMACS

In [None]:
%%bash
# Install GROMACS from the apt repositories and confirm that `gmx` starts.
set -e   # stop the cell at the first failing command

# -qq keeps apt output to a minimum (apt documents that -qq implies -y)
apt-get -qq update
apt-get -qq install gromacs

# Smoke test: show only the first three lines of the version banner
gmx --version | head -n 3
echo 'GROMACS: OK'

## Step 2: Install Micromamba + AmberTools + ACPYPE

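ACPYPE runs AmberTools' antechamber on the ligand and writes GROMACS-format topology files (`.itp`/`.gro`) directly, so no manual AMBER-to-GROMACS conversion is needed.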

In [None]:
%%bash
# Download the standalone micromamba binary and unpack it to /kaggle/working/bin/micromamba.
# pipefail: a failed download must fail the cell itself instead of surfacing
# only as a cryptic tar/bzip2 error further down the pipe.
set -eo pipefail
cd /kaggle/working

# -f : treat HTTP errors as failures (do not pipe an error page into tar)
# -sS: no progress meter, but still print the error message when something fails
# -L : follow redirects
curl -fsSL https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba

./bin/micromamba --version
echo 'Micromamba: OK'

In [None]:
%%bash
# Create the `amber` environment holding AmberTools, OpenBabel and ACPYPE.
set -e
cd /kaggle/working

# micromamba's root prefix is placed inside the working directory
export MAMBA_ROOT_PREFIX=/kaggle/working/mamba

# Same request as before, spelled with long options
./bin/micromamba create --name amber --channel conda-forge ambertools=23 openbabel acpype python=3.10 --yes
echo 'AmberTools + ACPYPE: OK'

In [None]:
%%bash
# Activate the `amber` environment and check that antechamber and acpype are on PATH.
set -e
cd /kaggle/working
export MAMBA_ROOT_PREFIX=/kaggle/working/mamba

# The hook script must reach `eval` as ONE double-quoted argument. Plain quotes are
# required here: backslash-escaped quotes would be passed through literally and the
# hook output would be word-split before eval sees it.
eval "$(./bin/micromamba shell hook -s bash)"
micromamba activate amber

# Resolve each tool in its own assignment so that `set -e` aborts the cell when a
# tool is missing (a lookup nested inside `echo "..."` would never fail the cell).
antechamber_path=$(command -v antechamber)
acpype_path=$(command -v acpype)

echo "antechamber: $antechamber_path"
echo "acpype: $acpype_path"
echo 'All tools verified!'

## Step 3: Setup Working Directory

In [None]:
import os
import shutil
from pathlib import Path

# Resolve the run directories once as Path objects; later cells reuse both names.
work_dir = Path(WORK_DIR)
topol_dir = Path(TOPOL_DIR)

# One sub-folder per pipeline stage; existing folders are left untouched.
for stage in ('input', 'topol', 'em'):
    work_dir.joinpath(stage).mkdir(parents=True, exist_ok=True)

print(f'Working: {work_dir}')

In [None]:
# Copy every PDB file from the attached Kaggle dataset into <work_dir>/input.
DATASET_DIR = Path('/kaggle/input/md-simulation-264thm-pparg-input')
if not DATASET_DIR.exists():
    raise FileNotFoundError(f'Dataset not found: {DATASET_DIR}')

# sorted() makes the copy order (and the printed log) deterministic.
pdb_files = sorted(DATASET_DIR.glob('*.pdb'))
if not pdb_files:
    # Fail here with a clear message instead of later inside pdb2gmx / ACPYPE.
    raise FileNotFoundError(f'No .pdb files found in dataset: {DATASET_DIR}')

for pdb_file in pdb_files:
    shutil.copy(pdb_file, work_dir / 'input')
    print(f'Copied: {pdb_file.name}')

## Step 4: Protein Topology (pdb2gmx)

In [None]:
%%bash
# Build the protein topology with pdb2gmx (amber99sb-ildn force field, tip3p water).
set -e
cd /kaggle/working/264THM_PPARG/topol

# -ignh: ignore the hydrogen atoms present in the input PDB.
# -ff / -water are given explicitly; the piped "1" is only an answer in case
# pdb2gmx still asks an interactive question. printf needs a single backslash
# so that a real newline terminates that answer.
printf '1\n' | gmx pdb2gmx -f ../input/PPARG_6MS7.pdb -o protein.gro -p topol.top -i posre.itp -ff amber99sb-ildn -water tip3p -ignh
echo 'Protein topology: OK'

## Step 5: Ligand Topology (ACPYPE with GAFF2)

ACPYPE generates proper GROMACS `.itp` files with correct atomtypes and moleculetype sections.

In [None]:
%%bash
# Parameterize the ligand with ACPYPE (GAFF2) inside the `amber` environment.
set -e
cd /kaggle/working
export MAMBA_ROOT_PREFIX=/kaggle/working/mamba

# Plain double quotes: the hook script has to reach eval as a single argument.
eval "$(./bin/micromamba shell hook -s bash)"
micromamba activate amber

cd /kaggle/working/264THM_PPARG/topol

# Run ACPYPE with GAFF2 force field
#   -n 0     net charge of the ligand
#   -a gaff2 atom type set
#   -o gmx   write GROMACS output
acpype -i ../input/264-trihydroxy-4-methoxybenzophenone_docked.pdb -n 0 -a gaff2 -o gmx

# printf interprets \n; bash's builtin echo (without -e) would print it literally.
printf '\n=== ACPYPE OUTPUT FILES ===\n'
ls -la *.acpype/
echo 'ACPYPE: OK'

In [None]:
%%bash
# Copy the GROMACS coordinate/topology files produced by ACPYPE to ligand.gro / ligand.itp.
set -e
cd /kaggle/working/264THM_PPARG/topol

# Find ACPYPE output directory (first match). The pipeline's status is that of
# `head`, so a missing directory yields an empty string rather than aborting here.
ACPYPE_DIR=$(ls -d *.acpype 2>/dev/null | head -1)

# Plain quotes are essential: with backslash-escaped quotes the tested word would
# always contain literal quote characters and this check could never trigger.
if [ -z "$ACPYPE_DIR" ]; then
    echo 'ERROR: ACPYPE output directory not found!'
    exit 1
fi

echo "ACPYPE dir: $ACPYPE_DIR"

# Copy GROMACS files from ACPYPE output.
# The directory is quoted so an unusual name cannot be word-split; the glob stays
# outside the quotes so it still expands.
cp "$ACPYPE_DIR"/*_GMX.gro ligand.gro
cp "$ACPYPE_DIR"/*_GMX.itp ligand.itp

# printf interprets \n; bash's builtin echo (without -e) would print it literally.
printf '\n=== ligand.itp first 30 lines ===\n'
head -30 ligand.itp

printf '\nLigand files copied: OK\n'

## Step 6: Combine Protein + Ligand

In [None]:
# Combine GRO files: protein atoms followed by ligand atoms, using the protein's box line.
def _split_gro(gro_lines):
    """Split the lines of a .gro file into ``(atom_lines, box_line)``.

    Layout handled here: title line, atom-count line, one line per atom, box line.
    Trailing blank lines are dropped first so the box line really is the last
    non-empty line.

    Raises:
        ValueError: if the data is too short or the declared atom count does not
            match the number of atom lines.
    """
    body = list(gro_lines)
    while body and not body[-1].strip():
        body.pop()
    if len(body) < 3:
        raise ValueError('GRO data too short: expected title, atom count and box lines')

    atom_lines, box_line = body[2:-1], body[-1]
    declared_atoms = int(body[1])
    if declared_atoms != len(atom_lines):
        raise ValueError(
            f'GRO atom count mismatch: header says {declared_atoms}, found {len(atom_lines)}'
        )
    if not box_line.endswith('\n'):
        box_line += '\n'
    return atom_lines, box_line


# The relative file names below (and in the following Python cells) resolve
# against the topology directory.
os.chdir(topol_dir)

with open('protein.gro', 'r') as f:
    protein_lines = f.readlines()
with open('ligand.gro', 'r') as f:
    ligand_lines = f.readlines()

protein_atoms, box = _split_gro(protein_lines)
ligand_atoms, _ligand_box = _split_gro(ligand_lines)  # the ligand's own box is not used
total_atoms = len(protein_atoms) + len(ligand_atoms)

with open('complex.gro', 'w') as f:
    # Real newlines are required: title and atom count must be separate lines.
    f.write(f'{COMPLEX_NAME} complex\n')
    f.write(f' {total_atoms}\n')
    f.writelines(protein_atoms)
    f.writelines(ligand_atoms)
    f.write(box)

print(f'Complex: {total_atoms} atoms')

In [None]:
# Update topology file: include the ligand .itp right after the force field
# and list the ligand in the [ molecules ] section. Safe to re-run.
import re

with open('topol.top', 'r') as f:
    topol = f.read()

ff_pattern = '#include "amber99sb-ildn.ff/forcefield.itp"'
ff_pos = topol.find(ff_pattern)
if ff_pos == -1:
    raise ValueError('Cannot find forcefield include!')

# Position just after the force-field include line. If that line is the last one
# and has no trailing newline, insert at end of file instead of at offset 0.
newline_pos = topol.find('\n', ff_pos)
ff_end = len(topol) if newline_pos == -1 else newline_pos + 1

ligand_include = '\n; Include ligand topology (ACPYPE/GAFF2)\n#include "ligand.itp"\n'
if '#include "ligand.itp"' in topol:
    # Already patched by a previous run of this cell: do not include it twice.
    new_topol = topol
else:
    new_topol = topol[:ff_end] + ligand_include + topol[ff_end:]

# Get ligand moleculetype name from ligand.itp
with open('ligand.itp', 'r') as f:
    itp_content = f.read()

# First non-comment token after the "[ moleculetype ]" header; any number of
# ';' comment lines may sit between the header and the name.
moltype_match = re.search(
    r'\[\s*moleculetype\s*\]\s*\n(?:\s*;.*\n)*\s*(\S+)', itp_content
)
if moltype_match:
    lig_name = moltype_match.group(1)
else:
    lig_name = 'LIG'
print(f'Ligand moleculetype name: {lig_name}')

# Compare against the first token of each entry in the molecules section, so a
# name that merely contains lig_name as a substring does not count as "present".
molecules_section = new_topol.split('[ molecules ]')[-1]
listed_molecules = {
    entry.split()[0]
    for entry in molecules_section.splitlines()
    if entry.strip() and not entry.lstrip().startswith(';')
}
if lig_name not in listed_molecules:
    new_topol += f'\n{lig_name}     1\n'

with open('topol.top', 'w') as f:
    f.write(new_topol)

print('Topology updated!')

## Step 7: Solvate + Ions

In [None]:
%%bash
# Place the complex in a simulation box and fill the box with water.
set -e
cd /kaggle/working/264THM_PPARG/topol

# Centre the complex (-c) in a dodecahedron box, 1.2 between solute and box edge (-d)
gmx editconf -f complex.gro -o box.gro -bt dodecahedron -d 1.2 -c

# Solvate with the spc216 water configuration; -p passes topol.top to solvate
gmx solvate -cp box.gro -cs spc216.gro -p topol.top -o solvated.gro
echo 'Solvated: OK'

In [None]:
# Create ions.mdp: run parameters used only to build the .tpr that genion needs.
ions_mdp = '''integrator = steep
emtol = 1000.0
emstep = 0.01
nsteps = 50000
nstlist = 10
cutoff-scheme = Verlet
coulombtype = cutoff
rcoulomb = 1.0
rvdw = 1.0
pbc = xyz
'''

# Write next to topol.top explicitly instead of relying on the current working
# directory left behind by an earlier cell; the following bash cell reads the
# file from the topology directory.
ions_mdp_path = topol_dir / 'ions.mdp'
with open(ions_mdp_path, 'w') as f:
    f.write(ions_mdp)
print('ions.mdp created')

In [None]:
%%bash
# Build ions.tpr, then let genion add NA/CL ions (neutralizing, -conc 0.15).
set -e
cd /kaggle/working/264THM_PPARG/topol

# -maxwarn 2 lets grompp continue despite up to two warnings.
gmx grompp -f ions.mdp -c solvated.gro -p topol.top -o ions.tpr -maxwarn 2

# genion reads the group to draw from on stdin. The answer must be exactly "SOL"
# followed by a real newline, hence a single backslash in the printf format.
printf 'SOL\n' | gmx genion -s ions.tpr -o system.gro -p topol.top -pname NA -nname CL -neutral -conc 0.15
echo 'Ions: OK'

## Step 8: Energy Minimization (CPU)

In [None]:
import os
from pathlib import Path
# Steepest-descent minimization settings, written to <work_dir>/em/em.mdp.
em_mdp = '''integrator = steep
emtol = 1000.0
emstep = 0.01
nsteps = 50000
nstlist = 10
cutoff-scheme = Verlet
coulombtype = PME
rcoulomb = 1.0
rvdw = 1.0
pbc = xyz
'''

work_dir = Path(WORK_DIR)
os.chdir(work_dir)   # the relative path below resolves against the run folder
em_dir = work_dir / 'em'
em_dir.mkdir(exist_ok=True)

with open('em/em.mdp', 'w') as mdp_file:
    mdp_file.write(em_mdp)
print('em.mdp created')

In [None]:
%%bash
# Assemble the minimization run input and run the energy minimization on CPU.
set -e
cd /kaggle/working/264THM_PPARG

# -maxwarn 2 lets grompp continue despite up to two warnings; check its output.
gmx grompp -f em/em.mdp -c topol/system.gro -p topol/topol.top -o em/em.tpr -maxwarn 2

# -deffnm em/em: all mdrun input/output files use the em/em.* prefix
gmx mdrun -v -deffnm em/em

# printf interprets \n; bash's builtin echo (without -e) would print it literally.
printf '\n=== ENERGY MINIMIZATION COMPLETE ===\n'
echo 'All CPU steps passed! Ready for GPU production MD.'

---

## SUCCESS!

Jika sampai sini tidak ada error, berarti setup sudah benar.

**Next:** Run notebook utama dengan GPU untuk production MD.