<h1 align= "center"> Week 2 </h1>
<h3> Aim: </h3>

1. Downloading the BACE1 protein structure from the PDB (7MYI — BACE1 closed-flap, newest gold-standard model, resolution = 1.45 Å).
2. Cleaning the 7MYI BACE1 protein model using Open Babel/RDKit and saving the cleaned model and the bound ligand separately.
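3. Preparing PDBQT files for the protein and ligand with Meeko and redocking the bound ligand with AutoDock Vina, using the RMSD to the crystallographic pose as validation.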

 

<h3> Method: </h3>

In [1]:
import os
import subprocess
import sys
import tempfile
import urllib.request
from pathlib import Path

import nglview as nv
from IPython.display import display
from meeko import MoleculePreparation, PDBQTWriterLegacy
from meeko import PDBQTMolecule, RDKitMolCreate
from openbabel import pybel
from rdkit import Chem
from rdkit.Chem import rdMolAlign, AllChem



Downloading the BACE1 crystal structure (PDB ID 7MYI) from the RCSB, unless a local copy already exists:

In [2]:
# Fetch the raw 7MYI structure once and cache it under ../data.
raw_pdb = "../data/7MYI_raw.pdb"
if not os.path.exists(raw_pdb):
    print("Downloading 7MYI...")
    # The data directory may not exist on a fresh checkout.
    os.makedirs(os.path.dirname(raw_pdb), exist_ok=True)
    # Download to a temporary name first: an interrupted transfer must not leave a
    # truncated file at raw_pdb, because the existence check above would then
    # treat it as a complete download on the next run.
    partial_pdb = raw_pdb + ".part"
    try:
        urllib.request.urlretrieve("https://files.rcsb.org/download/7MYI.pdb", partial_pdb)
        os.replace(partial_pdb, raw_pdb)
    finally:
        if os.path.exists(partial_pdb):
            os.remove(partial_pdb)

In [3]:
def _subset_mol(source_mol, keep_indices):
    """Return a copy of ``source_mol`` that contains only the atoms in ``keep_indices``.

    Parameters
    ----------
    source_mol : rdkit.Chem.Mol
        Molecule to copy atoms from; it is not modified.
    keep_indices : iterable of int
        Atom indices (in ``source_mol``) to retain.
    """
    keep = set(keep_indices)  # set membership keeps the loop linear in the atom count
    editable = Chem.EditableMol(source_mol)
    # Walk from the highest index down so that removing an atom cannot disturb
    # the indices that are still to be visited.
    for idx in range(source_mol.GetNumAtoms() - 1, -1, -1):
        if idx not in keep:
            editable.RemoveAtom(idx)
    return editable.GetMol()


mol = Chem.MolFromPDBFile(raw_pdb, removeHs=False, sanitize=False)
if mol is None:
    raise ValueError(f"RDKit could not parse {raw_pdb}")

# Filter: chain A only, no waters. Atoms without PDB residue info are dropped.
keep_atoms = []
for atom in mol.GetAtoms():
    res_info = atom.GetPDBResidueInfo()
    if res_info:
        res_name = res_info.GetResidueName().strip()
        chain_id = res_info.GetChainId()
        if res_name != 'HOH' and chain_id == 'A':
            keep_atoms.append(atom.GetIdx())

# Create new mol with kept atoms
new_mol = _subset_mol(mol, keep_atoms)

# Separate protein (standard amino acid "aa") and ligand (non-aa)
# NOTE(review): every non-water, non-standard residue of chain A ends up in the
# "ligand" set — confirm this is only the bound inhibitor for 7MYI.
standard_aa = {"ALA","ARG","ASN","ASP","CYS","GLN","GLU","GLY","HIS","ILE",
               "LEU","LYS","MET","PHE","PRO","SER","THR","TRP","TYR","VAL"}

protein_atoms = []
ligand_atoms = []
for atom in new_mol.GetAtoms():
    res_info = atom.GetPDBResidueInfo()
    if res_info:
        res_name = res_info.GetResidueName().strip()
        if res_name in standard_aa:
            protein_atoms.append(atom.GetIdx())
        else:
            ligand_atoms.append(atom.GetIdx())

# Protein molecule: add hydrogens with coordinates and save as PDB
protein = _subset_mol(new_mol, protein_atoms)
protein = Chem.AddHs(protein, addCoords=True)
Chem.MolToPDBFile(protein, '../data/bace1_protein_H.pdb')

# Ligand molecule: saved as SDF without added hydrogens
ligand = _subset_mol(new_mol, ligand_atoms)
w = Chem.SDWriter('../data/bace1_ligand.sdf')
try:
    w.write(ligand)
finally:
    # Close even if write() raises so the file handle is not leaked.
    w.close()

# The counts below are taken before Chem.AddHs, i.e. atoms as read from the PDB file.
print("Cleaning complete:")
print(f"  Protein atoms: {len(protein_atoms)}")
print(f"  Ligand atoms: {len(ligand_atoms)}")
print("  → bace1_protein_H.pdb")
print("  → bace1_ligand.sdf")

Cleaning complete:
  Protein atoms: 3066
  Ligand atoms: 21
  → bace1_protein_H.pdb
  → bace1_ligand.sdf


Preparing the BACE1 protein and ligand as PDBQT files with Meeko so that they can be used by AutoDock Vina:

In [4]:
def _pdbqt_from_setups(mol_setups, label):
    """Join the PDBQT text of every setup Meeko wrote successfully.

    ``PDBQTWriterLegacy.write_string`` returns ``(pdbqt_string, is_ok, error_msg)``;
    components with ``is_ok`` false are reported and skipped instead of being
    written out silently.
    """
    chunks = []
    for setup in mol_setups:
        pdbqt_text, is_ok, error_msg = PDBQTWriterLegacy.write_string(setup)
        if is_ok:
            chunks.append(pdbqt_text)
        else:
            print(f"{label}: PDBQT writing failed: {error_msg}")
    return ''.join(chunks)


# Hydration is requested for the ligand only; the protein is prepared without it.
preparator = MoleculePreparation(hydrate=True)
protein_preparator = MoleculePreparation(hydrate=False)

# Protein
mol = next(pybel.readfile("pdb", "../data/bace1_protein_H.pdb"))
mol.addh()  # Ensure explicit hydrogens (safe even if already present)
rdkit_mol = Chem.MolFromMolBlock(mol.write("mol"), removeHs=False, sanitize=False)
mol_setups = protein_preparator.prepare(rdkit_mol)
pdbqt_string = _pdbqt_from_setups(mol_setups, "protein")
if not pdbqt_string:
    print("WARNING: protein.pdbqt is empty — check ../data/bace1_protein_H.pdb")
Path("../data/protein.pdbqt").write_text(pdbqt_string)

# Ligand
mol = next(pybel.readfile("sdf", "../data/bace1_ligand.sdf"))
mol.addh()  # Add missing hydrogens explicitly
rdkit_mol = Chem.MolFromMolBlock(mol.write("mol"), removeHs=False, sanitize=False)
mol_setups = preparator.prepare(rdkit_mol)
pdbqt_string = _pdbqt_from_setups(mol_setups, "ligand")
if not pdbqt_string:
    print("WARNING: ligand.pdbqt is empty — check ../data/bace1_ligand.sdf")
Path("../data/ligand.pdbqt").write_text(pdbqt_string)

0

In [None]:
def setups_to_pdbqt_lines(mol_setups):
    """Convert Meeko molecule setups into a flat list of PDBQT lines.

    ``PDBQTWriterLegacy.write_string`` returns ``(pdbqt_string, is_ok, error_msg)``.
    The middle element is True on success, so successful components are kept
    and failed ones are reported with Meeko's error message.
    """
    lines = []
    for setup in mol_setups:
        pdbqt_string, is_ok, error_msg = PDBQTWriterLegacy.write_string(setup)
        if is_ok:
            lines.extend(pdbqt_string.splitlines(keepends=True))
        else:
            print(f"Failed component: {error_msg}")
    return lines


# For protein (receptor) — use defaults or specific for rigid amides
# NOTE(review): MoleculePreparation is Meeko's ligand workflow — confirm that the
# PDBQT it produces for a whole protein is a rigid receptor file Vina accepts.
receptor_prep = MoleculePreparation(
    hydrate=False,  # No waters for protein
    charge_model="gasteiger",  # Add Gasteiger charges
    rigidify_bonds_smarts=["[#7X3]-[#6X3]=[#8]"],  # SMARTS for amide N-C=O
    rigidify_bonds_indices=[(0,1)]  # Rigidify the N-C bond in amides
)

# For ligand — standard small-molecule settings with hydrate
# NOTE(review): hydrate=True is a hydrated-docking setting — confirm the Vina run
# uses a scoring setup that supports it.
ligand_prep = MoleculePreparation(
    hydrate=True,  # Add waters for hydrated docking
    charge_model="gasteiger"
)

# ------------------- PROTEIN -------------------
protein = next(pybel.readfile("pdb", "../data/bace1_protein_H.pdb"))
protein.addh()  # explicit H

# The mol block already carries the crystallographic 3D coordinates. They are kept
# as-is: embedding a new conformer here would replace the experimental geometry.
rdkit_prot = Chem.MolFromMolBlock(protein.write("mol"), removeHs=False, sanitize=False)

mol_setups = receptor_prep.prepare(rdkit_prot)
pdbqt_lines = setups_to_pdbqt_lines(mol_setups)

Path("../data/protein.pdbqt").write_text(''.join(pdbqt_lines))
print(f"Protein written: {len(pdbqt_lines)} lines — if 0, check input PDB or Meeko version")

# ------------------- LIGAND -------------------
ligand = next(pybel.readfile("sdf", "../data/bace1_ligand.sdf"))
ligand.addh()

# Coordinates from the SDF (the bound pose) are kept for the same reason as above.
rdkit_lig = Chem.MolFromMolBlock(ligand.write("mol"), removeHs=False, sanitize=False)

mol_setups = ligand_prep.prepare(rdkit_lig)
pdbqt_lines = setups_to_pdbqt_lines(mol_setups)

Path("../data/ligand.pdbqt").write_text(''.join(pdbqt_lines))
print(f"Ligand written: {len(pdbqt_lines)} lines — if 0, check input SDF")

In [None]:
# Redock the prepared ligand into the prepared receptor with AutoDock Vina.
# The command is built as an argument list and run through subprocess so the cell
# is valid Python and does not depend on a particular shell's line-continuation syntax.
vina_output = Path("../results/7MYI_redocked.pdbqt")
vina_output.parent.mkdir(parents=True, exist_ok=True)  # make sure the output directory exists

vina_command = [
    "vina.exe",
    "--receptor", "../data/protein.pdbqt",
    "--ligand", "../data/ligand.pdbqt",
    # Search box centre and edge lengths
    "--center_x", "-10.5", "--center_y", "24.5", "--center_z", "-0.5",
    "--size_x", "22", "--size_y", "22", "--size_z", "22",
    "--cpu", "8",
    "--exhaustiveness", "128",
    "--num_modes", "10",
    "--out", str(vina_output),
]

# check=True raises CalledProcessError if Vina exits with a non-zero status,
# so a failed docking run cannot go unnoticed by the validation cell below.
vina_run = subprocess.run(vina_command, check=True, capture_output=True, text=True)
print(vina_run.stdout)

In [None]:
# Redocking validation: heavy-atom RMSD between the docked pose and the crystal pose.
RMSD_THRESHOLD = 2.0  # Å; cut-off below which the redocked pose is accepted

native = Chem.MolFromMolFile('../data/bace1_ligand.sdf', removeHs=False)
if native is None:
    raise ValueError("RDKit could not parse ../data/bace1_ligand.sdf")

# RDKit has no PDBQT reader, so the Vina output is read with Meeko and converted
# back into RDKit molecules (one molecule per ligand, one conformer per pose).
docked_pdbqt = PDBQTMolecule.from_file('../results/7MYI_redocked.pdbqt', skip_typing=True)
docked_mols = RDKitMolCreate.from_pdbqt_mol(docked_pdbqt)
if not docked_mols or docked_mols[0] is None:
    raise ValueError("Meeko could not rebuild the ligand from ../results/7MYI_redocked.pdbqt")
redocked = docked_mols[0]

# Compare heavy atoms only: the crystal ligand was saved without added hydrogens,
# whereas the docked ligand had hydrogens added during preparation.
# NOTE(review): assumes both molecules share the same heavy-atom graph and bond
# orders; CalcRMS raises if no atom mapping is found.
native_heavy = Chem.RemoveHs(native)
redocked_heavy = Chem.RemoveHs(redocked)

# CalcRMS measures the RMSD in place (symmetry-aware, no superposition). AlignMol
# would first superimpose the pose on the reference and so hide any displacement
# of the ligand within the binding site. The default conformer of the docked
# molecule is used — presumably the first (top-ranked) pose in the Vina output.
rmsd = rdMolAlign.CalcRMS(redocked_heavy, native_heavy)

verdict = "VALIDATION PASSED!" if rmsd <= RMSD_THRESHOLD else "VALIDATION FAILED"
print(f"RMSD = {rmsd:.3f} Å → {verdict}")