<h1 align= "center"> Week 2 </h1>
<h3> Aim: </h3>

1. Downloading BACE1 protein from pdb (7MYI -- BACE1 closed-flap -- newest gold-standard model -- res = 1.45 Å)
2. Cleaning the 7MYI BACE1 protein model using OpenBabel/rdkit and saving the cleaned model as well as the bound legand separately.
3. 

 

<h3> Method: </h3>

In [1]:
import sys
import subprocess
import tempfile
import os
from pathlib import Path
import urllib.request

from openbabel import pybel
from meeko import MoleculePreparation
from rdkit import Chem
from rdkit.Chem import rdMolAlign
import nglview as nv
from IPython.display import display



Downloading the most up-to-date BACE1 enzyme:

In [2]:
raw_pdb = "../data/7MYI_raw.pdb"
if not os.path.exists(raw_pdb):
    print("Downloading 7MYI...")
    urllib.request.urlretrieve("https://files.rcsb.org/download/7MYI.pdb", raw_pdb)

In [None]:
mol = Chem.MolFromPDBFile(raw_pdb, removeHs=False, sanitize=False)

# Filter: chain A only, no waters
keep_atoms = []
for atom in mol.GetAtoms():
    res_info = atom.GetPDBResidueInfo()
    if res_info:
        res_name = res_info.GetResidueName().strip()
        chain_id = res_info.GetChainId()
        if res_name != 'HOH' and chain_id == 'A':
            keep_atoms.append(atom.GetIdx())

# Create new mol with kept atoms
edited = Chem.EditableMol(mol)
for i in sorted(range(mol.GetNumAtoms()), reverse=True):
    if i not in keep_atoms:
        edited.RemoveAtom(i)
new_mol = edited.GetMol()

# Separate protein (standard amino acid "aa") and ligand (non-aa)
standard_aa = {"ALA","ARG","ASN","ASP","CYS","GLN","GLU","GLY","HIS","ILE",
               "LEU","LYS","MET","PHE","PRO","SER","THR","TRP","TYR","VAL"}

protein_atoms = []
ligand_atoms = []
for atom in new_mol.GetAtoms():
    res_info = atom.GetPDBResidueInfo()
    if res_info:
        res_name = res_info.GetResidueName().strip()
        if res_name in standard_aa:
            protein_atoms.append(atom.GetIdx())
        else:
            ligand_atoms.append(atom.GetIdx())

# Protein molecule
protein_ed = Chem.EditableMol(new_mol)
for i in sorted(range(new_mol.GetNumAtoms()), reverse=True):
    if i not in protein_atoms:
        protein_ed.RemoveAtom(i)
protein = protein_ed.GetMol()
protein = Chem.AddHs(protein, addCoords=True)
Chem.MolToPDBFile(protein, '../data/bace1_protein_H.pdb')

# Ligand molecule
ligand_ed = Chem.EditableMol(new_mol)
for i in sorted(range(new_mol.GetNumAtoms()), reverse=True):
    if i not in ligand_atoms:
        ligand_ed.RemoveAtom(i)
ligand = ligand_ed.GetMol()
w = Chem.SDWriter('../data/bace1_ligand.sdf')
w.write(ligand)
w.close()

print("Cleaning complete:")
print(f"  Protein atoms: {len(protein_atoms)}")
print(f"  Ligand atoms: {len(ligand_atoms)}")
print("  → bace1_protein_H.pdb")
print("  → bace1_ligand.sdf")

Cleaning complete:
  Protein atoms: 3066
  Ligand atoms: 21
  → bace1_protein_H.pdb
  → bace1_ligand.sdf


Preparing the BACE1 protein and ligand files with meeko to be usable by vina

In [8]:
prep = MoleculePreparation(hydrate=True)

# Protein → PDBQT
prot_pybel = next(pybel.readfile("pdb", "../data/bace1_protein_H.pdb"))
prot_rdkit = Chem.MolFromMolBlock(prot_pybel.write("mol"), removeHs=False, sanitize=False)
pdbqt_prot = prep.prepare(prot_rdkit)
if isinstance(pdbqt_prot, list):
    pdbqt_prot = pdbqt_prot[0]  # Take the first (only) PDBQT string
Path("../data/protein.pdbqt").write_text(pdbqt_prot)

# Ligand → PDBQT
lig_pybel = next(pybel.readfile("sdf", "../data/bace1_ligand.sdf"))
lig_rdkit = Chem.MolFromMolBlock(lig_pybel.write("mol"), removeHs=False, sanitize=False)
pdbqt_lig = prep.prepare(lig_rdkit)
if isinstance(pdbqt_lig, list):
    pdbqt_lig = pdbqt_lig[0]
Path("../data/ligand.pdbqt").write_text(pdbqt_lig)


TypeError: data must be str, not RDKitMoleculeSetup