# PDB ligands - align
* download PDB structures with crystallized ligands (already downloaded: Zenodo -> materials -> ligands_pdb and proteins_pdb)
* align each to a ROCK1 structure with PDB-ID 2ETR

### Aligning

In [None]:
# import libraries
import biotite.structure as struc
import biotite.structure.io.pdb as pdb
from biotite.structure import superimpose_homologs
import numpy as np
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.MolStandardize import rdMolStandardize
import tempfile

In [None]:
# Lookup table: PDB ID of each ROCK1 complex -> residue code of the ligand
# that was crystallized with it.
complex_ligand = {
    "1S1C": "GNP",
    "2ESM": "M77",
    "2ETK": "HFS",
    "2ETR": "Y27",
    "2V55": "ANP",
    "3D9V": "H52",
    "3NCZ": "3NC",
    "3TV7": "07Q",
    "3TWJ": "07R",
    "3V8S": "0HD",
    "4W7P": "3J7",
    "4YVC": "4KH",
    "4YVE": "4KK",
    "5BML": "4TW",
    "5HVU": "65R",
    "5KKS": "6U1",
    "5KKT": "6U2",
    "5UZJ": "8UV",
    "5WNE": "B4J",
    "5WNF": "B4V",
    "5WNG": "B4Y",
    "5WNH": "B5G",
    "6E9W": "J0P",
    "7JOU": "VFS",
    "7S25": "86G",
    "7S26": "86K",
}

# Rebuild the dict so that iteration follows the ligand codes (the values)
# in ascending string order; the sort is stable, so ties keep insertion order.
complex_ligand = {
    pdb_id: complex_ligand[pdb_id]
    for pdb_id in sorted(complex_ligand, key=complex_ligand.get)
}
print(complex_ligand)

In [None]:
# define function: align PDB structure to reference + return aligned ligand
def align_molecules(complex, ligand):
    """Superimpose a complex onto the 2ETR reference and return its ligand.

    The structure ``<complex>.pdb`` is read from ``../materials/proteins_pdb/``,
    superimposed onto ``2etr.pdb`` from the same directory, and every atom that
    is neither a standard amino acid nor water is extracted. The largest
    fragment of that selection is returned as an RDKit molecule whose bond
    orders are taken from ``../materials/ligands_pdb/<ligand>_ideal.sdf``.

    Parameters
    ----------
    complex : str
        File stem of the PDB file to align. (The name shadows the builtin
        ``complex`` inside this function; it is kept so that existing keyword
        calls continue to work.)
    ligand : str
        Ligand code used to locate the ``<ligand>_ideal.sdf`` template.

    Returns
    -------
    rdkit.Chem.Mol
        The ligand with the superimposed coordinates and template bond orders.

    Raises
    ------
    ValueError
        If no ligand atoms are found, or if RDKit cannot parse the extracted
        ligand or the template file.
    """
    import os  # local import: only needed to build the temporary file path

    print(f"complex: {complex}, ligand: {ligand}")
    proteins_path = "../materials/proteins_pdb/"
    ligands_path = "../materials/ligands_pdb/"

    # [0] selects the first model of each structure
    ref_pdb = pdb.PDBFile.read(proteins_path + "2etr.pdb")  # reference
    ref_atoms = ref_pdb.get_structure()[0]
    complex_pdb = pdb.PDBFile.read(f"{proteins_path}{complex}.pdb")  # to align
    complex_atoms = complex_pdb.get_structure()[0]
    amino_acid_residues = [
        "ALA", "ARG", "ASN", "ASP", "CYS", "GLU", "GLN", "GLY", "HIS", "ILE",
        "LEU", "LYS", "MET", "PHE", "PRO", "SER", "THR", "TRP", "TYR", "VAL"
    ]

    # Atoms whose chain ID is "HET A".
    # NOTE(review): confirm that the input files really carry "HET A" as a
    # chain ID; if they do not, these masks are all False and only the
    # chain "A" masks below select anything.
    is_ligand_complex = complex_atoms.chain_id == "HET A"
    is_ligand_ref = ref_atoms.chain_id == "HET A"

    # Atoms belonging to chain A
    is_chain_a_complex = complex_atoms.chain_id == "A"
    is_chain_a_ref = ref_atoms.chain_id == "A"

    # Keep chain A **OR** ligands
    complex_atoms = complex_atoms[is_chain_a_complex | is_ligand_complex]
    ref_atoms = ref_atoms[is_chain_a_ref | is_ligand_ref]

    # Reference is the fixed structure, the complex is the mobile one
    fitted, _, _, _ = superimpose_homologs(ref_atoms, complex_atoms)

    # Everything that is not a standard amino acid or water counts as ligand
    is_ligand = ~np.isin(fitted.res_name, amino_acid_residues + ["HOH"])
    ligand_atoms = fitted[is_ligand]
    if ligand_atoms.array_length() == 0:
        raise ValueError(f"No ligand atoms found in complex {complex!r}")

    # The round trip through a PDB file is needed in order to not have
    # problems with bond orders etc. A temporary directory is used so the
    # file is removed again once RDKit has read it.
    with tempfile.TemporaryDirectory() as tmp_dir:
        temp_pdb_path = os.path.join(tmp_dir, "ligand.pdb")
        out_pdb = pdb.PDBFile()
        out_pdb.set_structure(ligand_atoms)
        out_pdb.write(temp_pdb_path)

        # Convert PDB to RDKit Mol object
        tmp = Chem.MolFromPDBFile(temp_pdb_path)

    if tmp is None:
        raise ValueError(
            f"RDKit could not parse the ligand extracted from complex {complex!r}"
        )

    # Drop smaller fragments from the selection and keep the largest one
    largest_fragment = rdMolStandardize.LargestFragmentChooser()
    tmp = largest_fragment.choose(tmp)

    # Ideal-geometry SDF of this ligand, used as bond-order template
    template_path = f"{ligands_path}{ligand}_ideal.sdf"
    template = Chem.MolFromMolFile(template_path)
    if template is None:
        raise ValueError(f"RDKit could not parse the template {template_path!r}")

    # this is the aligned molecule you can use for calculating RMSDs
    mol = AllChem.AssignBondOrdersFromTemplate(template, tmp)

    return mol

In [None]:
# iterate over the dictionary and save aligned molecules
# `molecules` holds the aligned ligands, `molecules_order` the matching ligand
# codes in the same order. The loop variables are deliberately not called
# `complex`, so the builtin of that name is not rebound at notebook scope.
molecules = []
molecules_order = []
for pdb_id, ligand_code in complex_ligand.items():
    molecules.append(align_molecules(pdb_id, ligand_code))
    molecules_order.append(ligand_code)


In [None]:
# Write every aligned ligand into a single SDF file. The `with` block closes
# the writer on exit, so no explicit close() call is needed afterwards.
with Chem.SDWriter("../materials/aligned_molecules.sdf") as wri:
    for mol in molecules:
        wri.write(mol)