In [225]:
import os
import multiprocessing
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import rdFMCS

In [216]:
# Root folder of the filtering workflow. Set the TBD_BASE_FOLDER environment
# variable to run the notebook on another machine without editing this cell;
# when it is unset the original location is used.
PATH_TO_BASE_FOLDER = os.environ.get(
    'TBD_BASE_FOLDER',
    '/home/fol007/PhD_Project/Template_Based_Docking_Project_GitRepo/filter_out_failed_tbd',
)
# Docked pose SD files, one sub-folder per UniProt ID (read by the RMSD loop).
PATH_TO_FILTERED = f'{PATH_TO_BASE_FOLDER}/filtered'
# Template (reference) ligand SD files, one sub-folder per UniProt ID.
PATH_TO_REFERENCE = f'{PATH_TO_BASE_FOLDER}/reference_ligands'
# Not used by the cells visible in this notebook section.
PATH_TO_ALIGNED = f'{PATH_TO_BASE_FOLDER}/aligned'

In [None]:
def create_initial_reference_dictionary(path=PATH_TO_FILTERED):
    '''Map each sub-folder of ``path`` to the names of the entries it contains.

    The sub-folder names are used as UniProt IDs by the rest of the notebook.

    Parameters
    ----------
    path : str
        Directory to scan. Defaults to ``PATH_TO_FILTERED``.

    Returns
    -------
    dict
        ``{sub_folder_name: [entry_name, ...]}`` with both the keys and the
        entry lists in sorted order.

    Raises
    ------
    OSError
        If ``path`` itself cannot be listed.
    '''
    reference_dictionary = {}
    # os.listdir returns entries in arbitrary order; sorting keeps the
    # processing (and the order of any results written later) reproducible.
    for uniprot_id in sorted(os.listdir(path)):
        uniprot_folder = os.path.join(path, uniprot_id)
        # A stray regular file next to the ID folders would make the nested
        # os.listdir raise NotADirectoryError, so only descend into folders.
        if not os.path.isdir(uniprot_folder):
            continue
        reference_dictionary[uniprot_id] = sorted(os.listdir(uniprot_folder))
    return reference_dictionary

# Bare last expression: the returned dictionary is shown as this cell's output.
create_initial_reference_dictionary()

In [None]:
def get_core_constraint(ref_mol, mcs_mol):
    '''Return a copy of ``mcs_mol`` carrying the coordinates of its match in ``ref_mol``.

    Parameters
    ----------
    ref_mol : rdkit.Chem.Mol
        Molecule with at least one conformer; its atom positions are copied.
    mcs_mol : rdkit.Chem.Mol
        Substructure (core) to locate inside ``ref_mol``.

    Returns
    -------
    rdkit.Chem.RWMol
        Editable copy of ``mcs_mol`` with one conformer whose atom ``i`` sits at
        the position of the ``ref_mol`` atom matched by core atom ``i``.

    Raises
    ------
    ValueError
        If ``ref_mol`` does not contain ``mcs_mol``. Without this check the
        returned core would have no coordinates copied from ``ref_mol`` and
        any value computed from it would be meaningless.
    '''
    # GetSubstructMatch returns, for each core atom in order, the index of the
    # matching atom in ref_mol -- or an empty tuple when there is no match.
    matches = ref_mol.GetSubstructMatch(mcs_mol)
    if not matches:
        raise ValueError('ref_mol does not contain the mcs_mol substructure')

    rwmol = Chem.RWMol(mcs_mol)
    rwconf = Chem.Conformer(rwmol.GetNumAtoms())

    ref_conf = ref_mol.GetConformer()
    for core_idx, ref_idx in enumerate(matches):
        rwconf.SetAtomPosition(core_idx, ref_conf.GetAtomPosition(ref_idx))

    rwmol.AddConformer(rwconf)

    return rwmol

In [224]:
dictionary = create_initial_reference_dictionary()

# One row per (template, docked pose): [template name, pose name, rmsd, exception].
# Pairs that could not be evaluated are kept as well, flagged with rmsd == -1
# and the reason in the last column.
good_pairs = []

for uniprot_id in dictionary:
    print(uniprot_id)
    for filtered in dictionary[uniprot_id]:

        # The template file name is the last 12 characters of the docked file name.
        template_file = filtered[-12:]
        template = Chem.SDMolSupplier(f'{PATH_TO_REFERENCE}/{uniprot_id}/{template_file}')[0]
        if template is None:
            # SDMolSupplier yields None for a record it cannot parse. Record the
            # failure instead of crashing later on template.GetProp(...).
            good_pairs.append([template_file, filtered, -1, 'template could not be parsed'])
            continue

        template_name = template.GetProp('_Name')
        # Loop-invariant: the SMILES round-trip of the template does not depend
        # on the pose, so build it once per docked file.
        template_from_smiles = Chem.MolFromSmiles(Chem.MolToSmiles(template))

        docked = Chem.SDMolSupplier(f'{PATH_TO_FILTERED}/{uniprot_id}/{filtered}')

        for pose_index, pose in enumerate(docked):
            if pose is None:
                # Unparsable pose record: there is no molecule to read a name
                # from, so identify it by file name and position.
                good_pairs.append([template_name, f'{filtered}[{pose_index}]', -1,
                                   'pose could not be parsed'])
                continue

            pose_name = pose.GetProp('_Name')
            try:
                # Strict MCS between the SMILES round-trips of template and pose.
                mcs_strict = rdFMCS.FindMCS(
                    [template_from_smiles, Chem.MolFromSmiles(Chem.MolToSmiles(pose))],
                    matchValences=True,
                    ringMatchesRingOnly=True,
                    completeRingsOnly=True,
                    bondCompare=rdFMCS.BondCompare.CompareOrderExact,
                )

                # Get rdkit mol from the smarts string corresponding to the strict mcs
                mcs_strict_mol = Chem.MolFromSmarts(mcs_strict.smartsString)
                # Calculate properties associated with the Smarts molecule
                mcs_strict_mol.UpdatePropertyCache(strict=False)
                Chem.GetSymmSSSR(mcs_strict_mol)

                # Common core placed at the coordinates it has in the docked pose.
                pose_core = get_core_constraint(pose, mcs_strict_mol)

                # RMS of the pose's core against the template.
                rmsd = AllChem.CalcRMS(pose_core, template)

                good_pairs.append([template_name, pose_name, rmsd, ''])
            except Exception as e:
                # Best effort: keep going and store the reason in the CSV.
                good_pairs.append([template_name, pose_name, -1, str(e)])

pd.DataFrame(data=good_pairs, columns=['template', 'docked', 'rmsd', 'exception']).to_csv('good_pairs.csv', index=False)


P49356
P00520
