# XCOS for merge scoring

The method proceeds as follows:

1. Loop through compounds
        1.1 Break the designed compound into bits at rotatable bonds

2. Loop through the bits
        2.1 If the bit has more than one atom (excluding wildcard atoms), continue. Otherwise stop and assign score 0

3. Loop through all the frags and compare with bit: 
        3.1 Check whether the bit and the frag share a maximum common substructure (MCS) - if so, continue. Otherwise stop and assign score 0
        3.2 Get the shape overlay score of the bit on the frag - if there is > 50% volume overlay, continue. Otherwise stop and assign score 0
        3.3 Get the feature overlay score - if there is > 50% feature overlay, continue. Otherwise stop and assign score 0

4. Score = 0.5 * (feat score * no non-wildcard bit atoms) + 0.5 * (shape score * total bit atoms)

5. Assign best matching frag with highest score to bit

6. All unique fragment matches and the compound mol are written to an SDF file

In [7]:
from rdkit.Chem.Lipinski import RotatableBondSmarts
from rdkit.Chem import BRICS
from rdkit.Chem import Draw
from rdkit import Chem
from rdkit.Chem.FeatMaps import FeatMaps
from rdkit.Chem import AllChem, rdShapeHelpers
from rdkit import RDConfig
from rdkit.Chem import rdFMCS

import os
import numpy as np
import pandas as pd
from datetime import datetime

# Run date as YYYY-MM-DD; used for the 'generation_date' property of the
# header mol and in the name of the output SDF file
date = datetime.today().date().isoformat()

def getBits(mol):
    '''
    Split a molecule into bits by cutting every bond that joins a non-ring
    atom to a ring atom (SMARTS '[!R][R]').

    Parameters
    ----------
    mol : rdkit mol object to be broken up into fragments

    Returns
    -------
    mols : A list of rdkit mol objects, one per disconnected piece left
    after the cuts

    '''
    # Atom-index pairs (non-ring atom, ring atom) marking the bonds to cut,
    # so ring systems and the chains attached to them end up in separate bits
    ring_link_query = Chem.MolFromSmarts('[!R][R]')
    atom_pairs = mol.GetSubstructMatches(ring_link_query)

    # BreakBRICSBonds expects ((atom_idx, atom_idx), (label, label)) entries;
    # (0, 0) is passed as the label pair for every cut
    cut_specs = [((begin_idx, end_idx), (0, 0)) for begin_idx, end_idx in atom_pairs]
    broken_mol = BRICS.BreakBRICSBonds(mol, bonds=cut_specs)

    # Each disconnected piece of the broken molecule becomes its own mol
    return list(Chem.GetMolFrags(broken_mol, asMols=True))

# Shared set-up for getFeatureMapScore: feature factory, per-family feature
# map parameters and the feature families that take part in scoring
fdef = AllChem.BuildFeatureFactory(
    os.path.join(RDConfig.RDDataDir, 'BaseFeatures.fdef'))

# One default-constructed FeatMapParams per feature family known to the factory
fmParams = {family: FeatMaps.FeatMapParams()
            for family in fdef.GetFeatureFamilies()}

# Only features from these families are kept when building / scoring maps
keep = ('Donor', 'Acceptor', 'NegIonizable', 'PosIonizable', 'ZnBinder',
        'Aromatic', 'Hydrophobe', 'LumpedHydrophobe')

def getFeatureMapScore(small_m, large_m, score_mode=FeatMaps.FeatMapScoreMode.All):
    '''
    Score the features of large_m against a feature map built from small_m.

    Parameters
    ----------
    small_m : rdkit mol object the feature map is built from
    large_m : rdkit mol object whose features are scored against that map
    score_mode : FeatMaps.FeatMapScoreMode value assigned to the map before
    scoring

    Returns
    -------
    fm_score : the map score divided by the smaller of the two feature
    counts, or 0 when that smaller count is zero (i.e. one of the mols has
    no features in the families listed in `keep`)

    '''
    # Keep only the feature families we are interested in
    small_feats = [f for f in fdef.GetFeaturesForMol(small_m) if f.GetFamily() in keep]
    large_feats = [f for f in fdef.GetFeaturesForMol(large_m) if f.GetFamily() in keep]

    # Only the map of the small mol is needed: the large mol's features are
    # scored against it directly, so no second map is built
    feat_map = FeatMaps.FeatMap(feats=small_feats, weights=[1] * len(small_feats),
                                params=fmParams)
    feat_map.scoreMode = score_mode

    # Normalise by the smaller feature count; nothing to compare if it is zero
    n_norm = min(feat_map.GetNumFeatures(), len(large_feats))
    if n_norm == 0:
        return 0

    return feat_map.ScoreFeats(large_feats) / n_norm

def getSDFprops(compound_mol, smiles_prop='smiles', ref_pdb='Fragmenstein.pdb'):
    '''
    Reduce the properties of a compound mol to the ones wanted in the
    output SDF file.

    Parameters
    ----------
    compound_mol : rdkit mol object; it is modified in place
    smiles_prop : name of the existing property that holds the compound
    SMILES (differs between input SDF files)
    ref_pdb : value to store in the 'ref_pdb' property

    Returns
    -------
    compound_mol : the same mol object, with 'original SMILES' and 'ref_pdb'
    set and every property listed by GetPropsAsDict() cleared except
    'ref_pdb', 'ref_mols' and 'original SMILES'

    Any error raised by compound_mol.GetProp when smiles_prop is missing is
    propagated to the caller.

    '''
    # Properties that survive the clean-up
    keep_properties = ('ref_pdb', 'ref_mols', 'original SMILES')

    # Copy the SMILES under the name expected in the output file
    compound_mol.SetProp('original SMILES', compound_mol.GetProp(smiles_prop))

    # Assign ref pdb
    compound_mol.SetProp('ref_pdb', ref_pdb)

    # Snapshot the property names first, then clear everything not kept
    for prop in list(compound_mol.GetPropsAsDict()):
        if prop not in keep_properties:
            compound_mol.ClearProp(prop)

    return compound_mol


def getBlankMol(blank_mol):
    '''
    Set the header properties on the blank mol that is written as the
    first record of the output SDF file.

    Parameters
    ----------
    blank_mol : rdkit mol object; it is modified in place

    Returns
    -------
    blank_mol : the same mol object with the header properties set

    '''
    header_props = (
        # Compulsory props
        ('_Name', 'ver_1.2'),
        ('ref_mols', 'Fragments that bits overlap with'),
        ('ref_url', 'https://github.com/Waztom/xchem-xCOS'),
        ('submitter_name', 'WT'),
        ('submitter_email', 'warren.thompson@diamond.ac.uk'),
        ('submitter_institution', 'Diamond Light Source'),
        ('generation_date', date),
        ('method', 'xCOS'),
        # Descriptions of the scoring props set on every compound mol
        ('N_hits', 'The number of fragments that bits overlap with'),
        ('Score_1', 'The score is scaled by the number of heavy bit atoms'),
    )

    # Props are set in the order listed above
    for prop_name, prop_value in header_props:
        blank_mol.SetProp(prop_name, prop_value)

    return blank_mol


In [8]:
# Read in fragment mols
frag_mol_folder = '/home/warren/XChem_projects/xchem-XCOS/in_data/fragment_mols'
path  = frag_mol_folder + '/'

# os.listdir returns entries in arbitrary order; sort the file names so the
# fragment order (and with it the order of tied fragments in the output) is
# the same on every run
frag_mol_list = [Chem.MolFromMolFile(os.path.join(frag_mol_folder, mol_file), sanitize=True)
                 for mol_file in sorted(os.listdir(frag_mol_folder))]

In [9]:
# Docking sdf file holding the compounds to score
docking_sdf = '/home/warren/XChem_projects/xchem-XCOS/in_data/Fragmenstein_permissive_rescored_20200609.sdf'
compound_mols = Chem.SDMolSupplier(docking_sdf)

In [12]:
def _stripFragPrefix(frag_name, prefix='Mpro-'):
    '''Return frag_name without a leading prefix; other names are returned unchanged.'''
    # str.strip(prefix) would remove any of the prefix *characters* from both
    # ends of the name, so the prefix is removed explicitly instead
    if frag_name.startswith(prefix):
        return frag_name[len(prefix):]
    return frag_name


def _scoreBitAgainstFrag(bit, frag_mol):
    '''Return (shape_score, feat_score) for one bit / fragment pair, or (0, 0) if any check fails.'''
    # Score only if some common structure is shared between bit and fragment
    mcs_match = rdFMCS.FindMCS([bit, frag_mol], ringMatchesRingOnly=True,
                               matchValences=True)
    mcs_mol = Chem.MolFromSmarts(mcs_match.smartsString)
    if not frag_mol.HasSubstructMatch(mcs_mol):
        return 0, 0

    # Reduced van der Waals radius scale for a stricter overlay
    protrude_dist = rdShapeHelpers.ShapeProtrudeDist(bit, frag_mol,
                                                     allowReordering=False,
                                                     vdwScale=0.2)
    protrude_score = 1 - np.clip(protrude_dist, 0, 1)

    # Small bits are compared with large frags (reverse SuCOS scoring). The
    # shape check comes first because the feature map is more expensive
    if not protrude_score > 0.50:
        return 0, 0

    # A good shape overlay with a poor feature match is rejected - e.g. two
    # carbons of a 3-membered ring overlaying well with two aromatic carbons
    fm_score = np.clip(getFeatureMapScore(bit, frag_mol), 0, 1)
    if not fm_score > 0.50:
        return 0, 0

    return protrude_score, fm_score


def getReverseScores(compound_mols, filename):
    '''
    Score every compound against the fragments in frag_mol_list and write
    the scored compounds to an SDF file.

    For each compound the bits from getBits are scored against every
    fragment. Per bit, the row(s) with the highest modified SuCOS score

        0.5 * (feat score * no non-wildcard bit atoms)
        + 0.5 * (shape score * total bit atoms)

    are kept; Score_1 is the sum of those scores over all bits. Bits with
    one or no non-wildcard atoms are not scored. A compound without any
    scoreable bit gets Score_1 = 0 and no fragment matches.

    Parameters
    ----------
    compound_mols : iterable of rdkit mol objects (e.g. an SDMolSupplier);
    entries that are None are skipped
    filename : path of the SDF file to write

    Returns
    -------
    None. The file starts with the header mol from getBlankMol, followed by
    one record per scored compound carrying the props kept by getSDFprops
    plus 'ref_mols', 'N_hits' and 'Score_1'.

    '''
    score_columns = ['Fragment', 'Shape_score', 'no_bit_atoms', 'Feat_score',
                     'no_bit_atoms_without_wild_card']

    # Query for the wildcard (dummy) atoms; built once for all bits
    wildcard_query = Chem.MolFromSmarts('[#0]')

    w = Chem.SDWriter(filename)
    try:
        # Header mol with the required props for the ver 1.2 spec
        w.write(getBlankMol(Chem.MolFromSmiles('C')))

        for compound_mol in compound_mols:

            # Records that could not be read are skipped instead of crashing
            if compound_mol is None:
                continue

            # Best fragment match(es) per scored bit of this compound only
            best_matches = []

            for bit in getBits(compound_mol):

                # Removing wildcard atoms does not impact the feat score but does
                # lower the shape overlay, so the feat score is scaled by the
                # number of non-wildcard atoms and the shape score by all atoms
                no_bit_atoms_without_wild_card = Chem.DeleteSubstructs(
                    bit, wildcard_query).GetNumAtoms()
                no_bit_atoms = bit.GetNumAtoms()

                # Only score if enough info in bit to describe a vector - this
                # will bias against cases where frag has long aliphatic chain
                if no_bit_atoms_without_wild_card <= 1:
                    continue

                scores = []
                for frag_mol in frag_mol_list:
                    frag_name = _stripFragPrefix(frag_mol.GetProp('_Name'))
                    shape_score, feat_score = _scoreBitAgainstFrag(bit, frag_mol)
                    scores.append((frag_name, shape_score, no_bit_atoms,
                                   feat_score, no_bit_atoms_without_wild_card))

                df = pd.DataFrame(data=scores, columns=score_columns)
                df['Modified_SuCOS_score'] = (
                    0.5 * (df.Feat_score * df.no_bit_atoms_without_wild_card)
                    + 0.5 * (df.Shape_score * df.no_bit_atoms))

                # Get maximum scoring fragment for bit match
                # NOTE(review): fragments that tie for the maximum are all kept
                # and each contributes to Score_1 - confirm this is intended
                best_matches.append(
                    df[df['Modified_SuCOS_score'] == df['Modified_SuCOS_score'].max()])

            if best_matches:
                final_df = pd.concat(best_matches)

                # Score 1: the score is scaled by the number of bit atoms
                score_1 = final_df.Modified_SuCOS_score.sum()

                # Unique fragments with a score > 0
                all_frags = pd.unique(
                    final_df[final_df.Modified_SuCOS_score > 0].Fragment)
            else:
                # No scoreable bit: nothing matched for this compound
                score_1 = 0.0
                all_frags = []

            # Set sdf props - see function for props to keep and drop
            compound_mol = getSDFprops(compound_mol)

            # Add props we want
            compound_mol.SetProp('ref_mols', ','.join(all_frags))
            compound_mol.SetProp('N_hits', str(len(all_frags)))
            compound_mol.SetProp('Score_1', "{:.4f}".format(score_1))

            # Write to file
            w.write(compound_mol)
    finally:
        # Always close the writer so the records written so far reach the file
        w.close()

In [13]:
# Score all of the compounds and write them to a dated output sdf file
out_sdf = '/home/warren/XChem_projects/xchem-XCOS/out_data/xCOS3_Teo_{}.sdf'.format(date)
getReverseScores(compound_mols=compound_mols, filename=out_sdf)