In [1]:
import os
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Lipinski
import json

In [2]:
# Define constants:
PATH_TO_BASE_FOLDER = '/home/fol007/PycharmProjects/ChEMBL_plus_BindingMOAD/BindingMOAD_AstexDiverseSet_Simplified' #'/BindingMOAD_AstexDiverseSet_Simplified'
PATH_TO_PDB_FOLDER = f'{PATH_TO_BASE_FOLDER}/pdb_files'
PATH_TO_REFERENCE_LIGANDS_FOLDER = f'{PATH_TO_BASE_FOLDER}/reference_ligands'
PATH_TO_ATOM_PROPERTIES = '/home/fol007/PycharmProjects/SASA_calculation/data/sasa_per_atoms'

In [3]:
# Helper function:

def create_reference_dictionary(path_to_reference_ligands_folder=PATH_TO_REFERENCE_LIGANDS_FOLDER):
    '''organize dictionary based on the reference ligands'''
    reference_dictionary = {}
    uniprot_ids = os.listdir(path_to_reference_ligands_folder)
    for uniprot_id in uniprot_ids:
        reference_dictionary[uniprot_id] = []
        for references in os.listdir(path_to_reference_ligands_folder + '/' + uniprot_id):
            reference_dictionary[uniprot_id] += [references]
    return reference_dictionary

In [4]:
reference_dictionary = create_reference_dictionary()

dictionary_df = {'uniprot_id':[], 'reference':[], 'mol_object':[], 'atom_properties': []}

for uniprot_id in reference_dictionary:
    for ref in reference_dictionary[uniprot_id]:
        if os.path.isfile(PATH_TO_ATOM_PROPERTIES + '/' + ref.split('.')[0] + '.json'):
            dictionary_df['uniprot_id'] += [uniprot_id]
            dictionary_df['reference'] += [ref.split('.')[0]]
            dictionary_df['mol_object'] += [Chem.SDMolSupplier(PATH_TO_REFERENCE_LIGANDS_FOLDER + '/' + uniprot_id + '/' + ref)[0]]
            dictionary_df['atom_properties'] += [json.load(open(PATH_TO_ATOM_PROPERTIES + '/' + ref.split('.')[0] + '.json','r'))]

dictionary_df = pd.DataFrame(dictionary_df)

dictionary_df.head(3)

Unnamed: 0,uniprot_id,reference,mol_object,atom_properties
0,P16083,3OVM_MZC,<rdkit.Chem.rdchem.Mol object at 0x7fcabed85300>,"[{'SASAClassName': 'Apolar', 'SASA': '7.815197..."
1,P16083,3G5M_XM5,<rdkit.Chem.rdchem.Mol object at 0x7fcabed88170>,"[{'SASAClassName': 'Polar', 'SASA': '43.576379..."
2,P16083,3OWH_52X,<rdkit.Chem.rdchem.Mol object at 0x7fcabed51490>,"[{'SASAClassName': 'Apolar', 'SASA': '6.997487..."


In [5]:
# table with rmsd values, smiles and mcs smarts
#rmsd_df = pd.read_csv('../data/rmsd_values_base.csv')
rmsd_df = pd.read_csv('../../../Analysis_of_Docking/data/rmsd_values_featurized_without_bad_pairs.csv')

rmsd_df['mcs_bonded_polar_sasa'] = None
rmsd_df['mcs_bonded_apolar_sasa'] = None
rmsd_df['mcs_unbonded_polar_sasa'] = None
rmsd_df['mcs_unbonded_apolar_sasa'] = None

In [6]:
for index, template, mcs_smarts in \
        rmsd_df[['template', 'mcs_smartsString']].itertuples():

    if os.path.isfile(PATH_TO_ATOM_PROPERTIES + '/' + template + '.json'):
        try:
            template_mol = dictionary_df.loc[dictionary_df['reference']==template, 'mol_object'].values[0]
            mcs_mol = Chem.MolFromSmarts(mcs_smarts)
            mcs_mol.UpdatePropertyCache(strict=False)
            Chem.GetSymmSSSR(mcs_mol)

            mcs_indices=template_mol.GetSubstructMatch(mcs_mol)

            atom_properties = dictionary_df.loc[dictionary_df['reference']==template, 'atom_properties'].values[0]

            mcs_bonded_polar_sasa = 0
            mcs_bonded_apolar_sasa = 0
            mcs_unbonded_polar_sasa = 0
            mcs_unbonded_apolar_sasa = 0

            for atom_index in mcs_indices:
                if atom_properties[atom_index]['SASAClassName'] == 'Polar':
                    mcs_bonded_polar_sasa += float(atom_properties[atom_index]['SASA_bonded'])
                    mcs_unbonded_polar_sasa += float(atom_properties[atom_index]['SASA'])
                else:
                    mcs_bonded_apolar_sasa += float(atom_properties[atom_index]['SASA_bonded'])
                    mcs_unbonded_apolar_sasa += float(atom_properties[atom_index]['SASA'])

            rmsd_df.at[index, 'mcs_bonded_polar_sasa'] = mcs_bonded_polar_sasa
            rmsd_df.at[index, 'mcs_bonded_apolar_sasa'] = mcs_bonded_apolar_sasa
            rmsd_df.at[index, 'mcs_unbonded_polar_sasa'] = mcs_unbonded_polar_sasa
            rmsd_df.at[index, 'mcs_unbonded_apolar_sasa'] = mcs_unbonded_apolar_sasa
        except Exception as e:
            print(e)
            print(PATH_TO_ATOM_PROPERTIES + '/' + ref.split('.')[0] + '.json')
            print(index, template)
    else:
        rmsd_df.at[index, 'mcs_bonded_polar_sasa'] = -1
        rmsd_df.at[index, 'mcs_bonded_apolar_sasa'] = -1
        rmsd_df.at[index, 'mcs_unbonded_polar_sasa'] = -1
        rmsd_df.at[index, 'mcs_unbonded_apolar_sasa'] = -1

print('Done!')

Done!


In [7]:
# add SASA values as features
sasa_df = pd.read_csv('/home/fol007/PycharmProjects/SASA_calculation/data/total_sasa.csv')

rmsd_df['sasa_bonded_polar'] , rmsd_df['sasa_bonded_apolar'],\
rmsd_df['sasa_unbonded_polar'], rmsd_df['sasa_unbonded_apolar'] = None, None, None, None

In [8]:
for reference, sasa_bond_polar, sasa_bond_apolar, sasa_unbond_polar, sasa_unbond_apolar, error \
        in sasa_df[['reference', 'sasa_bond_polar', 'sasa_bond_apolar', 'sasa_unbond_polar', 'sasa_unbond_apolar', 'error']].values:
    try:
        error = int(error)
    except:
        error = 0
    if error == -1:
        if len(rmsd_df.loc[rmsd_df['template']==reference]) > 0:

            rmsd_df.loc[rmsd_df['template']==reference, 'sasa_bonded_polar'] = sasa_bond_polar
            rmsd_df.loc[rmsd_df['template']==reference, 'sasa_bonded_apolar'] = sasa_bond_apolar
            rmsd_df.loc[rmsd_df['template']==reference, 'sasa_unbonded_polar'] = sasa_unbond_polar
            rmsd_df.loc[rmsd_df['template']==reference, 'sasa_unbonded_apolar'] = sasa_unbond_apolar
    else:
        rmsd_df = rmsd_df.loc[rmsd_df['template']!=reference]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


In [9]:
rmsd_df.to_csv('../../../Analysis_of_Docking/data/rmsd_values_featurized_w_sasa_without_bad_pairs.csv' ,index=False)