In [None]:
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import math
import numpy as np
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem, Lipinski, Crippen, Descriptors, PandasTools as pdt


# Fragment library to profile (SDF file of the PPI library docking results).
file = 'm:/480/480 - all library docking results/PPI ~ 3600/all_ppi_dock_results.sdf'
f = pdt.LoadSDF(file)   # one row per SDF record; molecule objects live in 'ROMol'
df = pd.DataFrame(f)

# Per-molecule descriptors. Each one is kept both as a standalone Series
# (for plotting below) and as a new column on the frame.
logP = df['ROMol'].map(Descriptors.MolLogP)
df['logP'] = logP
hba = df['ROMol'].map(Descriptors.NumHAcceptors)
df['HBA'] = hba
hbd = df['ROMol'].map(Descriptors.NumHDonors)
df['HBD'] = hbd
numheavyatom = df.apply(lambda row: row['ROMol'].GetNumHeavyAtoms(), axis=1)
df['NumHeavyAtoms'] = numheavyatom
mwt = df['ROMol'].map(Descriptors.MolWt)
df['MWT'] = mwt

# One entry per histogram panel on a 2x6 grid (each panel spans two columns):
# (grid location, values, x tick positions, x-axis limits, panel title)
property_panels = [
    ((0, 0), hbd,          np.arange(0, 12, 2),    (0, 10),  'H-Bond Donors'),
    ((0, 2), hba,          np.arange(0, 12, 2),    (0, 10),  'H-Bond Acceptors'),
    ((0, 4), logP,         np.arange(-5, 6, 2.5),  (-5, 5),  'LogP'),
    ((1, 1), numheavyatom, np.arange(0, 35, 5),    (0, 30),  'Number of Heavy Atoms'),
    ((1, 3), mwt,          np.arange(0, 600, 100), (0, 500), 'Molecular Weight'),
]

plt.figure(figsize=(24,10))
for grid_loc, values, tick_positions, (x_min, x_max), panel_title in property_panels:
    plt.subplot2grid(shape=(2,6), loc=grid_loc, colspan=2)
    plt.hist(values, bins=10, color='blue', edgecolor='white')
    plt.xticks(tick_positions)
    plt.xlim(x_min, x_max)
    plt.title(panel_title)

plt.suptitle('Properties of the PPI Fragment Library')

In [None]:
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import math
import numpy as np
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem, Lipinski, Crippen, Descriptors, PandasTools as pdt

# Docking results for the MedChemExpress (MCE) library.
file = 'm:/480/480 - all library docking results/MCE ~ 22000/all_docking_results.sdf'
f = pdt.LoadSDF(file)
df = pd.DataFrame(f)

# Cast the PLP fitness column to float in place, then keep a one-column
# frame of the scores for plotting.
fitness_columns = ['Gold.PLP.Fitness']
df[fitness_columns] = df[fitness_columns].astype(float)
fitness = df[fitness_columns]

# Histogram of fitness scores; the x axis is fixed to 0-100 with ticks every 20.
plt.figure(figsize=(8,8))
plt.hist(fitness, bins=10, color='red', edgecolor='white')
plt.xticks(np.arange(0, 120, 20))
plt.xlim(0, 100)
plt.title('PLP Fitness | MedChemExpress Fragments docked in CypD')

# Summary statistics of the numeric columns are shown as the cell output.
df.describe()

In [None]:
### for top 5% fitness ###
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import rdkit
from rdkit.Chem import PandasTools as pdt

# Docking results for the PPI fragment library.
file = 'm:/480/480 - all library docking results/PPI ~ 3600/all_ppi_dock_results.sdf'
f = pdt.LoadSDF(file)
df = pd.DataFrame(f)

# Work on a copy of the columns of interest so `df` itself is left untouched.
df1 = df[['Gold.PLP.Fitness','Gold.PLP.Chemscore.Hbond','ROMol']].copy()

# SD-file properties are loaded as text by PandasTools.LoadSDF; convert the
# score to a number BEFORE sorting, otherwise the sort is lexicographic
# (e.g. '99.1' would rank above '100.2').
df1['Gold.PLP.Fitness'] = pd.to_numeric(df1['Gold.PLP.Fitness'])
sort_data = df1.sort_values(by=['Gold.PLP.Fitness'], ascending=False)

# All fitness scores (descending) and the 95th-percentile cut-off.
top5_data = sort_data['Gold.PLP.Fitness'].tolist()
b = np.percentile(top5_data, 95) # scores strictly above this value are the top 5%

# Vectorized selection of the scores above the cut-off (replaces the manual loop).
above_cutoff = sort_data['Gold.PLP.Fitness'] > b
top5_list = sort_data.loc[above_cutoff, 'Gold.PLP.Fitness'].tolist()

h = pd.DataFrame(top5_list)

# Optional histogram of the top 5% scores (left disabled, as in the original cell):
#plt.figure(figsize=(8,8))
#plt.hist(h, bins=10, color='blue', edgecolor='white')
#plt.xticks(np.arange(0,120,20))
#plt.xlim(0,100)
#plt.title('Top 5% Fitness Scores | PPI Fragment Library')

In [None]:
### for top5% properties ###
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import math
import numpy as np
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem, Lipinski, Crippen, Descriptors, PandasTools as pdt

# Docking results for the natural-product-like (NPL) library.
file = 'm:/480/480 - all library docking results/NPL ~ 4160/all_npl_docking_results.sdf'
f = pdt.LoadSDF(file)
df = pd.DataFrame(f)

# Per-molecule descriptors, kept both as standalone Series and as frame columns.
logP = df['logP'] = df['ROMol'].map(Descriptors.MolLogP)
hba = df['HB-Acc'] = df['ROMol'].map(Descriptors.NumHAcceptors)
hbd = df['HB-Don'] = df['ROMol'].map(Descriptors.NumHDonors)
numheavyatom = df['NumHeavyAtoms'] = df['ROMol'].map(lambda mol: mol.GetNumHeavyAtoms())
mwt = df['MWT'] = df['ROMol'].map(Descriptors.MolWt)

# SD-file properties are loaded as text by PandasTools.LoadSDF. The fitness
# score must be numeric before it is used as a sort key, otherwise the
# ranking is lexicographic and the "top 5%" selection is wrong.
df['Gold.PLP.Fitness'] = pd.to_numeric(df['Gold.PLP.Fitness'])

# 'Lig Eff' is selected below but was never computed in this cell, which
# raises KeyError on a fresh kernel. Derive it the same way the ligand
# efficiency cells do (fitness per heavy atom) unless the file already
# provides a column with that name.
if 'Lig Eff' not in df.columns:
    df['Lig Eff'] = df['Gold.PLP.Fitness'] / df['NumHeavyAtoms']

# Best-scoring fragments first.
df1 = df.sort_values(by=['Gold.PLP.Fitness'], ascending=False)
a = df1[['Gold.PLP.Fitness', 'Gold.PLP.Chemscore.Hbond', 'HB-Don','HB-Acc', 'logP','MWT','NumHeavyAtoms','Lig Eff']]


def plot_property_histograms(frame, title, color='purple'):
    """Draw the five-panel property histogram figure for ``frame``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must contain the columns 'HB-Don', 'HB-Acc', 'logP',
        'NumHeavyAtoms' and 'MWT'.
    title : str
        Figure-level title (passed to ``plt.suptitle``).
    color : str, optional
        Bar colour shared by every panel.

    Returns
    -------
    None
        The figure is created through the pyplot state machine.
    """
    # (grid location, column, x tick positions, x-axis limits, panel title)
    panels = [
        ((0, 0), 'HB-Don',        np.arange(0, 12, 2),    (0, 10),  'H-Bond Donors'),
        ((0, 2), 'HB-Acc',        np.arange(0, 12, 2),    (0, 10),  'H-Bond Acceptors'),
        ((0, 4), 'logP',          np.arange(-5, 6, 2.5),  (-5, 5),  'LogP'),
        ((1, 1), 'NumHeavyAtoms', np.arange(0, 35, 5),    (0, 30),  'Number of Heavy Atoms'),
        ((1, 3), 'MWT',           np.arange(0, 600, 100), (0, 500), 'Molecular Weight'),
    ]
    plt.figure(figsize=(24,10))
    for grid_loc, column, tick_positions, (x_min, x_max), panel_title in panels:
        plt.subplot2grid(shape=(2,6), loc=grid_loc, colspan=2)
        plt.hist(frame[column], bins=10, color=color, edgecolor='white')
        plt.xticks(tick_positions)
        plt.xlim(x_min, x_max)
        plt.title(panel_title)
    plt.suptitle(title)


### top 5% of fragments by fitness (at or above the 95th percentile) ###

top5 = math.ceil(len(a)*0.05)   # number of rows that make up the top 5%
b = a.head(top5)

plot_property_histograms(
    b, 'Properties of the Top 5% Fragments in the Natural-Product-like Library')


### remaining fragments (everything below the top 5%) ###

d = len(a) - top5   # number of rows left after removing the top 5%
c = a.iloc[top5:]

# The original cell plotted `b` a second time here, so both figures were
# identical; the remainder `c` is what this figure is meant to show.
plot_property_histograms(
    c, 'Properties of the Remaining 95% Fragments in the Natural-Product-like Library')

In [None]:
import matplotlib.pyplot as plt
from matplotlib.pyplot import hist
from matplotlib.patches import Rectangle
import math
import numpy as np
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem, Lipinski, Crippen, Descriptors, PandasTools as pdt
from operator import truediv

# Docking results for the MedChemExpress (MCE) library.
file = 'm:/480/480 - all library docking results/MCE ~ 22000/all_docking_results.sdf'
f = pdt.LoadSDF(file)
df = pd.DataFrame(f)

# Per-molecule descriptors, kept both as standalone Series and as frame columns.
logP = df['logP'] = df['ROMol'].map(Descriptors.MolLogP)
hba = df['HB-Acc'] = df['ROMol'].map(Descriptors.NumHAcceptors)
hbd = df['HB-Don'] = df['ROMol'].map(Descriptors.NumHDonors)
numheavyatom = df['NumHeavyAtoms'] = df['ROMol'].map(lambda mol: mol.GetNumHeavyAtoms())
mwt = df['MWT'] = df['ROMol'].map(Descriptors.MolWt)

# Ligand efficiency = docking fitness per heavy (non-hydrogen) atom.
# The fitness property is converted to a number first; the heavy-atom
# count is already an integer returned by RDKit.
df['Gold.PLP.Fitness'] = pd.to_numeric(df['Gold.PLP.Fitness'])
df['Lig Eff'] = df['Gold.PLP.Fitness'] / df['NumHeavyAtoms']

# Fragments with this many heavy atoms or fewer are excluded from the analysis.
MAX_EXCLUDED_HEAVY_ATOMS = 7
df1 = df[df['NumHeavyAtoms'] > MAX_EXCLUDED_HEAVY_ATOMS]

# Most efficient fragments first.
df2 = df1.sort_values(by=['Lig Eff'], ascending=False)

full_df = df2[['Lig Eff','Gold.PLP.Fitness','Gold.PLP.Chemscore.Hbond','HB-Don','HB-Acc','logP','MWT','NumHeavyAtoms']]

# Distribution of ligand efficiency across the retained fragments.
plt.figure(figsize=(8,8))
plt.hist(full_df['Lig Eff'], bins=10, color='red', edgecolor='white')
plt.xlabel('Ligand Efficiency')
plt.title('Ligand Efficiency from docked MedChemExpress Fragments')
plt.show()

# NOTE: the original cell also kept disabled plotting code inside a string
# assigned to `a`, which overwrote the DataFrame `a` created by the previous
# cell. That dead code has been removed so no unrelated name is clobbered.

In [None]:
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
import math
import numpy as np
import pandas as pd
from rdkit import Chem
from rdkit.Chem import AllChem, Lipinski, Descriptors, PandasTools as pdt


# Docking results for the CA library.
file = 'm:/480/480 - all library docking results/CA ~ 4000/all_ca_dock_results.sdf'
f = pdt.LoadSDF(file)
df = pd.DataFrame(f)

# Per-molecule descriptors, kept both as standalone Series and as frame columns.
logP = df['ROMol'].map(Descriptors.MolLogP)
df['logP'] = logP
hba = df['ROMol'].map(Descriptors.NumHAcceptors)
df['HB-Acc'] = hba
hbd = df['ROMol'].map(Descriptors.NumHDonors)
df['HB-Don'] = hbd
numheavyatom = df.apply(lambda row: row['ROMol'].GetNumHeavyAtoms(), axis=1)
df['NumHeavyAtoms'] = numheavyatom
mwt = df['ROMol'].map(Descriptors.MolWt)
df['MWT'] = mwt

# Ligand efficiency = docking fitness divided by the heavy-atom count;
# both inputs are coerced to numeric dtype first.
for numeric_column in ('Gold.PLP.Fitness', 'NumHeavyAtoms'):
    df[numeric_column] = pd.to_numeric(df[numeric_column])
df['Lig Eff'] = df['Gold.PLP.Fitness'] / df['NumHeavyAtoms']

# Drop fragments that have 7 or fewer heavy (non-H) atoms; only the larger
# fragments are kept for the ranking below.
has_few_heavy_atoms = df['NumHeavyAtoms'] <= 7
df1 = df[~has_few_heavy_atoms]

# Rank the remaining fragments from highest to lowest ligand efficiency.
df2 = df1.sort_values(by=['Lig Eff'], ascending=False)

report_columns = [
    'Lig Eff',
    'Gold.PLP.Fitness',
    'Gold.PLP.Chemscore.Hbond',
    'HB-Don', 'HB-Acc', 'logP', 'MWT',
    'NumHeavyAtoms',
]
full_df = df2[report_columns]

# Summary statistics; only the ligand-efficiency column is displayed.
stats = full_df.describe()
stats[['Lig Eff']]
