In [None]:
from rdkit import DataStructs
from rdkit.Chem import MolFromSmiles,SDMolSupplier,MACCSkeys,AllChem
# The next two statements make molecules render as SVG vector graphics, which look sharper
from rdkit.Chem.Draw import IPythonConsole
IPythonConsole.ipython_useSVG = True
import numpy as np
import pandas as pd

In [None]:
# Load the molecules from the SDF file and compute one fingerprint per molecule.
sdf_path = r"Q:\ALK\ChEMBL_TargetID_CHEMBL4247_1810.sdf"
# SDMolSupplier yields None for any record RDKit fails to parse; drop those
# entries so the fingerprint call below never receives None.
ms = [mol for mol in SDMolSupplier(sdf_path) if mol is not None]
# Alternative fingerprint (MACCS keys):
# fps = [MACCSkeys.GenMACCSKeys(x) for x in ms]
# Morgan fingerprint, radius 2, folded to 1024 bits, chirality-aware.
fps = [
    AllChem.GetMorganFingerprintAsBitVect(x, 2, nBits=1024, useChirality=True)
    for x in ms
]

In [None]:
# Symmetric pairwise-similarity matrix of the fingerprints; the diagonal
# (self-similarity) is initialised to 1 by np.eye.
n_fps = len(fps)
hot_data = np.eye(n_fps)
for i in range(n_fps - 1):
    # One bulk call scores fps[i] against every later fingerprint, instead of
    # one Python-level call per pair. Tanimoto is also the default metric of
    # DataStructs.FingerprintSimilarity, so the values are unchanged.
    row_sims = DataStructs.BulkTanimotoSimilarity(fps[i], fps[i + 1:])
    hot_data[i, i + 1:] = row_sims
    # Mirror into the lower triangle to keep the matrix symmetric.
    hot_data[i + 1:, i] = row_sims

In [None]:
# Mean over every entry of the similarity matrix; this includes the diagonal
# of ones and counts each pair twice (upper and lower triangle).
np.mean(hot_data)

In [None]:
from matplotlib import rcParams
# Use the serif family for figure text, with Times New Roman as the serif face.
rcParams.update({
    'font.family': 'serif',
    'font.serif': ['Times New Roman'],
})

In [None]:
import matplotlib.pyplot as plt
# Heat map of the pairwise similarity matrix.
# Create exactly one figure: calling plt.clf() first would implicitly open an
# empty figure when none exists, and plt.figure() would then open a second one,
# leaving a blank figure behind.
fig, ax = plt.subplots(facecolor="w")
ax.xaxis.set_ticks_position("top")
im = ax.imshow(hot_data, cmap='rainbow')  # run dir(plt.cm) to list the available colormaps
fig.colorbar(im, ax=ax)
fig.savefig(r"Q:\ALK\ChEMBL_TargetID_CHEMBL4247_1810_ECFP4_Tanimoto_hotmap.tif", dpi=600, bbox_inches="tight")
plt.show()

In [None]:
# Collect each unordered pair once: the strict upper triangle (k=1 skips the
# diagonal). Indexing the triangle directly keeps pairs whose similarity is
# exactly 0; zeroing the lower triangle and then filtering `> 0` would discard
# those genuine values along with the padding zeros.
hist = hot_data[np.triu_indices_from(hot_data, k=1)]

In [None]:
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
plt.rcParams['font.family'] = ['Times New Roman']  # set the figure font
# plt.figure() already starts a fresh figure; a preceding plt.clf() would only
# open (and leave behind) an extra empty figure when none is current.
plt.figure(facecolor="w")
# Equal weights of 1/N turn bar heights into the fraction of pairs per bin.
# Built as a NumPy array rather than a Python list with one float per pair.
plt.hist(
    hist,
    bins=50,
    range=(0, 1),
    weights=np.full(len(hist), 1.0 / len(hist)),
    facecolor="w",
    edgecolor="k",
    alpha=1,
)
# Convert the y axis to percentages.
def to_percent(y, position):
    """Format a fractional tick value as a percentage label.

    Args:
        y: Tick value as a fraction (e.g. 0.07 for 7 %).
        position: Tick index supplied by Matplotlib's FuncFormatter; unused.

    Returns:
        The value times 100 with one decimal place and a trailing '%'.
    """
    # Fixed-width formatting avoids float noise such as '7.000000000000001%'
    # that str(100 * y) produces for values like 0.07.
    return "{:.1f}%".format(100 * y)
# Show y ticks as percentages and finish the axis decoration.
formatter = FuncFormatter(to_percent)
plt.gca().yaxis.set_major_formatter(formatter)
# Label spelling corrected ("Frequncy" -> "Frequency"); the output file name
# below is intentionally left unchanged.
plt.ylabel("Frequency",fontsize=12)
plt.xlabel("Tanimoto Similarity",fontsize=12)
#plt.grid(True)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
# Small margin on both sides of the [0, 1] similarity range.
plt.xlim(-0.05,1.05)
plt.savefig(r"Q:\ALK\ChEMBL_TargetID_CHEMBL4247_1810_ECFP4_Tanimoto_Frequncy.png", dpi=600, bbox_inches="tight")
plt.show()

In [None]:
# Average of the values collected in `hist`.
np.mean(hist)

In [None]:
# Number of values collected in `hist`.
hist.shape[0]

In [None]:
# Fraction of values in `hist` below 0.7. The mean of the boolean mask is
# computed inside NumPy; the builtin sum() would iterate the array one element
# at a time in Python.
(hist < 0.7).mean()

In [None]:
# Fraction of values in `hist` below 0.5. The mean of the boolean mask is
# computed inside NumPy; the builtin sum() would iterate the array one element
# at a time in Python.
(hist < 0.5).mean()

In [None]:
import numpy as np
import pandas as pd
# Read the activity table and keep its IC50 column for the histogram below.
value = pd.read_csv(r"Q:\ALK\ChEMBL_TargetID_CHEMBL4247_1810_activity.csv")
x = value["IC50"]

In [None]:
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
from math import log2,ceil,floor,log10
plt.rcParams['font.family'] = ['Times New Roman']  # set the figure font
plt.rcParams['font.size'] = 20  # set the base font size
# plt.figure() already starts a fresh figure; a preceding plt.clf() would only
# open (and leave behind) an extra empty figure when none is current.
plt.figure(facecolor="w",figsize=(15,4))
# Bin edges at quarter powers of two, 2**(k/4), spanning [x.min(), x.max()].
# The upper index uses ceil so the last edge is >= x.max(); round() could place
# it below the maximum, and values outside the edges are dropped from the
# histogram. max(...) guarantees at least two edges (one bin).
# NOTE(review): log2 requires strictly positive values - confirm IC50 has no
# zeros, negatives or NaNs.
lo_idx = floor(log2(x.min()) * 4)
hi_idx = max(ceil(log2(x.max()) * 4), lo_idx + 1)
l = [2 ** (i / 4) for i in range(lo_idx, hi_idx + 1)]
# Equal weights of 1/N turn bar heights into the fraction of compounds per bin.
plt.hist(
    x,
    bins=l,
    weights=np.full(len(x), 1.0 / len(x)),
    facecolor="w",
    edgecolor="k",
    alpha=1,
)
# Convert the y axis to percentages.
def to_percent(y, position):
    """Format a fractional tick value as a percentage label.

    Args:
        y: Tick value as a fraction (e.g. 0.03 for 3 %).
        position: Tick index supplied by Matplotlib's FuncFormatter; unused.

    Returns:
        The value times 100 with one decimal place and a trailing '%'.
    """
    # Fixed-width formatting replaces str(100 * y)[:4], whose truncation gave
    # inconsistent labels ('3.00%' next to '1.0%', '100.%', '1e-0%').
    return "{:.1f}%".format(100 * y)
# Show y ticks as percentages and finish the axis decoration.
formatter = FuncFormatter(to_percent)
plt.gca().yaxis.set_major_formatter(formatter)
# Label spelling corrected ("Frequncy" -> "Frequency"); the output file name
# below is intentionally left unchanged.
plt.ylabel("Frequency")
plt.xlabel("Activity Value (IC$_{50}$/nM)")
#plt.grid(True)
#plt.xticks(np.linspace(x.min(), x.max(),21),fontsize=12)
#plt.yticks(fontsize=12)
# Logarithmic x axis. Base 10 is Matplotlib's default, so no base keyword is
# passed: the old `basex=` spelling is rejected by current Matplotlib releases,
# while the newer `base=` is not accepted by older ones.
plt.xscale("log")
plt.ylim(0,0.05)
# plt.plot([10,10],[0,0.06],"r")
# plt.plot([50,50],[0,0.06],"g")
# plt.text(0.1, 0.05, '1139 high activity inhibitors',fontsize=20,color="k")
# plt.text(200, 0.05, '973 high activity inhibitors',fontsize=20,color="k")
plt.savefig(r"Q:\ALK\ChEMBL_TargetID_CHEMBL4247_1810_activity_Frequncy.png", dpi=300, bbox_inches="tight")
plt.show()