In [1]:
import pandas as pd

# Read the table stored in admet_ai_output.csv, then shift the default
# 0-based row labels up by one so the first row is labelled 1.
df = pd.read_csv('admet_ai_output.csv')
df.index += 1

In [2]:
def trapezoid_score(x, min_val, low_opt, high_opt, max_val):
    """Score ``x`` with a trapezoidal desirability function.

    The score is 1.0 on the plateau ``[low_opt, high_opt]``, 0.0 at or
    beyond ``min_val`` / ``max_val``, and linearly interpolated on the two
    shoulders in between.

    Parameters
    ----------
    x : number
        Value to score.
    min_val, max_val : number
        Outer limits; the score is 0.0 at and beyond them (unless the limit
        coincides with a plateau edge, see below).
    low_opt, high_opt : number
        Inclusive bounds of the optimal plateau.

    Returns
    -------
    float
        Score in ``[0.0, 1.0]``. A NaN ``x`` fails every comparison and
        normally propagates as NaN through the final arithmetic.
    """
    # Test the plateau first: when a shoulder is degenerate
    # (min_val == low_opt or high_opt == max_val) the shared boundary value
    # belongs to the optimal range and must score 1.0, not 0.0.
    if low_opt <= x <= high_opt:
        return 1.0
    if x <= min_val or x >= max_val:
        return 0.0

    if x < low_opt:
        # Rising shoulder between min_val and low_opt.
        left_width = low_opt - min_val
        if left_width == 0:
            return 0.0
        return (x - min_val) / left_width

    # Falling shoulder between high_opt and max_val. The zero-width guard is
    # retained from the original so a degenerate shoulder never divides by 0.
    right_width = max_val - high_opt
    if right_width == 0:
        return 0.0
    return (max_val - x) / right_width

def linear_score(x, min_val, max_val):
    """Map ``x`` onto ``[0, 1]`` with a linear ramp between two limits.

    Returns 0.0 when ``x <= min_val``, 1.0 when ``x >= max_val``, and the
    proportional position of ``x`` between the limits otherwise.
    """
    if x <= min_val:
        return 0.0
    elif x >= max_val:
        return 1.0
    else:
        # Slope of the ramp: one unit of score across the whole span.
        slope = 1 / (max_val - min_val)
        offset = x - min_val
        return slope * offset

def inverse_prob(x):
    """Return the complement ``1.0 - x`` of ``x``."""
    complement = 1.0 - x
    return complement

Permeability, Bioavailability and Solubility

In [3]:
# Ramp score: 0.0 at or below -8, 1.0 at or above -2.
df["score_logS"] = df["Solubility_AqSolDB"].apply(linear_score, args=(-8, -2))
# Trapezoid score: 1.0 inside 60-120, falling to 0.0 at 20 and at 160.
df["score_tpsa"] = df["tpsa"].apply(trapezoid_score, args=(20, 60, 120, 160))
# Ramp score: 0.0 at or below -6.0, 1.0 at or above -4.7.
df["Caco2_norm"] = df["Caco2_Wang"].apply(linear_score, args=(-6.0, -4.7))

Metabolism with CYP

In [4]:
# Columns whose values are complemented (1 - value) and then averaged per row.
cyp_inhib_cols = [
    "CYP1A2_Veith",
    "CYP2C19_Veith",
    "CYP2C9_Veith",
    "CYP2D6_Veith",
    "CYP3A4_Veith",
]

cyp_complement = df[cyp_inhib_cols].map(inverse_prob)
df["score_CYP_inhibition_mean"] = cyp_complement.mean(axis=1)

Toxicity

In [5]:
# Columns whose values are complemented (1 - value) and then averaged per row.
tox_cols = [
    "hERG",
    "AMES",
    "DILI",
    "ClinTox",
    "Carcinogens_Lagunin",
    "Skin_Reaction",
]
tox_complement = df[tox_cols].map(inverse_prob)
df["score_toxicity_mean"] = tox_complement.mean(axis=1)

In [9]:
# Columns that enter the composite score with equal weight. The order matches
# the order in which they are added together.
lead_components = [
    "score_tpsa",
    "score_logS",
    "QED",
    "HIA_Hou",
    "Bioavailability_Ma",
    "Caco2_norm",
    "score_CYP_inhibition_mean",
    "score_toxicity_mean",
]

# Left-to-right running sum starting from the first component, then divided
# by the number of components (8) to give an unweighted average.
first_component, *remaining_components = lead_components
component_total = sum(
    (df[name] for name in remaining_components),
    df[first_component],
)
df["lead_score"] = component_total / len(lead_components)

# Highest-scoring rows first.
df_sorted = df.sort_values(by="lead_score", ascending=False)
df_sorted

Unnamed: 0,smiles,molecular_weight,logP,hydrogen_bond_acceptors,hydrogen_bond_donors,Lipinski,QED,stereo_centers,tpsa,AMES,...,Lipophilicity_AstraZeneca_drugbank_approved_percentile,PPBR_AZ_drugbank_approved_percentile,Solubility_AqSolDB_drugbank_approved_percentile,VDss_Lombardo_drugbank_approved_percentile,score_logS,score_tpsa,Caco2_norm,score_CYP_inhibition_mean,score_toxicity_mean,lead_score
1,C[C@@H]1Oc2ccccc2O[C@H]1C(=O)N1CCC[C@@H](N2CCN...,345.399,1.2311,4,1,4.0,0.8752,3,71.11,0.316042,...,33.889104,37.068631,69.67817,5.428461,1.0,1.0,1.0,0.949872,0.700393,0.918034
5,O=C(Cc1ccco1)N1CC[C@@]2(C[C@H](Nc3ncccn3)CCO2)C1,342.399,1.8743,6,1,4.0,0.913625,2,80.49,0.292584,...,42.535867,32.37689,63.745638,33.811555,0.945458,1.0,0.751743,0.940602,0.752811,0.897855
2,Cc1nc([C@@H]2CCCN(C(=O)CCc3cccnc3)C2)cc(=O)[nH]1,326.4,1.81212,4,1,4.0,0.929211,1,78.95,0.076281,...,30.787127,15.548662,72.857697,62.233424,1.0,1.0,0.693112,0.877453,0.754217,0.896795
4,Cc1ccc(CNC(=O)N[C@@H]2CCCN(c3ncccn3)C2)cn1,326.404,1.64822,5,2,4.0,0.892468,1,83.04,0.315247,...,48.623497,32.027918,65.451725,42.923614,0.959341,1.0,0.673163,0.877873,0.663227,0.877063
8,Cc1nc(C)n([C@H]2CCCN(C(=O)c3cccc4c[nH]nc34)C2)n1,324.388,2.24854,5,1,4.0,0.783864,1,79.7,0.338645,...,30.244281,24.660721,63.784413,36.835983,0.946313,1.0,0.506201,0.977716,0.592057,0.845674
3,C[C@@]1(C(=O)N2CCC(c3nc4cc(F)ccc4[nH]3)CC2)CCCCO1,345.418,3.3672,3,1,4.0,0.907803,1,58.22,0.594802,...,76.347421,55.835595,45.948042,39.162466,0.794506,0.9555,1.0,0.680689,0.535026,0.844407
7,CO[C@@H](CNc1ncnc(N[C@@H]2CCC[NH2+]C2)n1)c1ccc...,343.455,1.11732,6,3,4.0,0.698083,2,88.57,0.198098,...,48.352074,35.711516,61.962001,90.887941,0.927111,1.0,0.390319,0.819154,0.73817,0.790751
9,Cc1cccc(Nc2nc(N)nc(C[N@@H+]3C[C@@H]4CC(=O)N[C@...,340.411,-0.59098,6,4,4.0,0.592119,3,110.26,0.171611,...,40.093059,31.136099,66.614967,35.672741,0.972529,1.0,0.045716,0.989973,0.785906,0.698908
10,CN(C)c1n[nH]c(-c2cccc(C(=O)NCC[NH+]3CCCCC3)c2)n1,343.455,0.3363,4,3,4.0,0.70662,0,78.35,0.146892,...,20.589376,20.434277,71.539356,91.663435,1.0,1.0,0.519719,0.959886,0.715487,0.695348
6,Cc1cccc(Nc2nc(C[N@@H+]3C[C@@H]4CC(=O)N[C@@H]4C...,340.411,-0.76061,5,5,4.0,0.503681,3,110.99,0.144053,...,29.817759,28.150446,63.43544,76.037224,0.943619,1.0,0.0,0.990641,0.799106,0.596646


In [10]:
# Compact view of the ranking: each structure next to its composite score.
# The original selection was missing its closing bracket (a SyntaxError); the
# column list is completed to match the two columns shown in the cell output.
df_sorted[["smiles", "lead_score"]]

Unnamed: 0,smiles,lead_score
1,C[C@@H]1Oc2ccccc2O[C@H]1C(=O)N1CCC[C@@H](N2CCN...,0.918034
5,O=C(Cc1ccco1)N1CC[C@@]2(C[C@H](Nc3ncccn3)CCO2)C1,0.897855
2,Cc1nc([C@@H]2CCCN(C(=O)CCc3cccnc3)C2)cc(=O)[nH]1,0.896795
4,Cc1ccc(CNC(=O)N[C@@H]2CCCN(c3ncccn3)C2)cn1,0.877063
8,Cc1nc(C)n([C@H]2CCCN(C(=O)c3cccc4c[nH]nc34)C2)n1,0.845674
3,C[C@@]1(C(=O)N2CCC(c3nc4cc(F)ccc4[nH]3)CC2)CCCCO1,0.844407
7,CO[C@@H](CNc1ncnc(N[C@@H]2CCC[NH2+]C2)n1)c1ccc...,0.790751
9,Cc1cccc(Nc2nc(N)nc(C[N@@H+]3C[C@@H]4CC(=O)N[C@...,0.698908
10,CN(C)c1n[nH]c(-c2cccc(C(=O)NCC[NH+]3CCCCC3)c2)n1,0.695348
6,Cc1cccc(Nc2nc(C[N@@H+]3C[C@@H]4CC(=O)N[C@@H]4C...,0.596646
