In [1]:
import polaris as po
import numpy as np

# Identifier of the competition on the Polaris Hub.
COMPETITION_ID = "asap-discovery/antiviral-admet-2025"

# Fetch the competition definition from the Hub.
competition = po.load_competition(COMPETITION_ID)

# Obtain the train and test splits; both are iterated over in the cells below.
train, test = competition.get_train_test_split()

In [2]:
def _train_column(endpoint):
    """List the `endpoint` label of every training record, in iteration order."""
    return [record[1][endpoint] for record in train]


# Every training record is indexed positionally: item 0 is the SMILES string,
# item 1 is a mapping from endpoint name to its label.
smis = [record[0] for record in train]
y1 = _train_column('HLM')
y2 = _train_column('MLM')
y3 = _train_column('LogD')
y4 = _train_column('MDR1-MDCKII')
y5 = _train_column('KSOL')

In [3]:
# Materialise the test split as a list; its items are later fingerprinted as SMILES.
test_smis = list(test)

In [4]:
def _drop_missing(smiles, values):
    """Drop the entries whose label is NaN, keeping SMILES and labels aligned.

    Args:
        smiles: sequence of SMILES strings.
        values: sequence of numeric labels, index-aligned with `smiles`.

    Returns:
        A `(kept_smiles, kept_values)` pair of lists of equal length in which
        position `i` of both lists refers to the same molecule.

    Raises:
        ValueError: if the two inputs differ in length. This happens when this
            cell is re-run after the label lists were already filtered.
    """
    if len(smiles) != len(values):
        raise ValueError(
            "SMILES and label lists differ in length; re-run the cell that "
            "builds smis and y1..y5 before filtering again"
        )
    kept = [(smi, val) for smi, val in zip(smiles, values) if not np.isnan(val)]
    return [smi for smi, _ in kept], [val for _, val in kept]


# SMILES and labels must be filtered together. Filtering the labels first and
# then enumerating the already-filtered list would simply select the first
# len(labels) SMILES, pairing labels with the wrong molecules.
smis1, y1 = _drop_missing(smis, y1)
smis2, y2 = _drop_missing(smis, y2)
smis3, y3 = _drop_missing(smis, y3)
smis4, y4 = _drop_missing(smis, y4)
smis5, y5 = _drop_missing(smis, y5)


In [5]:
# tNN: thresholded nearest-neighbor baseline
from rdkit import Chem
from rdkit.Chem import rdFingerprintGenerator

# Shared Morgan fingerprint generator (radius 3, 4096-wide fingerprints) used by get_rep.
MFPGEN = rdFingerprintGenerator.GetMorganGenerator(radius=3, fpSize=4096)

def get_rep(smi):
    """Return the Morgan count fingerprint of a molecule given as SMILES.

    Args:
        smi: SMILES string of the molecule.

    Returns:
        The count fingerprint produced by the module-level `MFPGEN` generator.

    Raises:
        ValueError: if RDKit cannot parse `smi`.
    """
    m = Chem.MolFromSmiles(smi)
    # MolFromSmiles signals a parse failure by returning None instead of raising;
    # fail here with the offending input rather than inside the generator.
    if m is None:
        raise ValueError(f"RDKit could not parse SMILES: {smi!r}")
    return MFPGEN.GetCountFingerprint(m)

# Fingerprint each endpoint's SMILES list; X<k> stays index-aligned with smis<k>.
X1 = list(map(get_rep, smis1))
X2 = list(map(get_rep, smis2))
X3 = list(map(get_rep, smis3))
X4 = list(map(get_rep, smis4))
X5 = list(map(get_rep, smis5))

In [8]:
from rdkit import DataStructs
from sklearn.metrics import mean_absolute_error


# Tanimoto similarity a reference molecule must exceed to count as a neighbour.
t = 0.64

# Fingerprints of the molecules to predict.
probe_fps = [get_rep(smi) for smi in test_smis]

# Per-endpoint reference sets: fingerprints and their index-aligned labels.
ref_fps1 = X1
ref_fps2 = X2
ref_fps3 = X3
ref_fps4 = X4
ref_fps5 = X5
ref_y1 = y1
ref_y2 = y2
ref_y3 = y3
ref_y4 = y4
ref_y5 = y5


def _tnn_predict(probe_fp, ref_fps, ref_y, threshold):
    """Predict one label with a thresholded nearest-neighbour rule.

    The prediction is the unweighted mean label of every reference molecule
    whose Tanimoto similarity to the probe is strictly greater than
    `threshold`. If no reference passes the threshold, the label of the single
    most similar reference is used instead.

    Args:
        probe_fp: fingerprint of the molecule to predict.
        ref_fps: list of reference fingerprints.
        ref_y: labels, index-aligned with `ref_fps`.
        threshold: similarity cut-off for a reference to count as a neighbour.

    Returns:
        The predicted label (mean over the selected references).

    Raises:
        ValueError: if `ref_fps` is empty.
    """
    if len(ref_fps) == 0:
        raise ValueError("cannot predict from an empty reference set")
    sims = DataStructs.BulkTanimotoSimilarity(probe_fp, ref_fps)
    neighbours = [i for i, sim in enumerate(sims) if sim > threshold]
    if not neighbours:
        # Fall back to the closest reference. argmax finds it in one pass and
        # returns the first maximum when several references tie.
        neighbours = [int(np.argmax(sims))]
    return np.average([ref_y[i] for i in neighbours])


preds1 = [_tnn_predict(fp, ref_fps1, ref_y1, t) for fp in probe_fps]
preds2 = [_tnn_predict(fp, ref_fps2, ref_y2, t) for fp in probe_fps]
preds3 = [_tnn_predict(fp, ref_fps3, ref_y3, t) for fp in probe_fps]
preds4 = [_tnn_predict(fp, ref_fps4, ref_y4, t) for fp in probe_fps]
preds5 = [_tnn_predict(fp, ref_fps5, ref_y5, t) for fp in probe_fps]

# Predictions keyed by endpoint name; each list follows the order of test_smis.
y_pred = {
    'HLM': preds1,
    'MLM': preds2,
    'LogD': preds3,
    'MDR1-MDCKII': preds4,
    'KSOL': preds5,
}


In [24]:
# Free-form attributes attached to the submission.
submission_tags = {"Framework": "RDKit", "Method": "thresholded nearest neighbor"}

# Upload the per-endpoint predictions held in y_pred to the competition.
competition.submit_predictions(
    predictions=y_pred,
    prediction_name="ADMET-thresholded-NN",
    prediction_owner="wim0",
    report_url="https://github.com/dehaenw",
    # The remaining metadata is optional, but recommended.
    description="Source code will be made public once challenge is finished",
    user_attributes=submission_tags,
)

Output()