In [1]:
import polaris as po
import numpy as np

# Load the competition definition from the Polaris Hub.
competition = po.load_competition("asap-discovery/antiviral-admet-2025")

# Split into train and test sets. Later cells index each `train` item as
# (SMILES, {endpoint name: value}) and pass each `test` item to RDKit as a SMILES.
train, test = competition.get_train_test_split()

In [25]:
# Element 0 of every training record is the molecule's SMILES string.
smis = [record[0] for record in train]

# Element 1 maps endpoint name -> measured value. Build one label list per
# endpoint, each positionally aligned with `smis`.
y1, y2, y3, y4, y5 = (
    [record[1][endpoint] for record in train]
    for endpoint in ('HLM', 'MLM', 'LogD', 'MDR1-MDCKII', 'KSOL')
)

In [27]:
# Materialise the test inputs once so they can be iterated repeatedly.
test_smis = list(test)

In [28]:
# No log-transform is applied to the labels: the metrics were worse with it.

def _drop_missing(smiles, values):
    """Keep only the (SMILES, value) pairs whose value is not NaN.

    Args:
        smiles: SMILES strings, positionally aligned with ``values``.
        values: Labels for one endpoint; NaN marks a missing measurement.

    Returns:
        ``(kept_smiles, kept_values)``: two equally long lists that are still
        aligned with each other.

    Raises:
        ValueError: If the inputs differ in length, e.g. when this cell is
            re-run after the labels have already been filtered.
    """
    if len(smiles) != len(values):
        raise ValueError(
            "smiles and values differ in length; re-run the label "
            "extraction cell before filtering again"
        )
    kept = [(smi, val) for smi, val in zip(smiles, values) if not np.isnan(val)]
    return [smi for smi, _ in kept], [val for _, val in kept]


# Both lists are filtered together from the *unfiltered* labels. Filtering y
# first and then enumerating the already-filtered list would select the first
# len(y) SMILES instead of the ones that actually have a measurement.
smis1, y1 = _drop_missing(smis, y1)
smis2, y2 = _drop_missing(smis, y2)
smis3, y3 = _drop_missing(smis, y3)
smis4, y4 = _drop_missing(smis, y4)
smis5, y5 = _drop_missing(smis, y5)


In [29]:

from rdkit import Chem
from rdkit.Chem import Descriptors,rdFingerprintGenerator
# Shared Morgan fingerprint generator (radius 3, 2048-bit vectors).
MFPGEN = rdFingerprintGenerator.GetMorganGenerator(3,fpSize=2048)
def calculateDescriptors(mol: Chem.Mol, missingVal: float | None = 0.0) -> list:
    """Build the feature vector for one molecule.

    The vector is every descriptor in ``Descriptors._descList`` (in that
    order) followed by the bits of the Morgan fingerprint from ``MFPGEN``.

    Adapted from
    https://github.com/jonswain/tabpfn-tdc/blob/main/submission.py#L12

    Args:
        mol: The molecule to featurise.
        missingVal: Value stored for any descriptor whose calculation raises.

    Returns:
        A flat list: descriptor values followed by the fingerprint bits.
    """
    res = []
    for _, fn in Descriptors._descList:
        try:
            val = fn(mol)
        except Exception:
            # Best effort: one failing descriptor must not discard the whole
            # molecule. `Exception` (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            val = missingVal
        res.append(val)
    return res + list(MFPGEN.GetFingerprint(mol))

# Featurise each endpoint's SMILES subset; X<k> stays row-aligned with smis<k>.
X1, X2, X3, X4, X5 = (
    [calculateDescriptors(Chem.MolFromSmiles(smi)) for smi in subset]
    for subset in (smis1, smis2, smis3, smis4, smis5)
)


In [None]:
from tabpfn import TabPFNRegressor

# Train on the entire training set: one independent model per endpoint,
# each predicting on the same featurised test molecules.
X_test = [calculateDescriptors(Chem.MolFromSmiles(smi)) for smi in test_smis]

# Endpoint name -> (feature rows, labels). Keeping each pairing in one place
# means a feature matrix cannot be fitted against another endpoint's labels.
training_sets = {
    'HLM': (X1, y1),
    'MLM': (X2, y2),
    'LogD': (X3, y3),
    'MDR1-MDCKII': (X4, y4),
    'KSOL': (X5, y5),
}

y_pred = {}
for endpoint, (features, targets) in training_sets.items():
    # A fresh regressor per endpoint so no fitted state is shared.
    # NOTE(review): ignore_pretraining_limits presumably lifts TabPFN's
    # built-in size limits on the training data -- confirm against the docs.
    regressor = TabPFNRegressor(ignore_pretraining_limits=True)
    regressor.fit(features, targets)
    y_pred[endpoint] = regressor.predict(X_test)

In [33]:

# Upload the per-endpoint predictions to the competition.
competition.submit_predictions(
    predictions=y_pred,
    prediction_name="admet-tabPFN",
    prediction_owner="wim0",
    report_url="https://molecular.beauty/blog/2025/03/14/polaris.html",
    # The below metadata is optional, but recommended.
    github_url="https://github.com/dehaenw/polaris-baseline",
    description="Source code and report will be made public once challenge is finished",
    # "Method" must describe what this notebook actually does: the labels are
    # used as-is, with no log transform applied anywhere in the notebook.
    user_attributes={"Framework": "RDKit + TabPFN", "Method": "TabPFN on untransformed data with ECFP and rdkit descriptors"}
)


Output()