In [1]:
import polaris as po
import pickle
import pandas as pd
import numpy as np
from sklearn.base import clone

  from .autonotebook import tqdm as notebook_tqdm


# ADMET

In [2]:
# Unpickle the best model ASTRA selected for each ADMET endpoint, then use
# sklearn's clone to build an unfitted copy with the same parameters for refitting.
admet_model_paths = {
    "HLM": "results/HLM/HLM_pharm2d_cats_standard/final_model.pkl",
    "KSOL": "results/KSOL/KSOL_desc2D_minmax/final_model.pkl",
    "LogD": "results/LogD/LogD_pmapper2d_standard/final_model.pkl",
    "MDR1MDCKII": "results/MDR1MDCKII/MDR1MDCKII_electroshape_standard/final_model.pkl",
    "MLM": "results/MLM/MLM_desc2D_standard/final_model.pkl",
}
admet_models = {}
for endpoint, model_path in admet_model_paths.items():
    with open(model_path, "rb") as f:
        admet_models[endpoint] = pickle.load(f)

# Bind the loaded models and their clones to the names the later cells use.
HLM, KSOL, LogD, MDR1MDCKII, MLM = (
    admet_models[endpoint] for endpoint in ("HLM", "KSOL", "LogD", "MDR1MDCKII", "MLM")
)
HLM_clean, KSOL_clean, LogD_clean, MDR1MDCKII_clean, MLM_clean = (
    clone(model) for model in (HLM, KSOL, LogD, MDR1MDCKII, MLM)
)

In [3]:
# Read the pickled training frame of each ADMET endpoint. The data are stored
# log10(x + 1)-transformed (the back-transform applied to the predictions later
# in this notebook skips LogD).
admet_training_paths = {
    "HLM": "features/antiviral-admet-2025/HLM_pharm2d_cats_final.pkl",
    "KSOL": "features/antiviral-admet-2025/KSOL_desc2D_final.pkl",
    "LogD": "features/antiviral-admet-2025/LogD_pmapper2d_final.pkl",
    "MDR1MDCKII": "features/antiviral-admet-2025/MDR1MDCKII_electroshape_final.pkl",
    "MLM": "features/antiviral-admet-2025/MLM_desc2D_final.pkl",
}
admet_training_frames = {}
for endpoint, frame_path in admet_training_paths.items():
    with open(frame_path, "rb") as f:
        admet_training_frames[endpoint] = pd.read_pickle(f)

HLM_features = admet_training_frames["HLM"]
KSOL_features = admet_training_frames["KSOL"]
LogD_features = admet_training_frames["LogD"]
MDR1MDCKII_features = admet_training_frames["MDR1MDCKII"]
MLM_features = admet_training_frames["MLM"]

# Stack the per-row "Features" entries into one 2-D matrix per endpoint ...
X_HLM, X_KSOL, X_LogD, X_MDR1MDCKII, X_MLM = (
    np.vstack(frame["Features"].to_numpy())
    for frame in (HLM_features, KSOL_features, LogD_features, MDR1MDCKII_features, MLM_features)
)
# ... and flatten the stacked "Target" entries into a 1-D vector.
y_HLM, y_KSOL, y_LogD, y_MDR1MDCKII, y_MLM = (
    np.vstack(frame["Target"].to_numpy()).ravel()
    for frame in (HLM_features, KSOL_features, LogD_features, MDR1MDCKII_features, MLM_features)
)

In [4]:
# Fit each cloned ADMET model on the training matrix/vector loaded for its endpoint.
for estimator, X_train, y_train in (
    (HLM_clean, X_HLM, y_HLM),
    (KSOL_clean, X_KSOL, y_KSOL),
    (LogD_clean, X_LogD, y_LogD),
    (MDR1MDCKII_clean, X_MDR1MDCKII, y_MDR1MDCKII),
):
    estimator.fit(X_train, y_train)
# Left as the cell's final bare expression so the notebook keeps displaying
# whatever this last fit call returns.
MLM_clean.fit(X_MLM, y_MLM)

In [5]:
# Read the precomputed ADMET test-set feature arrays, one per featurisation.
# NOTE: `desc2D_test` is reassigned to the potency features further down, so
# re-running the ADMET prediction cell after that point would use the wrong array.
(
    pharm2d_cats_test,
    desc2D_test,
    pmapper2d_test,
    electroshape_test,
) = (
    np.load(npy_path)
    for npy_path in (
        "features/antiviral-admet-2025/pharm2d_cats_test.npy",
        "features/antiviral-admet-2025/desc2D_test.npy",
        "features/antiviral-admet-2025/pmapper2d_test.npy",
        "features/antiviral-admet-2025/electroshape_test.npy",
    )
)

In [6]:
# Conformer generation failed for one molecule in the test set (using "ETDG", "ETKDG",
# "ETKDGv2", and "ETKDGv3"), so we'll simply predict the average of the predictions
# for the other test molecules. This cell locates that molecule's index.
# NOTE(review): pickle.load can execute arbitrary code; only load conformer files
# produced by this project.
with open("features/antiviral-admet-2025/test_conformers.pkl", "rb") as f:
    test_conformers = pickle.load(f)

# True where a conformer exists, False where generation failed (entry is None).
valid_test_conformers = [conf is not None for conf in test_conformers]

# The prediction cell inserts exactly one fill-in value at `conf_gen_failure`.
# With any other number of failures the predictions would silently end up
# misaligned with the test set, so fail loudly instead of taking the first hit.
n_conf_gen_failures = valid_test_conformers.count(False)
if n_conf_gen_failures != 1:
    raise ValueError(
        f"expected exactly one failed test conformer, found {n_conf_gen_failures}"
    )
conf_gen_failure = valid_test_conformers.index(False)

In [7]:
# Predict every ADMET endpoint from the test-feature array that matches the
# featurisation its model was trained on.
HLM_preds, KSOL_preds, LogD_preds, MDR1MDCKII_preds, MLM_preds = (
    estimator.predict(test_features)
    for estimator, test_features in (
        (HLM_clean, pharm2d_cats_test),
        (KSOL_clean, desc2D_test),
        (LogD_clean, pmapper2d_test),
        (MDR1MDCKII_clean, electroshape_test),
        (MLM_clean, desc2D_test),
    )
)

# MDR1MDCKII: the molecule whose conformer generation failed receives the mean of
# the predictions for the other test molecules, inserted at that molecule's index.
mdr1mdckii_fill_value = np.mean(MDR1MDCKII_preds)
MDR1MDCKII_preds = np.insert(MDR1MDCKII_preds, conf_gen_failure, mdr1mdckii_fill_value)

# Model outputs are log10(x + 1)-transformed; undo that for every endpoint
# except LogD, which is left as predicted.
HLM_preds, KSOL_preds, MDR1MDCKII_preds, MLM_preds = (
    10**log_scale_preds - 1
    for log_scale_preds in (HLM_preds, KSOL_preds, MDR1MDCKII_preds, MLM_preds)
)

In [8]:
# Gather the ADMET predictions in one mapping keyed by endpoint name; this is
# the object handed to submit_predictions below.
y_pred = {
    "MDR1-MDCKII": MDR1MDCKII_preds,
    "HLM": HLM_preds,
    "KSOL": KSOL_preds,
    "MLM": MLM_preds,
    "LogD": LogD_preds,
}

In [9]:
# Load the ADMET competition through polaris.
admet_competition_id = "asap-discovery/antiviral-admet-2025"
competition = po.load_competition(admet_competition_id)

In [10]:
# Submit the ADMET predictions together with the report and repository links.
admet_submission = dict(
    predictions=y_pred,
    prediction_name="final_predictions",
    prediction_owner="wtrd",
    report_url="https://docs.google.com/document/d/1464wiMCWdncfWoMIpL4WHI31qOECqeBq0CEhwiAAfW4/edit?usp=sharing",
    github_url="https://github.com/duartegroup/asap-polaris-challenge",
)
competition.submit_predictions(**admet_submission)

# Potency

In [11]:
# Unpickle the best potency models as determined by ASTRA (MERS-CoV Mpro first,
# SARS-CoV-2 Mpro second), then clone each one for refitting.
potency_model_paths = (
    "results/pIC50_MERS_CoV_Mpro/pIC50_MERS_CoV_Mpro_usr_standard/final_model.pkl",
    "results/pIC50_SARS_CoV_2_Mpro/pIC50_SARS_CoV_2_Mpro_desc2D_standard/final_model.pkl",
)
potency_models = []
for model_path in potency_model_paths:
    with open(model_path, "rb") as f:
        potency_models.append(pickle.load(f))

MERS, SARS = potency_models
MERS_clean, SARS_clean = clone(MERS), clone(SARS)

In [12]:
# Read the pickled potency training frames (MERS-CoV Mpro first, SARS-CoV-2 Mpro second).
potency_training_paths = (
    "features/antiviral-potency-2025/pIC50_MERS_CoV_Mpro_usr_final.pkl",
    "features/antiviral-potency-2025/pIC50_SARS_CoV_2_Mpro_desc2D_final.pkl",
)
potency_training_frames = []
for frame_path in potency_training_paths:
    with open(frame_path, "rb") as f:
        potency_training_frames.append(pd.read_pickle(f))
MERS_features, SARS_features = potency_training_frames

# Stack the per-row "Features" entries into a 2-D matrix and flatten the stacked
# "Target" entries into a 1-D vector for each target.
mers_feature_rows = MERS_features["Features"].to_numpy()
mers_target_rows = MERS_features["Target"].to_numpy()
X_MERS = np.vstack(mers_feature_rows)
y_MERS = np.vstack(mers_target_rows).ravel()

sars_feature_rows = SARS_features["Features"].to_numpy()
sars_target_rows = SARS_features["Target"].to_numpy()
X_SARS = np.vstack(sars_feature_rows)
y_SARS = np.vstack(sars_target_rows).ravel()

In [13]:
# Fit the cloned potency models to the training data: each one is fitted on the
# feature matrix and target vector loaded for its own target (MERS-CoV Mpro and
# SARS-CoV-2 Mpro pIC50).
MERS_clean.fit(X_MERS, y_MERS)
# Final bare expression of the cell: the notebook displays whatever this call returns.
SARS_clean.fit(X_SARS, y_SARS)

In [14]:
# Read the precomputed potency test-set feature arrays.
# NOTE: this rebinds `desc2D_test`, which held the ADMET test features earlier
# in the notebook.
potency_test_feature_paths = (
    "features/antiviral-potency-2025/desc2D_test.npy",
    "features/antiviral-potency-2025/usr_test.npy",
)
desc2D_test, usr_test = map(np.load, potency_test_feature_paths)

In [15]:
# Conformer generation failed for one molecule in the test set (using "ETDG", "ETKDG",
# "ETKDGv2", and "ETKDGv3"), so we'll simply predict the average of the predictions
# for the other test molecules. This cell locates that molecule's index.
# NOTE: `test_conformers`, `valid_test_conformers` and `conf_gen_failure` are
# rebound here; they held the ADMET values earlier in the notebook.
# NOTE(review): pickle.load can execute arbitrary code; only load conformer files
# produced by this project.
with open("features/antiviral-potency-2025/test_conformers.pkl", "rb") as f:
    test_conformers = pickle.load(f)

# True where a conformer exists, False where generation failed (entry is None).
valid_test_conformers = [conf is not None for conf in test_conformers]

# The prediction cell inserts exactly one fill-in value at `conf_gen_failure`.
# With any other number of failures the predictions would silently end up
# misaligned with the test set, so fail loudly instead of taking the first hit.
n_conf_gen_failures = valid_test_conformers.count(False)
if n_conf_gen_failures != 1:
    raise ValueError(
        f"expected exactly one failed test conformer, found {n_conf_gen_failures}"
    )
conf_gen_failure = valid_test_conformers.index(False)

In [16]:
# Predict both potency targets from their matching test-feature arrays.
MERS_preds, SARS_preds = (
    estimator.predict(test_features)
    for estimator, test_features in (
        (MERS_clean, usr_test),
        (SARS_clean, desc2D_test),
    )
)

# MERS: the molecule whose conformer generation failed receives the mean of the
# predictions for the other test molecules, inserted at that molecule's index.
mers_fill_value = np.mean(MERS_preds)
MERS_preds = np.insert(MERS_preds, conf_gen_failure, mers_fill_value)

In [17]:
# Gather the potency predictions in one mapping keyed by target name; this is
# the object handed to submit_predictions below. It rebinds `y_pred`, which
# held the ADMET predictions earlier in the notebook.
y_pred = {
    "pIC50 (SARS-CoV-2 Mpro)": SARS_preds,
    "pIC50 (MERS-CoV Mpro)": MERS_preds,
}

In [18]:
# Load the potency competition through polaris (rebinds `competition`).
potency_competition_id = "asap-discovery/antiviral-potency-2025"
competition = po.load_competition(potency_competition_id)

In [19]:
# Submit the potency predictions together with the report and repository links.
potency_submission = dict(
    predictions=y_pred,
    prediction_name="final_predictions",
    prediction_owner="wtrd",
    report_url="https://docs.google.com/document/d/1464wiMCWdncfWoMIpL4WHI31qOECqeBq0CEhwiAAfW4/edit?usp=sharing",
    github_url="https://github.com/duartegroup/asap-polaris-challenge",
)
competition.submit_predictions(**potency_submission)