In [1]:
import polaris as po
import base64
from rdkit import Chem
import datamol as dm

# Load the competition object; later cells use it to obtain the test split
# and to submit the serialized predictions.
competition = po.load_competition("asap-discovery/antiviral-ligand-poses-2025")

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
input_sdf_mers = 'MERS/full_run-MERS/cs_optimised_molecules.sdf'
input_sdf_mers_R2 = 'MERS/complex-79/cs_optimised_molecules.sdf'
input_sdf_mers_docking = 'MERS/docking/merged-MERS.sdf'

input_sdf_sars = 'SARS/full_run-SARS/cs_optimised_molecules.sdf'
input_sdf_sars_R2 = 'SARS/complex-48/cs_optimised_molecules.sdf'
input_sdf_sars_docking = 'SARS/docking/merged-SARS.sdf'


def _load_poses(sdf_path, require_success=True):
    """Read the molecules stored in an SDF file, in file order.

    Parameters
    ----------
    sdf_path : str
        Path of the SDF file to read.
    require_success : bool, optional
        When True (default), keep only records whose 'Success' property is
        the string 'True'. Records that lack the property are skipped rather
        than raising ``KeyError``. When False, every parsed record is kept.

    Returns
    -------
    list
        The retained molecules. Records the supplier yields as ``None``
        (i.e. could not be parsed) are always dropped.
    """
    poses = []
    with Chem.SDMolSupplier(sdf_path) as supplier:
        for mol in supplier:
            if mol is None:
                continue
            if require_success and mol.GetPropsAsDict().get('Success') != 'True':
                continue
            poses.append(mol)
    return poses


## Collect MERS data, add later runs at top of the list:
# Order matters: when the same structure appears in several files, the
# matching step uses the first occurrence, so earlier sources take priority.
# The docking file is read without the 'Success' filter.
y_pred_mers = (
    _load_poses(input_sdf_mers_docking, require_success=False)
    + _load_poses(input_sdf_mers_R2)
    + _load_poses(input_sdf_mers)
)

## Collect SARS data (same source order as MERS):
y_pred_sars = (
    _load_poses(input_sdf_sars_docking, require_success=False)
    + _load_poses(input_sdf_sars_R2)
    + _load_poses(input_sdf_sars)
)

# may be some duplicate structures, the first occurrence will be used
print(len(y_pred_mers), len(y_pred_sars))

105 106




In [3]:
# Retrieve the competition's train/test split. The cells below index `test`
# to read each entry's "CXSMILES" and 'Protein Label' fields.
train, test = competition.get_train_test_split()

In [4]:
def _first_pose_by_smiles(mols):
    """Index molecules by canonical SMILES, keeping the first occurrence.

    The key is produced by writing each molecule to SMILES and
    re-canonicalising that string, which is the same round-trip previously
    performed inside the search loop. Doing it once per molecule here avoids
    repeating it for every test entry.
    """
    lookup = {}
    for mol in mols:
        key = Chem.CanonSmiles(Chem.MolToSmiles(mol))
        # setdefault keeps the earliest molecule when structures are duplicated
        lookup.setdefault(key, mol)
    return lookup


# One lookup table per protein target, keyed by the test set's 'Protein Label'.
poses_by_label = {
    "MERS-CoV Mpro": _first_pose_by_smiles(y_pred_mers),
    "SARS-CoV-2 Mpro": _first_pose_by_smiles(y_pred_sars),
}

y_pred = []

for i in range(len(test)):
    smiles = test[i]["CXSMILES"]
    label = test[i]['Protein Label']

    # The test SMILES is compared exactly as provided (it is not
    # re-canonicalised here); an unrecognised label yields no match.
    match = poses_by_label.get(label, {}).get(smiles)

    if match is not None:
        y_pred.append(match)
    else:
        # check for missing molecules: nothing is appended for this entry,
        # so y_pred will be shorter than the test set
        print(i, label, smiles)

In [5]:
# Predictions are collected in test order, one per entry, so the two lengths
# should be equal. A mismatch means at least one test molecule had no pose and
# every later prediction would be shifted, so stop before anything is submitted.
print(len(test), len(y_pred))
if len(test) != len(y_pred):
    raise ValueError(
        f"Expected {len(test)} predictions (one per test entry) but collected {len(y_pred)}"
    )

195 195


In [6]:
def serialize_rdkit_mol(mol: Chem.Mol):
    """Encode an RDKit molecule as an ASCII base64 string.

    The molecule is converted to RDKit's binary form using the
    ``PropertyPickleOptions.AllProps`` option, and the resulting bytes are
    base64-encoded and returned as a ``str``.
    """
    pickled = mol.ToBinary(Chem.PropertyPickleOptions.AllProps)
    encoded = base64.b64encode(pickled)
    return encoded.decode('ascii')


# Serialize every matched pose, preserving the order of y_pred.
y_pred_serialized = list(map(serialize_rdkit_mol, y_pred))

In [7]:
# Spot-check the serialisation: decode one entry back into a molecule and
# compare it with the molecule it was produced from.

index = 0

raw_bytes = base64.b64decode(y_pred_serialized[index].encode("ascii"))
deserialized = Chem.Mol(raw_bytes)
dm.same_mol(y_pred[index], deserialized)

True

### Submission

In [8]:
# Upload the base64-serialized poses for evaluation. This call has an external
# side effect (it sends the predictions to the Hub), so re-running the cell
# submits again.
competition.submit_predictions(
    # one serialized molecule per test entry, in test order
    predictions=y_pred_serialized,
    # edit for alternative submission:
    prediction_name="newcastle-edinburgh-fegrow",
    prediction_owner="djc56",
    report_url="https://github.com/cole-group/FEgrow", 
    # The below metadata is optional, but recommended.
    github_url="https://github.com/cole-group/FEgrow",
    description="FEgrow submission by Finlay Clark, Asma Feriel Khoualdi, Josh Horton, Julien Michel and Daniel Cole (v2)",
    tags=["FEgrow", "ANI", "OpenMM", "RDKit"],
    user_attributes={"Framework": "FEgrow", "Method": "Constrained geometry optimisation with ML/MM"}
)

✅ SUCCESS: [1mYour competition predictions have been successfully uploaded to the Hub for evaluation.[0m
 


  self._color = self._set_color(value) if value else value


In [9]:
# Save the submitted poses, in submission order, to a local SDF file.
# The context manager closes the writer when the block exits, so all records
# are flushed to disk; previously the writer was never closed.
with Chem.SDWriter('submitted.sdf') as writer:
    for mol in y_pred:
        writer.write(mol)