# Import

In [1]:
import numpy as np
from pathlib import Path
import pandas as pd
from tqdm import tqdm
from asapdiscovery.docking.analysis import calculate_rmsd_openeye
from asapdiscovery.data.schema.ligand import Ligand
from asapdiscovery.data.readers.molfile import MolFileFactory

# Load Data

In [2]:
# Absolute path to the combined 3D SDF of ligands (judging by the directory name,
# the curated Mpro Fragalysis cache from 04-01-24 — confirm provenance).
# NOTE(review): hard-coded cluster path; the notebook only runs where this mount exists.
original_ligand_path = Path("/data/chodera/asap-datasets/mpro_fragalysis-04-01-24_curated_cache/combined_3d.sdf")

In [4]:
# Build a reader for the SDF at `original_ligand_path` and load its contents.
# `ligs` is later passed to get_n_to_n_mcs, which annotates its input as list[Ligand]
# (presumably what MolFileFactory.load() returns — confirm against asapdiscovery).
mff = MolFileFactory(filename=original_ligand_path)
ligs = mff.load()

In [None]:
# Collect every CSV file directly inside the multi-pose cross-docking results directory.
# Fix: the original closed `list(` with `]`, which is a SyntaxError, so the cell never ran.
# NOTE: despite the name, this is a list of Path objects (not DataFrames), and
# Path.glob does not guarantee any particular ordering.
results_dfs = list(Path("/lila/data/chodera/asap-datasets/retro_docking/sars_fragalysis_retrospective/20240424_multi_pose_docking_cross_docking").glob("*.csv"))

# Calculate n-to-n MCS

In [8]:
def get_n_to_n_mcs(mols: list[Ligand]):
    """Build the all-against-all maximum common substructure (MCS) size matrix.

    Every molecule in ``mols`` is used once as the MCS search pattern and
    compared against every molecule in ``mols`` (itself included).

    Parameters
    ----------
    mols : list[Ligand]
        Ligands to compare; each is converted with ``to_oemol()``.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(len(mols), len(mols))``. Entry ``[i, j]`` is
        ``NumAtoms()`` of the first match returned when ``mols[i]`` is the
        pattern and ``mols[j]`` is the target, or 0 when no match is returned.

    Notes
    -----
    One ``OEMCSSearch`` is built per pattern molecule and reused for all
    targets, so the work grows with ``len(mols) ** 2`` searches.
    """
    from asapdiscovery.data.backend.openeye import oechem

    # These are the default atom and bond expressions (per the original author);
    # they are spelled out here purely to be explicit.
    atom_options = (
        oechem.OEExprOpts_Aromaticity
        | oechem.OEExprOpts_AtomicNumber
        | oechem.OEExprOpts_FormalCharge
    )
    bond_options = oechem.OEExprOpts_Aromaticity | oechem.OEExprOpts_BondOrder

    # Convert twice so patterns and targets are independent objects and neither
    # set can be silently edited through the other.
    pattern_mols = [ligand.to_oemol() for ligand in mols]
    target_mols = [ligand.to_oemol() for ligand in mols]

    common_atom_counts = np.zeros((len(pattern_mols), len(target_mols)), dtype=int)
    for row, pattern_mol in tqdm(enumerate(pattern_mols), total=len(pattern_mols)):
        # One search object per pattern, scored by max atoms with complete cycles.
        query = oechem.OEQMol(pattern_mol)
        query.BuildExpressions(atom_options, bond_options)
        search = oechem.OEMCSSearch(query)
        search.SetMCSFunc(oechem.OEMCSMaxAtomsCompleteCycles())

        for col, target_mol in enumerate(target_mols):
            # Only the first returned match is inspected; None means no match.
            first_match = next(iter(search.Match(target_mol, True)), None)
            if first_match is None:
                common_atom_counts[row, col] = 0
            else:
                common_atom_counts[row, col] = first_match.NumAtoms()
    return common_atom_counts

In [11]:
# Compute the full pairwise MCS atom-count matrix for all loaded ligands.
# NOTE(review): very expensive — the recorded run below was interrupted after
# 2 of 205 reference molecules (~1500 s each, ~85 h projected). Consider running
# on a subset or caching the result before relying on this cell.
mcs_num_atoms = get_n_to_n_mcs(ligs)

  1%|          | 2/205 [50:39<85:42:28, 1519.94s/it]


KeyboardInterrupt: 