In [3]:
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Descriptors, Lipinski, rdMolDescriptors, QED, FilterCatalog
from rdkit.Chem.FilterCatalog import FilterCatalogParams
from mordred import Calculator, descriptors

In [4]:


def _numeric_or_zero(value):
    """Return ``value`` unchanged when it is a usable number, otherwise ``0``.

    Mordred can hand back a non-numeric placeholder for a descriptor it could
    not compute (see ``mordred.error``). Such placeholders, strings, ``None``
    and NaN are all mapped to ``0`` so that the threshold comparisons and
    ``round`` calls downstream never see a non-number.
    """
    if isinstance(value, str):
        return 0
    try:
        as_float = float(value)
    except (TypeError, ValueError):
        return 0
    # NaN is the only float that compares unequal to itself.
    if as_float != as_float:
        return 0
    return value


def _admet_record(mol, calc, pains_catalog):
    """Compute the descriptor, drug-likeness and alert fields for one molecule.

    Args:
        mol: A parsed RDKit molecule (must not be ``None``).
        calc: The Mordred ``Calculator`` used for SLogP / nBase / BertzCT / nX.
        pains_catalog: RDKit ``FilterCatalog`` loaded with the PAINS filters.

    Returns:
        dict mapping output column name to value, in output column order.
    """
    # Mordred descriptors, normalised up front so that a failed descriptor
    # cannot break the comparisons below.
    mordred_values = calc(mol).asdict()
    slogp = _numeric_or_zero(mordred_values.get("SLogP", 0))
    bases = _numeric_or_zero(mordred_values.get("nBase", 0))
    complexity = _numeric_or_zero(mordred_values.get("BertzCT", 0))
    halogens = _numeric_or_zero(mordred_values.get("nX", 0))

    # RDKit descriptors
    mw = Descriptors.MolWt(mol)
    logp = Descriptors.MolLogP(mol)
    tpsa = Descriptors.TPSA(mol)
    hbd = Lipinski.NumHDonors(mol)
    hba = Lipinski.NumHAcceptors(mol)
    rotb = Lipinski.NumRotatableBonds(mol)

    # Rule-based toxicity flags built on the Mordred values.
    # NOTE(review): the cut-offs (4.5, 1000, 3) are taken as-is from the
    # original code; their provenance is not shown here.
    herg_risk = "High" if (slogp > 4.5 and bases > 0) else "Low"
    hepato_risk = "High" if (complexity > 1000 or halogens > 3) else "Low"

    return {
        "MW": round(mw, 2),
        "LogP": round(logp, 2),
        "HBD": hbd,
        "HBA": hba,
        "TPSA": round(tpsa, 2),
        "RotatableBonds": rotb,
        "QED": round(QED.qed(mol), 3),
        # Number of rule-of-five limits exceeded (MW, logP, HBD, HBA).
        "Lipinski_Violations": sum([mw > 500, logp > 5, hbd > 5, hba > 10]),
        "PAINS_Alert": int(pains_catalog.HasMatch(mol)),
        "hERG_Risk": herg_risk,
        "Hepato_Risk": hepato_risk,
        "SLogP": round(slogp, 2),
        "Bases": bases,
        "Complexity": round(complexity, 2),
        "Halogens": halogens,
    }


def fetch_admet(input_csv, output_csv, smiles_col="smiles", name_col="drug_name"):
    """Compute ADMET-style descriptors for every SMILES in a CSV and save them.

    Each input row produces exactly one output row. Rows whose SMILES is
    missing, not a string, or cannot be parsed by RDKit are kept and marked
    with ``error = "Invalid SMILES"`` instead of descriptor values.

    Args:
        input_csv: Path of the CSV file to read.
        output_csv: Path the result table is written to (without the index).
        smiles_col: Name of the column holding the SMILES strings.
        name_col: Name of an optional label column; when present in the input
            it is copied to the first column of the output.

    Returns:
        pandas.DataFrame with one row per input row.

    Raises:
        KeyError: If ``smiles_col`` is not a column of the input file.
    """
    df = pd.read_csv(input_csv)

    # Fail before the (comparatively expensive) catalog/calculator setup, and
    # also when the file has no rows, where the error used to go unnoticed.
    if smiles_col not in df.columns:
        raise KeyError(f"Column '{smiles_col}' not found in {input_csv}")

    params = FilterCatalogParams()
    params.AddCatalog(FilterCatalogParams.FilterCatalogs.PAINS)
    pains_catalog = FilterCatalog.FilterCatalog(params)

    calc = Calculator(
        [
            descriptors.SLogP,
            descriptors.AcidBase,
            descriptors.BertzCT,
            descriptors.AtomCount,
            descriptors.BondCount,
            descriptors.RotatableBond,
        ],
        ignore_3D=True,
    )

    results = []
    for smi in df[smiles_col]:
        # Only strings are handed to RDKit; NaN and other cell types would
        # otherwise raise inside MolFromSmiles and abort the whole run.
        mol = Chem.MolFromSmiles(smi) if isinstance(smi, str) else None

        if mol is None:
            results.append({smiles_col: smi, "error": "Invalid SMILES"})
            continue

        results.append({smiles_col: smi, **_admet_record(mol, calc, pains_catalog)})

    results_df = pd.DataFrame(results)
    if name_col in df.columns:
        # results has one entry per input row, so the labels line up
        # positionally without any truncation.
        results_df.insert(0, name_col, df[name_col].to_numpy())

    results_df.to_csv(output_csv, index=False)
    print(f"Saved {len(results_df)} results to {output_csv}")
    return results_df

if __name__ == "__main__":
    # Run the pipeline on ddms1.csv with the default column names and write
    # the resulting table to admet_results.csv.
    fetch_admet(input_csv="ddms1.csv", output_csv="admet_results.csv")

Saved 149 results to admet_results.csv
