In [6]:
from rdkit.Chem.rdMolDescriptors import CalcMolFormula
from rdkit.Chem.Descriptors import ExactMolWt, MolWt
from rdkit import RDLogger, Chem
import re

In [7]:
# RDLogger.DisableLog('rdApp.*') # Disable rdkit log (warning) messages

inchikey_pattern = re.compile(r"([A-Z]{14}-[A-Z]{10}-[NO])|([A-Z]{14})", flags=re.IGNORECASE) # Match inchikey or short inchikey
indigo_smiles_correction_pattern = re.compile(r"\|[\s\S]*")

In [8]:
def apply_transformations(inchi_smiles):
    """
    Apply transformations to a given InChI or SMILES string.

    :param inchi_smiles: The InChI or SMILES string.
    :return: A dictionary containing the transformed values of the input string.
    """
    transforms = {}

    if 'InChI=' not in inchi_smiles:
        inchi_smiles = re.sub(indigo_smiles_correction_pattern, "", inchi_smiles)
    print(inchi_smiles)
    if isinstance(inchi_smiles, str):
        mol = Chem.MolFromInchi(inchi_smiles) if 'InChI=' in inchi_smiles else Chem.MolFromSmiles(inchi_smiles)
        print(mol)
        # Mol harmonization
        if mol is not None:
            transforms = {
                'INCHI': Chem.MolToInchi(mol),
                'INCHIKEY': Chem.MolToInchiKey(mol),
                'SMILES': Chem.MolToSmiles(mol),
                'FORMULA': CalcMolFormula(mol),
            }
        # Mass calculation
        if transforms:
            mol = Chem.MolFromInchi(transforms['INCHI']) if 'InChI=' in inchi_smiles else Chem.MolFromSmiles(transforms['SMILES'])
            if mol is not None:
                try:
                    transforms['EXACTMASS'] = ExactMolWt(mol)
                    transforms['AVERAGEMASS'] = MolWt(mol)
                except:
                    return transforms

    return transforms

In [9]:
inchi_smiles = "O=C1N=C(O)C(=CN1C2OC(CO)C(OP(=O)(O)O)C2)C"

In [10]:
print(apply_transformations(inchi_smiles))

O=C1N=C(O)C(=CN1C2OC(CO)C(OP(=O)(O)O)C2)C
<rdkit.Chem.rdchem.Mol object at 0x000001B8DDB62030>
{'INCHI': 'InChI=1S/C10H15N2O8P/c1-5-3-12(10(15)11-9(5)14)8-2-6(7(4-13)19-8)20-21(16,17)18/h3,6-8,13H,2,4H2,1H3,(H,11,14,15)(H2,16,17,18)', 'INCHIKEY': 'XXYIANZGUOSQHY-UHFFFAOYSA-N', 'SMILES': 'Cc1cn(C2CC(OP(=O)(O)O)C(CO)O2)c(=O)nc1O', 'FORMULA': 'C10H15N2O8P', 'EXACTMASS': 322.05660207000005, 'AVERAGEMASS': 322.21000000000004}



