In [36]:
from rdkit.Chem.rdMolDescriptors import CalcMolFormula
from rdkit.Chem.Descriptors import ExactMolWt, MolWt
from rdkit import RDLogger, Chem
import re

In [37]:
# RDLogger.DisableLog('rdApp.*') # Disable rdkit log (warning) messages

inchikey_pattern = re.compile(r"([A-Z]{14}-[A-Z]{10}-[NO])|([A-Z]{14})", flags=re.IGNORECASE) # Match inchikey or short inchikey
indigo_smiles_correction_pattern = re.compile(r"\|[\s\S]*")

In [38]:
def apply_transformations(inchi_smiles):
    """
    Apply transformations to a given InChI or SMILES string.

    :param inchi_smiles: The InChI or SMILES string.
    :return: A dictionary containing the transformed values of the input string.
    """
    transforms = {}

    if 'InChI=' not in inchi_smiles:
        inchi_smiles = re.sub(indigo_smiles_correction_pattern, "", inchi_smiles)
    print(inchi_smiles)
    if isinstance(inchi_smiles, str):
        mol = Chem.MolFromInchi(inchi_smiles) if 'InChI=' in inchi_smiles else Chem.MolFromSmiles(inchi_smiles, sanitize=True)
        print(mol)
        # Mol harmonization
        if mol is not None:
            transforms = {
                'INCHI': Chem.MolToInchi(mol),
                'INCHIKEY': Chem.MolToInchiKey(mol),
                'SMILES': Chem.MolToSmiles(mol, kekuleSmiles=True),
                'FORMULA': CalcMolFormula(mol),
            }
        # Mass calculation
        if transforms:
            mol = Chem.MolFromInchi(transforms['INCHI']) if 'InChI=' in inchi_smiles else Chem.MolFromSmiles(transforms['SMILES'])
            if mol is not None:
                try:
                    transforms['EXACTMASS'] = ExactMolWt(mol)
                    transforms['AVERAGEMASS'] = MolWt(mol)
                except:
                    return transforms

    return transforms

In [39]:
inchi_smiles = "CCC=CCC=CCC=CCC=CCC=CCCCCCCCC(=O)OC(COC(=O)CCCCCCCC)COP(=O)([O-])OCC[N+](C)(C)C"

In [40]:
print(apply_transformations(inchi_smiles))

CCC=CCC=CCC=CCC=CCC=CCCCCCCCC(=O)OC(COC(=O)CCCCCCCC)COP(=O)([O-])OCC[N+](C)(C)C
<rdkit.Chem.rdchem.Mol object at 0x00000260F768AFF0>
{'INCHI': 'InChI=1S/C41H72NO8P/c1-6-8-10-12-14-15-16-17-18-19-20-21-22-23-24-25-26-27-28-30-32-34-41(44)50-39(37-47-40(43)33-31-29-13-11-9-7-2)38-49-51(45,46)48-36-35-42(3,4)5/h8,10,14-15,17-18,20-21,23-24,39H,6-7,9,11-13,16,19,22,25-38H2,1-5H3', 'INCHIKEY': 'AALQMHQUDHIRSD-UHFFFAOYSA-N', 'SMILES': 'CCC=CCC=CCC=CCC=CCC=CCCCCCCCC(=O)OC(COC(=O)CCCCCCCC)COP(=O)([O-])OCC[N+](C)(C)C', 'FORMULA': 'C41H72NO8P', 'EXACTMASS': 737.4995548939999, 'AVERAGEMASS': 738.0000000000002}



