<a href="https://colab.research.google.com/github/glevans/PDB_Notebooks/blob/main/RDKIT_smiles_to_inchi_inchikeys.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install rdkit

Collecting rdkit
  Downloading rdkit-2025.9.3-cp312-cp312-manylinux_2_28_x86_64.whl.metadata (4.2 kB)
Downloading rdkit-2025.9.3-cp312-cp312-manylinux_2_28_x86_64.whl (36.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m36.4/36.4 MB[0m [31m17.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rdkit
Successfully installed rdkit-2025.9.3


In [None]:
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.EnumerateStereoisomers import EnumerateStereoisomers, StereoEnumerationOptions

In [None]:
# Get InChI software version
# Report which InChI release RDKit is using, so the InChI / InChIKey values
# printed in the later cells can be traced back to a specific InChI version.
inchi_version = Chem.GetInchiVersion()

print(f"InChI Version: {inchi_version}")

InChI Version: 1.07.3


In [None]:
# Create a molecule from SMILES when it IS already an ISOMERIC SMILES string.
# 'CC(=O)O' (acetic acid) needs no stereo markers, so the InChI and InChIKey
# are generated directly from the parsed molecule without enumerating isomers.
mol = Chem.MolFromSmiles('CC(=O)O')

# Generate InChI
inchi = Chem.MolToInchi(mol)
print(f"InChI: {inchi}")

# Generate InChIKey
inchikey = Chem.MolToInchiKey(mol)
print(f"InChIKey: {inchikey}")

InChI: InChI=1S/C2H4O2/c1-2(3)4/h1H3,(H,3,4)
InChIKey: QTBSBXVTEAMEQO-UHFFFAOYSA-N


In [None]:
# Create a molecule from SMILES when NOT an ISOMERIC SMILES string:
# this SMILES carries no stereo markers, so the next cell enumerates the
# possible stereoisomers before generating an InChI / InChIKey for each.
mol = Chem.MolFromSmiles('FC(F)(F)c1ccnc(n1)C2CCCN2')

In [None]:
# Add explicit hydrogens and build one 3D conformer of the input molecule.
mol = Chem.AddHs(mol)
# EmbedMolecule returns the id of the new conformer, or -1 when embedding
# fails. Check it here so a failure is reported clearly instead of surfacing
# as an opaque error from GetConformer(). The fixed seed makes the embedding
# reproducible under Restart & Run All.
if AllChem.EmbedMolecule(mol, randomSeed=42) < 0:
    raise RuntimeError("RDKit could not embed a 3D conformer for this molecule")
conf = mol.GetConformer()

# Stereoisomers
# tryEmbedding=True asks RDKit to test-embed each candidate isomer in 3D.
opts = StereoEnumerationOptions(tryEmbedding=True)
isomers = tuple(EnumerateStereoisomers(mol, options=opts))
print(isomers)
print("Number of isomers calculated:", len(isomers))
print()
# Sort the isomeric SMILES so the report order is stable between runs.
isomer_smiles = sorted(
    Chem.MolToSmiles(isomer, isomericSmiles=True) for isomer in isomers
)
for smi in isomer_smiles:
    # Report ISOMERIC SMILES
    print(f"SMILES: {smi}")
    # Parse the SMILES once and reuse the molecule for both identifiers.
    isomer_mol = Chem.MolFromSmiles(smi)
    # Generate InChI
    inchi = Chem.MolToInchi(isomer_mol)
    print(f"InChI: {inchi}")
    # Generate InChIKey
    inchikey = Chem.MolToInchiKey(isomer_mol)
    print(f"InChIKey: {inchikey}")
    print()

(<rdkit.Chem.rdchem.Mol object at 0x7cd89a7ae340>, <rdkit.Chem.rdchem.Mol object at 0x7cd89a7ae8e0>)
Number of isomers calculated: 2

SMILES: [H]c1nc([C@@]2([H])N([H])C([H])([H])C([H])([H])C2([H])[H])nc(C(F)(F)F)c1[H]
InChI: InChI=1S/C9H10F3N3/c10-9(11,12)7-3-5-14-8(15-7)6-2-1-4-13-6/h3,5-6,13H,1-2,4H2/t6-/m0/s1
InChIKey: UYXOJZURHVPGSE-LURJTMIESA-N

SMILES: [H]c1nc([C@]2([H])N([H])C([H])([H])C([H])([H])C2([H])[H])nc(C(F)(F)F)c1[H]
InChI: InChI=1S/C9H10F3N3/c10-9(11,12)7-3-5-14-8(15-7)6-2-1-4-13-6/h3,5-6,13H,1-2,4H2/t6-/m1/s1
InChIKey: UYXOJZURHVPGSE-ZCFIWIBFSA-N



In [None]:
# Create a molecule from SMILES when NOT an ISOMERIC SMILES string:
# this SMILES carries no stereo markers (the C=N bond geometry is left
# unspecified), so the next cell enumerates the possible stereoisomers.
mol = Chem.MolFromSmiles('NC(=N)c1ccc2ccccc2c1')

In [None]:
# Add explicit hydrogens and build one 3D conformer of the input molecule.
mol = Chem.AddHs(mol)
# EmbedMolecule returns the id of the new conformer, or -1 when embedding
# fails. Check it here so a failure is reported clearly instead of surfacing
# as an opaque error from GetConformer(). The fixed seed makes the embedding
# reproducible under Restart & Run All.
if AllChem.EmbedMolecule(mol, randomSeed=42) < 0:
    raise RuntimeError("RDKit could not embed a 3D conformer for this molecule")
conf = mol.GetConformer()

# Stereoisomers
# tryEmbedding=True asks RDKit to test-embed each candidate isomer in 3D.
opts = StereoEnumerationOptions(tryEmbedding=True)
isomers = tuple(EnumerateStereoisomers(mol, options=opts))
print(isomers)
print("Number of isomers calculated:", len(isomers))
print()
# Sort the isomeric SMILES so the report order is stable between runs.
isomer_smiles = sorted(
    Chem.MolToSmiles(isomer, isomericSmiles=True) for isomer in isomers
)
for smi in isomer_smiles:
    # Report ISOMERIC SMILES
    print(f"SMILES: {smi}")
    # Parse the SMILES once and reuse the molecule for both identifiers.
    isomer_mol = Chem.MolFromSmiles(smi)
    # Generate InChI
    inchi = Chem.MolToInchi(isomer_mol)
    print(f"InChI: {inchi}")
    # Generate InChIKey
    inchikey = Chem.MolToInchiKey(isomer_mol)
    print(f"InChIKey: {inchikey}")
    print()

# Note:
# The two isomeric SMILES printed by this cell differ only in the E/Z geometry
# written for the C=N bond, yet both give the SAME InChI and the SAME InChIKey.
# The InChI contains no double-bond stereo layer and lists the nitrogen
# hydrogens as mobile, "(H3,12,13)", so both drawings are reported as one
# compound. These are stereoisomer drawings of the amidine, not conformers.

(<rdkit.Chem.rdchem.Mol object at 0x7cd89a7ad800>, <rdkit.Chem.rdchem.Mol object at 0x7cd89a766cf0>)
Number of isomers calculated: 2

SMILES: [H]/N=C(/c1c([H])c([H])c2c([H])c([H])c([H])c([H])c2c1[H])N([H])[H]
InChI: InChI=1S/C11H10N2/c12-11(13)10-6-5-8-3-1-2-4-9(8)7-10/h1-7H,(H3,12,13)
InChIKey: URXJHZXEUUFNKM-UHFFFAOYSA-N

SMILES: [H]/N=C(\c1c([H])c([H])c2c([H])c([H])c([H])c([H])c2c1[H])N([H])[H]
InChI: InChI=1S/C11H10N2/c12-11(13)10-6-5-8-3-1-2-4-9(8)7-10/h1-7H,(H3,12,13)
InChIKey: URXJHZXEUUFNKM-UHFFFAOYSA-N

