In [1]:
import gemmi
from rdkit import Chem
from rdkit.Chem import AllChem

In [2]:
def cif_to_smiles_with_coordinates(cif_file):
    # Read CIF file
    structure = gemmi.read_structure(cif_file)
    
    # Convert to RDKit mol object
    mol = Chem.MolFromPDBBlock(structure.make_minimal_pdb())
    
    # Generate SMILES
    smiles = Chem.MolToSmiles(mol)
    
    # Get 3D coordinates from the original structure
    coords = {}
    for model in structure:
        for chain in model:
            for residue in chain:
                for atom in residue:
                    coords[atom.name] = atom.pos.tolist()
    
    # Create a new mol from SMILES to get atom order
    mol_from_smiles = Chem.MolFromSmiles(smiles)
    
    # Add hydrogens to match the original molecule
    mol_from_smiles = Chem.AddHs(mol_from_smiles)
    
    # Get atom mapping between SMILES and original mol
    atom_mapping = mol.GetSubstructMatch(mol_from_smiles)
    
    # Collect coordinates in SMILES atom order
    ordered_coords = []
    for idx in atom_mapping:
        atom = mol.GetAtomWithIdx(idx)
        atom_name = atom.GetPDBResidueInfo().GetName().strip()
        ordered_coords.append(coords[atom_name])
    
    return smiles, ordered_coords

In [3]:
cif_to_smiles_with_coordinates('1100140.cif')

ArgumentError: Python argument types in
    rdkit.Chem.rdmolfiles.MolToSmiles(NoneType)
did not match C++ signature:
    MolToSmiles(RDKit::ROMol mol, bool isomericSmiles=True, bool kekuleSmiles=False, int rootedAtAtom=-1, bool canonical=True, bool allBondsExplicit=False, bool allHsExplicit=False, bool doRandom=False, bool ignoreAtomMapNumbers=False)
    MolToSmiles(RDKit::ROMol mol, RDKit::SmilesWriteParams params)

In [5]:
structure = gemmi.read_structure('1100140.cif')