In [1]:
import stk
import stko
from rdkit.Chem import AllChem as rdkit
import numpy as np

# Get a structure.
Download CIF of cage of interest: NAVCIA (2116647)

From this paper: https://pubs.acs.org/doi/10.1021/acs.inorgchem.1c02803

Using software of choice, write a .mol file of a single molecule from the CIF.

## Issues:

* How to extract the single molecule from a crystal structure?
* How to get the structure into a mol file, with bonds perceived?
* stk handling of mol files with dative bonds: the atom order within a dative bond matters, and the metal atom must come second.

In [2]:
# Load the single-molecule .mol file (written from the CIF) as an stk
# building block; no functional groups are requested at this stage.
cage = stk.BuildingBlock.init_from_file('2116647_cleaned_2.mol')

In [3]:
# Show the building block's repr (its SMILES) to inspect how the bonds
# in the mol file were perceived.
cage

BuildingBlock('[CH3]->[Pt]12(<-[CH3])(<-[CH3])<-[N]3[CH]C=C(C=C3)C3=C[CH][N](->[Pt]4(<-[CH3])(<-[CH3])(<-[CH3])<-[N]5[CH]C=C(C=C5)C5=C[CH][N](->[Pt]6(<-[CH3])(<-[CH3])(<-[CH3])<-[N]7[CH]C=C(C=C7)C7=C[CH][N](->[Pt]8(<-[CH3])(<-[CH3])(<-[CH3])<-[N]9[CH]C=C(C=C9)C9=C[CH][N](->[Pt](<-[CH3])(<-[CH3])(<-[CH3])(<-[N]%10[CH]C=C(C=C%10)C%10=C[CH][N](->[Pt](<-[CH3])(<-[CH3])(<-[CH3])(<-[N]%11[CH]C=C(C=C%11)C%11=C[CH][N]->1C=C%11)<-[N]1[CH]C=C(C=C1)c1ccn->6cc1)C=C%10)<-[N]1[CH]C=C(C=C1)C1=C[CH][N](->[Pt](<-[CH3])(<-[CH3])(<-[CH3])(<-[N]6[CH]C=C(C=C6)C6=C[CH][N]->2C=C6)<-n2ccc(cc2)-c2ccn(->[Pt](<-[CH3])(<-[CH3])(<-[CH3])(<-[N]6[CH]C=C(C=C6)C6=C[CH][N]->4C=C6)<-[N]4[CH]C=C(C=C4)c4ccn->8cc4)cc2)C=C1)C=C9)C=C7)C=C5)C=C3')

## Topology extraction

In [4]:
# Work on a copy of the cage whose centroid sits at the origin.
struct = cage.with_centroid([0, 0, 0])

# Define bonds to break: any bond between a nitrogen and a platinum atom.
smarts_to_search_for = '[#7]~[Pt]'

rdkit_mol = struct.to_rdkit_mol()
rdkit.SanitizeMol(rdkit_mol)
bond_query = rdkit.MolFromSmarts(smarts_to_search_for)

# broken_bonds_by_id: one sorted [id, id] pair per matched N~Pt bond.
# disconnectors: flat list of every atom id taking part in such a bond.
broken_bonds_by_id = []
disconnectors = []
for nitrogen_id, platinum_id in rdkit_mol.GetSubstructMatches(
    query=bond_query,
):
    # The SMARTS has two atoms, so each match is (N id, Pt id).
    broken_bonds_by_id.append(sorted((nitrogen_id, platinum_id)))
    disconnectors += [nitrogen_id, platinum_id]

In [5]:
# Sanity check: report how many N~Pt bonds were matched for breaking.
print(f'{len(broken_bonds_by_id)} bonds to break')

24 bonds to break


In [6]:
# Extract vertex/edge information for the cage after breaking the
# selected N~Pt bonds. The disconnector ids are de-duplicated first
# because each Pt (and N) id was recorded once per bond it takes part in.
new_topology_graph = stko.TopologyExtractor()
unique_disconnectors = set(disconnectors)
tg_info = new_topology_graph.extract_topology(
    molecule=struct,
    broken_bonds_by_id=broken_bonds_by_id,
    disconnectors=unique_disconnectors,
)

# Save the centred cage and the extracted topology for visual inspection.
struct.write('tg_cage.mol')
tg_info.write('tg_info.pdb')

## Write new topology graph

In [7]:
v_pos = tg_info.get_vertex_positions()
v_con = tg_info.get_connectivities()

# Need this for later.
metal_bb_ids = []
bridging_bb_ids = []

# Reported connectivity -> (first line of the printed vertex, id list to
# extend). Connectivity 1 is printed as a NonLinearVertex and collected as
# a metal vertex; connectivity 2 is printed as a LinearVertex and collected
# as a bridging vertex. Any other value is skipped without output.
# NOTE(review): presumably the connectivity counts disconnector atoms in a
# fragment (one Pt, or two N) rather than neighbouring vertices -- confirm
# against stko.
vertex_kinds = {
    1: ('stk.cage.NonLinearVertex(\n', metal_bb_ids),
    2: ('stk.cage.LinearVertex(\n', bridging_bb_ids),
}

for vertex_id in v_pos:
    kind = vertex_kinds.get(v_con[vertex_id])
    if kind is None:
        continue
    header, id_list = kind
    position = v_pos[vertex_id]
    # Coordinates are rounded to two decimals for the printed source.
    x, y, z = (round(position[axis], 2) for axis in range(3))
    print(
        header
        + f'    id={vertex_id},\n'
        + f'    position=[{x}, {y}, {z}],\n'
        + '),\n'
    )
    id_list.append(vertex_id)

stk.cage.NonLinearVertex(
    id=0,
    position=[3.45, -5.5, -9.41],
),

stk.cage.NonLinearVertex(
    id=1,
    position=[-3.45, 5.5, 9.41],
),

stk.cage.NonLinearVertex(
    id=2,
    position=[-2.95, 10.47, -2.61],
),

stk.cage.NonLinearVertex(
    id=3,
    position=[2.95, -10.47, 2.61],
),

stk.cage.NonLinearVertex(
    id=4,
    position=[8.76, 5.42, -4.77],
),

stk.cage.NonLinearVertex(
    id=5,
    position=[-8.76, -5.42, 4.77],
),

stk.cage.NonLinearVertex(
    id=6,
    position=[-8.18, -0.44, -7.11],
),

stk.cage.NonLinearVertex(
    id=7,
    position=[8.18, 0.44, 7.11],
),

stk.cage.LinearVertex(
    id=8,
    position=[-2.86, 7.17, 3.07],
),

stk.cage.LinearVertex(
    id=9,
    position=[2.87, -7.17, -3.07],
),

stk.cage.LinearVertex(
    id=10,
    position=[5.45, -0.04, -6.31],
),

stk.cage.LinearVertex(
    id=11,
    position=[-5.45, 0.04, 6.31],
),

stk.cage.LinearVertex(
    id=12,
    position=[2.61, 7.21, -3.28],
),

stk.cage.LinearVertex(
    id=13,
    positi

In [9]:
# Print one stk.Edge(...) source snippet per extracted edge pair; the two
# ids of each pair index into the `_vertex_prototypes` tuple of the
# topology class written by hand from this output.
edges = tg_info.get_edge_pairs()
for edge_id, pair in enumerate(edges):
    first_id, second_id = pair[0], pair[1]
    snippet_lines = (
        'stk.Edge(',
        f'    id={edge_id},',
        f'    vertex1=_vertex_prototypes[{first_id}],',
        f'    vertex2=_vertex_prototypes[{second_id}],',
        '),',
        '',  # keeps the trailing newline after the closing '),'
    )
    print('\n'.join(snippet_lines))

stk.Edge(
    id=0,
    vertex1=_vertex_prototypes[1],
    vertex2=_vertex_prototypes[8],
),

stk.Edge(
    id=1,
    vertex1=_vertex_prototypes[0],
    vertex2=_vertex_prototypes[9],
),

stk.Edge(
    id=2,
    vertex1=_vertex_prototypes[0],
    vertex2=_vertex_prototypes[10],
),

stk.Edge(
    id=3,
    vertex1=_vertex_prototypes[1],
    vertex2=_vertex_prototypes[11],
),

stk.Edge(
    id=4,
    vertex1=_vertex_prototypes[2],
    vertex2=_vertex_prototypes[12],
),

stk.Edge(
    id=5,
    vertex1=_vertex_prototypes[3],
    vertex2=_vertex_prototypes[13],
),

stk.Edge(
    id=6,
    vertex1=_vertex_prototypes[0],
    vertex2=_vertex_prototypes[14],
),

stk.Edge(
    id=7,
    vertex1=_vertex_prototypes[1],
    vertex2=_vertex_prototypes[15],
),

stk.Edge(
    id=8,
    vertex1=_vertex_prototypes[4],
    vertex2=_vertex_prototypes[10],
),

stk.Edge(
    id=9,
    vertex1=_vertex_prototypes[5],
    vertex2=_vertex_prototypes[11],
),

stk.Edge(
    id=10,
    vertex1=_vertex_prototypes[

In [10]:
class NewTg(stk.cage.Cage):
    """
    Cage topology graph written out from the extracted topology.

    The vertex and edge definitions are the snippets printed by the
    vertex- and edge-printing cells of this notebook, pasted in
    verbatim (vertex coordinates rounded to two decimals).

    The graph holds 20 vertices and 24 edges:

    * ids 0-7 are ``NonLinearVertex`` instances; each appears as
      ``vertex1`` of exactly three edges.
    * ids 8-19 are ``LinearVertex`` instances; each appears as
      ``vertex2`` of exactly two edges.

    """

    # Vertex prototypes; the tuple index of each vertex equals its id,
    # which the edge definitions below rely on.
    _vertex_prototypes = (
        stk.cage.NonLinearVertex(
            id=0,
            position=[3.45, -5.5, -9.41],
        ),

        stk.cage.NonLinearVertex(
            id=1,
            position=[-3.45, 5.5, 9.41],
        ),

        stk.cage.NonLinearVertex(
            id=2,
            position=[-2.95, 10.47, -2.61],
        ),

        stk.cage.NonLinearVertex(
            id=3,
            position=[2.95, -10.47, 2.61],
        ),

        stk.cage.NonLinearVertex(
            id=4,
            position=[8.76, 5.42, -4.77],
        ),

        stk.cage.NonLinearVertex(
            id=5,
            position=[-8.76, -5.42, 4.77],
        ),

        stk.cage.NonLinearVertex(
            id=6,
            position=[-8.18, -0.44, -7.11],
        ),

        stk.cage.NonLinearVertex(
            id=7,
            position=[8.18, 0.44, 7.11],
        ),

        stk.cage.LinearVertex(
            id=8,
            position=[-2.86, 7.17, 3.07],
        ),

        stk.cage.LinearVertex(
            id=9,
            position=[2.87, -7.17, -3.07],
        ),

        stk.cage.LinearVertex(
            id=10,
            position=[5.45, -0.04, -6.31],
        ),

        stk.cage.LinearVertex(
            id=11,
            position=[-5.45, 0.04, 6.31],
        ),

        stk.cage.LinearVertex(
            id=12,
            position=[2.61, 7.21, -3.28],
        ),

        stk.cage.LinearVertex(
            id=13,
            position=[-2.61, -7.21, 3.28],
        ),

        stk.cage.LinearVertex(
            id=14,
            position=[-2.0, -2.45, -7.38],
        ),

        stk.cage.LinearVertex(
            id=15,
            position=[2.0, 2.45, 7.38],
        ),

        stk.cage.LinearVertex(
            id=16,
            position=[-4.95, 4.49, -4.23],
        ),

        stk.cage.LinearVertex(
            id=17,
            position=[4.95, -4.49, 4.23],
        ),

        stk.cage.LinearVertex(
            id=18,
            position=[7.36, 2.62, 0.98],
        ),

        stk.cage.LinearVertex(
            id=19,
            position=[-7.36, -2.62, -0.98],
        ),
    )
    
    # Edge prototypes; every edge joins a NonLinearVertex (vertex1) to a
    # LinearVertex (vertex2).
    _edge_prototypes = (
        stk.Edge(
            id=0,
            vertex1=_vertex_prototypes[1],
            vertex2=_vertex_prototypes[8],
        ),

        stk.Edge(
            id=1,
            vertex1=_vertex_prototypes[0],
            vertex2=_vertex_prototypes[9],
        ),

        stk.Edge(
            id=2,
            vertex1=_vertex_prototypes[0],
            vertex2=_vertex_prototypes[10],
        ),

        stk.Edge(
            id=3,
            vertex1=_vertex_prototypes[1],
            vertex2=_vertex_prototypes[11],
        ),

        stk.Edge(
            id=4,
            vertex1=_vertex_prototypes[2],
            vertex2=_vertex_prototypes[12],
        ),

        stk.Edge(
            id=5,
            vertex1=_vertex_prototypes[3],
            vertex2=_vertex_prototypes[13],
        ),

        stk.Edge(
            id=6,
            vertex1=_vertex_prototypes[0],
            vertex2=_vertex_prototypes[14],
        ),

        stk.Edge(
            id=7,
            vertex1=_vertex_prototypes[1],
            vertex2=_vertex_prototypes[15],
        ),

        stk.Edge(
            id=8,
            vertex1=_vertex_prototypes[4],
            vertex2=_vertex_prototypes[10],
        ),

        stk.Edge(
            id=9,
            vertex1=_vertex_prototypes[5],
            vertex2=_vertex_prototypes[11],
        ),

        stk.Edge(
            id=10,
            vertex1=_vertex_prototypes[2],
            vertex2=_vertex_prototypes[16],
        ),

        stk.Edge(
            id=11,
            vertex1=_vertex_prototypes[3],
            vertex2=_vertex_prototypes[17],
        ),

        stk.Edge(
            id=12,
            vertex1=_vertex_prototypes[4],
            vertex2=_vertex_prototypes[12],
        ),

        stk.Edge(
            id=13,
            vertex1=_vertex_prototypes[5],
            vertex2=_vertex_prototypes[13],
        ),

        stk.Edge(
            id=14,
            vertex1=_vertex_prototypes[6],
            vertex2=_vertex_prototypes[14],
        ),

        stk.Edge(
            id=15,
            vertex1=_vertex_prototypes[7],
            vertex2=_vertex_prototypes[15],
        ),

        stk.Edge(
            id=16,
            vertex1=_vertex_prototypes[4],
            vertex2=_vertex_prototypes[18],
        ),

        stk.Edge(
            id=17,
            vertex1=_vertex_prototypes[5],
            vertex2=_vertex_prototypes[19],
        ),

        stk.Edge(
            id=18,
            vertex1=_vertex_prototypes[6],
            vertex2=_vertex_prototypes[16],
        ),

        stk.Edge(
            id=19,
            vertex1=_vertex_prototypes[7],
            vertex2=_vertex_prototypes[17],
        ),

        stk.Edge(
            id=20,
            vertex1=_vertex_prototypes[7],
            vertex2=_vertex_prototypes[18],
        ),

        stk.Edge(
            id=21,
            vertex1=_vertex_prototypes[6],
            vertex2=_vertex_prototypes[19],
        ),

        stk.Edge(
            id=22,
            vertex1=_vertex_prototypes[3],
            vertex2=_vertex_prototypes[9],
        ),

        stk.Edge(
            id=23,
            vertex1=_vertex_prototypes[2],
            vertex2=_vertex_prototypes[8],
        ),
    )

## Get building blocks.

In [11]:
# Split the cage into connected fragments by removing the N~Pt bonds, then
# rebuild each fragment as a standalone stk.BuildingBlock with atom ids
# renumbered from zero.
disconnected_graphs = new_topology_graph.get_connected_graphs(
    molecule=struct,
    atom_ids_to_disconnect=broken_bonds_by_id,
)

# Keyed by SMILES, so fragments with the same SMILES overwrite each other
# and only one representative per distinct SMILES is kept.
potential_bbs = {}
for i, cg in enumerate(disconnected_graphs):
    # Atom ids (in the full cage) that belong to this fragment.
    atom_ids = [atom.get_id() for atom in cg]
    # Cage atom id -> new, zero-based id inside the fragment. Also used
    # below for constant-time "is this atom in the fragment?" checks.
    _atomid_map = {
        old_id: new_id for new_id, old_id in enumerate(atom_ids)
    }
    # Atoms are requested in `atom_ids` order; bonds and positions below
    # are indexed with the renumbered ids on that basis.
    atoms = tuple(
        stk.Atom(
            id=_atomid_map[a.get_id()],
            atomic_number=a.get_atomic_number(),
            charge=a.get_charge(),
        )
        for a in struct.get_atoms(atom_ids=atom_ids)
    )
    # Keep only bonds with both ends inside this fragment, preserving the
    # atom1/atom2 order and the bond order of the original bond.
    bonds = tuple(
        stk.Bond(
            atom1=atoms[_atomid_map[bond.get_atom1().get_id()]],
            atom2=atoms[_atomid_map[bond.get_atom2().get_id()]],
            order=bond.get_order(),
        )
        for bond in struct.get_bonds()
        if bond.get_atom1().get_id() in _atomid_map
        and bond.get_atom2().get_id() in _atomid_map
    )
    position_matrix = tuple(struct.get_atomic_positions(atom_ids))
    cg_bb = stk.BuildingBlock.init(
        atoms=atoms,
        bonds=bonds,
        position_matrix=np.array(position_matrix),
    )
    # One file per fragment, numbered by fragment index.
    cg_bb.write(f'cg_bb_{i}.mol')
    potential_bbs[stk.Smiles().get_key(cg_bb)] = cg_bb

### See the issue with bond perception here: the same bipyridine ligand appears under three different SMILES keys!

In [12]:
# Show the distinct fragments found, keyed by their SMILES string.
potential_bbs

{'[CH3]->[Pt](<-[CH3])<-[CH3]': BuildingBlock('[CH3]->[Pt](<-[CH3])<-[CH3]'),
 'c1cc(-c2ccncc2)ccn1': BuildingBlock('c1cc(-c2ccncc2)ccn1'),
 '[CH]1C=C(c2ccncc2)C=C[N]1': BuildingBlock('[CH]1C=C(c2ccncc2)C=C[N]1'),
 '[CH]1C=C(C2=C[CH][N]C=C2)C=C[N]1': BuildingBlock('[CH]1C=C(C2=C[CH][N]C=C2)C=C[N]1')}

In [13]:
# Each C~Pt pair in the Pt(CH3)3 fragment defines one functional group.
# The bonder is the atom at index 1 of the SMARTS match (the Pt) and no
# atoms are deleted, so the Pt atom ends up as the bonder of three groups.
_metal_fg_factory = stk.SmartsFunctionalGroupFactory(
    smarts='[#6]~[Pt]',
    bonders=(1, ),
    deleters=(),
)
metal_bb = stk.BuildingBlock.init_from_molecule(
    molecule=potential_bbs['[CH3]->[Pt](<-[CH3])<-[CH3]'],
    functional_groups=[_metal_fg_factory],
)
print(metal_bb)

BuildingBlock('[CH3]->[Pt](<-[CH3])<-[CH3]', (GenericFunctionalGroup(atoms=(C(0), Pt(3)), bonders=(Pt(3),), deleters=()), GenericFunctionalGroup(atoms=(C(5), Pt(3)), bonders=(Pt(3),), deleters=()), GenericFunctionalGroup(atoms=(C(7), Pt(3)), bonders=(Pt(3),), deleters=())))


In [14]:
# A two-coordinate nitrogen flanked by two carbons defines one functional
# group; the bonder is the atom at index 1 of the SMARTS match (the N)
# and no atoms are deleted.
_pyridyl_fg_factory = stk.SmartsFunctionalGroupFactory(
    smarts='[#6]~[#7X2]~[#6]',
    bonders=(1, ),
    deleters=(),
)
# Use the fragment whose SMILES was perceived as fully aromatic.
original_bridging_bb = stk.BuildingBlock.init_from_molecule(
    molecule=potential_bbs['c1cc(-c2ccncc2)ccn1'],
    functional_groups=[_pyridyl_fg_factory],
)
print(original_bridging_bb)

BuildingBlock('c1cc(-c2ccncc2)ccn1', (GenericFunctionalGroup(atoms=(C(0), N(10), C(12)), bonders=(N(10),), deleters=()), GenericFunctionalGroup(atoms=(C(6), N(4), C(16)), bonders=(N(4),), deleters=())))


## Build the original cage again.

In [15]:
# Both the metal and the ligand building blocks carry SMARTS-defined
# functional groups, i.e. GenericFunctionalGroups on both sides. Bonds
# formed between two GenericFunctionalGroups are assigned bond order 9,
# and the factory is wrapped in DativeReactionFactory so that these
# metal-ligand bonds are dative.
_dative_reactions = stk.DativeReactionFactory(
    stk.GenericReactionFactory(
        bond_orders={
            # A set naming the same class twice holds a single element.
            frozenset({stk.GenericFunctionalGroup}): 9,
        },
    ),
)

# Place the metal block on the metal vertex ids and the ligand on the
# bridging vertex ids collected during topology extraction.
_original_topology = NewTg(
    building_blocks={
        metal_bb: metal_bb_ids,
        original_bridging_bb: bridging_bb_ids,
    },
    optimizer=stk.Collapser(),
    reaction_factory=_dative_reactions,
)
cage1 = stk.ConstructedMolecule(_original_topology)
cage1.write('original_cage.mol')

<ConstructedMolecule at 139934424825232>

## Use the splitter and transformer to make a new building block.

In [16]:
# Fresh building block from the original ligand; no functional groups are
# passed here, so none are requested for the copy.
old_bb = stk.BuildingBlock.init_from_molecule(
    molecule=original_bridging_bb,
)

In [20]:
# Split at the bond joining the two pyridine rings: the SMARTS matches two
# bonded three-coordinate carbons that do not carry exactly one hydrogen,
# and the bond between match atoms 0 and 1 is the one removed.
splitter = stko.MoleculeSplitter(
    breaker_smarts='[#6X3!H1]~[#6X3!H1]',
    bond_deleter_ids=(0, 1),
)
split_mols = splitter.split(old_bb)

# Print each fragment and save it as splits_<index>.xyz.
fragment_index = 0
for fragment in split_mols:
    print(fragment)
    fragment.write(f'splits_{fragment_index}.xyz')
    fragment_index += 1

[BuildingBlock('*[C]1[CH][CH][N][CH][CH]1'), BuildingBlock('*[C]1[CH][CH][N][CH][CH]1')]
BuildingBlock('*[C]1[CH][CH][N][CH][CH]1')
BuildingBlock('*[C]1[CH][CH][N][CH][CH]1')


In [21]:
# Transform molecule to add reactive groups.
transformer = stko.MoleculeTransformer(
    replacer_smarts='[Br]',
    functional_groups=(stk.BromoFactory(), ),
)
transformed_mols = tuple(
    transformer.transform(i) for i in split_mols
)

count = 0
for i, mol in enumerate(transformed_mols):
    print(mol)
    mol.write(f'transform_{i}.mol')

BuildingBlock('Br[C]1[CH][CH][N][CH][CH]1', (Bromo(Br(10), C(6), bonders=(C(6),), deleters=(Br(10),)),))
BuildingBlock('Br[C]1[CH][CH][N][CH][CH]1', (Bromo(Br(10), C(6), bonders=(C(6),), deleters=(Br(10),)),))


In [22]:
# Spacer to insert between the two ring fragments: a diiodo-diyne whose
# two C-I ends are detected as Iodo functional groups.
_iodo_factory = stk.IodoFactory()
addition_bb = stk.BuildingBlock(
    smiles='IC#CC#CI',
    functional_groups=[_iodo_factory],
)
print(addition_bb)

BuildingBlock('IC#CC#CI', (Iodo(I(0), C(1), bonders=(C(1),), deleters=(I(0),)), Iodo(I(5), C(4), bonders=(C(4),), deleters=(I(5),))))


In [23]:
# Reconstruct.
polymer = stk.ConstructedMolecule(
    topology_graph=stk.polymer.Linear(
        building_blocks=(transformed_mols[0], addition_bb, transformed_mols[1]),
        repeating_unit='ABC',
        num_repeating_units=1,
    ),
)
polymer = stko.UFF().optimize(polymer)
polymer.write('reconstructed.mol')

<ConstructedMolecule at 139934274457664>

## Build a new cage!

In [25]:
# Same nitrogen-based functional group definition as for the original
# ligand: a two-coordinate N between two carbons, bonder is the N (index
# 1 of the SMARTS match), nothing is deleted.
_new_pyridyl_fg_factory = stk.SmartsFunctionalGroupFactory(
    smarts='[#6]~[#7X2]~[#6]',
    bonders=(1, ),
    deleters=(),
)
new_bridging_bb = stk.BuildingBlock.init_from_molecule(
    molecule=polymer,
    functional_groups=[_new_pyridyl_fg_factory],
)
print(new_bridging_bb)

BuildingBlock('C(C#C[C]1[CH][CH][N][CH][CH]1)#C[C]1[CH][CH][N][CH][CH]1', (GenericFunctionalGroup(atoms=(C(0), N(4), C(5)), bonders=(N(4),), deleters=()), GenericFunctionalGroup(atoms=(C(17), N(16), C(22)), bonders=(N(16),), deleters=())))


In [26]:
# Both the metal and the new ligand building blocks carry SMARTS-defined
# functional groups, i.e. GenericFunctionalGroups on both sides. Bonds
# formed between two GenericFunctionalGroups are assigned bond order 9,
# and the factory is wrapped in DativeReactionFactory so that these
# metal-ligand bonds are dative.
_new_dative_reactions = stk.DativeReactionFactory(
    stk.GenericReactionFactory(
        bond_orders={
            # A set naming the same class twice holds a single element.
            frozenset({stk.GenericFunctionalGroup}): 9,
        },
    ),
)

# Same topology and vertex assignment as the original cage, with the
# extended ligand on the bridging vertices.
_new_topology = NewTg(
    building_blocks={
        metal_bb: metal_bb_ids,
        new_bridging_bb: bridging_bb_ids,
    },
    optimizer=stk.Collapser(),
    reaction_factory=_new_dative_reactions,
)
cage2 = stk.ConstructedMolecule(_new_topology)
cage2.write('new_cage.mol')

<ConstructedMolecule at 139934273830432>