The building blocks (nodes and edges) used in <a href="https://github.com/tobacco-mofs/tobacco_3.0">Topologically Based Crystal Constructor (ToBaCCo)</a> must follow the specific format outlined in the manual. This notebook can be used to specify an individual FILE that requires atom-bond data; alternatively, the method outlined here can be moved into another script to streamline the process if multiple building blocks require this treatment.

In [None]:
FILE = "EXAMPLE_EDGE.cif" # specify the CIF file for which bond information must be collected

In [None]:
from pymatgen.io.cif import CifWriter
from pymatgen.io.lammps.data import CombinedData
from pymatgen.io import xyz
from pymatgen.io.cif import CifParser
from pymatgen.io.cif import CifWriter
import numpy as np

In [None]:
# EXAMPLE BOND DATA SECTION
#
# Reference layout for the bond loop of a building-block CIF file: the
# `loop_` keyword, the five column tags, then one row per bond.
# The text is assembled from individual rows; the leading empty entry
# reproduces the blank first line of the printed example.
_EXAMPLE_BOND_LINES = (
    "",
    "loop_",
    "_geom_bond_atom_site_label_1",
    "_geom_bond_atom_site_label_2",
    "_geom_bond_distance",
    "_geom_bond_site_symmetry_2",
    "_ccdc_geom_bond_type",
    "X1 C2 1.540 . A",
    "X1 C6 1.540 . A",
    "C2 X3 1.540 . A",
    "C2 H7 1.140 . S",
    "X3 C4 1.540 . A",
    "C4 X5 1.540 . A",
    "C4 H8 1.140 . S",
    "X5 C6 1.540 . A",
    "C6 H9 1.140 . S",
)

example_format = "\n".join(_EXAMPLE_BOND_LINES)
print(example_format)

In [None]:
# Parse the CIF named by FILE and keep the first structure the parser returns.
# NOTE(review): newer pymatgen releases reportedly deprecate get_structures()
# in favour of parse_structures() -- confirm against the installed version.
parser = CifParser(FILE)
parsed_structures = parser.get_structures()
structure = parsed_structures[0]

In [None]:
# Per the original author's observation, sites loaded through pymatgen all
# carry the number 1 in their label, no matter how many atoms of that element
# came before them in the structure.
#
# This cell rewrites the labels in the dict form of the structure so that each
# site is named <element><running count>, counting separately per element in
# site order (C1, C2, ..., H1, H2, ...).
linker = structure.as_dict()
s = linker['sites']
atom_counter = {}  # element symbol -> number of sites of that element seen so far
for site in s:
    # Only the first species entry of the site is used to name it.
    element = site['species'][0]['element']
    atom_counter[element] = atom_counter.get(element, 0) + 1
    site['label'] = f"{element}{atom_counter[element]}"

In [None]:
# Certain atoms in the original CIF file might require removal; bonds that
# involve them are left out of the bond list produced further down.
# List their labels below, one label per line (blank lines are ignored).
# Labels follow the <element><running number> scheme assigned by the
# relabelling cell, e.g. O1, H7.
removal_block = """
O1
O2
O3
O4
H7
H8
"""


In [None]:
# Every pair of atoms is compared by position: if the two atoms are closer than
# MAX_DIST and neither of them has been marked for removal, the pair is
# reported as a bond.
# For every reported pair "_geom_bond_site_symmetry_2" is written as "." and
# "_ccdc_geom_bond_type" is written as "S" (single bond).
#
# NOTE(review): distances are plain Euclidean norms of the sites' 'xyz'
# entries, with no periodic-image handling -- confirm this is what the
# building block needs.

MAX_DIST = 1.9  # exclusive distance cutoff; presumably in angstrom -- confirm


def _find_bonds(sites, max_dist, excluded_labels):
    """Return the site pairs that lie closer together than ``max_dist``.

    Args:
        sites: Sequence of site dicts, each with a 'label' string and an
            'xyz' coordinate triple.
        max_dist: Exclusive upper bound on the distance of a bonded pair.
        excluded_labels: Labels of sites that must not take part in any bond.

    Returns:
        A list of ``(label_a, label_b, distance)`` tuples. Pairs are ordered
        as a nested loop over the sites would visit them (first site in input
        order, then every later site).
    """
    excluded = set(excluded_labels)  # constant-time membership inside the pair loop
    # Convert each position once instead of once per pair.
    kept = [
        (site['label'], np.array(site['xyz']))
        for site in sites
        if site['label'] not in excluded
    ]
    bonds = []
    for idx, (label_a, pos_a) in enumerate(kept):
        for label_b, pos_b in kept[idx + 1:]:
            dist = np.linalg.norm(pos_a - pos_b)
            if dist < max_dist:
                bonds.append((label_a, label_b, dist))
    return bonds


# Strip every entry so stray spaces or a trailing carriage return in
# removal_block cannot stop a label from matching.
atoms_to_remove = [line.strip() for line in removal_block.splitlines() if line.strip()]

for label_a, label_b, dist in _find_bonds(s, MAX_DIST, atoms_to_remove):
    print(f"{label_a:<4} {label_b:<4} {dist:<6.5f} . S")
    