# Checks geometry for validity

Many of these molecules are prone to ring-opening and closing during geometry optimization. The goal is to compare the number or rings of the initial geometry to the number of rings of the final geometry.

In [1]:
import pybel
import openbabel
import glob

  from tqdm.autonotebook import tqdm


In [2]:
def num_rings(SMILES):
    '''
    Counts the number of rings in SMILES

    Parameters
    ----------
    SMILES: str
        SMILES string of the molecule
    Returns
    --------
    ring_count: int
        number of rings in the molecule
    '''
    ring_symbol_count = 0
    frags = list(SMILES)

    for x in frags:
        if RepresentsInt(x) == True:
            ring_symbol_count += 1 # adds 1 if it sees a number in the SMILES, representing a part of the ring break
        elif x == '%':
            ring_symbol_count -= 1 # the number of the ring closure is double digits, so we don't want to add that ring twice

    ring_count = ring_symbol_count / 2 # needs 2 numbers for every 1 ring break

    return ring_count

def check_mol_breaks(SMILES):
    '''
    Checks to see if the molecule was broken into fragments during geometry optimization

    Parameters
    ----------
    SMILES:str
        SMILES string of the molecule

    '''
    frags = list(SMILES)
    if '.' in frags:
        return True

def check_geom_opt(NFA_str, file_name):
    '''
    Checks to see if something weird and incorrect happened during geometry optimization
    Primary example is new rings forming or fragments breaking

    Parameters
    ----------
    NFA_str: string
        SMILES string of the molecule

    file_name: string
        path to xyz file
    '''

    unopt_num_rings = num_rings(NFA_str)

    opt_smi = xyz_to_smiles(file_name)
    opt_num_rings = num_rings(opt_smi)

    if unopt_num_rings != opt_num_rings:
        print(file_name)
        print('The numbers of rings does match before and after geometry optimization')
        return False

    if check_mol_breaks(opt_smi) == True:
        print(file_name)
        print('The molecule broke into fragments')
        return False

In [3]:
for file in glob.iglob('Calculations/acceptors/input/2_benchmarking/*.sdf'):
    filename = file.split("/")[-1].split(".")[0]
    mol = next(pybel.readfile('sdf', file))
    initial_smi = mol.write(format = 'smi')
    print(initial_smi)
    
    

AttributeError: module 'pybel' has no attribute 'readfile'