### Installing the required Modules


In [1]:
# Importing the require modules
import numpy as np

import os
import pickle 
import blosc

import re
from pprint import pprint

from openbabel import pybel
from openbabel import openbabel as ob
from xtb.interface import Calculator, Param
from xtb.libxtb import VERBOSITY_MUTED

error_files = []

## Helper function filter to extract all the bond lengths, angles and dihedrals from a molecule

In [2]:
def filterAtom(atom: str) -> str:
    '''Removes any number from the string e.g. C3 becomes C'''
    pattern_order = r'[0-9]'
    return re.sub(pattern_order, '', atom)

def atomType(mol, atomIdx) -> str:
    '''get the atomic type given an atom index'''
    return mol.OBMol.GetAtom(atomIdx).GetType()

In [3]:
def getBondOrders_Charges(mol):
    
    coords = []
    for atom in mol.atoms:
        coords.append([atom.coords[0], atom.coords[1], atom.coords[2]])
    
    # xtb-python expects them in Bohr
    atom_coords = np.array(coords) / 0.52917721092
    atomic_nums = np.array([a.atomicnum for a in mol.atoms])
    
    # Creating the calculator
    calc = Calculator(Param.GFN2xTB, atomic_nums, atom_coords)
    calc.set_verbosity(VERBOSITY_MUTED)
    res = calc.singlepoint()
    
    # Get the partial charges
    charges = res.get_charges()
    
    # Get the bond order matrix
    bond_orders = res.get_bond_orders()
    
    '''
    print("Charges:")
    for i, charge in enumerate(charges):
        print("%d\t%f" % (i+1, charge))
        
    print("Bond orders (matrix):")
    for i, row in enumerate(bond_orders):
        for j, bond_order in enumerate(row):
            if j > i and bond_order > 0.1:
                print("%d \t %d \t %f" % (i+1, j+1, bond_order))
    '''
    
    return (charges, bond_orders)

In [4]:
def getBonds(mol, charge, bond_order) -> dict:
    '''Iterate through all the bonds in a molecule'''
    
    bonds = {}
    
    for bond in ob.OBMolBondIter(mol.OBMol):
        index1 = bond.GetBeginAtomIdx()
        index2 = bond.GetEndAtomIdx()
        begin = filterAtom(atomType(mol, index1))
        end = filterAtom(atomType(mol, index2))
        bond_length = round(bond.GetLength(),4)
        bondOrder = bond_order[index1-1][index2-1]
        
        # Swap them for lexographic order
        if (end < begin):
            begin, end = end, begin
        
        #Appending to the dictionary of list
        if (f"{begin} - {end} , {bond_order}" in bonds.keys()):
            bonds[f"{begin} - {end} , {round(bondOrder,1)}"].append((bond_length, charge[index1-1], charge[index2-1], 
                                                             round(bondOrder,2)))
        else:
            bonds[f"{begin} - {end} , {round(bondOrder,1)}"]=[(bond_length, charge[index1-1], charge[index2-1], 
                                                             round(bondOrder,2))]
        
    return bonds

In [22]:
def getAngles(mol, bondOrder, charge) -> dict:
    '''Iterate through all the bond angles in a molecule'''
    
    angles = {}
    
    for angle in ob.OBMolAngleIter(mol.OBMol):
        bond_order1 = bondOrder[angle[0], angle[1]]
        bond_order2 = bondOrder[angle[1], angle[2]]
        #print(bond_order1, bond_order2)
        #print(angle[0], angle[1], angle[2])
        
        a = angle[0] + 1
        b = mol.OBMol.GetAtom(angle[1] + 1)
        c = angle[2] + 1
        bond_angle = round(b.GetAngle(a, c), 3)

        aType = filterAtom(atomType(mol, a))
        bType = filterAtom(b.GetType())
        cType = filterAtom(atomType(mol, c))
        
        # Swap them for lexographic order
        if (cType < aType):
            aType, cType = cType, aType
            bond_order1, bond_order2 = bond_order2, bond_order1
            
        #Appedning to the dictonary of lists if the angle b/t the elements already existse
        if(f"{aType} - {bType} - {cType}" in angles.keys()):
            angles[f"{aType} - {bType} - {cType}"].append((bond_angle, charge[angle[0]], charge[angle[1]], charge[angle[2]], bond_order1, bond_order2))
        else:
            angles[f"{aType} - {bType} - {cType}"] = [(bond_angle, charge[angle[0]], charge[angle[1]], charge[angle[2]], bond_order1, bond_order2)]                                                                                      
                                                                                                     
    return angles

In [23]:
def getTorsions(mol, bondOrder, charge) -> dict:
    '''Iterate through all the torsions in a molecule'''
    
    torsions = {}
    
    for torsion in ob.OBMolTorsionIter(mol.OBMol):
        a = torsion[0] + 1
        b = torsion[1] + 1
        c = torsion[2] + 1
        d = torsion[3] + 1
        
        bondOrder1 = bondOrder[torsion[0], torsion[1]]
        bondOrder2 = bondOrder[torsion[1], torsion[2]]
        bondOrder3 = bondOrder[torsion[2], torsion[3]]
        
        torsion_angle = round(mol.OBMol.GetTorsion(a, b, c, d), 3)

        aType = filterAtom(atomType(mol, a))
        bType = filterAtom(atomType(mol, b))
        cType = filterAtom(atomType(mol, c))
        dType = filterAtom(atomType(mol, d))

        # Switch if not in lexographic order
        if(dType < aType):
            aType, dType = dType, aType
            bondOrder1, bondOrder3 = bondOrder3, bondOrder1
            
        #Appedning to the dictonary of lists if the torsion angle b/t the elements already existse
        if(f"{aType} - {bType} - {cType} -{dType}" in torsions.keys()):
            torsions[f"{aType} - {bType} - {cType} -{dType}"].append((torsion_angle, charge[torsion[0]], charge[torsion[1]],
                                                charge[torsion[2]], charge[torsion[3]], bondOrder1, bondOrder2, bondOrder3))
        
        # Checking for palindromic sequence
        elif(f"{dType} - {cType} - {bType} -{aType}" in torsions.keys()):
            torsions[f"{dType} - {cType} - {bType} -{aType}"].append((torsion_angle, charge[torsion[3]], charge[torsion[2]],
                                                charge[torsion[1]], charge[torsion[0]], bondOrder3, bondOrder2, bondOrder1))
            
        else:
            torsions[f"{aType} - {bType} - {cType} -{dType}"] = [(torsion_angle, charge[torsion[0]], charge[torsion[1]],
                                            charge[torsion[2]], charge[torsion[3]], bondOrder1, bondOrder2, bondOrder3)]
    
    return torsions

### Wrapping all the helper functions into a main function

In [28]:
def analyze_molecular_data(file: str) -> (dict, dict, dict):
    '''Fetches the bond lengths, angles and dihedrals from a given file'''
    
    # Load the File
    extension=file.split('.')[-1]
    try:
        mol = next(pybel.readfile(extension, file))
    except:
        error_files.append(file)
        return ({},{},{})
    
    # Get Bond Order and partial charges on atoms
    charge, bond_order = getBondOrders_Charges(mol)
    
    # Get bond lengths
    #bond_lengths = getBonds(mol, charge, bond_order)
    
    # Get bond angles
    bond_angles = getAngles(mol, bond_order, charge)
    
    # Get torsions
    dihedrals = getTorsions(mol, bond_order, charge)

    return (bond_lengths, bond_angles, dihedrals)


def mergeDictionaries(dict1 : dict, dict2: dict):
    '''Merges two Python dictionary by combining elements for a common key
    The function merges dict2 into dict 1'''
    for key, value in dict2.items():
        if key in dict1:
            dict1[key].extend(value)
        else:
            dict1[key] = value

### Testing the above functions on a sample molecule

In [24]:
# Example usage
xyz_file = '21395061/cod-crest/W/WKGCLMYDEAXJHN-UHFFFAOYSA-N.xyz'
mol = next(pybel.readfile('xyz', xyz_file))
charge, bond_order = getBondOrders_Charges(mol)
#pprint(getBonds(mol))
#getAngles(mol, bond_order, charge)
getTorsions(mol, bond_order, charge)


#molecular_data = analyze_molecular_data(xyz_file)
#molecular_data

{'H - C - C -H': [(-61.147,
   0.03503899968442077,
   -0.10378630636174588,
   -0.053875656833530024,
   0.03290577940117682,
   0.987853806853633,
   1.0192875179900571,
   0.9784312829508657),
  (-178.881,
   0.03503899968442077,
   -0.10378630636174588,
   -0.053875656833530024,
   0.032890953517818074,
   0.987853806853633,
   1.0192875179900571,
   0.9784238569929431),
  (58.925,
   0.036001701940108675,
   -0.10378630636174588,
   -0.053875656833530024,
   0.03290577940117682,
   0.9872833720193072,
   1.0192875179900571,
   0.9784312829508657),
  (-58.81,
   0.036001701940108675,
   -0.10378630636174588,
   -0.053875656833530024,
   0.032890953517818074,
   0.9872833720193072,
   1.0192875179900571,
   0.9784238569929431),
  (178.989,
   0.03503443734946105,
   -0.10378630636174588,
   -0.053875656833530024,
   0.03290577940117682,
   0.9878446566377561,
   1.0192875179900571,
   0.9784312829508657),
  (61.255,
   0.03503443734946105,
   -0.10378630636174588,
   -0.053875656833

## Extracting Molecular Data from all the files

In [29]:
#Collecting data from the Molecular Dataset

bond_lengths = {}
bond_angles = {}
dihedrals = {}

ext = ('.xyz',)
count = 0

for root, dirs, files in os.walk('21395061/cod-crest/'):
    for file in files:
        extension=os.path.splitext(file)[1]
        count +=1
        if(extension not in ext): continue
            
        filename = os.path.join(root,file)
        length, angle, torsion = analyze_molecular_data(filename)
        
        #mergeDictionaries(bond_lengths, length)
        mergeDictionaries(bond_angles, angle)
        mergeDictionaries(dihedrals, torsion)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -59.433677868596          0.000316298731)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -79.377053843214          0.000924825445)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -86.034286802623          0.000854184241)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -58.758444780557          0.000328546459)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -111.085364902439          0.000603406375)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -103.528980684869          0.000489942664)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -91.618324994391          0.000451592379)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrde

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -94.340217728248          0.000410361877)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -65.485149618314          0.000041819052)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -84.893951507837          0.000432179138)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -43.338442248229          0.000158257975)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -161.206963397959          0.000759112675)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -29.282164042213          0.000830019973)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -61.188387478083          0.000949836438)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrde

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -122.706261229453          0.000619626115)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -45.800808954851          0.000127003290)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -116.316247123820          0.000341302592)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -72.235098405002          0.000574729245)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -33.667170665269          0.000397739222)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -86.420450608817          0.000324359427)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -74.997605752354          0.000845558168)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrde

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -82.258093546117          0.000946077429)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -130.114727446468          0.000453489848)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -126.237724477994          0.000918757918)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -51.742702166545          0.000363859104)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -31.789058681207          0.000159162384)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -88.249652676622          0.000256448438)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -63.829852186305          0.000049640629)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrde

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -48.498179052821          0.000647271907)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -86.850574119305          0.000660827113)

  Problems reading an XYZ file: Cannot read the first line.
  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -98.944952818855          0.000303391322)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -72.047500907270          0.000736772944)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -62.761696921750          0.000756825227)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -165.556345248705          0.000248494081)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -111.079852658070          0.000589841174)

  

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -141.967847636520          0.000326744121)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -44.918399970281          0.000945551600)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -50.664486326927          0.000049948737)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -65.520853644467          0.000093295728)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -163.235270788404          0.000633343457)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -80.675825563333          0.000925246097)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -75.461749110041          0.000332863329)

  Problems reading an XYZ file: Cannot read the first line.
  

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -63.379352585571          0.000892770596)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -174.180721787059          0.000784679305)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -64.180521779920          0.000017166759)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -134.703356424741          0.000514417618)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -108.734237634884          0.000824981146)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -56.645793893085          0.000670378300)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -52.659979260843          0.000931483634)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrde

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -76.685705644160          0.000031150799)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -71.926598996862          0.000406042696)

  Problems reading an XYZ file: Cannot read the first line.
  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -44.844936759500          0.000847292793)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -83.430195634118          0.000603342987)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -324.729398855712          0.000311187033)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -61.613276361018          0.000024981601)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -71.737666861832          0.000560047682)

  

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -125.301024549696          0.000608143805)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -285.671916646656          0.000393823990)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -114.531793869351          0.000958085593)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -192.818163521931          0.000035764495)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -64.298101284615          0.000817527658)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -88.073241934083          0.000696198957)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -88.162864587943          0.000926817331)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrde

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -136.262138034361          0.000565789523)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -114.971871516087          0.000431502308)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -56.090009187722          0.000037758719)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -38.677906079360          0.000023380596)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -93.325409364459          0.000729625859)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -85.477607409830          0.000301737547)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -133.076938456476          0.000555283815)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrde

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -86.544539075439          0.000386150362)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -74.120289563800          0.000827633009)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -45.262567951549          0.000395992566)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -33.235469603667          0.000979393411)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -123.123311415875          0.000417959066)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -74.052622736072          0.000028340255)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -74.352128178918          0.000362054372)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrde

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -150.369574181538          0.000946777294)

  Problems reading an XYZ file: Cannot read the first line.
  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -30.522680641388          0.000064383370)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -106.008361146187          0.000775931700)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -82.329684561325          0.000488981392)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -103.456529940956          0.000841322613)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -43.218564560686          0.000018756334)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -86.310419256330          0.000761555145)

  

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -40.087248919923          0.000086703476)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -63.505731869232          0.000465486890)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -43.308300509987          0.000043593418)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -57.323105442045          0.000935267118)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -73.898408844142          0.000388760194)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -88.192798206386          0.000053608513)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -59.926370913718          0.000453697539)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrde

  Problems reading an XYZ file: Cannot read the first line.
  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -63.23610429)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -80.58049670)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -75.380456787486          0.000030414807)

  Problems reading an XYZ file: Cannot read the first line.
  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -26.284260472110          0.000151028491)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -75.528763923359          0.000417230421)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -62.957896317740          0.000724688576)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -81.145336377141          0.000094515928)

  Failed to kekulize aromatic 

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -77.243553225791          0.000845288416)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -76.22393448)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -63.886283368873          0.000257979872)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -61.654082248813          0.000159520486)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -70.324967305825          0.000741356940)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -58.038434352562          0.000020833497)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -66.30941650)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -65.264050672291          0.000037976958)

  Failed to keku

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -43.172722753240          0.000054351980)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -73.597109182502          0.000279245772)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -88.481932859954          0.000564316594)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -130.35462286)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -54.231333902355          0.000079652238)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -66.90147375)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -57.71351983)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -70.192300335746          0.000964946768)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondO

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -78.60250284)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -61.992416778239          0.000086252789)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -64.869002779839          0.000408311976)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -49.763801715690          0.000047276449)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -171.44817894)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -44.273807928439          0.000049516957)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -47.27554982)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -41.429923806781          0.000035687391)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondO

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -60.87218238)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -88.28245908)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -86.528641045168          0.000083648807)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -59.24706445)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -103.78765017)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -48.076300303059          0.000062308504)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -37.978471377579          0.000063497389)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -79.451782355972          0.000104504894)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -42.15486365

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -50.59547819)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -67.77125777)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -62.67624352)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -86.81418585)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -41.807641300458          0.000847227041)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -61.32450305)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -66.439230928625          0.000208749737)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -41.240717068572          0.000060070008)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -54.615515413047          0.000023512362)

  Failed to ke

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -70.235775941656          0.000411272431)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -88.630943993588          0.000924982647)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -92.481726571145          0.000306223875)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -48.078466618051          0.000124147889)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -62.680983882859          0.000856830704)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -70.397322488508          0.000605637885)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -127.743958950115          0.000170334279)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrde

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -44.272219044854          0.000077702345)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -65.249732421265          0.000068265291)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -98.121672679763          0.000673871863)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -104.704437189879          0.000720570716)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -45.417886862608          0.000021113307)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -139.775696713189          0.000496000264)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -70.889743595719          0.000944000070)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrde

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -93.863166136146          0.000992266039)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -169.699368103374          0.000932292400)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -185.379285527472          0.000310559017)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -90.738243442405          0.000412124472)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -85.118237275940          0.000706149530)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -52.713134961687          0.000637581444)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -67.508844229477          0.000491081373)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrde

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -39.527044428699          0.000786062438)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -55.880091425537          0.000948279969)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -59.236553521668          0.000559552534)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -66.895292235669          0.000450645636)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -57.608344616629          0.000917647866)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -37.342421111082          0.000069066237)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -134.618550642308          0.000551817472)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrde

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -51.450018368391          0.000529418341)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -77.403338918656          0.000540026501)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -101.418661805598          0.000480275112)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -68.658138504857          0.000426744140)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -193.665424216576          0.000990544312)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -98.609989409797          0.000748534259)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -74.105308332945          0.000565883933)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrde

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -64.196656255119          0.000110454848)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -72.691295316101          0.000489844169)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -60.690278521851          0.000730722468)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -73.240145815683          0.000810502388)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -55.894331984043          0.000415855342)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -37.842953955303          0.000726321557)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -78.566751160400          0.000247775560)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrde

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -59.804318824484          0.000110920937)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -60.376897728145          0.000300592719)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -53.051967056058          0.000319347612)

  Problems reading an XYZ file: Cannot read the first line.
  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -98.442687653706          0.000729921947)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -55.024029917737          0.000846533144)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -64.086696101881          0.000954172080)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -81.113278282225          0.000751913996)

  

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -62.640663187558          0.000075419240)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -51.73371118)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -126.81155588)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -28.756519489538          0.000058683699)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -68.467199307241          0.000035712067)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -157.33012532)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -86.676277874346          0.000026896537)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -46.49312991)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -56.6538851

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -150.19221434)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -92.87677807)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done       -171.343066796213          0.000018589280)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -76.526232883783          0.000039041893)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -41.752312203978          0.000065903802)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -58.29767816)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -73.517191974972          0.000061287851)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -121.46879451)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -63.0008565

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -57.71487871)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -133.48170259)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -138.56009644)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -46.564518915949          0.000123821791)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -107.43700611)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -36.961859227693          0.000024709691)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is SCF done        -42.324254341747          0.000091293011)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -59.48379656)

  Failed to kekulize aromatic bonds in OBMol::PerceiveBondOrders (title is -196.19093660)

  Failed to kekulize aromatic bonds in OBMol::Perceive

XTBException: Single point calculation failed:
-2- xtb_calculator_singlepoint: Electronic structure method terminated
-1- scf: Self consistent charge iterator did not converge

### Running the corrupt xyz files with corresponding sdf files and writing the bond length, angle, torsion data as pickle object

In [30]:
#Trying with sfd files for the corrupted xyz files

for file in error_files:
    filename = os.path.splitext(file)[0] + '.sdf'
    length, angle, torsion = analyze_molecular_data(filename)
    #mergeDictionaries(bond_lengths, length)
    mergeDictionaries(bond_angles, angle)
    mergeDictionaries(dihedrals, torsion)

In [None]:
def MolecularDataWriter_compress():
    
    pickled_bond_lengths = pickle.dump(bond_lengths)
    compressed_bond_lengths = blosc.compress(pickled_bond_lengths)
    
    with open('cod-crest_bond_lengths.dat', 'wb') as f:
        f.write(compressed_bond_lengths)
        
    pickled_bond_angles = pickle.dump(bond_angles)
    compressed_bond_angles = blosc.compress(pickled_bond_angles)
        
    with open('cod-crest_bond_angles.dat', 'wb') as f:
        f.write(compressed_bond_angles)
        
    pickled_torsions = pickle.dump(torsion)
    compressed_dihedrals = blosc.compress(pickled_torsions)
        
    with open('cod-crest_torsions.dat', 'wb') as f:
        f.write(compressed_dihedrals)

In [31]:
def MolecularDataWriter_pickle():
    
    for key in bond_lengths.keys():
        bond_lengths[key] = np.array(bond_lengths[key])
        
    for key in bond_angles.keys():
        bond_angles[key] = np.array(bond_angles[key])
        
    for key in dihedrals.keys():
        dihedrals[key] = np.array(dihedrals[key])
    
#     with open('cod-crest_bond_lengths_.pkl', 'wb') as f:
#         pickle.dump(bond_lengths, f)
        
    with open('cod-crest_bond_angles.pkl', 'wb') as f:
        pickle.dump(bond_angles, f)
        
    with open('cod-crest_torsions.pkl', 'wb') as f:
        pickle.dump(dihedrals, f)

In [34]:
for key in bond_angles.keys():
    print(key)

Car - O - Nar
Car - Nar - Car
Car - O - Car
Nar - Car - Nar
Car - Car - Nar
Car - Car - Car
Car - H - Car
Car - Car - H
C - Nar - Nar
C - Car - Nar
Car - Nar - Nar
C - Nar - Car
C - Car - Car
Car - Car - Npl
H - Car - Npl
H - H - Npl
C - Car - H
C - H - H
C - Nar - H
C - O - C
C - C - C
C - C - H
C - O - O
C - C - O
C - O - H
Car - F - Car
C - Car - Nam
C - O - Nam
C - Car - O
Car - H - Nam
H - C - Nam
Car - C - Nam
Car - Nam - Car
C - F - F
C - Car - F
Car - O - Ntr
Ntr - O - O
Car - Ntr - Car
Car - C - Car
C - F - Ntr
C - Ntr - C
C - F - C
Ntr - C - O
C - C - Car
Car - Car - O
C - O - Ntr
C - Ntr - H
C - C - N
C - N - H
N - C - O
HO - N - O
C - O - Car
Car - Car - S
Car - S - Nar
Car - S - Car
B - C - N
C - N - C
B - N - Car
B - C - Car
B - N - C
C - B - N
B - C - C
C - B - H
Car - B - Car
C - Car - C
Car - Cl - Car
Car - Car - Cl
C - Car - N
C - N - O
C - O - N
C - N - Car
C - Npl - N
C - N - Npl
C - Npl - Car
C - Npl - C
N - C - Npl
C - Nam - C
C - C - Nam
C - Nam - H
C - Nam - O
C

In [32]:
MolecularDataWriter_pickle()