# Parser for Orca Dihedrals 

In [2]:
import cclib
import pybel
import glob
import os
import pandas as pd

In [2]:
elems = {1:'H', 6:'C', 7:'N', 8:'O', 9:'F'}


def parse_scan_filename(path):
    """Split a scan output path into its molecule name and dihedral labels.

    The file name (directories are ignored) is expected to look like
    ``<name>_<phi>_<psi>.out.bz2``, or ``sucrose_<theta>.out.bz2`` for the
    sucrose scan, which is labelled by a single angle.

    Args:
        path: path to one output file, as yielded by the glob below.

    Returns:
        A dict with the keys ``name``, ``phi``, ``psi`` and ``theta`` (in
        that order).  Angle labels are the strings found in the file name;
        angles that do not apply to the molecule are the string ``'nan'``.

    Raises:
        ValueError: if the file name has too few ``_``-separated fields.
    """
    # Work on the base name only so that dots, underscores or "out/" inside
    # directory names cannot confuse the parsing.
    stem = os.path.basename(path)
    if stem.endswith('.out.bz2'):
        stem = stem[:-len('.out.bz2')]

    fields = stem.split('_')
    name = fields[0]
    n_angles = 1 if name == 'sucrose' else 2
    if len(fields) < n_angles + 1:
        raise ValueError(
            'cannot parse dihedral labels from file name: {!r}'.format(path))

    if name == 'sucrose':
        return {'name': name, 'phi': 'nan', 'psi': 'nan', 'theta': fields[1]}
    return {'name': name, 'phi': fields[1], 'psi': fields[2], 'theta': 'nan'}


# One record (name, phi, psi, theta, energy) per output file.
energies = []
for file in sorted(glob.iglob('dihedral/*/out/*.out.bz2')):
    record = parse_scan_filename(file)

    data = cclib.io.ccread(file)
    # NOTE(review): only the first SCF energy in the file is kept; presumably
    # each file holds a single calculation -- confirm for multi-step outputs.
    record['energy'] = data.scfenergies[0]

    energies.append(record)


In [3]:
# Gather the per-file records into one table: one row per parsed output
# file, one column per record key.
df = pd.DataFrame(data=energies)
df

Unnamed: 0,name,phi,psi,theta,energy
0,ala-ala,-100,-100,,-15534.900658
1,ala-ala,-100,-120,,-15534.739239
2,ala-ala,-100,-140,,-15531.340037
3,ala-ala,-100,-160,,-15535.108399
4,ala-ala,-100,-180,,-15534.326386
...,...,...,...,...,...
1097,sucrose,,,180,-35230.313002
1098,sucrose,,,20,-35308.728028
1099,sucrose,,,40,-35303.921090
1100,sucrose,,,60,-35302.316484


In [4]:
# Save the table as CSV without the integer row index.
df.to_csv(path_or_buf='../data/dft-data-dihedral.csv', index=False)

#### The biphenyl twist scan was organized a bit differently, which requires a change in parsing
Biphenyl, 0-360 deg in 15 deg increments

In [10]:
# Dihedral label assigned to each scan step, by position:
# 0..180 in steps of 15, then -165..-15 in steps of 15, then a final -180.
# NOTE(review): if the scan really runs 0-360 deg, the last step (360 deg)
# would be equivalent to 0 deg rather than -180 deg -- confirm the intended
# label for the final entry.
angles = list(range(0, 181, 15)) + list(range(-165, 0, 15)) + [-180]
print(len(angles))

25


In [15]:
elems = {1:'H', 6:'C', 7:'N', 8:'O', 9:'F'}


def write_raw_xyz(path, atomnos, coords, energy):
    """Write one geometry to ``path`` as a plain-text XYZ file.

    The first line is the atom count, the second an ``Energy:`` comment
    line, followed by one ``symbol x y z`` line per atom.

    Args:
        path: destination file; overwritten if it exists.
        atomnos: atomic numbers, one per atom.
        coords: one ``(x, y, z)`` triple per atom, in the same order.
        energy: value written on the comment line.

    Raises:
        KeyError: if an atomic number has no entry in ``elems``.
    """
    with open(path, 'w') as fxyz:
        print(len(atomnos), file=fxyz)
        print('Energy: \t{}'.format(energy), file=fxyz)
        for number, (x, y, z) in zip(atomnos, coords):
            print('{} \t\t{} \t\t{} \t\t{}'.format(elems[number], x, y, z), file=fxyz)


molecule = 'biphenyl-twist'
xyz_dir = 'dihedral/{}/xyz'.format(molecule)
sdf_dir = 'dihedral/{}/sdf'.format(molecule)

# One record (name, phi, psi, theta, energy) per scan step.
energies = []
for file in sorted(glob.iglob('dihedral/biphenyl-twist.out')):
    data = cclib.io.ccread(file)

    # Every geometry needs both an angle label and an energy; fail before
    # writing anything rather than part-way through the scan.
    n_steps = len(data.atomcoords)
    if n_steps > len(angles) or n_steps > len(data.scfenergies):
        raise ValueError(
            '{}: {} geometries but only {} angle labels and {} energies'.format(
                file, n_steps, len(angles), len(data.scfenergies)))

    os.makedirs(xyz_dir, exist_ok=True)
    os.makedirs(sdf_dir, exist_ok=True)

    for angle, geom, energy in zip(angles, data.atomcoords, data.scfenergies):
        xyz_path = '{}/{}_{}.xyz'.format(xyz_dir, molecule, angle)
        sdf_path = '{}/{}_{}.sdf'.format(sdf_dir, molecule, angle)

        # get geometry and energy, and write a first xyz file
        write_raw_xyz(xyz_path, data.atomnos, geom, energy)

        # open with pybel in order to make sdf
        mol = next(pybel.readfile('xyz', xyz_path))
        # rewrite xyz to make cleaner
        mol.write('xyz', xyz_path, overwrite=True)
        # write sdf file
        mol.write('sdf', sdf_path, overwrite=True)

        # store in dict to append to list
        energies.append({
            'name': molecule,
            'phi': 'nan',
            'psi': 'nan',
            'theta': angle,
            'energy': energy,
        })

In [16]:
# Gather the biphenyl scan records into one table: one row per scan step,
# one column per record key.
df = pd.DataFrame(data=energies)
df

Unnamed: 0,name,phi,psi,theta,energy
0,biphenyl-twist,,,0,-12603.271086
1,biphenyl-twist,,,15,-12603.314826
2,biphenyl-twist,,,30,-12603.393736
3,biphenyl-twist,,,45,-12603.417057
4,biphenyl-twist,,,60,-12603.385198
5,biphenyl-twist,,,75,-12603.342248
6,biphenyl-twist,,,90,-12603.323815
7,biphenyl-twist,,,105,-12603.343731
8,biphenyl-twist,,,120,-12603.389098
9,biphenyl-twist,,,135,-12603.424953


In [17]:
# Save the biphenyl table as CSV without the integer row index.
df.to_csv(path_or_buf='../data/biphenyl-data-dihedral.csv', index=False)