In [1]:
import json
from itertools import groupby
import numpy as np
import sys
import re
sys.path.append('..')
import geomlib
from pathlib import Path
from tabulate import tabulate

# NOTE(review): presumably the Bohr radius expressed in angstrom (the value
# matches the CODATA 2010 figure) -- confirm. Not referenced in the cells
# visible here.
bohr = 0.52917721092

In [2]:
# Read the JSON document describing the supplementary-information pages.
suppl_info_path = Path('res/suppl-info.json')
with suppl_info_path.open() as fp:
    pages = json.load(fp)

In [3]:
def get_lines(pages, skip_left=430, skip_top=1120):
    """Yield the text of each visual line as a list of token strings.

    Consecutive tokens of a page that share the same ``'top'`` value are
    treated as one line. A line made of a single token located beyond both
    ``skip_left`` and ``skip_top`` is dropped (presumably the page number in
    the footer -- the thresholds were chosen for this particular document).

    Parameters
    ----------
    pages : iterable of dict
        Each page must have a ``'text'`` entry: a sequence of token dicts
        with the keys ``'top'``, ``'left'`` and ``'data'``.
    skip_left : number, optional
        A lone token is only dropped if its ``'left'`` exceeds this value.
    skip_top : number, optional
        A lone token is only dropped if its ``'top'`` exceeds this value.

    Yields
    ------
    list
        The ``'data'`` values of the tokens on one line, in document order.
    """
    for page in pages:
        # groupby only merges *adjacent* tokens, so tokens are assumed to be
        # stored in reading order within a page.
        for _, group in groupby(page['text'], key=lambda tok: tok['top']):
            tokens = list(group)
            if len(tokens) == 1:
                lone = tokens[0]
                if lone['left'] > skip_left and lone['top'] > skip_top:
                    continue
            yield [tok['data'] for tok in tokens]

In [4]:
# The line stream is consumed strictly in order; every next() below advances
# the same generator.
lines = get_lines(pages)

# Fast-forward past the line that consists solely of the table-header token.
line = next(lines)
while line != ['âˆ†Ga']:
    line = next(lines)
energies = []  # placeholder; nothing is stored in it in this cell

# Throw away the eight lines that follow the header.
for _ in range(8):
    next(lines)

# Keep skipping until the line whose last token is 'theory.' has been read.
line = next(lines)
while line[-1] != 'theory.':
    line = next(lines)

# From here on the stream is read as repeated records:
#   <name>, <atom count>, <one ignored line>, then <atom count> atom lines.
geoms = {}
while True:
    try:
        header = next(lines)
    except StopIteration:
        # Clean end of input: no further record starts here.
        break
    name = header[0].strip()
    natoms = int(next(lines)[0])
    next(lines)  # ignored line (presumably the XYZ comment line)
    geom = []
    while len(geom) < natoms:
        line = next(lines)
        if len(line) == 1:
            # Single-token lines inside a record are not atoms; skip them
            # without counting.
            continue
        specie = line[0]
        xyz = tuple(float(x) for x in line[1:4])
        geom.append((specie, xyz))
    geoms[name] = geom

In [5]:
# Summary table: one row per geometry name (sorted) with its atom count.
atom_counts = [(key, len(geom)) for key, geom in sorted(geoms.items())]
print(tabulate(atom_counts))

---------  ---
1           84
10CPP      100
11CPP      110
2           84
3          128
4          192
5          156
6          159
7          170
8          180
ADMPA       64
AdCH2NH3+   33
AdNH3+      30
C70         70
CB7        126
DB24c8      64
FDNB        16
TCNB        16
tweezer     68
---------  ---


In [6]:
# Write every parsed geometry to geoms/<idx>-<system>-<subidx>.xyz.
#
# Names that parse as an integer are written as '<n>-complex-.xyz'; all other
# names are written as '-monomer-<lowercased name>.xyz'.
out_dir = Path('geoms')
# Create the target directory so the cell also works on a fresh checkout.
out_dir.mkdir(parents=True, exist_ok=True)

for name, geom in sorted(geoms.items()):
    try:
        idx = int(name)
    except ValueError:
        idx, system, subidx = '', 'monomer', name.lower()
    else:
        system, subidx = 'complex', ''
    filename = '{}-{}-{}.xyz'.format(idx, system, subidx)
    with (out_dir/filename).open('w') as f:
        # XYZ layout: atom count, comment line (left empty), one line per atom.
        f.write('{}\n'.format(len(geom)))
        f.write('\n')
        for specie, coord in geom:
            # Fixed-point with six decimals. The previous '{:.6}' meant six
            # *significant* digits, which truncated coordinates >= 10 to four
            # decimals and could switch to exponent notation for tiny values.
            f.write('{:>2} {:.6f} {:.6f} {:.6f}\n'.format(specie, *coord))