Skip to content

Commit

Permalink
optimization to unpack (#191)
Browse files Browse the repository at this point in the history
* SPEEDUP
  • Loading branch information
stsouko committed Jul 27, 2021
1 parent e4953b7 commit d24b7f7
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 44 deletions.
93 changes: 70 additions & 23 deletions CGRtools/containers/_unpack.pyx
Original file line number Diff line number Diff line change
@@ -1,22 +1,27 @@
from CGRtools.containers.bonds import Bond


def unpack(bytes data):
cdef short isotope_shift
cdef unsigned char a, b, c, d
cdef unsigned short na, nct, i, n, shift = 3
cdef unsigned long bb, nb = 0
cdef unsigned short na, nct, i, j, n, m, bo, shift = 3, order_shift = 0
cdef unsigned long nb = 0

cdef unsigned short[4095] atom, neighbors, hydrogens, orders, mapping, isotopes, cis_trans_1, cis_trans_2
cdef unsigned short[4095] mapping, atom, isotopes, hydrogens, neighbors, orders, cis_trans_1, cis_trans_2
cdef unsigned short[8190] connections
cdef unsigned short[4096] hybridization
cdef short[4095] charges
cdef bint[4095] radicals, is_tet, is_all, tet_sign, all_sign, ct_sign
cdef float[4095] x, y
cdef bint[4096] seen

cdef dict py_charges, py_radicals, py_hydrogens, py_plane
cdef object bond
cdef dict py_charges, py_radicals, py_hydrogens, py_plane, py_hybridization, py_bonds, tmp
cdef dict py_atoms_stereo, py_allenes_stereo, py_cis_trans_stereo
cdef tuple py_xy
cdef list py_mapping, py_atoms, py_isotopes, py_neighbors, py_connections, py_orders
cdef list py_mapping, py_atoms, py_isotopes

# let's extract data
a, b, c = data[:3]
na = a << 4| (b & 0xf0) >> 4
nct = (b & 0x0f) << 8 | c
Expand Down Expand Up @@ -65,12 +70,7 @@ def unpack(bytes data):
connections[i * 2 + 1] = (b & 0x0f) << 8 | c
shift += 3

if nb % 5:
bb = nb // 5 + 1
else:
bb = nb // 5

for i in range(bb):
for i in range((nb // 5 + 1) if nb % 5 else (nb // 5)):
a, b = data[shift: shift + 2]
orders[i * 5] = (a >> 4) + 1
orders[i * 5 + 1] = ((a >> 1) & 0x07) + 1
Expand All @@ -86,26 +86,34 @@ def unpack(bytes data):
ct_sign[i] = d
shift += 4

# prepare working structures
for i in range(na):
n = mapping[i]
seen[n] = False
hybridization[n] = 1

# define returned data
py_mapping = []
py_atoms = []
py_isotopes = []
py_neighbors = []
py_connections = []
py_orders = []

py_bonds = {}
py_charges = {}
py_radicals = {}
py_hydrogens = {}
py_plane = {}
py_atoms_stereo = {}
py_allenes_stereo = {}
py_cis_trans_stereo = {}
py_hybridization = {}

shift = 0
for i in range(na):
n = mapping[i]

# fill intermediate data
py_mapping.append(n)
py_atoms.append(atom[i])
py_isotopes.append(isotopes[i])
py_neighbors.append(neighbors[i])
py_isotopes.append(isotopes[i] or None)

py_charges[n] = charges[i]
py_radicals[n] = radicals[i]
Expand All @@ -119,17 +127,56 @@ def unpack(bytes data):
if is_all[i]:
py_allenes_stereo[n] = all_sign[i]

for i in range(nb):
py_orders.append(orders[i])
for i in range(nb * 2):
py_connections.append(connections[i])
tmp = {}
py_bonds[n] = tmp
seen[n] = True
for j in range(shift, shift + neighbors[i]):
m = connections[j]
if seen[m]: # bond partially exists. need back-connection.
tmp[m] = py_bonds[m][n]
else:
bo = orders[order_shift]
bond = object.__new__(Bond)
bond._Bond__order = bo
tmp[m] = bond
order_shift += 1

# calculate hybridization for atom n
if hybridization[n] != 4:
if bo == 4:
hybridization[n] = 4
elif bo == 2:
if hybridization[n] == 1:
hybridization[n] = 2
else:
hybridization[n] = 3
elif bo == 3:
hybridization[n] = 3

# calculate hybridization for atom m
if hybridization[m] != 4:
if bo == 4:
hybridization[m] = 4
elif bo == 2:
if hybridization[m] == 1:
hybridization[m] = 2
else:
hybridization[m] = 3
elif bo == 3:
hybridization[m] = 3

shift += neighbors[i]

for i in range(na):
n = mapping[i]
py_hybridization[n] = hybridization[n]

for i in range(nct):
py_xy = (cis_trans_1[i], cis_trans_2[i])
py_cis_trans_stereo[py_xy] = ct_sign[i]

return (py_mapping, py_atoms, py_isotopes, py_neighbors, py_connections, py_orders,
py_charges, py_radicals, py_hydrogens, py_plane,
return (py_mapping, py_atoms, py_isotopes,
py_charges, py_radicals, py_hydrogens, py_plane, py_hybridization, py_bonds,
py_atoms_stereo, py_allenes_stereo, py_cis_trans_stereo)


Expand Down
32 changes: 12 additions & 20 deletions CGRtools/containers/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from math import ceil
from struct import pack_into, unpack_from
from typing import List, Union, Tuple, Optional, Dict, FrozenSet
from weakref import ref
from zlib import compress, decompress
from . import cgr, query # cyclic imports resolve
from .bonds import Bond, DynamicBond, QueryBond
Expand Down Expand Up @@ -735,12 +736,15 @@ def unpack(cls, data: bytes) -> 'MoleculeContainer':
"""
Unpack from compressed bytes.
"""
from ._unpack import unpack

(mapping, atom_numbers, isotopes, neighbors, connections, orders, charges, radicals, hydrogens, plane,
try: # windows? ;)
from ._unpack import unpack
except ImportError:
return cls.pure_unpack(data)
(mapping, atom_numbers, isotopes, charges, radicals, hydrogens, plane, hybridization, bonds,
atoms_stereo, allenes_stereo, cis_trans_stereo) = unpack(decompress(data))

mol = object.__new__(cls)
mol._bonds = bonds
mol._plane = plane
mol._charges = charges
mol._radicals = radicals
Expand All @@ -750,26 +754,14 @@ def unpack(cls, data: bytes) -> 'MoleculeContainer':
mol._cis_trans_stereo = cis_trans_stereo

mol._conformers = []
mol._hybridizations = {}
mol._hybridizations = hybridization
atoms = mol._atoms = {}
bonds = mol._bonds = {}

for n, a, i in zip(mapping, atom_numbers, isotopes):
a = Element.from_atomic_number(a)
atoms[n] = a = a(i or None)
a._attach_to_graph(mol, n)

con = iter(connections)
ords = iter(orders)
for n, ms in zip(mapping, neighbors):
bonds[n] = cbn = {}
for _ in range(ms):
m = next(con)
if m in bonds: # bond partially exists. need back-connection.
cbn[m] = bonds[m][n]
else:
cbn[m] = Bond(next(ords))
mol._calc_hybridization(n)
atoms[n] = a = object.__new__(Element.from_atomic_number(a))
a._Core__isotope = i
a._graph = ref(mol)
a._map = n
return mol

@classmethod
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def finalize_options(self):

setup(
name='CGRtools',
version='4.2.19',
version='4.2.20',
packages=['CGRtools', 'CGRtools.algorithms', 'CGRtools.algorithms.calculate2d', 'CGRtools.algorithms.components',
'CGRtools.algorithms.standardize', 'CGRtools.algorithms.tautomers', 'CGRtools.containers',
'CGRtools.files', 'CGRtools.files._mdl', 'CGRtools.periodictable', 'CGRtools.periodictable.element',
Expand Down

0 comments on commit d24b7f7

Please sign in to comment.