Skip to content

Commit

Permalink
Cython unpack (#190)
Browse files Browse the repository at this point in the history
Implemented a Cython unpack with pickle-level speed.
Other optimizations.
  • Loading branch information
stsouko committed Jul 26, 2021
1 parent a6a4a54 commit e4953b7
Show file tree
Hide file tree
Showing 4 changed files with 214 additions and 8 deletions.
146 changes: 146 additions & 0 deletions CGRtools/containers/_unpack.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@


def unpack(bytes data):
    """
    Decode an already decompressed packed-molecule byte string into plain Python containers.

    Layout as read by this function (big-endian bit packing):

    * header, 3 bytes: 12 bit atoms count, 12 bit cis/trans records count.
    * atom record, 9 bytes each:
      12 bit atom mapping number, 4 bit neighbors count,
      2 bit tetrahedron stereo (flag, sign), 2 bit allene stereo (flag, sign),
      5 bit isotope field (0 - not specified), 7 bit atomic number,
      16 bit X, 16 bit Y,
      3 bit implicit hydrogens, 4 bit charge (stored value - 4), 1 bit radical flag.
    * connection table: one 3 byte record (two 12 bit numbers) per bond.
    * bond orders: 2 byte groups holding five 3 bit values each (stored value + 1).
    * cis/trans records, 4 bytes each: two 12 bit atom numbers and one sign byte.

    :param data: decompressed packed molecule.
    :return: tuple of (mapping, atoms, isotopes, neighbors, connections, orders,
        charges, radicals, hydrogens, plane, atoms_stereo, allenes_stereo, cis_trans_stereo).
        The first six items are lists, the rest are dicts keyed by atom mapping number
        (cis_trans_stereo is keyed by a pair of numbers).
    :raises ValueError: if the payload describes more bonds than the fixed-size buffers can hold,
        or carries an isotope for an atomic number outside of the isotopes table.
        Truncated payloads are also expected to fail on the slice unpacking below.
    """
    cdef short isotope_shift
    cdef unsigned char a, b, c, d
    cdef unsigned short na, nct, i, n, shift = 3
    cdef unsigned long bb, nb = 0

    cdef unsigned short[4095] atom, neighbors, hydrogens, orders, mapping, isotopes, cis_trans_1, cis_trans_2
    cdef unsigned short[8190] connections
    cdef short[4095] charges
    cdef bint[4095] radicals, is_tet, is_all, tet_sign, all_sign, ct_sign
    cdef float[4095] x, y

    cdef dict py_charges, py_radicals, py_hydrogens, py_plane
    cdef dict py_atoms_stereo, py_allenes_stereo, py_cis_trans_stereo
    cdef tuple py_xy
    cdef list py_mapping, py_atoms, py_isotopes, py_neighbors, py_connections, py_orders

    # header: both counters are 12 bit wide, so na and nct never exceed 4095 (the buffers size).
    a, b, c = data[:3]
    na = a << 4| (b & 0xf0) >> 4
    nct = (b & 0x0f) << 8 | c

    for i in range(na):
        a, b = data[shift: shift + 2]
        mapping[i] = a << 4 | (b & 0xf0) >> 4
        neighbors[i] = b & 0x0f
        nb += b & 0x0f  # every bond is counted from both of its ends. halved after the loop.

        a, b = data[shift + 2: shift + 4]
        if a & 0x80:
            is_tet[i] = True
            tet_sign[i] = a & 0x40
        else:
            is_tet[i] = False
        if a & 0x20:
            is_all[i] = True
            all_sign[i] = a & 0x10
        else:
            is_all[i] = False

        atom[i] = b & 0x7f
        isotope_shift = (a & 0x0f) << 1 | b >> 7
        if isotope_shift:
            # 7 bit field can hold up to 127, but the table has only 119 entries.
            if atom[i] > 118:
                raise ValueError('invalid atomic number in packed data')
            isotopes[i] = common_isotopes[atom[i]] + isotope_shift
        else:
            isotopes[i] = 0

        # NOTE(review): the raw 16 bit integer is stored as the float value as is.
        # if pack() writes half-precision floats, these bits need a real decoding - confirm against pack().
        a, b = data[shift + 4: shift + 6]
        x[i] = a << 8 | b
        a, b = data[shift + 6: shift + 8]
        y[i] = a << 8 | b

        a = data[shift + 8]
        hydrogens[i] = a >> 5
        charges[i] = ((a >> 1) & 0x0f) - 4
        radicals[i] = a & 0x01

        shift += 9

    nb //= 2
    # connections holds 2 * 4095 entries and orders holds 4095: more bonds would be written out of the buffers.
    if nb > 4095:
        raise ValueError('too many bonds in packed data')

    for i in range(nb):
        a, b, c = data[shift: shift + 3]
        connections[i * 2] = a << 4| (b & 0xf0) >> 4
        connections[i * 2 + 1] = (b & 0x0f) << 8 | c
        shift += 3

    # orders are stored in groups of five. the last group is padded.
    if nb % 5:
        bb = nb // 5 + 1
    else:
        bb = nb // 5

    for i in range(bb):
        a, b = data[shift: shift + 2]
        orders[i * 5] = (a >> 4) + 1
        orders[i * 5 + 1] = ((a >> 1) & 0x07) + 1
        orders[i * 5 + 2] = ((a & 0x01) << 2 | b >> 6) + 1
        orders[i * 5 + 3] = ((b >> 3) & 0x07) + 1
        orders[i * 5 + 4] = (b & 0x07) + 1
        shift += 2

    for i in range(nct):
        a, b, c, d = data[shift: shift + 4]
        cis_trans_1[i] = a << 4 | (b & 0xf0) >> 4
        cis_trans_2[i] = (b & 0x0f) << 8 | c
        ct_sign[i] = d
        shift += 4

    # convert C buffers into python objects.
    py_mapping = []
    py_atoms = []
    py_isotopes = []
    py_neighbors = []
    py_connections = []
    py_orders = []

    py_charges = {}
    py_radicals = {}
    py_hydrogens = {}
    py_plane = {}
    py_atoms_stereo = {}
    py_allenes_stereo = {}
    py_cis_trans_stereo = {}
    for i in range(na):
        n = mapping[i]
        py_mapping.append(n)
        py_atoms.append(atom[i])
        py_isotopes.append(isotopes[i])
        py_neighbors.append(neighbors[i])

        py_charges[n] = charges[i]
        py_radicals[n] = radicals[i]
        py_hydrogens[n] = hydrogens[i]

        py_xy = (x[i], y[i])
        py_plane[n] = py_xy

        # stereo dicts contain only atoms with the corresponding flag set.
        if is_tet[i]:
            py_atoms_stereo[n] = tet_sign[i]
        if is_all[i]:
            py_allenes_stereo[n] = all_sign[i]

    # only the first nb orders are real. the rest is the padding of the last group.
    for i in range(nb):
        py_orders.append(orders[i])
    for i in range(nb * 2):
        py_connections.append(connections[i])

    for i in range(nct):
        py_xy = (cis_trans_1[i], cis_trans_2[i])
        py_cis_trans_stereo[py_xy] = ct_sign[i]

    return (py_mapping, py_atoms, py_isotopes, py_neighbors, py_connections, py_orders,
            py_charges, py_radicals, py_hydrogens, py_plane,
            py_atoms_stereo, py_allenes_stereo, py_cis_trans_stereo)


# Isotope base values, indexed by atomic number (119 entries: indexes 0-118).
# unpack() adds the non-zero 5 bit isotope field of an atom record to the entry of its element
# to get the isotope number.
# NOTE(review): values look like (common isotope mass - 16), i.e. the stored field is shifted by 16 -
# confirm against pack() and CGRtools.files._mdl.mol.common_isotopes.
cdef short[119] common_isotopes
common_isotopes[:] = [0, -15, -12, -9, -7, -5, -4, -2, 0, 3, 4, 7, 8, 11, 12, 15, 16, 19, 24, 23, 24, 29,
32, 35, 36, 39, 40, 43, 43, 48, 49, 54, 57, 59, 63, 64, 68, 69, 72, 73, 75, 77,
80, 82, 85, 87, 90, 92, 96, 99, 103, 106, 112, 111, 115, 117, 121, 123, 124, 125,
128, 129, 134, 136, 141, 143, 147, 149, 151, 153, 157, 159, 162, 165, 168, 170,
174, 176, 179, 181, 185, 188, 191, 193, 193, 194, 206, 207, 210, 211, 216, 215,
222, 221, 228, 227, 231, 231, 235, 236, 241, 242, 243, 244, 245, 254, 253, 254,
254, 262, 265, 265, 269, 262, 273, 273, 277, 281, 278]


from timeit import timeit
46 changes: 44 additions & 2 deletions CGRtools/containers/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -728,16 +728,58 @@ def pack(self) -> bytes:
for o, ((n, m), s) in enumerate(cis_trans_stereo.items()):
pack_into('>I', data, shift + 4 * o, (n << 20) | (m << 8) | s)

# 16 bit - neighbor | 3 bit bond type
return compress(bytes(data), 9)

@classmethod
def unpack(cls, data: bytes) -> 'MoleculeContainer':
    """
    Unpack from compressed bytes.

    The payload is decompressed once and decoded by the compiled `_unpack.unpack` helper.
    The molecule is then assembled directly from the decoded containers.

    :param data: compressed bytes produced by `pack`.
    :return: restored molecule.
    """
    from ._unpack import unpack

    # data should be decompressed exactly once: the compiled decoder expects raw bytes.
    (mapping, atom_numbers, isotopes, neighbors, connections, orders, charges, radicals, hydrogens, plane,
     atoms_stereo, allenes_stereo, cis_trans_stereo) = unpack(decompress(data))

    # instance is created without calling __init__. all attributes used below are set manually.
    mol = object.__new__(cls)
    mol._plane = plane
    mol._charges = charges
    mol._radicals = radicals
    mol._hydrogens = hydrogens
    mol._atoms_stereo = atoms_stereo
    mol._allenes_stereo = allenes_stereo
    mol._cis_trans_stereo = cis_trans_stereo

    mol._conformers = []
    mol._hybridizations = {}
    atoms = mol._atoms = {}
    bonds = mol._bonds = {}

    for n, a, i in zip(mapping, atom_numbers, isotopes):
        a = Element.from_atomic_number(a)
        atoms[n] = a = a(i or None)  # zero isotope means not specified
        a._attach_to_graph(mol, n)

    # connections is a flat list of neighbors in atoms order. orders has one entry per bond,
    # consumed when a bond is met for the first time.
    con = iter(connections)
    ords = iter(orders)
    for n, ms in zip(mapping, neighbors):
        bonds[n] = cbn = {}
        for _ in range(ms):
            m = next(con)
            if m in bonds:  # bond partially exists. need back-connection.
                cbn[m] = bonds[m][n]
            else:
                cbn[m] = Bond(next(ords))
        mol._calc_hybridization(n)
    return mol

@classmethod
def pure_unpack(cls, data: bytes) -> 'MoleculeContainer':
"""
Unpack from compressed bytes. Python implementation.
"""
from ..files._mdl.mol import common_isotopes

data = memoryview(decompress(data))
mol = cls()
atoms = mol._atoms
bonds = mol._bonds
Expand Down
10 changes: 7 additions & 3 deletions CGRtools/periodictable/element/element.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,14 @@ def from_atomic_number(cls, number: int) -> Type['Element']:
get Element class by its number
"""
try:
element = next(x for x in Element.__subclasses__() if x.atomic_number.fget(None) == number)
except StopIteration:
elements = cls.__class_cache__['elements']
except KeyError:
elements = {x.atomic_number.fget(None): x for x in Element.__subclasses__()}
cls.__class_cache__['elements'] = elements
try:
return elements[number]
except KeyError:
raise ValueError(f'Element with number "{number}" not found')
return element

@classmethod
def from_atom(cls, atom: 'Element') -> 'Element':
Expand Down
20 changes: 17 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@
# along with this program; if not, see <https://www.gnu.org/licenses/>.
#
from distutils.command.sdist import sdist
from distutils.command.build import build
from distutils.util import get_platform
from importlib.util import find_spec
from pathlib import Path
from setuptools import setup
from setuptools import setup, Extension


class _sdist(sdist):
Expand Down Expand Up @@ -49,9 +50,19 @@ def finalize_options(self):
cmd_class['bdist_wheel'] = _bdist_wheel


if find_spec('cython') is not None:
    class _build(build):
        """Build command which cythonizes declared extension modules."""

        def finalize_options(self):
            # stock options should be resolved first. Cython is imported lazily, only when build is requested.
            super().finalize_options()
            from Cython.Build import cythonize

            dist = self.distribution
            dist.ext_modules = cythonize(dist.ext_modules, language_level=3)

    cmd_class['build'] = _build


setup(
name='CGRtools',
version='4.2.18',
version='4.2.19',
packages=['CGRtools', 'CGRtools.algorithms', 'CGRtools.algorithms.calculate2d', 'CGRtools.algorithms.components',
'CGRtools.algorithms.standardize', 'CGRtools.algorithms.tautomers', 'CGRtools.containers',
'CGRtools.files', 'CGRtools.files._mdl', 'CGRtools.periodictable', 'CGRtools.periodictable.element',
Expand All @@ -62,10 +73,13 @@ def finalize_options(self):
author_email='nougmanoff@protonmail.com',
python_requires='>=3.6.1',
cmdclass=cmd_class,
ext_modules=[Extension('CGRtools.containers._unpack', ['CGRtools/containers/_unpack.pyx'],
extra_compile_args=['-O3'])],
setup_requires=['wheel', 'cython'],
install_requires=['CachedMethods>=0.1.4,<0.2', 'lazy_object_proxy>=1.6'],
extras_require={'mrv': ['lxml>=4.1'], 'clean2d': ['py-mini-racer>=0.4.0'], 'jit': ['numpy>=1.18', 'numba>=0.50'],
'pytest': ['pytest'], 'screening': ['StructureFingerprint>=2.1']},
package_data={'CGRtools.algorithms.calculate2d': ['clean2d.js']},
package_data={'CGRtools.algorithms.calculate2d': ['clean2d.js'], 'CGRtools.containers': ['_unpack.pyx']},
data_files=[],
zip_safe=False,
long_description=(Path(__file__).parent / 'README.rst').read_text(),
Expand Down

0 comments on commit e4953b7

Please sign in to comment.