Skip to content

Commit

Permalink
Elements isomorphism update!
Browse files Browse the repository at this point in the history
* Element equality now also compares the implicit hydrogen count.
* Element hashes now include the implicit hydrogen count.
* Tautomer tests fixed.
* The substructure method now supports skipping query marks.
  • Loading branch information
stsouko committed Jul 25, 2021
1 parent e907f17 commit a6a4a54
Show file tree
Hide file tree
Showing 6 changed files with 69 additions and 36 deletions.
13 changes: 8 additions & 5 deletions CGRtools/algorithms/tautomers/test/test_tautomers.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,14 @@ def test_keto_enol_2h_pyrrole():
"""
2H‐pyrrole. [N:1]=1[C:2][C:3]=,:[C:4][C:5]=1
"""
for t, v in zip(['C1C=CC=N1', 'C1N=CC2=C1C=CC=C2'], ['c1cc[nH]c1', 'c12c(cccc1)c[nH]c2']):
for t, v in zip(['C1C=CC=N1', 'C1N=CC2=C1C=CC=C2'], ['C=1C=CNC=1', 'N1C=C2C=CC=CC2=C1']):
s = smiles(t)
t = list(s.enumerate_tautomers())
t = set(s.enumerate_tautomers())
v = smiles(v)
s.thiele()
v.thiele()
assert len(t) == 2, ' '.join(str(x) for x in t)
assert t == {s, smiles(v)}
assert t == {s, v}, f'{", ".join(str(x) for x in t)} != {s}, {v}'


def test_acid_protonated_nitrogen():
Expand Down Expand Up @@ -60,11 +63,11 @@ def test_base_nitrogen():
('CN(C)C(=NN)N(C)C.Cl', 'CN(C)C(N(C)C)=[NH+]N.[Cl-]'),
('Cl.NC(=N)OC', '[NH2+]=C(N)OC.[Cl-]'), ('Cl.NC(=N)SC', '[NH2+]=C(N)SC.[Cl-]'),
('COC(OC)=N.Cl', 'COC(OC)=[NH2+].[Cl-]'),
('COC(C)=N.Cl', 'COC(C)=[NH2+].[Cl-]'),
('COC(C)=N.Cl', 'COC(C)=[NH2+].[Cl-]', 'C(N)(OC)=C.Cl'),
('CNN.Cl', 'CN[NH3+].[Cl-]', 'C[NH2+]N.[Cl-]'),
('CN.Cl', 'C[NH3+].[Cl-]')]):
s = smiles(t)
t = set(s.enumerate_tautomers())
t = set(s.enumerate_tautomers(full=True))
if v:
assert len(t) == len(v), ' '.join(str(x) for x in t)
vs = set()
Expand Down
41 changes: 25 additions & 16 deletions CGRtools/containers/cgr.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright 2017-2020 Ramil Nugmanov <nougmanoff@protonmail.com>
# Copyright 2017-2021 Ramil Nugmanov <nougmanoff@protonmail.com>
# This file is part of CGRtools.
#
# CGRtools is free software; you can redistribute it and/or modify
Expand Down Expand Up @@ -164,14 +164,16 @@ def copy(self, **kwargs) -> 'CGRContainer':
copy._p_charges = self._p_charges.copy()
return copy

def substructure(self, atoms, *, as_query: bool = False, **kwargs) -> Union['CGRContainer',
'query.QueryCGRContainer']:
def substructure(self, atoms, *, as_query: bool = False, skip_neighbors_marks=False,
skip_hybridizations_marks=False, **kwargs) -> Union['CGRContainer', 'query.QueryCGRContainer']:
"""
create substructure containing atoms from atoms list
:param atoms: list of atoms numbers of substructure
:param meta: if True metadata will be copied to substructure
:param as_query: return Query object based on graph substructure
:param skip_neighbors_marks: Don't set neighbors count marks on substructured queries
:param skip_hybridizations_marks: Don't set hybridizations marks on substructured queries
"""
sub, atoms = super().substructure(atoms, graph_type=query.QueryCGRContainer if as_query else self.__class__,
atom_type=DynamicQueryElement if as_query else DynamicElement,
Expand All @@ -182,19 +184,26 @@ def substructure(self, atoms, *, as_query: bool = False, **kwargs) -> Union['CGR
sub._p_radicals = {n: spr[n] for n in atoms}

if as_query:
sh = self._hybridizations
sph = self._p_hybridizations
ngb = self.neighbors

sub._hybridizations = {n: (sh[n],) for n in atoms}
sub._p_hybridizations = {n: (sph[n],) for n in atoms}

sub._neighbors = cn = {}
sub._p_neighbors = cpn = {}
for n in atoms:
sn, pn = ngb(n)
cn[n] = (sn,)
cpn[n] = (pn,)
if skip_hybridizations_marks:
sub._hybridizations = {n: () for n in atoms}
sub._p_hybridizations = {n: () for n in atoms}
else:
sh = self._hybridizations
sph = self._p_hybridizations
sub._hybridizations = {n: (sh[n],) for n in atoms}
sub._p_hybridizations = {n: (sph[n],) for n in atoms}

if skip_neighbors_marks:
sub._neighbors = {n: () for n in atoms}
sub._p_neighbors = {n: () for n in atoms}
else:
ngb = self.neighbors
sub._neighbors = cn = {}
sub._p_neighbors = cpn = {}
for n in atoms:
sn, pn = ngb(n)
cn[n] = (sn,)
cpn[n] = (pn,)
else:
sub._conformers = [{n: c[n] for n in atoms} for c in self._conformers]
# recalculate query marks
Expand Down
38 changes: 28 additions & 10 deletions CGRtools/containers/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,8 +224,9 @@ def copy(self, **kwargs) -> 'MoleculeContainer':
copy._cis_trans_stereo = self._cis_trans_stereo.copy()
return copy

def substructure(self, atoms, *, as_query: bool = False, **kwargs) -> Union['MoleculeContainer',
'query.QueryContainer']:
def substructure(self, atoms, *, as_query: bool = False, skip_neighbors_marks=False,
skip_hybridizations_marks=False, skip_hydrogens_marks=False, skip_rings_sizes_marks=False,
**kwargs) -> Union['MoleculeContainer', 'query.QueryContainer']:
"""
Create substructure containing atoms from atoms list.
Expand All @@ -236,17 +237,17 @@ def substructure(self, atoms, *, as_query: bool = False, **kwargs) -> Union['Mol
:param atoms: list of atoms numbers of substructure
:param meta: if True metadata will be copied to substructure
:param as_query: return Query object based on graph substructure
:param skip_neighbors_marks: Don't set neighbors count marks on substructured queries
:param skip_hybridizations_marks: Don't set hybridizations marks on substructured queries
:param skip_hydrogens_marks: Don't set hydrogens count marks on substructured queries
:param skip_rings_sizes_marks: Don't set rings_sizes marks on substructured queries
"""
sub, atoms = super().substructure(atoms, graph_type=query.QueryContainer if as_query else self.__class__,
atom_type=QueryElement if as_query else Element,
bond_type=QueryBond if as_query else Bond, **kwargs)
if as_query:
sa = self._atoms
sb = self._bonds
sh = self._hybridizations
shg = self._hydrogens
sn = self.neighbors
rs = self.atoms_rings_sizes.copy()

lost = {n for n, a in sa.items() if a.atomic_number != 1} - set(atoms) # atoms not in substructure
not_skin = {n for n in atoms if lost.isdisjoint(sb[n])}
Expand All @@ -258,11 +259,28 @@ def substructure(self, atoms, *, as_query: bool = False, **kwargs) -> Union['Mol
if not_skin.issuperset(self._stereo_cis_trans_paths[nm]) and
not_skin.issuperset(x for x in self._stereo_cis_trans[nm] if x)}

sub._neighbors = {n: (sn(n),) for n in atoms}
sub._hybridizations = {n: (sh[n],) for n in atoms}
sub._hydrogens = {n: () if shg[n] is None else (shg[n],) for n in atoms}
sub._rings_sizes = {n: rs.get(n, ()) for n in atoms}
sub._heteroatoms = {n: () for n in atoms}

if skip_hybridizations_marks:
sub._hybridizations = {n: () for n in atoms}
else:
sh = self._hybridizations
sub._hybridizations = {n: (sh[n],) for n in atoms}
if skip_neighbors_marks:
sub._neighbors = {n: () for n in atoms}
else:
sn = self.neighbors
sub._neighbors = {n: (sn(n),) for n in atoms}
if skip_hydrogens_marks:
sub._hydrogens = {n: () for n in atoms}
else:
shg = self._hydrogens
sub._hydrogens = {n: () if shg[n] is None else (shg[n],) for n in atoms}
if skip_rings_sizes_marks:
sub._rings_sizes = {n: () for n in atoms}
else:
rs = self.atoms_rings_sizes
sub._rings_sizes = {n: rs.get(n, ()) for n in atoms}
else:
sub._conformers = [{n: c[n] for n in atoms} for c in self._conformers]

Expand Down
3 changes: 2 additions & 1 deletion CGRtools/containers/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,8 @@ def fingerprints(self) -> Tuple[Dict[int, FrozenSet[int]], ...]:
if not chains:
continue

atoms = {idx: int(atom) for idx, atom in mol.atoms()}
atoms = {idx: tuple_hash((atom.isotope or 0, atom.atomic_number, atom.charge, atom.is_radical))
for idx, atom in mol.atoms()}
bonds = mol._bonds
out = defaultdict(list)

Expand Down
6 changes: 4 additions & 2 deletions CGRtools/periodictable/element/element.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,12 @@ def __eq__(self, other):
compare attached to molecules elements
"""
return isinstance(other, Element) and self.atomic_number == other.atomic_number and \
self.isotope == other.isotope and self.charge == other.charge and self.is_radical == other.is_radical
self.isotope == other.isotope and self.charge == other.charge and self.is_radical == other.is_radical and \
self.implicit_hydrogens == other.implicit_hydrogens

def __hash__(self):
return tuple_hash((self.isotope or 0, self.atomic_number, self.charge, self.is_radical))
return tuple_hash((self.isotope or 0, self.atomic_number, self.charge, self.is_radical,
self.implicit_hydrogens or 0))

def __setstate__(self, state):
if 'charge' in state: # 3.1
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def finalize_options(self):

setup(
name='CGRtools',
version='4.2.17',
version='4.2.18',
packages=['CGRtools', 'CGRtools.algorithms', 'CGRtools.algorithms.calculate2d', 'CGRtools.algorithms.components',
'CGRtools.algorithms.standardize', 'CGRtools.algorithms.tautomers', 'CGRtools.containers',
'CGRtools.files', 'CGRtools.files._mdl', 'CGRtools.periodictable', 'CGRtools.periodictable.element',
Expand All @@ -64,7 +64,7 @@ def finalize_options(self):
cmdclass=cmd_class,
install_requires=['CachedMethods>=0.1.4,<0.2', 'lazy_object_proxy>=1.6'],
extras_require={'mrv': ['lxml>=4.1'], 'clean2d': ['py-mini-racer>=0.4.0'], 'jit': ['numpy>=1.18', 'numba>=0.50'],
'pytest': ['pytest'], 'screening': ['StructureFingerprint>=2.0']},
'pytest': ['pytest'], 'screening': ['StructureFingerprint>=2.1']},
package_data={'CGRtools.algorithms.calculate2d': ['clean2d.js']},
data_files=[],
zip_safe=False,
Expand Down

0 comments on commit a6a4a54

Please sign in to comment.