Elements isomorphism update!

* Elements now compare implicit hydrogen counts. * Element hashes now include the implicit hydrogen count. * Tautomer tests fixed. * The substructure method now supports skipping marks.
cimm-kzn · Jul 25, 2021 · a6a4a54 · a6a4a54
1 parent e907f17
commit a6a4a54
Show file tree

Hide file tree

Showing 6 changed files with 69 additions and 36 deletions.
diff --git a/CGRtools/algorithms/tautomers/test/test_tautomers.py b/CGRtools/algorithms/tautomers/test/test_tautomers.py
@@ -24,11 +24,14 @@ def test_keto_enol_2h_pyrrole():
     """
     2H‐pyrrole. [N:1]=1[C:2][C:3]=,:[C:4][C:5]=1
     """
-    for t, v in zip(['C1C=CC=N1', 'C1N=CC2=C1C=CC=C2'], ['c1cc[nH]c1', 'c12c(cccc1)c[nH]c2']):
+    for t, v in zip(['C1C=CC=N1', 'C1N=CC2=C1C=CC=C2'], ['C=1C=CNC=1', 'N1C=C2C=CC=CC2=C1']):
         s = smiles(t)
-        t = list(s.enumerate_tautomers())
+        t = set(s.enumerate_tautomers())
+        v = smiles(v)
+        s.thiele()
+        v.thiele()
         assert len(t) == 2, ' '.join(str(x) for x in t)
-        assert t == {s, smiles(v)}
+        assert t == {s, v}, f'{", ".join(str(x) for x in t)} != {s}, {v}'
 
 
 def test_acid_protonated_nitrogen():
@@ -60,11 +63,11 @@ def test_base_nitrogen():
                              ('CN(C)C(=NN)N(C)C.Cl', 'CN(C)C(N(C)C)=[NH+]N.[Cl-]'),
                              ('Cl.NC(=N)OC', '[NH2+]=C(N)OC.[Cl-]'), ('Cl.NC(=N)SC', '[NH2+]=C(N)SC.[Cl-]'),
                              ('COC(OC)=N.Cl', 'COC(OC)=[NH2+].[Cl-]'),
-                             ('COC(C)=N.Cl', 'COC(C)=[NH2+].[Cl-]'),
+                             ('COC(C)=N.Cl', 'COC(C)=[NH2+].[Cl-]', 'C(N)(OC)=C.Cl'),
                              ('CNN.Cl', 'CN[NH3+].[Cl-]', 'C[NH2+]N.[Cl-]'),
                              ('CN.Cl', 'C[NH3+].[Cl-]')]):
         s = smiles(t)
-        t = set(s.enumerate_tautomers())
+        t = set(s.enumerate_tautomers(full=True))
         if v:
             assert len(t) == len(v), ' '.join(str(x) for x in t)
             vs = set()

diff --git a/CGRtools/containers/cgr.py b/CGRtools/containers/cgr.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-#  Copyright 2017-2020 Ramil Nugmanov <nougmanoff@protonmail.com>
+#  Copyright 2017-2021 Ramil Nugmanov <nougmanoff@protonmail.com>
 #  This file is part of CGRtools.
 #
 #  CGRtools is free software; you can redistribute it and/or modify
@@ -164,14 +164,16 @@ def copy(self, **kwargs) -> 'CGRContainer':
         copy._p_charges = self._p_charges.copy()
         return copy
 
-    def substructure(self, atoms, *, as_query: bool = False, **kwargs) -> Union['CGRContainer',
-                                                                                'query.QueryCGRContainer']:
+    def substructure(self, atoms, *, as_query: bool = False, skip_neighbors_marks=False,
+                     skip_hybridizations_marks=False, **kwargs) -> Union['CGRContainer', 'query.QueryCGRContainer']:
         """
         create substructure containing atoms from atoms list
 
         :param atoms: list of atoms numbers of substructure
         :param meta: if True metadata will be copied to substructure
         :param as_query: return Query object based on graph substructure
+        :param skip_neighbors_marks: Don't set neighbors count marks on substructured queries
+        :param skip_hybridizations_marks: Don't set hybridizations marks on substructured queries
         """
         sub, atoms = super().substructure(atoms, graph_type=query.QueryCGRContainer if as_query else self.__class__,
                                           atom_type=DynamicQueryElement if as_query else DynamicElement,
@@ -182,19 +184,26 @@ def substructure(self, atoms, *, as_query: bool = False, **kwargs) -> Union['CGR
         sub._p_radicals = {n: spr[n] for n in atoms}
 
         if as_query:
-            sh = self._hybridizations
-            sph = self._p_hybridizations
-            ngb = self.neighbors
-
-            sub._hybridizations = {n: (sh[n],) for n in atoms}
-            sub._p_hybridizations = {n: (sph[n],) for n in atoms}
-
-            sub._neighbors = cn = {}
-            sub._p_neighbors = cpn = {}
-            for n in atoms:
-                sn, pn = ngb(n)
-                cn[n] = (sn,)
-                cpn[n] = (pn,)
+            if skip_hybridizations_marks:
+                sub._hybridizations = {n: () for n in atoms}
+                sub._p_hybridizations = {n: () for n in atoms}
+            else:
+                sh = self._hybridizations
+                sph = self._p_hybridizations
+                sub._hybridizations = {n: (sh[n],) for n in atoms}
+                sub._p_hybridizations = {n: (sph[n],) for n in atoms}
+
+            if skip_neighbors_marks:
+                sub._neighbors = {n: () for n in atoms}
+                sub._p_neighbors = {n: () for n in atoms}
+            else:
+                ngb = self.neighbors
+                sub._neighbors = cn = {}
+                sub._p_neighbors = cpn = {}
+                for n in atoms:
+                    sn, pn = ngb(n)
+                    cn[n] = (sn,)
+                    cpn[n] = (pn,)
         else:
             sub._conformers = [{n: c[n] for n in atoms} for c in self._conformers]
             # recalculate query marks

diff --git a/CGRtools/containers/molecule.py b/CGRtools/containers/molecule.py
@@ -224,8 +224,9 @@ def copy(self, **kwargs) -> 'MoleculeContainer':
         copy._cis_trans_stereo = self._cis_trans_stereo.copy()
         return copy
 
-    def substructure(self, atoms, *, as_query: bool = False, **kwargs) -> Union['MoleculeContainer',
-                                                                                'query.QueryContainer']:
+    def substructure(self, atoms, *, as_query: bool = False, skip_neighbors_marks=False,
+                     skip_hybridizations_marks=False, skip_hydrogens_marks=False, skip_rings_sizes_marks=False,
+                     **kwargs) -> Union['MoleculeContainer', 'query.QueryContainer']:
         """
         Create substructure containing atoms from atoms list.
 
@@ -236,17 +237,17 @@ def substructure(self, atoms, *, as_query: bool = False, **kwargs) -> Union['Mol
         :param atoms: list of atoms numbers of substructure
         :param meta: if True metadata will be copied to substructure
         :param as_query: return Query object based on graph substructure
+        :param skip_neighbors_marks: Don't set neighbors count marks on substructured queries
+        :param skip_hybridizations_marks: Don't set hybridizations marks on substructured queries
+        :param skip_hydrogens_marks: Don't set hydrogens count marks on substructured queries
+        :param skip_rings_sizes_marks: Don't set rings_sizes marks on substructured queries
         """
         sub, atoms = super().substructure(atoms, graph_type=query.QueryContainer if as_query else self.__class__,
                                           atom_type=QueryElement if as_query else Element,
                                           bond_type=QueryBond if as_query else Bond, **kwargs)
         if as_query:
             sa = self._atoms
             sb = self._bonds
-            sh = self._hybridizations
-            shg = self._hydrogens
-            sn = self.neighbors
-            rs = self.atoms_rings_sizes.copy()
 
             lost = {n for n, a in sa.items() if a.atomic_number != 1} - set(atoms)  # atoms not in substructure
             not_skin = {n for n in atoms if lost.isdisjoint(sb[n])}
@@ -258,11 +259,28 @@ def substructure(self, atoms, *, as_query: bool = False, **kwargs) -> Union['Mol
                                      if not_skin.issuperset(self._stereo_cis_trans_paths[nm]) and
                                         not_skin.issuperset(x for x in self._stereo_cis_trans[nm] if x)}
 
-            sub._neighbors = {n: (sn(n),) for n in atoms}
-            sub._hybridizations = {n: (sh[n],) for n in atoms}
-            sub._hydrogens = {n: () if shg[n] is None else (shg[n],) for n in atoms}
-            sub._rings_sizes = {n: rs.get(n, ()) for n in atoms}
             sub._heteroatoms = {n: () for n in atoms}
+
+            if skip_hybridizations_marks:
+                sub._hybridizations = {n: () for n in atoms}
+            else:
+                sh = self._hybridizations
+                sub._hybridizations = {n: (sh[n],) for n in atoms}
+            if skip_neighbors_marks:
+                sub._neighbors = {n: () for n in atoms}
+            else:
+                sn = self.neighbors
+                sub._neighbors = {n: (sn(n),) for n in atoms}
+            if skip_hydrogens_marks:
+                sub._hydrogens = {n: () for n in atoms}
+            else:
+                shg = self._hydrogens
+                sub._hydrogens = {n: () if shg[n] is None else (shg[n],) for n in atoms}
+            if skip_rings_sizes_marks:
+                sub._rings_sizes = {n: () for n in atoms}
+            else:
+                rs = self.atoms_rings_sizes
+                sub._rings_sizes = {n: rs.get(n, ()) for n in atoms}
         else:
             sub._conformers = [{n: c[n] for n in atoms} for c in self._conformers]
 

diff --git a/CGRtools/containers/query.py b/CGRtools/containers/query.py
@@ -343,7 +343,8 @@ def fingerprints(self) -> Tuple[Dict[int, FrozenSet[int]], ...]:
             if not chains:
                 continue
 
-            atoms = {idx: int(atom) for idx, atom in mol.atoms()}
+            atoms = {idx: tuple_hash((atom.isotope or 0, atom.atomic_number, atom.charge, atom.is_radical))
+                     for idx, atom in mol.atoms()}
             bonds = mol._bonds
             out = defaultdict(list)
 

diff --git a/CGRtools/periodictable/element/element.py b/CGRtools/periodictable/element/element.py
@@ -126,10 +126,12 @@ def __eq__(self, other):
         compare attached to molecules elements
         """
         return isinstance(other, Element) and self.atomic_number == other.atomic_number and \
-            self.isotope == other.isotope and self.charge == other.charge and self.is_radical == other.is_radical
+            self.isotope == other.isotope and self.charge == other.charge and self.is_radical == other.is_radical and \
+            self.implicit_hydrogens == other.implicit_hydrogens
 
     def __hash__(self):
-        return tuple_hash((self.isotope or 0, self.atomic_number, self.charge, self.is_radical))
+        return tuple_hash((self.isotope or 0, self.atomic_number, self.charge, self.is_radical,
+                           self.implicit_hydrogens or 0))
 
     def __setstate__(self, state):
         if 'charge' in state:  # 3.1

diff --git a/setup.py b/setup.py
@@ -51,7 +51,7 @@ def finalize_options(self):
 
 setup(
     name='CGRtools',
-    version='4.2.17',
+    version='4.2.18',
     packages=['CGRtools', 'CGRtools.algorithms', 'CGRtools.algorithms.calculate2d', 'CGRtools.algorithms.components',
               'CGRtools.algorithms.standardize', 'CGRtools.algorithms.tautomers', 'CGRtools.containers',
               'CGRtools.files', 'CGRtools.files._mdl', 'CGRtools.periodictable', 'CGRtools.periodictable.element',
@@ -64,7 +64,7 @@ def finalize_options(self):
     cmdclass=cmd_class,
     install_requires=['CachedMethods>=0.1.4,<0.2', 'lazy_object_proxy>=1.6'],
     extras_require={'mrv': ['lxml>=4.1'], 'clean2d': ['py-mini-racer>=0.4.0'], 'jit': ['numpy>=1.18', 'numba>=0.50'],
-                    'pytest': ['pytest'], 'screening': ['StructureFingerprint>=2.0']},
+                    'pytest': ['pytest'], 'screening': ['StructureFingerprint>=2.1']},
     package_data={'CGRtools.algorithms.calculate2d': ['clean2d.js']},
     data_files=[],
     zip_safe=False,