Skip to content

Commit

Permalink
Tautomers update (#178)
Browse files Browse the repository at this point in the history
release candidate

* Acid-base tautomerism rules updated.
* Unit tests for the rules added.
* Ring-chain tautomerism removed (no way to support it).
* Standardization (std) rules fixed.
* Keto-enol rules removed.
* Cache fixed.
* Keto-enol algorithm reimplemented.
* `neutralize` method renamed to `__fix_resonance`.
* Module refactored.
* Rules separated.
* Salts neutralization method implemented.
* Tautomerize API changed.
  • Loading branch information
stsouko committed Jul 25, 2021
1 parent 0f379ec commit e907f17
Show file tree
Hide file tree
Showing 12 changed files with 1,013 additions and 925 deletions.
4 changes: 2 additions & 2 deletions CGRtools/algorithms/components/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ def not_special_bonds(self) -> Dict[int, Dict[int, Union[Bond, QueryBond, Dynami
if b != 8:
ngb[m] = b
bonds[n] = FrozenDict(ngb)
return FrozenDict(bonds)
return bonds

@cached_property
def not_special_connectivity(self) -> Dict[int, FrozenSet[int]]:
Expand All @@ -210,7 +210,7 @@ def not_special_connectivity(self) -> Dict[int, FrozenSet[int]]:
if b != 8:
ngb.add(m)
bonds[n] = frozenset(ngb)
return FrozenDict(bonds)
return bonds

@cached_property
def atoms_rings(self) -> Dict[int, Tuple[Tuple[int, ...]]]:
Expand Down
4 changes: 2 additions & 2 deletions CGRtools/algorithms/huckel.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class Huckel:
__slots__ = ()

@cached_property
def huckel_pi_electrons_energy(self) -> float:
def huckel_energy(self) -> float:
"""
Huckel method based Pi electrons energy calculator.
Parametrized for B C N O S.
Expand All @@ -70,7 +70,7 @@ def huckel_pi_electrons_energy(self) -> float:
if ah or ac or ar: # unsaturated carbon
adj[n] = {}
alpha[n], beta[n], electrons[n] = basis[(6, ac, ar, ah)]
elif an in (5, 7, 8, 15, 16, 33, 34):
elif an in (5, 7, 8, 16):
try:
alpha[n], beta[n], electrons[n] = basis[(an, ac, ar, ah)]
except KeyError: # not parametrized or don't have Pi orbitals
Expand Down
151 changes: 81 additions & 70 deletions CGRtools/algorithms/standardize/molecule.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def standardize(self: Union['MoleculeContainer', 'Standardize'], *, fix_stereo=T
:param logging: return list of fixed atoms with matched rules.
"""
neutralized = self.neutralize(fix_stereo=False, logging=logging)
neutralized = self.__fix_resonance(logging=logging)
hs, log = self.__standardize()
if hs:
if not neutralized:
Expand All @@ -81,80 +81,20 @@ def standardize(self: Union['MoleculeContainer', 'Standardize'], *, fix_stereo=T
self._fix_stereo()
if logging:
if neutralized:
log.append((tuple(neutralized), -1, 'neutralized'))
log.append((tuple(neutralized), -1, 'resonance'))
return log
return True
if neutralized:
if fix_stereo:
self._fix_stereo()
if logging:
log.append((tuple(neutralized), -1, 'neutralized'))
log.append((tuple(neutralized), -1, 'resonance'))
return log
return True
if logging:
return log
return False

def neutralize(self: Union['MoleculeContainer', 'Standardize'], *, fix_stereo=True, logging=False) -> \
Union[bool, List[int]]:
"""
Transform biradical or dipole resonance structures into neutral form.

The structure is edited in place: bond orders along a found path are patched,
and the radical marks or charges of both path ends are reset.

:param fix_stereo: call the stereo fixing routine after the structure was changed.
:param logging: return list of changed atoms (empty list if nothing changed) instead of a flag.
:return: True if structure form changed, otherwise False; with logging set, the list of changed atoms.
"""
atoms = self._atoms
charges = self._charges
radicals = self._radicals
bonds = self._bonds
# NOTE(review): judging by the comments and usage below, entries are negatively charged atoms,
# exits are positively charged atoms and rads are radical atoms; __entries is not fully visible
# here, so confirm against its implementation.
entries, exits, rads, constrains = self.__entries()
hs = set()  # atoms touched by any applied path
# biradical part: at least two radical atoms are required to pair them.
while len(rads) > 1:
n = rads.pop()
for path in self.__find_delocalize_path(n, rads, constrains):
# last step of the path: previous atom, terminal atom and the new order of the bond between them.
l, m, b = path[-1]
if b == 1:  # the last bond must become a pi-bond; skip paths ending with a single bond
continue
try:
# validate terminal atom as non-radical: total order of its other bonds plus the new last bond.
atoms[m].valence_rules(charges[m], False, sum(int(y) for x, y in bonds[m].items() if x != l) + b)
except ValenceError:
continue
self.__patch_path(path)
radicals[n] = radicals[m] = False
rads.discard(m)
hs.add(n)
hs.update(x for _, x, _ in path)
break  # path found
# no valid path found: atom n is left as is
# dipole part: requires both kinds of path ends to be available.
while entries and exits:
n = entries.pop()
for path in self.__find_delocalize_path(n, exits, constrains):
# last step of the path: previous atom, terminal atom and the new order of the bond between them.
l, m, b = path[-1]
try:
# validate terminal atom with its charge decreased by one and the new last bond order.
atoms[m].valence_rules(charges[m] - 1, radicals[m],
sum(int(y) for x, y in bonds[m].items() if x != l) + b)
except ValenceError:
continue
self.__patch_path(path)
charges[n] = charges[m] = 0
exits.discard(m)
hs.add(n)
hs.update(x for _, x, _ in path)
break  # path from negative atom to positive atom found.
# no valid path found: negative atom n is left as is
if hs:
# structure changed: drop cached data and recalculate per-atom properties of the touched atoms.
self.flush_cache()
for n in hs:
self._calc_implicit(n)
self._calc_hybridization(n)
if fix_stereo:
self._fix_stereo()
if logging:
return list(hs)
return True
# nothing changed
if logging:
return []
return False

def standardize_charges(self: Union['MoleculeContainer', 'Standardize'], *, fix_stereo=True,
logging=False, prepare_molecule=True) -> \
Union[bool, List[int]]:
Expand Down Expand Up @@ -473,10 +413,64 @@ def __standardize(self: Union['MoleculeContainer', 'Standardize']):
hs.update(seen)
return hs, log

def __patch_path(self: 'MoleculeContainer', path):
def __fix_resonance(self: Union['MoleculeContainer', 'Standardize'], *, logging=False) -> Union[bool, List[int]]:
"""
Transform biradical or dipole resonance structures into neutral form. Return True if structure form changed.
:param logging: return list of changed atoms.
"""
atoms = self._atoms
charges = self._charges
radicals = self._radicals
bonds = self._bonds
for n, m, b in path:
bonds[n][m]._Bond__order = b
entries, exits, rads, constrains = self.__entries()
hs = set()
while len(rads) > 1:
n = rads.pop()
for path in self.__find_delocalize_path(n, rads, constrains):
l, m, b = path[-1]
if b == 1: # required pi-bond
continue
try:
atoms[m].valence_rules(charges[m], False, sum(int(y) for x, y in bonds[m].items() if x != l) + b)
except ValenceError:
continue
radicals[n] = radicals[m] = False
rads.discard(m)
hs.add(n)
hs.update(x for _, x, _ in path)
for n, m, b in path:
bonds[n][m]._Bond__order = b
break # path found
# path not found. atom n keep as is
while entries and exits:
n = entries.pop()
for path in self.__find_delocalize_path(n, exits, constrains):
l, m, b = path[-1]
try:
atoms[m].valence_rules(charges[m] - 1, radicals[m],
sum(int(y) for x, y in bonds[m].items() if x != l) + b)
except ValenceError:
continue
charges[n] = charges[m] = 0
exits.discard(m)
hs.add(n)
hs.update(x for _, x, _ in path)
for n, m, b in path:
bonds[n][m]._Bond__order = b
break # path from negative atom to positive atom found.
# path not found. keep negative atom n as is
if hs:
self.flush_cache()
for n in hs:
self._calc_implicit(n)
self._calc_hybridization(n)
if logging:
return list(hs)
return True
if logging:
return []
return False

def __find_delocalize_path(self: 'MoleculeContainer', start, finish, constrains):
bonds = self._bonds
Expand Down Expand Up @@ -507,15 +501,14 @@ def __entries(self: 'MoleculeContainer'):
charges = self._charges
radicals = self._radicals
atoms = self._atoms
hybs = self._hybridizations
bonds = self._bonds

transfer = set()
entries = set()
exits = set()
rads = set()
for n, a in atoms.items():
if a.atomic_number not in {5, 6, 7, 8, 14, 15, 16, 33, 34, 52} or hybs[n] == 4:
if a.atomic_number not in {5, 6, 7, 8, 14, 15, 16, 33, 34, 52}:
# filter non-organic set, halogens and aromatics
continue
if charges[n] == -1:
Expand Down Expand Up @@ -1236,11 +1229,11 @@ def __standardize_rules():

#
# | |
# - N - ? >> - [N+] - ?
# - N - >> - [N+] -
# \\ |
# [O,N] [O,N-]
#
atoms = ({'atom': 'N', 'neighbors': (3, 4), 'hybridization': 2},
atoms = ({'atom': 'N', 'neighbors': 4, 'hybridization': 2},
{'atom': ListElement(['O', 'N']), 'hybridization': 2})
bonds = ((1, 2, 2),)
atom_fix = {1: {'charge': 1, 'hybridization': 1}, 2: {'charge': -1, 'hybridization': 1}}
Expand Down Expand Up @@ -1364,6 +1357,10 @@ def __standardize_rules():
bonds_fix = ((1, 2, 1), (1, 3, 2))
rules.append((atoms, bonds, atom_fix, bonds_fix))

# todo:
# [C;a:10][N;H:2][N:3]=[C:4]1[C:5]=,:[C:6][C:7](=[O:1])[C:8]=,:[C:9]1
# [C;a:10][N;H:2][N:3]=[C:4]1[C:5](=[O:1])[C:6]=,:[C:7]-,:[C:8]=,:[C:9]1

#
# A A
# | |
Expand Down Expand Up @@ -1584,6 +1581,20 @@ def __standardize_rules():
bonds_fix = ((1, 2, 1),)
rules.append((atoms, bonds, atom_fix, bonds_fix))

#
# OH O
# / //
# C = C >> C - C
# \ \
# [O,N] [O,N]
#
atoms = ({'atom': 'O', 'neighbors': 1}, {'atom': ListElement(['O', 'N'])},
{'atom': 'C'}, {'atom': 'C', 'hybridization': 2})
bonds = ((1, 3, 1), (2, 3, 1), (3, 4, 2))
atom_fix = {1: {'hybridization': 2}, 4: {'hybridization': 1}}
bonds_fix = ((1, 3, 2), (3, 4, 1))
rules.append((atoms, bonds, atom_fix, bonds_fix))

#
# R - N - C R - N - C
# / || / ||
Expand Down
24 changes: 0 additions & 24 deletions CGRtools/algorithms/standardize/reaction.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,30 +98,6 @@ def standardize(self: 'ReactionContainer', fix_mapping: bool = True, *, logging=
self.flush_cache()
return total

def neutralize(self: 'ReactionContainer', *, logging=False) -> Union[bool, Tuple[int, Tuple[int, ...]]]:
    """
    Transform biradical or dipole resonance structures into neutral form. Works only for Molecules.

    :param logging: return a list of log entries instead of a flag. Each entry is a pair of
        the molecule index in the reaction and the tuple of atoms changed in that molecule.
        Otherwise return True if any molecule was changed.
    :raises TypeError: if the reaction contains anything other than MoleculeContainer.
    """
    total = [] if logging else False
    for n, m in enumerate(self.molecules()):
        if not isinstance(m, molecule.MoleculeContainer):
            raise TypeError('Only Molecules supported')
        out = m.neutralize(logging=logging)
        if not out:
            continue  # molecule was not changed
        if logging:
            # with logging enabled the molecule returns a flat list of atom numbers,
            # so it is stored as a single entry. Iterating it and calling tuple()
            # on every atom number would raise TypeError.
            total.append((n, tuple(out)))
        else:
            total = True
    if total:
        # at least one molecule was modified: cached reaction data is stale.
        self.flush_cache()
    return total

def fix_mapping(self: Union['ReactionContainer', 'StandardizeReaction'], *, logging: bool = False) -> bool:
"""
Fix atom-to-atom mapping of some functional groups. Return True if found AAM errors.
Expand Down

0 comments on commit e907f17

Please sign in to comment.