Skip to content

Commit

Permalink
implicit H write to mrv added.
Browse files Browse the repository at this point in the history
cxsmiles radicals now supported.
  • Loading branch information
stsouko committed Jul 4, 2021
1 parent d4fc2c6 commit 03106d7
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 15 deletions.
6 changes: 6 additions & 0 deletions CGRtools/files/MRVrw.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@


# Named tuple type 'MRVParseError' with fields number, json, log and meta.
# NOTE(review): presumably describes a record that failed to parse - its usage is not visible here.
parse_error = namedtuple('MRVParseError', ('number', 'json', 'log', 'meta'))
# Element symbols for which the MRV writer normally leaves out the hydrogenCount attribute.
# __convert_structure writes hydrogenCount only when the stored hydrogen count is truthy and
# the atom is either outside this set or is B/N/P (atomic numbers 5, 7, 15) with a
# hybridization value equal to 4.
organic_set = {'B', 'C', 'N', 'O', 'P', 'S', 'Se', 'F', 'Cl', 'Br', 'I'}


def xml_dict(parent_element, stop_list=None):
Expand Down Expand Up @@ -434,10 +435,13 @@ def __convert_structure(self, g):
gc = g._charges
gr = g._radicals
bg = g._bonds
hg = g._hydrogens
hb = g._hybridizations

out = ['<atomArray>']
for n, atom in g._atoms.items():
x, y = gp[n]
ih = hg[n]
out.append(f'<atom id="a{n}" elementType="{atom.atomic_symbol}" '
f'x2="{x * 2:.4f}" y2="{y * 2:.4f}" mrvMap="{n}"')
if gc[n]:
Expand All @@ -446,6 +450,8 @@ def __convert_structure(self, g):
out.append(' radical="monovalent"')
if atom.isotope:
out.append(f' isotope="{atom.isotope}"')
if ih and (atom.atomic_symbol not in organic_set or hb[n] == 4 and atom.atomic_number in (5, 7, 15)):
out.append(f' hydrogenCount="{ih}"')
out.append('/>')
out.append('</atomArray>')

Expand Down
63 changes: 49 additions & 14 deletions CGRtools/files/SMILESrw.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@
from collections import defaultdict
from fileinput import FileInput
from functools import reduce
from itertools import permutations
from itertools import permutations, chain
from io import StringIO, TextIOWrapper
from logging import warning
from operator import or_
from pathlib import Path
from re import split, compile, fullmatch
from re import split, compile, fullmatch, findall, search
from traceback import format_exc
from typing import Union, List, Dict
from warnings import warn
Expand Down Expand Up @@ -70,6 +70,8 @@
dyn_atom_re = compile(r'([1-9][0-9]{0,2})?([A-IK-PR-Zacnopsb][a-ik-pr-vy]?)([+-0][1-4+-]?(>[+-0][1-4+-]?)?)?'
r'([*^](>[*^])?)?')
delimiter = compile(r'[=:]')
# CXSMILES fragment-grouping section: 'f:' followed by comma-separated groups, where each group
# is two or more dot-separated digit runs (e.g. 'f:0.1,2.3.4'). Only digits are matched,
# so the parsed numbers can never be negative.
cx_fragments = compile(r'f:(?:[0-9]+(?:\.[0-9]+)+)(?:,(?:[0-9]+(?:\.[0-9]+)+))*')
# CXSMILES radical section: '^', a single digit 1-7, ':' and a comma-separated list of digit
# runs (e.g. '^1:0,3'). The parser drops the 3-character '^N:' prefix of every match and uses
# the remaining numbers as zero-based positions of atoms to be flagged with is_radical.
cx_radicals = compile(r'\^[1-7]:[0-9]+(?:,[0-9]+)*')


class SMILESRead(CGRRead):
Expand Down Expand Up @@ -190,22 +192,38 @@ def parse(self, smiles: str) -> Union[MoleculeContainer, CGRContainer, ReactionC
if not smi:
self._info('empty smiles')
return {}
elif data and data[0].startswith('|f:'):
try:
contract = [sorted(int(x) for x in x.split('.')) for x in data[0][3:-1].split(',')]
except ValueError:
self._info(f'invalid cxsmiles fragments description: {data[0]}')
contract = None
else:
elif data and data[0].startswith('|') and data[0].endswith('|'):
fr = search(cx_fragments, data[0])
if fr is not None:
contract = [sorted(int(x) for x in x.split('.')) for x in fr.group()[2:].split(',')]
if len({x for x in contract for x in x}) < len([x for x in contract for x in x]):
self._info(f'collisions in cxsmiles fragments description: {data[0]}')
contract = None
elif any(x[0] < 0 for x in contract):
self._info(f'invalid cxsmiles fragments description: {data[0]}')
contract = None
else:

radicals = [int(x) for x in findall(cx_radicals, data[0]) for x in x[3:].split(',')]
if any(x < 0 for x in radicals):
self._info(f'invalid cxsmiles radicals description: {data[0]}')
radicals = []
if len(set(radicals)) != len(radicals):
self._info(f'collisions in cxsmiles radicals description: {data[0]}')
radicals = []
data = data[1:]
else:
radicals = [int(x) for x in findall(cx_radicals, data[0]) for x in x[3:].split(',')]
if radicals:
if any(x < 0 for x in radicals):
self._info(f'invalid cxsmiles radicals description: {data[0]}')
radicals = []
if len(set(radicals)) != len(radicals):
self._info(f'collisions in cxsmiles radicals description: {data[0]}')
radicals = []
data = data[1:]
contract = None
else:
radicals = []
contract = None

if self.__header is None:
Expand Down Expand Up @@ -262,6 +280,12 @@ def parse(self, smiles: str) -> Union[MoleculeContainer, CGRContainer, ReactionC
self._info(f'record consist errors:\n{format_exc()}')
return meta

if radicals:
atom_map = dict(enumerate(a for m in chain(record['reactants'], record['reagents'], record['products'])
for a in m['atoms']))
for x in radicals:
atom_map[x]['is_radical'] = True

try:
container = self._convert_reaction(record)
except ValueError:
Expand Down Expand Up @@ -313,6 +337,8 @@ def parse(self, smiles: str) -> Union[MoleculeContainer, CGRContainer, ReactionC
return meta

record['meta'].update(meta)
for x in radicals:
record['atoms'][x]['is_radical'] = True
try:
container = self._convert_structure(record)
except ValueError:
Expand All @@ -336,17 +362,26 @@ def _convert_molecule(self, molecule, mapping):
if hc is None: # aromatic rings or valence errors. just store given H count.
hydrogens[n] = h
elif hc != h: # H count mismatch. try radical state of atom.
radicals[n] = True
calc_implicit(n)
if hydrogens[n] != h: # radical state also has errors.
if radicals[n]:
if self._ignore:
radicals[n] = False # reset radical state
hydrogens[n] = h # set parsed hydrogens count
self._info(f'implicit hydrogen count ({h}) mismatch with '
f'calculated ({hc}) on atom {n}. calculated count replaced.')
else:
raise ValueError(f'implicit hydrogen count ({h}) mismatch with '
f'calculated ({hc}) on atom {n}.')
else:
radicals[n] = True
calc_implicit(n)
if hydrogens[n] != h: # radical state also has errors.
if self._ignore:
radicals[n] = False # reset radical state
hydrogens[n] = h # set parsed hydrogens count
self._info(f'implicit hydrogen count ({h}) mismatch with '
f'calculated ({hc}) on atom {n}. calculated count replaced.')
else:
raise ValueError(f'implicit hydrogen count ({h}) mismatch with '
f'calculated ({hc}) on atom {n}.')

if self.__ignore_stereo or not molecule['stereo_atoms'] and not molecule['stereo_bonds']:
return mol
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def finalize_options(self):

setup(
name='CGRtools',
version='4.2.10',
version='4.2.11',
packages=['CGRtools', 'CGRtools.algorithms', 'CGRtools.algorithms.calculate2d', 'CGRtools.algorithms.components',
'CGRtools.algorithms.standardize', 'CGRtools.containers', 'CGRtools.files', 'CGRtools.files._mdl',
'CGRtools.periodictable', 'CGRtools.periodictable.element', 'CGRtools.reactor', 'CGRtools.utils',
Expand Down

0 comments on commit 03106d7

Please sign in to comment.