Skip to content

Commit

Permalink
Added a parser for keyword (key=value) metadata on the M V30 COUNTS line.
Browse files (browse the repository at this point in the history)
  • Loading branch information
stsouko committed Jan 19, 2021
1 parent 23d720a commit 78ce928
Show file tree
Hide file tree
Showing 9 changed files with 31 additions and 23 deletions.
6 changes: 3 additions & 3 deletions CGRtools/files/MRVrw.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright 2017-2020 Ramil Nugmanov <nougmanoff@protonmail.com>
# Copyright 2017-2021 Ramil Nugmanov <nougmanoff@protonmail.com>
# This file is part of CGRtools.
#
# CGRtools is free software; you can redistribute it and/or modify
Expand Down Expand Up @@ -146,7 +146,7 @@ def __reader(self):
self._info(f'record consist errors:\n{format_exc()}')
yield parse_error(n, parsed, self._format_log(), meta)
else:
record['meta'] = meta
record['meta'].update(meta)
try:
container = self._convert_structure(record)
except ValueError:
Expand Down Expand Up @@ -302,7 +302,7 @@ def __parse_molecule(self, data):
self._info('incorrect bondStereo tag')
bonds.append((atom_map[a1], atom_map[a2], order))

mol = {'atoms': atoms, 'bonds': bonds, 'stereo': stereo}
mol = {'atoms': atoms, 'bonds': bonds, 'stereo': stereo, 'meta': {}}
if '@title' in data:
mol['title'] = data['@title']
return mol
Expand Down
8 changes: 4 additions & 4 deletions CGRtools/files/RDFrw.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright 2014-2020 Ramil Nugmanov <nougmanoff@protonmail.com>
# Copyright 2014-2021 Ramil Nugmanov <nougmanoff@protonmail.com>
# Copyright 2019 Dinar Batyrshin <batyrshin-dinar@mail.ru>
# This file is part of CGRtools.
#
Expand Down Expand Up @@ -164,7 +164,7 @@ def __reader(self):
self._flush_log()
elif line.startswith('$RFMT'):
if record:
record['meta'] = self._prepare_meta(meta)
record['meta'].update(self._prepare_meta(meta))
if title:
record['title'] = title
try:
Expand Down Expand Up @@ -200,7 +200,7 @@ def __reader(self):
meta = defaultdict(list)
elif line.startswith('$MFMT'):
if record:
record['meta'] = self._prepare_meta(meta)
record['meta'].update(self._prepare_meta(meta))
if title:
record['title'] = title
try:
Expand Down Expand Up @@ -270,7 +270,7 @@ def __reader(self):
self.__already_seeked = False
self._flush_log()
if record:
record['meta'] = self._prepare_meta(meta)
record['meta'].update(self._prepare_meta(meta))
if title:
record['title'] = title
try:
Expand Down
6 changes: 3 additions & 3 deletions CGRtools/files/SDFrw.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright 2014-2020 Ramil Nugmanov <nougmanoff@protonmail.com>
# Copyright 2014-2021 Ramil Nugmanov <nougmanoff@protonmail.com>
# This file is part of CGRtools.
#
# CGRtools is free software; you can redistribute it and/or modify
Expand Down Expand Up @@ -147,7 +147,7 @@ def __reader(self):
self._flush_log()
elif line.startswith("$$$$"):
if record:
record['meta'] = self._prepare_meta(meta)
record['meta'].update(self._prepare_meta(meta))
if title:
record['title'] = title
try:
Expand Down Expand Up @@ -220,7 +220,7 @@ def __reader(self):
self._flush_log()

if record: # True for MOL file only.
record['meta'] = self._prepare_meta(meta)
record['meta'].update(self._prepare_meta(meta))
if title:
record['title'] = title
try:
Expand Down
6 changes: 3 additions & 3 deletions CGRtools/files/SMILESrw.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright 2018-2020 Ramil Nugmanov <nougmanoff@protonmail.com>
# Copyright 2018-2021 Ramil Nugmanov <nougmanoff@protonmail.com>
# Copyright 2019 Artem Mukanov <nostro32@mail.ru>
# This file is part of CGRtools.
#
Expand Down Expand Up @@ -254,7 +254,7 @@ def parse(self, smiles: str) -> Union[MoleculeContainer, CGRContainer, ReactionC
self._info(f'line: {smi}\nconsist errors:\n{format_exc()}')
return meta

record['meta'] = meta
record['meta'].update(meta)
try:
container = self._convert_structure(record)
except ValueError:
Expand Down Expand Up @@ -728,7 +728,7 @@ def _parse_tokens(self, tokens):

stereo_bonds = {n: ms for n, ms in stereo_bonds.items() if len(ms) == 1 or len(ms) == set(ms.values())}
mol = {'atoms': atoms, 'bonds': bonds, 'order': order,
'stereo_bonds': stereo_bonds, 'stereo_atoms': stereo_atoms, 'hydrogens': hydrogens}
'stereo_bonds': stereo_bonds, 'stereo_atoms': stereo_atoms, 'hydrogens': hydrogens, 'meta': {}}
if cgr or any(x == 11 for x in atoms_types):
mol['cgr'] = cgr
return mol
Expand Down
12 changes: 9 additions & 3 deletions CGRtools/files/_mdl/emol.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright 2020 Ramil Nugmanov <nougmanoff@protonmail.com>
# Copyright 2020, 2021 Ramil Nugmanov <nougmanoff@protonmail.com>
# This file is part of CGRtools.
#
# CGRtools is free software; you can redistribute it and/or modify
Expand All @@ -26,14 +26,15 @@ def __init__(self, log_buffer=None):
self.__bonds = []
self.__atom_map = {}
self.__stereo = []
self.__meta = {}
if log_buffer is None:
log_buffer = []
self.__log_buffer = log_buffer

def getvalue(self):
if self.__in_mol or self.__in_mol is None:
raise ValueError('molecule not complete')
return {'atoms': self.__atoms, 'bonds': self.__bonds, 'stereo': self.__stereo}
return {'atoms': self.__atoms, 'bonds': self.__bonds, 'stereo': self.__stereo, 'meta': self.__meta}

def __call__(self, line, lineu=None):
if lineu is None:
Expand Down Expand Up @@ -73,12 +74,17 @@ def __call__(self, line, lineu=None):
raise ValueError('invalid CTAB')

else: # M V30 COUNTS line expected
a, b, *_ = line[13:].split()
a, b, *meta = line[13:].split()
atom_count = int(a)
if not atom_count:
raise EmptyMolecule
self.__bonds_count = int(b)
self.__atoms_count = atom_count
for kv in meta:
if '=' in kv:
k, v = kv.split('=', 1)
if k and v:
self.__meta[k] = v

elif self.__in_mol is not None:
raise SyntaxError('invalid usage')
Expand Down
4 changes: 2 additions & 2 deletions CGRtools/files/_mdl/erxn.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright 2020 Ramil Nugmanov <nougmanoff@protonmail.com>
# Copyright 2020, 2021 Ramil Nugmanov <nougmanoff@protonmail.com>
# This file is part of CGRtools.
#
# CGRtools is free software; you can redistribute it and/or modify
Expand Down Expand Up @@ -90,7 +90,7 @@ def __call__(self, line):

def getvalue(self):
if self.__rend:
return {'reactants': self.__reactants, 'products': self.__products, 'reagents': self.__reagents}
return {'reactants': self.__reactants, 'products': self.__products, 'reagents': self.__reagents, 'meta': {}}
raise ValueError('reaction not complete')

__parser_group = __parser = None
Expand Down
4 changes: 2 additions & 2 deletions CGRtools/files/_mdl/mol.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright 2020 Ramil Nugmanov <nougmanoff@protonmail.com>
# Copyright 2020, 2021 Ramil Nugmanov <nougmanoff@protonmail.com>
# This file is part of CGRtools.
#
# CGRtools is free software; you can redistribute it and/or modify
Expand Down Expand Up @@ -57,7 +57,7 @@ def __new__(cls, line, log_buffer=None):

def getvalue(self):
if self.__mend:
mol = {'atoms': self.__atoms, 'bonds': self.__bonds, 'stereo': self.__stereo}
mol = {'atoms': self.__atoms, 'bonds': self.__bonds, 'stereo': self.__stereo, 'meta': {}}
if self.__cgr:
mol['cgr'] = self.__cgr
if self.__query:
Expand Down
3 changes: 2 additions & 1 deletion CGRtools/files/_mdl/parser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright 2014-2020 Ramil Nugmanov <nougmanoff@protonmail.com>
# Copyright 2014-2021 Ramil Nugmanov <nougmanoff@protonmail.com>
# This file is part of CGRtools.
#
# CGRtools is free software; you can redistribute it and/or modify
Expand Down Expand Up @@ -117,6 +117,7 @@ def _convert_reaction(self, reaction):
remapped = {x: y for x, y in enumerate(tmp[shift: atom_len + shift])}
shift += atom_len
g = self.__prepare_structure(j, remapped)
g.meta.update(j['meta'])
rc[i].append(g)
return ReactionContainer(meta=reaction['meta'], name=reaction.get('title'), **rc)

Expand Down
5 changes: 3 additions & 2 deletions CGRtools/files/_mdl/rxn.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
#
# Copyright 2020 Ramil Nugmanov <nougmanoff@protonmail.com>
# Copyright 2020, 2021 Ramil Nugmanov <nougmanoff@protonmail.com>
# This file is part of CGRtools.
#
# CGRtools is free software; you can redistribute it and/or modify
Expand Down Expand Up @@ -83,7 +83,8 @@ def getvalue(self):
if self.__rend:
return {'reactants': self.__molecules[:self.__reactants_count],
'products': self.__molecules[self.__reactants_count:self.__products_count],
'reagents': self.__molecules[self.__products_count:self.__reagents_count]}
'reagents': self.__molecules[self.__products_count:self.__reagents_count],
'meta': {}}
raise ValueError('reaction not complete')

__parser = None
Expand Down

0 comments on commit 78ce928

Please sign in to comment.