Skip to content

Commit

Permalink
SmilesCGR (#5)
Browse files Browse the repository at this point in the history
CGR to SMILES
  • Loading branch information
TimurGimadiev authored and Ramil Nugmanov committed Feb 14, 2019
1 parent 9665dd4 commit 99569f5
Showing 1 changed file with 57 additions and 3 deletions.
60 changes: 57 additions & 3 deletions CGRtools/algorithms/strings.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
#
# Copyright 2017-2019 Ramil Nugmanov <stsouko@live.ru>
# Copyright 2019 Timur Gimadiev <timur.gimadiev@gmail.com>
# This file is part of CGRtools.
#
# CGRtools is free software; you can redistribute it and/or modify
Expand All @@ -19,7 +20,7 @@
from collections import defaultdict
from hashlib import sha512
from itertools import count
from ..attributes import Atom, DynAtom, QueryAtom, DynQueryAtom
from ..attributes import Atom, DynAtom, QueryAtom, DynQueryAtom, DynBond
from ..cache import cached_method, cached_args_method


Expand All @@ -28,6 +29,24 @@
# Text rendering of a formal atom charge; a neutral atom is written as '0'.
charge_str = {
    -3: '-3', -2: '-2', -1: '-',
    0: '0',
    1: '+', 2: '+2', 3: '+3',
}

# Symbol for each bond order; None (no bond) is rendered as '.'.
order_str = {
    1: '-',
    2: '=',
    3: '#',
    4: ':',
    5: '~',
    None: '.',
}

# Stereo sign (1 / -1) to its '@' / '@@' mark.
stereo_str = {
    1: '@',
    -1: '@@',
}
# Dynamic bond symbols keyed by (order, p_order).
# A changed bond is written as '[<order symbol>><p_order symbol>]', with '.' standing for None (no bond).
# Unchanged bonds use a short form: '' for (1, 1) and the bare symbol for (2, 2), (3, 3) and (4, 4).
# NOTE(review): (5, 5) is rendered as '[~]' rather than a bare '~' - confirm this is intended.
# There is no (None, None) entry.
dyn_order_str = {
    (None, 1): '[.>-]', (None, 2): '[.>=]', (None, 3): '[.>#]', (None, 4): '[.>:]', (None, 5): '[.>~]',
    (1, None): '[->.]', (1, 1): '', (1, 2): '[->=]', (1, 3): '[->#]', (1, 4): '[->:]', (1, 5): '[->~]',
    (2, None): '[=>.]', (2, 1): '[=>-]', (2, 2): '=', (2, 3): '[=>#]', (2, 4): '[=>:]', (2, 5): '[=>~]',
    (3, None): '[#>.]', (3, 1): '[#>-]', (3, 2): '[#>=]', (3, 3): '#', (3, 4): '[#>:]', (3, 5): '[#>~]',
    (4, None): '[:>.]', (4, 1): '[:>-]', (4, 2): '[:>=]', (4, 3): '[:>#]', (4, 4): ':', (4, 5): '[:>~]',
    (5, None): '[~>.]', (5, 1): '[~>-]', (5, 2): '[~>=]', (5, 3): '[~>#]', (5, 4): '[~>:]', (5, 5): '[~]',
}
# Dynamic charge marks keyed by (charge, p_charge), for charges in -3..+3.
# An unchanged charge is written with its plain mark ('-3', '-2', '-', '+', '+2', '+3'); a neutral
# unchanged atom gets an empty string. A changed charge is written '<charge>><p_charge>', where a
# neutral side is spelled '0'.
# One row per starting charge, so a malformed entry is easy to spot.
# Fix: (-3, 3) used to be '-3>3', dropping the '+' sign that every other transition to +3 carries.
dyn_charge_str = {
    (-3, -3): '-3', (-3, -2): '-3>-2', (-3, -1): '-3>-', (-3, 0): '-3>0',
    (-3, 1): '-3>+', (-3, 2): '-3>+2', (-3, 3): '-3>+3',

    (-2, -3): '-2>-3', (-2, -2): '-2', (-2, -1): '-2>-', (-2, 0): '-2>0',
    (-2, 1): '-2>+', (-2, 2): '-2>+2', (-2, 3): '-2>+3',

    (-1, -3): '->-3', (-1, -2): '->-2', (-1, -1): '-', (-1, 0): '->0',
    (-1, 1): '->+', (-1, 2): '->+2', (-1, 3): '->+3',

    (0, -3): '0>-3', (0, -2): '0>-2', (0, -1): '0>-', (0, 0): '',
    (0, 1): '0>+', (0, 2): '0>+2', (0, 3): '0>+3',

    (1, -3): '+>-3', (1, -2): '+>-2', (1, -1): '+>-', (1, 0): '+>0',
    (1, 1): '+', (1, 2): '+>+2', (1, 3): '+>+3',

    (2, -3): '+2>-3', (2, -2): '+2>-2', (2, -1): '+2>-', (2, 0): '+2>0',
    (2, 1): '+2>+', (2, 2): '+2', (2, 3): '+2>+3',

    (3, -3): '+3>-3', (3, -2): '+3>-2', (3, -1): '+3>-', (3, 0): '+3>0',
    (3, 1): '+3>+', (3, 2): '+3>+2', (3, 3): '+3',
}
# Dynamic multiplicity marks keyed by (multiplicity, p_multiplicity).
# Multiplicities 1, 2, 3 are written '*', '*2', '*3'; None is written 'n' on the changed side.
# An unchanged value uses its plain mark, and (None, None) is empty.
# NOTE(review): the (None, k) entries are 'n>1', 'n>2', 'n>3', while the mirrored (k, None) entries
# use the '*' notation ('*>n', '*2>n', '*3>n'). Confirm whether 'n>*', 'n>*2', 'n>*3' was intended.
dyn_multiplicity_str = {
    (1, 1): '*', (1, 2): '*>*2', (1, 3): '*>*3', (1, None): '*>n',
    (2, 1): '*2>*', (2, 2): '*2', (2, 3): '*2>*3', (2, None): '*2>n',
    (3, 1): '*3>*', (3, 2): '*3>*2', (3, 3): '*3', (3, None): '*3>n',
    (None, 1): 'n>1', (None, 2): 'n>2', (None, 3): 'n>3', (None, None): '',
}


class HashableSmiles:
Expand Down Expand Up @@ -224,9 +243,13 @@ def __format__(self, format_spec):
"""
format CGR as SMIRKS string
:param format_spec: if == 'n' add neighbors count of atoms. don't forget to call reset query marks before.
if == 'h' add hybridizations of atoms. if 'nh' or 'hn' add both.
:param format_spec: if 's' in format_spec, only the SMILES representation of the CGR is returned.
No hybridization or neighbors count is added. This option has the highest priority.
if == 'n' add neighbors count of atoms. don't forget to call reset query marks before.
if == 'h' add hybridization of atoms. if 'nh' or 'hn' add both.
"""
if format_spec and 's' in format_spec:
return self._format_string_cgr(self.atoms_order.__getitem__)
if not format_spec:
neighbors = False
hybridization = False
Expand Down Expand Up @@ -319,6 +342,37 @@ def __format_atom(atom, neighbors, hybridization):

return ''.join(smi), ''.join(p_smi)

def _format_string_cgr(self, order):
    """
    build the CGR string from the tokens produced by self._flatten(order)

    tokens are handled by type:
    * str - copied to the output unchanged.
    * list - the first item is formatted as an atom; the remaining items are (bond, number) pairs,
      emitted in ascending int(number) order as the dynamic bond symbol followed by the number
      (presumably ring closures - confirm against _flatten).
    * DynAtom - formatted as an atom.
    * anything else - treated as a bond and rendered from its order / p_order.

    :param order: passed through to self._flatten.
    :return: concatenated string of all rendered tokens.
    """
    pieces = []
    emit = pieces.append
    for token in self._flatten(order):
        if isinstance(token, str):
            emit(token)
            continue
        if isinstance(token, list):
            emit(self.__format_atom_cgr(token[0]))
            pairs = token[1:]
            pairs.sort(key=lambda pair: int(pair[1]))
            for bond, number in pairs:
                emit(dyn_order_str[(bond.order, bond.p_order)])
                emit(str(number))
            continue
        if isinstance(token, DynAtom):
            emit(self.__format_atom_cgr(token))
            continue
        # not a string, list or atom: render as a dynamic bond
        emit(dyn_order_str[(token.order, token.p_order)])
    return "".join(pieces)

@staticmethod
def __format_atom_cgr(atom):
    """
    render a single atom for the CGR string

    the isotope number is prepended when it differs from atom.common_isotope. a dynamic charge mark is
    added when charge or p_charge is non-zero, and a dynamic multiplicity mark when multiplicity or
    p_multiplicity is set. the result is wrapped in square brackets unless it is a bare element symbol
    from the set C, N, O, P, S, F, Cl, Br, I, B.

    :param atom: object providing isotope, common_isotope, element, charge, p_charge, multiplicity
        and p_multiplicity attributes.
    :return: atom string.
    """
    if atom.isotope == atom.common_isotope:
        tokens = [atom.element]
    else:
        tokens = [str(atom.isotope), atom.element]

    charge_key = (atom.charge, atom.p_charge)
    if any(charge_key):
        tokens.append(dyn_charge_str[charge_key])

    multiplicity_key = (atom.multiplicity, atom.p_multiplicity)
    if any(multiplicity_key):
        tokens.append(dyn_multiplicity_str[multiplicity_key])

    # only an undecorated element from this set may be written without brackets
    if len(tokens) == 1 and atom.element in {'C', 'N', 'O', 'P', 'S', 'F', 'Cl', 'Br', 'I', 'B'}:
        return ''.join(tokens)
    return ''.join(['['] + tokens + [']'])


class SmilesQuery(StringCommon):
@cached_method
Expand Down

0 comments on commit 99569f5

Please sign in to comment.