From dfc5a6fb74b02472d341670d7bc2e218409cd076 Mon Sep 17 00:00:00 2001 From: John May Date: Tue, 10 Dec 2013 19:04:23 +0000 Subject: [PATCH] Assign numbers to unlabelled atoms (i.e. hydrogens). Beam will handle the correct hydrogen ordering (CDK API makes this difficult). Signed-off-by: Stephan Beisken Signed-off-by: Egon Willighagen --- .../cdk/graph/invariant/InChINumbersTools.java | 14 ++++++++++++-- .../cdk/graph/invariant/InChINumbersToolsTest.java | 6 ++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/main/org/openscience/cdk/graph/invariant/InChINumbersTools.java b/src/main/org/openscience/cdk/graph/invariant/InChINumbersTools.java index 4d5e42cb2cb..6b6a0cf29a4 100644 --- a/src/main/org/openscience/cdk/graph/invariant/InChINumbersTools.java +++ b/src/main/org/openscience/cdk/graph/invariant/InChINumbersTools.java @@ -92,6 +92,11 @@ public static long[] getUSmilesNumbers(IAtomContainer container) throws CDKExcep * charged oxygen atom, start instead at any carbonyl oxygen attached to the * same neighbour."

* + * All unlabelled atoms (e.g. hydrogens) are assigned the same label which + * is different from, and larger than, all other labels. The hydrogen + * labelling then needs to be adjusted externally as universal SMILES + * suggests hydrogens should be visited first. + * * @param aux inchi AuxInfo * @param container the structure to obtain the numbering of * @return the numbers string to use @@ -102,8 +107,8 @@ static long[] parseUSmilesNumbers(String aux, IAtomContainer container) { int index; long[] numbers = new long[container.getAtomCount()]; int[] first = null; - int label = 1; - + int label = 1; + if ((index = aux.indexOf("/R:")) >= 0) { // reconnected metal numbers String[] baseNumbers = aux.substring(index + 8, aux.indexOf('/', index + 8)).split(";"); first = new int[baseNumbers.length]; @@ -176,6 +181,11 @@ static long[] parseUSmilesNumbers(String aux, IAtomContainer container) { } } } + + // assign unlabelled atoms + for (int i = 0; i < numbers.length; i++) + if (numbers[i] == 0) + numbers[i] = label; return numbers; } diff --git a/src/test/org/openscience/cdk/graph/invariant/InChINumbersToolsTest.java b/src/test/org/openscience/cdk/graph/invariant/InChINumbersToolsTest.java index e188c1fb3cb..1535eab2a6e 100644 --- a/src/test/org/openscience/cdk/graph/invariant/InChINumbersToolsTest.java +++ b/src/test/org/openscience/cdk/graph/invariant/InChINumbersToolsTest.java @@ -157,6 +157,12 @@ public void fixedH() throws Exception { is(new long[]{3, 2, 1})); } + @Test public void unlabelledHydrogens() throws Exception { + IAtomContainer container = new SmilesParser(SilentChemObjectBuilder.getInstance()).parseSmiles("[H]C([H])([H])[H]"); + assertThat(InChINumbersTools.getUSmilesNumbers(container), + is(new long[]{2, 1, 2, 2, 2})); + } + static IAtomContainer mock(int nAtoms) { IAtomContainer container = Mockito.mock(IAtomContainer.class); when(container.getAtomCount()).thenReturn(nAtoms);