From bf0ddd1fffdbde7ba148b137380887618a8347dc Mon Sep 17 00:00:00 2001 From: John May Date: Mon, 28 Jul 2014 09:13:34 +0100 Subject: [PATCH] Ensure canonical labelling can still be computed with explicit hydrogens. This was broken in a previous patch. Signed-off-by: Egon Willighagen --- .../cdk/graph/invariant/Canon.java | 14 ++++++++++++++ .../cdk/graph/invariant/CanonTest.java | 19 +++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/base/standard/src/main/java/org/openscience/cdk/graph/invariant/Canon.java b/base/standard/src/main/java/org/openscience/cdk/graph/invariant/Canon.java index faa428ce470..a818a105421 100644 --- a/base/standard/src/main/java/org/openscience/cdk/graph/invariant/Canon.java +++ b/base/standard/src/main/java/org/openscience/cdk/graph/invariant/Canon.java @@ -214,6 +214,12 @@ private long[] refine(long[] invariants, boolean[] hydrogens) { } if (symmetry == null) { + + // After symmetry classes have been found without hydrogens we add + // back in the hydrogens and assign ranks. We don't refine the + // partition until the next time round the while loop to avoid + // artificially splitting due to hydrogen representation, for example + // the two hydrogens are equivalent in this SMILES for ethane '[H]CC' for (int i = 0; i < g.length; i++) { if (hydrogens[i]) { curr[i] = prev[g[i][0]]; @@ -222,6 +228,14 @@ private long[] refine(long[] invariants, boolean[] hydrogens) { } n = ranker.rank(currVs, nextVs, nnu, curr, prev); symmetry = Arrays.copyOf(prev, ord); + + // Update the buffer of non-unique vertices as hydrogens next + // to discrete heavy atoms are also discrete (and removed from + // 'nextVs' during ranking. + nnu = 0; + for (int i = 0; i < ord && nextVs[i] >= 0; i++) { + currVs[nnu++] = nextVs[i]; + } } // partition is discrete or only symmetry classes are needed diff --git a/base/test-standard/src/test/java/org/openscience/cdk/graph/invariant/CanonTest.java b/base/test-standard/src/test/java/org/openscience/cdk/graph/invariant/CanonTest.java index f75d2984f47..75420ae445e 100644 --- a/base/test-standard/src/test/java/org/openscience/cdk/graph/invariant/CanonTest.java +++ b/base/test-standard/src/test/java/org/openscience/cdk/graph/invariant/CanonTest.java @@ -24,6 +24,7 @@ package org.openscience.cdk.graph.invariant; +import org.junit.Assert; import org.junit.Test; import org.openscience.cdk.graph.GraphUtil; import org.openscience.cdk.interfaces.IAtomContainer; @@ -120,6 +121,24 @@ public class CanonTest { long[] symmetry = Canon.symmetry(m, GraphUtil.toAdjList(m)); assertThat(symmetry, is(new long[]{4, 2, 3, 5, 5, 1})); } + + @Test public void canonicallyLabelEthaneWithInConsistentHydrogenRepresentation() throws Exception { + IAtomContainer m = smi("CC[H]"); + long[] labels = Canon.label(m, GraphUtil.toAdjList(m)); + Assert.assertThat(labels, is(new long[]{2, 3, 1})); + } + + @Test public void canonicallyLabelEthaneWithInConsistentHydrogenRepresentation2() throws Exception { + IAtomContainer m = smi("CC([H])([H])"); + long[] labels = Canon.label(m, GraphUtil.toAdjList(m)); + Assert.assertThat(labels, is(new long[]{3, 4, 1, 2})); + } + + @Test public void canonicallyLabelCaffeineWithExplicitHydrogenRepresentation() throws Exception { + IAtomContainer m = smi("[H]C1=NC2=C(N1C([H])([H])[H])C(=O)N(C(=O)N2C([H])([H])[H])C([H])([H])[H]"); + long[] labels = Canon.label(m, GraphUtil.toAdjList(m)); + Assert.assertThat(labels, is(new long[]{1, 14, 13, 16, 18, 19, 22, 2, 3, 4, 15, 11, 20, 17, 12, 21, 24, 8, 9, 10, 23, 5, 6, 7})); + } static final SmilesParser sp = new SmilesParser(SilentChemObjectBuilder.getInstance());