Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unique atoms #900

Merged
merged 2 commits into from
Sep 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,6 @@ public Mappings stereochemistry() {
*
* @return fluent-api instance
* @see #uniqueBonds()
* @see #uniqueAtomSets()
* @see #exclusiveAtoms()
*/
public Mappings uniqueAtoms() {
Expand All @@ -286,22 +285,6 @@ public Mappings uniqueAtoms() {
return new Mappings(query, target, () -> stream().filter(new UniqueAtomMatches()).iterator());
}

/**
* Filter the mappings for those which cover a unique set of atoms in the
* target. The unique atom mappings are a subset of the unique atom set
* matches.
*
* @return fluent-api instance
* @see #uniqueBonds()
* @see #uniqueAtomSets()
* @see #exclusiveAtoms()
*/
public Mappings uniqueAtomSets() {
// we need the unique predicate to be reset for each new iterator -
// otherwise multiple iterations are always filtered (seen before)
return new Mappings(query, target, () -> stream().filter(new UniqueAtomSetMatches()).iterator());
}

/**
* Filter the mappings for those which cover an exclusive set of atoms in
* the target. If a match overlaps with another one it is not returned. For
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
/*
* Copyright (c) 2013 European Bioinformatics Institute (EMBL-EBI)
* John May <jwmay@users.sf.net>
* 2022 John Mayfield (né May)
* Copyright (c) 2022 John Mayfield (né May)
*
* Contact: cdk-devel@lists.sourceforge.net
*
Expand All @@ -26,21 +24,23 @@
package org.openscience.cdk.isomorphism;

import java.util.BitSet;
import java.util.HashSet;
import java.util.Set;
import java.util.function.Predicate;

/**
* A predicate for filtering atom-mapping results.
* A predicate for filtering atom-mapping results. Important
*
* <pre>{@code
* [0, 1, 2] => accept
* [0, 1, 3] => accept
* [2, 1, 3] => reject (no new mappings)
* [2, 1, 3] => accept (nothing new but not seen this combination before)
* </pre>
*
* This class is intended for use with {@link Pattern}.
*
* <blockquote><pre>{@code
* Pattern pattern = Ullmann.findSubstructure(query);
* Pattern pattern = Pattern.findSubstructure(query);
* List<int[]> unique = FluentIterable.of(patter.matchAll(target))
* .filter(new UniqueAtomMatches())
* .toList();
Expand All @@ -54,7 +54,7 @@ final class UniqueAtomMatches implements Predicate<int[]> {
/**
* Which atoms have we seen in a mapping already.
*/
private final BitSet visit = new BitSet();
private final Set<BitSet> visit = new HashSet<>();

/**
* Create filter for unique matches.
Expand All @@ -67,23 +67,11 @@ public UniqueAtomMatches() {
*/
@Override
public boolean test(int[] mapping) {
if (some(mapping))
return add(mapping);
return false;
return add(mapping);
}

// If some (at least one) has not been seen yet
boolean some(int[] mapping) {
for (int atomIdx : mapping)
if (!this.visit.get(atomIdx))
return true;
return false;
}

boolean add(int[] mapping) {
for (int atomIdx : mapping)
this.visit.set(atomIdx);
return true;
private boolean add(int[] mapping) {
return this.visit.add(toBitSet(mapping));
}

/**
Expand All @@ -96,4 +84,16 @@ boolean add(int[] mapping) {
public boolean apply(int[] ints) {
return test(ints);
}

// If some (at least one) has not been seen yet
private boolean some(int[] mapping) {
return !visit.contains(toBitSet(mapping));
}

public BitSet toBitSet(int[] mapping) {
BitSet bs = new BitSet();
for (int x : mapping)
bs.set(x);
return bs;
}
}

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -448,8 +448,7 @@ public ICountFingerprint getCountFingerprint(IAtomContainer atomContainer) throw
for (int i = 0; i < keys.size(); i++) {
Pattern ptrn = keys.get(i).pattern;
int count = ptrn.matchAll(atomContainer)
.uniqueAtomSets()
.count();
.countUnique();
map.put(i, count);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,17 @@ public void uniqueAtoms() throws Exception {
assertFalse(uam.apply(new int[]{1, 5, 2, 3}));
}

@Test
public void uniqueAtomsUniqSet() {
UniqueAtomMatches uam = new UniqueAtomMatches();
assertTrue(uam.apply(new int[]{1, 2, 3}));
assertTrue(uam.apply(new int[]{4, 2, 5}));
// seen 3 and 2 and 5 on their own before but not all together
// this matches other toolkits so is what most would expect but
// an alternative unique overall which would reject this as a duplicate.
assertTrue(uam.apply(new int[]{3, 2, 5}));
}

@Test
public void uniqueBonds() throws Exception {

Expand Down