forked from jonalv/cdk
/
ShortestPathWalker.java
232 lines (194 loc) · 6.91 KB
/
ShortestPathWalker.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
/*
* Copyright (C) 2012 Syed Asad Rahman <asad@ebi.ac.uk>
* 2013 John May <jwmay@users.sf.net>
*
*
* Contact: cdk-devel@lists.sourceforge.net
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
* All we ask is that proper credit is given for our work, which includes
* - but is not limited to - adding the above copyright notice to the beginning
* of your source code files, and to any copyright notice that you may distribute
* with programs based on this work.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*/
package org.openscience.cdk.fingerprint;
import java.util.*;
import org.openscience.cdk.CDKConstants;
import org.openscience.cdk.graph.AllPairsShortestPaths;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IBond;
import org.openscience.cdk.interfaces.IPseudoAtom;
/**
*
* @author Syed Asad Rahman (2012)
* @author John May (2013)
* @cdk.keyword fingerprint
* @cdk.keyword similarity
* @cdk.module fingerprint
* @cdk.githash
*
*/
public final class ShortestPathWalker {
/* container which is being traversed */
private final IAtomContainer container;
/* set of encoded atom paths */
private final Set<String> paths;
/* list of encoded pseudo atoms */
private final List<String> pseudoAtoms;
/* maximum number of shortest paths, when there is more then one path */
private static final int MAX_SHORTEST_PATHS = 5;
/**
* Create a new shortest path walker for a given container.
* @param container the molecule to encode the shortest paths
*/
public ShortestPathWalker(IAtomContainer container) {
this.container = container;
this.pseudoAtoms = new ArrayList<String>(5);
this.paths = Collections.unmodifiableSet(traverse());
}
/**
* Access a set of all shortest paths.
* @return the paths
*/
public Set<String> paths() {
return Collections.unmodifiableSet(paths);
}
/**
* Access the number of unique encode paths traversed.
* @return the number paths
*/
public int getPathCount() {
return paths.size();
}
/**
* Traverse all-pairs of shortest-paths within a chemical graph.
*/
private Set<String> traverse() {
Set<String> paths = new TreeSet<String>();
// All-Pairs Shortest-Paths (APSP)
AllPairsShortestPaths apsp = new AllPairsShortestPaths(container);
for (int i = 0, n = container.getAtomCount(); i < n; i++) {
paths.add(toAtomPattern(container.getAtom(i)));
// only do the comparison for i,j then reverse the path for j,i
for (int j = i + 1; j < n; j++) {
int nPaths = apsp.from(i).nPathsTo(j);
// only encode when there is a manageable number of paths
if (nPaths > 0 && nPaths < MAX_SHORTEST_PATHS) {
for(int[] path : apsp.from(i).pathsTo(j)){
paths.add(encode(path));
paths.add(encode(reverse(path)));
}
}
}
}
return paths;
}
/**
* Reverse an array of integers.
*
* @param src array to reverse
* @return reversed copy of <i>src</i>
*/
private int[] reverse(int[] src) {
int[] dest = Arrays.copyOf(src, src.length);
int left = 0;
int right = src.length - 1;
while (left < right) {
// swap the values at the left and right indices
dest[left] = src[right];
dest[right] = src[left];
// move the left and right index pointers in toward the center
left++; right--;
}
return dest;
}
/**
* Encode the provided path of atoms to a string.
*
* @param path inclusive array of vertex indices
* @return encoded path
*/
private String encode(int[] path) {
StringBuilder sb = new StringBuilder(path.length * 3);
for (int i = 0, n = path.length - 1; i <= n; i++) {
IAtom atom = container.getAtom(path[i]);
sb.append(toAtomPattern(atom));
if(atom instanceof IPseudoAtom) {
pseudoAtoms.add(atom.getSymbol());
// potential bug, although the atoms are canonical we cannot guarantee the order we will visit them.
// sb.append(PeriodicTable.getElementCount() + pseudoAtoms.size());
}
// if we are not at the last index, add the connecting bond
if(i < n){
IBond bond = container.getBond(container.getAtom(path[i]),
container.getAtom(path[i + 1]));
sb.append(getBondSymbol(bond));
}
}
return sb.toString();
}
/**
* Convert an atom to a string representation. Currently this method just
* returns the symbol but in future may include other properties, such as, stereo
* descriptor and charge.
* @param atom The atom to encode
* @return encoded atom
*/
private String toAtomPattern(IAtom atom) {
return atom.getSymbol();
}
/**
* Gets the bondSymbol attribute of the HashedFingerprinter class
*
* @param bond Description of the Parameter
* @return The bondSymbol value
*]\ */
private char getBondSymbol(IBond bond) {
if (isSP2Bond(bond)) {
return '@';
} else {
switch (bond.getOrder()) {
case SINGLE:
return '1';
case DOUBLE:
return '2';
case TRIPLE:
return '3';
case QUADRUPLE:
return '4';
default:
return '5';
}
}
}
/**
* Returns true if the bond binds two atoms, and both atoms are SP2 in a ring system.
*/
private boolean isSP2Bond(IBond bond) {
return bond.getFlag(CDKConstants.ISAROMATIC);
}
/**
* @inheritDoc
*/
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
for (String path : paths) {
sb.append(path).append("->");
}
return sb.toString();
}
}