// EMMAImplementation.java
package net.seninp.jmotif.sax.motif;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map.Entry;
import org.slf4j.LoggerFactory;
import ch.qos.logback.classic.Level;
import ch.qos.logback.classic.Logger;
import net.seninp.jmotif.distance.EuclideanDistance;
import net.seninp.jmotif.sax.SAXProcessor;
import net.seninp.jmotif.sax.TSProcessor;
import net.seninp.jmotif.sax.alphabet.NormalAlphabet;
import net.seninp.util.JmotifMapEntry;
/**
 * Implements the motif discovery routines: sliding windows of the input series are discretized
 * into SAX words, windows sharing a word are grouped into buckets, and the buckets are examined
 * from the most populated one down in order to find the subsequence which has the largest number
 * of non-trivial matches within the configured range.
 *
 * @author psenin
 *
 */
public class EMMAImplementation {

  // logging stuff
  //
  private static final Logger LOGGER;
  private static final Level LOGGING_LEVEL = Level.INFO;

  static {
    LOGGER = (Logger) LoggerFactory.getLogger(EMMAImplementation.class);
    LOGGER.setLevel(LOGGING_LEVEL);
  }

  // shared helpers, never re-assigned
  private static final TSProcessor tp = new TSProcessor();
  private static final SAXProcessor sp = new SAXProcessor();
  private static final NormalAlphabet normalA = new NormalAlphabet();
  private static final EuclideanDistance ed = new EuclideanDistance();

  /** The number of distance computations which were abandoned early (see eaDistance). */
  public static int eaCounter;

  /** The total number of distance computations started (see eaDistance). */
  public static int distCounter;

  /**
   * Finds 1-Motif.
   *
   * The windows of length motifSize are converted into SAX words and bucketed by the word. The
   * buckets are then visited in the order of decreasing size; each visited bucket is extended with
   * the content of all smaller (not yet visited) buckets whose SAX word is closer than the range
   * by the SAX MINDIST, and the resulting neighborhood is searched by {@link #ADM}. The search
   * stops when a visited bucket holds two or fewer windows, or when the motif found in the current
   * neighborhood is more frequent than the size of the next bucket.
   *
   * If the series yields no windows at all (its length does not exceed the motifSize), an empty
   * record with the location -1 is returned.
   *
   * @param series the input time series.
   * @param motifSize the motif size.
   * @param range the similarity range cut off.
   * @param paaSize the PAA size.
   * @param alphabetSize the alphabet size.
   * @param znormThreshold z normalization threshold.
   * @return motif's positions.
   * @throws Exception if error occurs.
   */
  public static MotifRecord series2EMMAMotifs(double[] series, int motifSize, double range,
      int paaSize, int alphabetSize, double znormThreshold) throws Exception {

    MotifRecord res = new MotifRecord(-1, new ArrayList<Integer>());

    // SAX word -> offsets of the windows which are discretized into that word.
    //
    // The map is deliberately left to grow on demand: pre-sizing it by the size of the SAX word
    // space (alphabetSize^paaSize) saturates the int range for quite ordinary parameters and
    // makes the very first put() allocate a gigantic table.
    HashMap<String, ArrayList<Integer>> buckets = new HashMap<String, ArrayList<Integer>>();

    // NOTE(review): the bound below leaves out the window which starts at
    // (series.length - motifSize), i.e. the last full window -- TODO confirm this is intended.
    for (int i = 0; i < (series.length - motifSize); i++) {
      String sax = String.valueOf(tp.ts2String(
          tp.paa(tp.znorm(tp.subseriesByCopy(series, i, i + motifSize), znormThreshold), paaSize),
          normalA.getCuts(alphabetSize)));
      buckets.computeIfAbsent(sax, word -> new ArrayList<Integer>()).add(i);
    }

    // nothing to search in -- the series is not longer than the motif
    if (buckets.isEmpty()) {
      return res;
    }

    // order the SAX words by the size of their buckets, the largest first
    ArrayList<JmotifMapEntry<Integer, String>> bucketsOrder = new ArrayList<JmotifMapEntry<Integer, String>>(
        buckets.size());
    for (Entry<String, ArrayList<Integer>> e : buckets.entrySet()) {
      bucketsOrder.add(new JmotifMapEntry<Integer, String>(e.getValue().size(), e.getKey()));
    }
    Collections.sort(bucketsOrder, new Comparator<JmotifMapEntry<Integer, String>>() {
      @Override
      public int compare(JmotifMapEntry<Integer, String> a, JmotifMapEntry<Integer, String> b) {
        return b.getKey().compareTo(a.getKey());
      }
    });

    double[][] dm = normalA.getDistanceMatrix(alphabetSize);

    int currBucketIdx = 0;
    while (currBucketIdx < bucketsOrder.size()) {

      // the current candidate word and a private copy of its bucket
      JmotifMapEntry<Integer, String> mpc = bucketsOrder.get(currBucketIdx);
      String mpcWord = mpc.getValue();
      ArrayList<Integer> neighborhood = new ArrayList<Integer>(buckets.get(mpcWord));

      // buckets are sorted, so once one is this small there is no point in going further
      if (neighborhood.size() <= 2) {
        break;
      }

      // pull in the windows from the remaining buckets whose words are within the range
      for (int i = currBucketIdx + 1; i < bucketsOrder.size(); i++) {
        String cWord = bucketsOrder.get(i).getValue();
        if (range > sp.saxMinDist(mpcWord.toCharArray(), cWord.toCharArray(), dm, motifSize,
            paaSize)) {
          neighborhood.addAll(buckets.get(cWord));
        }
      }

      LOGGER.debug("current bucket {} at {}", mpcWord, neighborhood);

      MotifRecord tmpRes = ADM(series, neighborhood, motifSize, range, znormThreshold);

      LOGGER.debug("current tmp motif {} ", tmpRes.toString());

      // primitive copies so that the comparisons below are always made by value
      int tmpFrequency = tmpRes.getFrequency();
      int bestFrequency = res.getFrequency();

      if (tmpFrequency > bestFrequency || res.isEmpty()) {
        res = tmpRes;
        LOGGER.debug("updating the best motif to {} ", res.toString());
      }
      else if (tmpFrequency == bestFrequency) {
        // res is known to be non-empty here, otherwise the branch above would have been taken
        LOGGER.debug(" ** its's a tie, checking for variation...");
        double varA = occurrenceDistanceVariance(series, res, motifSize, znormThreshold);
        double varB = occurrenceDistanceVariance(series, tmpRes, motifSize, znormThreshold);
        // of two equally frequent motifs prefer the one whose occurrences vary less
        if (varB < varA) {
          LOGGER.debug("updated current best motif to {}", tmpRes);
          res = tmpRes;
        }
      }

      // stop if the motif just found is more frequent than the size of the next bucket
      boolean hasNextBucket = currBucketIdx < (bucketsOrder.size() - 1);
      if (hasNextBucket && tmpFrequency > bucketsOrder.get(currBucketIdx + 1).getKey()) {
        break;
      }

      currBucketIdx++;
    }

    return res;
  }

  /**
   * Computes the variance of the Euclidean distances between the z-normalized motif subsequence
   * (the one at the record's location) and each of the z-normalized subsequences at the record's
   * occurrences. Used for breaking the ties between equally frequent motifs.
   *
   * @param series the input timeseries.
   * @param motif the motif record to evaluate.
   * @param motifSize the motif size.
   * @param znormThreshold z-normalization threshold.
   * @return the variance of the motif-to-occurrence distances.
   * @throws Exception if error occurs.
   */
  private static double occurrenceDistanceVariance(double[] series, MotifRecord motif,
      int motifSize, double znormThreshold) throws Exception {

    // the reference shape is the same for every occurrence, so normalize it only once
    double[] reference = tp.znorm(
        tp.subseriesByCopy(series, motif.getLocation(), motif.getLocation() + motifSize),
        znormThreshold);

    ArrayList<Integer> occurrences = motif.getOccurrences();
    double[] distances = new double[motif.getFrequency()];
    for (int j = 0; j < distances.length; j++) {
      int loc = occurrences.get(j);
      distances[j] = ed.distance(reference,
          tp.znorm(tp.subseriesByCopy(series, loc, loc + motifSize), znormThreshold));
    }

    return tp.var(distances);
  }

  /**
   * This is not a real ADM implementation.
   *
   * All pairs of the neighborhood positions are tested for a non-trivial match and the outcomes
   * are kept in a symmetric boolean matrix (one BitSet per position). The position with the
   * largest number of matches wins, the first one in the neighborhood order if there is a tie; the
   * positions it matches are reported as the occurrences (the winning position itself is not
   * among them). If there are no matching pairs, an empty record with the location -1 is returned.
   *
   * @param series the input timeseries.
   * @param neighborhood the neighborhood coordinates.
   * @param motifSize the motif size.
   * @param range the range value.
   * @param znormThreshold z-normalization threshold.
   * @return the best motif record found within the neighborhood.
   * @throws Exception if error occurs.
   *
   */
  private static MotifRecord ADM(double[] series, ArrayList<Integer> neighborhood, int motifSize,
      double range, double znormThreshold) throws Exception {

    MotifRecord res = new MotifRecord(-1, new ArrayList<Integer>());

    int size = neighborhood.size();

    // row i has the bit j set iff the positions i and j are a non-trivial match
    ArrayList<BitSet> admDistances = new ArrayList<BitSet>(size);
    for (int i = 0; i < size; i++) {
      admDistances.add(new BitSet(size));
    }

    for (int i = 0; i < size; i++) {
      for (int j = 0; j < i; j++) { // diagonal wouldn't count anyway
        boolean isMatch = isNonTrivialMatch(series, neighborhood.get(i), neighborhood.get(j),
            motifSize, range, znormThreshold);
        if (isMatch) {
          admDistances.get(i).set(j);
          admDistances.get(j).set(i);
        }
      }
    }

    int maxCount = 0;
    for (int i = 0; i < size; i++) {
      BitSet matches = admDistances.get(i);
      int matchCount = matches.cardinality();
      if (matchCount > maxCount) {
        maxCount = matchCount;
        ArrayList<Integer> occurrences = new ArrayList<>(matchCount);
        for (int j = matches.nextSetBit(0); j >= 0; j = matches.nextSetBit(j + 1)) {
          occurrences.add(neighborhood.get(j));
        }
        res = new MotifRecord(neighborhood.get(i), occurrences);
      }
    }

    return res;
  }

  /**
   * Checks for the overlap and the range-configured distance.
   *
   * @param series the series to use.
   * @param i the position of subseries a.
   * @param j the position of subseries b.
   * @param motifSize the motif length.
   * @param range the range value.
   * @param znormThreshold z-normalization threshold.
   * @return true if all is cool, false if overlaps or above the range value.
   */
  private static boolean isNonTrivialMatch(double[] series, int i, int j, int motifSize,
      double range, double znormThreshold) {
    // windows which start closer than the motif length overlap -- a trivial match
    if (Math.abs(i - j) < motifSize) {
      return false;
    }
    // NaN signals that the computation was abandoned, i.e. the distance is above the range
    return Double.isFinite(eaDistance(series, i, j, motifSize, range, znormThreshold));
  }

  /**
   * Early abandoning distance configured by range. Both subseries are z-normalized before the
   * comparison. Every call increments distCounter, every abandoned computation increments
   * eaCounter.
   *
   * @param series the series to use.
   * @param a the position of subseries a.
   * @param b the position of subseries b.
   * @param motifSize the motif length.
   * @param range the range value.
   * @param znormThreshold z-normalization threshold.
   * @return a distance value or NAN if above the threshold.
   */
  private static double eaDistance(double[] series, int a, int b, int motifSize, double range,
      double znormThreshold) {

    distCounter++;

    // compare the squared values, so the square root is taken only for the actual matches
    double cutOff2 = range * range;

    double[] seriesA = tp.znorm(tp.subseriesByCopy(series, a, a + motifSize), znormThreshold);
    double[] seriesB = tp.znorm(tp.subseriesByCopy(series, b, b + motifSize), znormThreshold);

    double sum = 0D;
    for (int i = 0; i < motifSize; i++) {
      sum = sum + distance2(seriesA[i], seriesB[i]);
      if (sum > cutOff2) {
        eaCounter++;
        return Double.NaN;
      }
    }

    return Math.sqrt(sum);
  }

  /**
   * Distance square.
   *
   * @param p1 point1.
   * @param p2 point2.
   * @return the distance square.
   */
  private static double distance2(double p1, double p2) {
    double diff = p1 - p2;
    return diff * diff;
  }
}