Skip to content

Commit

Permalink
integrating ahocorasick double array trie
Browse files Browse the repository at this point in the history
  • Loading branch information
almondtools committed Mar 31, 2017
1 parent 2cd3e10 commit 885fcdc
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 1 deletion.
7 changes: 6 additions & 1 deletion pom.xml
Expand Up @@ -70,6 +70,11 @@
<artifactId>stringsearchalgorithms</artifactId>
<version>0.3.17</version>
</dependency>
<dependency>
<groupId>com.hankcs</groupId>
<artifactId>aho-corasick-double-array-trie</artifactId>
<version>1.0.1</version>
</dependency>
<dependency>
<groupId>org.openjdk.jmh</groupId>
<artifactId>jmh-core</artifactId>
Expand Down Expand Up @@ -104,7 +109,7 @@
<artifactId>maven-surefire-plugin</artifactId>
<version>2.19.1</version>
<configuration>
<argLine>-Xmx2048m -XX:MaxPermSize=256m</argLine>
<argLine>-Xmx2048m -XX:MaxPermSize=256m</argLine>
<excludes>
<exclude>**/incubation/**/*.java</exclude>
</excludes>
Expand Down
@@ -0,0 +1,75 @@
package com.almondtools.stringbench.multipattern.hankcs;

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import com.almondtools.stringbench.multipattern.MultiPatternMatcherBenchmark;
import com.hankcs.algorithm.AhoCorasickDoubleArrayTrie;
import com.hankcs.algorithm.AhoCorasickDoubleArrayTrie.IHit;

/**
 * Multi-pattern benchmark backed by {@link AhoCorasickDoubleArrayTrie}.
 *
 * <p>Every pattern is stored in the trie as its own value, so each hit
 * callback receives the matched pattern text. Only the begin offsets of
 * accepted hits are collected (see {@link Processor}).
 */
public class HankcsAhoCorasickBenchmark extends MultiPatternMatcherBenchmark {

    private static final String ID = "AhoCorasickDoubleArrayTrie Aho-Corasick";

    /** Trie built by {@link #preparePatterns(Set)}; reset to {@code null} by {@link #free()}. */
    private AhoCorasickDoubleArrayTrie<String> trie;

    /**
     * @return the fixed identifier of this benchmark
     */
    @Override
    public String getId() {
        return ID;
    }

    /**
     * Creates a fresh trie and builds it from the given patterns, mapping
     * each pattern to itself.
     *
     * @param patterns the patterns to search for
     */
    @Override
    public void preparePatterns(Set<String> patterns) {
        trie = new AhoCorasickDoubleArrayTrie<>();
        Map<String, String> dictionary = new HashMap<>();
        for (String pattern : patterns) {
            dictionary.put(pattern, pattern);
        }
        trie.build(dictionary);
    }

    /**
     * Searches the given text with the prepared trie.
     *
     * @param text the text to scan
     * @return begin offsets of the accepted hits, in the order they were reported
     */
    @Override
    public List<Integer> find(String text) {
        return collectStarts(text);
    }

    /**
     * Reads the whole file into memory, decodes it as UTF-8 and searches the
     * resulting text with the prepared trie.
     *
     * @param file the file to scan
     * @return begin offsets of the accepted hits, in the order they were reported
     * @throws IOException if the file cannot be read
     */
    @Override
    public List<Integer> find(File file) throws IOException {
        byte[] content = Files.readAllBytes(file.toPath());
        return collectStarts(new String(content, StandardCharsets.UTF_8));
    }

    /** Drops the reference to the trie. */
    @Override
    public void free() {
        trie = null;
    }

    /** Runs the trie over {@code text} and gathers the begin offsets accepted by a new {@link Processor}. */
    private List<Integer> collectStarts(String text) {
        List<Integer> starts = new ArrayList<>();
        trie.parseText(text, new Processor(starts));
        return starts;
    }

    /**
     * Hit callback that records the begin offset of a hit unless the hit
     * begins before the end of the most recently recorded one.
     */
    private static class Processor implements IHit<String> {

        private final List<Integer> starts;
        /** Smallest begin offset that will still be recorded. */
        private int boundary = 0;

        public Processor(List<Integer> starts) {
            this.starts = starts;
        }

        @Override
        public void hit(int begin, int end, String value) {
            if (begin < boundary) {
                // begins inside the previously recorded hit: ignore
                return;
            }
            starts.add(begin);
            boundary = begin + value.length();
        }
    }
}
@@ -0,0 +1,15 @@
package com.almondtools.stringbench.multipattern.incubation.hankcs;

import com.almondtools.stringbench.multipattern.MultiPatternMatcherBenchmark;
import com.almondtools.stringbench.multipattern.MultiPatternMatcherBenchmarkTest;
import com.almondtools.stringbench.multipattern.hankcs.HankcsAhoCorasickBenchmark;

/**
 * Runs the shared {@link MultiPatternMatcherBenchmarkTest} suite against
 * {@link HankcsAhoCorasickBenchmark}.
 *
 * <p>NOTE(review): this class is declared in an {@code incubation} package;
 * if the build excludes {@code incubation} sources from the test run, this
 * suite is not executed by default - confirm that this is intended.
 */
public class HankcsAhoCorasickBenchmarkTest extends MultiPatternMatcherBenchmarkTest {

    /**
     * @return a new instance of the benchmark under test
     */
    @Override
    protected MultiPatternMatcherBenchmark getBenchmark() {
        MultiPatternMatcherBenchmark benchmark = new HankcsAhoCorasickBenchmark();
        return benchmark;
    }

}

0 comments on commit 885fcdc

Please sign in to comment.