Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
integrating ahocorasick double array trie
- Loading branch information
1 parent
2cd3e10
commit 885fcdc
Showing
3 changed files
with
96 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
75 changes: 75 additions & 0 deletions
75
...main/java/com/almondtools/stringbench/multipattern/hankcs/HankcsAhoCorasickBenchmark.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
package com.almondtools.stringbench.multipattern.hankcs; | ||
|
||
import java.io.File; | ||
import java.io.IOException; | ||
import java.nio.charset.StandardCharsets; | ||
import java.nio.file.Files; | ||
import java.util.ArrayList; | ||
import java.util.HashMap; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Set; | ||
|
||
import com.almondtools.stringbench.multipattern.MultiPatternMatcherBenchmark; | ||
import com.hankcs.algorithm.AhoCorasickDoubleArrayTrie; | ||
import com.hankcs.algorithm.AhoCorasickDoubleArrayTrie.IHit; | ||
|
||
public class HankcsAhoCorasickBenchmark extends MultiPatternMatcherBenchmark { | ||
|
||
private static final String ID = "AhoCorasickDoubleArrayTrie Aho-Corasick"; | ||
|
||
private AhoCorasickDoubleArrayTrie<String> trie; | ||
|
||
@Override | ||
public String getId() { | ||
return ID; | ||
} | ||
|
||
@Override | ||
public void preparePatterns(Set<String> patterns) { | ||
trie = new AhoCorasickDoubleArrayTrie<String>(); | ||
Map<String, String> map = new HashMap<>(); | ||
for (String pattern : patterns) { | ||
map.put(pattern, pattern); | ||
} | ||
trie.build(map); | ||
} | ||
|
||
@Override | ||
public List<Integer> find(String text) { | ||
List<Integer> result = new ArrayList<>(); | ||
trie.parseText(text, new Processor(result)); | ||
return result; | ||
} | ||
|
||
@Override | ||
public List<Integer> find(File file) throws IOException { | ||
List<Integer> result = new ArrayList<>(); | ||
String text = new String(Files.readAllBytes(file.toPath()), StandardCharsets.UTF_8); | ||
trie.parseText(text, new Processor(result)); | ||
return result; | ||
} | ||
|
||
@Override | ||
public void free() { | ||
trie = null; | ||
} | ||
|
||
private static class Processor implements IHit<String> { | ||
|
||
private List<Integer> result; | ||
private int next = 0; | ||
|
||
public Processor(List<Integer> result) { | ||
this.result = result; | ||
} | ||
|
||
@Override | ||
public void hit(int begin, int end, String value) { | ||
if (begin >= next) { | ||
result.add(begin); | ||
next = begin + value.length(); | ||
} | ||
} | ||
} | ||
} |
15 changes: 15 additions & 0 deletions
15
...lmondtools/stringbench/multipattern/incubation/hankcs/HankcsAhoCorasickBenchmarkTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
package com.almondtools.stringbench.multipattern.incubation.hankcs; | ||
|
||
import com.almondtools.stringbench.multipattern.MultiPatternMatcherBenchmark; | ||
import com.almondtools.stringbench.multipattern.MultiPatternMatcherBenchmarkTest; | ||
import com.almondtools.stringbench.multipattern.hankcs.HankcsAhoCorasickBenchmark; | ||
|
||
public class HankcsAhoCorasickBenchmarkTest extends MultiPatternMatcherBenchmarkTest { | ||
|
||
@Override | ||
protected MultiPatternMatcherBenchmark getBenchmark() { | ||
return new HankcsAhoCorasickBenchmark(); | ||
} | ||
|
||
|
||
} |