Skip to content

Commit

Permalink
Add tests for min_hash configuration and fix settings names (#84753)
Browse files Browse the repository at this point in the history
Fixes #84578
  • Loading branch information
romseygeek committed Mar 9, 2022
1 parent 987a83b commit 6242118
Show file tree
Hide file tree
Showing 3 changed files with 47 additions and 3 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/84753.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 84753
summary: Add tests for `min_hash` configuration and fix settings names
area: Analysis
type: bug
issues:
- 84578
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,10 @@ private Map<String, String> convertSettings(Settings settings) {
if (settings.hasValue("hash_count")) {
settingMap.put("hashCount", settings.get("hash_count"));
}
if (settings.hasValue("bucketCount")) {
if (settings.hasValue("bucket_count")) {
settingMap.put("bucketCount", settings.get("bucket_count"));
}
if (settings.hasValue("hashSetSize")) {
if (settings.hasValue("hash_set_size")) {
settingMap.put("hashSetSize", settings.get("hash_set_size"));
}
if (settings.hasValue("with_rotation")) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public void testDefault() throws IOException {
assertStreamHasNumberOfTokens(tokenFilter.create(tokenizer), default_hash_count * default_bucket_size * default_hash_set_size);
}

public void testSettings() throws IOException {
public void testRotationSettings() throws IOException {
Settings settings = Settings.builder()
.put("index.analysis.filter.test_min_hash.type", "min_hash")
.put("index.analysis.filter.test_min_hash.hash_count", "1")
Expand All @@ -56,4 +56,42 @@ public void testSettings() throws IOException {
// because with_rotation is false, we only expect 1 token here.
assertStreamHasNumberOfTokens(tokenFilter.create(tokenizer), 1);
}

/**
 * Configures a {@code min_hash} filter named {@code test_min_hash} with
 * {@code hash_count}, {@code bucket_count} and {@code hash_set_size} all set to 1 and
 * {@code with_rotation} enabled, then asserts that a single-word input yields exactly
 * one token.
 *
 * @throws IOException declared by the test signature; propagated from analysis setup or stream consumption
 */
public void testHashCountSettings() throws IOException {
    final Settings indexSettings = Settings.builder()
        .put("index.analysis.filter.test_min_hash.type", "min_hash")
        .put("index.analysis.filter.test_min_hash.hash_count", "1")
        .put("index.analysis.filter.test_min_hash.bucket_count", "1")
        .put("index.analysis.filter.test_min_hash.hash_set_size", "1")
        .put("index.analysis.filter.test_min_hash.with_rotation", true)
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .build();
    final ESTestCase.TestAnalysis testAnalysis = AnalysisTestsHelper.createTestAnalysisFromSettings(
        indexSettings,
        new CommonAnalysisPlugin()
    );
    final TokenFilterFactory minHashFactory = testAnalysis.tokenFilter.get("test_min_hash");

    final Tokenizer whitespaceTokenizer = new WhitespaceTokenizer();
    whitespaceTokenizer.setReader(new StringReader("sushi"));

    // With one hash, one bucket and a hash set of size one, a single output token is expected.
    assertStreamHasNumberOfTokens(minHashFactory.create(whitespaceTokenizer), 1);
}

/**
 * Configures a {@code min_hash} filter named {@code test_min_hash} with
 * {@code hash_count = 1}, {@code bucket_count = 1}, {@code hash_set_size = 2} and
 * {@code with_rotation} disabled, then asserts that a three-word input yields exactly
 * two tokens.
 *
 * @throws IOException declared by the test signature; propagated from analysis setup or stream consumption
 */
public void testHashSetSize() throws IOException {
    final Settings indexSettings = Settings.builder()
        .put("index.analysis.filter.test_min_hash.type", "min_hash")
        .put("index.analysis.filter.test_min_hash.hash_count", "1")
        .put("index.analysis.filter.test_min_hash.bucket_count", "1")
        .put("index.analysis.filter.test_min_hash.hash_set_size", "2")
        .put("index.analysis.filter.test_min_hash.with_rotation", false)
        .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString())
        .build();
    final ESTestCase.TestAnalysis testAnalysis = AnalysisTestsHelper.createTestAnalysisFromSettings(
        indexSettings,
        new CommonAnalysisPlugin()
    );
    final TokenFilterFactory minHashFactory = testAnalysis.tokenFilter.get("test_min_hash");

    final Tokenizer whitespaceTokenizer = new WhitespaceTokenizer();
    whitespaceTokenizer.setReader(new StringReader("another, longer test"));

    // hash_set_size = 2 (with hash_count = 1 and bucket_count = 1) should give us two tokens.
    // NOTE(review): the previous comment described these as "two buckets", but bucket_count
    // is 1 here; presumably they are two retained hashes in one bucket - confirm against
    // Lucene's MinHashFilter.
    assertStreamHasNumberOfTokens(minHashFactory.create(whitespaceTokenizer), 2);
}
}

0 comments on commit 6242118

Please sign in to comment.