From 7acb75fdc812d56fea97ec6040e4fa7fc4107f5f Mon Sep 17 00:00:00 2001 From: jwierzbo Date: Fri, 14 Jan 2022 17:00:32 +0100 Subject: [PATCH] Drivers 3.9: Segmentation and Collation Analyzers (TG-202) --- test/arangosearch_analyzers_test.go | 30 +++++++++++++++++++++++++++++ view_arangosearch.go | 20 ++++++++++++++++++- 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/test/arangosearch_analyzers_test.go b/test/arangosearch_analyzers_test.go index abed11f4..5048ff11 100644 --- a/test/arangosearch_analyzers_test.go +++ b/test/arangosearch_analyzers_test.go @@ -251,6 +251,36 @@ func TestArangoSearchAnalyzerEnsureAnalyzer(t *testing.T) { }, }, }, + { + Name: "create-segmentation", + MinVersion: newVersion("3.9"), + Definition: driver.ArangoSearchAnalyzerDefinition{ + Name: "my-segmentation", + Type: driver.ArangoSearchAnalyzerTypeSegmentation, + Properties: driver.ArangoSearchAnalyzerProperties{ + Break: driver.ArangoSearchBreakTypeAll, + Case: driver.ArangoSearchCaseUpper, + }, + }, + }, + { + Name: "create-collation", + MinVersion: newVersion("3.9"), + Definition: driver.ArangoSearchAnalyzerDefinition{ + Name: "my-collation", + Type: driver.ArangoSearchAnalyzerTypeCollation, + Properties: driver.ArangoSearchAnalyzerProperties{ + Locale: "en_US.utf-8", + }, + }, + ExpectedDefinition: &driver.ArangoSearchAnalyzerDefinition{ + Name: "my-collation", + Type: driver.ArangoSearchAnalyzerTypeCollation, + Properties: driver.ArangoSearchAnalyzerProperties{ + Locale: "en_US", + }, + }, + }, } for _, testCase := range testCases { diff --git a/view_arangosearch.go b/view_arangosearch.go index 970e4d59..aad74efb 100644 --- a/view_arangosearch.go +++ b/view_arangosearch.go @@ -65,6 +65,10 @@ const ( ArangoSearchAnalyzerTypeGeoJSON ArangoSearchAnalyzerType = "geojson" // ArangoSearchAnalyzerTypeGeoPoint an Analyzer capable of breaking up JSON object describing a coordinate into a set of indexable tokens for further usage with ArangoSearch Geo functions. ArangoSearchAnalyzerTypeGeoPoint ArangoSearchAnalyzerType = "geopoint" + // ArangoSearchAnalyzerTypeSegmentation an Analyzer capable of breaking up the input text into tokens in a language-agnostic manner + ArangoSearchAnalyzerTypeSegmentation ArangoSearchAnalyzerType = "segmentation" + // ArangoSearchAnalyzerTypeCollation an Analyzer capable of converting the input into a set of language-specific tokens + ArangoSearchAnalyzerTypeCollation ArangoSearchAnalyzerType = "collation" ) // ArangoSearchAnalyzerFeature specifies a feature to an analyzer @@ -90,6 +94,17 @@ const ( ArangoSearchCaseNone ArangoSearchCaseType = "none" ) +type ArangoSearchBreakType string + +const ( + // ArangoSearchBreakTypeAll to return all tokens + ArangoSearchBreakTypeAll ArangoSearchBreakType = "all" + // ArangoSearchBreakTypeAlpha to return tokens composed of alphanumeric characters only (default) + ArangoSearchBreakTypeAlpha ArangoSearchBreakType = "alpha" + // ArangoSearchBreakTypeGraphic to return tokens composed of non-whitespace characters only + ArangoSearchBreakTypeGraphic ArangoSearchBreakType = "graphic" +) + type ArangoSearchNGramStreamType string const ( @@ -120,7 +135,7 @@ type ArangoSearchAnalyzerProperties struct { Delimiter string `json:"delimiter,omitempty"` // Accent used by Norm, Text Accent *bool `json:"accent,omitempty"` - // Case used by Norm, Text + // Case used by Norm, Text, Segmentation Case ArangoSearchCaseType `json:"case,omitempty"` // EdgeNGram used by Text @@ -173,6 +188,9 @@ type ArangoSearchAnalyzerProperties struct { Latitude []string `json:"latitude,omitempty"` // Longitude used by GetPoint. Longitude []string `json:"longitude,omitempty"` + + // Break used by Segmentation + Break ArangoSearchBreakType `json:"break,omitempty"` } // ArangoSearchAnalyzerGeoJSONType GeoJSON Type parameter.