diff --git a/src/analyzer.rs b/src/analyzer.rs index 8656881..69a1eda 100644 --- a/src/analyzer.rs +++ b/src/analyzer.rs @@ -1,7 +1,7 @@ use serde::{Deserialize, Serialize}; use typed_builder::TypedBuilder; -#[derive(Debug, Serialize, Deserialize, PartialEq)] +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] #[serde(rename_all = "lowercase")] pub enum AnalyzerFeature { Frequency, @@ -9,7 +9,7 @@ pub enum AnalyzerFeature { Position, } -#[derive(Debug, Serialize, Deserialize, PartialEq)] +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] #[serde(rename_all = "lowercase")] pub enum AnalyzerCase { Lower, @@ -17,14 +17,14 @@ pub enum AnalyzerCase { Upper, } -#[derive(Debug, Serialize, Deserialize, PartialEq)] +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] #[serde(rename_all = "lowercase")] pub enum NgramStreamType { Binary, Utf8, } -#[derive(Debug, Serialize, Deserialize, PartialEq)] +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] #[serde(rename_all = "lowercase")] pub enum GeoJsonType { Shape, @@ -32,7 +32,7 @@ pub enum GeoJsonType { Point, } -#[derive(Debug, Serialize, Deserialize, TypedBuilder, PartialEq)] +#[derive(Clone, Debug, Serialize, Deserialize, TypedBuilder, PartialEq)] #[builder(doc)] pub struct DelimiterAnalyzerProperties { /// The value will be used as delimiter to split text into tokens as specified @@ -42,14 +42,14 @@ pub struct DelimiterAnalyzerProperties { pub delimiter: Option, } -#[derive(Debug, Serialize, Deserialize, TypedBuilder, PartialEq)] +#[derive(Clone, Debug, Serialize, Deserialize, TypedBuilder, PartialEq)] #[builder(doc)] pub struct StemAnalyzerProperties { /// Format: `language[_COUNTRY][.encoding][@variant]` pub locale: String, } -#[derive(Debug, Serialize, Deserialize, TypedBuilder, PartialEq)] +#[derive(Clone, Debug, Serialize, Deserialize, TypedBuilder, PartialEq)] #[builder(doc)] pub struct NormAnalyzerProperties { /// Format: `language[_COUNTRY][.encoding][@variant]` @@ -66,7 +66,7 @@ pub 
struct NormAnalyzerProperties { pub accent: Option, } -#[derive(Debug, Serialize, Deserialize, TypedBuilder, PartialEq)] +#[derive(Clone, Debug, Serialize, Deserialize, TypedBuilder, PartialEq)] #[builder(doc)] #[serde(rename_all = "camelCase")] pub struct NgramAnalyzerProperties { @@ -85,7 +85,7 @@ pub struct NgramAnalyzerProperties { pub stream_type: Option, } -#[derive(Debug, Serialize, Deserialize, TypedBuilder, PartialEq)] +#[derive(Clone, Debug, Serialize, Deserialize, TypedBuilder, PartialEq)] #[builder(doc)] #[serde(rename_all = "camelCase")] pub struct TextAnalyzerProperties { @@ -122,7 +122,7 @@ pub struct TextAnalyzerProperties { pub stemming: Option, } -#[derive(Debug, Serialize, Deserialize, TypedBuilder, PartialEq)] +#[derive(Clone, Debug, Serialize, Deserialize, TypedBuilder, PartialEq)] #[builder(doc)] pub struct GeoJsonAnalyzerProperties { /// Whether to index all GeoJSON geometry types, just the centroid, or just points @@ -132,6 +132,22 @@ pub struct GeoJsonAnalyzerProperties { // Skip the options as they "generally should remain unchanged" } +#[derive(Clone, Debug, Serialize, Deserialize, TypedBuilder, PartialEq)] +#[builder(doc)] +#[serde(rename_all = "camelCase")] +pub struct PipelineAnalyzerProperties { + pub pipeline: Vec, +} +#[derive(Clone, Debug, Serialize, Deserialize, TypedBuilder, PartialEq)] +#[builder(doc)] +#[serde(rename_all = "camelCase")] +pub struct StopwordsAnalyzerProperties { + #[serde(skip_serializing_if = "Option::is_none")] + #[builder(default, setter(strip_option))] + pub hex: Option, + pub stopwords: Vec, +} + #[derive(Debug, Serialize, Deserialize, PartialEq)] #[serde(rename_all = "camelCase", tag = "type")] pub enum AnalyzerInfo { @@ -201,9 +217,83 @@ pub enum AnalyzerInfo { #[serde(skip_serializing_if = "Option::is_none")] properties: Option, }, + Stopwords { + name: String, + properties: StopwordsAnalyzerProperties, + #[serde(skip_serializing_if = "Option::is_none")] + features: Option>, + }, + Pipeline { + name: 
String, + properties: PipelineAnalyzerProperties, + }, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Clone, Debug, Serialize, Deserialize)] pub struct AnalyzerDescription { pub name: String, } + +//these are the same analyzer types, but customized to be used in a pipeline analyzer +//since in pipeline analyzers `name` is not required for each sub-analyzer, the name field is omitted +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)] +#[serde(rename_all = "camelCase", tag = "type")] +pub enum PipelineAnalyzers { + /// The `identity` Analyzer does not take additional properties. + Identity { + #[serde(skip_serializing_if = "Option::is_none")] + features: Option>, + }, + Delimiter { + #[serde(skip_serializing_if = "Option::is_none")] + features: Option>, + + #[serde(skip_serializing_if = "Option::is_none")] + properties: Option, + }, + + Stem { + #[serde(skip_serializing_if = "Option::is_none")] + features: Option>, + + #[serde(skip_serializing_if = "Option::is_none")] + properties: Option, + }, + + Norm { + #[serde(skip_serializing_if = "Option::is_none")] + features: Option>, + + #[serde(skip_serializing_if = "Option::is_none")] + properties: Option, + }, + + Ngram { + #[serde(skip_serializing_if = "Option::is_none")] + features: Option>, + + #[serde(skip_serializing_if = "Option::is_none")] + properties: Option, + }, + + Text { + #[serde(skip_serializing_if = "Option::is_none")] + features: Option>, + + #[serde(skip_serializing_if = "Option::is_none")] + properties: Option, + }, + + Geojson { + #[serde(skip_serializing_if = "Option::is_none")] + features: Option>, + + #[serde(skip_serializing_if = "Option::is_none")] + properties: Option, + }, + Stopwords { + properties: StopwordsAnalyzerProperties, + #[serde(skip_serializing_if = "Option::is_none")] + features: Option>, + }, +} diff --git a/tests/analyzer.rs b/tests/analyzer.rs index cc1d547..c96f929 100644 --- a/tests/analyzer.rs +++ b/tests/analyzer.rs @@ -10,7 +10,8 @@ use 
uclient::ClientExt; use arangors::analyzer::{ AnalyzerCase, AnalyzerFeature, AnalyzerInfo, GeoJsonAnalyzerProperties, GeoJsonType, - NgramAnalyzerProperties, NgramStreamType, NormAnalyzerProperties, + NgramAnalyzerProperties, NgramStreamType, NormAnalyzerProperties, PipelineAnalyzerProperties, + PipelineAnalyzers, }; use arangors::{ collection::{ @@ -83,6 +84,43 @@ async fn create_geo_analyzer( database.create_analyzer(info).await } +#[maybe_async] +async fn create_pipeline_analyzer( + database: &Database, + analyzer_name: String, +) -> Result { + let norm_analyzer = PipelineAnalyzers::Norm { + features: Some(vec![AnalyzerFeature::Frequency, AnalyzerFeature::Norm]), + properties: Some( + NormAnalyzerProperties::builder() + .locale("en.utf-8".to_string()) + .case(AnalyzerCase::Lower) + .build(), + ), + }; + + let ngram_analyzer = PipelineAnalyzers::Ngram { + features: Some(vec![AnalyzerFeature::Frequency, AnalyzerFeature::Norm]), + properties: Some( + NgramAnalyzerProperties::builder() + .min(2) + .max(2) + .preserve_original(false) + .stream_type(NgramStreamType::Utf8) + .build(), + ), + }; + + let pipe = AnalyzerInfo::Pipeline { + name: analyzer_name, + properties: PipelineAnalyzerProperties::builder() + .pipeline(vec![norm_analyzer, ngram_analyzer]) + .build(), + }; + + database.create_analyzer(pipe).await +} + #[maybe_async::test( any(feature = "reqwest_blocking"), async(any(feature = "reqwest_async"), tokio::test), @@ -149,6 +187,28 @@ async fn test_create_and_drop_geo_analyzer() { assert_eq!(result.is_err(), false); } +#[maybe_async::test( + any(feature = "reqwest_blocking"), + async(any(feature = "reqwest_async"), tokio::test), + async(any(feature = "surf_async"), async_std::test) +)] +async fn test_create_and_drop_pipeline_analyzer() { + test_setup(); + let analyzer_name = "test_analyzer_pipeline_create".to_string(); + let conn = connection().await; + let database = conn.db("test_db").await.unwrap(); + + let analyzer = create_pipeline_analyzer(&database, 
analyzer_name.clone()).await; + + trace!("{:?}", analyzer); + + assert_eq!(analyzer.is_err(), false); + + let result = database.drop_analyzer(&analyzer_name).await; + + assert_eq!(result.is_err(), false); +} + #[maybe_async::test( any(feature = "reqwest_blocking"), async(any(feature = "reqwest_async"), tokio::test),