From 772b626311033ecc3fe52664924ff9b7a4463f35 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?H=C3=BCseyin=20Emre=20Arma=C4=9Fan?=
Date: Mon, 15 Apr 2024 18:21:46 +0100
Subject: [PATCH] Add CategorizeText aggregation (#1589)

---
 elasticsearch_dsl/aggs.py |  4 ++++
 tests/test_aggs.py        | 17 +++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/elasticsearch_dsl/aggs.py b/elasticsearch_dsl/aggs.py
index 69c2d85e..96b830e0 100644
--- a/elasticsearch_dsl/aggs.py
+++ b/elasticsearch_dsl/aggs.py
@@ -309,6 +309,10 @@ class MultiTerms(Bucket):
     name = "multi_terms"
 
 
+class CategorizeText(Bucket):
+    name = "categorize_text"
+
+
 # metric aggregations
 class TopHits(Agg):
     name = "top_hits"
diff --git a/tests/test_aggs.py b/tests/test_aggs.py
index aa928de4..e2848864 100644
--- a/tests/test_aggs.py
+++ b/tests/test_aggs.py
@@ -342,6 +342,23 @@ def test_multi_terms_aggregation():
     } == a.to_dict()
 
 
+def test_categorize_text_aggregation():
+    a = aggs.CategorizeText(
+        field="tags",
+        categorization_filters=["\\w+\\_\\d{3}"],
+        max_matched_tokens=2,
+        similarity_threshold=30,
+    )
+    assert {
+        "categorize_text": {
+            "field": "tags",
+            "categorization_filters": ["\\w+\\_\\d{3}"],
+            "max_matched_tokens": 2,
+            "similarity_threshold": 30,
+        }
+    } == a.to_dict()
+
+
 def test_median_absolute_deviation_aggregation():
     a = aggs.MedianAbsoluteDeviation(field="rating")
 