From 3dde6689132e19e261bcfe6b7331196d673c595a Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Fri, 30 May 2025 11:55:22 +0100 Subject: [PATCH] Fix some new type warnings from mypy (#2974) (cherry picked from commit 63efa48aabc353f806ef0a0b07add5130136fc5d) --- elasticsearch/dsl/_async/document.py | 2 +- elasticsearch/dsl/_sync/document.py | 2 +- elasticsearch/dsl/field.py | 12 ++++++- elasticsearch/dsl/query.py | 44 ++++++++++++++++++++++- elasticsearch/dsl/types.py | 54 ++++++++++++++++++++++------ 5 files changed, 100 insertions(+), 14 deletions(-) diff --git a/elasticsearch/dsl/_async/document.py b/elasticsearch/dsl/_async/document.py index 4b7654761..de6e9eecc 100644 --- a/elasticsearch/dsl/_async/document.py +++ b/elasticsearch/dsl/_async/document.py @@ -96,7 +96,7 @@ class AsyncDocument(DocumentBase, metaclass=AsyncIndexMeta): @classmethod def _get_using(cls, using: Optional[AsyncUsingType] = None) -> AsyncUsingType: - return cast(AsyncUsingType, using or cls._index._using) + return using or cls._index._using @classmethod def _get_connection( diff --git a/elasticsearch/dsl/_sync/document.py b/elasticsearch/dsl/_sync/document.py index 316ece5cb..f68be4aae 100644 --- a/elasticsearch/dsl/_sync/document.py +++ b/elasticsearch/dsl/_sync/document.py @@ -92,7 +92,7 @@ class Document(DocumentBase, metaclass=IndexMeta): @classmethod def _get_using(cls, using: Optional[UsingType] = None) -> UsingType: - return cast(UsingType, using or cls._index._using) + return using or cls._index._using @classmethod def _get_connection(cls, using: Optional[UsingType] = None) -> "Elasticsearch": diff --git a/elasticsearch/dsl/field.py b/elasticsearch/dsl/field.py index 726fbe358..e3ed5dfcd 100644 --- a/elasticsearch/dsl/field.py +++ b/elasticsearch/dsl/field.py @@ -1290,7 +1290,7 @@ def _deserialize(self, data: Any) -> Union[datetime, date]: if isinstance(data, datetime): if self._default_timezone and data.tzinfo is None: data = data.replace(tzinfo=self._default_timezone) - return data + return cast(datetime, data) if isinstance(data, date): return data if isinstance(data, int): @@ -3689,6 +3689,11 @@ class SemanticText(Field): by using the Update mapping API. Use the Create inference API to create the endpoint. If not specified, the inference endpoint defined by inference_id will be used at both index and query time. + :arg chunking_settings: Settings for chunking text into smaller + passages. If specified, these will override the chunking settings + sent in the inference endpoint associated with inference_id. If + chunking settings are updated, they will not be applied to + existing documents until they are reindexed. 
""" name = "semantic_text" @@ -3699,6 +3704,9 @@ def __init__( meta: Union[Mapping[str, str], "DefaultType"] = DEFAULT, inference_id: Union[str, "DefaultType"] = DEFAULT, search_inference_id: Union[str, "DefaultType"] = DEFAULT, + chunking_settings: Union[ + "types.ChunkingSettings", Dict[str, Any], "DefaultType" + ] = DEFAULT, **kwargs: Any, ): if meta is not DEFAULT: @@ -3707,6 +3715,8 @@ def __init__( kwargs["inference_id"] = inference_id if search_inference_id is not DEFAULT: kwargs["search_inference_id"] = search_inference_id + if chunking_settings is not DEFAULT: + kwargs["chunking_settings"] = chunking_settings super().__init__(*args, **kwargs) diff --git a/elasticsearch/dsl/query.py b/elasticsearch/dsl/query.py index 1282d3b02..06be2f7fb 100644 --- a/elasticsearch/dsl/query.py +++ b/elasticsearch/dsl/query.py @@ -1382,7 +1382,49 @@ def __init__( min_term_freq: Union[int, "DefaultType"] = DEFAULT, min_word_length: Union[int, "DefaultType"] = DEFAULT, routing: Union[str, "DefaultType"] = DEFAULT, - stop_words: Union[str, Sequence[str], "DefaultType"] = DEFAULT, + stop_words: Union[ + Literal[ + "_arabic_", + "_armenian_", + "_basque_", + "_bengali_", + "_brazilian_", + "_bulgarian_", + "_catalan_", + "_cjk_", + "_czech_", + "_danish_", + "_dutch_", + "_english_", + "_estonian_", + "_finnish_", + "_french_", + "_galician_", + "_german_", + "_greek_", + "_hindi_", + "_hungarian_", + "_indonesian_", + "_irish_", + "_italian_", + "_latvian_", + "_lithuanian_", + "_norwegian_", + "_persian_", + "_portuguese_", + "_romanian_", + "_russian_", + "_serbian_", + "_sorani_", + "_spanish_", + "_swedish_", + "_thai_", + "_turkish_", + "_none_", + ], + Sequence[str], + "DefaultType", + ] = DEFAULT, unlike: Union[ Union[str, "types.LikeDocument"], Sequence[Union[str, "types.LikeDocument"]], diff --git a/elasticsearch/dsl/types.py b/elasticsearch/dsl/types.py index 6dc9f09df..e6e19e410 100644 --- a/elasticsearch/dsl/types.py +++ b/elasticsearch/dsl/types.py @@ -142,6 +142,48 @@ def __init__( super().__init__(kwargs) +class ChunkingSettings(AttrDict[Any]): + """ + :arg strategy: (required) The chunking strategy: `sentence` or `word`. + Defaults to `sentence` if omitted. + :arg max_chunk_size: (required) The maximum size of a chunk in words. + This value cannot be higher than `300` or lower than `20` (for + `sentence` strategy) or `10` (for `word` strategy). Defaults to + `250` if omitted. + :arg overlap: The number of overlapping words for chunks. It is + applicable only to a `word` chunking strategy. This value cannot + be higher than half the `max_chunk_size` value. Defaults to `100` + if omitted. + :arg sentence_overlap: The number of overlapping sentences for chunks. + It is applicable only for a `sentence` chunking strategy. It can + be either `1` or `0`. Defaults to `1` if omitted. 
+ """ + + strategy: Union[str, DefaultType] + max_chunk_size: Union[int, DefaultType] + overlap: Union[int, DefaultType] + sentence_overlap: Union[int, DefaultType] + + def __init__( + self, + *, + strategy: Union[str, DefaultType] = DEFAULT, + max_chunk_size: Union[int, DefaultType] = DEFAULT, + overlap: Union[int, DefaultType] = DEFAULT, + sentence_overlap: Union[int, DefaultType] = DEFAULT, + **kwargs: Any, + ): + if strategy is not DEFAULT: + kwargs["strategy"] = strategy + if max_chunk_size is not DEFAULT: + kwargs["max_chunk_size"] = max_chunk_size + if overlap is not DEFAULT: + kwargs["overlap"] = overlap + if sentence_overlap is not DEFAULT: + kwargs["sentence_overlap"] = sentence_overlap + super().__init__(kwargs) + + class ClassificationInferenceOptions(AttrDict[Any]): """ :arg num_top_classes: Specifies the number of top class predictions to @@ -1561,11 +1603,7 @@ class InnerHits(AttrDict[Any]): DefaultType, ] seq_no_primary_term: Union[bool, DefaultType] - fields: Union[ - Union[str, InstrumentedField], - Sequence[Union[str, InstrumentedField]], - DefaultType, - ] + fields: Union[Sequence[Union[str, InstrumentedField]], DefaultType] sort: Union[ Union[Union[str, InstrumentedField], "SortOptions"], Sequence[Union[Union[str, InstrumentedField], "SortOptions"]], @@ -1600,11 +1638,7 @@ def __init__( DefaultType, ] = DEFAULT, seq_no_primary_term: Union[bool, DefaultType] = DEFAULT, - fields: Union[ - Union[str, InstrumentedField], - Sequence[Union[str, InstrumentedField]], - DefaultType, - ] = DEFAULT, + fields: Union[Sequence[Union[str, InstrumentedField]], DefaultType] = DEFAULT, sort: Union[ Union[Union[str, InstrumentedField], "SortOptions"], Sequence[Union[Union[str, InstrumentedField], "SortOptions"]],