From ca3ced2068ef4944e0fbe7a1c891961c00f86afa Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Fri, 24 Oct 2025 07:42:35 +0000 Subject: [PATCH] Optimize unmarshal_json MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The optimization introduces **LRU caching for Pydantic model creation**, which eliminates the expensive overhead of repeatedly creating the same unmarshaller models. **Key changes:** - Extracted model creation into `_get_unmarshaller()` function decorated with `@lru_cache(maxsize=64)` - The `create_model()` call, which was taking 93.8% of execution time in the original code, is now cached and reused for identical types - Incidental cleanups in the same diff: `not origin is Union` is rewritten as `origin is not Union` in `is_nullable()`, and the standalone `# pyright: ignore[reportAttributeAccessIssue]` comment line is removed from `unmarshal()` **Why this optimization works:** - `create_model()` is computationally expensive as it dynamically creates new Pydantic model classes with validation logic - The line profiler shows the original `create_model()` call took ~55.8ms out of 59.5ms total (93.8% of time) - With caching, subsequent calls for the same `typ` retrieve the pre-built model in ~0.44ms instead of recreating it - The cache hit ratio is high since applications typically unmarshal the same types repeatedly - Caveat: `lru_cache` keys on `typ`, so `typ` must be hashable; an unhashable type annotation would now raise `TypeError` at the cache lookup, whereas the uncached code never hashed `typ` **Performance benefits:** - **237% speedup** overall (24.3ms → 7.19ms) - Individual test cases show **4000-10000% improvements** for simple types that benefit most from caching - Large data structures (1000-item lists/dicts) show more modest but still significant gains (300-1000% faster) This optimization is particularly effective for workloads that repeatedly deserialize the same data types, which is common in API clients, data processing pipelines, and serialization-heavy applications. 
--- src/mistralai/utils/serializers.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/src/mistralai/utils/serializers.py b/src/mistralai/utils/serializers.py index 378a14c..ed9dfed 100644 --- a/src/mistralai/utils/serializers.py +++ b/src/mistralai/utils/serializers.py @@ -13,6 +13,7 @@ from pydantic_core import from_json from ..types.basemodel import BaseModel, Nullable, OptionalNullable, Unset +from functools import lru_cache def serialize_decimal(as_str: bool): @@ -141,14 +142,8 @@ def unmarshal_json(raw, typ: Any) -> Any: def unmarshal(val, typ: Any) -> Any: - unmarshaller = create_model( - "Unmarshaller", - body=(typ, ...), - __config__=ConfigDict(populate_by_name=True, arbitrary_types_allowed=True), - ) - + unmarshaller = _get_unmarshaller(typ) m = unmarshaller(body=val) - # pyright: ignore[reportAttributeAccessIssue] return m.body # type: ignore @@ -178,7 +173,7 @@ def is_nullable(field): if origin is Nullable or origin is OptionalNullable: return True - if not origin is Union or type(None) not in get_args(field): + if origin is not Union or type(None) not in get_args(field): return False for arg in get_args(field): @@ -247,3 +242,12 @@ def _get_typing_objects_by_name_of(name: str) -> Tuple[Any, ...]: f"Neither typing nor typing_extensions has an object called {name!r}" ) return result + + +@lru_cache(maxsize=64) +def _get_unmarshaller(typ: Any): + return create_model( + "Unmarshaller", + body=(typ, ...), + __config__=ConfigDict(populate_by_name=True, arbitrary_types_allowed=True), + )