Skip to content

Conversation

@codeflash-ai
Copy link

@codeflash-ai codeflash-ai bot commented Oct 24, 2025

📄 17% (0.17x) speedup for BatchJobIn.serialize_model in src/mistralai/models/batchjobin.py

⏱️ Runtime : 167 microseconds → 142 microseconds (best of 153 runs)

📝 Explanation and details

Optimizations made:

  • Changed optional_fields, nullable_fields, and null_default_fields to sets for O(1) membership checks.
  • Replaced set intersection with n in fields_set for faster field presence testing.
  • Removed unnecessary list creation (intersection({n})) and used direct lookups.
  • Reduced lookups/calls inside the loop (no repeatedly calling model_fields.items() or recalculating set intersections).
  • Commented clarifications preserved as-is per behavioral constraints.
  • Variable names, comments, and structure otherwise preserved as in the original.

Correctness verification report:

Test Status
⚙️ Existing Unit Tests 🔘 None Found
🌀 Generated Regression Tests 40 Passed
⏪ Replay Tests 🔘 None Found
🔎 Concolic Coverage Tests 🔘 None Found
📊 Tests Coverage 100.0%
🌀 Generated Regression Tests and Runtime
from __future__ import annotations

from typing import Any, Dict, List, Optional

# imports
import pytest  # used for our unit tests
from mistralai.models.batchjobin import BatchJobIn
from typing_extensions import Annotated

# --- Minimal stubs for dependencies (since we can't import real ones here) ---

# Sentinel values for unset/null
class _UnsetType:
    pass
UNSET = _UnsetType()
UNSET_SENTINEL = UNSET

def validate_open_enum(strict: bool):
    """Stub validator factory: returns an identity validator (no checking).

    The real SDK helper validates open-enum members; for these tests a
    pass-through is sufficient.  ``strict`` is accepted but ignored.
    """
    def _identity(value):
        return value

    return _identity

class APIEndpoint(str):
    # Stub standing in for the SDK's APIEndpoint enum; behaves as a plain str.
    pass

# Stub for OptionalNullable: alias it to typing.Optional.  Per the SDK, the
# real type also admits a dedicated UNSET sentinel in addition to None; that
# distinction is not exercised by these stubs.
OptionalNullable = Optional

# Minimal BaseModel stub
class BaseModel:
    """Minimal stand-in for pydantic's BaseModel.

    Records constructor keywords as attributes and tracks which fields the
    caller supplied explicitly in ``__pydantic_fields_set__``, mirroring
    pydantic v2 behaviour.
    """

    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)
        self.__pydantic_fields_set__ = set(kwargs.keys())

    @classmethod
    def model_fields(cls):
        # Simulate Pydantic model fields.
        # Fix: instantiate the dummy Field class (the original returned the
        # class object itself, missing the trailing ``()``), so callers get
        # field *instances* carrying an ``alias`` attribute — consistent with
        # the sibling BaseModel stub later in this file.
        return {k: type('Field', (), {'alias': None})() for k in cls.__annotations__}

# Dummy decorator for model_serializer
def model_serializer(mode=None):
    """Stub of pydantic's ``model_serializer`` decorator: a no-op pass-through.

    ``mode`` is accepted for signature compatibility and ignored.
    """
    def _wrap(func):
        return func

    return _wrap

# Dummy PlainValidator
def PlainValidator(fn):
    """Stub of pydantic's PlainValidator: returns the callable unchanged."""
    validator = fn
    return validator
from mistralai.models.batchjobin import BatchJobIn


# --- Helper handler for serialization ---
def default_handler(obj):
    """Simulate Pydantic serialization for the test model.

    Maps each known BatchJobIn field name to the attribute value on *obj*,
    substituting UNSET_SENTINEL for attributes that were never set.
    """
    field_names = (
        'input_files', 'endpoint', 'model', 'agent_id', 'metadata', 'timeout_hours'
    )
    return {name: getattr(obj, name, UNSET_SENTINEL) for name in field_names}

# --- Unit tests ---

# 1. Basic Test Cases

def test_basic_required_fields_only():
    """Serialize with only the required fields populated.

    NOTE(review): no asserts here — output equality between original and
    optimized code is checked by the codeflash harness, not by pytest.
    """
    # Only required fields set
    bj = BatchJobIn(input_files=['file1.txt'], endpoint=APIEndpoint('v1/completions'))
    codeflash_output = bj.serialize_model(default_handler); out = codeflash_output # 9.84μs -> 8.97μs (9.63% faster)


def test_basic_some_optional_fields_set():
    """Serialize when 'model' is set but other optional fields stay unset."""
    # Some optional fields set, some unset
    bj = BatchJobIn(
        input_files=['file1.txt'],
        endpoint=APIEndpoint('v1/completions'),
        model='mistral-7b'
    )
    codeflash_output = bj.serialize_model(default_handler); out = codeflash_output # 8.02μs -> 6.88μs (16.5% faster)

# 2. Edge Test Cases

def test_edge_empty_input_files():
    """Edge case: input_files is an empty list."""
    # input_files is empty list
    bj = BatchJobIn(
        input_files=[],
        endpoint=APIEndpoint('v1/completions')
    )
    codeflash_output = bj.serialize_model(default_handler); out = codeflash_output # 7.84μs -> 6.59μs (18.9% faster)

def test_edge_none_metadata():
    """Edge case: metadata explicitly set to None (nullable field)."""
    # metadata explicitly set to None
    bj = BatchJobIn(
        input_files=['file1.txt'],
        endpoint=APIEndpoint('v1/completions'),
        metadata=None
    )
    codeflash_output = bj.serialize_model(default_handler); out = codeflash_output # 7.67μs -> 6.39μs (20.0% faster)

def test_edge_none_model_and_agent_id():
    """Edge case: both 'model' and 'agent_id' explicitly set to None."""
    # model and agent_id explicitly set to None
    bj = BatchJobIn(
        input_files=['file1.txt'],
        endpoint=APIEndpoint('v1/completions'),
        model=None,
        agent_id=None
    )
    codeflash_output = bj.serialize_model(default_handler); out = codeflash_output # 7.55μs -> 6.21μs (21.5% faster)


def test_edge_timeout_hours_set_to_none():
    """Edge case: timeout_hours explicitly None (should appear in output)."""
    # timeout_hours set to None (should appear in output)
    bj = BatchJobIn(
        input_files=['file1.txt'],
        endpoint=APIEndpoint('v1/completions'),
        timeout_hours=None
    )
    codeflash_output = bj.serialize_model(default_handler); out = codeflash_output # 7.37μs -> 6.39μs (15.2% faster)

def test_edge_metadata_empty_dict():
    """Edge case: metadata is an empty dict (falsy but set)."""
    # metadata set to empty dict
    bj = BatchJobIn(
        input_files=['file1.txt'],
        endpoint=APIEndpoint('v1/completions'),
        metadata={}
    )
    codeflash_output = bj.serialize_model(default_handler); out = codeflash_output # 7.21μs -> 6.49μs (11.2% faster)

def test_edge_input_files_contains_empty_string():
    """Edge case: input_files contains an empty string entry."""
    # input_files contains empty string
    bj = BatchJobIn(
        input_files=[''],
        endpoint=APIEndpoint('v1/completions')
    )
    codeflash_output = bj.serialize_model(default_handler); out = codeflash_output # 7.19μs -> 6.37μs (12.8% faster)

def test_edge_agent_id_empty_string():
    """Edge case: agent_id is an empty string (falsy but set)."""
    # agent_id set to empty string
    bj = BatchJobIn(
        input_files=['file1.txt'],
        endpoint=APIEndpoint('v1/completions'),
        agent_id=''
    )
    codeflash_output = bj.serialize_model(default_handler); out = codeflash_output # 7.10μs -> 6.29μs (13.0% faster)


def test_large_many_input_files():
    """Scale case: serialize with 1000 input files."""
    # Large number of input files
    files = [f'file_{i}.txt' for i in range(1000)]
    bj = BatchJobIn(
        input_files=files,
        endpoint=APIEndpoint('v1/completions')
    )
    codeflash_output = bj.serialize_model(default_handler); out = codeflash_output # 9.85μs -> 8.90μs (10.7% faster)



def test_large_timeout_hours_extreme():
    """Scale case: timeout_hours set to a very large value (10**6)."""
    # timeout_hours set to very large value
    bj = BatchJobIn(
        input_files=['file1.txt'],
        endpoint=APIEndpoint('v1/completions'),
        timeout_hours=10**6
    )
    codeflash_output = bj.serialize_model(default_handler); out = codeflash_output # 8.15μs -> 6.91μs (17.9% faster)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
#------------------------------------------------
from __future__ import annotations

from typing import Dict, List, Optional

# imports
import pytest  # used for our unit tests
from mistralai.models.batchjobin import BatchJobIn


# Minimal stubs for imported symbols/classes, to allow the tests to run
class UNSET_SENTINEL_TYPE:
    """Marker class whose single instance denotes an unset field."""


UNSET_SENTINEL = UNSET_SENTINEL_TYPE()  # canonical "unset" sentinel
UNSET = UNSET_SENTINEL  # alias used by the stubs below


class APIEndpoint(str):
    """String subclass standing in for the SDK's APIEndpoint enum."""

class BaseModel:
    """Minimal stand-in for pydantic's BaseModel used by these tests."""

    def __init__(self, **fields):
        # Record each keyword as an attribute, and remember which fields
        # the caller supplied explicitly (mirrors pydantic v2).
        for name, value in fields.items():
            setattr(self, name, value)
        self.__pydantic_fields_set__ = set(fields)

    @classmethod
    def model_fields(cls):
        """Return field info as a dict: name -> dummy field object (alias=None)."""
        return {
            name: type('Field', (), {'alias': None})()
            for name in cls.__annotations__
        }

# Helper for OptionalNullable
class OptionalNullable:
    """Transparent wrapper for a value (test helper).

    Compares equal both to its bare wrapped value and to another wrapper
    holding an equal value, so tests can treat it as the value itself.
    """

    def __init__(self, value):
        self.value = value

    def __eq__(self, other):
        # For testing, treat OptionalNullable as transparent.
        if isinstance(other, OptionalNullable):
            return self.value == other.value
        return self.value == other

    def __repr__(self):
        return f"OptionalNullable({self.value!r})"
from mistralai.models.batchjobin import BatchJobIn


# Helper handler for serialization (returns instance's __dict__)
def handler(obj):
    """Serialization stand-in: return a copy of the instance ``__dict__``,
    unwrapping any OptionalNullable wrappers down to their payload value."""
    serialized = {}
    for key, value in obj.__dict__.items():
        if isinstance(value, OptionalNullable):
            serialized[key] = value.value
        else:
            serialized[key] = value
    return serialized

# ----------- Unit Tests ------------

# BASIC TEST CASES

def test_basic_required_fields():
    """Serialize with only the required fields set (second stub variant).

    NOTE(review): no asserts — output equality between original and optimized
    code is verified by the codeflash harness, not by pytest.
    """
    # Only required fields set
    b = BatchJobIn(input_files=['file1.txt'], endpoint=APIEndpoint('endpoint'))
    codeflash_output = b.serialize_model(handler); result = codeflash_output # 8.33μs -> 6.81μs (22.3% faster)



def test_basic_empty_input_files():
    """Edge case: input_files is an empty list."""
    # input_files is empty
    b = BatchJobIn(input_files=[], endpoint=APIEndpoint('endpoint'))
    codeflash_output = b.serialize_model(handler); result = codeflash_output # 10.4μs -> 8.89μs (16.6% faster)

# EDGE TEST CASES






def test_edge_timeout_hours_zero():
    """Edge case: timeout_hours is zero (falsy but explicitly set)."""
    # timeout_hours set to zero
    b = BatchJobIn(
        input_files=['file1.txt'],
        endpoint=APIEndpoint('endpoint'),
        timeout_hours=0
    )
    codeflash_output = b.serialize_model(handler); result = codeflash_output # 10.6μs -> 8.91μs (18.7% faster)

def test_edge_timeout_hours_negative():
    """Edge case: timeout_hours is negative (no validation expected here)."""
    # timeout_hours set to negative value
    b = BatchJobIn(
        input_files=['file1.txt'],
        endpoint=APIEndpoint('endpoint'),
        timeout_hours=-5
    )
    codeflash_output = b.serialize_model(handler); result = codeflash_output # 8.84μs -> 6.84μs (29.2% faster)



def test_large_many_input_files():
    """Scale case: serialize with 1000 input files.

    NOTE(review): this function has the same name as the earlier
    test_large_many_input_files — if both generated test sections end up in
    one module, this definition shadows the earlier one and only this runs.
    """
    # Large number of input files (up to 1000)
    files = [f'file_{i}.txt' for i in range(1000)]
    b = BatchJobIn(input_files=files, endpoint=APIEndpoint('endpoint'))
    codeflash_output = b.serialize_model(handler); result = codeflash_output # 10.5μs -> 8.82μs (19.4% faster)

To edit these changes git checkout codeflash/optimize-BatchJobIn.serialize_model-mh4dsutx and push.

Codeflash

**Optimizations made:**
- Changed `optional_fields`, `nullable_fields`, and `null_default_fields` to sets for O(1) membership checks.
- Replaced set intersection with `n in fields_set` for faster field presence testing.
- Removed unnecessary list creation (`intersection({n})`) and used direct lookups.
- Reduced lookups/calls inside the loop (no repeatedly calling `model_fields.items()` or recalculating set intersections).
- Commented clarifications preserved as-is per behavioral constraints.  
- Variable names, comments, and structure otherwise preserved as in the original.
@codeflash-ai codeflash-ai bot requested a review from mashraf-222 October 24, 2025 04:59
@codeflash-ai codeflash-ai bot added the ⚡️ codeflash Optimization PR opened by Codeflash AI label Oct 24, 2025
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

⚡️ codeflash Optimization PR opened by Codeflash AI

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant