diff --git a/optillm/__init__.py b/optillm/__init__.py index bc23fac9..610fc269 100644 --- a/optillm/__init__.py +++ b/optillm/__init__.py @@ -2,7 +2,7 @@ import os # Version information -__version__ = "0.1.17" +__version__ = "0.1.18" # Get the path to the root optillm.py spec = util.spec_from_file_location( diff --git a/optillm/inference.py b/optillm/inference.py index 07640a49..206b357e 100644 --- a/optillm/inference.py +++ b/optillm/inference.py @@ -189,7 +189,8 @@ def should_use_mlx(model_id: str) -> bool: # Models that should use MLX mlx_patterns = [ "mlx-community/", - "mlx-", + "-mlx-" ] # Known problematic models that should prefer MLX on Apple Silicon diff --git a/scripts/eval_math500_benchmark.py b/scripts/eval_math500_benchmark.py index ad0e9f2d..165eefd5 100644 --- a/scripts/eval_math500_benchmark.py +++ b/scripts/eval_math500_benchmark.py @@ -692,7 +692,7 @@ def get_llm_response(problem: str, model: str) -> str: messages=[ {"role": "user", "content": SYSTEM_PROMPT + "\n" + problem} ], - max_tokens=32768, # for thinking models, we need to use a lot more tokens + max_tokens=8192, # reduced from 32768; raise again if thinking models need longer outputs # extra_body = { # "decoding" : "thinkdeeper", # } diff --git a/setup.py b/setup.py index fb6f59f9..5de8c1dd 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ setup( name="optillm", - version="0.1.17", + version="0.1.18", packages=find_packages(include=['optillm', 'optillm.*']), # This ensures all subpackages are included py_modules=['optillm'], package_data={