diff --git a/optillm/__init__.py b/optillm/__init__.py index bc23fac9..610fc269 100644 --- a/optillm/__init__.py +++ b/optillm/__init__.py @@ -2,7 +2,7 @@ import os # Version information -__version__ = "0.1.17" +__version__ = "0.1.18" # Get the path to the root optillm.py spec = util.spec_from_file_location( diff --git a/optillm/inference.py b/optillm/inference.py index 07640a49..206b357e 100644 --- a/optillm/inference.py +++ b/optillm/inference.py @@ -189,7 +189,8 @@ def should_use_mlx(model_id: str) -> bool: # Models that should use MLX mlx_patterns = [ "mlx-community/", - "mlx-", + "-mlx-" ] # Known problematic models that should prefer MLX on Apple Silicon diff --git a/scripts/eval_math500_benchmark.py b/scripts/eval_math500_benchmark.py index ad0e9f2d..165eefd5 100644 --- a/scripts/eval_math500_benchmark.py +++ b/scripts/eval_math500_benchmark.py @@ -692,7 +692,7 @@ def get_llm_response(problem: str, model: str) -> str: messages=[ {"role": "user", "content": SYSTEM_PROMPT + "\n" + problem} ], - max_tokens=32768, # for thinking models, we need to use a lot more tokens + max_tokens=8192, # reduced from 32768; raise again if thinking models need longer outputs # extra_body = { # "decoding" : "thinkdeeper", # } diff --git a/setup.py b/setup.py index fb6f59f9..5de8c1dd 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ setup( name="optillm", - version="0.1.17", + version="0.1.18", packages=find_packages(include=['optillm', 'optillm.*']), # This ensures all subpackages are included py_modules=['optillm'], package_data={