coder · ThomasK33 · May 20, 2026 · May 19, 2026 · May 19, 2026 · May 20, 2026
diff --git a/docs/config/models.mdx b/docs/config/models.mdx
@@ -25,7 +25,7 @@ Mux ships with curated models kept up to date with the frontier. Use any custom
 | Codex Mini 5.1         | openai:gpt-5.1-codex-mini     | `codex-mini`                                                 |         |
 | Codex Max 5.1          | openai:gpt-5.1-codex-max      | `codex-max`                                                  |         |
 | Gemini 3.1 Pro Preview | google:gemini-3.1-pro-preview | `gemini`, `gemini-pro`                                       |         |
-| Gemini 3 Flash Preview | google:gemini-3-flash-preview | `gemini-flash`                                               |         |
+| Gemini 3.5 Flash       | google:gemini-3.5-flash       | `gemini-flash`                                               |         |
 | Grok 4 1 Fast          | xai:grok-4-1-fast             | `grok`, `grok-4`, `grok-4.1`, `grok-4-1`                     |         |
 | Grok Code Fast 1       | xai:grok-code-fast-1          | `grok-code`                                                  |         |
 | DeepSeek V4 Pro        | deepseek:deepseek-v4-pro      | `deepseek`, `deepseek-pro`, `deepseek-v4`, `deepseek-v4-pro` |         |

diff --git a/src/common/constants/knownModels.test.ts b/src/common/constants/knownModels.test.ts
@@ -29,6 +29,10 @@ describe("Known Models Integration", () => {
     }
   });
 
+  test("gemini-flash resolves to the stable Gemini 3.5 Flash model", () => {
+    expect(MODEL_ABBREVIATIONS["gemini-flash"]).toBe("google:gemini-3.5-flash");
+  });
+
   test("known model ids and aliases stay unique across the curated registry", () => {
     const seenIds = new Set<string>();
     const seenAliases = new Set<string>();

diff --git a/src/common/constants/knownModels.ts b/src/common/constants/knownModels.ts
@@ -115,9 +115,10 @@ const MODEL_DEFINITIONS = {
     aliases: ["gemini", "gemini-pro"],
     tokenizerOverride: "google/gemini-2.5-pro",
   },
-  GEMINI_3_FLASH: {
+  // Gemini Flash alias tracks the latest stable Flash tier.
+  GEMINI_FLASH: {
     provider: "google",
-    providerModelId: "gemini-3-flash-preview",
+    providerModelId: "gemini-3.5-flash",
     aliases: ["gemini-flash"],
     tokenizerOverride: "google/gemini-2.5-pro",
   },

diff --git a/src/common/utils/ai/modelCapabilities.test.ts b/src/common/utils/ai/modelCapabilities.test.ts
@@ -47,6 +47,15 @@ describe("getModelCapabilities", () => {
     expect(caps?.maxPdfSizeMb).toBeGreaterThan(0);
   });
 
+  it("returns multimodal capabilities for Gemini 3.5 Flash", () => {
+    const caps = getModelCapabilities("google:gemini-3.5-flash");
+    expect(caps).not.toBeNull();
+    expect(caps?.supportsPdfInput).toBe(true);
+    expect(caps?.supportsVision).toBe(true);
+    expect(caps?.supportsAudioInput).toBe(true);
+    expect(caps?.supportsVideoInput).toBe(true);
+  });
+
   it("returns null for unknown models", () => {
     expect(getModelCapabilities("anthropic:this-model-does-not-exist")).toBeNull();
   });

diff --git a/src/common/utils/ai/modelDisplay.test.ts b/src/common/utils/ai/modelDisplay.test.ts
@@ -45,6 +45,7 @@ describe("formatModelDisplayName", () => {
   describe("Gemini models", () => {
     test("formats Gemini models", () => {
       expect(formatModelDisplayName("gemini-2-0-flash-exp")).toBe("Gemini 2.0 Flash Exp");
+      expect(formatModelDisplayName("gemini-3.5-flash")).toBe("Gemini 3.5 Flash");
       expect(formatModelDisplayName("gemini-3.1-pro-preview")).toBe("Gemini 3.1 Pro Preview");
     });
   });

diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts
@@ -749,6 +749,155 @@ describe("buildProviderOptions - OpenAI", () => {
   });
 });
 
+describe("buildProviderOptions - Google", () => {
+  test("maps Gemini 3.5 Flash off to minimal thinking without thoughts", () => {
+    expect(buildProviderOptions("google:gemini-3.5-flash", "off")).toEqual({
+      google: {
+        thinkingConfig: {
+          thinkingLevel: "minimal",
+        },
+      },
+    });
+  });
+
+  test("maps gateway Gemini 3.5 Flash off to minimal thinking without thoughts", () => {
+    expect(buildProviderOptions("mux-gateway:google/gemini-3.5-flash", "off")).toEqual({
+      google: {
+        thinkingConfig: {
+          thinkingLevel: "minimal",
+        },
+      },
+    });
+  });
+
+  test("maps namespaced Gemini 3.5 Flash off to minimal thinking without thoughts", () => {
+    expect(buildProviderOptions("google:models/gemini-3.5-flash", "off")).toEqual({
+      google: {
+        thinkingConfig: {
+          thinkingLevel: "minimal",
+        },
+      },
+    });
+  });
+
+  test("maps versioned Gemini 3.5 Flash off to minimal thinking without thoughts", () => {
+    expect(buildProviderOptions("google:gemini-3.5-flash-001", "off")).toEqual({
+      google: {
+        thinkingConfig: {
+          thinkingLevel: "minimal",
+        },
+      },
+    });
+  });
+
+  test("maps Gemini 3.5 Flash medium to thinkingLevel medium with thoughts", () => {
+    expect(buildProviderOptions("mux-gateway:google/gemini-3.5-flash", "medium")).toEqual({
+      google: {
+        thinkingConfig: {
+          includeThoughts: true,
+          thinkingLevel: "medium",
+        },
+      },
+    });
+  });
+
+  test("uses mapped model capabilities for custom Gemini 3.5 Flash aliases", () => {
+    const providersConfig = createMockProvidersConfig({
+      "google:custom-flash": "google:gemini-3.5-flash",
+    });
+
+    expect(
+      buildProviderOptions(
+        "google:custom-flash",
+        "off",
+        undefined,
+        undefined,
+        undefined,
+        undefined,
+        undefined,
+        providersConfig
+      )
+    ).toEqual({
+      google: {
+        thinkingConfig: {
+          thinkingLevel: "minimal",
+        },
+      },
+    });
+  });
+
+  test("maps non-preview Gemini 3 Flash off to minimal thinking without thoughts", () => {
+    expect(buildProviderOptions("google:gemini-3-flash", "off")).toEqual({
+      google: {
+        thinkingConfig: {
+          thinkingLevel: "minimal",
+        },
+      },
+    });
+  });
+
+  test("maps Gemini 3 Flash Preview off to minimal thinking without thoughts", () => {
+    expect(buildProviderOptions("google:gemini-3-flash-preview", "off")).toEqual({
+      google: {
+        thinkingConfig: {
+          thinkingLevel: "minimal",
+        },
+      },
+    });
+  });
+
+  test("maps versioned Gemini 3 Flash Preview off to minimal thinking without thoughts", () => {
+    expect(buildProviderOptions("google:gemini-3-flash-preview-latest", "off")).toEqual({
+      google: {
+        thinkingConfig: {
+          thinkingLevel: "minimal",
+        },
+      },
+    });
+  });
+
+  test("defensively maps unsupported Gemini 3.5 Flash xhigh to high", () => {
+    expect(buildProviderOptions("google:gemini-3.5-flash", "xhigh")).toEqual({
+      google: {
+        thinkingConfig: {
+          includeThoughts: true,
+          thinkingLevel: "high",
+        },
+      },
+    });
+  });
+
+  test("passes Gemini 3.1 Pro low through as thinkingLevel low with thoughts", () => {
+    expect(buildProviderOptions("google:gemini-3.1-pro-preview", "low")).toEqual({
+      google: {
+        thinkingConfig: {
+          includeThoughts: true,
+          thinkingLevel: "low",
+        },
+      },
+    });
+  });
+
+  test("defensively maps unsupported Gemini 3.5 Flash max to high", () => {
+    expect(buildProviderOptions("google:gemini-3.5-flash", "max")).toEqual({
+      google: {
+        thinkingConfig: {
+          includeThoughts: true,
+          thinkingLevel: "high",
+        },
+      },
+    });
+  });
+
+  test("keeps Gemini 3.1 Pro off without provider thinking config", () => {
+    expect(buildProviderOptions("google:gemini-3.1-pro-preview", "off")).toEqual({
+      google: {
+        thinkingConfig: undefined,
+      },
+    });
+  });
+});
+
 describe("buildRequestHeaders", () => {
   for (const { name, model, options, expected } of [
     {

diff --git a/src/common/utils/ai/providerOptions.ts b/src/common/utils/ai/providerOptions.ts
@@ -23,6 +23,7 @@ import {
   OPENAI_REASONING_EFFORT,
   OPENROUTER_REASONING_EFFORT,
 } from "@/common/types/thinking";
+import { isGeminiFlashThinkingLevelModelName } from "@/common/utils/thinking/policy";
 import { resolveModelForMetadata } from "@/common/utils/providers/modelEntries";
 import { log } from "@/node/services/log";
 import type { MuxMessage } from "@/common/types/message";
@@ -409,22 +410,25 @@ export function buildProviderOptions(
 
   // Build Google-specific options
   if (formatProvider === "google") {
-    const isGemini3 = capModelName.includes("gemini-3");
+    const capBareModelName = capModelName.split("/").at(-1) ?? capModelName;
+    const usesGeminiThinkingLevelConfig = capBareModelName.includes("gemini-3");
+    const isGeminiFlashThinkingModel = isGeminiFlashThinkingLevelModelName(capBareModelName);
     let thinkingConfig: GoogleGenerativeAIProviderOptions["thinkingConfig"];
 
-    if (effectiveThinking !== "off") {
+    if (isGeminiFlashThinkingModel && effectiveThinking === "off") {
+      // Gemini Flash chat models default to medium and do not support true thinking-off;
+      // send minimal explicitly so Mux's "off" setting means lowest-effort behavior.
+      thinkingConfig = { thinkingLevel: "minimal" };
+    } else if (effectiveThinking !== "off") {
       thinkingConfig = {
         includeThoughts: true,
       };
 
-      if (isGemini3) {
-        // Policy enforcement already clamped to valid levels for Flash/Pro,
-        // so effectiveThinking is guaranteed in the model's allowed set.
-        // Flash: off/low/medium/high; Pro: low/high. "xhigh" can't reach here.
-        thinkingConfig.thinkingLevel = effectiveThinking as Exclude<
-          ThinkingLevel,
-          "off" | "xhigh" | "max"
-        >;
+      if (usesGeminiThinkingLevelConfig) {
+        // Policy enforcement should clamp to valid Google levels before this adapter runs.
+        // Avoid leaking xhigh/max to Google if a caller bypasses policy.
+        thinkingConfig.thinkingLevel =
+          effectiveThinking === "xhigh" || effectiveThinking === "max" ? "high" : effectiveThinking;
       } else {
         // Gemini 2.5 uses thinkingBudget
         const budget = GEMINI_THINKING_BUDGETS[effectiveThinking];

diff --git a/src/common/utils/thinking/policy.test.ts b/src/common/utils/thinking/policy.test.ts
@@ -1,5 +1,10 @@
 import { describe, expect, test } from "bun:test";
-import { getThinkingPolicyForModel, enforceThinkingPolicy, resolveThinkingInput } from "./policy";
+import {
+  getThinkingPolicyForModel,
+  enforceThinkingPolicy,
+  resolveThinkingInput,
+  isGeminiFlashThinkingLevelModelName,
+} from "./policy";
 
 describe("getThinkingPolicyForModel", () => {
   test("returns 5 levels including xhigh for gpt-5.1-codex-max", () => {
@@ -386,6 +391,55 @@ describe("getThinkingPolicyForModel", () => {
     expect(getThinkingPolicyForModel("google:gemini-3.1-pro-preview")).toEqual(["low", "high"]);
   });
 
+  test("returns off/low/medium/high for stable Gemini 3.5 Flash", () => {
+    expect(getThinkingPolicyForModel("google:gemini-3.5-flash")).toEqual([
+      "off",
+      "low",
+      "medium",
+      "high",
+    ]);
+    expect(getThinkingPolicyForModel("mux-gateway:google/gemini-3.5-flash")).toEqual([
+      "off",
+      "low",
+      "medium",
+      "high",
+    ]);
+  });
+
+  test("returns off/low/medium/high for versioned stable Gemini 3.5 Flash IDs", () => {
+    for (const model of [
+      "google:gemini-3.5-flash-001",
+      "google:gemini-3.5-flash-latest",
+      "google:gemini-3.5-flash-preview",
+    ]) {
+      expect(getThinkingPolicyForModel(model)).toEqual(["off", "low", "medium", "high"]);
+    }
+  });
+
+  test("returns off/low/medium/high for stable Gemini 3.5 Flash behind OpenRouter", () => {
+    expect(getThinkingPolicyForModel("openrouter:google/gemini-3.5-flash")).toEqual([
+      "off",
+      "low",
+      "medium",
+      "high",
+    ]);
+  });
+
+  test("returns off/low/medium/high for non-preview Gemini 3 Flash IDs", () => {
+    for (const model of ["google:gemini-3-flash", "google:gemini-3-flash-001"]) {
+      expect(getThinkingPolicyForModel(model)).toEqual(["off", "low", "medium", "high"]);
+    }
+  });
+
+  test("returns off/low/medium/high for versioned Gemini 3 Flash Preview IDs", () => {
+    for (const model of [
+      "google:gemini-3-flash-preview-20251217",
+      "google:gemini-3-flash-preview-latest",
+    ]) {
+      expect(getThinkingPolicyForModel(model)).toEqual(["off", "low", "medium", "high"]);
+    }
+  });
+
   test("returns off/low/medium/high for Gemini 3 Flash", () => {
     expect(getThinkingPolicyForModel("google:gemini-3-flash-preview")).toEqual([
       "off",
@@ -411,6 +465,13 @@ describe("getThinkingPolicyForModel", () => {
   });
 });
 
+describe("isGeminiFlashThinkingLevelModelName", () => {
+  test("does not classify Gemini Flash Lite variants as Flash thinking-level chat models", () => {
+    expect(isGeminiFlashThinkingLevelModelName("gemini-3-flash-lite")).toBe(false);
+    expect(isGeminiFlashThinkingLevelModelName("gemini-3.5-flash-lite")).toBe(false);
+  });
+});
+
 describe("enforceThinkingPolicy", () => {
   describe("single-option policy models (gpt-5-pro)", () => {
     test("enforces high for any requested level", () => {

diff --git a/src/common/utils/thinking/policy.ts b/src/common/utils/thinking/policy.ts
@@ -25,6 +25,20 @@ import {
  */
 export type ThinkingPolicy = readonly ThinkingLevel[];
 
+/**
+ * True when modelName is a bare Gemini 3/3.5 Flash model ID (Flash Lite excluded) using Google's
+ * thinkingLevel config (minimal/low/medium/high) instead of Gemini 2.x thinkingBudget.
+ * @param modelName Provider model ID without the provider prefix (e.g. "gemini-3.5-flash", not "google:gemini-3.5-flash").
+ */
+export function isGeminiFlashThinkingLevelModelName(modelName: string): boolean {
+  const normalized = modelName.trim().toLowerCase();
+  return (
+    ((normalized === "gemini-3-flash" || normalized.startsWith("gemini-3-flash-")) &&
+      !normalized.startsWith("gemini-3-flash-lite")) ||
+    (normalized.startsWith("gemini-3.5-flash") && !normalized.startsWith("gemini-3.5-flash-lite"))
+  );
+}
+
 /**
  * Returns the thinking policy for a given model.
  *
@@ -36,7 +50,8 @@ export type ThinkingPolicy = readonly ThinkingLevel[];
  * - openai:gpt-5.2 / openai:gpt-5.5 → ["off", "low", "medium", "high", "xhigh"]
  * - openai:gpt-5.2-pro / openai:gpt-5.5-pro → ["medium", "high", "xhigh"] (3 levels)
  * - openai:gpt-5-pro → ["high"] (only supported level, legacy)
- * - gemini-3 → ["low", "high"] (thinking level only)
+ * - gemini-3 / gemini-3.5 Flash variants (excluding Flash Lite) → ["off", "low", "medium", "high"]
+ * - gemini-3 Pro variants → ["low", "high"] (thinking level only)
  * - default → ["off", "low", "medium", "high"] (standard 4 levels; xhigh is opt-in per model)
  *
  * Tolerates version suffixes (e.g., gpt-5-pro-2025-10-06).
@@ -95,8 +110,8 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
     return ["high"];
   }
 
-  // Gemini 3 Flash supports 4 levels: off (minimal), low, medium, high
-  if (withoutProviderNamespace.includes("gemini-3-flash")) {
+  // Gemini 3/3.5 Flash (not Lite) supports minimal/low/medium/high. Mux exposes minimal as "off".
+  if (isGeminiFlashThinkingLevelModelName(withoutProviderNamespace)) {
     return ["off", "low", "medium", "high"];
   }
 

diff --git a/src/common/utils/tokens/modelStats.test.ts b/src/common/utils/tokens/modelStats.test.ts
@@ -43,6 +43,15 @@ describe("getModelStats", () => {
     expect(stats.tiered_pricing_threshold_tokens).toBeUndefined();
   });
 
+  test("resolves Gemini 3.5 Flash with published standard pricing and limits", () => {
+    const stats = expectStats(KNOWN_MODELS.GEMINI_FLASH.id);
+    expect(stats.max_input_tokens).toBe(1048576);
+    expect(stats.max_output_tokens).toBe(65536);
+    expect(stats.input_cost_per_token).toBe(0.0000015);
+    expect(stats.output_cost_per_token).toBe(0.000009);
+    expect(stats.cache_read_input_token_cost).toBe(0.00000015);
+  });
+
   test("defaults tiered pricing threshold to 200K when metadata only ships *_above_200k rates", () => {
     const stats = expectStats("google:gemini-3.1-pro-preview");
     expect(stats.tiered_pricing_threshold_tokens).toBe(200000);