From e85e18c1e6e84c17c8f66bbfba595424cada3c5f Mon Sep 17 00:00:00 2001
From: Thomas Kosiewski <tk@coder.com>
Date: Tue, 19 May 2026 21:39:07 +0000
Subject: [PATCH 1/9] Update Gemini Flash to Gemini 3.5 Flash

---
 docs/config/models.mdx                        |  2 +-
 src/common/constants/knownModels.test.ts      |  4 ++
 src/common/constants/knownModels.ts           |  3 +-
 src/common/utils/ai/modelCapabilities.test.ts |  9 +++
 src/common/utils/ai/modelDisplay.test.ts      |  1 +
 src/common/utils/ai/providerOptions.test.ts   | 70 +++++++++++++++++++
 src/common/utils/ai/providerOptions.ts        | 19 ++---
 src/common/utils/thinking/policy.test.ts      | 24 +++++++
 src/common/utils/thinking/policy.ts           | 13 +++-
 src/common/utils/tokens/modelStats.test.ts    |  9 +++
 src/common/utils/tokens/models-extra.ts       | 23 ++++++
 .../builtInSkillContent.generated.ts          |  2 +-
 12 files changed, 166 insertions(+), 13 deletions(-)
diff --git a/docs/config/models.mdx b/docs/config/models.mdx
index 6d3c44f5f3..9ef53563e9 100644
--- a/docs/config/models.mdx
+++ b/docs/config/models.mdx
@@ -25,7 +25,7 @@ Mux ships with curated models kept up to date with the frontier. Use any custom
 | Codex Mini 5.1         | openai:gpt-5.1-codex-mini     | `codex-mini`                                                 |         |
 | Codex Max 5.1          | openai:gpt-5.1-codex-max      | `codex-max`                                                  |         |
 | Gemini 3.1 Pro Preview | google:gemini-3.1-pro-preview | `gemini`, `gemini-pro`                                       |         |
-| Gemini 3 Flash Preview | google:gemini-3-flash-preview | `gemini-flash`                                               |         |
+| Gemini 3.5 Flash       | google:gemini-3.5-flash       | `gemini-flash`                                               |         |
 | Grok 4 1 Fast          | xai:grok-4-1-fast             | `grok`, `grok-4`, `grok-4.1`, `grok-4-1`                     |         |
 | Grok Code Fast 1       | xai:grok-code-fast-1          | `grok-code`                                                  |         |
 | DeepSeek V4 Pro        | deepseek:deepseek-v4-pro      | `deepseek`, `deepseek-pro`, `deepseek-v4`, `deepseek-v4-pro` |         |
diff --git a/src/common/constants/knownModels.test.ts b/src/common/constants/knownModels.test.ts
index 2abfcd8866..6cb043afdd 100644
--- a/src/common/constants/knownModels.test.ts
+++ b/src/common/constants/knownModels.test.ts
@@ -29,6 +29,10 @@ describe("Known Models Integration", () => {
     }
   });
 
+  test("gemini-flash resolves to the stable Gemini 3.5 Flash model", () => {
+    expect(MODEL_ABBREVIATIONS["gemini-flash"]).toBe("google:gemini-3.5-flash");
+  });
+
   test("known model ids and aliases stay unique across the curated registry", () => {
     const seenIds = new Set<string>();
     const seenAliases = new Set<string>();
diff --git a/src/common/constants/knownModels.ts b/src/common/constants/knownModels.ts
index cdcb2e3c1a..ea2798bf14 100644
--- a/src/common/constants/knownModels.ts
+++ b/src/common/constants/knownModels.ts
@@ -115,9 +115,10 @@ const MODEL_DEFINITIONS = {
     aliases: ["gemini", "gemini-pro"],
     tokenizerOverride: "google/gemini-2.5-pro",
   },
+  // Gemini Flash alias tracks the latest stable Flash tier.
   GEMINI_3_FLASH: {
     provider: "google",
-    providerModelId: "gemini-3-flash-preview",
+    providerModelId: "gemini-3.5-flash",
     aliases: ["gemini-flash"],
     tokenizerOverride: "google/gemini-2.5-pro",
   },
diff --git a/src/common/utils/ai/modelCapabilities.test.ts b/src/common/utils/ai/modelCapabilities.test.ts
index a2a9b7d18a..ff06345327 100644
--- a/src/common/utils/ai/modelCapabilities.test.ts
+++ b/src/common/utils/ai/modelCapabilities.test.ts
@@ -47,6 +47,15 @@ describe("getModelCapabilities", () => {
     expect(caps?.maxPdfSizeMb).toBeGreaterThan(0);
   });
 
+  it("returns multimodal capabilities for Gemini 3.5 Flash", () => {
+    const caps = getModelCapabilities("google:gemini-3.5-flash");
+    expect(caps).not.toBeNull();
+    expect(caps?.supportsPdfInput).toBe(true);
+    expect(caps?.supportsVision).toBe(true);
+    expect(caps?.supportsAudioInput).toBe(true);
+    expect(caps?.supportsVideoInput).toBe(true);
+  });
+
   it("returns null for unknown models", () => {
     expect(getModelCapabilities("anthropic:this-model-does-not-exist")).toBeNull();
   });
diff --git a/src/common/utils/ai/modelDisplay.test.ts b/src/common/utils/ai/modelDisplay.test.ts
index 12ddd780be..b91b7b9a20 100644
--- a/src/common/utils/ai/modelDisplay.test.ts
+++ b/src/common/utils/ai/modelDisplay.test.ts
@@ -45,6 +45,7 @@ describe("formatModelDisplayName", () => {
   describe("Gemini models", () => {
     test("formats Gemini models", () => {
       expect(formatModelDisplayName("gemini-2-0-flash-exp")).toBe("Gemini 2.0 Flash Exp");
+      expect(formatModelDisplayName("gemini-3.5-flash")).toBe("Gemini 3.5 Flash");
       expect(formatModelDisplayName("gemini-3.1-pro-preview")).toBe("Gemini 3.1 Pro Preview");
     });
   });
diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts
index 9d57d5c468..2b6fafcd0a 100644
--- a/src/common/utils/ai/providerOptions.test.ts
+++ b/src/common/utils/ai/providerOptions.test.ts
@@ -749,6 +749,76 @@ describe("buildProviderOptions - OpenAI", () => {
   });
 });
 
+describe("buildProviderOptions - Google", () => {
+  test("maps Gemini 3.5 Flash off to minimal thinking without thoughts", () => {
+    expect(buildProviderOptions("google:gemini-3.5-flash", "off")).toEqual({
+      google: {
+        thinkingConfig: {
+          thinkingLevel: "minimal",
+        },
+      },
+    });
+  });
+
+  test("maps Gemini 3.5 Flash medium to thinkingLevel medium with thoughts", () => {
+    expect(buildProviderOptions("mux-gateway:google/gemini-3.5-flash", "medium")).toEqual({
+      google: {
+        thinkingConfig: {
+          includeThoughts: true,
+          thinkingLevel: "medium",
+        },
+      },
+    });
+  });
+
+  test("uses mapped model capabilities for custom Gemini 3.5 Flash aliases", () => {
+    const providersConfig = createMockProvidersConfig({
+      "google:custom-flash": "google:gemini-3.5-flash",
+    });
+
+    expect(
+      buildProviderOptions(
+        "google:custom-flash",
+        "off",
+        undefined,
+        undefined,
+        undefined,
+        undefined,
+        undefined,
+        providersConfig
+      )
+    ).toEqual({
+      google: {
+        thinkingConfig: {
+          thinkingLevel: "minimal",
+        },
+      },
+    });
+  });
+
+  test("defensively maps unsupported Gemini 3.5 Flash xhigh to high", () => {
+    expect(buildProviderOptions("google:gemini-3.5-flash", "xhigh")).toEqual({
+      google: {
+        thinkingConfig: {
+          includeThoughts: true,
+          thinkingLevel: "high",
+        },
+      },
+    });
+  });
+
+  test("keeps Gemini 3.1 Pro off clamped to low-style behavior outside Flash mapping", () => {
+    expect(buildProviderOptions("google:gemini-3.1-pro-preview", "low")).toEqual({
+      google: {
+        thinkingConfig: {
+          includeThoughts: true,
+          thinkingLevel: "low",
+        },
+      },
+    });
+  });
+});
+
 describe("buildRequestHeaders", () => {
   for (const { name, model, options, expected } of [
     {
diff --git a/src/common/utils/ai/providerOptions.ts b/src/common/utils/ai/providerOptions.ts
index 1433fb01a0..e38657f754 100644
--- a/src/common/utils/ai/providerOptions.ts
+++ b/src/common/utils/ai/providerOptions.ts
@@ -23,6 +23,7 @@ import {
   OPENAI_REASONING_EFFORT,
   OPENROUTER_REASONING_EFFORT,
 } from "@/common/types/thinking";
+import { isGeminiFlashThinkingLevelModelName } from "@/common/utils/thinking/policy";
 import { resolveModelForMetadata } from "@/common/utils/providers/modelEntries";
 import { log } from "@/node/services/log";
 import type { MuxMessage } from "@/common/types/message";
@@ -410,21 +411,23 @@ export function buildProviderOptions(
   // Build Google-specific options
   if (formatProvider === "google") {
     const isGemini3 = capModelName.includes("gemini-3");
+    const isGeminiFlashThinkingModel = isGeminiFlashThinkingLevelModelName(capModelName);
     let thinkingConfig: GoogleGenerativeAIProviderOptions["thinkingConfig"];
 
-    if (effectiveThinking !== "off") {
+    if (isGeminiFlashThinkingModel && effectiveThinking === "off") {
+      // Gemini 3.5 Flash defaults to medium and does not support true thinking-off;
+      // send minimal explicitly so Mux's "off" setting means lowest-effort behavior.
+      thinkingConfig = { thinkingLevel: "minimal" };
+    } else if (effectiveThinking !== "off") {
       thinkingConfig = {
         includeThoughts: true,
       };
 
       if (isGemini3) {
-        // Policy enforcement already clamped to valid levels for Flash/Pro,
-        // so effectiveThinking is guaranteed in the model's allowed set.
-        // Flash: off/low/medium/high; Pro: low/high. "xhigh" can't reach here.
-        thinkingConfig.thinkingLevel = effectiveThinking as Exclude<
-          ThinkingLevel,
-          "off" | "xhigh" | "max"
-        >;
+        // Policy enforcement should clamp to valid Google levels before this adapter runs.
+        // Avoid leaking xhigh/max to Google if a caller bypasses policy.
+        thinkingConfig.thinkingLevel =
+          effectiveThinking === "xhigh" || effectiveThinking === "max" ? "high" : effectiveThinking;
       } else {
         // Gemini 2.5 uses thinkingBudget
         const budget = GEMINI_THINKING_BUDGETS[effectiveThinking];
diff --git a/src/common/utils/thinking/policy.test.ts b/src/common/utils/thinking/policy.test.ts
index be282fbe1a..53af8d0c3c 100644
--- a/src/common/utils/thinking/policy.test.ts
+++ b/src/common/utils/thinking/policy.test.ts
@@ -386,6 +386,30 @@ describe("getThinkingPolicyForModel", () => {
     expect(getThinkingPolicyForModel("google:gemini-3.1-pro-preview")).toEqual(["low", "high"]);
   });
 
+  test("returns off/low/medium/high for stable Gemini 3.5 Flash", () => {
+    expect(getThinkingPolicyForModel("google:gemini-3.5-flash")).toEqual([
+      "off",
+      "low",
+      "medium",
+      "high",
+    ]);
+    expect(getThinkingPolicyForModel("mux-gateway:google/gemini-3.5-flash")).toEqual([
+      "off",
+      "low",
+      "medium",
+      "high",
+    ]);
+  });
+
+  test("returns off/low/medium/high for stable Gemini 3.5 Flash behind OpenRouter", () => {
+    expect(getThinkingPolicyForModel("openrouter:google/gemini-3.5-flash")).toEqual([
+      "off",
+      "low",
+      "medium",
+      "high",
+    ]);
+  });
+
   test("returns off/low/medium/high for Gemini 3 Flash", () => {
     expect(getThinkingPolicyForModel("google:gemini-3-flash-preview")).toEqual([
       "off",
diff --git a/src/common/utils/thinking/policy.ts b/src/common/utils/thinking/policy.ts
index bf4a5506d6..261ae60896 100644
--- a/src/common/utils/thinking/policy.ts
+++ b/src/common/utils/thinking/policy.ts
@@ -25,6 +25,15 @@ import {
  */
 export type ThinkingPolicy = readonly ThinkingLevel[];
 
+const GEMINI_FLASH_THINKING_LEVEL_MODEL_NAMES = new Set([
+  "gemini-3-flash-preview",
+  "gemini-3.5-flash",
+]);
+
+export function isGeminiFlashThinkingLevelModelName(modelName: string): boolean {
+  return GEMINI_FLASH_THINKING_LEVEL_MODEL_NAMES.has(modelName.trim().toLowerCase());
+}
+
 /**
  * Returns the thinking policy for a given model.
  *
@@ -95,8 +104,8 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
     return ["high"];
   }
 
-  // Gemini 3 Flash supports 4 levels: off (minimal), low, medium, high
-  if (withoutProviderNamespace.includes("gemini-3-flash")) {
+  // Gemini Flash chat models support minimal/low/medium/high. Mux exposes minimal as "off".
+  if (isGeminiFlashThinkingLevelModelName(withoutProviderNamespace)) {
     return ["off", "low", "medium", "high"];
   }
 
diff --git a/src/common/utils/tokens/modelStats.test.ts b/src/common/utils/tokens/modelStats.test.ts
index 907b974cb8..1715a6a21f 100644
--- a/src/common/utils/tokens/modelStats.test.ts
+++ b/src/common/utils/tokens/modelStats.test.ts
@@ -43,6 +43,15 @@ describe("getModelStats", () => {
     expect(stats.tiered_pricing_threshold_tokens).toBeUndefined();
   });
 
+  test("resolves Gemini 3.5 Flash with published standard pricing and limits", () => {
+    const stats = expectStats(KNOWN_MODELS.GEMINI_3_FLASH.id);
+    expect(stats.max_input_tokens).toBe(1048576);
+    expect(stats.max_output_tokens).toBe(65536);
+    expect(stats.input_cost_per_token).toBe(0.0000015);
+    expect(stats.output_cost_per_token).toBe(0.000009);
+    expect(stats.cache_read_input_token_cost).toBe(0.00000015);
+  });
+
   test("defaults tiered pricing threshold to 200K when metadata only ships *_above_200k rates", () => {
     const stats = expectStats("google:gemini-3.1-pro-preview");
     expect(stats.tiered_pricing_threshold_tokens).toBe(200000);
diff --git a/src/common/utils/tokens/models-extra.ts b/src/common/utils/tokens/models-extra.ts
index 4eff23d23e..bc090aae4d 100644
--- a/src/common/utils/tokens/models-extra.ts
+++ b/src/common/utils/tokens/models-extra.ts
@@ -27,6 +27,8 @@ interface ModelData {
   supports_function_calling?: boolean;
   supports_vision?: boolean;
   supports_pdf_input?: boolean;
+  supports_audio_input?: boolean;
+  supports_video_input?: boolean;
   max_pdf_size_mb?: number;
   supports_reasoning?: boolean;
   supports_response_schema?: boolean;
@@ -248,6 +250,27 @@ export const modelsExtra: Record<string, ModelData> = {
     supports_response_schema: true,
   },
 
+  // Gemini 3.5 Flash - GA on May 19, 2026. Google AI docs list a stable
+  // `gemini-3.5-flash` model ID with 1M context, 65K max output, standard
+  // pricing of $1.50/M input, $9/M output, and $0.15/M cached input.
+  "gemini-3.5-flash": {
+    max_input_tokens: 1048576,
+    max_output_tokens: 65536,
+    input_cost_per_token: 0.0000015, // $1.50 per million input tokens
+    output_cost_per_token: 0.000009, // $9 per million output tokens, including thinking tokens
+    cache_read_input_token_cost: 0.00000015, // $0.15 per million cached input tokens
+    litellm_provider: "vertex_ai-language-models",
+    mode: "chat",
+    supports_function_calling: true,
+    supports_vision: true,
+    supports_pdf_input: true,
+    supports_audio_input: true,
+    supports_video_input: true,
+    supports_reasoning: true,
+    supports_response_schema: true,
+    knowledge_cutoff: "2025-01",
+  },
+
   // Gemini 3.1 Pro Preview - Released February 19, 2026
   // Tiered pricing: ≤200K tokens $2/M input, $12/M output; >200K tokens $4/M input, $18/M output
   // 1M input context, ~64K max output tokens
diff --git a/src/node/services/agentSkills/builtInSkillContent.generated.ts b/src/node/services/agentSkills/builtInSkillContent.generated.ts
index c706d0f464..37b1c66c61 100644
--- a/src/node/services/agentSkills/builtInSkillContent.generated.ts
+++ b/src/node/services/agentSkills/builtInSkillContent.generated.ts
@@ -2334,7 +2334,7 @@ export const BUILTIN_SKILL_FILES: Record<string, Record<string, string>> = {
       "| Codex Mini 5.1         | openai:gpt-5.1-codex-mini     | `codex-mini`                                                 |         |",
       "| Codex Max 5.1          | openai:gpt-5.1-codex-max      | `codex-max`                                                  |         |",
       "| Gemini 3.1 Pro Preview | google:gemini-3.1-pro-preview | `gemini`, `gemini-pro`                                       |         |",
-      "| Gemini 3 Flash Preview | google:gemini-3-flash-preview | `gemini-flash`                                               |         |",
+      "| Gemini 3.5 Flash       | google:gemini-3.5-flash       | `gemini-flash`                                               |         |",
       "| Grok 4 1 Fast          | xai:grok-4-1-fast             | `grok`, `grok-4`, `grok-4.1`, `grok-4-1`                     |         |",
       "| Grok Code Fast 1       | xai:grok-code-fast-1          | `grok-code`                                                  |         |",
       "| DeepSeek V4 Pro        | deepseek:deepseek-v4-pro      | `deepseek`, `deepseek-pro`, `deepseek-v4`, `deepseek-v4-pro` |         |",

From 7c4505dd4e2bfe1c0af8dfc8e3812b49d9b24620 Mon Sep 17 00:00:00 2001
From: Thomas Kosiewski <tk@coder.com>
Date: Tue, 19 May 2026 22:25:33 +0000
Subject: [PATCH 2/9] Address Gemini Flash review feedback

---
 src/common/constants/knownModels.ts         |  2 +-
 src/common/utils/ai/providerOptions.test.ts | 30 ++++++++++++++++++++-
 src/common/utils/ai/providerOptions.ts      |  6 ++---
 src/common/utils/thinking/policy.test.ts    | 10 +++++++
 src/common/utils/thinking/policy.ts         | 15 +++++++----
 src/common/utils/tokens/modelStats.test.ts  |  2 +-
 src/common/utils/tokens/models-extra.ts     |  2 +-
 7 files changed, 55 insertions(+), 12 deletions(-)

diff --git a/src/common/constants/knownModels.ts b/src/common/constants/knownModels.ts
index ea2798bf14..9a2185b5a4 100644
--- a/src/common/constants/knownModels.ts
+++ b/src/common/constants/knownModels.ts
@@ -116,7 +116,7 @@ const MODEL_DEFINITIONS = {
     tokenizerOverride: "google/gemini-2.5-pro",
   },
   // Gemini Flash alias tracks the latest stable Flash tier.
-  GEMINI_3_FLASH: {
+  GEMINI_FLASH: {
     provider: "google",
     providerModelId: "gemini-3.5-flash",
     aliases: ["gemini-flash"],
diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts
index 2b6fafcd0a..ab2fa13921 100644
--- a/src/common/utils/ai/providerOptions.test.ts
+++ b/src/common/utils/ai/providerOptions.test.ts
@@ -760,6 +760,26 @@ describe("buildProviderOptions - Google", () => {
     });
   });
 
+  test("maps gateway Gemini 3.5 Flash off to minimal thinking without thoughts", () => {
+    expect(buildProviderOptions("mux-gateway:google/gemini-3.5-flash", "off")).toEqual({
+      google: {
+        thinkingConfig: {
+          thinkingLevel: "minimal",
+        },
+      },
+    });
+  });
+
+  test("maps versioned Gemini 3.5 Flash off to minimal thinking without thoughts", () => {
+    expect(buildProviderOptions("google:gemini-3.5-flash-001", "off")).toEqual({
+      google: {
+        thinkingConfig: {
+          thinkingLevel: "minimal",
+        },
+      },
+    });
+  });
+
   test("maps Gemini 3.5 Flash medium to thinkingLevel medium with thoughts", () => {
     expect(buildProviderOptions("mux-gateway:google/gemini-3.5-flash", "medium")).toEqual({
       google: {
@@ -807,7 +827,7 @@ describe("buildProviderOptions - Google", () => {
     });
   });
 
-  test("keeps Gemini 3.1 Pro off clamped to low-style behavior outside Flash mapping", () => {
+  test("passes Gemini 3.1 Pro low through as thinkingLevel low with thoughts", () => {
     expect(buildProviderOptions("google:gemini-3.1-pro-preview", "low")).toEqual({
       google: {
         thinkingConfig: {
@@ -817,6 +837,14 @@ describe("buildProviderOptions - Google", () => {
       },
     });
   });
+
+  test("keeps Gemini 3.1 Pro off without provider thinking config", () => {
+    expect(buildProviderOptions("google:gemini-3.1-pro-preview", "off")).toEqual({
+      google: {
+        thinkingConfig: undefined,
+      },
+    });
+  });
 });
 
 describe("buildRequestHeaders", () => {
diff --git a/src/common/utils/ai/providerOptions.ts b/src/common/utils/ai/providerOptions.ts
index e38657f754..1e98ad50fd 100644
--- a/src/common/utils/ai/providerOptions.ts
+++ b/src/common/utils/ai/providerOptions.ts
@@ -410,12 +410,12 @@ export function buildProviderOptions(
 
   // Build Google-specific options
   if (formatProvider === "google") {
-    const isGemini3 = capModelName.includes("gemini-3");
+    const usesGeminiThinkingLevelConfig = capModelName.includes("gemini-3");
     const isGeminiFlashThinkingModel = isGeminiFlashThinkingLevelModelName(capModelName);
     let thinkingConfig: GoogleGenerativeAIProviderOptions["thinkingConfig"];
 
     if (isGeminiFlashThinkingModel && effectiveThinking === "off") {
-      // Gemini 3.5 Flash defaults to medium and does not support true thinking-off;
+      // Gemini Flash chat models default to medium and do not support true thinking-off;
       // send minimal explicitly so Mux's "off" setting means lowest-effort behavior.
       thinkingConfig = { thinkingLevel: "minimal" };
     } else if (effectiveThinking !== "off") {
@@ -423,7 +423,7 @@ export function buildProviderOptions(
         includeThoughts: true,
       };
 
-      if (isGemini3) {
+      if (usesGeminiThinkingLevelConfig) {
         // Policy enforcement should clamp to valid Google levels before this adapter runs.
         // Avoid leaking xhigh/max to Google if a caller bypasses policy.
         thinkingConfig.thinkingLevel =
diff --git a/src/common/utils/thinking/policy.test.ts b/src/common/utils/thinking/policy.test.ts
index 53af8d0c3c..8fb315d458 100644
--- a/src/common/utils/thinking/policy.test.ts
+++ b/src/common/utils/thinking/policy.test.ts
@@ -401,6 +401,16 @@ describe("getThinkingPolicyForModel", () => {
     ]);
   });
 
+  test("returns off/low/medium/high for versioned stable Gemini 3.5 Flash IDs", () => {
+    for (const model of [
+      "google:gemini-3.5-flash-001",
+      "google:gemini-3.5-flash-latest",
+      "google:gemini-3.5-flash-preview",
+    ]) {
+      expect(getThinkingPolicyForModel(model)).toEqual(["off", "low", "medium", "high"]);
+    }
+  });
+
   test("returns off/low/medium/high for stable Gemini 3.5 Flash behind OpenRouter", () => {
     expect(getThinkingPolicyForModel("openrouter:google/gemini-3.5-flash")).toEqual([
       "off",
diff --git a/src/common/utils/thinking/policy.ts b/src/common/utils/thinking/policy.ts
index 261ae60896..d4485caf46 100644
--- a/src/common/utils/thinking/policy.ts
+++ b/src/common/utils/thinking/policy.ts
@@ -25,13 +25,18 @@ import {
  */
 export type ThinkingPolicy = readonly ThinkingLevel[];
 
-const GEMINI_FLASH_THINKING_LEVEL_MODEL_NAMES = new Set([
-  "gemini-3-flash-preview",
-  "gemini-3.5-flash",
-]);
+const GEMINI_FLASH_THINKING_LEVEL_MODEL_NAMES = new Set(["gemini-3-flash-preview"]);
 
+/**
+ * Bare provider model IDs for Gemini Flash chat variants that accept Google's
+ * thinkingLevel config (minimal/low/medium/high) instead of Gemini 2.x thinkingBudget.
+ */
 export function isGeminiFlashThinkingLevelModelName(modelName: string): boolean {
-  return GEMINI_FLASH_THINKING_LEVEL_MODEL_NAMES.has(modelName.trim().toLowerCase());
+  const normalized = modelName.trim().toLowerCase();
+  return (
+    GEMINI_FLASH_THINKING_LEVEL_MODEL_NAMES.has(normalized) ||
+    (normalized.startsWith("gemini-3.5-flash") && !normalized.startsWith("gemini-3.5-flash-lite"))
+  );
 }
 
 /**
diff --git a/src/common/utils/tokens/modelStats.test.ts b/src/common/utils/tokens/modelStats.test.ts
index 1715a6a21f..a94709f81f 100644
--- a/src/common/utils/tokens/modelStats.test.ts
+++ b/src/common/utils/tokens/modelStats.test.ts
@@ -44,7 +44,7 @@ describe("getModelStats", () => {
   });
 
   test("resolves Gemini 3.5 Flash with published standard pricing and limits", () => {
-    const stats = expectStats(KNOWN_MODELS.GEMINI_3_FLASH.id);
+    const stats = expectStats(KNOWN_MODELS.GEMINI_FLASH.id);
     expect(stats.max_input_tokens).toBe(1048576);
     expect(stats.max_output_tokens).toBe(65536);
     expect(stats.input_cost_per_token).toBe(0.0000015);
diff --git a/src/common/utils/tokens/models-extra.ts b/src/common/utils/tokens/models-extra.ts
index bc090aae4d..556e09413b 100644
--- a/src/common/utils/tokens/models-extra.ts
+++ b/src/common/utils/tokens/models-extra.ts
@@ -268,7 +268,7 @@ export const modelsExtra: Record<string, ModelData> = {
     supports_video_input: true,
     supports_reasoning: true,
     supports_response_schema: true,
-    knowledge_cutoff: "2025-01",
+    knowledge_cutoff: "2026-01",
   },
 
   // Gemini 3.1 Pro Preview - Released February 19, 2026

From 835cb2b1f3fc179ba56439c66d2fe35ffc47a78d Mon Sep 17 00:00:00 2001
From: Thomas Kosiewski <tk@coder.com>
Date: Wed, 20 May 2026 06:00:35 +0000
Subject: [PATCH 3/9] Handle versioned Gemini 3 Flash preview IDs

---
 src/common/utils/ai/providerOptions.test.ts | 10 ++++++++++
 src/common/utils/thinking/policy.test.ts    |  9 +++++++++
 src/common/utils/thinking/policy.ts         |  1 +
 3 files changed, 20 insertions(+)

diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts
index ab2fa13921..0cd92e2a2e 100644
--- a/src/common/utils/ai/providerOptions.test.ts
+++ b/src/common/utils/ai/providerOptions.test.ts
@@ -816,6 +816,16 @@ describe("buildProviderOptions - Google", () => {
     });
   });
 
+  test("maps versioned Gemini 3 Flash Preview off to minimal thinking without thoughts", () => {
+    expect(buildProviderOptions("google:gemini-3-flash-preview-latest", "off")).toEqual({
+      google: {
+        thinkingConfig: {
+          thinkingLevel: "minimal",
+        },
+      },
+    });
+  });
+
   test("defensively maps unsupported Gemini 3.5 Flash xhigh to high", () => {
     expect(buildProviderOptions("google:gemini-3.5-flash", "xhigh")).toEqual({
       google: {
diff --git a/src/common/utils/thinking/policy.test.ts b/src/common/utils/thinking/policy.test.ts
index 8fb315d458..48aaf507c4 100644
--- a/src/common/utils/thinking/policy.test.ts
+++ b/src/common/utils/thinking/policy.test.ts
@@ -420,6 +420,15 @@ describe("getThinkingPolicyForModel", () => {
     ]);
   });
 
+  test("returns off/low/medium/high for versioned Gemini 3 Flash Preview IDs", () => {
+    for (const model of [
+      "google:gemini-3-flash-preview-20251217",
+      "google:gemini-3-flash-preview-latest",
+    ]) {
+      expect(getThinkingPolicyForModel(model)).toEqual(["off", "low", "medium", "high"]);
+    }
+  });
+
   test("returns off/low/medium/high for Gemini 3 Flash", () => {
     expect(getThinkingPolicyForModel("google:gemini-3-flash-preview")).toEqual([
       "off",
diff --git a/src/common/utils/thinking/policy.ts b/src/common/utils/thinking/policy.ts
index d4485caf46..7192b8c71f 100644
--- a/src/common/utils/thinking/policy.ts
+++ b/src/common/utils/thinking/policy.ts
@@ -35,6 +35,7 @@ export function isGeminiFlashThinkingLevelModelName(modelName: string): boolean
   const normalized = modelName.trim().toLowerCase();
   return (
     GEMINI_FLASH_THINKING_LEVEL_MODEL_NAMES.has(normalized) ||
+    normalized.startsWith("gemini-3-flash-preview-") ||
     (normalized.startsWith("gemini-3.5-flash") && !normalized.startsWith("gemini-3.5-flash-lite"))
   );
 }

From 70d6ea62721cbf9449ac47f64eb08517da856dcb Mon Sep 17 00:00:00 2001
From: Thomas Kosiewski <tk@coder.com>
Date: Wed, 20 May 2026 06:02:04 +0000
Subject: [PATCH 4/9] Address Gemini Flash review coverage

---
 src/common/utils/ai/providerOptions.test.ts | 10 ++++++++++
 src/common/utils/thinking/policy.test.ts    | 13 ++++++++++++-
 src/common/utils/thinking/policy.ts         |  6 ++----
 src/common/utils/tokens/models-extra.ts     |  1 +
 4 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts
index 0cd92e2a2e..217d39f927 100644
--- a/src/common/utils/ai/providerOptions.test.ts
+++ b/src/common/utils/ai/providerOptions.test.ts
@@ -816,6 +816,16 @@ describe("buildProviderOptions - Google", () => {
     });
   });
 
+  test("maps Gemini 3 Flash Preview off to minimal thinking without thoughts", () => {
+    expect(buildProviderOptions("google:gemini-3-flash-preview", "off")).toEqual({
+      google: {
+        thinkingConfig: {
+          thinkingLevel: "minimal",
+        },
+      },
+    });
+  });
+
   test("maps versioned Gemini 3 Flash Preview off to minimal thinking without thoughts", () => {
     expect(buildProviderOptions("google:gemini-3-flash-preview-latest", "off")).toEqual({
       google: {
diff --git a/src/common/utils/thinking/policy.test.ts b/src/common/utils/thinking/policy.test.ts
index 48aaf507c4..155738acf9 100644
--- a/src/common/utils/thinking/policy.test.ts
+++ b/src/common/utils/thinking/policy.test.ts
@@ -1,5 +1,10 @@
 import { describe, expect, test } from "bun:test";
-import { getThinkingPolicyForModel, enforceThinkingPolicy, resolveThinkingInput } from "./policy";
+import {
+  getThinkingPolicyForModel,
+  enforceThinkingPolicy,
+  resolveThinkingInput,
+  isGeminiFlashThinkingLevelModelName,
+} from "./policy";
 
 describe("getThinkingPolicyForModel", () => {
   test("returns 5 levels including xhigh for gpt-5.1-codex-max", () => {
@@ -454,6 +459,12 @@ describe("getThinkingPolicyForModel", () => {
   });
 });
 
+describe("isGeminiFlashThinkingLevelModelName", () => {
+  test("does not classify Gemini 3.5 Flash Lite as a Flash thinking-level chat model", () => {
+    expect(isGeminiFlashThinkingLevelModelName("gemini-3.5-flash-lite")).toBe(false);
+  });
+});
+
 describe("enforceThinkingPolicy", () => {
   describe("single-option policy models (gpt-5-pro)", () => {
     test("enforces high for any requested level", () => {
diff --git a/src/common/utils/thinking/policy.ts b/src/common/utils/thinking/policy.ts
index 7192b8c71f..f2a601b6fb 100644
--- a/src/common/utils/thinking/policy.ts
+++ b/src/common/utils/thinking/policy.ts
@@ -25,16 +25,14 @@ import {
  */
 export type ThinkingPolicy = readonly ThinkingLevel[];
 
-const GEMINI_FLASH_THINKING_LEVEL_MODEL_NAMES = new Set(["gemini-3-flash-preview"]);
-
 /**
- * Bare provider model IDs for Gemini Flash chat variants that accept Google's
+ * True when modelName is a bare Gemini Flash chat model ID using Google's
  * thinkingLevel config (minimal/low/medium/high) instead of Gemini 2.x thinkingBudget.
  */
 export function isGeminiFlashThinkingLevelModelName(modelName: string): boolean {
   const normalized = modelName.trim().toLowerCase();
   return (
-    GEMINI_FLASH_THINKING_LEVEL_MODEL_NAMES.has(normalized) ||
+    normalized === "gemini-3-flash-preview" ||
     normalized.startsWith("gemini-3-flash-preview-") ||
     (normalized.startsWith("gemini-3.5-flash") && !normalized.startsWith("gemini-3.5-flash-lite"))
   );
diff --git a/src/common/utils/tokens/models-extra.ts b/src/common/utils/tokens/models-extra.ts
index 556e09413b..b909672425 100644
--- a/src/common/utils/tokens/models-extra.ts
+++ b/src/common/utils/tokens/models-extra.ts
@@ -253,6 +253,7 @@ export const modelsExtra: Record<string, ModelData> = {
   // Gemini 3.5 Flash - GA on May 19, 2026. Google AI docs list a stable
   // `gemini-3.5-flash` model ID with 1M context, 65K max output, standard
   // pricing of $1.50/M input, $9/M output, and $0.15/M cached input.
+  // Source: Google Gemini API pricing/model docs as of 2026-05-19.
   "gemini-3.5-flash": {
     max_input_tokens: 1048576,
     max_output_tokens: 65536,

From a5e4c9de5a7dbad0da0fe3791456f66537a2cf0d Mon Sep 17 00:00:00 2001
From: Thomas Kosiewski <tk@coder.com>
Date: Wed, 20 May 2026 06:07:45 +0000
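Subject: [PATCH 5/9] Clarify Gemini Flash metadata and policy docs

Besides the comment updates, this restores the Gemini 3.5 Flash
knowledge_cutoff metadata to "2025-01", reverting the "2026-01" value
introduced in patch 2/9.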

---
 src/common/utils/thinking/policy.ts     | 3 ++-
 src/common/utils/tokens/models-extra.ts | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/common/utils/thinking/policy.ts b/src/common/utils/thinking/policy.ts
index f2a601b6fb..e480b07e1a 100644
--- a/src/common/utils/thinking/policy.ts
+++ b/src/common/utils/thinking/policy.ts
@@ -49,7 +49,8 @@ export function isGeminiFlashThinkingLevelModelName(modelName: string): boolean
  * - openai:gpt-5.2 / openai:gpt-5.5 → ["off", "low", "medium", "high", "xhigh"]
  * - openai:gpt-5.2-pro / openai:gpt-5.5-pro → ["medium", "high", "xhigh"] (3 levels)
  * - openai:gpt-5-pro → ["high"] (only supported level, legacy)
- * - gemini-3 → ["low", "high"] (thinking level only)
+ * - gemini-3 Flash chat variants → ["off", "low", "medium", "high"]
+ * - gemini-3 Pro variants → ["low", "high"] (thinking level only)
  * - default → ["off", "low", "medium", "high"] (standard 4 levels; xhigh is opt-in per model)
  *
  * Tolerates version suffixes (e.g., gpt-5-pro-2025-10-06).
diff --git a/src/common/utils/tokens/models-extra.ts b/src/common/utils/tokens/models-extra.ts
index b909672425..dfca2f7f5c 100644
--- a/src/common/utils/tokens/models-extra.ts
+++ b/src/common/utils/tokens/models-extra.ts
@@ -253,7 +253,7 @@ export const modelsExtra: Record<string, ModelData> = {
   // Gemini 3.5 Flash - GA on May 19, 2026. Google AI docs list a stable
   // `gemini-3.5-flash` model ID with 1M context, 65K max output, standard
   // pricing of $1.50/M input, $9/M output, and $0.15/M cached input.
-  // Source: Google Gemini API pricing/model docs as of 2026-05-19.
+  // Source: Google DeepMind Gemini 3.5 Flash model info and Gemini API pricing docs as of 2026-05-20.
   "gemini-3.5-flash": {
     max_input_tokens: 1048576,
     max_output_tokens: 65536,
@@ -269,7 +269,7 @@ export const modelsExtra: Record<string, ModelData> = {
     supports_video_input: true,
     supports_reasoning: true,
     supports_response_schema: true,
-    knowledge_cutoff: "2026-01",
+    knowledge_cutoff: "2025-01",
   },
 
   // Gemini 3.1 Pro Preview - Released February 19, 2026

From 7ed21f11db86dd0f627ecc74df8b507ed074af58 Mon Sep 17 00:00:00 2001
From: Thomas Kosiewski <tk@coder.com>
Date: Wed, 20 May 2026 06:30:43 +0000
Subject: [PATCH 6/9] Cover Gemini Flash defensive thinking mappings

---
 src/common/utils/ai/providerOptions.test.ts | 11 +++++++++++
 src/common/utils/thinking/policy.ts         |  3 ++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts
index 217d39f927..0dcfbd150e 100644
--- a/src/common/utils/ai/providerOptions.test.ts
+++ b/src/common/utils/ai/providerOptions.test.ts
@@ -858,6 +858,17 @@ describe("buildProviderOptions - Google", () => {
     });
   });
 
+  test("defensively maps unsupported Gemini 3.5 Flash max to high", () => {
+    expect(buildProviderOptions("google:gemini-3.5-flash", "max")).toEqual({
+      google: {
+        thinkingConfig: {
+          includeThoughts: true,
+          thinkingLevel: "high",
+        },
+      },
+    });
+  });
+
   test("keeps Gemini 3.1 Pro off without provider thinking config", () => {
     expect(buildProviderOptions("google:gemini-3.1-pro-preview", "off")).toEqual({
       google: {
diff --git a/src/common/utils/thinking/policy.ts b/src/common/utils/thinking/policy.ts
index e480b07e1a..06b20f5e9c 100644
--- a/src/common/utils/thinking/policy.ts
+++ b/src/common/utils/thinking/policy.ts
@@ -28,6 +28,7 @@ export type ThinkingPolicy = readonly ThinkingLevel[];
 /**
  * True when modelName is a bare Gemini Flash chat model ID using Google's
  * thinkingLevel config (minimal/low/medium/high) instead of Gemini 2.x thinkingBudget.
+ * @param modelName Bare model ID with no provider prefix or "models/" namespace (e.g. "gemini-3.5-flash", not "google:gemini-3.5-flash" or "models/gemini-3.5-flash").
  */
 export function isGeminiFlashThinkingLevelModelName(modelName: string): boolean {
   const normalized = modelName.trim().toLowerCase();
@@ -49,7 +50,7 @@ export function isGeminiFlashThinkingLevelModelName(modelName: string): boolean
  * - openai:gpt-5.2 / openai:gpt-5.5 → ["off", "low", "medium", "high", "xhigh"]
  * - openai:gpt-5.2-pro / openai:gpt-5.5-pro → ["medium", "high", "xhigh"] (3 levels)
  * - openai:gpt-5-pro → ["high"] (only supported level, legacy)
- * - gemini-3 Flash chat variants → ["off", "low", "medium", "high"]
+ * - gemini-3 / gemini-3.5 Flash chat variants → ["off", "low", "medium", "high"]
  * - gemini-3 Pro variants → ["low", "high"] (thinking level only)
  * - default → ["off", "low", "medium", "high"] (standard 4 levels; xhigh is opt-in per model)
  *

From 769e8977d6ea2fdd99b3a2847776f4320409f0fa Mon Sep 17 00:00:00 2001
From: Thomas Kosiewski <tk@coder.com>
Date: Wed, 20 May 2026 06:52:39 +0000
Subject: [PATCH 7/9] Support non-preview Gemini 3 Flash aliases

---
 src/common/utils/ai/providerOptions.test.ts | 10 ++++++++++
 src/common/utils/thinking/policy.test.ts    |  6 ++++++
 src/common/utils/thinking/policy.ts         |  4 ++--
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts
index 0dcfbd150e..a99f5f6ad9 100644
--- a/src/common/utils/ai/providerOptions.test.ts
+++ b/src/common/utils/ai/providerOptions.test.ts
@@ -816,6 +816,16 @@ describe("buildProviderOptions - Google", () => {
     });
   });
 
+  test("maps non-preview Gemini 3 Flash off to minimal thinking without thoughts", () => {
+    expect(buildProviderOptions("google:gemini-3-flash", "off")).toEqual({
+      google: {
+        thinkingConfig: {
+          thinkingLevel: "minimal",
+        },
+      },
+    });
+  });
+
   test("maps Gemini 3 Flash Preview off to minimal thinking without thoughts", () => {
     expect(buildProviderOptions("google:gemini-3-flash-preview", "off")).toEqual({
       google: {
diff --git a/src/common/utils/thinking/policy.test.ts b/src/common/utils/thinking/policy.test.ts
index 155738acf9..5c9519d426 100644
--- a/src/common/utils/thinking/policy.test.ts
+++ b/src/common/utils/thinking/policy.test.ts
@@ -425,6 +425,12 @@ describe("getThinkingPolicyForModel", () => {
     ]);
   });
 
+  test("returns off/low/medium/high for non-preview Gemini 3 Flash IDs", () => {
+    for (const model of ["google:gemini-3-flash", "google:gemini-3-flash-001"]) {
+      expect(getThinkingPolicyForModel(model)).toEqual(["off", "low", "medium", "high"]);
+    }
+  });
+
   test("returns off/low/medium/high for versioned Gemini 3 Flash Preview IDs", () => {
     for (const model of [
       "google:gemini-3-flash-preview-20251217",
diff --git a/src/common/utils/thinking/policy.ts b/src/common/utils/thinking/policy.ts
index 06b20f5e9c..88bac6388f 100644
--- a/src/common/utils/thinking/policy.ts
+++ b/src/common/utils/thinking/policy.ts
@@ -33,8 +33,8 @@ export type ThinkingPolicy = readonly ThinkingLevel[];
 export function isGeminiFlashThinkingLevelModelName(modelName: string): boolean {
   const normalized = modelName.trim().toLowerCase();
   return (
-    normalized === "gemini-3-flash-preview" ||
-    normalized.startsWith("gemini-3-flash-preview-") ||
+    ((normalized === "gemini-3-flash" || normalized.startsWith("gemini-3-flash-")) &&
+      !normalized.startsWith("gemini-3-flash-lite")) ||
     (normalized.startsWith("gemini-3.5-flash") && !normalized.startsWith("gemini-3.5-flash-lite"))
   );
 }

From 43b46c839656b9085975f53912c6645e20c2d291 Mon Sep 17 00:00:00 2001
From: Thomas Kosiewski <tk@coder.com>
Date: Wed, 20 May 2026 07:08:13 +0000
Subject: [PATCH 8/9] Normalize Gemini Flash provider option model IDs

---
 src/common/utils/ai/providerOptions.test.ts | 10 ++++++++++
 src/common/utils/ai/providerOptions.ts      |  5 +++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts
index a99f5f6ad9..58b9a63fa8 100644
--- a/src/common/utils/ai/providerOptions.test.ts
+++ b/src/common/utils/ai/providerOptions.test.ts
@@ -770,6 +770,16 @@ describe("buildProviderOptions - Google", () => {
     });
   });
 
+  test("maps namespaced Gemini 3.5 Flash off to minimal thinking without thoughts", () => {
+    expect(buildProviderOptions("google:models/gemini-3.5-flash", "off")).toEqual({
+      google: {
+        thinkingConfig: {
+          thinkingLevel: "minimal",
+        },
+      },
+    });
+  });
+
   test("maps versioned Gemini 3.5 Flash off to minimal thinking without thoughts", () => {
     expect(buildProviderOptions("google:gemini-3.5-flash-001", "off")).toEqual({
       google: {
diff --git a/src/common/utils/ai/providerOptions.ts b/src/common/utils/ai/providerOptions.ts
index 1e98ad50fd..9c8e76c68b 100644
--- a/src/common/utils/ai/providerOptions.ts
+++ b/src/common/utils/ai/providerOptions.ts
@@ -410,8 +410,9 @@ export function buildProviderOptions(
 
   // Build Google-specific options
   if (formatProvider === "google") {
-    const usesGeminiThinkingLevelConfig = capModelName.includes("gemini-3");
-    const isGeminiFlashThinkingModel = isGeminiFlashThinkingLevelModelName(capModelName);
+    const capBareModelName = capModelName.split("/").at(-1) ?? capModelName;
+    const usesGeminiThinkingLevelConfig = capBareModelName.includes("gemini-3");
+    const isGeminiFlashThinkingModel = isGeminiFlashThinkingLevelModelName(capBareModelName);
     let thinkingConfig: GoogleGenerativeAIProviderOptions["thinkingConfig"];
 
     if (isGeminiFlashThinkingModel && effectiveThinking === "off") {

From 644297fcdcf259efaf2d0107c8a455b70ec5c0c6 Mon Sep 17 00:00:00 2001
From: Thomas Kosiewski <tk@coder.com>
Date: Wed, 20 May 2026 07:23:42 +0000
Subject: [PATCH 9/9] Cover Gemini 3 Flash Lite exclusion

---
 src/common/utils/thinking/policy.test.ts | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/common/utils/thinking/policy.test.ts b/src/common/utils/thinking/policy.test.ts
index 5c9519d426..aae7e1cb62 100644
--- a/src/common/utils/thinking/policy.test.ts
+++ b/src/common/utils/thinking/policy.test.ts
@@ -466,7 +466,8 @@ describe("getThinkingPolicyForModel", () => {
 });
 
 describe("isGeminiFlashThinkingLevelModelName", () => {
-  test("does not classify Gemini 3.5 Flash Lite as a Flash thinking-level chat model", () => {
+  test("does not classify Gemini Flash Lite variants as Flash thinking-level chat models", () => {
+    expect(isGeminiFlashThinkingLevelModelName("gemini-3-flash-lite")).toBe(false);
     expect(isGeminiFlashThinkingLevelModelName("gemini-3.5-flash-lite")).toBe(false);
   });
 });