From e85e18c1e6e84c17c8f66bbfba595424cada3c5f Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski Date: Tue, 19 May 2026 21:39:07 +0000 Subject: [PATCH 1/9] Update Gemini Flash to Gemini 3.5 Flash --- docs/config/models.mdx | 2 +- src/common/constants/knownModels.test.ts | 4 ++ src/common/constants/knownModels.ts | 3 +- src/common/utils/ai/modelCapabilities.test.ts | 9 +++ src/common/utils/ai/modelDisplay.test.ts | 1 + src/common/utils/ai/providerOptions.test.ts | 70 +++++++++++++++++++ src/common/utils/ai/providerOptions.ts | 19 ++--- src/common/utils/thinking/policy.test.ts | 24 +++++++ src/common/utils/thinking/policy.ts | 13 +++- src/common/utils/tokens/modelStats.test.ts | 9 +++ src/common/utils/tokens/models-extra.ts | 23 ++++++ .../builtInSkillContent.generated.ts | 2 +- 12 files changed, 166 insertions(+), 13 deletions(-) diff --git a/docs/config/models.mdx b/docs/config/models.mdx index 6d3c44f5f3..9ef53563e9 100644 --- a/docs/config/models.mdx +++ b/docs/config/models.mdx @@ -25,7 +25,7 @@ Mux ships with curated models kept up to date with the frontier. Use any custom | Codex Mini 5.1 | openai:gpt-5.1-codex-mini | `codex-mini` | | | Codex Max 5.1 | openai:gpt-5.1-codex-max | `codex-max` | | | Gemini 3.1 Pro Preview | google:gemini-3.1-pro-preview | `gemini`, `gemini-pro` | | -| Gemini 3 Flash Preview | google:gemini-3-flash-preview | `gemini-flash` | | +| Gemini 3.5 Flash | google:gemini-3.5-flash | `gemini-flash` | | | Grok 4 1 Fast | xai:grok-4-1-fast | `grok`, `grok-4`, `grok-4.1`, `grok-4-1` | | | Grok Code Fast 1 | xai:grok-code-fast-1 | `grok-code` | | | DeepSeek V4 Pro | deepseek:deepseek-v4-pro | `deepseek`, `deepseek-pro`, `deepseek-v4`, `deepseek-v4-pro` | | diff --git a/src/common/constants/knownModels.test.ts b/src/common/constants/knownModels.test.ts index 2abfcd8866..6cb043afdd 100644 --- a/src/common/constants/knownModels.test.ts +++ b/src/common/constants/knownModels.test.ts @@ -29,6 +29,10 @@ describe("Known Models Integration", () => { } }); + test("gemini-flash resolves to the stable Gemini 3.5 Flash model", () => { + expect(MODEL_ABBREVIATIONS["gemini-flash"]).toBe("google:gemini-3.5-flash"); + }); + test("known model ids and aliases stay unique across the curated registry", () => { const seenIds = new Set(); const seenAliases = new Set(); diff --git a/src/common/constants/knownModels.ts b/src/common/constants/knownModels.ts index cdcb2e3c1a..ea2798bf14 100644 --- a/src/common/constants/knownModels.ts +++ b/src/common/constants/knownModels.ts @@ -115,9 +115,10 @@ const MODEL_DEFINITIONS = { aliases: ["gemini", "gemini-pro"], tokenizerOverride: "google/gemini-2.5-pro", }, + // Gemini Flash alias tracks the latest stable Flash tier. GEMINI_3_FLASH: { provider: "google", - providerModelId: "gemini-3-flash-preview", + providerModelId: "gemini-3.5-flash", aliases: ["gemini-flash"], tokenizerOverride: "google/gemini-2.5-pro", }, diff --git a/src/common/utils/ai/modelCapabilities.test.ts b/src/common/utils/ai/modelCapabilities.test.ts index a2a9b7d18a..ff06345327 100644 --- a/src/common/utils/ai/modelCapabilities.test.ts +++ b/src/common/utils/ai/modelCapabilities.test.ts @@ -47,6 +47,15 @@ describe("getModelCapabilities", () => { expect(caps?.maxPdfSizeMb).toBeGreaterThan(0); }); + it("returns multimodal capabilities for Gemini 3.5 Flash", () => { + const caps = getModelCapabilities("google:gemini-3.5-flash"); + expect(caps).not.toBeNull(); + expect(caps?.supportsPdfInput).toBe(true); + expect(caps?.supportsVision).toBe(true); + expect(caps?.supportsAudioInput).toBe(true); + expect(caps?.supportsVideoInput).toBe(true); + }); + it("returns null for unknown models", () => { expect(getModelCapabilities("anthropic:this-model-does-not-exist")).toBeNull(); }); diff --git a/src/common/utils/ai/modelDisplay.test.ts b/src/common/utils/ai/modelDisplay.test.ts index 12ddd780be..b91b7b9a20 100644 --- a/src/common/utils/ai/modelDisplay.test.ts +++ b/src/common/utils/ai/modelDisplay.test.ts @@ -45,6 +45,7 @@ describe("formatModelDisplayName", () => { describe("Gemini models", () => { test("formats Gemini models", () => { expect(formatModelDisplayName("gemini-2-0-flash-exp")).toBe("Gemini 2.0 Flash Exp"); + expect(formatModelDisplayName("gemini-3.5-flash")).toBe("Gemini 3.5 Flash"); expect(formatModelDisplayName("gemini-3.1-pro-preview")).toBe("Gemini 3.1 Pro Preview"); }); }); diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts index 9d57d5c468..2b6fafcd0a 100644 --- a/src/common/utils/ai/providerOptions.test.ts +++ b/src/common/utils/ai/providerOptions.test.ts @@ -749,6 +749,76 @@ describe("buildProviderOptions - OpenAI", () => { }); }); +describe("buildProviderOptions - Google", () => { + test("maps Gemini 3.5 Flash off to minimal thinking without thoughts", () => { + expect(buildProviderOptions("google:gemini-3.5-flash", "off")).toEqual({ + google: { + thinkingConfig: { + thinkingLevel: "minimal", + }, + }, + }); + }); + + test("maps Gemini 3.5 Flash medium to thinkingLevel medium with thoughts", () => { + expect(buildProviderOptions("mux-gateway:google/gemini-3.5-flash", "medium")).toEqual({ + google: { + thinkingConfig: { + includeThoughts: true, + thinkingLevel: "medium", + }, + }, + }); + }); + + test("uses mapped model capabilities for custom Gemini 3.5 Flash aliases", () => { + const providersConfig = createMockProvidersConfig({ + "google:custom-flash": "google:gemini-3.5-flash", + }); + + expect( + buildProviderOptions( + "google:custom-flash", + "off", + undefined, + undefined, + undefined, + undefined, + undefined, + providersConfig + ) + ).toEqual({ + google: { + thinkingConfig: { + thinkingLevel: "minimal", + }, + }, + }); + }); + + test("defensively maps unsupported Gemini 3.5 Flash xhigh to high", () => { + expect(buildProviderOptions("google:gemini-3.5-flash", "xhigh")).toEqual({ + google: { + thinkingConfig: { + includeThoughts: true, + thinkingLevel: "high", + }, + }, + }); + }); + + test("keeps Gemini 3.1 Pro off clamped to low-style behavior outside Flash mapping", () => { + expect(buildProviderOptions("google:gemini-3.1-pro-preview", "low")).toEqual({ + google: { + thinkingConfig: { + includeThoughts: true, + thinkingLevel: "low", + }, + }, + }); + }); +}); + describe("buildRequestHeaders", () => { for (const { name, model, options, expected } of [ { diff --git a/src/common/utils/ai/providerOptions.ts b/src/common/utils/ai/providerOptions.ts index 1433fb01a0..e38657f754 100644 --- a/src/common/utils/ai/providerOptions.ts +++ b/src/common/utils/ai/providerOptions.ts @@ -23,6 +23,7 @@ import { OPENAI_REASONING_EFFORT, OPENROUTER_REASONING_EFFORT, } from "@/common/types/thinking"; +import { isGeminiFlashThinkingLevelModelName } from "@/common/utils/thinking/policy"; import { resolveModelForMetadata } from "@/common/utils/providers/modelEntries"; import { log } from "@/node/services/log"; import type { MuxMessage } from "@/common/types/message"; @@ -410,21 +411,23 @@ export function buildProviderOptions( // Build Google-specific options if (formatProvider === "google") { const isGemini3 = capModelName.includes("gemini-3"); + const isGeminiFlashThinkingModel = isGeminiFlashThinkingLevelModelName(capModelName); let thinkingConfig: GoogleGenerativeAIProviderOptions["thinkingConfig"]; - if (effectiveThinking !== "off") { + if (isGeminiFlashThinkingModel && effectiveThinking === "off") { + // Gemini 3.5 Flash defaults to medium and does not support true thinking-off; + // send minimal explicitly so Mux's "off" setting means lowest-effort behavior. + thinkingConfig = { thinkingLevel: "minimal" }; + } else if (effectiveThinking !== "off") { thinkingConfig = { includeThoughts: true, }; if (isGemini3) { - // Policy enforcement already clamped to valid levels for Flash/Pro, - // so effectiveThinking is guaranteed in the model's allowed set. - // Flash: off/low/medium/high; Pro: low/high. "xhigh" can't reach here. - thinkingConfig.thinkingLevel = effectiveThinking as Exclude< - ThinkingLevel, - "off" | "xhigh" | "max" - >; + // Policy enforcement should clamp to valid Google levels before this adapter runs. + // Avoid leaking xhigh/max to Google if a caller bypasses policy. + thinkingConfig.thinkingLevel = + effectiveThinking === "xhigh" || effectiveThinking === "max" ? "high" : effectiveThinking; } else { // Gemini 2.5 uses thinkingBudget const budget = GEMINI_THINKING_BUDGETS[effectiveThinking]; diff --git a/src/common/utils/thinking/policy.test.ts b/src/common/utils/thinking/policy.test.ts index be282fbe1a..53af8d0c3c 100644 --- a/src/common/utils/thinking/policy.test.ts +++ b/src/common/utils/thinking/policy.test.ts @@ -386,6 +386,30 @@ describe("getThinkingPolicyForModel", () => { expect(getThinkingPolicyForModel("google:gemini-3.1-pro-preview")).toEqual(["low", "high"]); }); + test("returns off/low/medium/high for stable Gemini 3.5 Flash", () => { + expect(getThinkingPolicyForModel("google:gemini-3.5-flash")).toEqual([ + "off", + "low", + "medium", + "high", + ]); + expect(getThinkingPolicyForModel("mux-gateway:google/gemini-3.5-flash")).toEqual([ + "off", + "low", + "medium", + "high", + ]); + }); + + test("returns off/low/medium/high for stable Gemini 3.5 Flash behind OpenRouter", () => { + expect(getThinkingPolicyForModel("openrouter:google/gemini-3.5-flash")).toEqual([ + "off", + "low", + "medium", + "high", + ]); + }); + test("returns off/low/medium/high for Gemini 3 Flash", () => { expect(getThinkingPolicyForModel("google:gemini-3-flash-preview")).toEqual([ "off", diff --git a/src/common/utils/thinking/policy.ts b/src/common/utils/thinking/policy.ts index bf4a5506d6..261ae60896 100644 --- a/src/common/utils/thinking/policy.ts +++ b/src/common/utils/thinking/policy.ts @@ -25,6 +25,15 @@ import { */ export type ThinkingPolicy = readonly ThinkingLevel[]; +const GEMINI_FLASH_THINKING_LEVEL_MODEL_NAMES = new Set([ + "gemini-3-flash-preview", + "gemini-3.5-flash", +]); + +export function isGeminiFlashThinkingLevelModelName(modelName: string): boolean { + return GEMINI_FLASH_THINKING_LEVEL_MODEL_NAMES.has(modelName.trim().toLowerCase()); +} + /** * Returns the thinking policy for a given model. * @@ -95,8 +104,8 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy { return ["high"]; } - // Gemini 3 Flash supports 4 levels: off (minimal), low, medium, high - if (withoutProviderNamespace.includes("gemini-3-flash")) { + // Gemini Flash chat models support minimal/low/medium/high. Mux exposes minimal as "off". + if (isGeminiFlashThinkingLevelModelName(withoutProviderNamespace)) { return ["off", "low", "medium", "high"]; } diff --git a/src/common/utils/tokens/modelStats.test.ts b/src/common/utils/tokens/modelStats.test.ts index 907b974cb8..1715a6a21f 100644 --- a/src/common/utils/tokens/modelStats.test.ts +++ b/src/common/utils/tokens/modelStats.test.ts @@ -43,6 +43,15 @@ describe("getModelStats", () => { expect(stats.tiered_pricing_threshold_tokens).toBeUndefined(); }); + test("resolves Gemini 3.5 Flash with published standard pricing and limits", () => { + const stats = expectStats(KNOWN_MODELS.GEMINI_3_FLASH.id); + expect(stats.max_input_tokens).toBe(1048576); + expect(stats.max_output_tokens).toBe(65536); + expect(stats.input_cost_per_token).toBe(0.0000015); + expect(stats.output_cost_per_token).toBe(0.000009); + expect(stats.cache_read_input_token_cost).toBe(0.00000015); + }); + test("defaults tiered pricing threshold to 200K when metadata only ships *_above_200k rates", () => { const stats = expectStats("google:gemini-3.1-pro-preview"); expect(stats.tiered_pricing_threshold_tokens).toBe(200000); diff --git a/src/common/utils/tokens/models-extra.ts b/src/common/utils/tokens/models-extra.ts index 4eff23d23e..bc090aae4d 100644 --- a/src/common/utils/tokens/models-extra.ts +++ b/src/common/utils/tokens/models-extra.ts @@ -27,6 +27,8 @@ interface ModelData { supports_function_calling?: boolean; supports_vision?: boolean; supports_pdf_input?: boolean; + supports_audio_input?: boolean; + supports_video_input?: boolean; max_pdf_size_mb?: number; supports_reasoning?: boolean; supports_response_schema?: boolean; @@ -248,6 +250,27 @@ export const modelsExtra: Record = { supports_response_schema: true, }, + // Gemini 3.5 Flash - GA on May 19, 2026. Google AI docs list a stable + // `gemini-3.5-flash` model ID with 1M context, 65K max output, standard + // pricing of $1.50/M input, $9/M output, and $0.15/M cached input. + "gemini-3.5-flash": { + max_input_tokens: 1048576, + max_output_tokens: 65536, + input_cost_per_token: 0.0000015, // $1.50 per million input tokens + output_cost_per_token: 0.000009, // $9 per million output tokens, including thinking tokens + cache_read_input_token_cost: 0.00000015, // $0.15 per million cached input tokens + litellm_provider: "vertex_ai-language-models", + mode: "chat", + supports_function_calling: true, + supports_vision: true, + supports_pdf_input: true, + supports_audio_input: true, + supports_video_input: true, + supports_reasoning: true, + supports_response_schema: true, + knowledge_cutoff: "2025-01", + }, + // Gemini 3.1 Pro Preview - Released February 19, 2026 // Tiered pricing: ≤200K tokens $2/M input, $12/M output; >200K tokens $4/M input, $18/M output // 1M input context, ~64K max output tokens diff --git a/src/node/services/agentSkills/builtInSkillContent.generated.ts b/src/node/services/agentSkills/builtInSkillContent.generated.ts index c706d0f464..37b1c66c61 100644 --- a/src/node/services/agentSkills/builtInSkillContent.generated.ts +++ b/src/node/services/agentSkills/builtInSkillContent.generated.ts @@ -2334,7 +2334,7 @@ export const BUILTIN_SKILL_FILES: Record> = { "| Codex Mini 5.1 | openai:gpt-5.1-codex-mini | `codex-mini` | |", "| Codex Max 5.1 | openai:gpt-5.1-codex-max | `codex-max` | |", "| Gemini 3.1 Pro Preview | google:gemini-3.1-pro-preview | `gemini`, `gemini-pro` | |", - "| Gemini 3 Flash Preview | google:gemini-3-flash-preview | `gemini-flash` | |", + "| Gemini 3.5 Flash | google:gemini-3.5-flash | `gemini-flash` | |", "| Grok 4 1 Fast | xai:grok-4-1-fast | `grok`, `grok-4`, `grok-4.1`, `grok-4-1` | |", "| Grok Code Fast 1 | xai:grok-code-fast-1 | `grok-code` | |", "| DeepSeek V4 Pro | deepseek:deepseek-v4-pro | `deepseek`, `deepseek-pro`, `deepseek-v4`, `deepseek-v4-pro` | |", From 7c4505dd4e2bfe1c0af8dfc8e3812b49d9b24620 Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski Date: Tue, 19 May 2026 22:25:33 +0000 Subject: [PATCH 2/9] Address Gemini Flash review feedback --- src/common/constants/knownModels.ts | 2 +- src/common/utils/ai/providerOptions.test.ts | 30 ++++++++++++++++++++- src/common/utils/ai/providerOptions.ts | 6 ++--- src/common/utils/thinking/policy.test.ts | 10 +++++++ src/common/utils/thinking/policy.ts | 15 +++++++---- src/common/utils/tokens/modelStats.test.ts | 2 +- src/common/utils/tokens/models-extra.ts | 2 +- 7 files changed, 55 insertions(+), 12 deletions(-) diff --git a/src/common/constants/knownModels.ts b/src/common/constants/knownModels.ts index ea2798bf14..9a2185b5a4 100644 --- a/src/common/constants/knownModels.ts +++ b/src/common/constants/knownModels.ts @@ -116,7 +116,7 @@ const MODEL_DEFINITIONS = { tokenizerOverride: "google/gemini-2.5-pro", }, // Gemini Flash alias tracks the latest stable Flash tier. - GEMINI_3_FLASH: { + GEMINI_FLASH: { provider: "google", providerModelId: "gemini-3.5-flash", aliases: ["gemini-flash"], diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts index 2b6fafcd0a..ab2fa13921 100644 --- a/src/common/utils/ai/providerOptions.test.ts +++ b/src/common/utils/ai/providerOptions.test.ts @@ -760,6 +760,26 @@ describe("buildProviderOptions - Google", () => { }); }); + test("maps gateway Gemini 3.5 Flash off to minimal thinking without thoughts", () => { + expect(buildProviderOptions("mux-gateway:google/gemini-3.5-flash", "off")).toEqual({ + google: { + thinkingConfig: { + thinkingLevel: "minimal", + }, + }, + }); + }); + + test("maps versioned Gemini 3.5 Flash off to minimal thinking without thoughts", () => { + expect(buildProviderOptions("google:gemini-3.5-flash-001", "off")).toEqual({ + google: { + thinkingConfig: { + thinkingLevel: "minimal", + }, + }, + }); + }); + test("maps Gemini 3.5 Flash medium to thinkingLevel medium with thoughts", () => { expect(buildProviderOptions("mux-gateway:google/gemini-3.5-flash", "medium")).toEqual({ google: { @@ -807,7 +827,7 @@ describe("buildProviderOptions - Google", () => { }); }); - test("keeps Gemini 3.1 Pro off clamped to low-style behavior outside Flash mapping", () => { + test("passes Gemini 3.1 Pro low through as thinkingLevel low with thoughts", () => { expect(buildProviderOptions("google:gemini-3.1-pro-preview", "low")).toEqual({ google: { thinkingConfig: { @@ -817,6 +837,14 @@ describe("buildProviderOptions - Google", () => { }, }); }); + + test("keeps Gemini 3.1 Pro off without provider thinking config", () => { + expect(buildProviderOptions("google:gemini-3.1-pro-preview", "off")).toEqual({ + google: { + thinkingConfig: undefined, + }, + }); + }); }); describe("buildRequestHeaders", () => { diff --git a/src/common/utils/ai/providerOptions.ts b/src/common/utils/ai/providerOptions.ts index e38657f754..1e98ad50fd 100644 --- a/src/common/utils/ai/providerOptions.ts +++ b/src/common/utils/ai/providerOptions.ts @@ -410,12 +410,12 @@ export function buildProviderOptions( // Build Google-specific options if (formatProvider === "google") { - const isGemini3 = capModelName.includes("gemini-3"); + const usesGeminiThinkingLevelConfig = capModelName.includes("gemini-3"); const isGeminiFlashThinkingModel = isGeminiFlashThinkingLevelModelName(capModelName); let thinkingConfig: GoogleGenerativeAIProviderOptions["thinkingConfig"]; if (isGeminiFlashThinkingModel && effectiveThinking === "off") { - // Gemini 3.5 Flash defaults to medium and does not support true thinking-off; + // Gemini Flash chat models default to medium and do not support true thinking-off; // send minimal explicitly so Mux's "off" setting means lowest-effort behavior. thinkingConfig = { thinkingLevel: "minimal" }; } else if (effectiveThinking !== "off") { @@ -423,7 +423,7 @@ export function buildProviderOptions( includeThoughts: true, }; - if (isGemini3) { + if (usesGeminiThinkingLevelConfig) { // Policy enforcement should clamp to valid Google levels before this adapter runs. // Avoid leaking xhigh/max to Google if a caller bypasses policy. thinkingConfig.thinkingLevel = diff --git a/src/common/utils/thinking/policy.test.ts b/src/common/utils/thinking/policy.test.ts index 53af8d0c3c..8fb315d458 100644 --- a/src/common/utils/thinking/policy.test.ts +++ b/src/common/utils/thinking/policy.test.ts @@ -401,6 +401,16 @@ describe("getThinkingPolicyForModel", () => { ]); }); + test("returns off/low/medium/high for versioned stable Gemini 3.5 Flash IDs", () => { + for (const model of [ + "google:gemini-3.5-flash-001", + "google:gemini-3.5-flash-latest", + "google:gemini-3.5-flash-preview", + ]) { + expect(getThinkingPolicyForModel(model)).toEqual(["off", "low", "medium", "high"]); + } + }); + test("returns off/low/medium/high for stable Gemini 3.5 Flash behind OpenRouter", () => { expect(getThinkingPolicyForModel("openrouter:google/gemini-3.5-flash")).toEqual([ "off", diff --git a/src/common/utils/thinking/policy.ts b/src/common/utils/thinking/policy.ts index 261ae60896..d4485caf46 100644 --- a/src/common/utils/thinking/policy.ts +++ b/src/common/utils/thinking/policy.ts @@ -25,13 +25,18 @@ import { */ export type ThinkingPolicy = readonly ThinkingLevel[]; -const GEMINI_FLASH_THINKING_LEVEL_MODEL_NAMES = new Set([ - "gemini-3-flash-preview", - "gemini-3.5-flash", -]); +const GEMINI_FLASH_THINKING_LEVEL_MODEL_NAMES = new Set(["gemini-3-flash-preview"]); +/** + * Bare provider model IDs for Gemini Flash chat variants that accept Google's + * thinkingLevel config (minimal/low/medium/high) instead of Gemini 2.x thinkingBudget. + */ export function isGeminiFlashThinkingLevelModelName(modelName: string): boolean { - return GEMINI_FLASH_THINKING_LEVEL_MODEL_NAMES.has(modelName.trim().toLowerCase()); + const normalized = modelName.trim().toLowerCase(); + return ( + GEMINI_FLASH_THINKING_LEVEL_MODEL_NAMES.has(normalized) || + (normalized.startsWith("gemini-3.5-flash") && !normalized.startsWith("gemini-3.5-flash-lite")) + ); } /** diff --git a/src/common/utils/tokens/modelStats.test.ts b/src/common/utils/tokens/modelStats.test.ts index 1715a6a21f..a94709f81f 100644 --- a/src/common/utils/tokens/modelStats.test.ts +++ b/src/common/utils/tokens/modelStats.test.ts @@ -44,7 +44,7 @@ describe("getModelStats", () => { }); test("resolves Gemini 3.5 Flash with published standard pricing and limits", () => { - const stats = expectStats(KNOWN_MODELS.GEMINI_3_FLASH.id); + const stats = expectStats(KNOWN_MODELS.GEMINI_FLASH.id); expect(stats.max_input_tokens).toBe(1048576); expect(stats.max_output_tokens).toBe(65536); expect(stats.input_cost_per_token).toBe(0.0000015); diff --git a/src/common/utils/tokens/models-extra.ts b/src/common/utils/tokens/models-extra.ts index bc090aae4d..556e09413b 100644 --- a/src/common/utils/tokens/models-extra.ts +++ b/src/common/utils/tokens/models-extra.ts @@ -268,7 +268,7 @@ export const modelsExtra: Record = { supports_video_input: true, supports_reasoning: true, supports_response_schema: true, - knowledge_cutoff: "2025-01", + knowledge_cutoff: "2026-01", }, // Gemini 3.1 Pro Preview - Released February 19, 2026 From 835cb2b1f3fc179ba56439c66d2fe35ffc47a78d Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski Date: Wed, 20 May 2026 06:00:35 +0000 Subject: [PATCH 3/9] Handle versioned Gemini 3 Flash preview IDs --- src/common/utils/ai/providerOptions.test.ts | 10 ++++++++++ src/common/utils/thinking/policy.test.ts | 9 +++++++++ src/common/utils/thinking/policy.ts | 1 + 3 files changed, 20 insertions(+) diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts index ab2fa13921..0cd92e2a2e 100644 --- a/src/common/utils/ai/providerOptions.test.ts +++ b/src/common/utils/ai/providerOptions.test.ts @@ -816,6 +816,16 @@ describe("buildProviderOptions - Google", () => { }); }); + test("maps versioned Gemini 3 Flash Preview off to minimal thinking without thoughts", () => { + expect(buildProviderOptions("google:gemini-3-flash-preview-latest", "off")).toEqual({ + google: { + thinkingConfig: { + thinkingLevel: "minimal", + }, + }, + }); + }); + test("defensively maps unsupported Gemini 3.5 Flash xhigh to high", () => { expect(buildProviderOptions("google:gemini-3.5-flash", "xhigh")).toEqual({ google: { diff --git a/src/common/utils/thinking/policy.test.ts b/src/common/utils/thinking/policy.test.ts index 8fb315d458..48aaf507c4 100644 --- a/src/common/utils/thinking/policy.test.ts +++ b/src/common/utils/thinking/policy.test.ts @@ -420,6 +420,15 @@ describe("getThinkingPolicyForModel", () => { ]); }); + test("returns off/low/medium/high for versioned Gemini 3 Flash Preview IDs", () => { + for (const model of [ + "google:gemini-3-flash-preview-20251217", + "google:gemini-3-flash-preview-latest", + ]) { + expect(getThinkingPolicyForModel(model)).toEqual(["off", "low", "medium", "high"]); + } + }); + test("returns off/low/medium/high for Gemini 3 Flash", () => { expect(getThinkingPolicyForModel("google:gemini-3-flash-preview")).toEqual([ "off", diff --git a/src/common/utils/thinking/policy.ts b/src/common/utils/thinking/policy.ts index d4485caf46..7192b8c71f 100644 --- a/src/common/utils/thinking/policy.ts +++ b/src/common/utils/thinking/policy.ts @@ -35,6 +35,7 @@ export function isGeminiFlashThinkingLevelModelName(modelName: string): boolean const normalized = modelName.trim().toLowerCase(); return ( GEMINI_FLASH_THINKING_LEVEL_MODEL_NAMES.has(normalized) || + normalized.startsWith("gemini-3-flash-preview-") || (normalized.startsWith("gemini-3.5-flash") && !normalized.startsWith("gemini-3.5-flash-lite")) ); } From 70d6ea62721cbf9449ac47f64eb08517da856dcb Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski Date: Wed, 20 May 2026 06:02:04 +0000 Subject: [PATCH 4/9] Address Gemini Flash review coverage --- src/common/utils/ai/providerOptions.test.ts | 10 ++++++++++ src/common/utils/thinking/policy.test.ts | 13 ++++++++++++- src/common/utils/thinking/policy.ts | 6 ++---- src/common/utils/tokens/models-extra.ts | 1 + 4 files changed, 25 insertions(+), 5 deletions(-) diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts index 0cd92e2a2e..217d39f927 100644 --- a/src/common/utils/ai/providerOptions.test.ts +++ b/src/common/utils/ai/providerOptions.test.ts @@ -816,6 +816,16 @@ describe("buildProviderOptions - Google", () => { }); }); + test("maps Gemini 3 Flash Preview off to minimal thinking without thoughts", () => { + expect(buildProviderOptions("google:gemini-3-flash-preview", "off")).toEqual({ + google: { + thinkingConfig: { + thinkingLevel: "minimal", + }, + }, + }); + }); + test("maps versioned Gemini 3 Flash Preview off to minimal thinking without thoughts", () => { expect(buildProviderOptions("google:gemini-3-flash-preview-latest", "off")).toEqual({ google: { diff --git a/src/common/utils/thinking/policy.test.ts b/src/common/utils/thinking/policy.test.ts index 48aaf507c4..155738acf9 100644 --- a/src/common/utils/thinking/policy.test.ts +++ b/src/common/utils/thinking/policy.test.ts @@ -1,5 +1,10 @@ import { describe, expect, test } from "bun:test"; -import { getThinkingPolicyForModel, enforceThinkingPolicy, resolveThinkingInput } from "./policy"; +import { + getThinkingPolicyForModel, + enforceThinkingPolicy, + resolveThinkingInput, + isGeminiFlashThinkingLevelModelName, +} from "./policy"; describe("getThinkingPolicyForModel", () => { test("returns 5 levels including xhigh for gpt-5.1-codex-max", () => { @@ -454,6 +459,12 @@ describe("getThinkingPolicyForModel", () => { }); }); +describe("isGeminiFlashThinkingLevelModelName", () => { + test("does not classify Gemini 3.5 Flash Lite as a Flash thinking-level chat model", () => { + expect(isGeminiFlashThinkingLevelModelName("gemini-3.5-flash-lite")).toBe(false); + }); +}); + describe("enforceThinkingPolicy", () => { describe("single-option policy models (gpt-5-pro)", () => { test("enforces high for any requested level", () => { diff --git a/src/common/utils/thinking/policy.ts b/src/common/utils/thinking/policy.ts index 7192b8c71f..f2a601b6fb 100644 --- a/src/common/utils/thinking/policy.ts +++ b/src/common/utils/thinking/policy.ts @@ -25,16 +25,14 @@ import { */ export type ThinkingPolicy = readonly ThinkingLevel[]; -const GEMINI_FLASH_THINKING_LEVEL_MODEL_NAMES = new Set(["gemini-3-flash-preview"]); - /** - * Bare provider model IDs for Gemini Flash chat variants that accept Google's + * True when modelName is a bare Gemini Flash chat model ID using Google's * thinkingLevel config (minimal/low/medium/high) instead of Gemini 2.x thinkingBudget. */ export function isGeminiFlashThinkingLevelModelName(modelName: string): boolean { const normalized = modelName.trim().toLowerCase(); return ( - GEMINI_FLASH_THINKING_LEVEL_MODEL_NAMES.has(normalized) || + normalized === "gemini-3-flash-preview" || normalized.startsWith("gemini-3-flash-preview-") || (normalized.startsWith("gemini-3.5-flash") && !normalized.startsWith("gemini-3.5-flash-lite")) ); diff --git a/src/common/utils/tokens/models-extra.ts b/src/common/utils/tokens/models-extra.ts index 556e09413b..b909672425 100644 --- a/src/common/utils/tokens/models-extra.ts +++ b/src/common/utils/tokens/models-extra.ts @@ -253,6 +253,7 @@ export const modelsExtra: Record = { // Gemini 3.5 Flash - GA on May 19, 2026. Google AI docs list a stable // `gemini-3.5-flash` model ID with 1M context, 65K max output, standard // pricing of $1.50/M input, $9/M output, and $0.15/M cached input. + // Source: Google Gemini API pricing/model docs as of 2026-05-19. "gemini-3.5-flash": { max_input_tokens: 1048576, max_output_tokens: 65536, From a5e4c9de5a7dbad0da0fe3791456f66537a2cf0d Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski Date: Wed, 20 May 2026 06:07:45 +0000 Subject: [PATCH 5/9] Clarify Gemini Flash metadata and policy docs --- src/common/utils/thinking/policy.ts | 3 ++- src/common/utils/tokens/models-extra.ts | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/common/utils/thinking/policy.ts b/src/common/utils/thinking/policy.ts index f2a601b6fb..e480b07e1a 100644 --- a/src/common/utils/thinking/policy.ts +++ b/src/common/utils/thinking/policy.ts @@ -49,7 +49,8 @@ export function isGeminiFlashThinkingLevelModelName(modelName: string): boolean * - openai:gpt-5.2 / openai:gpt-5.5 → ["off", "low", "medium", "high", "xhigh"] * - openai:gpt-5.2-pro / openai:gpt-5.5-pro → ["medium", "high", "xhigh"] (3 levels) * - openai:gpt-5-pro → ["high"] (only supported level, legacy) - * - gemini-3 → ["low", "high"] (thinking level only) + * - gemini-3 Flash chat variants → ["off", "low", "medium", "high"] + * - gemini-3 Pro variants → ["low", "high"] (thinking level only) * - default → ["off", "low", "medium", "high"] (standard 4 levels; xhigh is opt-in per model) * * Tolerates version suffixes (e.g., gpt-5-pro-2025-10-06). diff --git a/src/common/utils/tokens/models-extra.ts b/src/common/utils/tokens/models-extra.ts index b909672425..dfca2f7f5c 100644 --- a/src/common/utils/tokens/models-extra.ts +++ b/src/common/utils/tokens/models-extra.ts @@ -253,7 +253,7 @@ export const modelsExtra: Record = { // Gemini 3.5 Flash - GA on May 19, 2026. Google AI docs list a stable // `gemini-3.5-flash` model ID with 1M context, 65K max output, standard // pricing of $1.50/M input, $9/M output, and $0.15/M cached input. - // Source: Google Gemini API pricing/model docs as of 2026-05-19. + // Source: Google DeepMind Gemini 3.5 Flash model info and Gemini API pricing docs as of 2026-05-20. "gemini-3.5-flash": { max_input_tokens: 1048576, max_output_tokens: 65536, @@ -269,7 +269,7 @@ export const modelsExtra: Record = { supports_video_input: true, supports_reasoning: true, supports_response_schema: true, - knowledge_cutoff: "2026-01", + knowledge_cutoff: "2025-01", }, // Gemini 3.1 Pro Preview - Released February 19, 2026 From 7ed21f11db86dd0f627ecc74df8b507ed074af58 Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski Date: Wed, 20 May 2026 06:30:43 +0000 Subject: [PATCH 6/9] Cover Gemini Flash defensive thinking mappings --- src/common/utils/ai/providerOptions.test.ts | 11 +++++++++++ src/common/utils/thinking/policy.ts | 3 ++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts index 217d39f927..0dcfbd150e 100644 --- a/src/common/utils/ai/providerOptions.test.ts +++ b/src/common/utils/ai/providerOptions.test.ts @@ -858,6 +858,17 @@ describe("buildProviderOptions - Google", () => { }); }); + test("defensively maps unsupported Gemini 3.5 Flash max to high", () => { + expect(buildProviderOptions("google:gemini-3.5-flash", "max")).toEqual({ + google: { + thinkingConfig: { + includeThoughts: true, + thinkingLevel: "high", + }, + }, + }); + }); + test("keeps Gemini 3.1 Pro off without provider thinking config", () => { expect(buildProviderOptions("google:gemini-3.1-pro-preview", "off")).toEqual({ google: { diff --git a/src/common/utils/thinking/policy.ts b/src/common/utils/thinking/policy.ts index e480b07e1a..06b20f5e9c 100644 --- a/src/common/utils/thinking/policy.ts +++ b/src/common/utils/thinking/policy.ts @@ -28,6 +28,7 @@ export type ThinkingPolicy = readonly ThinkingLevel[]; /** * True when modelName is a bare Gemini Flash chat model ID using Google's * thinkingLevel config (minimal/low/medium/high) instead of Gemini 2.x thinkingBudget. + * @param modelName Provider model ID without the provider prefix (e.g. "gemini-3.5-flash", not "google:gemini-3.5-flash"). */ export function isGeminiFlashThinkingLevelModelName(modelName: string): boolean { const normalized = modelName.trim().toLowerCase(); @@ -49,7 +50,7 @@ export function isGeminiFlashThinkingLevelModelName(modelName: string): boolean * - openai:gpt-5.2 / openai:gpt-5.5 → ["off", "low", "medium", "high", "xhigh"] * - openai:gpt-5.2-pro / openai:gpt-5.5-pro → ["medium", "high", "xhigh"] (3 levels) * - openai:gpt-5-pro → ["high"] (only supported level, legacy) - * - gemini-3 Flash chat variants → ["off", "low", "medium", "high"] + * - Gemini Flash chat variants → ["off", "low", "medium", "high"] * - gemini-3 Pro variants → ["low", "high"] (thinking level only) * - default → ["off", "low", "medium", "high"] (standard 4 levels; xhigh is opt-in per model) * From 769e8977d6ea2fdd99b3a2847776f4320409f0fa Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski Date: Wed, 20 May 2026 06:52:39 +0000 Subject: [PATCH 7/9] Support non-preview Gemini 3 Flash aliases --- src/common/utils/ai/providerOptions.test.ts | 10 ++++++++++ src/common/utils/thinking/policy.test.ts | 6 ++++++ src/common/utils/thinking/policy.ts | 4 ++-- 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts index 0dcfbd150e..a99f5f6ad9 100644 --- a/src/common/utils/ai/providerOptions.test.ts +++ b/src/common/utils/ai/providerOptions.test.ts @@ -816,6 +816,16 @@ describe("buildProviderOptions - Google", () => { }); }); + test("maps non-preview Gemini 3 Flash off to minimal thinking without thoughts", () => { + expect(buildProviderOptions("google:gemini-3-flash", "off")).toEqual({ + google: { + thinkingConfig: { + thinkingLevel: "minimal", + }, + }, + }); + }); + test("maps Gemini 3 Flash Preview off to minimal thinking without thoughts", () => { expect(buildProviderOptions("google:gemini-3-flash-preview", "off")).toEqual({ google: { diff --git a/src/common/utils/thinking/policy.test.ts b/src/common/utils/thinking/policy.test.ts index 155738acf9..5c9519d426 100644 --- a/src/common/utils/thinking/policy.test.ts +++ b/src/common/utils/thinking/policy.test.ts @@ -425,6 +425,12 @@ describe("getThinkingPolicyForModel", () => { ]); }); + test("returns off/low/medium/high for non-preview Gemini 3 Flash IDs", () => { + for (const model of ["google:gemini-3-flash", "google:gemini-3-flash-001"]) { + expect(getThinkingPolicyForModel(model)).toEqual(["off", "low", "medium", "high"]); + } + }); + test("returns off/low/medium/high for versioned Gemini 3 Flash Preview IDs", () => { for (const model of [ "google:gemini-3-flash-preview-20251217", diff --git a/src/common/utils/thinking/policy.ts b/src/common/utils/thinking/policy.ts index 06b20f5e9c..88bac6388f 100644 --- a/src/common/utils/thinking/policy.ts +++ b/src/common/utils/thinking/policy.ts @@ -33,8 +33,8 @@ export type ThinkingPolicy = readonly ThinkingLevel[]; export function isGeminiFlashThinkingLevelModelName(modelName: string): boolean { const normalized = modelName.trim().toLowerCase(); return ( - normalized === "gemini-3-flash-preview" || - normalized.startsWith("gemini-3-flash-preview-") || + ((normalized === "gemini-3-flash" || normalized.startsWith("gemini-3-flash-")) && + !normalized.startsWith("gemini-3-flash-lite")) || (normalized.startsWith("gemini-3.5-flash") && !normalized.startsWith("gemini-3.5-flash-lite")) ); } From 43b46c839656b9085975f53912c6645e20c2d291 Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski Date: Wed, 20 May 2026 07:08:13 +0000 Subject: [PATCH 8/9] Normalize Gemini Flash provider option model IDs --- src/common/utils/ai/providerOptions.test.ts | 10 ++++++++++ src/common/utils/ai/providerOptions.ts | 5 +++-- 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts index a99f5f6ad9..58b9a63fa8 100644 --- a/src/common/utils/ai/providerOptions.test.ts +++ b/src/common/utils/ai/providerOptions.test.ts @@ -770,6 +770,16 @@ describe("buildProviderOptions - Google", () => { }); }); + test("maps namespaced Gemini 3.5 Flash off to minimal thinking without thoughts", () => { + expect(buildProviderOptions("google:models/gemini-3.5-flash", "off")).toEqual({ + google: { + thinkingConfig: { + thinkingLevel: "minimal", + }, + }, + }); + }); + test("maps versioned Gemini 3.5 Flash off to minimal thinking without thoughts", () => { expect(buildProviderOptions("google:gemini-3.5-flash-001", "off")).toEqual({ google: { diff --git a/src/common/utils/ai/providerOptions.ts b/src/common/utils/ai/providerOptions.ts index 1e98ad50fd..9c8e76c68b 100644 --- a/src/common/utils/ai/providerOptions.ts +++ b/src/common/utils/ai/providerOptions.ts @@ -410,8 +410,9 @@ export function buildProviderOptions( // Build Google-specific options if (formatProvider === "google") { - const usesGeminiThinkingLevelConfig = capModelName.includes("gemini-3"); - const isGeminiFlashThinkingModel = isGeminiFlashThinkingLevelModelName(capModelName); + const capBareModelName = capModelName.split("/").at(-1) ?? capModelName; + const usesGeminiThinkingLevelConfig = capBareModelName.includes("gemini-3"); + const isGeminiFlashThinkingModel = isGeminiFlashThinkingLevelModelName(capBareModelName); let thinkingConfig: GoogleGenerativeAIProviderOptions["thinkingConfig"]; if (isGeminiFlashThinkingModel && effectiveThinking === "off") { From 644297fcdcf259efaf2d0107c8a455b70ec5c0c6 Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski Date: Wed, 20 May 2026 07:23:42 +0000 Subject: [PATCH 9/9] Cover Gemini 3 Flash Lite exclusion --- src/common/utils/thinking/policy.test.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/common/utils/thinking/policy.test.ts b/src/common/utils/thinking/policy.test.ts index 5c9519d426..aae7e1cb62 100644 --- a/src/common/utils/thinking/policy.test.ts +++ b/src/common/utils/thinking/policy.test.ts @@ -466,7 +466,8 @@ describe("getThinkingPolicyForModel", () => { }); describe("isGeminiFlashThinkingLevelModelName", () => { - test("does not classify Gemini 3.5 Flash Lite as a Flash thinking-level chat model", () => { + test("does not classify Gemini Flash Lite variants as Flash thinking-level chat models", () => { + expect(isGeminiFlashThinkingLevelModelName("gemini-3-flash-lite")).toBe(false); expect(isGeminiFlashThinkingLevelModelName("gemini-3.5-flash-lite")).toBe(false); }); });