Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/config/models.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ Mux ships with curated models kept up to date with the frontier. Use any custom
| Codex Mini 5.1 | openai:gpt-5.1-codex-mini | `codex-mini` | |
| Codex Max 5.1 | openai:gpt-5.1-codex-max | `codex-max` | |
| Gemini 3.1 Pro Preview | google:gemini-3.1-pro-preview | `gemini`, `gemini-pro` | |
| Gemini 3 Flash Preview | google:gemini-3-flash-preview | `gemini-flash` | |
| Gemini 3.5 Flash | google:gemini-3.5-flash | `gemini-flash` | |
| Grok 4 1 Fast | xai:grok-4-1-fast | `grok`, `grok-4`, `grok-4.1`, `grok-4-1` | |
| Grok Code Fast 1 | xai:grok-code-fast-1 | `grok-code` | |
| DeepSeek V4 Pro | deepseek:deepseek-v4-pro | `deepseek`, `deepseek-pro`, `deepseek-v4`, `deepseek-v4-pro` | |
Expand Down
4 changes: 4 additions & 0 deletions src/common/constants/knownModels.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ describe("Known Models Integration", () => {
}
});

test("gemini-flash resolves to the stable Gemini 3.5 Flash model", () => {
expect(MODEL_ABBREVIATIONS["gemini-flash"]).toBe("google:gemini-3.5-flash");
});

test("known model ids and aliases stay unique across the curated registry", () => {
const seenIds = new Set<string>();
const seenAliases = new Set<string>();
Expand Down
5 changes: 3 additions & 2 deletions src/common/constants/knownModels.ts
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,10 @@ const MODEL_DEFINITIONS = {
aliases: ["gemini", "gemini-pro"],
tokenizerOverride: "google/gemini-2.5-pro",
},
GEMINI_3_FLASH: {
// Gemini Flash alias tracks the latest stable Flash tier.
GEMINI_FLASH: {
provider: "google",
providerModelId: "gemini-3-flash-preview",
providerModelId: "gemini-3.5-flash",
aliases: ["gemini-flash"],
tokenizerOverride: "google/gemini-2.5-pro",
},
Expand Down
9 changes: 9 additions & 0 deletions src/common/utils/ai/modelCapabilities.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,15 @@ describe("getModelCapabilities", () => {
expect(caps?.maxPdfSizeMb).toBeGreaterThan(0);
});

it("returns multimodal capabilities for Gemini 3.5 Flash", () => {
const caps = getModelCapabilities("google:gemini-3.5-flash");
expect(caps).not.toBeNull();
expect(caps?.supportsPdfInput).toBe(true);
expect(caps?.supportsVision).toBe(true);
expect(caps?.supportsAudioInput).toBe(true);
expect(caps?.supportsVideoInput).toBe(true);
});

it("returns null for unknown models", () => {
expect(getModelCapabilities("anthropic:this-model-does-not-exist")).toBeNull();
});
Expand Down
1 change: 1 addition & 0 deletions src/common/utils/ai/modelDisplay.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ describe("formatModelDisplayName", () => {
describe("Gemini models", () => {
test("formats Gemini models", () => {
expect(formatModelDisplayName("gemini-2-0-flash-exp")).toBe("Gemini 2.0 Flash Exp");
expect(formatModelDisplayName("gemini-3.5-flash")).toBe("Gemini 3.5 Flash");
expect(formatModelDisplayName("gemini-3.1-pro-preview")).toBe("Gemini 3.1 Pro Preview");
});
});
Expand Down
149 changes: 149 additions & 0 deletions src/common/utils/ai/providerOptions.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -749,6 +749,155 @@ describe("buildProviderOptions - OpenAI", () => {
});
});

describe("buildProviderOptions - Google", () => {
Comment thread
ThomasK33 marked this conversation as resolved.
test("maps Gemini 3.5 Flash off to minimal thinking without thoughts", () => {
expect(buildProviderOptions("google:gemini-3.5-flash", "off")).toEqual({
google: {
thinkingConfig: {
thinkingLevel: "minimal",
},
},
});
});

test("maps gateway Gemini 3.5 Flash off to minimal thinking without thoughts", () => {
expect(buildProviderOptions("mux-gateway:google/gemini-3.5-flash", "off")).toEqual({
google: {
thinkingConfig: {
thinkingLevel: "minimal",
},
},
});
});

test("maps namespaced Gemini 3.5 Flash off to minimal thinking without thoughts", () => {
expect(buildProviderOptions("google:models/gemini-3.5-flash", "off")).toEqual({
google: {
thinkingConfig: {
thinkingLevel: "minimal",
},
},
});
});

test("maps versioned Gemini 3.5 Flash off to minimal thinking without thoughts", () => {
expect(buildProviderOptions("google:gemini-3.5-flash-001", "off")).toEqual({
google: {
thinkingConfig: {
thinkingLevel: "minimal",
},
},
});
});

test("maps Gemini 3.5 Flash medium to thinkingLevel medium with thoughts", () => {
Comment thread
ThomasK33 marked this conversation as resolved.
expect(buildProviderOptions("mux-gateway:google/gemini-3.5-flash", "medium")).toEqual({
google: {
thinkingConfig: {
includeThoughts: true,
thinkingLevel: "medium",
},
},
});
});

test("uses mapped model capabilities for custom Gemini 3.5 Flash aliases", () => {
const providersConfig = createMockProvidersConfig({
"google:custom-flash": "google:gemini-3.5-flash",
});

expect(
buildProviderOptions(
"google:custom-flash",
"off",
undefined,
undefined,
undefined,
undefined,
undefined,
providersConfig
)
).toEqual({
google: {
thinkingConfig: {
thinkingLevel: "minimal",
},
},
});
});

test("maps non-preview Gemini 3 Flash off to minimal thinking without thoughts", () => {
expect(buildProviderOptions("google:gemini-3-flash", "off")).toEqual({
google: {
thinkingConfig: {
thinkingLevel: "minimal",
},
},
});
});

test("maps Gemini 3 Flash Preview off to minimal thinking without thoughts", () => {
expect(buildProviderOptions("google:gemini-3-flash-preview", "off")).toEqual({
google: {
thinkingConfig: {
thinkingLevel: "minimal",
},
},
});
});

test("maps versioned Gemini 3 Flash Preview off to minimal thinking without thoughts", () => {
expect(buildProviderOptions("google:gemini-3-flash-preview-latest", "off")).toEqual({
google: {
thinkingConfig: {
thinkingLevel: "minimal",
},
},
});
});

test("defensively maps unsupported Gemini 3.5 Flash xhigh to high", () => {
Comment thread
ThomasK33 marked this conversation as resolved.
expect(buildProviderOptions("google:gemini-3.5-flash", "xhigh")).toEqual({
google: {
thinkingConfig: {
includeThoughts: true,
thinkingLevel: "high",
},
},
});
});

test("passes Gemini 3.1 Pro low through as thinkingLevel low with thoughts", () => {
expect(buildProviderOptions("google:gemini-3.1-pro-preview", "low")).toEqual({
google: {
thinkingConfig: {
includeThoughts: true,
thinkingLevel: "low",
},
},
});
});

test("defensively maps unsupported Gemini 3.5 Flash max to high", () => {
expect(buildProviderOptions("google:gemini-3.5-flash", "max")).toEqual({
google: {
thinkingConfig: {
includeThoughts: true,
thinkingLevel: "high",
},
},
});
});

test("keeps Gemini 3.1 Pro off without provider thinking config", () => {
expect(buildProviderOptions("google:gemini-3.1-pro-preview", "off")).toEqual({
google: {
thinkingConfig: undefined,
},
});
});
});

describe("buildRequestHeaders", () => {
for (const { name, model, options, expected } of [
{
Expand Down
24 changes: 14 additions & 10 deletions src/common/utils/ai/providerOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import {
OPENAI_REASONING_EFFORT,
OPENROUTER_REASONING_EFFORT,
} from "@/common/types/thinking";
import { isGeminiFlashThinkingLevelModelName } from "@/common/utils/thinking/policy";
import { resolveModelForMetadata } from "@/common/utils/providers/modelEntries";
import { log } from "@/node/services/log";
import type { MuxMessage } from "@/common/types/message";
Expand Down Expand Up @@ -409,22 +410,25 @@ export function buildProviderOptions(

// Build Google-specific options
if (formatProvider === "google") {
const isGemini3 = capModelName.includes("gemini-3");
const capBareModelName = capModelName.split("/").at(-1) ?? capModelName;
const usesGeminiThinkingLevelConfig = capBareModelName.includes("gemini-3");
const isGeminiFlashThinkingModel = isGeminiFlashThinkingLevelModelName(capBareModelName);
let thinkingConfig: GoogleGenerativeAIProviderOptions["thinkingConfig"];

if (effectiveThinking !== "off") {
if (isGeminiFlashThinkingModel && effectiveThinking === "off") {
// Gemini Flash chat models default to medium and do not support true thinking-off;
// send minimal explicitly so Mux's "off" setting means lowest-effort behavior.
thinkingConfig = { thinkingLevel: "minimal" };
} else if (effectiveThinking !== "off") {
thinkingConfig = {
includeThoughts: true,
};

if (isGemini3) {
// Policy enforcement already clamped to valid levels for Flash/Pro,
// so effectiveThinking is guaranteed in the model's allowed set.
// Flash: off/low/medium/high; Pro: low/high. "xhigh" can't reach here.
thinkingConfig.thinkingLevel = effectiveThinking as Exclude<
ThinkingLevel,
"off" | "xhigh" | "max"
>;
if (usesGeminiThinkingLevelConfig) {
// Policy enforcement should clamp to valid Google levels before this adapter runs.
// Avoid leaking xhigh/max to Google if a caller bypasses policy.
thinkingConfig.thinkingLevel =
effectiveThinking === "xhigh" || effectiveThinking === "max" ? "high" : effectiveThinking;
} else {
// Gemini 2.5 uses thinkingBudget
const budget = GEMINI_THINKING_BUDGETS[effectiveThinking];
Expand Down
63 changes: 62 additions & 1 deletion src/common/utils/thinking/policy.test.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
import { describe, expect, test } from "bun:test";
import { getThinkingPolicyForModel, enforceThinkingPolicy, resolveThinkingInput } from "./policy";
import {
getThinkingPolicyForModel,
enforceThinkingPolicy,
resolveThinkingInput,
isGeminiFlashThinkingLevelModelName,
} from "./policy";

describe("getThinkingPolicyForModel", () => {
test("returns 5 levels including xhigh for gpt-5.1-codex-max", () => {
Expand Down Expand Up @@ -386,6 +391,55 @@ describe("getThinkingPolicyForModel", () => {
expect(getThinkingPolicyForModel("google:gemini-3.1-pro-preview")).toEqual(["low", "high"]);
});

test("returns off/low/medium/high for stable Gemini 3.5 Flash", () => {
expect(getThinkingPolicyForModel("google:gemini-3.5-flash")).toEqual([
"off",
"low",
"medium",
"high",
]);
expect(getThinkingPolicyForModel("mux-gateway:google/gemini-3.5-flash")).toEqual([
"off",
"low",
"medium",
"high",
]);
});

test("returns off/low/medium/high for versioned stable Gemini 3.5 Flash IDs", () => {
for (const model of [
"google:gemini-3.5-flash-001",
"google:gemini-3.5-flash-latest",
"google:gemini-3.5-flash-preview",
]) {
expect(getThinkingPolicyForModel(model)).toEqual(["off", "low", "medium", "high"]);
}
});

test("returns off/low/medium/high for stable Gemini 3.5 Flash behind OpenRouter", () => {
expect(getThinkingPolicyForModel("openrouter:google/gemini-3.5-flash")).toEqual([
"off",
"low",
"medium",
"high",
]);
});

test("returns off/low/medium/high for non-preview Gemini 3 Flash IDs", () => {
for (const model of ["google:gemini-3-flash", "google:gemini-3-flash-001"]) {
expect(getThinkingPolicyForModel(model)).toEqual(["off", "low", "medium", "high"]);
}
});

test("returns off/low/medium/high for versioned Gemini 3 Flash Preview IDs", () => {
for (const model of [
"google:gemini-3-flash-preview-20251217",
"google:gemini-3-flash-preview-latest",
]) {
expect(getThinkingPolicyForModel(model)).toEqual(["off", "low", "medium", "high"]);
}
});

test("returns off/low/medium/high for Gemini 3 Flash", () => {
expect(getThinkingPolicyForModel("google:gemini-3-flash-preview")).toEqual([
"off",
Expand All @@ -411,6 +465,13 @@ describe("getThinkingPolicyForModel", () => {
});
});

describe("isGeminiFlashThinkingLevelModelName", () => {
test("does not classify Gemini Flash Lite variants as Flash thinking-level chat models", () => {
expect(isGeminiFlashThinkingLevelModelName("gemini-3-flash-lite")).toBe(false);
expect(isGeminiFlashThinkingLevelModelName("gemini-3.5-flash-lite")).toBe(false);
Comment thread
ThomasK33 marked this conversation as resolved.
});
});

describe("enforceThinkingPolicy", () => {
describe("single-option policy models (gpt-5-pro)", () => {
test("enforces high for any requested level", () => {
Expand Down
21 changes: 18 additions & 3 deletions src/common/utils/thinking/policy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,20 @@ import {
*/
export type ThinkingPolicy = readonly ThinkingLevel[];

/**
 * Gemini Flash model families that are configured through Google's
 * thinkingLevel setting. Each entry is the bare family ID; versioned IDs
 * append a "-suffix" to it.
 */
const GEMINI_FLASH_THINKING_LEVEL_FAMILIES = ["gemini-3-flash", "gemini-3.5-flash"] as const;

/**
 * True when modelName is a bare Gemini Flash chat model ID using Google's
 * thinkingLevel config (minimal/low/medium/high) instead of Gemini 2.x thinkingBudget.
 *
 * Matching is case-insensitive and ignores surrounding whitespace. A name matches when it is
 * exactly one of the Flash family IDs or that ID followed by a "-suffix"
 * (e.g. "gemini-3.5-flash-001", "gemini-3-flash-preview-latest"). Flash Lite variants
 * ("<family>-lite…") are never matched.
 *
 * @param modelName Provider model ID without the provider prefix (e.g. "gemini-3.5-flash", not "google:gemini-3.5-flash").
 * @returns true for Gemini 3 / 3.5 Flash chat model IDs, false for everything else.
 */
export function isGeminiFlashThinkingLevelModelName(modelName: string): boolean {
  const normalized = modelName.trim().toLowerCase();

  return GEMINI_FLASH_THINKING_LEVEL_FAMILIES.some((family) => {
    // Require an exact ID or a "-" boundary for every family, so names that only share
    // the prefix (e.g. "gemini-3.5-flashy") are not mistaken for Flash models.
    const belongsToFamily = normalized === family || normalized.startsWith(`${family}-`);
    // Flash Lite variants are excluded from this classification.
    const isLiteVariant = normalized.startsWith(`${family}-lite`);
    return belongsToFamily && !isLiteVariant;
  });
}

/**
* Returns the thinking policy for a given model.
*
Expand All @@ -36,7 +50,8 @@ export type ThinkingPolicy = readonly ThinkingLevel[];
* - openai:gpt-5.2 / openai:gpt-5.5 → ["off", "low", "medium", "high", "xhigh"]
* - openai:gpt-5.2-pro / openai:gpt-5.5-pro → ["medium", "high", "xhigh"] (3 levels)
* - openai:gpt-5-pro → ["high"] (only supported level, legacy)
* - gemini-3 → ["low", "high"] (thinking level only)
* - Gemini Flash chat variants → ["off", "low", "medium", "high"]
* - gemini-3 Pro variants → ["low", "high"] (thinking level only)
* - default → ["off", "low", "medium", "high"] (standard 4 levels; xhigh is opt-in per model)
*
* Tolerates version suffixes (e.g., gpt-5-pro-2025-10-06).
Expand Down Expand Up @@ -95,8 +110,8 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
return ["high"];
}

// Gemini 3 Flash supports 4 levels: off (minimal), low, medium, high
if (withoutProviderNamespace.includes("gemini-3-flash")) {
// Gemini Flash chat models support minimal/low/medium/high. Mux exposes minimal as "off".
if (isGeminiFlashThinkingLevelModelName(withoutProviderNamespace)) {
return ["off", "low", "medium", "high"];
}

Expand Down
9 changes: 9 additions & 0 deletions src/common/utils/tokens/modelStats.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,15 @@ describe("getModelStats", () => {
expect(stats.tiered_pricing_threshold_tokens).toBeUndefined();
});

test("resolves Gemini 3.5 Flash with published standard pricing and limits", () => {
const stats = expectStats(KNOWN_MODELS.GEMINI_FLASH.id);
expect(stats.max_input_tokens).toBe(1048576);
expect(stats.max_output_tokens).toBe(65536);
expect(stats.input_cost_per_token).toBe(0.0000015);
expect(stats.output_cost_per_token).toBe(0.000009);
expect(stats.cache_read_input_token_cost).toBe(0.00000015);
});

test("defaults tiered pricing threshold to 200K when metadata only ships *_above_200k rates", () => {
const stats = expectStats("google:gemini-3.1-pro-preview");
expect(stats.tiered_pricing_threshold_tokens).toBe(200000);
Expand Down
Loading
Loading