From 3388ffeb7567f75a31487a3dd774bb9e0818d437 Mon Sep 17 00:00:00 2001 From: brandon chen <9735006+brandonkachen@users.noreply.github.com> Date: Tue, 28 Apr 2026 17:42:27 -0700 Subject: [PATCH 1/3] Scope Kimi tool call compatibility (#560) Co-authored-by: James Grugett --- .../__tests__/kimi-tool-compat.test.ts | 112 ++++++++++++++++++ web/src/llm-api/canopywave.ts | 6 +- web/src/llm-api/kimi-tool-compat.ts | 67 +++++++++++ web/src/llm-api/openrouter.ts | 7 +- web/src/llm-api/types.ts | 12 ++ 5 files changed, 202 insertions(+), 2 deletions(-) create mode 100644 web/src/llm-api/__tests__/kimi-tool-compat.test.ts create mode 100644 web/src/llm-api/kimi-tool-compat.ts diff --git a/web/src/llm-api/__tests__/kimi-tool-compat.test.ts b/web/src/llm-api/__tests__/kimi-tool-compat.test.ts new file mode 100644 index 0000000000..9e4fbdabb0 --- /dev/null +++ b/web/src/llm-api/__tests__/kimi-tool-compat.test.ts @@ -0,0 +1,112 @@ +import { describe, expect, it } from 'bun:test' + +import { addKimiToolCompatibilityFields, isKimiModel } from '../kimi-tool-compat' + +import type { ChatCompletionRequestBody } from '../types' + +describe('addKimiToolCompatibilityFields', () => { + it('adds declaration ids and tool-result names without mutating input', () => { + const body: ChatCompletionRequestBody = { + model: 'moonshotai/kimi-k2.6', + messages: [ + { + role: 'assistant', + content: '', + tool_calls: [ + { + id: 'call_123', + type: 'function', + function: { + name: 'read_files', + arguments: JSON.stringify({ paths: ['README.md'] }), + }, + }, + ], + }, + { + role: 'tool', + tool_call_id: 'call_123', + content: JSON.stringify({ message: 'ok' }), + }, + ], + tools: [ + { + type: 'function', + function: { + name: 'read_files', + description: 'Read files', + parameters: { type: 'object' }, + }, + }, + ], + } + + const result = addKimiToolCompatibilityFields(body) + + expect(result.tools?.[0]).toEqual({ + id: 'tool_1', + type: 'function', + function: { + name: 'read_files', + description: 'Read files', + parameters: { type: 'object' }, + }, + }) + expect(result.messages[1]).toEqual({ + role: 'tool', + tool_call_id: 'call_123', + name: 'read_files', + content: JSON.stringify({ message: 'ok' }), + }) + expect(body.tools?.[0]).not.toHaveProperty('id') + expect(body.messages[1]).not.toHaveProperty('name') + }) + + it('preserves existing ids and names', () => { + const body: ChatCompletionRequestBody = { + model: 'moonshotai/kimi-k2.6', + messages: [ + { + role: 'assistant', + content: '', + tool_calls: [ + { + id: 'call_456', + type: 'function', + function: { + name: 'write_todos', + arguments: JSON.stringify({ todos: [] }), + }, + }, + ], + }, + { + role: 'tool', + tool_call_id: 'call_456', + name: 'existing_name', + content: '{}', + }, + ], + tools: [ + { + id: 'existing_tool_id', + type: 'function', + function: { + name: 'write_todos', + parameters: { type: 'object' }, + }, + }, + ], + } + + expect(addKimiToolCompatibilityFields(body)).toEqual(body) + }) +}) + +describe('isKimiModel', () => { + it('matches only Moonshot model ids', () => { + expect(isKimiModel('moonshotai/kimi-k2.6')).toBe(true) + expect(isKimiModel('anthropic/claude-sonnet-4.5')).toBe(false) + expect(isKimiModel(undefined)).toBe(false) + }) +}) diff --git a/web/src/llm-api/canopywave.ts b/web/src/llm-api/canopywave.ts index 9a5b2ba125..341bc239ce 100644 --- a/web/src/llm-api/canopywave.ts +++ b/web/src/llm-api/canopywave.ts @@ -9,6 +9,7 @@ import { extractRequestMetadata, insertMessageToBigQuery, } from './helpers' +import { addKimiToolCompatibilityFields, isKimiModel } from './kimi-tool-compat' import type { UsageData } from './helpers' import type { InsertMessageBigqueryFn } from '@codebuff/common/types/contracts/bigquery' @@ -88,8 +89,11 @@ function createCanopyWaveRequest(params: { fetch: typeof globalThis.fetch }) { const { body, originalModel, fetch } = params + const providerBody = isKimiModel(originalModel) + ? addKimiToolCompatibilityFields(body) + : body const canopywaveBody: Record = { - ...body, + ...providerBody, model: getCanopyWaveModelId(originalModel), } diff --git a/web/src/llm-api/kimi-tool-compat.ts b/web/src/llm-api/kimi-tool-compat.ts new file mode 100644 index 0000000000..334a41b914 --- /dev/null +++ b/web/src/llm-api/kimi-tool-compat.ts @@ -0,0 +1,67 @@ +import type { ChatCompletionRequestBody } from './types' + +export function isKimiModel(model: unknown): model is string { + return typeof model === 'string' && model.startsWith('moonshotai/') +} + +function getToolCallNamesById( + messages: ChatCompletionRequestBody['messages'], +): Map { + const namesById = new Map() + + for (const message of messages) { + if (message.role !== 'assistant') { + continue + } + for (const toolCall of message.tool_calls ?? []) { + if (toolCall.id && toolCall.function.name) { + namesById.set(toolCall.id, toolCall.function.name) + } + } + } + + return namesById +} + +/** + * Kimi-compatible providers require two OpenAI-compatible extensions that are + * not part of the strict Chat Completions schema: ids on tool declarations and + * names on tool-result messages. + */ +export function addKimiToolCompatibilityFields( + body: ChatCompletionRequestBody, +): ChatCompletionRequestBody { + const namesByToolCallId = getToolCallNamesById(body.messages) + + return { + ...body, + tools: body.tools?.map((tool, index) => { + if (tool.type !== 'function' || tool.id) { + return tool + } + return { + ...tool, + id: `tool_${index + 1}`, + } + }), + messages: body.messages.map((message) => { + if ( + message.role !== 'tool' || + message.name || + typeof message.tool_call_id !== 'string' + ) { + return message + } + + const name = namesByToolCallId.get(message.tool_call_id) + if (!name) { + return message + } + + return { + ...message, + name, + } + }), + } +} diff --git a/web/src/llm-api/openrouter.ts b/web/src/llm-api/openrouter.ts index 2762a60d8d..bf7231abd9 100644 --- a/web/src/llm-api/openrouter.ts +++ b/web/src/llm-api/openrouter.ts @@ -9,6 +9,7 @@ import { extractRequestMetadata, insertMessageToBigQuery, } from './helpers' +import { addKimiToolCompatibilityFields, isKimiModel } from './kimi-tool-compat' import { OpenRouterErrorResponseSchema, OpenRouterStreamChatCompletionChunkSchema, @@ -61,6 +62,10 @@ function createOpenRouterRequest(params: { fetch: typeof globalThis.fetch }) { const { body, openrouterApiKey, fetch } = params + const providerBody = isKimiModel(body.model) + ? addKimiToolCompatibilityFields(body) + : body + return fetch('https://openrouter.ai/api/v1/chat/completions', { method: 'POST', headers: { @@ -69,7 +74,7 @@ function createOpenRouterRequest(params: { 'X-Title': 'Codebuff', 'Content-Type': 'application/json', }, - body: JSON.stringify(body), + body: JSON.stringify(providerBody), // Use custom agent with extended headers timeout for deep-thinking models // @ts-expect-error - dispatcher is a valid undici option not in fetch types dispatcher: openrouterAgent, diff --git a/web/src/llm-api/types.ts b/web/src/llm-api/types.ts index b3bb1eaf97..dd3b89a4d7 100644 --- a/web/src/llm-api/types.ts +++ b/web/src/llm-api/types.ts @@ -28,9 +28,21 @@ export interface ChatMessage { tool_call_id?: string } +export interface ChatCompletionTool { + id?: string + type: string + function?: { + name: string + description?: string + parameters?: unknown + strict?: boolean + } +} + export interface ChatCompletionRequestBody { model: string messages: ChatMessage[] + tools?: ChatCompletionTool[] stream?: boolean temperature?: number max_tokens?: number From 0cdbe0177dd986b307c4c11435eb218b0b04077a Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 28 Apr 2026 17:59:59 -0700 Subject: [PATCH 2/3] Simplify ad response shape (#562) --- cli/src/chat.tsx | 9 +- cli/src/components/waiting-room-screen.tsx | 11 +- cli/src/hooks/use-gravity-ad.ts | 145 ++++----------------- web/src/app/api/v1/ads/_post.ts | 41 ++---- web/src/lib/ad-providers/carbon.ts | 2 +- web/src/lib/ad-providers/gravity.ts | 24 +--- web/src/lib/ad-providers/types.ts | 7 +- 7 files changed, 51 insertions(+), 188 deletions(-) diff --git a/cli/src/chat.tsx b/cli/src/chat.tsx index 09727ea6ea..a8bae5b033 100644 --- a/cli/src/chat.tsx +++ b/cli/src/chat.tsx @@ -174,7 +174,7 @@ export const Chat = ({ }) const hasSubscription = subscriptionData?.hasSubscription ?? false - const { adData, recordImpression } = useGravityAd({ + const { ads, recordImpression } = useGravityAd({ enabled: IS_FREEBUFF || !hasSubscription, provider: 'gravity', fallbackProvider: 'carbon', @@ -1463,11 +1463,8 @@ export const Chat = ({ /> )} - {adData && (IS_FREEBUFF || getAdsEnabled()) && ( - + {ads && (IS_FREEBUFF || getAdsEnabled()) && ( + )} {reviewMode ? ( diff --git a/cli/src/components/waiting-room-screen.tsx b/cli/src/components/waiting-room-screen.tsx index 7cc0aca4a0..9ccba664a7 100644 --- a/cli/src/components/waiting-room-screen.tsx +++ b/cli/src/components/waiting-room-screen.tsx @@ -115,7 +115,7 @@ export const WaitingRoomScreen: React.FC = ({ // forceStart bypasses the "wait for first user message" gate inside the hook, // which would otherwise block ads here since no conversation exists yet. // Try Gravity first, then fall back to Carbon when Gravity doesn't fill. - const { adData, recordImpression } = useGravityAd({ + const { ads, recordImpression } = useGravityAd({ enabled: true, forceStart: true, provider: 'gravity', @@ -369,17 +369,14 @@ export const WaitingRoomScreen: React.FC = ({ {/* Ad banner pinned to the bottom, same look-and-feel as in chat. */} - {adData && ( + {ads && ( - + )} {/* Horizontal separator (mirrors chat input divider style) */} - {!adData && ( + {!ads && ( {'─'.repeat(terminalWidth)} diff --git a/cli/src/hooks/use-gravity-ad.ts b/cli/src/hooks/use-gravity-ad.ts index ea6977864b..0a7f2e9e6d 100644 --- a/cli/src/hooks/use-gravity-ad.ts +++ b/cli/src/hooks/use-gravity-ad.ts @@ -9,7 +9,7 @@ import { getAuthToken } from '../utils/auth' import { IS_FREEBUFF } from '../utils/constants' import { logger } from '../utils/logger' -import type { Message} from '@codebuff/sdk'; +import type { Message } from '@codebuff/sdk' const AD_ROTATION_INTERVAL_MS = 60 * 1000 // 60 seconds per ad const MAX_ADS_AFTER_ACTIVITY = 3 // Show up to 3 ads after last activity, then pause fetching new ads @@ -28,8 +28,6 @@ export type AdResponse = { credits?: number // Set after impression is recorded (in cents) } -export type AdVariant = 'banner' | 'choice' - /** * Which upstream ad network to query. The server maps each provider onto the * same normalized response shape, so the rest of the hook is provider-agnostic. @@ -37,43 +35,19 @@ export type AdVariant = 'banner' | 'choice' export type AdProvider = 'gravity' | 'carbon' export type AdSurface = 'waiting_room' -export type AdData = - | { variant: 'banner'; ad: AdResponse } - | { variant: 'choice'; ads: AdResponse[] } - export type GravityAdState = { - ad: AdResponse | null - adData: AdData | null + ads: AdResponse[] | null isLoading: boolean recordImpression: (impUrl: string) => void } // Consolidated controller state for the ad rotation logic type GravityController = { - cache: AdResponse[] - cacheIndex: number choiceCache: AdResponse[][] // Cache of choice ad sets (each entry is 4 ads) choiceCacheIndex: number - variant: AdVariant | null // Assigned variant from backend impressionsFired: Set adsShownSinceActivity: number tickInFlight: boolean - intervalId: ReturnType | null -} - -// Pure helper: add an ad to the cache (if not already present) -function addToCache(ctrl: GravityController, ad: AdResponse): void { - if (ctrl.cache.some((x) => x.impUrl === ad.impUrl)) return - if (ctrl.cache.length >= MAX_AD_CACHE_SIZE) ctrl.cache.shift() - ctrl.cache.push(ad) -} - -// Pure helper: get the next cached ad (cycles through the cache) -function nextFromCache(ctrl: GravityController): AdResponse | null { - if (ctrl.cache.length === 0) return null - const ad = ctrl.cache[ctrl.cacheIndex % ctrl.cache.length]! - ctrl.cacheIndex = (ctrl.cacheIndex + 1) % ctrl.cache.length - return ad } // Pure helper: add a choice ad set to the choice cache @@ -121,8 +95,7 @@ export const useGravityAd = (options?: { const provider: AdProvider = options?.provider ?? 'gravity' const fallbackProvider = options?.fallbackProvider const surface = options?.surface - const [ad, setAd] = useState(null) - const [adData, setAdData] = useState(null) + const [ads, setAds] = useState(null) const [isLoading, setIsLoading] = useState(false) // Check if terminal height is too small to show ads @@ -146,19 +119,15 @@ export const useGravityAd = (options?: { // Single consolidated controller ref const ctrlRef = useRef({ - cache: [], - cacheIndex: 0, choiceCache: [], choiceCacheIndex: 0, - variant: null, impressionsFired: new Set(), adsShownSinceActivity: 0, tickInFlight: false, - intervalId: null, }) // Ref for the tick function (avoids useCallback dependency issues) - const tickRef = useRef<() => void>(() => { }) + const tickRef = useRef<() => void>(() => {}) // Ref to track whether ads should be hidden for use in async code const shouldHideAdsRef = useRef(shouldHideAds) @@ -197,26 +166,12 @@ export const useGravityAd = (options?: { { creditsGranted: data.creditsGranted }, '[ads] Ad impression credits granted', ) - setAd((cur) => - cur?.impUrl === impUrl - ? { ...cur, credits: data.creditsGranted } - : cur, - ) - // Also update credits in adData for choice ads - setAdData((cur) => { + // Also update credits in visible ads + setAds((cur) => { if (!cur) return cur - if (cur.variant === 'choice') { - return { - ...cur, - ads: cur.ads.map((a) => - a.impUrl === impUrl ? { ...a, credits: data.creditsGranted } : a, - ), - } - } - if (cur.variant === 'banner' && cur.ad.impUrl === impUrl) { - return { ...cur, ad: { ...cur.ad, credits: data.creditsGranted } } - } - return cur + return cur.map((a) => + a.impUrl === impUrl ? { ...a, credits: data.creditsGranted } : a, + ) }) } }) @@ -225,23 +180,7 @@ export const useGravityAd = (options?: { }) } - // Show a single banner ad and fire impression - const showAd = (next: AdResponse): void => { - setAd(next) - setAdData({ variant: 'banner', ad: next }) - recordImpressionOnce(next.impUrl) - } - - // Show a choice ad set (impressions are fired by the component for visible ads only) - const showChoiceAds = (ads: AdResponse[]): void => { - setAd(ads[0] ?? null) // Keep backwards compat for ad field - setAdData({ variant: 'choice', ads }) - } - - type FetchAdResult = - | { variant: 'banner'; ad: AdResponse } - | { variant: 'choice'; ads: AdResponse[] } - | null + type FetchAdResult = { ads: AdResponse[] } | null // Fetch an ad via web API const fetchAd = async (): Promise => { @@ -324,21 +263,15 @@ export const useGravityAd = (options?: { } const data = await response.json() - const variant = data.variant ?? 'banner' - - if ( - variant === 'choice' && - Array.isArray(data.ads) && - data.ads.length > 0 - ) { - return { variant: 'choice', ads: data.ads as AdResponse[] } - } - if (data.ad) { - return { variant: 'banner', ad: data.ad as AdResponse } + if (Array.isArray(data.ads) && data.ads.length > 0) { + return { ads: data.ads as AdResponse[] } } } catch (err) { - logger.error({ err, provider: providerToTry }, '[ads] Failed to fetch ad') + logger.error( + { err, provider: providerToTry }, + '[ads] Failed to fetch ad', + ) } } @@ -363,30 +296,15 @@ export const useGravityAd = (options?: { const result = canFetchNew ? await fetchAd() : null if (result) { - ctrl.variant = result.variant - if (result.variant === 'choice') { - addToChoiceCache(ctrl, result.ads) - ctrl.adsShownSinceActivity += 1 - showChoiceAds(result.ads) - } else { - addToCache(ctrl, result.ad) - ctrl.adsShownSinceActivity += 1 - showAd(result.ad) - } + addToChoiceCache(ctrl, result.ads) + ctrl.adsShownSinceActivity += 1 + setAds(result.ads) } else { // Fall back to cached ads - if (ctrl.variant === 'choice') { - const cachedSet = nextFromChoiceCache(ctrl) - if (cachedSet) { - ctrl.adsShownSinceActivity += 1 - showChoiceAds(cachedSet) - } - } else { - const next = nextFromCache(ctrl) - if (next) { - ctrl.adsShownSinceActivity += 1 - showAd(next) - } + const cachedSet = nextFromChoiceCache(ctrl) + if (cachedSet) { + ctrl.adsShownSinceActivity += 1 + setAds(cachedSet) } } } finally { @@ -414,14 +332,8 @@ export const useGravityAd = (options?: { const result = await fetchAd() if (result) { const ctrl = ctrlRef.current - ctrl.variant = result.variant - if (result.variant === 'choice') { - addToChoiceCache(ctrl, result.ads) - showChoiceAds(result.ads) - } else { - addToCache(ctrl, result.ad) - showAd(result.ad) - } + addToChoiceCache(ctrl, result.ads) + setAds(result.ads) ctrl.adsShownSinceActivity = 1 } setIsLoading(false) @@ -429,19 +341,16 @@ export const useGravityAd = (options?: { // Start interval for rotation (consistent 60s intervals) const id = setInterval(() => tickRef.current(), AD_ROTATION_INTERVAL_MS) - ctrlRef.current.intervalId = id return () => { clearInterval(id) - ctrlRef.current.intervalId = null } }, [shouldStart, shouldHideAds, provider, fallbackProvider, surface]) - // Don't return ad when ads should be hidden + // Don't return ads when ads should be hidden const visible = shouldStart && !shouldHideAds return { - ad: visible ? ad : null, - adData: visible ? adData : null, + ads: visible ? ads : null, isLoading, recordImpression: recordImpressionOnce, } diff --git a/web/src/app/api/v1/ads/_post.ts b/web/src/app/api/v1/ads/_post.ts index a56846b055..370f11622b 100644 --- a/web/src/app/api/v1/ads/_post.ts +++ b/web/src/app/api/v1/ads/_post.ts @@ -53,6 +53,10 @@ export type AdsEnv = { CB_ENVIRONMENT: string } +function noAdsResponse(provider: AdProviderId) { + return NextResponse.json({ ads: [], provider }, { status: 200 }) +} + export async function postAds(params: { req: NextRequest getUserInfoFromApiKey: GetUserInfoFromApiKeyFn @@ -119,13 +123,13 @@ export async function postAds(params: { if (providerId === 'carbon') { if (!serverEnv.CARBON_ZONE_KEY) { logger.warn('[ads] CARBON_ZONE_KEY not configured') - return NextResponse.json({ ad: null, provider: providerId }, { status: 200 }) + return noAdsResponse(providerId) } provider = createCarbonProvider({ zoneKey: serverEnv.CARBON_ZONE_KEY }) } else { if (!serverEnv.GRAVITY_API_KEY) { logger.warn('[ads] GRAVITY_API_KEY not configured') - return NextResponse.json({ ad: null, provider: providerId }, { status: 200 }) + return noAdsResponse(providerId) } provider = createGravityProvider({ apiKey: serverEnv.GRAVITY_API_KEY }) } @@ -146,20 +150,14 @@ export async function postAds(params: { }) if (!result) { - return NextResponse.json( - { ad: null, provider: provider.id }, - { status: 200 }, - ) + return noAdsResponse(provider.id) } - const adsToPersist: NormalizedAd[] = - result.variant === 'choice' ? result.ads : [result.ad] - // Persist served ads so the impression endpoint can validate + fire the // correct pixels. Any DB failure is logged but doesn't block serving. try { await Promise.all( - adsToPersist.map((ad) => + result.ads.map((ad) => db .insert(schema.adImpression) .values({ @@ -184,7 +182,7 @@ export async function postAds(params: { { userId, provider: provider.id, - adCount: adsToPersist.length, + adCount: result.ads.length, error: dbError instanceof Error ? { name: dbError.name, message: dbError.message } @@ -200,25 +198,12 @@ export async function postAds(params: { return rest } - if (result.variant === 'choice') { - logger.info( - { provider: provider.id, variant: 'choice', adCount: result.ads.length }, - '[ads] Fetched choice ads', - ) - return NextResponse.json({ - ads: result.ads.map(toClient), - variant: 'choice', - provider: provider.id, - }) - } - logger.info( - { provider: provider.id, variant: 'banner' }, - '[ads] Fetched banner ad', + { provider: provider.id, adCount: result.ads.length }, + '[ads] Fetched ads', ) return NextResponse.json({ - ad: toClient(result.ad), - variant: 'banner', + ads: result.ads.map(toClient), provider: provider.id, }) } catch (error) { @@ -235,7 +220,7 @@ export async function postAds(params: { ) return NextResponse.json( { - ad: null, + ads: [], provider: providerId, error: getErrorObject(error), }, diff --git a/web/src/lib/ad-providers/carbon.ts b/web/src/lib/ad-providers/carbon.ts index 64a926436f..f4775a00ac 100644 --- a/web/src/lib/ad-providers/carbon.ts +++ b/web/src/lib/ad-providers/carbon.ts @@ -164,7 +164,7 @@ export function createCarbonProvider(config: { return null } - return { variant: 'choice', ads } + return { ads } }, } } diff --git a/web/src/lib/ad-providers/gravity.ts b/web/src/lib/ad-providers/gravity.ts index 4ae33b5145..e0e8efec4e 100644 --- a/web/src/lib/ad-providers/gravity.ts +++ b/web/src/lib/ad-providers/gravity.ts @@ -1,18 +1,14 @@ -import { createHash } from 'crypto' - import { buildArray } from '@codebuff/common/util/array' import type { AdMessage, AdProvider, - AdVariant, FetchAdInput, FetchAdResult, NormalizedAd, } from './types' const GRAVITY_URL = 'https://server.trygravity.ai/api/v1/ad' -const BANNER_PLACEMENT_ID = 'code-assist-ad' const CHOICE_PLACEMENT_IDS = [ 'choice-ad-1', 'choice-ad-2', @@ -50,15 +46,6 @@ function normalize(raw: GravityRawAd): NormalizedAd { } } -/** - * A/B test: deterministically assign a user to the `banner` or `choice` - * variant based on their userId. Stable across requests. - */ -function getGravityVariant(userId: string): AdVariant { - const hash = createHash('sha256').update(`ad-variant:${userId}`).digest() - return hash[0] % 2 === 0 ? 'banner' : 'choice' -} - /** * Extract the content from the last tag in a string. * The CLI wraps raw user text in that tag; if no tag is found, returns the @@ -111,16 +98,12 @@ export function createGravityProvider(config: { apiKey: string }): AdProvider { fetch, } = input - const variant = - input.surface === 'waiting_room' ? 'choice' : getGravityVariant(userId) const filteredMessages = prepareGravityMessages(messages) const placementIds = input.surface === 'waiting_room' ? WAITING_ROOM_PLACEMENT_IDS - : variant === 'choice' - ? CHOICE_PLACEMENT_IDS - : [BANNER_PLACEMENT_ID] + : CHOICE_PLACEMENT_IDS const placements = placementIds.map((id) => ({ placement: 'below_response', @@ -192,10 +175,7 @@ export function createGravityProvider(config: { apiKey: string }): AdProvider { return null } - if (variant === 'choice') { - return { variant: 'choice', ads: ads.map(normalize) } - } - return { variant: 'banner', ad: normalize(ads[0]) } + return { ads: ads.map(normalize) } }, } } diff --git a/web/src/lib/ad-providers/types.ts b/web/src/lib/ad-providers/types.ts index fb3284e2af..ced439e8f7 100644 --- a/web/src/lib/ad-providers/types.ts +++ b/web/src/lib/ad-providers/types.ts @@ -8,8 +8,6 @@ import type { Logger } from '@codebuff/common/types/contracts/logger' */ export type AdProviderId = 'gravity' | 'carbon' -export type AdVariant = 'banner' | 'choice' - /** * Normalized ad shape returned by every provider. The CLI renders against * this shape; provider modules are responsible for mapping their upstream @@ -62,10 +60,7 @@ export type FetchAdInput = { fetch: typeof globalThis.fetch } -export type FetchAdResult = - | { variant: 'banner'; ad: NormalizedAd } - | { variant: 'choice'; ads: NormalizedAd[] } - | null +export type FetchAdResult = { ads: NormalizedAd[] } | null export type AdProvider = { id: AdProviderId From 37020fee9054ebe9272957661d40b789bfab9abc Mon Sep 17 00:00:00 2001 From: James Grugett Date: Tue, 28 Apr 2026 18:00:16 -0700 Subject: [PATCH 3/3] Use Kimi K2.6 for free and lite (#561) --- agents/__tests__/editor.test.ts | 11 ++ agents/base2/base2.ts | 2 +- agents/editor/editor-lite.ts | 2 +- agents/editor/editor.ts | 6 +- agents/reviewer/code-reviewer-lite.ts | 2 +- agents/types/agent-definition.ts | 1 + .../components/freebuff-model-selector.tsx | 8 +- cli/src/components/waiting-room-screen.tsx | 4 +- cli/src/hooks/use-freebuff-session.ts | 6 +- common/src/__tests__/freebuff-models.test.ts | 6 + common/src/constants/free-agents.ts | 6 +- common/src/constants/freebuff-models.ts | 12 +- .../types/agent-definition.ts | 1 + common/src/types/freebuff-session.ts | 10 +- freebuff/README.md | 2 +- freebuff/SPEC.md | 2 +- freebuff/web/src/app/home-client.tsx | 2 +- .../completions/__tests__/completions.test.ts | 10 +- .../session/__tests__/session.test.ts | 4 +- web/src/app/docs/[category]/[slug]/page.tsx | 2 +- web/src/content/advanced/how-does-it-work.mdx | 4 +- web/src/content/advanced/what-models.mdx | 6 +- web/src/content/help/faq.mdx | 2 +- web/src/content/tips/modes.mdx | 4 +- .../__tests__/fireworks-deployment.test.ts | 66 +++++++++- web/src/llm-api/canopywave.ts | 8 -- web/src/llm-api/fireworks.ts | 11 +- .../free-session/__tests__/public-api.test.ts | 118 +++++++++--------- .../__tests__/session-view.test.ts | 2 +- web/src/server/free-session/config.ts | 2 +- web/src/server/free-session/public-api.ts | 4 +- web/src/server/free-session/store.ts | 2 +- 32 files changed, 203 insertions(+), 125 deletions(-) diff --git a/agents/__tests__/editor.test.ts b/agents/__tests__/editor.test.ts index 36d6b75c5c..dd5630930b 100644 --- a/agents/__tests__/editor.test.ts +++ b/agents/__tests__/editor.test.ts @@ -67,6 +67,11 @@ describe('editor agent', () => { expect(glmEditor.model).toBe('z-ai/glm-5.1') }) + test('creates kimi editor', () => { + const kimiEditor = createCodeEditor({ model: 'kimi' }) + expect(kimiEditor.model).toBe('moonshotai/kimi-k2.6') + }) + test('creates minimax editor', () => { const minimaxEditor = createCodeEditor({ model: 'minimax' }) expect(minimaxEditor.model).toBe('minimax/minimax-m2.7') @@ -84,6 +89,12 @@ describe('editor agent', () => { expect(glmEditor.instructionsPrompt).not.toContain('') }) + test('kimi editor does not include think tags in instructions', () => { + const kimiEditor = createCodeEditor({ model: 'kimi' }) + expect(kimiEditor.instructionsPrompt).not.toContain('') + expect(kimiEditor.instructionsPrompt).not.toContain('') + }) + test('minimax editor does not include think tags in instructions', () => { const minimaxEditor = createCodeEditor({ model: 'minimax' }) expect(minimaxEditor.instructionsPrompt).not.toContain('') diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index 1a81f948bf..b1e24efff6 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -25,7 +25,7 @@ export function createBase2( const isFree = mode === 'free' || mode === 'lite' const isSonnet = false - const model = isFree ? 'z-ai/glm-5.1' : 'anthropic/claude-opus-4.7' + const model = isFree ? 'moonshotai/kimi-k2.6' : 'anthropic/claude-opus-4.7' return { publisher, diff --git a/agents/editor/editor-lite.ts b/agents/editor/editor-lite.ts index 29225f0c29..6dbb4bb3c6 100644 --- a/agents/editor/editor-lite.ts +++ b/agents/editor/editor-lite.ts @@ -3,7 +3,7 @@ import { createCodeEditor } from './editor' import type { AgentDefinition } from '../types/agent-definition' const definition: AgentDefinition = { - ...createCodeEditor({ model: 'glm' }), + ...createCodeEditor({ model: 'kimi' }), id: 'editor-lite', } export default definition diff --git a/agents/editor/editor.ts b/agents/editor/editor.ts index c98544d0f2..bb31eaaeb1 100644 --- a/agents/editor/editor.ts +++ b/agents/editor/editor.ts @@ -4,7 +4,7 @@ import { publisher } from '../constants' import type { AgentDefinition } from '../types/agent-definition' export const createCodeEditor = (options: { - model: 'gpt-5' | 'opus' | 'glm' | 'minimax' + model: 'gpt-5' | 'opus' | 'glm' | 'kimi' | 'minimax' }): Omit => { const { model } = options return { @@ -14,6 +14,8 @@ export const createCodeEditor = (options: { ? 'openai/gpt-5.1' : options.model === 'minimax' ? 'minimax/minimax-m2.7' + : options.model === 'kimi' + ? 'moonshotai/kimi-k2.6' : options.model === 'glm' ? 'z-ai/glm-5.1' : 'anthropic/claude-opus-4.7', @@ -67,7 +69,7 @@ OR for new files or major rewrites: } -${model === 'gpt-5' || model === 'glm' || model === 'minimax' +${model === 'gpt-5' || model === 'glm' || model === 'kimi' || model === 'minimax' ? '' : `Before you start writing your implementation, you should use tags to think about the best way to implement the changes. diff --git a/agents/reviewer/code-reviewer-lite.ts b/agents/reviewer/code-reviewer-lite.ts index feafb87c45..888cadf4f7 100644 --- a/agents/reviewer/code-reviewer-lite.ts +++ b/agents/reviewer/code-reviewer-lite.ts @@ -5,7 +5,7 @@ import { createReviewer } from './code-reviewer' const definition: SecretAgentDefinition = { id: 'code-reviewer-lite', publisher, - ...createReviewer('z-ai/glm-5.1'), + ...createReviewer('moonshotai/kimi-k2.6'), } export default definition diff --git a/agents/types/agent-definition.ts b/agents/types/agent-definition.ts index 3608f36315..088dd1dca1 100644 --- a/agents/types/agent-definition.ts +++ b/agents/types/agent-definition.ts @@ -423,6 +423,7 @@ export type ModelName = // Other open source models | 'moonshotai/kimi-k2' | 'moonshotai/kimi-k2:nitro' + | 'moonshotai/kimi-k2.6' | 'z-ai/glm-5' | 'z-ai/glm-5.1' | 'z-ai/glm-4.6' diff --git a/cli/src/components/freebuff-model-selector.tsx b/cli/src/components/freebuff-model-selector.tsx index a453a15389..ddc2922ab6 100644 --- a/cli/src/components/freebuff-model-selector.tsx +++ b/cli/src/components/freebuff-model-selector.tsx @@ -5,7 +5,7 @@ import React, { useCallback, useEffect, useMemo, useState } from 'react' import { Button } from './button' import { FALLBACK_FREEBUFF_MODEL_ID, - FREEBUFF_GLM_MODEL_ID, + FREEBUFF_KIMI_MODEL_ID, FREEBUFF_MODELS, getFreebuffDeploymentAvailabilityLabel, isFreebuffModelAvailable, @@ -25,8 +25,8 @@ import { import type { KeyEvent } from '@opentui/core' const FREEBUFF_MODEL_SELECTOR_MODELS = [ - ...FREEBUFF_MODELS.filter((model) => model.id === FREEBUFF_GLM_MODEL_ID), - ...FREEBUFF_MODELS.filter((model) => model.id !== FREEBUFF_GLM_MODEL_ID), + ...FREEBUFF_MODELS.filter((model) => model.id === FREEBUFF_KIMI_MODEL_ID), + ...FREEBUFF_MODELS.filter((model) => model.id !== FREEBUFF_KIMI_MODEL_ID), ] /** @@ -72,7 +72,7 @@ export const FreebuffModelSelector: React.FC = () => { // unavailable (e.g. deployment hours close while the picker is open), // swap to the always-available fallback so Enter doesn't POST a model // the server will immediately reject. In-memory only — the user's saved - // preference (e.g. GLM) is preserved for the next launch. + // preference (e.g. Kimi) is preserved for the next launch. if ( (session?.status === 'none' || !session) && !isFreebuffModelAvailable(selectedModel, new Date(now)) diff --git a/cli/src/components/waiting-room-screen.tsx b/cli/src/components/waiting-room-screen.tsx index 9ccba664a7..7f83f748d6 100644 --- a/cli/src/components/waiting-room-screen.tsx +++ b/cli/src/components/waiting-room-screen.tsx @@ -260,7 +260,7 @@ export const WaitingRoomScreen: React.FC = ({ Elapsed {formatElapsed(elapsedMs)} - {/* Per-model session quota (e.g. GLM 5.1 caps at 5/12h). Only + {/* Per-model session quota (e.g. Kimi K2.6 caps at 5/12h). Only rendered for rate-limited models so the Minimax queue stays clutter-free. */} {session.rateLimit && ( @@ -343,7 +343,7 @@ export const WaitingRoomScreen: React.FC = ({ )} - {/* Per-model session quota exhausted (e.g. 5+ GLM sessions in the + {/* Per-model session quota exhausted (e.g. 5+ Kimi sessions in the last 12h). Terminal for this run — the user can exit and come back once the oldest session in the window rolls off. */} {session?.status === 'rate_limited' && ( diff --git a/cli/src/hooks/use-freebuff-session.ts b/cli/src/hooks/use-freebuff-session.ts index 463a49126f..c78d4bbd0b 100644 --- a/cli/src/hooks/use-freebuff-session.ts +++ b/cli/src/hooks/use-freebuff-session.ts @@ -104,7 +104,7 @@ async function callSession( return body } } - // 429 from POST is the per-model session-quota reject (e.g. too many GLM + // 429 from POST is the per-model session-quota reject (e.g. too many Kimi // sessions in the last 12h). Terminal for the current poll — the CLI shows // a screen explaining the limit and when the user can try again. The 429 // status (rather than 200) keeps older CLIs in their error path so they @@ -442,9 +442,9 @@ export function useFreebuffSession(): UseFreebuffSessionResult { } if (next.status === 'model_unavailable') { // Server says the requested model isn't available right now (e.g. - // GLM outside deployment hours). Flip to the always-available + // Kimi outside deployment hours). Flip to the always-available // fallback for this run. In-memory only — `setSelectedModel` - // doesn't persist, so the user's saved preference (e.g. GLM) + // doesn't persist, so the user's saved preference (e.g. Kimi) // is preserved for their next launch during deployment hours. useFreebuffModelStore .getState() diff --git a/common/src/__tests__/freebuff-models.test.ts b/common/src/__tests__/freebuff-models.test.ts index 0d01d2762c..752f6bb286 100644 --- a/common/src/__tests__/freebuff-models.test.ts +++ b/common/src/__tests__/freebuff-models.test.ts @@ -1,11 +1,17 @@ import { describe, expect, test } from 'bun:test' import { + DEFAULT_FREEBUFF_MODEL_ID, + FREEBUFF_KIMI_MODEL_ID, getFreebuffDeploymentAvailabilityLabel, isFreebuffDeploymentHours, } from '../constants/freebuff-models' describe('freebuff model availability', () => { + test('defaults to Kimi K2.6', () => { + expect(DEFAULT_FREEBUFF_MODEL_ID).toBe(FREEBUFF_KIMI_MODEL_ID) + }) + test('formats the close time in the user local timezone while deployment is open', () => { expect( getFreebuffDeploymentAvailabilityLabel(new Date('2026-01-05T18:00:00Z'), { diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts index 308e12df6d..4a2a4a147e 100644 --- a/common/src/constants/free-agents.ts +++ b/common/src/constants/free-agents.ts @@ -28,7 +28,7 @@ export const FREE_MODE_AGENT_MODELS: Record> = { // Root orchestrator 'base2-free': new Set([ 'minimax/minimax-m2.7', - 'z-ai/glm-5.1', + 'moonshotai/kimi-k2.6', ]), // File exploration agents @@ -46,13 +46,13 @@ export const FREE_MODE_AGENT_MODELS: Record> = { // Editor for free mode 'editor-lite': new Set([ 'minimax/minimax-m2.7', - 'z-ai/glm-5.1', + 'moonshotai/kimi-k2.6', ]), // Code reviewer for free mode 'code-reviewer-lite': new Set([ 'minimax/minimax-m2.7', - 'z-ai/glm-5.1', + 'moonshotai/kimi-k2.6', ]), } diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts index 8b3e9d82d9..9c6ff423ee 100644 --- a/common/src/constants/freebuff-models.ts +++ b/common/src/constants/freebuff-models.ts @@ -21,7 +21,7 @@ export interface FreebuffModelOption { * the caller's local timezone. The CLI should render * `getFreebuffDeploymentAvailabilityLabel()` instead. */ export const FREEBUFF_DEPLOYMENT_HOURS_LABEL = '9am ET-5pm PT every day' -export const FREEBUFF_GLM_MODEL_ID = 'z-ai/glm-5.1' +export const FREEBUFF_KIMI_MODEL_ID = 'moonshotai/kimi-k2.6' export const FREEBUFF_MINIMAX_MODEL_ID = 'minimax/minimax-m2.7' const FREEBUFF_EASTERN_TIMEZONE = 'America/New_York' const FREEBUFF_PACIFIC_TIMEZONE = 'America/Los_Angeles' @@ -47,8 +47,8 @@ export const FREEBUFF_MODELS = [ availability: 'always', }, { - id: FREEBUFF_GLM_MODEL_ID, - displayName: 'GLM 5.1', + id: FREEBUFF_KIMI_MODEL_ID, + displayName: 'Kimi K2.6', tagline: 'Smartest', availability: 'deployment_hours', }, @@ -57,15 +57,15 @@ export const FREEBUFF_MODELS = [ export type FreebuffModelId = (typeof FREEBUFF_MODELS)[number]['id'] /** What new freebuff users see selected in the picker. May not be currently - * available (GLM is closed outside deployment hours); callers that need an + * available (Kimi is closed outside deployment hours); callers that need an * always-available id for resolution / auto-fallbacks should use * FALLBACK_FREEBUFF_MODEL_ID instead. */ -export const DEFAULT_FREEBUFF_MODEL_ID: FreebuffModelId = FREEBUFF_GLM_MODEL_ID +export const DEFAULT_FREEBUFF_MODEL_ID: FreebuffModelId = FREEBUFF_KIMI_MODEL_ID /** Always-available fallback used when the requested model can't be served * right now (unknown id, deployment hours closed, etc.). Kept distinct from * DEFAULT_FREEBUFF_MODEL_ID so a new user's "preferred default" can be the - * smartest model without auto-flipping anyone to a closed deployment. */ + * smartest model without auto-flipping anyone to a closed serverless model. */ export const FALLBACK_FREEBUFF_MODEL_ID: FreebuffModelId = FREEBUFF_MINIMAX_MODEL_ID diff --git a/common/src/templates/initial-agents-dir/types/agent-definition.ts b/common/src/templates/initial-agents-dir/types/agent-definition.ts index 3608f36315..088dd1dca1 100644 --- a/common/src/templates/initial-agents-dir/types/agent-definition.ts +++ b/common/src/templates/initial-agents-dir/types/agent-definition.ts @@ -423,6 +423,7 @@ export type ModelName = // Other open source models | 'moonshotai/kimi-k2' | 'moonshotai/kimi-k2:nitro' + | 'moonshotai/kimi-k2.6' | 'z-ai/glm-5' | 'z-ai/glm-5.1' | 'z-ai/glm-4.6' diff --git a/common/src/types/freebuff-session.ts b/common/src/types/freebuff-session.ts index 31fc4c87ea..428a73df41 100644 --- a/common/src/types/freebuff-session.ts +++ b/common/src/types/freebuff-session.ts @@ -9,7 +9,7 @@ /** * Per-model usage counter surfaced to the CLI so the waiting-room UI can * render "N of M sessions used" alongside queue/active state. Present when - * the joined model has a rate limit applied (today: GLM 5.1 with 5 admits + * the joined model has a rate limit applied (today: Kimi K2.6 with 5 admits * per 12-hour window). `recentCount` is the number of admissions inside * `windowHours` at the time the response was produced — see also the * standalone `rate_limited` status for the reject path. @@ -72,7 +72,7 @@ export type FreebuffSessionServerResponse = queueDepthByModel: Record estimatedWaitMs: number queuedAt: string - /** Rate-limit quota for rate-limited models (GLM 5.1 today). Absent + /** Rate-limit quota for rate-limited models (Kimi K2.6 today). Absent * for unlimited models or when the status was produced outside the * rate-limit check path (e.g. pure read via GET). */ rateLimit?: FreebuffSessionRateLimit @@ -85,7 +85,7 @@ export type FreebuffSessionServerResponse = admittedAt: string expiresAt: string remainingMs: number - /** Rate-limit quota for rate-limited models (GLM 5.1 today). Absent + /** Rate-limit quota for rate-limited models (Kimi K2.6 today). Absent * for unlimited models or when the status was produced outside the * rate-limit check path (e.g. pure read via GET). */ rateLimit?: FreebuffSessionRateLimit @@ -131,7 +131,7 @@ export type FreebuffSessionServerResponse = /** User has an active session bound to a different model. Returned * from POST /session when they pick a new model without ending their * current session first. The CLI shows a confirmation prompt: "End - * your active GLM session to switch?" → on confirm, DELETE then + * your active Kimi session to switch?" → on confirm, DELETE then * re-POST with the new model. */ status: 'model_locked' currentModel: string @@ -152,7 +152,7 @@ export type FreebuffSessionServerResponse = } | { /** User has used up their per-model admission quota in the rolling - * window (GLM 5.1: 5 one-hour sessions per 12h). Returned from POST + * window (Kimi K2.6: 5 one-hour sessions per 12h). Returned from POST * /session before the user is placed in the queue. `retryAfterMs` is * the time until the oldest admission inside the window falls off * and one quota slot opens up — clients should show the user when diff --git a/freebuff/README.md b/freebuff/README.md index 0749fc7c0b..1ba4405f63 100644 --- a/freebuff/README.md +++ b/freebuff/README.md @@ -54,7 +54,7 @@ freebuff **How can it be free?** Freebuff is supported by ads shown in the CLI. -**What models do you use?** GLM 5.1 as the main coding agent, Gemini 3.1 Flash Lite for finding files and research, and GPT-5.4 for deep thinking if you connect your ChatGPT subscription. +**What models do you use?** Kimi K2.6 as the main coding agent, Gemini 3.1 Flash Lite for finding files and research, and GPT-5.4 for deep thinking if you connect your ChatGPT subscription. **Are you training on my data?** No. We only use model providers that do not train on our requests. Your code stays yours. diff --git a/freebuff/SPEC.md b/freebuff/SPEC.md index 195081533c..5fad083691 100644 --- a/freebuff/SPEC.md +++ b/freebuff/SPEC.md @@ -84,7 +84,7 @@ Freebuff only supports **FREE mode**. All mode-related features are stripped. | `/agent:gpt-5` | Premium agent, not available in free tier | | `/review` | Uses thinker-gpt under the hood | | `/publish` | Agent publishing not available in free tier | -| `/image` (+ `/img`, `/attach`) | Image attachments unavailable with free model (GLM 5.1) | +| `/image` (+ `/img`, `/attach`) | Image attachments unavailable with free model (Kimi K2.6) | ### Commands to KEEP diff --git a/freebuff/web/src/app/home-client.tsx b/freebuff/web/src/app/home-client.tsx index 3cff424a37..6a016272e4 100644 --- a/freebuff/web/src/app/home-client.tsx +++ b/freebuff/web/src/app/home-client.tsx @@ -31,7 +31,7 @@ const faqs = [ { question: 'What models do you use?', answer: - 'GLM 5.1 as the main coding agent. Gemini 3.1 Flash Lite for finding files and research.\n\nConnect your ChatGPT subscription to unlock GPT-5.4 for deep thinking.', + 'Kimi K2.6 as the main coding agent. Gemini 3.1 Flash Lite for finding files and research.\n\nConnect your ChatGPT subscription to unlock GPT-5.4 for deep thinking.', }, { question: 'Which countries is Freebuff available in?', diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts index e0b531c706..ce28f91e01 100644 --- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts +++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts @@ -642,7 +642,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { expect(body.countryBlockReason).toBe('anonymized_or_unknown_country') }) - it('lets freebuff use GLM 5.1 through Fireworks availability rules', async () => { + it('lets freebuff use Kimi K2.6 through Fireworks availability rules', async () => { const fetchedBodies: Record[] = [] const fetchViaFireworks = mock( async (_url: string | URL | Request, init?: RequestInit) => { @@ -650,7 +650,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { return new Response( JSON.stringify({ id: 'test-id', - model: 'accounts/fireworks/models/glm-5p1', + model: 'accounts/fireworks/models/kimi-k2p6', choices: [{ message: { content: 'test response' } }], usage: { prompt_tokens: 10, @@ -672,7 +672,7 @@ describe('/api/v1/chat/completions POST endpoint', () => { method: 'POST', headers: allowedFreeModeHeaders('test-api-key-new-free'), body: JSON.stringify({ - model: 'z-ai/glm-5.1', + model: 'moonshotai/kimi-k2.6', stream: false, codebuff_metadata: { run_id: 'run-free', @@ -701,9 +701,9 @@ describe('/api/v1/chat/completions POST endpoint', () => { expect(response.status).toBe(200) expect(fetchedBodies).toHaveLength(1) expect(fetchedBodies[0].model).toBe( - 'accounts/fireworks/models/glm-5p1', + 'accounts/fireworks/models/kimi-k2p6', ) - expect(body.model).toBe('z-ai/glm-5.1') + expect(body.model).toBe('moonshotai/kimi-k2.6') expect(body.provider).toBe('Fireworks') } else { expect(response.status).toBe(503) diff --git a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts index 4c55a6458b..54481dca88 100644 --- a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts +++ b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts @@ -281,10 +281,10 @@ describe('POST /api/v1/freebuff/session', () => { expect(body.status).toBe('queued') }) - test('returns model_unavailable for GLM outside deployment hours', async () => { + test('returns model_unavailable for Kimi outside deployment hours', async () => { const sessionDeps = makeSessionDeps() const resp = await postFreebuffSession( - makeReq('ok', { model: 'z-ai/glm-5.1' }), + makeReq('ok', { model: 'moonshotai/kimi-k2.6' }), makeDeps(sessionDeps, 'u1'), ) expect(resp.status).toBe(409) diff --git a/web/src/app/docs/[category]/[slug]/page.tsx b/web/src/app/docs/[category]/[slug]/page.tsx index 44d5174e0a..21d093d494 100644 --- a/web/src/app/docs/[category]/[slug]/page.tsx +++ b/web/src/app/docs/[category]/[slug]/page.tsx @@ -33,7 +33,7 @@ const FAQ_ITEMS = [ { question: 'What model does Codebuff use?', answer: - 'Multiple. The orchestrator uses Claude Opus 4.7 in Default and Max modes, or GLM 5.1 in Lite mode. Subagents are matched to their tasks: Claude Opus 4.7 and GPT-5.4 for deep reasoning and code review, and Gemini 3.1 Flash Lite for terminal commands, file discovery, and web/docs research.', + 'Multiple. The orchestrator uses Claude Opus 4.7 in Default and Max modes, or Kimi K2.6 in Lite mode. Subagents are matched to their tasks: Claude Opus 4.7 and GPT-5.4 for deep reasoning and code review, and Gemini 3.1 Flash Lite for terminal commands, file discovery, and web/docs research.', }, { question: 'Can I use my Claude Pro or Max subscription with Codebuff?', diff --git a/web/src/content/advanced/how-does-it-work.mdx b/web/src/content/advanced/how-does-it-work.mdx index 08f13366f5..79d2ecab31 100644 --- a/web/src/content/advanced/how-does-it-work.mdx +++ b/web/src/content/advanced/how-does-it-work.mdx @@ -24,8 +24,8 @@ The main agent ("Buffy") runs on Claude Opus 4.7. It reads your prompt, gathers - [**Code Searcher**](/publishers/codebuff/agents/code-searcher) - grep-style pattern matching - [**Researcher**](/publishers/codebuff/agents/researcher) (Gemini 3.1 Flash Lite) - web and docs lookup - [**Thinker**](/publishers/codebuff/agents/thinker) (Claude Opus 4.7, GPT-5.4) - works through hard problems -- [**Editor**](/publishers/codebuff/agents/editor) (Claude Opus 4.7, GPT-5.1, GLM 5.1) - writes and modifies code -- [**Reviewer**](/publishers/codebuff/agents/reviewer) (Claude Opus 4.7, GLM 5.1 in Lite mode) - catches bugs and style issues +- [**Editor**](/publishers/codebuff/agents/editor) (Claude Opus 4.7, GPT-5.1, Kimi K2.6) - writes and modifies code +- [**Reviewer**](/publishers/codebuff/agents/reviewer) (Claude Opus 4.7, Kimi K2.6 in Lite mode) - catches bugs and style issues - [**Basher**](/publishers/codebuff/agents/basher) (Gemini 3.1 Flash Lite) - runs terminal commands ## Best-of-N Selection (Max Mode) diff --git a/web/src/content/advanced/what-models.mdx b/web/src/content/advanced/what-models.mdx index 6fb3cd7367..f3dc59b386 100644 --- a/web/src/content/advanced/what-models.mdx +++ b/web/src/content/advanced/what-models.mdx @@ -19,7 +19,7 @@ The main agent ("Buffy") coordinates everything: | Default | Opus 4.7 | | Plan | Opus 4.7 | | Max | Opus 4.7 | - | Lite | GLM 5.1 | + | Lite | Kimi K2.6 | ## Subagents @@ -29,7 +29,7 @@ The orchestrator spawns these for specific jobs: | Task | Models | |------|--------| - | Code editing | Claude Opus 4.7, GLM 5.1 | + | Code editing | Claude Opus 4.7, Kimi K2.6 | | Thinking/reasoning | Claude Opus 4.7, GPT-5.4 | | Code review | Claude Opus 4.7, GPT-5.4 | | File discovery | Gemini 3.1 Flash Lite, Gemini 2.5 Flash Lite | @@ -37,4 +37,4 @@ The orchestrator spawns these for specific jobs: | Web/docs research | Gemini 3.1 Flash Lite | -Max mode runs multiple implementations in parallel and picks the best one. Default mode runs a single implementation pass. Lite mode uses GLM 5.1 and includes code review support. +Max mode runs multiple implementations in parallel and picks the best one. Default mode runs a single implementation pass. Lite mode uses Kimi K2.6 and includes code review support. diff --git a/web/src/content/help/faq.mdx b/web/src/content/help/faq.mdx index 477adbd8f5..bfd1df0839 100644 --- a/web/src/content/help/faq.mdx +++ b/web/src/content/help/faq.mdx @@ -13,7 +13,7 @@ Software development: Writing features, tests, and scripts across common languag ## What model does Codebuff use? -Multiple. The orchestrator uses Claude Opus 4.7 in Default and Max modes, or GLM 5.1 in Lite mode. Subagents are matched to their tasks: Claude Opus 4.7 and GPT-5.4 for deep reasoning and code review, and Gemini 3.1 Flash Lite for terminal commands, file discovery, and web/docs research. See [What models do you use?](/docs/advanced/what-models) for the full breakdown. +Multiple. The orchestrator uses Claude Opus 4.7 in Default and Max modes, or Kimi K2.6 in Lite mode. Subagents are matched to their tasks: Claude Opus 4.7 and GPT-5.4 for deep reasoning and code review, and Gemini 3.1 Flash Lite for terminal commands, file discovery, and web/docs research. See [What models do you use?](/docs/advanced/what-models) for the full breakdown. ## Can I use my Claude Pro or Max subscription with Codebuff? diff --git a/web/src/content/tips/modes.mdx b/web/src/content/tips/modes.mdx index 1b67daecd6..acab5d8aaa 100644 --- a/web/src/content/tips/modes.mdx +++ b/web/src/content/tips/modes.mdx @@ -15,7 +15,7 @@ Codebuff has four modes. Switch during a session with `Shift+Tab` or `/mode:` co | Default | Claude Opus 4.7 | editor | Yes | | Max | Claude Opus 4.7 | editor-multi-prompt | Yes | | Plan | Claude Opus 4.7 | None | No | - | Lite | GLM 5.1 | None | No | + | Lite | Kimi K2.6 | None | No | ## Default @@ -60,7 +60,7 @@ Switch to this mode with `/mode:plan`. ## Lite -GLM 5.1, cheaper and faster. +Kimi K2.6, cheaper and faster. An efficient mode for most coding tasks. diff --git a/web/src/llm-api/__tests__/fireworks-deployment.test.ts b/web/src/llm-api/__tests__/fireworks-deployment.test.ts index 00ccf1f816..2d897767ae 100644 --- a/web/src/llm-api/__tests__/fireworks-deployment.test.ts +++ b/web/src/llm-api/__tests__/fireworks-deployment.test.ts @@ -12,6 +12,7 @@ import { import type { Logger } from '@codebuff/common/types/contracts/logger' const STANDARD_MODEL_ID = 'accounts/fireworks/models/glm-5p1' +const KIMI_STANDARD_MODEL_ID = 'accounts/fireworks/models/kimi-k2p6' const DEPLOYMENT_MODEL_ID = 'accounts/james-65d217/deployments/mjb4i7ea' const TEST_DEPLOYMENT_MAP = { 'z-ai/glm-5.1': DEPLOYMENT_MODEL_ID, @@ -91,6 +92,14 @@ describe('Fireworks deployment routing', () => { model: 'z-ai/glm-5.1', messages: [{ role: 'user' as const, content: 'test' }], } + const kimiBody = { + model: 'moonshotai/kimi-k2.6', + messages: [{ role: 'user' as const, content: 'test' }], + } + const kimiLiteBody = { + ...kimiBody, + codebuff_metadata: { cost_mode: 'lite' }, + } const liteBody = { ...minimalBody, codebuff_metadata: { cost_mode: 'lite' }, @@ -143,6 +152,55 @@ describe('Fireworks deployment routing', () => { expect(fetchCalls).toEqual([STANDARD_MODEL_ID]) }) + it('uses serverless API for Kimi during hours without a deployment', async () => { + const fetchCalls: string[] = [] + + const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { + const body = JSON.parse(init?.body as string) + fetchCalls.push(body.model) + return new Response(JSON.stringify({ ok: true }), { status: 200 }) + }) as unknown as typeof globalThis.fetch + + const response = await createFireworksRequestWithFallback({ + body: kimiBody as never, + originalModel: 'moonshotai/kimi-k2.6', + fetch: mockFetch, + logger, + useCustomDeployment: true, + deploymentMap: { + 'z-ai/glm-5.1': DEPLOYMENT_MODEL_ID, + }, + sessionId: 'test-user-id', + now: IN_DEPLOYMENT_HOURS, + }) + + expect(response.status).toBe(200) + expect(fetchCalls).toEqual([KIMI_STANDARD_MODEL_ID]) + }) + + it('keeps Kimi unavailable outside hours when no deployment is mapped', async () => { + const mockFetch = mock(async () => { + throw new Error('should not fetch outside deployment hours') + }) as unknown as typeof globalThis.fetch + + const response = await createFireworksRequestWithFallback({ + body: kimiBody as never, + originalModel: 'moonshotai/kimi-k2.6', + fetch: mockFetch, + logger, + useCustomDeployment: true, + deploymentMap: { + 'z-ai/glm-5.1': DEPLOYMENT_MODEL_ID, + }, + sessionId: 'test-user-id', + now: BEFORE_DEPLOYMENT_HOURS, + }) + + expect(response.status).toBe(503) + const body = await response.json() + expect(body.error.code).toBe('DEPLOYMENT_OUTSIDE_HOURS') + }) + it('keeps GLM unavailable outside hours when no deployment is mapped', async () => { const mockFetch = mock(async () => { throw new Error('should not fetch outside deployment hours') @@ -356,7 +414,7 @@ describe('Fireworks deployment routing', () => { expect(body.error.code).toBe('DEPLOYMENT_OUTSIDE_HOURS') }) - it('falls back to the standard Fireworks API in lite mode outside deployment hours', async () => { + it('falls back to the standard Fireworks API for Kimi lite mode outside deployment hours', async () => { const fetchCalls: string[] = [] const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => { @@ -366,8 +424,8 @@ describe('Fireworks deployment routing', () => { }) as unknown as typeof globalThis.fetch const response = await createFireworksRequestWithFallback({ - body: liteBody as never, - originalModel: 'z-ai/glm-5.1', + body: kimiLiteBody as never, + originalModel: 'moonshotai/kimi-k2.6', fetch: mockFetch, logger, useCustomDeployment: true, @@ -377,7 +435,7 @@ describe('Fireworks deployment routing', () => { }) expect(response.status).toBe(200) - expect(fetchCalls).toEqual([STANDARD_MODEL_ID]) + expect(fetchCalls).toEqual([KIMI_STANDARD_MODEL_ID]) }) it('returns non-5xx responses from deployment without fallback (e.g. 429)', async () => { diff --git a/web/src/llm-api/canopywave.ts b/web/src/llm-api/canopywave.ts index 341bc239ce..4e87b1e55a 100644 --- a/web/src/llm-api/canopywave.ts +++ b/web/src/llm-api/canopywave.ts @@ -49,14 +49,6 @@ const CANOPYWAVE_MODELS: Record< outputCostPerToken: 1.08 / 1_000_000, }, }, - 'moonshotai/kimi-k2.6': { - canopywaveId: 'moonshotai/kimi-k2.6', - pricing: { - inputCostPerToken: 0.95 / 1_000_000, - cachedInputCostPerToken: 0.16 / 1_000_000, - outputCostPerToken: 4.00 / 1_000_000, - }, - }, } export function isCanopyWaveModel(model: string): boolean { diff --git a/web/src/llm-api/fireworks.ts b/web/src/llm-api/fireworks.ts index b0013e62a1..6bd5851fe0 100644 --- a/web/src/llm-api/fireworks.ts +++ b/web/src/llm-api/fireworks.ts @@ -2,7 +2,7 @@ import { Agent } from 'undici' import { FREEBUFF_DEPLOYMENT_HOURS_LABEL, - FREEBUFF_GLM_MODEL_ID, + FREEBUFF_KIMI_MODEL_ID, isFreebuffDeploymentHours, } from '@codebuff/common/constants/freebuff-models' import { PROFIT_MARGIN } from '@codebuff/common/constants/limits' @@ -36,12 +36,14 @@ const fireworksAgent = new Agent({ const FIREWORKS_MODEL_MAP: Record = { 'minimax/minimax-m2.5': 'accounts/fireworks/models/minimax-m2p5', 'minimax/minimax-m2.7': 'accounts/fireworks/models/minimax-m2p7', + 'moonshotai/kimi-k2.6': 'accounts/fireworks/models/kimi-k2p6', 'z-ai/glm-5.1': 'accounts/fireworks/models/glm-5p1', } /** Models that stay limited to freebuff deployment hours even on serverless. */ const FIREWORKS_HOURS_GATED_MODELS = new Set([ - FREEBUFF_GLM_MODEL_ID, + FREEBUFF_KIMI_MODEL_ID, + 'z-ai/glm-5.1', ]) /** Flag to enable custom Fireworks deployments (set to false to use global API only) */ @@ -169,6 +171,11 @@ const FIREWORKS_PRICING_MAP: Record = { cachedInputCostPerToken: 0.06 / 1_000_000, outputCostPerToken: 1.20 / 1_000_000, }, + 'moonshotai/kimi-k2.6': { + inputCostPerToken: 0.95 / 1_000_000, + cachedInputCostPerToken: 0.16 / 1_000_000, + outputCostPerToken: 4.00 / 1_000_000, + }, 'z-ai/glm-5.1': { inputCostPerToken: 1.40 / 1_000_000, cachedInputCostPerToken: 0.26 / 1_000_000, diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts index f46a0f8c4c..7f08d2bddb 100644 --- a/web/src/server/free-session/__tests__/public-api.test.ts +++ b/web/src/server/free-session/__tests__/public-api.test.ts @@ -203,12 +203,12 @@ describe('requestSession', () => { test('deployment-hours-only model is unavailable outside deployment hours', async () => { const state = await requestSession({ userId: 'u1', - model: 'z-ai/glm-5.1', + model: 'moonshotai/kimi-k2.6', deps, }) expect(state).toEqual({ status: 'model_unavailable', - requestedModel: 'z-ai/glm-5.1', + requestedModel: 'moonshotai/kimi-k2.6', availableHours: '9am ET-5pm PT every day', }) expect(deps.rows.size).toBe(0) @@ -216,18 +216,18 @@ describe('requestSession', () => { test('queued response includes a per-model depth snapshot for the selector', async () => { deps._tick(new Date('2026-04-17T16:00:00Z')) - // Seed 2 users in MiniMax + 1 in GLM so the returned map captures both. + // Seed 2 users in MiniMax + 1 in Kimi so the returned map captures both. await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps }) deps._tick(new Date(deps._now().getTime() + 1000)) await requestSession({ userId: 'u2', model: DEFAULT_MODEL, deps }) deps._tick(new Date(deps._now().getTime() + 1000)) - await requestSession({ userId: 'u3', model: 'z-ai/glm-5.1', deps }) + await requestSession({ userId: 'u3', model: 'moonshotai/kimi-k2.6', deps }) const state = await getSessionState({ userId: 'u1', deps }) if (state.status !== 'queued') throw new Error('unreachable') expect(state.queueDepthByModel).toEqual({ [DEFAULT_MODEL]: 2, - 'z-ai/glm-5.1': 1, + 'moonshotai/kimi-k2.6': 1, }) }) @@ -302,7 +302,7 @@ describe('requestSession', () => { }) test('instant-admit: per-model capacities are independent', async () => { - // MiniMax saturated at 1 active, GLM still has room. + // MiniMax saturated at 1 active, Kimi still has room. const admitDeps = makeDeps({ getInstantAdmitCapacity: (model) => model === DEFAULT_MODEL ? 1 : 10, @@ -316,25 +316,25 @@ describe('requestSession', () => { }) const s3 = await requestSession({ userId: 'u3', - model: 'z-ai/glm-5.1', + model: 'moonshotai/kimi-k2.6', deps: admitDeps, }) expect(s2.status).toBe('queued') expect(s3.status).toBe('active') }) - // Per-user rate limit (5 GLM admissions per 12h) — the wire limit is + // Per-user rate limit (5 Kimi admissions per 12h) — the wire limit is // hard-coded in public-api.ts, so tests seed the fake admit log directly - // rather than configuring it. GLM also has deployment-hours gating, so + // rather than configuring it. Kimi also has deployment-hours gating, so // these tests bump `now` into the open window (12pm ET on a weekday) // before issuing the request. - const GLM_MODEL = 'z-ai/glm-5.1' - const GLM_LIMIT = 5 - const GLM_WINDOW_HOURS = 12 - const GLM_OPEN_TIME = new Date('2026-04-17T16:00:00Z') + const KIMI_MODEL = 'moonshotai/kimi-k2.6' + const KIMI_LIMIT = 5 + const KIMI_WINDOW_HOURS = 12 + const KIMI_OPEN_TIME = new Date('2026-04-17T16:00:00Z') - test('rate_limited: 5th GLM admit in window blocks the 6th attempt', async () => { - deps._tick(GLM_OPEN_TIME) + test('rate_limited: 5th Kimi admit in window blocks the 6th attempt', async () => { + deps._tick(KIMI_OPEN_TIME) // Seed 5 admits inside the 12h window, spaced so we can verify retryAfter // points at the oldest one sliding off. const now = deps._now() @@ -343,22 +343,22 @@ describe('requestSession', () => { for (const hoursAgo of ages) { deps.admits.push({ user_id: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, admitted_at: new Date(now.getTime() - hoursAgo * 60 * 60 * 1000), }) } const state = await requestSession({ userId: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, deps, }) expect(state.status).toBe('rate_limited') if (state.status !== 'rate_limited') throw new Error('unreachable') - expect(state.model).toBe(GLM_MODEL) - expect(state.limit).toBe(GLM_LIMIT) - expect(state.windowHours).toBe(GLM_WINDOW_HOURS) - expect(state.recentCount).toBe(GLM_LIMIT) + expect(state.model).toBe(KIMI_MODEL) + expect(state.limit).toBe(KIMI_LIMIT) + expect(state.windowHours).toBe(KIMI_WINDOW_HOURS) + expect(state.recentCount).toBe(KIMI_LIMIT) // Oldest admit is 11h ago; slot opens when it hits 12h, i.e. in 1h. expect(state.retryAfterMs).toBe(60 * 60 * 1000) // Blocked before any row is written — the user doesn't take a queue slot. @@ -366,21 +366,21 @@ describe('requestSession', () => { }) test('rate_limited: admits outside the 12h window do not count', async () => { - deps._tick(GLM_OPEN_TIME) + deps._tick(KIMI_OPEN_TIME) // 5 admits, each just over 12h old → all fall off the window. const now = deps._now() for (let i = 0; i < 5; i++) { deps.admits.push({ user_id: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, admitted_at: new Date( - now.getTime() - (GLM_WINDOW_HOURS * 60 * 60 * 1000 + 60_000 + i), + now.getTime() - (KIMI_WINDOW_HOURS * 60 * 60 * 1000 + 60_000 + i), ), }) } const state = await requestSession({ userId: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, deps, }) expect(state.status).toBe('queued') @@ -408,41 +408,41 @@ describe('requestSession', () => { expect(state.rateLimit).toBeUndefined() }) - test('queued GLM response carries the current admit count', async () => { - deps._tick(GLM_OPEN_TIME) + test('queued Kimi response carries the current admit count', async () => { + deps._tick(KIMI_OPEN_TIME) const now = deps._now() // 2 admits in the window — under the limit so the user still queues. deps.admits.push({ user_id: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, admitted_at: new Date(now.getTime() - 60 * 60 * 1000), }) deps.admits.push({ user_id: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, admitted_at: new Date(now.getTime() - 30 * 60 * 1000), }) const state = await requestSession({ userId: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, deps, }) if (state.status !== 'queued') throw new Error('unreachable') expect(state.rateLimit).toEqual({ - model: GLM_MODEL, - limit: GLM_LIMIT, - windowHours: GLM_WINDOW_HOURS, + model: KIMI_MODEL, + limit: KIMI_LIMIT, + windowHours: KIMI_WINDOW_HOURS, recentCount: 2, }) }) - test('rate_limited: takeover of an active GLM row is allowed even when at cap', async () => { - // Reclaim path: user has an active+unexpired GLM session and restarts + test('rate_limited: takeover of an active Kimi row is allowed even when at cap', async () => { + // Reclaim path: user has an active+unexpired Kimi session and restarts // the CLI. POST must rotate their instance id (takeover) and NOT reject // with rate_limited — otherwise they'd be stranded with a live session // they can't reconnect to. The 5th admission is already in the log, so // this also exercises "at the cap" rather than "over the cap". - deps._tick(GLM_OPEN_TIME) + deps._tick(KIMI_OPEN_TIME) const now = deps._now() // Seed 5 prior admits (the cap), with the latest one matching the // active row we're about to install. @@ -450,7 +450,7 @@ describe('requestSession', () => { for (const hoursAgo of ages) { deps.admits.push({ user_id: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, admitted_at: new Date(now.getTime() - hoursAgo * 60 * 60 * 1000), }) } @@ -461,7 +461,7 @@ describe('requestSession', () => { user_id: 'u1', status: 'active', active_instance_id: 'inst-pre', - model: GLM_MODEL, + model: KIMI_MODEL, queued_at: admittedAt, admitted_at: admittedAt, expires_at: new Date(admittedAt.getTime() + SESSION_LEN), @@ -471,27 +471,27 @@ describe('requestSession', () => { const state = await requestSession({ userId: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, deps, }) expect(state.status).toBe('active') if (state.status !== 'active') throw new Error('unreachable') // Instance id rotated; quota snapshot still reflects the full window. expect(state.instanceId).not.toBe('inst-pre') - expect(state.rateLimit?.recentCount).toBe(GLM_LIMIT) + expect(state.rateLimit?.recentCount).toBe(KIMI_LIMIT) }) - test('rate_limited: reclaim of a queued GLM row is allowed even when at cap', async () => { + test('rate_limited: reclaim of a queued Kimi row is allowed even when at cap', async () => { // Same reclaim exception for queued rows: if a user has already queued // (say they slipped in just before their 5th admit landed), a subsequent // POST from the same CLI must preserve their queue position instead of // flipping to rate_limited. - deps._tick(GLM_OPEN_TIME) + deps._tick(KIMI_OPEN_TIME) const now = deps._now() - for (let i = 0; i < GLM_LIMIT; i++) { + for (let i = 0; i < KIMI_LIMIT; i++) { deps.admits.push({ user_id: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, admitted_at: new Date(now.getTime() - (i + 1) * 60 * 60 * 1000), }) } @@ -500,7 +500,7 @@ describe('requestSession', () => { user_id: 'u1', status: 'queued', active_instance_id: 'inst-pre', - model: GLM_MODEL, + model: KIMI_MODEL, queued_at: queuedAt, admitted_at: null, expires_at: null, @@ -510,7 +510,7 @@ describe('requestSession', () => { const state = await requestSession({ userId: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, deps, }) expect(state.status).toBe('queued') @@ -518,20 +518,20 @@ describe('requestSession', () => { // Same position (1) since we preserved queued_at and nobody else is // ahead; the instance id rotated so any prior CLI is superseded. expect(state.instanceId).not.toBe('inst-pre') - expect(state.rateLimit?.recentCount).toBe(GLM_LIMIT) + expect(state.rateLimit?.recentCount).toBe(KIMI_LIMIT) }) - test('rate_limited: expired GLM row is not a reclaim — quota still applies', async () => { + test('rate_limited: expired Kimi row is not a reclaim — quota still applies', async () => { // The stored row's expires_at is in the past, so it doesn't represent // an in-flight session. This POST is effectively a fresh request and // must be blocked by the quota. - deps._tick(GLM_OPEN_TIME) + deps._tick(KIMI_OPEN_TIME) const now = deps._now() const ages = [11, 4, 3, 2, 1] for (const hoursAgo of ages) { deps.admits.push({ user_id: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, admitted_at: new Date(now.getTime() - hoursAgo * 60 * 60 * 1000), }) } @@ -540,7 +540,7 @@ describe('requestSession', () => { user_id: 'u1', status: 'active', active_instance_id: 'inst-pre', - model: GLM_MODEL, + model: KIMI_MODEL, queued_at: admittedAt, admitted_at: admittedAt, expires_at: new Date(admittedAt.getTime() + SESSION_LEN), @@ -549,7 +549,7 @@ describe('requestSession', () => { }) const state = await requestSession({ userId: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, deps, }) expect(state.status).toBe('rate_limited') @@ -557,18 +557,18 @@ describe('requestSession', () => { test('instant-admit bumps the quota count for the freshly-written admit row', async () => { const admitDeps = makeDeps({ getInstantAdmitCapacity: () => 3 }) - admitDeps._tick(GLM_OPEN_TIME) + admitDeps._tick(KIMI_OPEN_TIME) // 1 existing admit in the window; this new call should instant-admit and // write a second row, so the response's recentCount reflects 2. const now = admitDeps._now() admitDeps.admits.push({ user_id: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, admitted_at: new Date(now.getTime() - 30 * 60 * 1000), }) const state = await requestSession({ userId: 'u1', - model: GLM_MODEL, + model: KIMI_MODEL, deps: admitDeps, }) if (state.status !== 'active') throw new Error('unreachable') @@ -636,16 +636,16 @@ describe('getSessionState', () => { // Regression: the POST response attached rateLimit, but GET polls did // not — so the "Sessions N/M used" line flashed once then disappeared on // the next 5s poll. GET must attach the same quota snapshot. Rate - // limits only apply to GLM, so this test uses GLM explicitly (inside + // limits only apply to Kimi, so this test uses Kimi explicitly (inside // deployment hours) rather than the Minimax DEFAULT_MODEL. deps._tick(new Date('2026-04-17T16:00:00Z')) const now = deps._now() deps.admits.push({ user_id: 'u1', - model: 'z-ai/glm-5.1', + model: 'moonshotai/kimi-k2.6', admitted_at: new Date(now.getTime() - 60 * 60 * 1000), }) - await requestSession({ userId: 'u1', model: 'z-ai/glm-5.1', deps }) + await requestSession({ userId: 'u1', model: 'moonshotai/kimi-k2.6', deps }) const row = deps.rows.get('u1')! row.status = 'active' row.admitted_at = now @@ -658,7 +658,7 @@ describe('getSessionState', () => { }) if (state.status !== 'active') throw new Error('unreachable') expect(state.rateLimit).toEqual({ - model: 'z-ai/glm-5.1', + model: 'moonshotai/kimi-k2.6', limit: 5, windowHours: 12, recentCount: 1, diff --git a/web/src/server/free-session/__tests__/session-view.test.ts b/web/src/server/free-session/__tests__/session-view.test.ts index 52dc82c12b..215059b841 100644 --- a/web/src/server/free-session/__tests__/session-view.test.ts +++ b/web/src/server/free-session/__tests__/session-view.test.ts @@ -7,7 +7,7 @@ import type { InternalSessionRow } from '../types' const WAIT_PER_SPOT_MS = 24_000 const GRACE_MS = 30 * 60_000 -const TEST_MODEL = 'z-ai/glm-5.1' +const TEST_MODEL = 'moonshotai/kimi-k2.6' function row(overrides: Partial = {}): InternalSessionRow { const now = new Date('2026-04-17T12:00:00Z') diff --git a/web/src/server/free-session/config.ts b/web/src/server/free-session/config.ts index 10071b35fc..6d162c4617 100644 --- a/web/src/server/free-session/config.ts +++ b/web/src/server/free-session/config.ts @@ -48,7 +48,7 @@ export function getSessionGraceMs(): number { * queue). */ const INSTANT_ADMIT_CAPACITY: Record = { - 'z-ai/glm-5.1': 50, + 'moonshotai/kimi-k2.6': 50, 'minimax/minimax-m2.7': 1000, } diff --git a/web/src/server/free-session/public-api.ts b/web/src/server/free-session/public-api.ts index 528cd4ab31..75c2f24ff1 100644 --- a/web/src/server/free-session/public-api.ts +++ b/web/src/server/free-session/public-api.ts @@ -37,7 +37,7 @@ import type { /** * Per-model admission rate limits. Keyed by freebuff model id; a model not - * in the map has no rate limit applied. Today only GLM 5.1 is limited + * in the map has no rate limit applied. Today only Kimi K2.6 is limited * (Minimax is cheap enough to leave unlimited). * * Hard-coded rather than env-driven: the values need to be observable in the @@ -45,7 +45,7 @@ import type { * queued/active responses — changing them is a deliberate, typed edit. */ const RATE_LIMITS: Record = { - 'z-ai/glm-5.1': { limit: 5, windowHours: 12 }, + 'moonshotai/kimi-k2.6': { limit: 5, windowHours: 12 }, } /** Fetch the caller's current quota snapshot for `model`, or undefined if the diff --git a/web/src/server/free-session/store.ts b/web/src/server/free-session/store.ts index 8831ad7a8c..d22835658f 100644 --- a/web/src/server/free-session/store.ts +++ b/web/src/server/free-session/store.ts @@ -466,7 +466,7 @@ export async function promoteQueuedUser(params: { * the oldest is needed to compute `retryAfterMs` when the window is full, * so one query covers both the check and the reject path. * - * Drives the per-user, per-model rate limit (e.g. at most 5 GLM sessions in + * Drives the per-user, per-model rate limit (e.g. at most 5 Kimi sessions in * the last 12h) enforced before `joinOrTakeOver`. */ export async function listRecentAdmits(params: {