From 3388ffeb7567f75a31487a3dd774bb9e0818d437 Mon Sep 17 00:00:00 2001
From: brandon chen <9735006+brandonkachen@users.noreply.github.com>
Date: Tue, 28 Apr 2026 17:42:27 -0700
Subject: [PATCH 1/3] Scope Kimi tool call compatibility (#560)

Co-authored-by: James Grugett <jahooma@gmail.com>
---
 .../__tests__/kimi-tool-compat.test.ts        | 112 ++++++++++++++++++
 web/src/llm-api/canopywave.ts                 |   6 +-
 web/src/llm-api/kimi-tool-compat.ts           |  67 +++++++++++
 web/src/llm-api/openrouter.ts                 |   7 +-
 web/src/llm-api/types.ts                      |  12 ++
 5 files changed, 202 insertions(+), 2 deletions(-)
 create mode 100644 web/src/llm-api/__tests__/kimi-tool-compat.test.ts
 create mode 100644 web/src/llm-api/kimi-tool-compat.ts

diff --git a/web/src/llm-api/__tests__/kimi-tool-compat.test.ts b/web/src/llm-api/__tests__/kimi-tool-compat.test.ts
new file mode 100644
index 0000000000..9e4fbdabb0
--- /dev/null
+++ b/web/src/llm-api/__tests__/kimi-tool-compat.test.ts
@@ -0,0 +1,112 @@
+import { describe, expect, it } from 'bun:test'
+
+import { addKimiToolCompatibilityFields, isKimiModel } from '../kimi-tool-compat'
+
+import type { ChatCompletionRequestBody } from '../types'
+
+describe('addKimiToolCompatibilityFields', () => {
+  it('adds declaration ids and tool-result names without mutating input', () => {
+    const body: ChatCompletionRequestBody = {
+      model: 'moonshotai/kimi-k2.6',
+      messages: [
+        {
+          role: 'assistant',
+          content: '',
+          tool_calls: [
+            {
+              id: 'call_123',
+              type: 'function',
+              function: {
+                name: 'read_files',
+                arguments: JSON.stringify({ paths: ['README.md'] }),
+              },
+            },
+          ],
+        },
+        {
+          role: 'tool',
+          tool_call_id: 'call_123',
+          content: JSON.stringify({ message: 'ok' }),
+        },
+      ],
+      tools: [
+        {
+          type: 'function',
+          function: {
+            name: 'read_files',
+            description: 'Read files',
+            parameters: { type: 'object' },
+          },
+        },
+      ],
+    }
+    // Fixture: one assistant tool call, its tool result, and one tool with no id.
+    const result = addKimiToolCompatibilityFields(body)
+    // Returned copy: the tool gets id tool_1 and the result gets the call's name.
+    expect(result.tools?.[0]).toEqual({
+      id: 'tool_1',
+      type: 'function',
+      function: {
+        name: 'read_files',
+        description: 'Read files',
+        parameters: { type: 'object' },
+      },
+    })
+    expect(result.messages[1]).toEqual({
+      role: 'tool',
+      tool_call_id: 'call_123',
+      name: 'read_files',
+      content: JSON.stringify({ message: 'ok' }),
+    })
+    expect(body.tools?.[0]).not.toHaveProperty('id') // original body untouched
+    expect(body.messages[1]).not.toHaveProperty('name')
+  })
+
+  it('preserves existing ids and names', () => {
+    const body: ChatCompletionRequestBody = {
+      model: 'moonshotai/kimi-k2.6',
+      messages: [
+        {
+          role: 'assistant',
+          content: '',
+          tool_calls: [
+            {
+              id: 'call_456',
+              type: 'function',
+              function: {
+                name: 'write_todos',
+                arguments: JSON.stringify({ todos: [] }),
+              },
+            },
+          ],
+        },
+        {
+          role: 'tool',
+          tool_call_id: 'call_456',
+          name: 'existing_name',
+          content: '{}',
+        },
+      ],
+      tools: [
+        {
+          id: 'existing_tool_id',
+          type: 'function',
+          function: {
+            name: 'write_todos',
+            parameters: { type: 'object' },
+          },
+        },
+      ],
+    }
+    // Pre-set id and name must survive, so the output deep-equals the input.
+    expect(addKimiToolCompatibilityFields(body)).toEqual(body)
+  })
+})
+
+describe('isKimiModel', () => {
+  it('matches only Moonshot model ids', () => {
+    expect(isKimiModel('moonshotai/kimi-k2.6')).toBe(true)
+    expect(isKimiModel('anthropic/claude-sonnet-4.5')).toBe(false)
+    expect(isKimiModel(undefined)).toBe(false) // non-string input
+  })
+})
diff --git a/web/src/llm-api/canopywave.ts b/web/src/llm-api/canopywave.ts
index 9a5b2ba125..341bc239ce 100644
--- a/web/src/llm-api/canopywave.ts
+++ b/web/src/llm-api/canopywave.ts
@@ -9,6 +9,7 @@ import {
   extractRequestMetadata,
   insertMessageToBigQuery,
 } from './helpers'
+import { addKimiToolCompatibilityFields, isKimiModel } from './kimi-tool-compat'
 
 import type { UsageData } from './helpers'
 import type { InsertMessageBigqueryFn } from '@codebuff/common/types/contracts/bigquery'
@@ -88,8 +89,11 @@ function createCanopyWaveRequest(params: {
   fetch: typeof globalThis.fetch
 }) {
   const { body, originalModel, fetch } = params
+  const providerBody = isKimiModel(originalModel)
+    ? addKimiToolCompatibilityFields(body)
+    : body
   const canopywaveBody: Record<string, unknown> = {
-    ...body,
+    ...providerBody,
     model: getCanopyWaveModelId(originalModel),
   }
 
diff --git a/web/src/llm-api/kimi-tool-compat.ts b/web/src/llm-api/kimi-tool-compat.ts
new file mode 100644
index 0000000000..334a41b914
--- /dev/null
+++ b/web/src/llm-api/kimi-tool-compat.ts
@@ -0,0 +1,67 @@
+import type { ChatCompletionRequestBody } from './types'
+
+export function isKimiModel(model: unknown): model is `moonshotai/${string}` {
+  return typeof model === 'string' && model.startsWith('moonshotai/') // Moonshot vendor prefix
+}
+
+function getToolCallNamesById(
+  messages: ChatCompletionRequestBody['messages'],
+): Map<string, string> {
+  const namesById = new Map<string, string>()
+  // Map tool-call id -> function name, scanning assistant messages only.
+  for (const message of messages) {
+    if (message.role !== 'assistant') {
+      continue
+    }
+    for (const toolCall of message.tool_calls ?? []) {
+      if (toolCall.id && toolCall.function.name) {
+        namesById.set(toolCall.id, toolCall.function.name)
+      }
+    }
+  }
+  // Calls missing an id or a function name are left out; a repeated id keeps the last name.
+  return namesById
+}
+
+/**
+ * Returns a copy of `body` (input is not mutated) with the two extensions that
+ * Kimi-compatible providers require beyond the strict Chat Completions schema:
+ * ids on function tool declarations and names on tool-result messages.
+ */
+export function addKimiToolCompatibilityFields(
+  body: ChatCompletionRequestBody,
+): ChatCompletionRequestBody {
+  const namesByToolCallId = getToolCallNamesById(body.messages)
+  // Generated ids are 1-based positions; existing ids and names are preserved.
+  return {
+    ...body,
+    tools: body.tools?.map((tool, index) => {
+      if (tool.type !== 'function' || tool.id) {
+        return tool
+      }
+      return {
+        ...tool,
+        id: `tool_${index + 1}`,
+      }
+    }),
+    messages: body.messages.map((message) => {
+      if (
+        message.role !== 'tool' ||
+        message.name ||
+        typeof message.tool_call_id !== 'string'
+      ) {
+        return message
+      }
+      // Look up the function name of the assistant call this result answers.
+      const name = namesByToolCallId.get(message.tool_call_id)
+      if (!name) {
+        return message
+      }
+      // Copy the message and attach the resolved name.
+      return {
+        ...message,
+        name,
+      }
+    }),
+  }
+}
diff --git a/web/src/llm-api/openrouter.ts b/web/src/llm-api/openrouter.ts
index 2762a60d8d..bf7231abd9 100644
--- a/web/src/llm-api/openrouter.ts
+++ b/web/src/llm-api/openrouter.ts
@@ -9,6 +9,7 @@ import {
   extractRequestMetadata,
   insertMessageToBigQuery,
 } from './helpers'
+import { addKimiToolCompatibilityFields, isKimiModel } from './kimi-tool-compat'
 import {
   OpenRouterErrorResponseSchema,
   OpenRouterStreamChatCompletionChunkSchema,
@@ -61,6 +62,10 @@ function createOpenRouterRequest(params: {
   fetch: typeof globalThis.fetch
 }) {
   const { body, openrouterApiKey, fetch } = params
+  const providerBody = isKimiModel(body.model)
+    ? addKimiToolCompatibilityFields(body)
+    : body
+
   return fetch('https://openrouter.ai/api/v1/chat/completions', {
     method: 'POST',
     headers: {
@@ -69,7 +74,7 @@ function createOpenRouterRequest(params: {
       'X-Title': 'Codebuff',
       'Content-Type': 'application/json',
     },
-    body: JSON.stringify(body),
+    body: JSON.stringify(providerBody),
     // Use custom agent with extended headers timeout for deep-thinking models
     // @ts-expect-error - dispatcher is a valid undici option not in fetch types
     dispatcher: openrouterAgent,
diff --git a/web/src/llm-api/types.ts b/web/src/llm-api/types.ts
index b3bb1eaf97..dd3b89a4d7 100644
--- a/web/src/llm-api/types.ts
+++ b/web/src/llm-api/types.ts
@@ -28,9 +28,21 @@ export interface ChatMessage {
   tool_call_id?: string
 }
 
+export interface ChatCompletionTool {
+  id?: string // not in the strict Chat Completions schema; used for Kimi compat
+  type: string // addKimiToolCompatibilityFields only touches 'function' tools
+  function?: {
+    name: string
+    description?: string
+    parameters?: unknown
+    strict?: boolean
+  }
+}
+
 export interface ChatCompletionRequestBody {
   model: string
   messages: ChatMessage[]
+  tools?: ChatCompletionTool[]
   stream?: boolean
   temperature?: number
   max_tokens?: number

From 0cdbe0177dd986b307c4c11435eb218b0b04077a Mon Sep 17 00:00:00 2001
From: James Grugett <jahooma@gmail.com>
Date: Tue, 28 Apr 2026 17:59:59 -0700
Subject: [PATCH 2/3] Simplify ad response shape (#562)

---
 cli/src/chat.tsx                           |   9 +-
 cli/src/components/waiting-room-screen.tsx |  11 +-
 cli/src/hooks/use-gravity-ad.ts            | 145 ++++-----------------
 web/src/app/api/v1/ads/_post.ts            |  41 ++----
 web/src/lib/ad-providers/carbon.ts         |   2 +-
 web/src/lib/ad-providers/gravity.ts        |  24 +---
 web/src/lib/ad-providers/types.ts          |   7 +-
 7 files changed, 51 insertions(+), 188 deletions(-)

diff --git a/cli/src/chat.tsx b/cli/src/chat.tsx
index 09727ea6ea..a8bae5b033 100644
--- a/cli/src/chat.tsx
+++ b/cli/src/chat.tsx
@@ -174,7 +174,7 @@ export const Chat = ({
   })
   const hasSubscription = subscriptionData?.hasSubscription ?? false
 
-  const { adData, recordImpression } = useGravityAd({
+  const { ads, recordImpression } = useGravityAd({
     enabled: IS_FREEBUFF || !hasSubscription,
     provider: 'gravity',
     fallbackProvider: 'carbon',
@@ -1463,11 +1463,8 @@ export const Chat = ({
           />
         )}
 
-        {adData && (IS_FREEBUFF || getAdsEnabled()) && (
-          <ChoiceAdBanner
-            ads={adData.variant === 'choice' ? adData.ads : [adData.ad]}
-            onImpression={recordImpression}
-          />
+        {ads && (IS_FREEBUFF || getAdsEnabled()) && (
+          <ChoiceAdBanner ads={ads} onImpression={recordImpression} />
         )}
 
         {reviewMode ? (
diff --git a/cli/src/components/waiting-room-screen.tsx b/cli/src/components/waiting-room-screen.tsx
index 7cc0aca4a0..9ccba664a7 100644
--- a/cli/src/components/waiting-room-screen.tsx
+++ b/cli/src/components/waiting-room-screen.tsx
@@ -115,7 +115,7 @@ export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
   // forceStart bypasses the "wait for first user message" gate inside the hook,
   // which would otherwise block ads here since no conversation exists yet.
   // Try Gravity first, then fall back to Carbon when Gravity doesn't fill.
-  const { adData, recordImpression } = useGravityAd({
+  const { ads, recordImpression } = useGravityAd({
     enabled: true,
     forceStart: true,
     provider: 'gravity',
@@ -369,17 +369,14 @@ export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
       </box>
 
       {/* Ad banner pinned to the bottom, same look-and-feel as in chat. */}
-      {adData && (
+      {ads && (
         <box style={{ flexShrink: 0 }}>
-          <ChoiceAdBanner
-            ads={adData.variant === 'choice' ? adData.ads : [adData.ad]}
-            onImpression={recordImpression}
-          />
+          <ChoiceAdBanner ads={ads} onImpression={recordImpression} />
         </box>
       )}
 
       {/* Horizontal separator (mirrors chat input divider style) */}
-      {!adData && (
+      {!ads && (
         <text style={{ fg: theme.muted, flexShrink: 0 }}>
           {'─'.repeat(terminalWidth)}
         </text>
diff --git a/cli/src/hooks/use-gravity-ad.ts b/cli/src/hooks/use-gravity-ad.ts
index ea6977864b..0a7f2e9e6d 100644
--- a/cli/src/hooks/use-gravity-ad.ts
+++ b/cli/src/hooks/use-gravity-ad.ts
@@ -9,7 +9,7 @@ import { getAuthToken } from '../utils/auth'
 import { IS_FREEBUFF } from '../utils/constants'
 import { logger } from '../utils/logger'
 
-import type { Message} from '@codebuff/sdk';
+import type { Message } from '@codebuff/sdk'
 
 const AD_ROTATION_INTERVAL_MS = 60 * 1000 // 60 seconds per ad
 const MAX_ADS_AFTER_ACTIVITY = 3 // Show up to 3 ads after last activity, then pause fetching new ads
@@ -28,8 +28,6 @@ export type AdResponse = {
   credits?: number // Set after impression is recorded (in cents)
 }
 
-export type AdVariant = 'banner' | 'choice'
-
 /**
  * Which upstream ad network to query. The server maps each provider onto the
  * same normalized response shape, so the rest of the hook is provider-agnostic.
@@ -37,43 +35,19 @@ export type AdVariant = 'banner' | 'choice'
 export type AdProvider = 'gravity' | 'carbon'
 export type AdSurface = 'waiting_room'
 
-export type AdData =
-  | { variant: 'banner'; ad: AdResponse }
-  | { variant: 'choice'; ads: AdResponse[] }
-
 export type GravityAdState = {
-  ad: AdResponse | null
-  adData: AdData | null
+  ads: AdResponse[] | null
   isLoading: boolean
   recordImpression: (impUrl: string) => void
 }
 
 // Consolidated controller state for the ad rotation logic
 type GravityController = {
-  cache: AdResponse[]
-  cacheIndex: number
   choiceCache: AdResponse[][] // Cache of choice ad sets (each entry is 4 ads)
   choiceCacheIndex: number
-  variant: AdVariant | null // Assigned variant from backend
   impressionsFired: Set<string>
   adsShownSinceActivity: number
   tickInFlight: boolean
-  intervalId: ReturnType<typeof setInterval> | null
-}
-
-// Pure helper: add an ad to the cache (if not already present)
-function addToCache(ctrl: GravityController, ad: AdResponse): void {
-  if (ctrl.cache.some((x) => x.impUrl === ad.impUrl)) return
-  if (ctrl.cache.length >= MAX_AD_CACHE_SIZE) ctrl.cache.shift()
-  ctrl.cache.push(ad)
-}
-
-// Pure helper: get the next cached ad (cycles through the cache)
-function nextFromCache(ctrl: GravityController): AdResponse | null {
-  if (ctrl.cache.length === 0) return null
-  const ad = ctrl.cache[ctrl.cacheIndex % ctrl.cache.length]!
-  ctrl.cacheIndex = (ctrl.cacheIndex + 1) % ctrl.cache.length
-  return ad
 }
 
 // Pure helper: add a choice ad set to the choice cache
@@ -121,8 +95,7 @@ export const useGravityAd = (options?: {
   const provider: AdProvider = options?.provider ?? 'gravity'
   const fallbackProvider = options?.fallbackProvider
   const surface = options?.surface
-  const [ad, setAd] = useState<AdResponse | null>(null)
-  const [adData, setAdData] = useState<AdData | null>(null)
+  const [ads, setAds] = useState<AdResponse[] | null>(null)
   const [isLoading, setIsLoading] = useState(false)
 
   // Check if terminal height is too small to show ads
@@ -146,19 +119,15 @@ export const useGravityAd = (options?: {
 
   // Single consolidated controller ref
   const ctrlRef = useRef<GravityController>({
-    cache: [],
-    cacheIndex: 0,
     choiceCache: [],
     choiceCacheIndex: 0,
-    variant: null,
     impressionsFired: new Set(),
     adsShownSinceActivity: 0,
     tickInFlight: false,
-    intervalId: null,
   })
 
   // Ref for the tick function (avoids useCallback dependency issues)
-  const tickRef = useRef<() => void>(() => { })
+  const tickRef = useRef<() => void>(() => {})
 
   // Ref to track whether ads should be hidden for use in async code
   const shouldHideAdsRef = useRef(shouldHideAds)
@@ -197,26 +166,12 @@ export const useGravityAd = (options?: {
             { creditsGranted: data.creditsGranted },
             '[ads] Ad impression credits granted',
           )
-          setAd((cur) =>
-            cur?.impUrl === impUrl
-              ? { ...cur, credits: data.creditsGranted }
-              : cur,
-          )
-          // Also update credits in adData for choice ads
-          setAdData((cur) => {
+          // Update credits on the visible ad that matches this impression
+          setAds((cur) => {
             if (!cur) return cur
-            if (cur.variant === 'choice') {
-              return {
-                ...cur,
-                ads: cur.ads.map((a) =>
-                  a.impUrl === impUrl ? { ...a, credits: data.creditsGranted } : a,
-                ),
-              }
-            }
-            if (cur.variant === 'banner' && cur.ad.impUrl === impUrl) {
-              return { ...cur, ad: { ...cur.ad, credits: data.creditsGranted } }
-            }
-            return cur
+            return cur.map((a) =>
+              a.impUrl === impUrl ? { ...a, credits: data.creditsGranted } : a,
+            )
           })
         }
       })
@@ -225,23 +180,7 @@ export const useGravityAd = (options?: {
       })
   }
 
-  // Show a single banner ad and fire impression
-  const showAd = (next: AdResponse): void => {
-    setAd(next)
-    setAdData({ variant: 'banner', ad: next })
-    recordImpressionOnce(next.impUrl)
-  }
-
-  // Show a choice ad set (impressions are fired by the component for visible ads only)
-  const showChoiceAds = (ads: AdResponse[]): void => {
-    setAd(ads[0] ?? null) // Keep backwards compat for ad field
-    setAdData({ variant: 'choice', ads })
-  }
-
-  type FetchAdResult =
-    | { variant: 'banner'; ad: AdResponse }
-    | { variant: 'choice'; ads: AdResponse[] }
-    | null
+  type FetchAdResult = { ads: AdResponse[] } | null
 
   // Fetch an ad via web API
   const fetchAd = async (): Promise<FetchAdResult> => {
@@ -324,21 +263,15 @@ export const useGravityAd = (options?: {
         }
 
         const data = await response.json()
-        const variant = data.variant ?? 'banner'
-
-        if (
-          variant === 'choice' &&
-          Array.isArray(data.ads) &&
-          data.ads.length > 0
-        ) {
-          return { variant: 'choice', ads: data.ads as AdResponse[] }
-        }
 
-        if (data.ad) {
-          return { variant: 'banner', ad: data.ad as AdResponse }
+        if (Array.isArray(data.ads) && data.ads.length > 0) {
+          return { ads: data.ads as AdResponse[] }
         }
       } catch (err) {
-        logger.error({ err, provider: providerToTry }, '[ads] Failed to fetch ad')
+        logger.error(
+          { err, provider: providerToTry },
+          '[ads] Failed to fetch ad',
+        )
       }
     }
 
@@ -363,30 +296,15 @@ export const useGravityAd = (options?: {
         const result = canFetchNew ? await fetchAd() : null
 
         if (result) {
-          ctrl.variant = result.variant
-          if (result.variant === 'choice') {
-            addToChoiceCache(ctrl, result.ads)
-            ctrl.adsShownSinceActivity += 1
-            showChoiceAds(result.ads)
-          } else {
-            addToCache(ctrl, result.ad)
-            ctrl.adsShownSinceActivity += 1
-            showAd(result.ad)
-          }
+          addToChoiceCache(ctrl, result.ads)
+          ctrl.adsShownSinceActivity += 1
+          setAds(result.ads)
         } else {
           // Fall back to cached ads
-          if (ctrl.variant === 'choice') {
-            const cachedSet = nextFromChoiceCache(ctrl)
-            if (cachedSet) {
-              ctrl.adsShownSinceActivity += 1
-              showChoiceAds(cachedSet)
-            }
-          } else {
-            const next = nextFromCache(ctrl)
-            if (next) {
-              ctrl.adsShownSinceActivity += 1
-              showAd(next)
-            }
+          const cachedSet = nextFromChoiceCache(ctrl)
+          if (cachedSet) {
+            ctrl.adsShownSinceActivity += 1
+            setAds(cachedSet)
           }
         }
       } finally {
@@ -414,14 +332,8 @@ export const useGravityAd = (options?: {
       const result = await fetchAd()
       if (result) {
         const ctrl = ctrlRef.current
-        ctrl.variant = result.variant
-        if (result.variant === 'choice') {
-          addToChoiceCache(ctrl, result.ads)
-          showChoiceAds(result.ads)
-        } else {
-          addToCache(ctrl, result.ad)
-          showAd(result.ad)
-        }
+        addToChoiceCache(ctrl, result.ads)
+        setAds(result.ads)
         ctrl.adsShownSinceActivity = 1
       }
       setIsLoading(false)
@@ -429,19 +341,16 @@ export const useGravityAd = (options?: {
 
     // Start interval for rotation (consistent 60s intervals)
     const id = setInterval(() => tickRef.current(), AD_ROTATION_INTERVAL_MS)
-    ctrlRef.current.intervalId = id
 
     return () => {
       clearInterval(id)
-      ctrlRef.current.intervalId = null
     }
   }, [shouldStart, shouldHideAds, provider, fallbackProvider, surface])
 
-  // Don't return ad when ads should be hidden
+  // Don't return ads when ads should be hidden
   const visible = shouldStart && !shouldHideAds
   return {
-    ad: visible ? ad : null,
-    adData: visible ? adData : null,
+    ads: visible ? ads : null,
     isLoading,
     recordImpression: recordImpressionOnce,
   }
diff --git a/web/src/app/api/v1/ads/_post.ts b/web/src/app/api/v1/ads/_post.ts
index a56846b055..370f11622b 100644
--- a/web/src/app/api/v1/ads/_post.ts
+++ b/web/src/app/api/v1/ads/_post.ts
@@ -53,6 +53,10 @@ export type AdsEnv = {
   CB_ENVIRONMENT: string
 }
 
+function noAdsResponse(provider: AdProviderId) {
+  return NextResponse.json({ ads: [], provider }, { status: 200 }) // empty list, still 200
+}
+
 export async function postAds(params: {
   req: NextRequest
   getUserInfoFromApiKey: GetUserInfoFromApiKeyFn
@@ -119,13 +123,13 @@ export async function postAds(params: {
   if (providerId === 'carbon') {
     if (!serverEnv.CARBON_ZONE_KEY) {
       logger.warn('[ads] CARBON_ZONE_KEY not configured')
-      return NextResponse.json({ ad: null, provider: providerId }, { status: 200 })
+      return noAdsResponse(providerId)
     }
     provider = createCarbonProvider({ zoneKey: serverEnv.CARBON_ZONE_KEY })
   } else {
     if (!serverEnv.GRAVITY_API_KEY) {
       logger.warn('[ads] GRAVITY_API_KEY not configured')
-      return NextResponse.json({ ad: null, provider: providerId }, { status: 200 })
+      return noAdsResponse(providerId)
     }
     provider = createGravityProvider({ apiKey: serverEnv.GRAVITY_API_KEY })
   }
@@ -146,20 +150,14 @@ export async function postAds(params: {
     })
 
     if (!result) {
-      return NextResponse.json(
-        { ad: null, provider: provider.id },
-        { status: 200 },
-      )
+      return noAdsResponse(provider.id)
     }
 
-    const adsToPersist: NormalizedAd[] =
-      result.variant === 'choice' ? result.ads : [result.ad]
-
     // Persist served ads so the impression endpoint can validate + fire the
     // correct pixels. Any DB failure is logged but doesn't block serving.
     try {
       await Promise.all(
-        adsToPersist.map((ad) =>
+        result.ads.map((ad) =>
           db
             .insert(schema.adImpression)
             .values({
@@ -184,7 +182,7 @@ export async function postAds(params: {
         {
           userId,
           provider: provider.id,
-          adCount: adsToPersist.length,
+          adCount: result.ads.length,
           error:
             dbError instanceof Error
               ? { name: dbError.name, message: dbError.message }
@@ -200,25 +198,12 @@ export async function postAds(params: {
       return rest
     }
 
-    if (result.variant === 'choice') {
-      logger.info(
-        { provider: provider.id, variant: 'choice', adCount: result.ads.length },
-        '[ads] Fetched choice ads',
-      )
-      return NextResponse.json({
-        ads: result.ads.map(toClient),
-        variant: 'choice',
-        provider: provider.id,
-      })
-    }
-
     logger.info(
-      { provider: provider.id, variant: 'banner' },
-      '[ads] Fetched banner ad',
+      { provider: provider.id, adCount: result.ads.length },
+      '[ads] Fetched ads',
     )
     return NextResponse.json({
-      ad: toClient(result.ad),
-      variant: 'banner',
+      ads: result.ads.map(toClient),
       provider: provider.id,
     })
   } catch (error) {
@@ -235,7 +220,7 @@ export async function postAds(params: {
     )
     return NextResponse.json(
       {
-        ad: null,
+        ads: [],
         provider: providerId,
         error: getErrorObject(error),
       },
diff --git a/web/src/lib/ad-providers/carbon.ts b/web/src/lib/ad-providers/carbon.ts
index 64a926436f..f4775a00ac 100644
--- a/web/src/lib/ad-providers/carbon.ts
+++ b/web/src/lib/ad-providers/carbon.ts
@@ -164,7 +164,7 @@ export function createCarbonProvider(config: {
         return null
       }
 
-      return { variant: 'choice', ads }
+      return { ads }
     },
   }
 }
diff --git a/web/src/lib/ad-providers/gravity.ts b/web/src/lib/ad-providers/gravity.ts
index 4ae33b5145..e0e8efec4e 100644
--- a/web/src/lib/ad-providers/gravity.ts
+++ b/web/src/lib/ad-providers/gravity.ts
@@ -1,18 +1,14 @@
-import { createHash } from 'crypto'
-
 import { buildArray } from '@codebuff/common/util/array'
 
 import type {
   AdMessage,
   AdProvider,
-  AdVariant,
   FetchAdInput,
   FetchAdResult,
   NormalizedAd,
 } from './types'
 
 const GRAVITY_URL = 'https://server.trygravity.ai/api/v1/ad'
-const BANNER_PLACEMENT_ID = 'code-assist-ad'
 const CHOICE_PLACEMENT_IDS = [
   'choice-ad-1',
   'choice-ad-2',
@@ -50,15 +46,6 @@ function normalize(raw: GravityRawAd): NormalizedAd {
   }
 }
 
-/**
- * A/B test: deterministically assign a user to the `banner` or `choice`
- * variant based on their userId. Stable across requests.
- */
-function getGravityVariant(userId: string): AdVariant {
-  const hash = createHash('sha256').update(`ad-variant:${userId}`).digest()
-  return hash[0] % 2 === 0 ? 'banner' : 'choice'
-}
-
 /**
  * Extract the content from the last <user_message> tag in a string.
  * The CLI wraps raw user text in that tag; if no tag is found, returns the
@@ -111,16 +98,12 @@ export function createGravityProvider(config: { apiKey: string }): AdProvider {
         fetch,
       } = input
 
-      const variant =
-        input.surface === 'waiting_room' ? 'choice' : getGravityVariant(userId)
       const filteredMessages = prepareGravityMessages(messages)
 
       const placementIds =
         input.surface === 'waiting_room'
           ? WAITING_ROOM_PLACEMENT_IDS
-          : variant === 'choice'
-          ? CHOICE_PLACEMENT_IDS
-          : [BANNER_PLACEMENT_ID]
+          : CHOICE_PLACEMENT_IDS
 
       const placements = placementIds.map((id) => ({
         placement: 'below_response',
@@ -192,10 +175,7 @@ export function createGravityProvider(config: { apiKey: string }): AdProvider {
         return null
       }
 
-      if (variant === 'choice') {
-        return { variant: 'choice', ads: ads.map(normalize) }
-      }
-      return { variant: 'banner', ad: normalize(ads[0]) }
+      return { ads: ads.map(normalize) }
     },
   }
 }
diff --git a/web/src/lib/ad-providers/types.ts b/web/src/lib/ad-providers/types.ts
index fb3284e2af..ced439e8f7 100644
--- a/web/src/lib/ad-providers/types.ts
+++ b/web/src/lib/ad-providers/types.ts
@@ -8,8 +8,6 @@ import type { Logger } from '@codebuff/common/types/contracts/logger'
  */
 export type AdProviderId = 'gravity' | 'carbon'
 
-export type AdVariant = 'banner' | 'choice'
-
 /**
  * Normalized ad shape returned by every provider. The CLI renders against
  * this shape; provider modules are responsible for mapping their upstream
@@ -62,10 +60,7 @@ export type FetchAdInput = {
   fetch: typeof globalThis.fetch
 }
 
-export type FetchAdResult =
-  | { variant: 'banner'; ad: NormalizedAd }
-  | { variant: 'choice'; ads: NormalizedAd[] }
-  | null
+export type FetchAdResult = { ads: NormalizedAd[] } | null
 
 export type AdProvider = {
   id: AdProviderId

From 37020fee9054ebe9272957661d40b789bfab9abc Mon Sep 17 00:00:00 2001
From: James Grugett <jahooma@gmail.com>
Date: Tue, 28 Apr 2026 18:00:16 -0700
Subject: [PATCH 3/3] Use Kimi K2.6 for free and lite (#561)

---
 agents/__tests__/editor.test.ts               |  11 ++
 agents/base2/base2.ts                         |   2 +-
 agents/editor/editor-lite.ts                  |   2 +-
 agents/editor/editor.ts                       |   6 +-
 agents/reviewer/code-reviewer-lite.ts         |   2 +-
 agents/types/agent-definition.ts              |   1 +
 .../components/freebuff-model-selector.tsx    |   8 +-
 cli/src/components/waiting-room-screen.tsx    |   4 +-
 cli/src/hooks/use-freebuff-session.ts         |   6 +-
 common/src/__tests__/freebuff-models.test.ts  |   6 +
 common/src/constants/free-agents.ts           |   6 +-
 common/src/constants/freebuff-models.ts       |  12 +-
 .../types/agent-definition.ts                 |   1 +
 common/src/types/freebuff-session.ts          |  10 +-
 freebuff/README.md                            |   2 +-
 freebuff/SPEC.md                              |   2 +-
 freebuff/web/src/app/home-client.tsx          |   2 +-
 .../completions/__tests__/completions.test.ts |  10 +-
 .../session/__tests__/session.test.ts         |   4 +-
 web/src/app/docs/[category]/[slug]/page.tsx   |   2 +-
 web/src/content/advanced/how-does-it-work.mdx |   4 +-
 web/src/content/advanced/what-models.mdx      |   6 +-
 web/src/content/help/faq.mdx                  |   2 +-
 web/src/content/tips/modes.mdx                |   4 +-
 .../__tests__/fireworks-deployment.test.ts    |  66 +++++++++-
 web/src/llm-api/canopywave.ts                 |   8 --
 web/src/llm-api/fireworks.ts                  |  11 +-
 .../free-session/__tests__/public-api.test.ts | 118 +++++++++---------
 .../__tests__/session-view.test.ts            |   2 +-
 web/src/server/free-session/config.ts         |   2 +-
 web/src/server/free-session/public-api.ts     |   4 +-
 web/src/server/free-session/store.ts          |   2 +-
 32 files changed, 203 insertions(+), 125 deletions(-)

diff --git a/agents/__tests__/editor.test.ts b/agents/__tests__/editor.test.ts
index 36d6b75c5c..dd5630930b 100644
--- a/agents/__tests__/editor.test.ts
+++ b/agents/__tests__/editor.test.ts
@@ -67,6 +67,11 @@ describe('editor agent', () => {
       expect(glmEditor.model).toBe('z-ai/glm-5.1')
     })
 
+    test('creates kimi editor', () => {
+      const kimiEditor = createCodeEditor({ model: 'kimi' })
+      expect(kimiEditor.model).toBe('moonshotai/kimi-k2.6')
+    })
+
     test('creates minimax editor', () => {
       const minimaxEditor = createCodeEditor({ model: 'minimax' })
       expect(minimaxEditor.model).toBe('minimax/minimax-m2.7')
@@ -84,6 +89,12 @@ describe('editor agent', () => {
       expect(glmEditor.instructionsPrompt).not.toContain('</think>')
     })
 
+    test('kimi editor does not include think tags in instructions', () => {
+      const kimiEditor = createCodeEditor({ model: 'kimi' })
+      expect(kimiEditor.instructionsPrompt).not.toContain('<think>')
+      expect(kimiEditor.instructionsPrompt).not.toContain('</think>')
+    })
+
     test('minimax editor does not include think tags in instructions', () => {
       const minimaxEditor = createCodeEditor({ model: 'minimax' })
       expect(minimaxEditor.instructionsPrompt).not.toContain('<think>')
diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts
index 1a81f948bf..b1e24efff6 100644
--- a/agents/base2/base2.ts
+++ b/agents/base2/base2.ts
@@ -25,7 +25,7 @@ export function createBase2(
   const isFree = mode === 'free' || mode === 'lite'
 
   const isSonnet = false
-  const model = isFree ? 'z-ai/glm-5.1' : 'anthropic/claude-opus-4.7'
+  const model = isFree ? 'moonshotai/kimi-k2.6' : 'anthropic/claude-opus-4.7'
 
   return {
     publisher,
diff --git a/agents/editor/editor-lite.ts b/agents/editor/editor-lite.ts
index 29225f0c29..6dbb4bb3c6 100644
--- a/agents/editor/editor-lite.ts
+++ b/agents/editor/editor-lite.ts
@@ -3,7 +3,7 @@ import { createCodeEditor } from './editor'
 import type { AgentDefinition } from '../types/agent-definition'
 
 const definition: AgentDefinition = {
-  ...createCodeEditor({ model: 'glm' }),
+  ...createCodeEditor({ model: 'kimi' }),
   id: 'editor-lite',
 }
 export default definition
diff --git a/agents/editor/editor.ts b/agents/editor/editor.ts
index c98544d0f2..bb31eaaeb1 100644
--- a/agents/editor/editor.ts
+++ b/agents/editor/editor.ts
@@ -4,7 +4,7 @@ import { publisher } from '../constants'
 import type { AgentDefinition } from '../types/agent-definition'
 
 export const createCodeEditor = (options: {
-  model: 'gpt-5' | 'opus' | 'glm' | 'minimax'
+  model: 'gpt-5' | 'opus' | 'glm' | 'kimi' | 'minimax'
 }): Omit<AgentDefinition, 'id'> => {
   const { model } = options
   return {
@@ -14,6 +14,8 @@ export const createCodeEditor = (options: {
         ? 'openai/gpt-5.1'
         : options.model === 'minimax'
           ? 'minimax/minimax-m2.7'
+        : options.model === 'kimi'
+          ? 'moonshotai/kimi-k2.6'
         : options.model === 'glm'
           ? 'z-ai/glm-5.1'
           : 'anthropic/claude-opus-4.7',
@@ -67,7 +69,7 @@ OR for new files or major rewrites:
 }
 </codebuff_tool_call>
 
-${model === 'gpt-5' || model === 'glm' || model === 'minimax'
+${model === 'gpt-5' || model === 'glm' || model === 'kimi' || model === 'minimax'
         ? ''
         : `Before you start writing your implementation, you should use <think> tags to think about the best way to implement the changes.
 
diff --git a/agents/reviewer/code-reviewer-lite.ts b/agents/reviewer/code-reviewer-lite.ts
index feafb87c45..888cadf4f7 100644
--- a/agents/reviewer/code-reviewer-lite.ts
+++ b/agents/reviewer/code-reviewer-lite.ts
@@ -5,7 +5,7 @@ import { createReviewer } from './code-reviewer'
 const definition: SecretAgentDefinition = {
   id: 'code-reviewer-lite',
   publisher,
-  ...createReviewer('z-ai/glm-5.1'),
+  ...createReviewer('moonshotai/kimi-k2.6'),
 }
 
 export default definition
diff --git a/agents/types/agent-definition.ts b/agents/types/agent-definition.ts
index 3608f36315..088dd1dca1 100644
--- a/agents/types/agent-definition.ts
+++ b/agents/types/agent-definition.ts
@@ -423,6 +423,7 @@ export type ModelName =
   // Other open source models
   | 'moonshotai/kimi-k2'
   | 'moonshotai/kimi-k2:nitro'
+  | 'moonshotai/kimi-k2.6'
   | 'z-ai/glm-5'
   | 'z-ai/glm-5.1'
   | 'z-ai/glm-4.6'
diff --git a/cli/src/components/freebuff-model-selector.tsx b/cli/src/components/freebuff-model-selector.tsx
index a453a15389..ddc2922ab6 100644
--- a/cli/src/components/freebuff-model-selector.tsx
+++ b/cli/src/components/freebuff-model-selector.tsx
@@ -5,7 +5,7 @@ import React, { useCallback, useEffect, useMemo, useState } from 'react'
 import { Button } from './button'
 import {
   FALLBACK_FREEBUFF_MODEL_ID,
-  FREEBUFF_GLM_MODEL_ID,
+  FREEBUFF_KIMI_MODEL_ID,
   FREEBUFF_MODELS,
   getFreebuffDeploymentAvailabilityLabel,
   isFreebuffModelAvailable,
@@ -25,8 +25,8 @@ import {
 import type { KeyEvent } from '@opentui/core'
 
 const FREEBUFF_MODEL_SELECTOR_MODELS = [
-  ...FREEBUFF_MODELS.filter((model) => model.id === FREEBUFF_GLM_MODEL_ID),
-  ...FREEBUFF_MODELS.filter((model) => model.id !== FREEBUFF_GLM_MODEL_ID),
+  ...FREEBUFF_MODELS.filter((model) => model.id === FREEBUFF_KIMI_MODEL_ID),
+  ...FREEBUFF_MODELS.filter((model) => model.id !== FREEBUFF_KIMI_MODEL_ID),
 ]
 
 /**
@@ -72,7 +72,7 @@ export const FreebuffModelSelector: React.FC = () => {
     // unavailable (e.g. deployment hours close while the picker is open),
     // swap to the always-available fallback so Enter doesn't POST a model
     // the server will immediately reject. In-memory only — the user's saved
-    // preference (e.g. GLM) is preserved for the next launch.
+    // preference (e.g. Kimi) is preserved for the next launch.
     if (
       (session?.status === 'none' || !session) &&
       !isFreebuffModelAvailable(selectedModel, new Date(now))
diff --git a/cli/src/components/waiting-room-screen.tsx b/cli/src/components/waiting-room-screen.tsx
index 9ccba664a7..7f83f748d6 100644
--- a/cli/src/components/waiting-room-screen.tsx
+++ b/cli/src/components/waiting-room-screen.tsx
@@ -260,7 +260,7 @@ export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
                   <span>Elapsed </span>
                   {formatElapsed(elapsedMs)}
                 </text>
-                {/* Per-model session quota (e.g. GLM 5.1 caps at 5/12h). Only
+                {/* Per-model session quota (e.g. Kimi K2.6 caps at 5/12h). Only
                     rendered for rate-limited models so the Minimax queue stays
                     clutter-free. */}
                 {session.rateLimit && (
@@ -343,7 +343,7 @@ export const WaitingRoomScreen: React.FC<WaitingRoomScreenProps> = ({
             </>
           )}
 
-          {/* Per-model session quota exhausted (e.g. 5+ GLM sessions in the
+          {/* Per-model session quota exhausted (e.g. 5+ Kimi sessions in the
               last 12h). Terminal for this run — the user can exit and come
               back once the oldest session in the window rolls off. */}
           {session?.status === 'rate_limited' && (
diff --git a/cli/src/hooks/use-freebuff-session.ts b/cli/src/hooks/use-freebuff-session.ts
index 463a49126f..c78d4bbd0b 100644
--- a/cli/src/hooks/use-freebuff-session.ts
+++ b/cli/src/hooks/use-freebuff-session.ts
@@ -104,7 +104,7 @@ async function callSession(
       return body
     }
   }
-  // 429 from POST is the per-model session-quota reject (e.g. too many GLM
+  // 429 from POST is the per-model session-quota reject (e.g. too many Kimi
   // sessions in the last 12h). Terminal for the current poll — the CLI shows
   // a screen explaining the limit and when the user can try again. The 429
   // status (rather than 200) keeps older CLIs in their error path so they
@@ -442,9 +442,9 @@ export function useFreebuffSession(): UseFreebuffSessionResult {
         }
         if (next.status === 'model_unavailable') {
           // Server says the requested model isn't available right now (e.g.
-          // GLM outside deployment hours). Flip to the always-available
+          // Kimi outside deployment hours). Flip to the always-available
           // fallback for this run. In-memory only — `setSelectedModel`
-          // doesn't persist, so the user's saved preference (e.g. GLM)
+          // doesn't persist, so the user's saved preference (e.g. Kimi)
           // is preserved for their next launch during deployment hours.
           useFreebuffModelStore
             .getState()
diff --git a/common/src/__tests__/freebuff-models.test.ts b/common/src/__tests__/freebuff-models.test.ts
index 0d01d2762c..752f6bb286 100644
--- a/common/src/__tests__/freebuff-models.test.ts
+++ b/common/src/__tests__/freebuff-models.test.ts
@@ -1,11 +1,17 @@
 import { describe, expect, test } from 'bun:test'
 
 import {
+  DEFAULT_FREEBUFF_MODEL_ID,
+  FREEBUFF_KIMI_MODEL_ID,
   getFreebuffDeploymentAvailabilityLabel,
   isFreebuffDeploymentHours,
 } from '../constants/freebuff-models'
 
 describe('freebuff model availability', () => {
+  test('defaults to Kimi K2.6', () => {
+    expect(DEFAULT_FREEBUFF_MODEL_ID).toBe(FREEBUFF_KIMI_MODEL_ID)
+  })
+
   test('formats the close time in the user local timezone while deployment is open', () => {
     expect(
       getFreebuffDeploymentAvailabilityLabel(new Date('2026-01-05T18:00:00Z'), {
diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts
index 308e12df6d..4a2a4a147e 100644
--- a/common/src/constants/free-agents.ts
+++ b/common/src/constants/free-agents.ts
@@ -28,7 +28,7 @@ export const FREE_MODE_AGENT_MODELS: Record<string, Set<string>> = {
   // Root orchestrator
   'base2-free': new Set([
     'minimax/minimax-m2.7',
-    'z-ai/glm-5.1',
+    'moonshotai/kimi-k2.6',
   ]),
 
   // File exploration agents
@@ -46,13 +46,13 @@ export const FREE_MODE_AGENT_MODELS: Record<string, Set<string>> = {
   // Editor for free mode
   'editor-lite': new Set([
     'minimax/minimax-m2.7',
-    'z-ai/glm-5.1',
+    'moonshotai/kimi-k2.6',
   ]),
 
   // Code reviewer for free mode
   'code-reviewer-lite': new Set([
     'minimax/minimax-m2.7',
-    'z-ai/glm-5.1',
+    'moonshotai/kimi-k2.6',
   ]),
 }
 
diff --git a/common/src/constants/freebuff-models.ts b/common/src/constants/freebuff-models.ts
index 8b3e9d82d9..9c6ff423ee 100644
--- a/common/src/constants/freebuff-models.ts
+++ b/common/src/constants/freebuff-models.ts
@@ -21,7 +21,7 @@ export interface FreebuffModelOption {
  *  the caller's local timezone. The CLI should render
  *  `getFreebuffDeploymentAvailabilityLabel()` instead. */
 export const FREEBUFF_DEPLOYMENT_HOURS_LABEL = '9am ET-5pm PT every day'
-export const FREEBUFF_GLM_MODEL_ID = 'z-ai/glm-5.1'
+export const FREEBUFF_KIMI_MODEL_ID = 'moonshotai/kimi-k2.6'
 export const FREEBUFF_MINIMAX_MODEL_ID = 'minimax/minimax-m2.7'
 const FREEBUFF_EASTERN_TIMEZONE = 'America/New_York'
 const FREEBUFF_PACIFIC_TIMEZONE = 'America/Los_Angeles'
@@ -47,8 +47,8 @@ export const FREEBUFF_MODELS = [
     availability: 'always',
   },
   {
-    id: FREEBUFF_GLM_MODEL_ID,
-    displayName: 'GLM 5.1',
+    id: FREEBUFF_KIMI_MODEL_ID,
+    displayName: 'Kimi K2.6',
     tagline: 'Smartest',
     availability: 'deployment_hours',
   },
@@ -57,15 +57,15 @@ export const FREEBUFF_MODELS = [
 export type FreebuffModelId = (typeof FREEBUFF_MODELS)[number]['id']
 
 /** What new freebuff users see selected in the picker. May not be currently
- *  available (GLM is closed outside deployment hours); callers that need an
+ *  available (Kimi is closed outside deployment hours); callers that need an
  *  always-available id for resolution / auto-fallbacks should use
  *  FALLBACK_FREEBUFF_MODEL_ID instead. */
-export const DEFAULT_FREEBUFF_MODEL_ID: FreebuffModelId = FREEBUFF_GLM_MODEL_ID
+export const DEFAULT_FREEBUFF_MODEL_ID: FreebuffModelId = FREEBUFF_KIMI_MODEL_ID
 
 /** Always-available fallback used when the requested model can't be served
  *  right now (unknown id, deployment hours closed, etc.). Kept distinct from
  *  DEFAULT_FREEBUFF_MODEL_ID so a new user's "preferred default" can be the
- *  smartest model without auto-flipping anyone to a closed deployment. */
+ *  smartest model without auto-flipping anyone to a closed serverless model. */
 export const FALLBACK_FREEBUFF_MODEL_ID: FreebuffModelId =
   FREEBUFF_MINIMAX_MODEL_ID
 
diff --git a/common/src/templates/initial-agents-dir/types/agent-definition.ts b/common/src/templates/initial-agents-dir/types/agent-definition.ts
index 3608f36315..088dd1dca1 100644
--- a/common/src/templates/initial-agents-dir/types/agent-definition.ts
+++ b/common/src/templates/initial-agents-dir/types/agent-definition.ts
@@ -423,6 +423,7 @@ export type ModelName =
   // Other open source models
   | 'moonshotai/kimi-k2'
   | 'moonshotai/kimi-k2:nitro'
+  | 'moonshotai/kimi-k2.6'
   | 'z-ai/glm-5'
   | 'z-ai/glm-5.1'
   | 'z-ai/glm-4.6'
diff --git a/common/src/types/freebuff-session.ts b/common/src/types/freebuff-session.ts
index 31fc4c87ea..428a73df41 100644
--- a/common/src/types/freebuff-session.ts
+++ b/common/src/types/freebuff-session.ts
@@ -9,7 +9,7 @@
 /**
  * Per-model usage counter surfaced to the CLI so the waiting-room UI can
  * render "N of M sessions used" alongside queue/active state. Present when
- * the joined model has a rate limit applied (today: GLM 5.1 with 5 admits
+ * the joined model has a rate limit applied (today: Kimi K2.6 with 5 admits
  * per 12-hour window). `recentCount` is the number of admissions inside
  * `windowHours` at the time the response was produced — see also the
  * standalone `rate_limited` status for the reject path.
@@ -72,7 +72,7 @@ export type FreebuffSessionServerResponse =
       queueDepthByModel: Record<string, number>
       estimatedWaitMs: number
       queuedAt: string
-      /** Rate-limit quota for rate-limited models (GLM 5.1 today). Absent
+      /** Rate-limit quota for rate-limited models (Kimi K2.6 today). Absent
        *  for unlimited models or when the status was produced outside the
        *  rate-limit check path (e.g. pure read via GET). */
       rateLimit?: FreebuffSessionRateLimit
@@ -85,7 +85,7 @@ export type FreebuffSessionServerResponse =
       admittedAt: string
       expiresAt: string
       remainingMs: number
-      /** Rate-limit quota for rate-limited models (GLM 5.1 today). Absent
+      /** Rate-limit quota for rate-limited models (Kimi K2.6 today). Absent
        *  for unlimited models or when the status was produced outside the
        *  rate-limit check path (e.g. pure read via GET). */
       rateLimit?: FreebuffSessionRateLimit
@@ -131,7 +131,7 @@ export type FreebuffSessionServerResponse =
       /** User has an active session bound to a different model. Returned
        *  from POST /session when they pick a new model without ending their
        *  current session first. The CLI shows a confirmation prompt: "End
-       *  your active GLM session to switch?" → on confirm, DELETE then
+       *  your active Kimi session to switch?" → on confirm, DELETE then
        *  re-POST with the new model. */
       status: 'model_locked'
       currentModel: string
@@ -152,7 +152,7 @@ export type FreebuffSessionServerResponse =
     }
   | {
       /** User has used up their per-model admission quota in the rolling
-       *  window (GLM 5.1: 5 one-hour sessions per 12h). Returned from POST
+       *  window (Kimi K2.6: 5 one-hour sessions per 12h). Returned from POST
        *  /session before the user is placed in the queue. `retryAfterMs` is
        *  the time until the oldest admission inside the window falls off
        *  and one quota slot opens up — clients should show the user when
diff --git a/freebuff/README.md b/freebuff/README.md
index 0749fc7c0b..1ba4405f63 100644
--- a/freebuff/README.md
+++ b/freebuff/README.md
@@ -54,7 +54,7 @@ freebuff
 
 **How can it be free?** Freebuff is supported by ads shown in the CLI.
 
-**What models do you use?** GLM 5.1 as the main coding agent, Gemini 3.1 Flash Lite for finding files and research, and GPT-5.4 for deep thinking if you connect your ChatGPT subscription.
+**What models do you use?** Kimi K2.6 as the main coding agent, Gemini 3.1 Flash Lite for finding files and research, and GPT-5.4 for deep thinking if you connect your ChatGPT subscription.
 
 **Are you training on my data?** No. We only use model providers that do not train on our requests. Your code stays yours.
 
diff --git a/freebuff/SPEC.md b/freebuff/SPEC.md
index 195081533c..5fad083691 100644
--- a/freebuff/SPEC.md
+++ b/freebuff/SPEC.md
@@ -84,7 +84,7 @@ Freebuff only supports **FREE mode**. All mode-related features are stripped.
 | `/agent:gpt-5` | Premium agent, not available in free tier |
 | `/review` | Uses thinker-gpt under the hood |
 | `/publish` | Agent publishing not available in free tier |
-| `/image` (+ `/img`, `/attach`) | Image attachments unavailable with free model (GLM 5.1) |
+| `/image` (+ `/img`, `/attach`) | Image attachments unavailable with free model (Kimi K2.6) |
 
 ### Commands to KEEP
 
diff --git a/freebuff/web/src/app/home-client.tsx b/freebuff/web/src/app/home-client.tsx
index 3cff424a37..6a016272e4 100644
--- a/freebuff/web/src/app/home-client.tsx
+++ b/freebuff/web/src/app/home-client.tsx
@@ -31,7 +31,7 @@ const faqs = [
   {
     question: 'What models do you use?',
     answer:
-      'GLM 5.1 as the main coding agent. Gemini 3.1 Flash Lite for finding files and research.\n\nConnect your ChatGPT subscription to unlock GPT-5.4 for deep thinking.',
+      'Kimi K2.6 as the main coding agent. Gemini 3.1 Flash Lite for finding files and research.\n\nConnect your ChatGPT subscription to unlock GPT-5.4 for deep thinking.',
   },
   {
     question: 'Which countries is Freebuff available in?',
diff --git a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
index e0b531c706..ce28f91e01 100644
--- a/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
+++ b/web/src/app/api/v1/chat/completions/__tests__/completions.test.ts
@@ -642,7 +642,7 @@ describe('/api/v1/chat/completions POST endpoint', () => {
       expect(body.countryBlockReason).toBe('anonymized_or_unknown_country')
     })
 
-    it('lets freebuff use GLM 5.1 through Fireworks availability rules', async () => {
+    it('lets freebuff use Kimi K2.6 through Fireworks availability rules', async () => {
       const fetchedBodies: Record<string, unknown>[] = []
       const fetchViaFireworks = mock(
         async (_url: string | URL | Request, init?: RequestInit) => {
@@ -650,7 +650,7 @@ describe('/api/v1/chat/completions POST endpoint', () => {
           return new Response(
             JSON.stringify({
               id: 'test-id',
-              model: 'accounts/fireworks/models/glm-5p1',
+              model: 'accounts/fireworks/models/kimi-k2p6',
               choices: [{ message: { content: 'test response' } }],
               usage: {
                 prompt_tokens: 10,
@@ -672,7 +672,7 @@ describe('/api/v1/chat/completions POST endpoint', () => {
           method: 'POST',
           headers: allowedFreeModeHeaders('test-api-key-new-free'),
           body: JSON.stringify({
-            model: 'z-ai/glm-5.1',
+            model: 'moonshotai/kimi-k2.6',
             stream: false,
             codebuff_metadata: {
               run_id: 'run-free',
@@ -701,9 +701,9 @@ describe('/api/v1/chat/completions POST endpoint', () => {
         expect(response.status).toBe(200)
         expect(fetchedBodies).toHaveLength(1)
         expect(fetchedBodies[0].model).toBe(
-          'accounts/fireworks/models/glm-5p1',
+          'accounts/fireworks/models/kimi-k2p6',
         )
-        expect(body.model).toBe('z-ai/glm-5.1')
+        expect(body.model).toBe('moonshotai/kimi-k2.6')
         expect(body.provider).toBe('Fireworks')
       } else {
         expect(response.status).toBe(503)
diff --git a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts
index 4c55a6458b..54481dca88 100644
--- a/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts
+++ b/web/src/app/api/v1/freebuff/session/__tests__/session.test.ts
@@ -281,10 +281,10 @@ describe('POST /api/v1/freebuff/session', () => {
     expect(body.status).toBe('queued')
   })
 
-  test('returns model_unavailable for GLM outside deployment hours', async () => {
+  test('returns model_unavailable for Kimi outside deployment hours', async () => {
     const sessionDeps = makeSessionDeps()
     const resp = await postFreebuffSession(
-      makeReq('ok', { model: 'z-ai/glm-5.1' }),
+      makeReq('ok', { model: 'moonshotai/kimi-k2.6' }),
       makeDeps(sessionDeps, 'u1'),
     )
     expect(resp.status).toBe(409)
diff --git a/web/src/app/docs/[category]/[slug]/page.tsx b/web/src/app/docs/[category]/[slug]/page.tsx
index 44d5174e0a..21d093d494 100644
--- a/web/src/app/docs/[category]/[slug]/page.tsx
+++ b/web/src/app/docs/[category]/[slug]/page.tsx
@@ -33,7 +33,7 @@ const FAQ_ITEMS = [
   {
     question: 'What model does Codebuff use?',
     answer:
-      'Multiple. The orchestrator uses Claude Opus 4.7 in Default and Max modes, or GLM 5.1 in Lite mode. Subagents are matched to their tasks: Claude Opus 4.7 and GPT-5.4 for deep reasoning and code review, and Gemini 3.1 Flash Lite for terminal commands, file discovery, and web/docs research.',
+      'Multiple. The orchestrator uses Claude Opus 4.7 in Default and Max modes, or Kimi K2.6 in Lite mode. Subagents are matched to their tasks: Claude Opus 4.7 and GPT-5.4 for deep reasoning and code review, and Gemini 3.1 Flash Lite for terminal commands, file discovery, and web/docs research.',
   },
   {
     question: 'Can I use my Claude Pro or Max subscription with Codebuff?',
diff --git a/web/src/content/advanced/how-does-it-work.mdx b/web/src/content/advanced/how-does-it-work.mdx
index 08f13366f5..79d2ecab31 100644
--- a/web/src/content/advanced/how-does-it-work.mdx
+++ b/web/src/content/advanced/how-does-it-work.mdx
@@ -24,8 +24,8 @@ The main agent ("Buffy") runs on Claude Opus 4.7. It reads your prompt, gathers
 - [**Code Searcher**](/publishers/codebuff/agents/code-searcher) - grep-style pattern matching
 - [**Researcher**](/publishers/codebuff/agents/researcher) (Gemini 3.1 Flash Lite) - web and docs lookup
 - [**Thinker**](/publishers/codebuff/agents/thinker) (Claude Opus 4.7, GPT-5.4) - works through hard problems
-- [**Editor**](/publishers/codebuff/agents/editor) (Claude Opus 4.7, GPT-5.1, GLM 5.1) - writes and modifies code
-- [**Reviewer**](/publishers/codebuff/agents/reviewer) (Claude Opus 4.7, GLM 5.1 in Lite mode) - catches bugs and style issues
+- [**Editor**](/publishers/codebuff/agents/editor) (Claude Opus 4.7, GPT-5.1, Kimi K2.6) - writes and modifies code
+- [**Reviewer**](/publishers/codebuff/agents/reviewer) (Claude Opus 4.7, Kimi K2.6 in Lite mode) - catches bugs and style issues
 - [**Basher**](/publishers/codebuff/agents/basher) (Gemini 3.1 Flash Lite) - runs terminal commands
 
 ## Best-of-N Selection (Max Mode)
diff --git a/web/src/content/advanced/what-models.mdx b/web/src/content/advanced/what-models.mdx
index 6fb3cd7367..f3dc59b386 100644
--- a/web/src/content/advanced/what-models.mdx
+++ b/web/src/content/advanced/what-models.mdx
@@ -19,7 +19,7 @@ The main agent ("Buffy") coordinates everything:
   | Default | Opus 4.7 |
   | Plan | Opus 4.7 |
   | Max | Opus 4.7 |
-  | Lite | GLM 5.1 |
+  | Lite | Kimi K2.6 |
 </MarkdownTable>
 
 ## Subagents
@@ -29,7 +29,7 @@ The orchestrator spawns these for specific jobs:
 <MarkdownTable>
   | Task | Models |
   |------|--------|
-  | Code editing | Claude Opus 4.7, GLM 5.1 |
+  | Code editing | Claude Opus 4.7, Kimi K2.6 |
   | Thinking/reasoning | Claude Opus 4.7, GPT-5.4 |
   | Code review | Claude Opus 4.7, GPT-5.4 |
   | File discovery | Gemini 3.1 Flash Lite, Gemini 2.5 Flash Lite |
@@ -37,4 +37,4 @@ The orchestrator spawns these for specific jobs:
   | Web/docs research | Gemini 3.1 Flash Lite |
 </MarkdownTable>
 
-Max mode runs multiple implementations in parallel and picks the best one. Default mode runs a single implementation pass. Lite mode uses GLM 5.1 and includes code review support.
+Max mode runs multiple implementations in parallel and picks the best one. Default mode runs a single implementation pass. Lite mode uses Kimi K2.6 and includes code review support.
diff --git a/web/src/content/help/faq.mdx b/web/src/content/help/faq.mdx
index 477adbd8f5..bfd1df0839 100644
--- a/web/src/content/help/faq.mdx
+++ b/web/src/content/help/faq.mdx
@@ -13,7 +13,7 @@ Software development: Writing features, tests, and scripts across common languag
 
 ## What model does Codebuff use?
 
-Multiple. The orchestrator uses Claude Opus 4.7 in Default and Max modes, or GLM 5.1 in Lite mode. Subagents are matched to their tasks: Claude Opus 4.7 and GPT-5.4 for deep reasoning and code review, and Gemini 3.1 Flash Lite for terminal commands, file discovery, and web/docs research. See [What models do you use?](/docs/advanced/what-models) for the full breakdown.
+Multiple. The orchestrator uses Claude Opus 4.7 in Default and Max modes, or Kimi K2.6 in Lite mode. Subagents are matched to their tasks: Claude Opus 4.7 and GPT-5.4 for deep reasoning and code review, and Gemini 3.1 Flash Lite for terminal commands, file discovery, and web/docs research. See [What models do you use?](/docs/advanced/what-models) for the full breakdown.
 
 ## Can I use my Claude Pro or Max subscription with Codebuff?
 
diff --git a/web/src/content/tips/modes.mdx b/web/src/content/tips/modes.mdx
index 1b67daecd6..acab5d8aaa 100644
--- a/web/src/content/tips/modes.mdx
+++ b/web/src/content/tips/modes.mdx
@@ -15,7 +15,7 @@ Codebuff has four modes. Switch during a session with `Shift+Tab` or `/mode:` co
   | Default | Claude Opus 4.7 | editor | Yes |
   | Max | Claude Opus 4.7 | editor-multi-prompt | Yes |
   | Plan | Claude Opus 4.7 | None | No |
-  | Lite | GLM 5.1 | None | No |
+  | Lite | Kimi K2.6 | None | No |
 </MarkdownTable>
 
 ## Default
@@ -60,7 +60,7 @@ Switch to this mode with `/mode:plan`.
 
 ## Lite
 
-GLM 5.1, cheaper and faster.
+Kimi K2.6, cheaper and faster.
 
 An efficient mode for most coding tasks.
 
diff --git a/web/src/llm-api/__tests__/fireworks-deployment.test.ts b/web/src/llm-api/__tests__/fireworks-deployment.test.ts
index 00ccf1f816..2d897767ae 100644
--- a/web/src/llm-api/__tests__/fireworks-deployment.test.ts
+++ b/web/src/llm-api/__tests__/fireworks-deployment.test.ts
@@ -12,6 +12,7 @@ import {
 import type { Logger } from '@codebuff/common/types/contracts/logger'
 
 const STANDARD_MODEL_ID = 'accounts/fireworks/models/glm-5p1'
+const KIMI_STANDARD_MODEL_ID = 'accounts/fireworks/models/kimi-k2p6'
 const DEPLOYMENT_MODEL_ID = 'accounts/james-65d217/deployments/mjb4i7ea'
 const TEST_DEPLOYMENT_MAP = {
   'z-ai/glm-5.1': DEPLOYMENT_MODEL_ID,
@@ -91,6 +92,14 @@ describe('Fireworks deployment routing', () => {
       model: 'z-ai/glm-5.1',
       messages: [{ role: 'user' as const, content: 'test' }],
     }
+    const kimiBody = {
+      model: 'moonshotai/kimi-k2.6',
+      messages: [{ role: 'user' as const, content: 'test' }],
+    }
+    const kimiLiteBody = {
+      ...kimiBody,
+      codebuff_metadata: { cost_mode: 'lite' },
+    }
     const liteBody = {
       ...minimalBody,
       codebuff_metadata: { cost_mode: 'lite' },
@@ -143,6 +152,55 @@ describe('Fireworks deployment routing', () => {
       expect(fetchCalls).toEqual([STANDARD_MODEL_ID])
     })
 
+    it('uses serverless API for Kimi during hours without a deployment', async () => {
+      const fetchCalls: string[] = []
+
+      const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
+        const body = JSON.parse(init?.body as string)
+        fetchCalls.push(body.model)
+        return new Response(JSON.stringify({ ok: true }), { status: 200 })
+      }) as unknown as typeof globalThis.fetch
+
+      const response = await createFireworksRequestWithFallback({
+        body: kimiBody as never,
+        originalModel: 'moonshotai/kimi-k2.6',
+        fetch: mockFetch,
+        logger,
+        useCustomDeployment: true,
+        deploymentMap: {
+          'z-ai/glm-5.1': DEPLOYMENT_MODEL_ID,
+        },
+        sessionId: 'test-user-id',
+        now: IN_DEPLOYMENT_HOURS,
+      })
+
+      expect(response.status).toBe(200)
+      expect(fetchCalls).toEqual([KIMI_STANDARD_MODEL_ID])
+    })
+
+    it('keeps Kimi unavailable outside hours when no deployment is mapped', async () => {
+      const mockFetch = mock(async () => {
+        throw new Error('should not fetch outside deployment hours')
+      }) as unknown as typeof globalThis.fetch
+
+      const response = await createFireworksRequestWithFallback({
+        body: kimiBody as never,
+        originalModel: 'moonshotai/kimi-k2.6',
+        fetch: mockFetch,
+        logger,
+        useCustomDeployment: true,
+        deploymentMap: {
+          'z-ai/glm-5.1': DEPLOYMENT_MODEL_ID,
+        },
+        sessionId: 'test-user-id',
+        now: BEFORE_DEPLOYMENT_HOURS,
+      })
+
+      expect(response.status).toBe(503)
+      const body = await response.json()
+      expect(body.error.code).toBe('DEPLOYMENT_OUTSIDE_HOURS')
+    })
+
     it('keeps GLM unavailable outside hours when no deployment is mapped', async () => {
       const mockFetch = mock(async () => {
         throw new Error('should not fetch outside deployment hours')
@@ -356,7 +414,7 @@ describe('Fireworks deployment routing', () => {
       expect(body.error.code).toBe('DEPLOYMENT_OUTSIDE_HOURS')
     })
 
-    it('falls back to the standard Fireworks API in lite mode outside deployment hours', async () => {
+    it('falls back to the standard Fireworks API for Kimi lite mode outside deployment hours', async () => {
       const fetchCalls: string[] = []
 
       const mockFetch = mock(async (_url: string | URL | Request, init?: RequestInit) => {
@@ -366,8 +424,8 @@ describe('Fireworks deployment routing', () => {
       }) as unknown as typeof globalThis.fetch
 
       const response = await createFireworksRequestWithFallback({
-        body: liteBody as never,
-        originalModel: 'z-ai/glm-5.1',
+        body: kimiLiteBody as never,
+        originalModel: 'moonshotai/kimi-k2.6',
         fetch: mockFetch,
         logger,
         useCustomDeployment: true,
@@ -377,7 +435,7 @@ describe('Fireworks deployment routing', () => {
       })
 
       expect(response.status).toBe(200)
-      expect(fetchCalls).toEqual([STANDARD_MODEL_ID])
+      expect(fetchCalls).toEqual([KIMI_STANDARD_MODEL_ID])
     })
 
     it('returns non-5xx responses from deployment without fallback (e.g. 429)', async () => {
diff --git a/web/src/llm-api/canopywave.ts b/web/src/llm-api/canopywave.ts
index 341bc239ce..4e87b1e55a 100644
--- a/web/src/llm-api/canopywave.ts
+++ b/web/src/llm-api/canopywave.ts
@@ -49,14 +49,6 @@ const CANOPYWAVE_MODELS: Record<
       outputCostPerToken: 1.08 / 1_000_000,
     },
   },
-  'moonshotai/kimi-k2.6': {
-    canopywaveId: 'moonshotai/kimi-k2.6',
-    pricing: {
-      inputCostPerToken: 0.95 / 1_000_000,
-      cachedInputCostPerToken: 0.16 / 1_000_000,
-      outputCostPerToken: 4.00 / 1_000_000,
-    },
-  },
 }
 
 export function isCanopyWaveModel(model: string): boolean {
diff --git a/web/src/llm-api/fireworks.ts b/web/src/llm-api/fireworks.ts
index b0013e62a1..6bd5851fe0 100644
--- a/web/src/llm-api/fireworks.ts
+++ b/web/src/llm-api/fireworks.ts
@@ -2,7 +2,7 @@ import { Agent } from 'undici'
 
 import {
   FREEBUFF_DEPLOYMENT_HOURS_LABEL,
-  FREEBUFF_GLM_MODEL_ID,
+  FREEBUFF_KIMI_MODEL_ID,
   isFreebuffDeploymentHours,
 } from '@codebuff/common/constants/freebuff-models'
 import { PROFIT_MARGIN } from '@codebuff/common/constants/limits'
@@ -36,12 +36,14 @@ const fireworksAgent = new Agent({
 const FIREWORKS_MODEL_MAP: Record<string, string> = {
   'minimax/minimax-m2.5': 'accounts/fireworks/models/minimax-m2p5',
   'minimax/minimax-m2.7': 'accounts/fireworks/models/minimax-m2p7',
+  'moonshotai/kimi-k2.6': 'accounts/fireworks/models/kimi-k2p6',
   'z-ai/glm-5.1': 'accounts/fireworks/models/glm-5p1',
 }
 
 /** Models that stay limited to freebuff deployment hours even on serverless. */
 const FIREWORKS_HOURS_GATED_MODELS = new Set<string>([
-  FREEBUFF_GLM_MODEL_ID,
+  FREEBUFF_KIMI_MODEL_ID,
+  'z-ai/glm-5.1',
 ])
 
 /** Flag to enable custom Fireworks deployments (set to false to use global API only) */
@@ -169,6 +171,11 @@ const FIREWORKS_PRICING_MAP: Record<string, FireworksPricing> = {
     cachedInputCostPerToken: 0.06 / 1_000_000,
     outputCostPerToken: 1.20 / 1_000_000,
   },
+  'moonshotai/kimi-k2.6': {
+    inputCostPerToken: 0.95 / 1_000_000,
+    cachedInputCostPerToken: 0.16 / 1_000_000,
+    outputCostPerToken: 4.00 / 1_000_000,
+  },
   'z-ai/glm-5.1': {
     inputCostPerToken: 1.40 / 1_000_000,
     cachedInputCostPerToken: 0.26 / 1_000_000,
diff --git a/web/src/server/free-session/__tests__/public-api.test.ts b/web/src/server/free-session/__tests__/public-api.test.ts
index f46a0f8c4c..7f08d2bddb 100644
--- a/web/src/server/free-session/__tests__/public-api.test.ts
+++ b/web/src/server/free-session/__tests__/public-api.test.ts
@@ -203,12 +203,12 @@ describe('requestSession', () => {
   test('deployment-hours-only model is unavailable outside deployment hours', async () => {
     const state = await requestSession({
       userId: 'u1',
-      model: 'z-ai/glm-5.1',
+      model: 'moonshotai/kimi-k2.6',
       deps,
     })
     expect(state).toEqual({
       status: 'model_unavailable',
-      requestedModel: 'z-ai/glm-5.1',
+      requestedModel: 'moonshotai/kimi-k2.6',
       availableHours: '9am ET-5pm PT every day',
     })
     expect(deps.rows.size).toBe(0)
@@ -216,18 +216,18 @@ describe('requestSession', () => {
 
   test('queued response includes a per-model depth snapshot for the selector', async () => {
     deps._tick(new Date('2026-04-17T16:00:00Z'))
-    // Seed 2 users in MiniMax + 1 in GLM so the returned map captures both.
+    // Seed 2 users in MiniMax + 1 in Kimi so the returned map captures both.
     await requestSession({ userId: 'u1', model: DEFAULT_MODEL, deps })
     deps._tick(new Date(deps._now().getTime() + 1000))
     await requestSession({ userId: 'u2', model: DEFAULT_MODEL, deps })
     deps._tick(new Date(deps._now().getTime() + 1000))
-    await requestSession({ userId: 'u3', model: 'z-ai/glm-5.1', deps })
+    await requestSession({ userId: 'u3', model: 'moonshotai/kimi-k2.6', deps })
 
     const state = await getSessionState({ userId: 'u1', deps })
     if (state.status !== 'queued') throw new Error('unreachable')
     expect(state.queueDepthByModel).toEqual({
       [DEFAULT_MODEL]: 2,
-      'z-ai/glm-5.1': 1,
+      'moonshotai/kimi-k2.6': 1,
     })
   })
 
@@ -302,7 +302,7 @@ describe('requestSession', () => {
   })
 
   test('instant-admit: per-model capacities are independent', async () => {
-    // MiniMax saturated at 1 active, GLM still has room.
+    // MiniMax saturated at 1 active, Kimi still has room.
     const admitDeps = makeDeps({
       getInstantAdmitCapacity: (model) =>
         model === DEFAULT_MODEL ? 1 : 10,
@@ -316,25 +316,25 @@ describe('requestSession', () => {
     })
     const s3 = await requestSession({
       userId: 'u3',
-      model: 'z-ai/glm-5.1',
+      model: 'moonshotai/kimi-k2.6',
       deps: admitDeps,
     })
     expect(s2.status).toBe('queued')
     expect(s3.status).toBe('active')
   })
 
-  // Per-user rate limit (5 GLM admissions per 12h) — the wire limit is
+  // Per-user rate limit (5 Kimi admissions per 12h) — the wire limit is
   // hard-coded in public-api.ts, so tests seed the fake admit log directly
-  // rather than configuring it. GLM also has deployment-hours gating, so
+  // rather than configuring it. Kimi also has deployment-hours gating, so
   // these tests bump `now` into the open window (12pm ET on a weekday)
   // before issuing the request.
-  const GLM_MODEL = 'z-ai/glm-5.1'
-  const GLM_LIMIT = 5
-  const GLM_WINDOW_HOURS = 12
-  const GLM_OPEN_TIME = new Date('2026-04-17T16:00:00Z')
+  const KIMI_MODEL = 'moonshotai/kimi-k2.6'
+  const KIMI_LIMIT = 5
+  const KIMI_WINDOW_HOURS = 12
+  const KIMI_OPEN_TIME = new Date('2026-04-17T16:00:00Z')
 
-  test('rate_limited: 5th GLM admit in window blocks the 6th attempt', async () => {
-    deps._tick(GLM_OPEN_TIME)
+  test('rate_limited: 5th Kimi admit in window blocks the 6th attempt', async () => {
+    deps._tick(KIMI_OPEN_TIME)
     // Seed 5 admits inside the 12h window, spaced so we can verify retryAfter
     // points at the oldest one sliding off.
     const now = deps._now()
@@ -343,22 +343,22 @@ describe('requestSession', () => {
     for (const hoursAgo of ages) {
       deps.admits.push({
         user_id: 'u1',
-        model: GLM_MODEL,
+        model: KIMI_MODEL,
         admitted_at: new Date(now.getTime() - hoursAgo * 60 * 60 * 1000),
       })
     }
 
     const state = await requestSession({
       userId: 'u1',
-      model: GLM_MODEL,
+      model: KIMI_MODEL,
       deps,
     })
     expect(state.status).toBe('rate_limited')
     if (state.status !== 'rate_limited') throw new Error('unreachable')
-    expect(state.model).toBe(GLM_MODEL)
-    expect(state.limit).toBe(GLM_LIMIT)
-    expect(state.windowHours).toBe(GLM_WINDOW_HOURS)
-    expect(state.recentCount).toBe(GLM_LIMIT)
+    expect(state.model).toBe(KIMI_MODEL)
+    expect(state.limit).toBe(KIMI_LIMIT)
+    expect(state.windowHours).toBe(KIMI_WINDOW_HOURS)
+    expect(state.recentCount).toBe(KIMI_LIMIT)
     // Oldest admit is 11h ago; slot opens when it hits 12h, i.e. in 1h.
     expect(state.retryAfterMs).toBe(60 * 60 * 1000)
     // Blocked before any row is written — the user doesn't take a queue slot.
@@ -366,21 +366,21 @@ describe('requestSession', () => {
   })
 
   test('rate_limited: admits outside the 12h window do not count', async () => {
-    deps._tick(GLM_OPEN_TIME)
+    deps._tick(KIMI_OPEN_TIME)
     // 5 admits, each just over 12h old → all fall off the window.
     const now = deps._now()
     for (let i = 0; i < 5; i++) {
       deps.admits.push({
         user_id: 'u1',
-        model: GLM_MODEL,
+        model: KIMI_MODEL,
         admitted_at: new Date(
-          now.getTime() - (GLM_WINDOW_HOURS * 60 * 60 * 1000 + 60_000 + i),
+          now.getTime() - (KIMI_WINDOW_HOURS * 60 * 60 * 1000 + 60_000 + i),
         ),
       })
     }
     const state = await requestSession({
       userId: 'u1',
-      model: GLM_MODEL,
+      model: KIMI_MODEL,
       deps,
     })
     expect(state.status).toBe('queued')
@@ -408,41 +408,41 @@ describe('requestSession', () => {
     expect(state.rateLimit).toBeUndefined()
   })
 
-  test('queued GLM response carries the current admit count', async () => {
-    deps._tick(GLM_OPEN_TIME)
+  test('queued Kimi response carries the current admit count', async () => {
+    deps._tick(KIMI_OPEN_TIME)
     const now = deps._now()
     // 2 admits in the window — under the limit so the user still queues.
     deps.admits.push({
       user_id: 'u1',
-      model: GLM_MODEL,
+      model: KIMI_MODEL,
       admitted_at: new Date(now.getTime() - 60 * 60 * 1000),
     })
     deps.admits.push({
       user_id: 'u1',
-      model: GLM_MODEL,
+      model: KIMI_MODEL,
       admitted_at: new Date(now.getTime() - 30 * 60 * 1000),
     })
     const state = await requestSession({
       userId: 'u1',
-      model: GLM_MODEL,
+      model: KIMI_MODEL,
       deps,
     })
     if (state.status !== 'queued') throw new Error('unreachable')
     expect(state.rateLimit).toEqual({
-      model: GLM_MODEL,
-      limit: GLM_LIMIT,
-      windowHours: GLM_WINDOW_HOURS,
+      model: KIMI_MODEL,
+      limit: KIMI_LIMIT,
+      windowHours: KIMI_WINDOW_HOURS,
       recentCount: 2,
     })
   })
 
-  test('rate_limited: takeover of an active GLM row is allowed even when at cap', async () => {
-    // Reclaim path: user has an active+unexpired GLM session and restarts
+  test('rate_limited: takeover of an active Kimi row is allowed even when at cap', async () => {
+    // Reclaim path: user has an active+unexpired Kimi session and restarts
     // the CLI. POST must rotate their instance id (takeover) and NOT reject
     // with rate_limited — otherwise they'd be stranded with a live session
     // they can't reconnect to. The 5th admission is already in the log, so
     // this also exercises "at the cap" rather than "over the cap".
-    deps._tick(GLM_OPEN_TIME)
+    deps._tick(KIMI_OPEN_TIME)
     const now = deps._now()
     // Seed 5 prior admits (the cap), with the latest one matching the
     // active row we're about to install.
@@ -450,7 +450,7 @@ describe('requestSession', () => {
     for (const hoursAgo of ages) {
       deps.admits.push({
         user_id: 'u1',
-        model: GLM_MODEL,
+        model: KIMI_MODEL,
         admitted_at: new Date(now.getTime() - hoursAgo * 60 * 60 * 1000),
       })
     }
@@ -461,7 +461,7 @@ describe('requestSession', () => {
       user_id: 'u1',
       status: 'active',
       active_instance_id: 'inst-pre',
-      model: GLM_MODEL,
+      model: KIMI_MODEL,
       queued_at: admittedAt,
       admitted_at: admittedAt,
       expires_at: new Date(admittedAt.getTime() + SESSION_LEN),
@@ -471,27 +471,27 @@ describe('requestSession', () => {
 
     const state = await requestSession({
       userId: 'u1',
-      model: GLM_MODEL,
+      model: KIMI_MODEL,
       deps,
     })
     expect(state.status).toBe('active')
     if (state.status !== 'active') throw new Error('unreachable')
     // Instance id rotated; quota snapshot still reflects the full window.
     expect(state.instanceId).not.toBe('inst-pre')
-    expect(state.rateLimit?.recentCount).toBe(GLM_LIMIT)
+    expect(state.rateLimit?.recentCount).toBe(KIMI_LIMIT)
   })
 
-  test('rate_limited: reclaim of a queued GLM row is allowed even when at cap', async () => {
+  test('rate_limited: reclaim of a queued Kimi row is allowed even when at cap', async () => {
     // Same reclaim exception for queued rows: if a user has already queued
     // (say they slipped in just before their 5th admit landed), a subsequent
     // POST from the same CLI must preserve their queue position instead of
     // flipping to rate_limited.
-    deps._tick(GLM_OPEN_TIME)
+    deps._tick(KIMI_OPEN_TIME)
     const now = deps._now()
-    for (let i = 0; i < GLM_LIMIT; i++) {
+    for (let i = 0; i < KIMI_LIMIT; i++) {
       deps.admits.push({
         user_id: 'u1',
-        model: GLM_MODEL,
+        model: KIMI_MODEL,
         admitted_at: new Date(now.getTime() - (i + 1) * 60 * 60 * 1000),
       })
     }
@@ -500,7 +500,7 @@ describe('requestSession', () => {
       user_id: 'u1',
       status: 'queued',
       active_instance_id: 'inst-pre',
-      model: GLM_MODEL,
+      model: KIMI_MODEL,
       queued_at: queuedAt,
       admitted_at: null,
       expires_at: null,
@@ -510,7 +510,7 @@ describe('requestSession', () => {
 
     const state = await requestSession({
       userId: 'u1',
-      model: GLM_MODEL,
+      model: KIMI_MODEL,
       deps,
     })
     expect(state.status).toBe('queued')
@@ -518,20 +518,20 @@ describe('requestSession', () => {
     // Same position (1) since we preserved queued_at and nobody else is
     // ahead; the instance id rotated so any prior CLI is superseded.
     expect(state.instanceId).not.toBe('inst-pre')
-    expect(state.rateLimit?.recentCount).toBe(GLM_LIMIT)
+    expect(state.rateLimit?.recentCount).toBe(KIMI_LIMIT)
   })
 
-  test('rate_limited: expired GLM row is not a reclaim — quota still applies', async () => {
+  test('rate_limited: expired Kimi row is not a reclaim — quota still applies', async () => {
     // The stored row's expires_at is in the past, so it doesn't represent
     // an in-flight session. This POST is effectively a fresh request and
     // must be blocked by the quota.
-    deps._tick(GLM_OPEN_TIME)
+    deps._tick(KIMI_OPEN_TIME)
     const now = deps._now()
     const ages = [11, 4, 3, 2, 1]
     for (const hoursAgo of ages) {
       deps.admits.push({
         user_id: 'u1',
-        model: GLM_MODEL,
+        model: KIMI_MODEL,
         admitted_at: new Date(now.getTime() - hoursAgo * 60 * 60 * 1000),
       })
     }
@@ -540,7 +540,7 @@ describe('requestSession', () => {
       user_id: 'u1',
       status: 'active',
       active_instance_id: 'inst-pre',
-      model: GLM_MODEL,
+      model: KIMI_MODEL,
       queued_at: admittedAt,
       admitted_at: admittedAt,
       expires_at: new Date(admittedAt.getTime() + SESSION_LEN),
@@ -549,7 +549,7 @@ describe('requestSession', () => {
     })
     const state = await requestSession({
       userId: 'u1',
-      model: GLM_MODEL,
+      model: KIMI_MODEL,
       deps,
     })
     expect(state.status).toBe('rate_limited')
@@ -557,18 +557,18 @@ describe('requestSession', () => {
 
   test('instant-admit bumps the quota count for the freshly-written admit row', async () => {
     const admitDeps = makeDeps({ getInstantAdmitCapacity: () => 3 })
-    admitDeps._tick(GLM_OPEN_TIME)
+    admitDeps._tick(KIMI_OPEN_TIME)
     // 1 existing admit in the window; this new call should instant-admit and
     // write a second row, so the response's recentCount reflects 2.
     const now = admitDeps._now()
     admitDeps.admits.push({
       user_id: 'u1',
-      model: GLM_MODEL,
+      model: KIMI_MODEL,
       admitted_at: new Date(now.getTime() - 30 * 60 * 1000),
     })
     const state = await requestSession({
       userId: 'u1',
-      model: GLM_MODEL,
+      model: KIMI_MODEL,
       deps: admitDeps,
     })
     if (state.status !== 'active') throw new Error('unreachable')
@@ -636,16 +636,16 @@ describe('getSessionState', () => {
     // Regression: the POST response attached rateLimit, but GET polls did
     // not — so the "Sessions N/M used" line flashed once then disappeared on
     // the next 5s poll. GET must attach the same quota snapshot. Rate
-    // limits only apply to GLM, so this test uses GLM explicitly (inside
+    // limits only apply to Kimi, so this test uses Kimi explicitly (inside
     // deployment hours) rather than the Minimax DEFAULT_MODEL.
     deps._tick(new Date('2026-04-17T16:00:00Z'))
     const now = deps._now()
     deps.admits.push({
       user_id: 'u1',
-      model: 'z-ai/glm-5.1',
+      model: 'moonshotai/kimi-k2.6',
       admitted_at: new Date(now.getTime() - 60 * 60 * 1000),
     })
-    await requestSession({ userId: 'u1', model: 'z-ai/glm-5.1', deps })
+    await requestSession({ userId: 'u1', model: 'moonshotai/kimi-k2.6', deps })
     const row = deps.rows.get('u1')!
     row.status = 'active'
     row.admitted_at = now
@@ -658,7 +658,7 @@ describe('getSessionState', () => {
     })
     if (state.status !== 'active') throw new Error('unreachable')
     expect(state.rateLimit).toEqual({
-      model: 'z-ai/glm-5.1',
+      model: 'moonshotai/kimi-k2.6',
       limit: 5,
       windowHours: 12,
       recentCount: 1,
diff --git a/web/src/server/free-session/__tests__/session-view.test.ts b/web/src/server/free-session/__tests__/session-view.test.ts
index 52dc82c12b..215059b841 100644
--- a/web/src/server/free-session/__tests__/session-view.test.ts
+++ b/web/src/server/free-session/__tests__/session-view.test.ts
@@ -7,7 +7,7 @@ import type { InternalSessionRow } from '../types'
 const WAIT_PER_SPOT_MS = 24_000
 const GRACE_MS = 30 * 60_000
 
-const TEST_MODEL = 'z-ai/glm-5.1'
+const TEST_MODEL = 'moonshotai/kimi-k2.6'
 
 function row(overrides: Partial<InternalSessionRow> = {}): InternalSessionRow {
   const now = new Date('2026-04-17T12:00:00Z')
diff --git a/web/src/server/free-session/config.ts b/web/src/server/free-session/config.ts
index 10071b35fc..6d162c4617 100644
--- a/web/src/server/free-session/config.ts
+++ b/web/src/server/free-session/config.ts
@@ -48,7 +48,7 @@ export function getSessionGraceMs(): number {
  * queue).
  */
 const INSTANT_ADMIT_CAPACITY: Record<string, number> = {
-  'z-ai/glm-5.1': 50,
+  'moonshotai/kimi-k2.6': 50,
   'minimax/minimax-m2.7': 1000,
 }
 
diff --git a/web/src/server/free-session/public-api.ts b/web/src/server/free-session/public-api.ts
index 528cd4ab31..75c2f24ff1 100644
--- a/web/src/server/free-session/public-api.ts
+++ b/web/src/server/free-session/public-api.ts
@@ -37,7 +37,7 @@ import type {
 
 /**
  * Per-model admission rate limits. Keyed by freebuff model id; a model not
- * in the map has no rate limit applied. Today only GLM 5.1 is limited
+ * in the map has no rate limit applied. Today only Kimi K2.6 is limited
  * (Minimax is cheap enough to leave unlimited).
  *
  * Hard-coded rather than env-driven: the values need to be observable in the
@@ -45,7 +45,7 @@ import type {
  * queued/active responses — changing them is a deliberate, typed edit.
  */
 const RATE_LIMITS: Record<string, { limit: number; windowHours: number }> = {
-  'z-ai/glm-5.1': { limit: 5, windowHours: 12 },
+  'moonshotai/kimi-k2.6': { limit: 5, windowHours: 12 },
 }
 
 /** Fetch the caller's current quota snapshot for `model`, or undefined if the
diff --git a/web/src/server/free-session/store.ts b/web/src/server/free-session/store.ts
index 8831ad7a8c..d22835658f 100644
--- a/web/src/server/free-session/store.ts
+++ b/web/src/server/free-session/store.ts
@@ -466,7 +466,7 @@ export async function promoteQueuedUser(params: {
  * the oldest is needed to compute `retryAfterMs` when the window is full,
  * so one query covers both the check and the reject path.
  *
- * Drives the per-user, per-model rate limit (e.g. at most 5 GLM sessions in
+ * Drives the per-user, per-model rate limit (e.g. at most 5 Kimi sessions in
  * the last 12h) enforced before `joinOrTakeOver`.
  */
 export async function listRecentAdmits(params: {