From 878e6e01e69ce6466ad0aff0bdd133bec4672633 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 17 Apr 2026 20:46:01 +0000
Subject: [PATCH 1/6] feat: inject rich workspace content for thinking models
(Kimi K2.5, DeepSeek R1)
- Add buildWorkspaceContent() to system-prompt.mjs: reads key project files
(README, package.json, entry points, etc.) up to 64KB total and formats
them as readable blocks for injection into thinking-model prompts
- Add buildThinkingModelSystemPrompt() to system-prompt.mjs: creates a
thinking-model-specific system prompt that tells the model it has a
workspace snapshot available and NOT to say it cannot see project files
- Update callNvidia() in agent-loop.mjs to use both new functions instead
of the old file-tree-only injection
- Update vscode-extension/README.md and root README.md to document the
enhanced workspace snapshot behavior
Agent-Logs-Url: https://github.com/codomium/CODE/sessions/75639e0f-6630-492c-9cd8-7769a1ebcbe6
Co-authored-by: codomium <255525663+codomium@users.noreply.github.com>
---
README.md | 8 ++-
v2/src/core/agent-loop.mjs | 12 ++--
v2/src/core/system-prompt.mjs | 112 ++++++++++++++++++++++++++++++++++
vscode-extension/README.md | 11 ++--
4 files changed, 129 insertions(+), 14 deletions(-)
diff --git a/README.md b/README.md
index 819bc4b..3c8f3a5 100644
--- a/README.md
+++ b/README.md
@@ -351,8 +351,9 @@ NVIDIA_API_KEY=nvapi-... occ -m kimi-k2.5 "hello"
> **Note — NVIDIA thinking models:** Models such as `kimi-k2.5` and `deepseek-r1` use
> `chat_template_kwargs: {thinking: true}` and do not accept a `tools` array in the same
> request. Open Claude Code automatically detects these models, omits tools from the
-> request (preventing the HTTP 400 error), and injects a compact workspace file-tree
-> snapshot into the system prompt so the model still has full structural awareness of
+> request (preventing the HTTP 400 error), and injects a **rich workspace snapshot**
+> (file tree + key file contents: README, package.json, entry points, etc.) directly
+> into the system prompt — giving the model full structural and content awareness of
> your project without needing live tool calls.
---
@@ -410,7 +411,8 @@ This is a **clean-room implementation** — no leaked source used. Architecture
**Fix: Proactive workspace analysis for all models** _(this PR)_
- All models now receive a strong agentic system prompt declaring the workspace `cwd` and instructing them to explore files with LS / Glob / Read / Grep / Bash before answering — never asking the user to paste code
- New `buildWorkspaceSnapshot` helper recursively walks the workspace (skipping `node_modules`, `.git`, `dist`, etc.) and returns a compact indented file tree capped at 200 entries
-- Kimi K2.5 and DeepSeek R1 (NVIDIA thinking models) now have the file tree injected directly into their system prompt — giving them full structural awareness even though NVIDIA NIM prevents live tool calls during thinking mode
+- New `buildWorkspaceContent` helper reads key project files (README, package.json, entry points, etc.) and returns their contents for inline injection — capped at 64 KB total
+- Kimi K2.5 and DeepSeek R1 (NVIDIA thinking models) now receive a **rich workspace snapshot** (file tree + key file contents) injected directly into their system prompt — giving them full project understanding even though NVIDIA NIM prevents live tool calls during thinking mode; the system prompt is also rewritten to not mention tools that aren't available
- Extension model descriptions updated; version bumped to 1.2.0
### v1.1.0 — VSCode Extension & Bug Fixes
diff --git a/v2/src/core/agent-loop.mjs b/v2/src/core/agent-loop.mjs
index d35795d..3b7addb 100644
--- a/v2/src/core/agent-loop.mjs
+++ b/v2/src/core/agent-loop.mjs
@@ -4,7 +4,7 @@
*/
import { streamResponse, accumulateStream } from './streaming.mjs';
import { ContextManager } from './context-manager.mjs';
-import { buildSystemPrompt, buildWorkspaceSnapshot } from './system-prompt.mjs';
+import { buildSystemPrompt, buildWorkspaceSnapshot, buildWorkspaceContent, buildThinkingModelSystemPrompt } from './system-prompt.mjs';
import { isNvidiaModel } from './providers.mjs';
import fs from 'fs';
import path from 'path';
@@ -406,18 +406,16 @@ async function callNvidia(model, state, toolDefs, settings, stream) {
// For thinking models the tool-list suffix in the system prompt would be
// misleading (no tools are sent), so use the static prefix only.
- // Additionally, inject a workspace file-tree snapshot so the model can
- // reason about the project structure even without tool access.
+ // Additionally, inject a rich workspace snapshot (file tree + key file contents)
+ // so the model can reason about the project structure and content without live tools.
let systemPrompt = state.systemPrompt;
if (supportsThinking) {
if (!state.systemPromptStatic) {
process.stderr.write('[open-claude-code] Warning: systemPromptStatic missing — falling back to full system prompt for ' + model + '\n');
}
const base = state.systemPromptStatic || state.systemPrompt;
- const snapshot = buildWorkspaceSnapshot(process.cwd());
- systemPrompt = snapshot
- ? base + '\n\n## Workspace file structure (read-only reference)\n\n```\n' + snapshot + '\n```'
- : base;
+ const workspaceContent = buildWorkspaceContent(process.cwd());
+ systemPrompt = buildThinkingModelSystemPrompt(base, workspaceContent.summary);
}
const effectiveState = supportsThinking
? { ...state, systemPrompt }
diff --git a/v2/src/core/system-prompt.mjs b/v2/src/core/system-prompt.mjs
index 5f33c85..8a0b8aa 100644
--- a/v2/src/core/system-prompt.mjs
+++ b/v2/src/core/system-prompt.mjs
@@ -7,6 +7,8 @@
* - Splits at cache boundary (static prefix cached, dynamic suffix not)
* - Includes tool schemas in the system prompt
* - Exports buildWorkspaceSnapshot for injecting a file-tree into prompts
+ * - Exports buildWorkspaceContent for injecting key file contents into prompts
+ * (used for thinking models that cannot make live tool calls)
*/
import fs from 'fs';
import path from 'path';
@@ -78,6 +80,116 @@ export function buildWorkspaceSnapshot(cwd = process.cwd(), maxFiles = 200) {
return lines.join('\n');
}
+// Priority-ordered list of project meta/config files to read for thinking models.
+// These give the model the most structural insight per token spent.
+const CONTENT_PRIORITY_FILES = [
+ // Documentation
+ 'README.md', 'readme.md', 'README.txt',
+ // Package / dependency manifests
+ 'package.json', 'Cargo.toml', 'pyproject.toml', 'setup.py', 'setup.cfg',
+ 'go.mod', 'pom.xml', 'build.gradle', 'composer.json', 'Gemfile',
+ // Entry points
+ 'index.js', 'index.mjs', 'index.ts', 'main.js', 'main.mjs', 'main.ts',
+ 'main.py', '__main__.py', 'app.py', 'app.js', 'app.ts',
+ 'src/index.js', 'src/index.mjs', 'src/index.ts',
+ 'src/main.js', 'src/main.mjs', 'src/main.ts', 'src/main.py',
+ // Config
+ 'CLAUDE.md', '.claude/CLAUDE.md',
+ 'tsconfig.json', '.eslintrc.json', '.prettierrc.json',
+ 'Makefile', 'Dockerfile',
+];
+
+/**
+ * Build a rich workspace context string that includes:
+ * 1. The compact file-tree snapshot (always)
+ * 2. Contents of high-value project files (README, package.json, entry points, etc.)
+ *
+ * This is intended for thinking models (e.g. Kimi K2.5, DeepSeek R1) that cannot
+ * make live tool calls. By providing actual file contents up front, the model can
+ * give accurate, project-specific answers without needing tool access.
+ *
+ * @param {string} [cwd] - workspace root (defaults to process.cwd())
+ * @param {object} [opts]
+ * @param {number} [opts.maxFileBytes=8192] - max bytes to include per file
+ * @param {number} [opts.maxTotalBytes=65536] - hard cap on total injected content
+ * @returns {{ tree: string, files: Array<{path: string, content: string}>, summary: string }}
+ */
+export function buildWorkspaceContent(cwd = process.cwd(), opts = {}) {
+ const { maxFileBytes = 8192, maxTotalBytes = 65536 } = opts;
+ const root = path.resolve(cwd);
+
+ // 1. Build the file tree
+ const tree = buildWorkspaceSnapshot(root);
+
+ // 2. Collect priority file contents
+ const files = [];
+ let totalBytes = 0;
+
+ for (const rel of CONTENT_PRIORITY_FILES) {
+ if (totalBytes >= maxTotalBytes) break;
+ const abs = path.join(root, rel);
+ if (!fs.existsSync(abs)) continue;
+ try {
+ const stat = fs.statSync(abs);
+ if (!stat.isFile()) continue;
+ let content = fs.readFileSync(abs, 'utf-8');
+ const truncated = content.length > maxFileBytes;
+ if (truncated) {
+ content = content.slice(0, maxFileBytes) + `\n… (truncated — ${content.length - maxFileBytes} more bytes)`;
+ }
+ files.push({ path: rel, content });
+ totalBytes += content.length;
+ } catch { /* skip unreadable */ }
+ }
+
+ // 3. Build the formatted summary string
+ const parts = [];
+
+ if (tree) {
+ parts.push('## Workspace file structure\n\n```\n' + tree + '\n```');
+ }
+
+ for (const { path: rel, content } of files) {
+ parts.push(`## File: ${rel}\n\n\`\`\`\n${content}\n\`\`\``);
+ }
+
+ return {
+ tree,
+ files,
+ summary: parts.join('\n\n'),
+ };
+}
+
+/**
+ * Build the system prompt text for thinking models (Kimi K2.5, DeepSeek R1).
+ *
+ * Unlike the standard system prompt (which instructs the model to call tools),
+ * this version acknowledges that no tools are available and instead points the
+ * model to the pre-injected workspace content below the prompt.
+ *
+ * @param {string} staticBase - the static prefix of the normal system prompt
+ * @param {string} workspaceSummary - output of buildWorkspaceContent().summary
+ * @returns {string}
+ */
+export function buildThinkingModelSystemPrompt(staticBase, workspaceSummary) {
+ const header = [
+ `You are an AI coding assistant with access to a snapshot of the user's workspace.`,
+ ``,
+ `IMPORTANT: You are operating in thinking mode. Live tool calls (Read, Write, Bash, Grep, etc.)`,
+ `are NOT available in this session. Instead, a snapshot of the key project files and the`,
+ `complete workspace file tree has been embedded below. Use this snapshot to answer questions`,
+ `accurately and in full — never say you cannot see the project or ask the user to paste code.`,
+ ``,
+ `When the snapshot does not contain a file the user mentions, say so clearly and offer to`,
+ `reason from the available context.`,
+ ].join('\n');
+
+ const parts = [header];
+ if (staticBase) parts.push(staticBase);
+ if (workspaceSummary) parts.push('---\n\n# Workspace snapshot (read-only)\n\n' + workspaceSummary);
+ return parts.join('\n\n');
+}
+
/**
* Load all CLAUDE.md files and merge them in order.
* @param {string} [cwd] - current working directory
diff --git a/vscode-extension/README.md b/vscode-extension/README.md
index 27df3f4..a624f38 100644
--- a/vscode-extension/README.md
+++ b/vscode-extension/README.md
@@ -8,8 +8,8 @@ A **Cursor-style AI coding assistant** built directly into VSCode — no termina
### 🗂️ Proactive workspace analysis (new in v1.2)
- **Automatic workspace exploration** — before answering questions the agent scans your project with LS, Glob, Read, and Grep instead of asking you to paste code
-- **Workspace file tree injection for thinking models** — Kimi K2.5 and DeepSeek R1 receive a compact file-tree snapshot in their system prompt so they know your project layout even though NVIDIA NIM prevents live tool calls during thinking
-- **Never "I can't see your files"** — the system prompt explicitly forbids asking you to share code; the agent reads files directly
+- **Rich workspace injection for thinking models** — Kimi K2.5 and DeepSeek R1 receive a full workspace snapshot (file tree + key file contents: README, package.json, entry points, etc.) directly in their system prompt, so they have genuine project understanding even though NVIDIA NIM prevents live tool calls during thinking mode
+- **Never "I can't see your files"** — the system prompt explicitly forbids asking you to share code; the agent reads files directly or uses the pre-injected snapshot
### 🖥️ Cursor-style Sidebar Panel (new in v1.1)
- **Dedicated activity bar icon** — opens a full chat panel in the VS Code sidebar
@@ -222,8 +222,11 @@ The subprocess persists across chat turns so the agent's conversation history is
NVIDIA NIM rejects requests that combine `chat_template_kwargs.thinking` with a tools array, so these models cannot make live tool calls. Open Claude Code works around this automatically:
- The agent omits tools from the request (preventing the HTTP 400 error)
-- A compact workspace file tree is appended to the system prompt so the model knows your project layout without needing live tool access
-- The system prompt instructs the model to reason about files by path rather than asking you to paste them
+- A **rich workspace snapshot** is built and injected directly into the system prompt:
+ - **File tree** — the full indented directory structure of your project
+ - **Key file contents** — README, package.json/Cargo.toml/pyproject.toml, main entry points, and other high-value project files (up to ~64 KB total)
+- The system prompt is rewritten for thinking mode: instead of "use tools to explore", it tells the model "here is the workspace content — reason from this"
+- The model can give accurate, project-specific answers without ever needing live tool access
To use a thinking model, select **moonshotai/kimi-k2.5** or **deepseek-ai/deepseek-r1** from the Model dropdown and enter your `NVIDIA_API_KEY` in Settings.
From 4ccc7a6d249196f5595dbbe28a73553d7225d1b2 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 17 Apr 2026 20:48:05 +0000
Subject: [PATCH 2/6] fix: correct totalBytes accumulation and truncation in
buildWorkspaceContent
Agent-Logs-Url: https://github.com/codomium/CODE/sessions/75639e0f-6630-492c-9cd8-7769a1ebcbe6
Co-authored-by: codomium <255525663+codomium@users.noreply.github.com>
---
v2/src/core/system-prompt.mjs | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/v2/src/core/system-prompt.mjs b/v2/src/core/system-prompt.mjs
index 8a0b8aa..e3d191d 100644
--- a/v2/src/core/system-prompt.mjs
+++ b/v2/src/core/system-prompt.mjs
@@ -133,12 +133,14 @@ export function buildWorkspaceContent(cwd = process.cwd(), opts = {}) {
const stat = fs.statSync(abs);
if (!stat.isFile()) continue;
let content = fs.readFileSync(abs, 'utf-8');
- const truncated = content.length > maxFileBytes;
- if (truncated) {
- content = content.slice(0, maxFileBytes) + `\n… (truncated — ${content.length - maxFileBytes} more bytes)`;
+ const originalLength = content.length;
+ if (originalLength > maxFileBytes) {
+ content = content.slice(0, maxFileBytes) + `\n… (truncated — ${originalLength - maxFileBytes} more bytes)`;
}
+ const contentLength = content.length;
+ if (totalBytes + contentLength > maxTotalBytes) break;
files.push({ path: rel, content });
- totalBytes += content.length;
+ totalBytes += contentLength;
} catch { /* skip unreadable */ }
}
From 4f46939be7345f3a3db6fffc84f89beeb1600b7c Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 17 Apr 2026 20:57:10 +0000
Subject: [PATCH 3/6] feat: make NVIDIA thinking mode opt-in so Kimi
K2.5/DeepSeek R1 use full tool calling by default
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
By default Kimi K2.5 and DeepSeek R1 now work with all 25+ tools (Read,
Write, Bash, Grep, etc.) exactly like Cursor/opencode. Thinking/reasoning
mode is opt-in via NVIDIA_THINKING_MODE=true env var or the new
openClaudeCode.nvidiaThinkingMode VSCode setting (default: false).
- Rename NVIDIA_THINKING_MODELS → NVIDIA_THINKING_CAPABLE_MODELS; gate
supportsThinking on process.env.NVIDIA_THINKING_MODE === 'true'
- Add openClaudeCode.nvidiaThinkingMode boolean setting in package.json
- Pass NVIDIA_THINKING_MODE env var from extension.js to agent bridge
- Update model descriptions in package.json to reflect tool-calling default
- Update both READMEs with mode comparison table and updated behavior docs
Agent-Logs-Url: https://github.com/codomium/CODE/sessions/67a25766-01db-4fcc-b0be-87988bdc6ac2
Co-authored-by: codomium <255525663+codomium@users.noreply.github.com>
---
README.md | 11 +++--------
v2/src/core/agent-loop.mjs | 34 ++++++++++++++++++++--------------
vscode-extension/README.md | 30 +++++++++++++++++++++---------
vscode-extension/extension.js | 1 +
vscode-extension/package.json | 9 +++++++--
5 files changed, 52 insertions(+), 33 deletions(-)
diff --git a/README.md b/README.md
index 3c8f3a5..b638b9a 100644
--- a/README.md
+++ b/README.md
@@ -348,13 +348,7 @@ GOOGLE_APPLICATION_CREDENTIALS=... occ -m vertex/claude-sonnet "hello"
NVIDIA_API_KEY=nvapi-... occ -m kimi-k2.5 "hello"
```
-> **Note — NVIDIA thinking models:** Models such as `kimi-k2.5` and `deepseek-r1` use
-> `chat_template_kwargs: {thinking: true}` and do not accept a `tools` array in the same
-> request. Open Claude Code automatically detects these models, omits tools from the
-> request (preventing the HTTP 400 error), and injects a **rich workspace snapshot**
-> (file tree + key file contents: README, package.json, entry points, etc.) directly
-> into the system prompt — giving the model full structural and content awareness of
-> your project without needing live tool calls.
+> **Note — NVIDIA models (Kimi K2.5, DeepSeek R1):** These models support **full tool-calling by default** — they can Read, Write, Bash, Grep, and run all 25+ agent tools exactly like Cursor or opencode. Set `NVIDIA_THINKING_MODE=true` (or toggle the `openClaudeCode.nvidiaThinkingMode` setting in the VSCode extension) to opt into extended reasoning mode; in that mode tools are replaced with a rich workspace snapshot injected into the system prompt (file tree + key file contents), since NVIDIA NIM does not allow tools and thinking simultaneously.
---
@@ -412,7 +406,8 @@ This is a **clean-room implementation** — no leaked source used. Architecture
- All models now receive a strong agentic system prompt declaring the workspace `cwd` and instructing them to explore files with LS / Glob / Read / Grep / Bash before answering — never asking the user to paste code
- New `buildWorkspaceSnapshot` helper recursively walks the workspace (skipping `node_modules`, `.git`, `dist`, etc.) and returns a compact indented file tree capped at 200 entries
- New `buildWorkspaceContent` helper reads key project files (README, package.json, entry points, etc.) and returns their contents for inline injection — capped at 64 KB total
-- Kimi K2.5 and DeepSeek R1 (NVIDIA thinking models) now receive a **rich workspace snapshot** (file tree + key file contents) injected directly into their system prompt — giving them full project understanding even though NVIDIA NIM prevents live tool calls during thinking mode; the system prompt is also rewritten to not mention tools that aren't available
+- **Kimi K2.5 and DeepSeek R1 now use full tool-calling by default** — Read, Write, Bash, Grep, and all 25+ tools work exactly like Cursor or opencode; thinking/reasoning mode is an opt-in setting (`NVIDIA_THINKING_MODE=true` or `openClaudeCode.nvidiaThinkingMode` in VSCode settings)
+- When thinking mode IS enabled a rich workspace snapshot (file tree + key file contents) is injected into the system prompt with a purpose-built thinking-model system prompt
- Extension model descriptions updated; version bumped to 1.2.0
### v1.1.0 — VSCode Extension & Bug Fixes
diff --git a/v2/src/core/agent-loop.mjs b/v2/src/core/agent-loop.mjs
index 3b7addb..bee803e 100644
--- a/v2/src/core/agent-loop.mjs
+++ b/v2/src/core/agent-loop.mjs
@@ -10,11 +10,17 @@ import fs from 'fs';
import path from 'path';
/**
- * NVIDIA NIM models that use chat_template_kwargs.thinking=true.
- * These models do NOT support function-calling tools simultaneously —
- * the NVIDIA API returns 400 when both are present in the same request.
+ * NVIDIA NIM models that CAN use chat_template_kwargs.thinking=true for
+ * extended reasoning — but only when NVIDIA_THINKING_MODE=true is set.
+ *
+ * By default (NVIDIA_THINKING_MODE unset / false) these models work in
+ * standard tool-calling mode: Read, Write, Bash, Grep, etc. all work.
+ *
+ * When NVIDIA_THINKING_MODE=true the thinking flag is added and tools are
+ * omitted (NVIDIA NIM rejects the combination), falling back to workspace
+ * snapshot injection.
*/
-const NVIDIA_THINKING_MODELS = new Set([
+const NVIDIA_THINKING_CAPABLE_MODELS = new Set([
'moonshotai/kimi-k2.5',
'deepseek-ai/deepseek-r1',
]);
@@ -399,15 +405,15 @@ async function callNvidia(model, state, toolDefs, settings, stream) {
const apiKey = process.env.NVIDIA_API_KEY;
if (!apiKey) throw new Error('NVIDIA_API_KEY not set');
- // Models that support extended thinking via chat_template_kwargs.
- // Per NVIDIA NIM documentation, these models do NOT support function
- // calling simultaneously with thinking — tools must be omitted.
- const supportsThinking = NVIDIA_THINKING_MODELS.has(model);
+ // Thinking mode is opt-in: only enabled when NVIDIA_THINKING_MODE=true.
+ // By default, capable models (kimi-k2.5, deepseek-r1) use standard
+ // function-calling mode — tools work exactly as in any other provider.
+ const thinkingEnabled = process.env.NVIDIA_THINKING_MODE === 'true';
+ const supportsThinking = thinkingEnabled && NVIDIA_THINKING_CAPABLE_MODELS.has(model);
- // For thinking models the tool-list suffix in the system prompt would be
- // misleading (no tools are sent), so use the static prefix only.
- // Additionally, inject a rich workspace snapshot (file tree + key file contents)
- // so the model can reason about the project structure and content without live tools.
+ // When thinking mode is active the tool-list suffix would be misleading
+ // (NVIDIA NIM rejects tools + thinking together), so swap in a special
+ // system prompt with a rich workspace snapshot instead.
let systemPrompt = state.systemPrompt;
if (supportsThinking) {
if (!state.systemPromptStatic) {
@@ -434,8 +440,8 @@ async function callNvidia(model, state, toolDefs, settings, stream) {
...(supportsThinking && {
chat_template_kwargs: { thinking: true },
}),
- // Only include tools for non-thinking models — NVIDIA NIM rejects
- // the combination of chat_template_kwargs.thinking + tools.
+ // Include tools unless thinking mode is active (NVIDIA NIM rejects
+ // the combination of chat_template_kwargs.thinking + tools).
...(!supportsThinking && toolDefs.length > 0 && {
tools: toolDefs.map(t => ({
type: 'function',
diff --git a/vscode-extension/README.md b/vscode-extension/README.md
index a624f38..9e7b8d9 100644
--- a/vscode-extension/README.md
+++ b/vscode-extension/README.md
@@ -173,6 +173,7 @@ Open **Settings** (`Ctrl+,`) and search for `openClaudeCode`:
|---------|---------|-------------|
| `openClaudeCode.model` | `claude-sonnet-4-6` | AI model to use |
| `openClaudeCode.nvidiaApiKey` | _(empty)_ | NVIDIA NIM API key (`nvapi-...`) |
+| `openClaudeCode.nvidiaThinkingMode` | `false` | Enable extended reasoning mode for Kimi K2.5 / DeepSeek R1 (disables live tools) |
| `openClaudeCode.permissionMode` | `default` | How the agent handles file/shell permissions |
| `openClaudeCode.maxTurns` | `20` | Maximum agentic tool-use turns per request |
| `openClaudeCode.showToolOutput` | `true` | Show tool progress and results in chat |
@@ -217,18 +218,29 @@ The subprocess persists across chat turns so the agent's conversation history is
---
-### NVIDIA thinking models (Kimi K2.5, DeepSeek R1)
+### NVIDIA models — Kimi K2.5 and DeepSeek R1
-NVIDIA NIM rejects requests that combine `chat_template_kwargs.thinking` with a tools array, so these models cannot make live tool calls. Open Claude Code works around this automatically:
+These models are supported in two modes. **Tool-calling mode is the default** and works exactly like Cursor or opencode — the model reads files, runs Bash, greps for patterns, and edits code like any other agent model.
-- The agent omits tools from the request (preventing the HTTP 400 error)
-- A **rich workspace snapshot** is built and injected directly into the system prompt:
- - **File tree** — the full indented directory structure of your project
- - **Key file contents** — README, package.json/Cargo.toml/pyproject.toml, main entry points, and other high-value project files (up to ~64 KB total)
-- The system prompt is rewritten for thinking mode: instead of "use tools to explore", it tells the model "here is the workspace content — reason from this"
-- The model can give accurate, project-specific answers without ever needing live tool access
+#### Default: full tool-calling mode
-To use a thinking model, select **moonshotai/kimi-k2.5** or **deepseek-ai/deepseek-r1** from the Model dropdown and enter your `NVIDIA_API_KEY` in Settings.
+Just select **moonshotai/kimi-k2.5** or **deepseek-ai/deepseek-r1**, enter your `NVIDIA_API_KEY` in Settings, and start chatting. The model has access to all tools: Read, Write, Edit, Bash, Glob, Grep, and more.
+
+#### Optional: extended thinking (reasoning) mode
+
+If you want the model to show its step-by-step reasoning, enable the **nvidiaThinkingMode** setting:
+
+1. Open Settings (`Ctrl+,`), search for `openClaudeCode.nvidiaThinkingMode`, and set it to **true**.
+2. Run **Open Claude Code: Clear Session** so the bridge restarts with the new setting.
+
+In thinking mode the NVIDIA NIM API does not accept live tool calls alongside the thinking flag, so tools are replaced with a rich workspace snapshot injected into the system prompt:
+- **File tree** — the full indented directory structure of your project
+- **Key file contents** — README, package.json/Cargo.toml/pyproject.toml, main entry points, and other high-value project files (up to ~64 KB total)
+
+| Mode | Tools | Thinking trace | Best for |
+|------|-------|---------------|---------|
+| Tool-calling (default) | ✅ Full access | ❌ | Multi-step coding tasks, file edits, grep, bash |
+| Thinking (`nvidiaThinkingMode: true`) | ❌ | ✅ | Deep analysis, architecture review, explanations |
---
diff --git a/vscode-extension/extension.js b/vscode-extension/extension.js
index ac549e6..ea863df 100644
--- a/vscode-extension/extension.js
+++ b/vscode-extension/extension.js
@@ -190,6 +190,7 @@ async function getBridge() {
env.ANTHROPIC_MODEL = model;
env.CLAUDE_CODE_PERMISSION_MODE = permissionMode;
env.CLAUDE_CODE_MAX_TURNS = String(config.get('maxTurns') || 20);
+ env.NVIDIA_THINKING_MODE = String(config.get('nvidiaThinkingMode') || false);
const cwd = vscode.workspace.workspaceFolders?.[0]?.uri.fsPath || process.cwd();
diff --git a/vscode-extension/package.json b/vscode-extension/package.json
index 34769be..1424a11 100644
--- a/vscode-extension/package.json
+++ b/vscode-extension/package.json
@@ -112,13 +112,13 @@
"OpenAI GPT-4o",
"OpenAI GPT-4o Mini",
"Google Gemini 2.0 Flash",
- "NVIDIA NIM — Moonshot AI Kimi K2.5 (thinking mode; workspace file tree injected automatically)",
+ "NVIDIA NIM — Moonshot AI Kimi K2.5 (full tool access; enable nvidiaThinkingMode for reasoning mode)",
"NVIDIA NIM — Llama 3.1 Nemotron 70B Instruct",
"NVIDIA NIM — Meta Llama 3.1 405B Instruct",
"NVIDIA NIM — Meta Llama 3.3 70B Instruct",
"NVIDIA NIM — Mistral Large 2 Instruct",
"NVIDIA NIM — Mixtral 8x22B Instruct",
- "NVIDIA NIM — DeepSeek R1 (thinking mode; workspace file tree injected automatically)"
+ "NVIDIA NIM — DeepSeek R1 (full tool access; enable nvidiaThinkingMode for reasoning mode)"
],
"description": "AI model to use for the chat participant"
},
@@ -127,6 +127,11 @@
"default": "",
"description": "NVIDIA NIM API key (from integrate.api.nvidia.com). Required when using any NVIDIA-hosted model."
},
+ "openClaudeCode.nvidiaThinkingMode": {
+ "type": "boolean",
+ "default": false,
+ "description": "Enable extended thinking (reasoning) mode for capable NVIDIA NIM models (kimi-k2.5, deepseek-r1). When enabled, the model reasons step-by-step but cannot use live tools (Read, Bash, Grep, etc.) — a workspace snapshot is injected instead. When disabled (default), these models use full tool-calling mode just like any other provider."
+ },
"openClaudeCode.permissionMode": {
"type": "string",
"enum": [
From 3c030d7bc06acccb89b432548cce5cf66fd67d74 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 17 Apr 2026 21:00:48 +0000
Subject: [PATCH 4/6] fix: rename confusing path-property destructuring and unify
 ellipsis style in system-prompt.mjs
Agent-Logs-Url: https://github.com/codomium/CODE/sessions/67a25766-01db-4fcc-b0be-87988bdc6ac2
Co-authored-by: codomium <255525663+codomium@users.noreply.github.com>
---
v2/src/core/system-prompt.mjs | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/v2/src/core/system-prompt.mjs b/v2/src/core/system-prompt.mjs
index e3d191d..e68087a 100644
--- a/v2/src/core/system-prompt.mjs
+++ b/v2/src/core/system-prompt.mjs
@@ -135,7 +135,7 @@ export function buildWorkspaceContent(cwd = process.cwd(), opts = {}) {
let content = fs.readFileSync(abs, 'utf-8');
const originalLength = content.length;
if (originalLength > maxFileBytes) {
- content = content.slice(0, maxFileBytes) + `\n… (truncated — ${originalLength - maxFileBytes} more bytes)`;
+ content = content.slice(0, maxFileBytes) + `\n... (truncated — ${originalLength - maxFileBytes} more bytes)`;
}
const contentLength = content.length;
if (totalBytes + contentLength > maxTotalBytes) break;
@@ -151,8 +151,8 @@ export function buildWorkspaceContent(cwd = process.cwd(), opts = {}) {
parts.push('## Workspace file structure\n\n```\n' + tree + '\n```');
}
- for (const { path: rel, content } of files) {
- parts.push(`## File: ${rel}\n\n\`\`\`\n${content}\n\`\`\``);
+ for (const { path: filePath, content } of files) {
+ parts.push(`## File: ${filePath}\n\n\`\`\`\n${content}\n\`\`\``);
}
return {
From d96e0a8f645a60556f66a0cc3d40cdff8c320610 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 17 Apr 2026 21:07:16 +0000
Subject: [PATCH 5/6] feat: add in-UI thinking mode toggle for NVIDIA Kimi K2.5
/ DeepSeek R1
Adds a pill toggle switch directly in the chat controls bar that:
- Only appears when moonshotai/kimi-k2.5 or deepseek-ai/deepseek-r1 is selected
- Turns accent-colored when active so the user always knows the mode
- On change: saves openClaudeCode.nvidiaThinkingMode to global user config and
restarts the agent bridge so NVIDIA_THINKING_MODE env var is re-read
- State is restored on panel open / reload via the initialized message
- Model change also syncs toggle visibility automatically
Agent-Logs-Url: https://github.com/codomium/CODE/sessions/a4c71128-d85e-4a7d-b72f-b661759b9b70
Co-authored-by: codomium <255525663+codomium@users.noreply.github.com>
---
vscode-extension/extension.js | 10 ++++++
vscode-extension/media/chat.css | 59 ++++++++++++++++++++++++++++++++
vscode-extension/media/chat.html | 6 ++++
vscode-extension/media/chat.js | 32 +++++++++++++++++
4 files changed, 107 insertions(+)
diff --git a/vscode-extension/extension.js b/vscode-extension/extension.js
index ea863df..dc4c02e 100644
--- a/vscode-extension/extension.js
+++ b/vscode-extension/extension.js
@@ -252,6 +252,7 @@ class ClaudeCodeViewProvider {
type: 'initialized',
model: config.get('model') || 'claude-sonnet-4-6',
mode: config.get('permissionMode') || 'default',
+ thinkingMode: !!config.get('nvidiaThinkingMode'),
hasApiKey,
});
break;
@@ -297,6 +298,15 @@ class ClaudeCodeViewProvider {
break;
}
+ case 'thinkingMode': {
+ const config = vscode.workspace.getConfiguration('openClaudeCode');
+ await config.update('nvidiaThinkingMode', !!msg.enabled, vscode.ConfigurationTarget.Global);
+ // Restart bridge so NVIDIA_THINKING_MODE env var is re-read
+ if (bridge) { bridge.dispose(); bridge = null; }
+ this.postMessage({ type: 'thinkingModeChanged', enabled: !!msg.enabled });
+ break;
+ }
+
case 'applyCode': {
await this._applyCodeToActiveEditor(msg.code, msg.language);
break;
diff --git a/vscode-extension/media/chat.css b/vscode-extension/media/chat.css
index 9d3e7fc..1db6196 100644
--- a/vscode-extension/media/chat.css
+++ b/vscode-extension/media/chat.css
@@ -134,6 +134,65 @@ select:focus { border-color: var(--accent); }
.control-spacer { flex: 1; }
+/* ---- Thinking mode toggle pill ---- */
+.toggle-switch {
+ position: relative;
+ display: inline-flex;
+ align-items: center;
+ width: 30px;
+ height: 16px;
+ cursor: pointer;
+ flex-shrink: 0;
+}
+.toggle-switch input {
+ opacity: 0;
+ width: 0;
+ height: 0;
+ position: absolute;
+}
+.toggle-slider {
+ position: absolute;
+ inset: 0;
+ background: var(--bg-input);
+ border: 1px solid var(--border);
+ border-radius: 16px;
+ transition: background 0.2s, border-color 0.2s;
+}
+.toggle-slider::before {
+ content: '';
+ position: absolute;
+ left: 2px;
+ top: 50%;
+ transform: translateY(-50%);
+ width: 10px;
+ height: 10px;
+ border-radius: 50%;
+ background: var(--text-muted);
+ transition: left 0.2s, background 0.2s;
+}
+.toggle-switch input:checked + .toggle-slider {
+ background: var(--accent);
+ border-color: var(--accent);
+}
+.toggle-switch input:checked + .toggle-slider::before {
+ left: 16px;
+ background: #fff;
+}
+.toggle-switch input:focus-visible + .toggle-slider {
+ outline: 2px solid var(--accent);
+ outline-offset: 2px;
+}
+
+#thinking-label {
+ font-size: 11px;
+ color: var(--text-muted);
+ white-space: nowrap;
+ cursor: default;
+}
+#thinking-label.active {
+ color: var(--accent);
+}
+
/* ---- Messages ---- */
#messages {
flex: 1;
diff --git a/vscode-extension/media/chat.html b/vscode-extension/media/chat.html
index 1f47e57..37f3ecd 100644
--- a/vscode-extension/media/chat.html
+++ b/vscode-extension/media/chat.html
@@ -56,6 +56,12 @@
+
+ 💭 Thinking
+
diff --git a/vscode-extension/media/chat.js b/vscode-extension/media/chat.js
index 9ede10b..2a1949f 100644
--- a/vscode-extension/media/chat.js
+++ b/vscode-extension/media/chat.js
@@ -52,6 +52,15 @@
const applyConfirmBtn = document.getElementById('apply-confirm-btn');
const applyPickBtn = document.getElementById('apply-pick-btn');
const applyCancelBtn = document.getElementById('apply-cancel-btn');
+ const thinkingToggleEl = document.getElementById('thinking-toggle');
+ const thinkingToggleWrapper = document.getElementById('thinking-toggle-wrapper');
+ const thinkingLabelEl = document.getElementById('thinking-label');
+
+ /** Models that support NVIDIA thinking mode toggle */
+ const THINKING_CAPABLE_MODELS = new Set([
+ 'moonshotai/kimi-k2.5',
+ 'deepseek-ai/deepseek-r1',
+ ]);
// ── Tick elapsed time ────────────────────────────────────────────────────
setInterval(() => {
@@ -714,6 +723,7 @@
case 'modelChanged':
currentModel = msg.model || currentModel;
if (modelSelect) modelSelect.value = msg.model || '';
+ syncThinkingToggleVisibility(currentModel);
updateStats();
break;
@@ -737,6 +747,11 @@
currentModel = msg.model || 'claude-sonnet-4-6';
if (modelSelect && msg.model) modelSelect.value = msg.model;
if (modeSelect && msg.mode) modeSelect.value = msg.mode;
+ if (thinkingToggleEl) {
+ thinkingToggleEl.checked = !!msg.thinkingMode;
+ if (thinkingLabelEl) thinkingLabelEl.classList.toggle('active', !!msg.thinkingMode);
+ }
+ syncThinkingToggleVisibility(currentModel);
updateStats();
showWelcome(!!msg.hasApiKey);
break;
@@ -1036,6 +1051,7 @@
vscode.postMessage({ type: 'model', model: modelSelect.value });
currentModel = modelSelect.value;
updateStats();
+ syncThinkingToggleVisibility(modelSelect.value);
});
}
@@ -1045,6 +1061,22 @@
});
}
+ // ── Thinking mode toggle (NVIDIA capable models only) ─────────────────────
+ function syncThinkingToggleVisibility(model) {
+ const visible = THINKING_CAPABLE_MODELS.has(model);
+ const display = visible ? '' : 'none';
+ if (thinkingToggleWrapper) thinkingToggleWrapper.style.display = display;
+ if (thinkingLabelEl) thinkingLabelEl.style.display = display;
+ }
+
+ if (thinkingToggleEl) {
+ thinkingToggleEl.addEventListener('change', () => {
+ const enabled = thinkingToggleEl.checked;
+ if (thinkingLabelEl) thinkingLabelEl.classList.toggle('active', enabled);
+ vscode.postMessage({ type: 'thinkingMode', enabled });
+ });
+ }
+
// ── Message from extension: fileSearchResults ─────────────────────────────
window.addEventListener('message', (event) => {
if (event.data.type === 'fileSearchResults') {
From 5e660f3807fcc51754bf6082d8de5ffa5c9517c7 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Fri, 17 Apr 2026 21:09:48 +0000
Subject: [PATCH 6/6] fix: use CSS custom properties for toggle dimensions
instead of magic numbers
Agent-Logs-Url: https://github.com/codomium/CODE/sessions/a4c71128-d85e-4a7d-b72f-b661759b9b70
Co-authored-by: codomium <255525663+codomium@users.noreply.github.com>
---
vscode-extension/media/chat.css | 19 ++++++++++++-------
1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/vscode-extension/media/chat.css b/vscode-extension/media/chat.css
index 1db6196..a300693 100644
--- a/vscode-extension/media/chat.css
+++ b/vscode-extension/media/chat.css
@@ -136,11 +136,15 @@ select:focus { border-color: var(--accent); }
/* ---- Thinking mode toggle pill ---- */
.toggle-switch {
+ --toggle-w: 30px;
+ --toggle-h: 16px;
+ --toggle-knob: 10px;
+ --toggle-knob-offset: 2px;
position: relative;
display: inline-flex;
align-items: center;
- width: 30px;
- height: 16px;
+ width: var(--toggle-w);
+ height: var(--toggle-h);
cursor: pointer;
flex-shrink: 0;
}
@@ -155,17 +159,17 @@ select:focus { border-color: var(--accent); }
inset: 0;
background: var(--bg-input);
border: 1px solid var(--border);
- border-radius: 16px;
+ border-radius: var(--toggle-h);
transition: background 0.2s, border-color 0.2s;
}
.toggle-slider::before {
content: '';
position: absolute;
- left: 2px;
+ left: var(--toggle-knob-offset);
top: 50%;
transform: translateY(-50%);
- width: 10px;
- height: 10px;
+ width: var(--toggle-knob);
+ height: var(--toggle-knob);
border-radius: 50%;
background: var(--text-muted);
transition: left 0.2s, background 0.2s;
@@ -175,7 +179,8 @@ select:focus { border-color: var(--accent); }
border-color: var(--accent);
}
.toggle-switch input:checked + .toggle-slider::before {
- left: 16px;
+  /* checked position: knob rests --toggle-knob-offset from the right edge (30 - 10 - 2 = 18px) */
+ left: calc(var(--toggle-w) - var(--toggle-knob) - var(--toggle-knob-offset));
background: #fff;
}
.toggle-switch input:focus-visible + .toggle-slider {