Merged
11 changes: 4 additions & 7 deletions README.md
@@ -348,12 +348,7 @@ GOOGLE_APPLICATION_CREDENTIALS=... occ -m vertex/claude-sonnet "hello"
NVIDIA_API_KEY=nvapi-... occ -m kimi-k2.5 "hello"
```

> **Note — NVIDIA thinking models:** Models such as `kimi-k2.5` and `deepseek-r1` use
> `chat_template_kwargs: {thinking: true}` and do not accept a `tools` array in the same
> request. Open Claude Code automatically detects these models, omits tools from the
> request (preventing the HTTP 400 error), and injects a compact workspace file-tree
> snapshot into the system prompt so the model still has full structural awareness of
> your project without needing live tool calls.
> **Note — NVIDIA models (Kimi K2.5, DeepSeek R1):** These models support **full tool-calling by default** — they can Read, Write, Bash, Grep, and run all 25+ agent tools exactly like Cursor or opencode. Set `NVIDIA_THINKING_MODE=true` (or toggle the `openClaudeCode.nvidiaThinkingMode` setting in the VSCode extension) to opt into extended reasoning mode; in that mode tools are replaced with a rich workspace snapshot injected into the system prompt (file tree + key file contents), since NVIDIA NIM does not allow tools and thinking simultaneously.

---

@@ -410,7 +405,9 @@ This is a **clean-room implementation** — no leaked source used. Architecture
**Fix: Proactive workspace analysis for all models** _(this PR)_
- All models now receive a strong agentic system prompt declaring the workspace `cwd` and instructing them to explore files with LS / Glob / Read / Grep / Bash before answering — never asking the user to paste code
- New `buildWorkspaceSnapshot` helper recursively walks the workspace (skipping `node_modules`, `.git`, `dist`, etc.) and returns a compact indented file tree capped at 200 entries
- Kimi K2.5 and DeepSeek R1 (NVIDIA thinking models) now have the file tree injected directly into their system prompt — giving them full structural awareness even though NVIDIA NIM prevents live tool calls during thinking mode
- New `buildWorkspaceContent` helper reads key project files (README, package.json, entry points, etc.) and returns their contents for inline injection — capped at 64 KB total
- **Kimi K2.5 and DeepSeek R1 now use full tool-calling by default** — Read, Write, Bash, Grep, and all 25+ tools work exactly like Cursor or opencode; thinking/reasoning mode is an opt-in setting (`NVIDIA_THINKING_MODE=true` or `openClaudeCode.nvidiaThinkingMode` in VSCode settings)
- When thinking mode IS enabled a rich workspace snapshot (file tree + key file contents) is injected into the system prompt with a purpose-built thinking-model system prompt
- Extension model descriptions updated; version bumped to 1.2.0

### v1.1.0 — VSCode Extension & Bug Fixes
42 changes: 23 additions & 19 deletions v2/src/core/agent-loop.mjs
@@ -4,17 +4,23 @@
*/
import { streamResponse, accumulateStream } from './streaming.mjs';
import { ContextManager } from './context-manager.mjs';
import { buildSystemPrompt, buildWorkspaceSnapshot } from './system-prompt.mjs';
import { buildSystemPrompt, buildWorkspaceSnapshot, buildWorkspaceContent, buildThinkingModelSystemPrompt } from './system-prompt.mjs';
import { isNvidiaModel } from './providers.mjs';
import fs from 'fs';
import path from 'path';

/**
* NVIDIA NIM models that use chat_template_kwargs.thinking=true.
* These models do NOT support function-calling tools simultaneously —
* the NVIDIA API returns 400 when both are present in the same request.
* NVIDIA NIM models that CAN use chat_template_kwargs.thinking=true for
* extended reasoning — but only when NVIDIA_THINKING_MODE=true is set.
*
* By default (NVIDIA_THINKING_MODE unset / false) these models work in
* standard tool-calling mode: Read, Write, Bash, Grep, etc. all work.
*
* When NVIDIA_THINKING_MODE=true the thinking flag is added and tools are
* omitted (NVIDIA NIM rejects the combination), falling back to workspace
* snapshot injection.
*/
const NVIDIA_THINKING_MODELS = new Set([
const NVIDIA_THINKING_CAPABLE_MODELS = new Set([
'moonshotai/kimi-k2.5',
'deepseek-ai/deepseek-r1',
]);
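The opt-in behavior documented in the comment above reduces to a two-part predicate. A minimal sketch (function name illustrative) of the check `callNvidia` performs:

```javascript
// Sketch of the opt-in gate: thinking mode requires BOTH the env flag
// set to the exact string 'true' AND a thinking-capable model.
// Every other combination stays in standard tool-calling mode.
const THINKING_CAPABLE = new Set([
  'moonshotai/kimi-k2.5',
  'deepseek-ai/deepseek-r1',
]);

function useThinking(model, env = process.env) {
  return env.NVIDIA_THINKING_MODE === 'true' && THINKING_CAPABLE.has(model);
}
```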
@@ -399,25 +405,23 @@ async function callNvidia(model, state, toolDefs, settings, stream) {
const apiKey = process.env.NVIDIA_API_KEY;
if (!apiKey) throw new Error('NVIDIA_API_KEY not set');

// Models that support extended thinking via chat_template_kwargs.
// Per NVIDIA NIM documentation, these models do NOT support function
// calling simultaneously with thinking — tools must be omitted.
const supportsThinking = NVIDIA_THINKING_MODELS.has(model);
// Thinking mode is opt-in: only enabled when NVIDIA_THINKING_MODE=true.
// By default, capable models (kimi-k2.5, deepseek-r1) use standard
// function-calling mode — tools work exactly as in any other provider.
const thinkingEnabled = process.env.NVIDIA_THINKING_MODE === 'true';
const supportsThinking = thinkingEnabled && NVIDIA_THINKING_CAPABLE_MODELS.has(model);

// For thinking models the tool-list suffix in the system prompt would be
// misleading (no tools are sent), so use the static prefix only.
// Additionally, inject a workspace file-tree snapshot so the model can
// reason about the project structure even without tool access.
// When thinking mode is active the tool-list suffix would be misleading
// (NVIDIA NIM rejects tools + thinking together), so swap in a special
// system prompt with a rich workspace snapshot instead.
let systemPrompt = state.systemPrompt;
if (supportsThinking) {
if (!state.systemPromptStatic) {
process.stderr.write('[open-claude-code] Warning: systemPromptStatic missing — falling back to full system prompt for ' + model + '\n');
}
const base = state.systemPromptStatic || state.systemPrompt;
const snapshot = buildWorkspaceSnapshot(process.cwd());
systemPrompt = snapshot
? base + '\n\n## Workspace file structure (read-only reference)\n\n```\n' + snapshot + '\n```'
: base;
const workspaceContent = buildWorkspaceContent(process.cwd());
systemPrompt = buildThinkingModelSystemPrompt(base, workspaceContent.summary);
}
const effectiveState = supportsThinking
? { ...state, systemPrompt }
@@ -436,8 +440,8 @@ async function callNvidia(model, state, toolDefs, settings, stream) {
...(supportsThinking && {
chat_template_kwargs: { thinking: true },
}),
// Only include tools for non-thinking models — NVIDIA NIM rejects
// the combination of chat_template_kwargs.thinking + tools.
// Include tools unless thinking mode is active (NVIDIA NIM rejects
// the combination of chat_template_kwargs.thinking + tools).
...(!supportsThinking && toolDefs.length > 0 && {
tools: toolDefs.map(t => ({
type: 'function',
114 changes: 114 additions & 0 deletions v2/src/core/system-prompt.mjs
@@ -7,6 +7,8 @@
* - Splits at cache boundary (static prefix cached, dynamic suffix not)
* - Includes tool schemas in the system prompt
* - Exports buildWorkspaceSnapshot for injecting a file-tree into prompts
* - Exports buildWorkspaceContent for injecting key file contents into prompts
* (used for thinking models that cannot make live tool calls)
*/
import fs from 'fs';
import path from 'path';
@@ -78,6 +80,118 @@ export function buildWorkspaceSnapshot(cwd = process.cwd(), maxFiles = 200) {
return lines.join('\n');
}

// Priority-ordered list of project meta/config files to read for thinking models.
// These give the model the most structural insight per token spent.
const CONTENT_PRIORITY_FILES = [
// Documentation
'README.md', 'readme.md', 'README.txt',
// Package / dependency manifests
'package.json', 'Cargo.toml', 'pyproject.toml', 'setup.py', 'setup.cfg',
'go.mod', 'pom.xml', 'build.gradle', 'composer.json', 'Gemfile',
// Entry points
'index.js', 'index.mjs', 'index.ts', 'main.js', 'main.mjs', 'main.ts',
'main.py', '__main__.py', 'app.py', 'app.js', 'app.ts',
'src/index.js', 'src/index.mjs', 'src/index.ts',
'src/main.js', 'src/main.mjs', 'src/main.ts', 'src/main.py',
// Config
'CLAUDE.md', '.claude/CLAUDE.md',
'tsconfig.json', '.eslintrc.json', '.prettierrc.json',
'Makefile', 'Dockerfile',
];

/**
* Build a rich workspace context string that includes:
* 1. The compact file-tree snapshot (always)
* 2. Contents of high-value project files (README, package.json, entry points, etc.)
*
* This is intended for thinking models (e.g. Kimi K2.5, DeepSeek R1) that cannot
* make live tool calls. By providing actual file contents up front, the model can
* give accurate, project-specific answers without needing tool access.
*
* @param {string} [cwd] - workspace root (defaults to process.cwd())
* @param {object} [opts]
* @param {number} [opts.maxFileBytes=8192] - max bytes to include per file
* @param {number} [opts.maxTotalBytes=65536] - hard cap on total injected content
* @returns {{ tree: string, files: Array<{path: string, content: string}>, summary: string }}
*/
export function buildWorkspaceContent(cwd = process.cwd(), opts = {}) {
const { maxFileBytes = 8192, maxTotalBytes = 65536 } = opts;
const root = path.resolve(cwd);

// 1. Build the file tree
const tree = buildWorkspaceSnapshot(root);

// 2. Collect priority file contents
const files = [];
let totalBytes = 0;

for (const rel of CONTENT_PRIORITY_FILES) {
if (totalBytes >= maxTotalBytes) break;
const abs = path.join(root, rel);
if (!fs.existsSync(abs)) continue;
try {
const stat = fs.statSync(abs);
if (!stat.isFile()) continue;
let content = fs.readFileSync(abs, 'utf-8');
const originalLength = content.length;
if (originalLength > maxFileBytes) {
content = content.slice(0, maxFileBytes) + `\n... (truncated — ${originalLength - maxFileBytes} more bytes)`;
}
const contentLength = content.length;
if (totalBytes + contentLength > maxTotalBytes) break;
files.push({ path: rel, content });
totalBytes += contentLength;
} catch { /* skip unreadable */ }
}

// 3. Build the formatted summary string
const parts = [];

if (tree) {
parts.push('## Workspace file structure\n\n```\n' + tree + '\n```');
}

for (const { path: filePath, content } of files) {
parts.push(`## File: ${filePath}\n\n\`\`\`\n${content}\n\`\`\``);
}

return {
tree,
files,
summary: parts.join('\n\n'),
};
}
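The two capping rules in the loop above (per-file truncation, then a hard stop on the running total) can be isolated into a standalone sketch. Function and parameter names here are illustrative, not exports of the PR:

```javascript
// Standalone sketch of buildWorkspaceContent's capping rules:
// truncate each file at maxFileBytes, and stop collecting entirely
// once the running total would exceed maxTotalBytes.
function capFiles(entries, maxFileBytes = 8192, maxTotalBytes = 65536) {
  const kept = [];
  let total = 0;
  for (const { path, content } of entries) {
    let text = content;
    if (text.length > maxFileBytes) {
      text = text.slice(0, maxFileBytes) + '\n... (truncated)';
    }
    // A file that would overflow the total cap ends collection, matching
    // the `break` in the loop above.
    if (total + text.length > maxTotalBytes) break;
    kept.push({ path, content: text });
    total += text.length;
  }
  return kept;
}
```

Because the list is priority-ordered, the files most valuable to the model (README, manifests, entry points) are the ones least likely to be dropped by the total cap.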

/**
* Build the system prompt text for thinking models (Kimi K2.5, DeepSeek R1).
*
* Unlike the standard system prompt (which instructs the model to call tools),
* this version acknowledges that no tools are available and instead points the
* model to the pre-injected workspace content below the prompt.
*
* @param {string} staticBase - the static prefix of the normal system prompt
* @param {string} workspaceSummary - output of buildWorkspaceContent().summary
* @returns {string}
*/
export function buildThinkingModelSystemPrompt(staticBase, workspaceSummary) {
const header = [
`You are an AI coding assistant with access to a snapshot of the user's workspace.`,
``,
`IMPORTANT: You are operating in thinking mode. Live tool calls (Read, Write, Bash, Grep, etc.)`,
`are NOT available in this session. Instead, a snapshot of the key project files and the`,
`complete workspace file tree has been embedded below. Use this snapshot to answer questions`,
`accurately and in full — never say you cannot see the project or ask the user to paste code.`,
``,
`When the snapshot does not contain a file the user mentions, say so clearly and offer to`,
`reason from the available context.`,
].join('\n');

const parts = [header];
if (staticBase) parts.push(staticBase);
if (workspaceSummary) parts.push('---\n\n# Workspace snapshot (read-only)\n\n' + workspaceSummary);
return parts.join('\n\n');
}

/**
* Load all CLAUDE.md files and merge them in order.
* @param {string} [cwd] - current working directory
31 changes: 23 additions & 8 deletions vscode-extension/README.md
@@ -8,8 +8,8 @@ A **Cursor-style AI coding assistant** built directly into VSCode — no terminal

### 🗂️ Proactive workspace analysis (new in v1.2)
- **Automatic workspace exploration** — before answering questions the agent scans your project with LS, Glob, Read, and Grep instead of asking you to paste code
- **Workspace file tree injection for thinking models** — Kimi K2.5 and DeepSeek R1 receive a compact file-tree snapshot in their system prompt so they know your project layout even though NVIDIA NIM prevents live tool calls during thinking
- **Never "I can't see your files"** — the system prompt explicitly forbids asking you to share code; the agent reads files directly
- **Rich workspace injection for thinking models** — Kimi K2.5 and DeepSeek R1 receive a full workspace snapshot (file tree + key file contents: README, package.json, entry points, etc.) directly in their system prompt, so they have genuine project understanding even though NVIDIA NIM prevents live tool calls during thinking mode
- **Never "I can't see your files"** — the system prompt explicitly forbids asking you to share code; the agent reads files directly or uses the pre-injected snapshot

### 🖥️ Cursor-style Sidebar Panel (new in v1.1)
- **Dedicated activity bar icon** — opens a full chat panel in the VS Code sidebar
@@ -173,6 +173,7 @@ Open **Settings** (`Ctrl+,`) and search for `openClaudeCode`:
|---------|---------|-------------|
| `openClaudeCode.model` | `claude-sonnet-4-6` | AI model to use |
| `openClaudeCode.nvidiaApiKey` | _(empty)_ | NVIDIA NIM API key (`nvapi-...`) |
| `openClaudeCode.nvidiaThinkingMode` | `false` | Enable extended reasoning mode for Kimi K2.5 / DeepSeek R1 (disables live tools) |
| `openClaudeCode.permissionMode` | `default` | How the agent handles file/shell permissions |
| `openClaudeCode.maxTurns` | `20` | Maximum agentic tool-use turns per request |
| `openClaudeCode.showToolOutput` | `true` | Show tool progress and results in chat |
@@ -217,15 +218,29 @@ The subprocess persists across chat turns so the agent's conversation history is

---

### NVIDIA thinking models (Kimi K2.5, DeepSeek R1)
### NVIDIA models (Kimi K2.5, DeepSeek R1)

NVIDIA NIM rejects requests that combine `chat_template_kwargs.thinking` with a tools array, so these models cannot make live tool calls. Open Claude Code works around this automatically:
These models are supported in two modes. **Tool-calling mode is the default** and works exactly like Cursor or opencode — the model reads files, runs Bash, greps for patterns, and edits code like any other agent model.

- The agent omits tools from the request (preventing the HTTP 400 error)
- A compact workspace file tree is appended to the system prompt so the model knows your project layout without needing live tool access
- The system prompt instructs the model to reason about files by path rather than asking you to paste them
#### Default: full tool-calling mode

To use a thinking model, select **moonshotai/kimi-k2.5** or **deepseek-ai/deepseek-r1** from the Model dropdown and enter your `NVIDIA_API_KEY` in Settings.
Just select **moonshotai/kimi-k2.5** or **deepseek-ai/deepseek-r1**, enter your `NVIDIA_API_KEY` in Settings, and start chatting. The model has access to all tools: Read, Write, Edit, Bash, Glob, Grep, and more.

#### Optional: extended thinking (reasoning) mode

If you want the model to show its step-by-step reasoning, enable the **nvidiaThinkingMode** setting:

1. Open Settings (`Ctrl+,`), search for `openClaudeCode.nvidiaThinkingMode`, and set it to **true**.
2. Run **Open Claude Code: Clear Session** so the bridge restarts with the new setting.

In thinking mode the NVIDIA NIM API does not accept live tool calls alongside the thinking flag, so tools are replaced with a rich workspace snapshot injected into the system prompt:
- **File tree** — the full indented directory structure of your project
- **Key file contents** — README, package.json/Cargo.toml/pyproject.toml, main entry points, and other high-value project files (up to ~64 KB total)

| Mode | Tools | Thinking trace | Best for |
|------|-------|---------------|---------|
| Tool-calling (default) | ✅ Full access | ❌ | Multi-step coding tasks, file edits, grep, bash |
| Thinking (`nvidiaThinkingMode: true`) | ❌ | ✅ | Deep analysis, architecture review, explanations |
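The setting travels from VSCode to the agent as a string environment variable, and the agent only enables thinking on the exact string `'true'`. A sketch of that round-trip (helper names illustrative; the serialization mirrors `extension.js`, the comparison mirrors `agent-loop.mjs`):

```javascript
// Extension side: the boolean setting is serialized into an env var string.
function envFromSetting(setting) {
  return String(setting || false); // "true" or "false"
}

// Agent side: strict string comparison, so "false", "1", or an unset
// variable all leave the default tool-calling mode active.
function thinkingActive(envValue) {
  return envValue === 'true';
}
```

The strict comparison is why restarting the bridge after toggling the setting matters: the env var is only read when the subprocess spawns.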

---

11 changes: 11 additions & 0 deletions vscode-extension/extension.js
@@ -190,6 +190,7 @@ async function getBridge() {
env.ANTHROPIC_MODEL = model;
env.CLAUDE_CODE_PERMISSION_MODE = permissionMode;
env.CLAUDE_CODE_MAX_TURNS = String(config.get('maxTurns') || 20);
env.NVIDIA_THINKING_MODE = String(config.get('nvidiaThinkingMode') || false);

const cwd = vscode.workspace.workspaceFolders?.[0]?.uri.fsPath || process.cwd();

@@ -251,6 +252,7 @@ class ClaudeCodeViewProvider {
type: 'initialized',
model: config.get('model') || 'claude-sonnet-4-6',
mode: config.get('permissionMode') || 'default',
thinkingMode: !!config.get('nvidiaThinkingMode'),
hasApiKey,
});
break;
@@ -296,6 +298,15 @@
break;
}

case 'thinkingMode': {
const config = vscode.workspace.getConfiguration('openClaudeCode');
await config.update('nvidiaThinkingMode', !!msg.enabled, vscode.ConfigurationTarget.Global);
// Restart bridge so NVIDIA_THINKING_MODE env var is re-read
if (bridge) { bridge.dispose(); bridge = null; }
this.postMessage({ type: 'thinkingModeChanged', enabled: !!msg.enabled });
break;
}
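The handler's contract can be summarized as a pure state transition. This is a hypothetical sketch (function and state shape assumed, not part of the PR) of what the `thinkingMode` case does:

```javascript
// Hypothetical model of the thinkingMode message handler:
// persist the flag, drop the bridge so NVIDIA_THINKING_MODE is
// re-read on the next spawn, and acknowledge to the webview.
function handleThinkingMode(state, msg) {
  const enabled = !!msg.enabled;
  return {
    config: { ...state.config, nvidiaThinkingMode: enabled },
    bridge: null, // disposed: env vars are only read at subprocess spawn
    reply: { type: 'thinkingModeChanged', enabled },
  };
}
```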

case 'applyCode': {
await this._applyCodeToActiveEditor(msg.code, msg.language);
break;
64 changes: 64 additions & 0 deletions vscode-extension/media/chat.css
@@ -134,6 +134,70 @@ select:focus { border-color: var(--accent); }

.control-spacer { flex: 1; }

/* ---- Thinking mode toggle pill ---- */
.toggle-switch {
--toggle-w: 30px;
--toggle-h: 16px;
--toggle-knob: 10px;
--toggle-knob-offset: 2px;
position: relative;
display: inline-flex;
align-items: center;
width: var(--toggle-w);
height: var(--toggle-h);
cursor: pointer;
flex-shrink: 0;
}
.toggle-switch input {
opacity: 0;
width: 0;
height: 0;
position: absolute;
}
.toggle-slider {
position: absolute;
inset: 0;
background: var(--bg-input);
border: 1px solid var(--border);
border-radius: var(--toggle-h);
transition: background 0.2s, border-color 0.2s;
}
.toggle-slider::before {
content: '';
position: absolute;
left: var(--toggle-knob-offset);
top: 50%;
transform: translateY(-50%);
width: var(--toggle-knob);
height: var(--toggle-knob);
border-radius: 50%;
background: var(--text-muted);
transition: left 0.2s, background 0.2s;
}
.toggle-switch input:checked + .toggle-slider {
background: var(--accent);
border-color: var(--accent);
}
.toggle-switch input:checked + .toggle-slider::before {
/* slide the knob to the right edge: left = width - knob - offset = 30 - 10 - 2 = 18px */
left: calc(var(--toggle-w) - var(--toggle-knob) - var(--toggle-knob-offset));
background: #fff;
}
.toggle-switch input:focus-visible + .toggle-slider {
outline: 2px solid var(--accent);
outline-offset: 2px;
}

#thinking-label {
font-size: 11px;
color: var(--text-muted);
white-space: nowrap;
cursor: default;
}
#thinking-label.active {
color: var(--accent);
}

/* ---- Messages ---- */
#messages {
flex: 1;