Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/browser/components/tools/WebFetchToolCall.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,10 @@ export const WebFetchToolCall: React.FC<WebFetchToolCallProps> = ({
</DetailSection>
)}

{result.success && result.content && (
{/* Show content for both success and error responses (error pages may have parsed content) */}
{result.content && (
<DetailSection>
<DetailLabel>Content</DetailLabel>
<DetailLabel>{result.success ? "Content" : "Error Page Content"}</DetailLabel>
<div className="bg-code-bg max-h-[300px] overflow-y-auto rounded px-3 py-2 text-[12px]">
<MarkdownRenderer content={result.content} />
</div>
Expand Down
2 changes: 2 additions & 0 deletions src/common/types/tools.ts
Original file line number Diff line number Diff line change
Expand Up @@ -219,4 +219,6 @@ export type WebFetchToolResult =
| {
success: false;
error: string;
/** Parsed error response body (e.g., from HTTP 4xx/5xx pages) */
content?: string;
};
32 changes: 32 additions & 0 deletions src/node/services/tools/web_fetch.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -215,4 +215,36 @@ describe("web_fetch tool", () => {
expect(result.error).toContain("Failed to fetch URL");
}
});

// Test HTTP error handling with body parsing
// NOTE(review): this is a live-network integration test — it depends on
// github.com being reachable and continuing to return a 404 for this path.
// It may be flaky offline or behind a proxy.
it("should include HTTP status code in error for 404 responses", async () => {
using testEnv = createTestWebFetchTool();
const args: WebFetchToolArgs = {
// GitHub returns a proper 404 page for nonexistent users
url: "https://github.com/this-user-definitely-does-not-exist-12345",
};

const result = (await testEnv.tool.execute!(args, toolCallOptions)) as WebFetchToolResult;

// A 404 must surface as a failure whose error names the HTTP status.
expect(result.success).toBe(false);
if (!result.success) {
expect(result.error).toContain("HTTP 404");
}
});

// NOTE(review): live-network test coupled to a third-party site's current
// bot-protection setup — if platform.openai.com stops serving Cloudflare
// challenges to curl, this test will start failing for unrelated reasons.
it("should detect Cloudflare challenge pages", async () => {
using testEnv = createTestWebFetchTool();
const args: WebFetchToolArgs = {
// platform.openai.com is known to serve Cloudflare challenges
url: "https://platform.openai.com",
};

const result = (await testEnv.tool.execute!(args, toolCallOptions)) as WebFetchToolResult;

// The error message is expected to mention both the Cloudflare challenge
// and the JavaScript requirement (see the tool's challenge-detection path).
expect(result.success).toBe(false);
if (!result.success) {
expect(result.error).toContain("Cloudflare");
expect(result.error).toContain("JavaScript");
}
});
});
103 changes: 89 additions & 14 deletions src/node/services/tools/web_fetch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,64 @@ import { execBuffered } from "@/node/utils/runtime/helpers";

const USER_AGENT = "Mux/1.0 (https://github.com/coder/mux; web-fetch tool)";

/** Parse curl -i output into final-response headers, body, and status code */
function parseResponse(output: string): { headers: string; body: string; statusCode: string } {
  // curl -i combined with -L prints a header block for EVERY response in a
  // redirect chain before the final body, so the status code must be taken
  // from the LAST HTTP status line, not the first.
  const httpMatches = [...output.matchAll(/HTTP\/[\d.]+ (\d{3})[^\r\n]*/g)];
  const lastStatusMatch = httpMatches.length > 0 ? httpMatches[httpMatches.length - 1] : null;
  const statusCode = lastStatusMatch ? lastStatusMatch[1] : "";

  // Headers end with \r\n\r\n (or \n\n for some servers). Search starting at
  // the final status line: the first blank line in the whole output ends the
  // FIRST redirect hop's headers, which would leave later header blocks
  // embedded in the returned body.
  const searchFrom = lastStatusMatch?.index ?? 0;
  const headerEndIndex = output.indexOf("\r\n\r\n", searchFrom);
  const altHeaderEndIndex = output.indexOf("\n\n", searchFrom);
  const splitIndex =
    headerEndIndex !== -1
      ? headerEndIndex + 4
      : altHeaderEndIndex !== -1
        ? altHeaderEndIndex + 2
        : 0;

  // Headers are lowercased so callers can do case-insensitive lookups
  // (e.g. "cf-mitigated"). If no separator was found, treat the entire
  // output as body, matching the original fallback.
  const headers = splitIndex > 0 ? output.slice(0, splitIndex).toLowerCase() : "";
  const body = splitIndex > 0 ? output.slice(splitIndex) : output;

  return { headers, body, statusCode };
}

/** Detect if error response is a Cloudflare challenge page */
function isCloudflareChallenge(headers: string, body: string): boolean {
  // Cloudflare adds a cf-mitigated header when it intercepts the request.
  if (headers.includes("cf-mitigated")) {
    return true;
  }
  // Otherwise sniff the interstitial page text itself: the "Just a moment"
  // challenge page also tells the visitor to enable JavaScript.
  const hasChallengeTitle = body.includes("Just a moment");
  const asksForJavaScript = body.includes("Enable JavaScript");
  return hasChallengeTitle && asksForJavaScript;
}

/**
 * Try to extract readable content from HTML as markdown.
 *
 * @param body - Raw HTML to extract from.
 * @param url - Page URL, used by JSDOM to resolve relative links.
 * @param maxBytes - Truncation cap. NOTE: measured in UTF-16 code units,
 *   not bytes, so non-ASCII content can exceed this in actual bytes.
 * @returns Title and markdown content, or null when nothing readable was
 *   found or parsing threw.
 */
function tryExtractContent(
  body: string,
  url: string,
  maxBytes: number
): { title: string; content: string } | null {
  try {
    // Readability needs a live DOM to score and strip boilerplate.
    const dom = new JSDOM(body, { url });
    const reader = new Readability(dom.window.document);
    const article = reader.parse();
    if (!article?.content) return null;

    const turndown = new TurndownService({
      headingStyle: "atx",
      codeBlockStyle: "fenced",
    });
    let content = turndown.turndown(article.content);
    if (content.length > maxBytes) {
      content = content.slice(0, maxBytes);
      // slice() can split a surrogate pair; drop a trailing lone high
      // surrogate so the truncated string stays well-formed UTF-16.
      const last = content.charCodeAt(content.length - 1);
      if (last >= 0xd800 && last <= 0xdbff) {
        content = content.slice(0, -1);
      }
      content += "\n\n[Content truncated]";
    }
    return { title: article.title ?? "Untitled", content };
  } catch {
    // JSDOM/Readability can throw on malformed HTML; signal "no content"
    // rather than failing the whole fetch.
    return null;
  }
}

/**
* Web fetch tool factory for AI assistant
* Creates a tool that fetches web pages and extracts readable content as markdown
Expand Down Expand Up @@ -62,12 +120,41 @@ export const createWebFetchTool: ToolFactory = (config: ToolConfiguration) => {
const exitCodeMessages: Record<number, string> = {
6: "Could not resolve host",
7: "Failed to connect",
22: "HTTP error (4xx/5xx)",
28: "Operation timed out",
35: "SSL/TLS handshake failed",
56: "Network data receive error",
63: "Maximum file size exceeded",
};

// For HTTP errors (exit 22), try to parse and include the error body
if (result.exitCode === 22 && result.stdout) {
const { headers, body, statusCode } = parseResponse(result.stdout);
const statusText = statusCode ? `HTTP ${statusCode}` : "HTTP error";

// Detect Cloudflare challenge pages
if (isCloudflareChallenge(headers, body)) {
return {
success: false,
error: `${statusText}: Cloudflare security challenge (page requires JavaScript)`,
};
}

// Try to extract readable content from error page
const extracted = tryExtractContent(body, url, WEB_FETCH_MAX_OUTPUT_BYTES);
if (extracted) {
return {
success: false,
error: statusText,
content: extracted.content,
};
}

return {
success: false,
error: statusText,
};
}

const reason = exitCodeMessages[result.exitCode] || result.stderr || "Unknown error";
return {
success: false,
Expand All @@ -76,19 +163,7 @@ export const createWebFetchTool: ToolFactory = (config: ToolConfiguration) => {
}

// Parse headers and body from curl -i output
// Headers end with \r\n\r\n (or \n\n for some servers)
const output = result.stdout;
const headerEndIndex = output.indexOf("\r\n\r\n");
const altHeaderEndIndex = output.indexOf("\n\n");
const splitIndex =
headerEndIndex !== -1
? headerEndIndex + 4
: altHeaderEndIndex !== -1
? altHeaderEndIndex + 2
: 0;

const headers = splitIndex > 0 ? output.slice(0, splitIndex).toLowerCase() : "";
const body = splitIndex > 0 ? output.slice(splitIndex) : output;
const { headers, body } = parseResponse(result.stdout);

if (!body || body.trim().length === 0) {
return {
Expand Down