Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 114 additions & 7 deletions claude_code_log/factories/tool_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
ToolResultMessage,
ToolUseContent,
ToolUseMessage,
ToolUseResult,
WebSearchInput,
WriteInput,
# Tool output models
AskUserQuestionAnswer,
Expand All @@ -46,6 +48,8 @@
ReadOutput,
TaskOutput,
ToolOutput,
WebSearchLink,
WebSearchOutput,
WriteOutput,
)

Expand All @@ -67,6 +71,7 @@
"AskUserQuestion": AskUserQuestionInput,
"ask_user_question": AskUserQuestionInput, # Legacy tool name
"ExitPlanMode": ExitPlanModeInput,
"WebSearch": WebSearchInput,
}


Expand Down Expand Up @@ -466,11 +471,99 @@ def parse_exitplanmode_output(
return ExitPlanModeOutput(message=message, approved=approved)


def _parse_websearch_from_structured(
tool_use_result: ToolUseResult,
) -> Optional[WebSearchOutput]:
"""Parse WebSearch from structured toolUseResult data.

The toolUseResult for WebSearch has the format:
{
"query": "search query",
"results": [
{"tool_use_id": "...", "content": [{"title": "...", "url": "..."}]},
"Analysis text..."
],
"durationSeconds": 15.7
}

Args:
tool_use_result: The structured toolUseResult from the entry

Returns:
WebSearchOutput if parsing succeeds, None otherwise
"""
if not isinstance(tool_use_result, dict):
return None

query = tool_use_result.get("query")
if not isinstance(query, str):
return None

results_raw = tool_use_result.get("results")
if not isinstance(results_raw, list):
return None
results = cast(list[Any], results_raw)
if len(results) < 1:
return None

# Extract links from the first result element
links: list[WebSearchLink] = []
first_result: Any = results[0]
if isinstance(first_result, dict):
first_result_dict = cast(dict[str, Any], first_result)
content_raw = first_result_dict.get("content", [])
if isinstance(content_raw, list):
content = cast(list[Any], content_raw)
for item in content:
if isinstance(item, dict):
link = cast(dict[str, Any], item)
title = link.get("title")
url = link.get("url")
if isinstance(title, str) and isinstance(url, str):
links.append(WebSearchLink(title=title, url=url))

# Extract summary from the second result element (if present)
summary: Optional[str] = None
if len(results) > 1 and isinstance(results[1], str):
summary = results[1].strip() or None

return WebSearchOutput(query=query, links=links, preamble=None, summary=summary)


def parse_websearch_output(
    tool_result: ToolResultContent,
    file_path: Optional[str],
    tool_use_result: Optional[ToolUseResult] = None,
) -> Optional[WebSearchOutput]:
    """Parse a WebSearch result using only the structured toolUseResult.

    Note: A regex-based fallback parser for text content was removed.
    See commit 0d1d2a9 if you need to restore it.

    Args:
        tool_result: Ignored; present so the signature matches other parsers.
        file_path: Ignored; WebSearch has no associated file.
        tool_use_result: Structured toolUseResult from the entry, if any.

    Returns:
        The parsed WebSearchOutput, or None when no structured data was
        supplied or it could not be parsed.
    """
    del tool_result, file_path  # Unused

    return (
        None
        if tool_use_result is None
        else _parse_websearch_from_structured(tool_use_result)
    )


# Type alias for tool output parsers
ToolOutputParser = Callable[[ToolResultContent, Optional[str]], Optional[ToolOutput]]
# Standard signature: (tool_result, file_path) -> Optional[ToolOutput]
# Extended signature: (tool_result, file_path, tool_use_result) -> Optional[ToolOutput]
ToolOutputParser = Callable[..., Optional[ToolOutput]]

# Registry of tool output parsers: tool_name -> parser(tool_result, file_path) -> Optional[ToolOutput]
# Registry of tool output parsers: tool_name -> parser function
# Parsers receive the full ToolResultContent and can use _extract_tool_result_text() for text.
# Some parsers (like WebSearch) also accept optional tool_use_result for structured data.
TOOL_OUTPUT_PARSERS: dict[str, ToolOutputParser] = {
"Read": parse_read_output,
"Edit": parse_edit_output,
Expand All @@ -479,13 +572,18 @@ def parse_exitplanmode_output(
"Task": parse_task_output,
"AskUserQuestion": parse_askuserquestion_output,
"ExitPlanMode": parse_exitplanmode_output,
"WebSearch": parse_websearch_output,
}

# Names of tools whose parser takes the extended signature
# (tool_result, file_path, tool_use_result). create_tool_output checks
# membership here to decide whether to pass tool_use_result as a third argument.
PARSERS_WITH_TOOL_USE_RESULT: set[str] = {"WebSearch"}


def create_tool_output(
tool_name: str,
tool_result: ToolResultContent,
file_path: Optional[str] = None,
tool_use_result: Optional[ToolUseResult] = None,
) -> ToolOutput:
"""Create typed tool output from raw ToolResultContent.

Expand All @@ -497,15 +595,21 @@ def create_tool_output(
tool_name: The name of the tool (e.g., "Bash", "Read")
tool_result: The raw tool result content
file_path: Optional file path for file-based tools (Read, Edit, Write)
tool_use_result: Optional structured toolUseResult from entry (for WebSearch, etc.)

Returns:
A typed output model if parsing succeeds, ToolResultContent as fallback.
"""
# Look up parser in registry and parse if available
if (parser := TOOL_OUTPUT_PARSERS.get(tool_name)) and (
parsed := parser(tool_result, file_path)
):
return parsed
# Look up parser in registry
parser = TOOL_OUTPUT_PARSERS.get(tool_name)
if parser:
# Use extended signature for parsers that support tool_use_result
if tool_name in PARSERS_WITH_TOOL_USE_RESULT:
parsed = parser(tool_result, file_path, tool_use_result)
else:
parsed = parser(tool_result, file_path)
if parsed:
return parsed

# Fallback to raw ToolResultContent
return tool_result
Expand Down Expand Up @@ -570,13 +674,15 @@ def create_tool_result_message(
meta: MessageMeta,
tool_result: ToolResultContent,
tool_use_context: dict[str, ToolUseContent],
tool_use_result: Optional[ToolUseResult] = None,
) -> ToolItemResult:
"""Create ToolItemResult from a tool_result content item.

Args:
meta: Message metadata
tool_result: The tool result content item
tool_use_context: Dict with tool_use_id -> ToolUseContent mapping
tool_use_result: Optional structured toolUseResult from the entry

Returns:
ToolItemResult with tool_result content model
Expand All @@ -598,6 +704,7 @@ def create_tool_result_message(
result_tool_name or "",
tool_result,
result_file_path,
tool_use_result,
)

# Create content model with rendering context
Expand Down
20 changes: 20 additions & 0 deletions claude_code_log/html/renderer.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
TaskInput,
TodoWriteInput,
ToolUseContent,
WebSearchInput,
WriteInput,
# Tool output types
AskUserQuestionOutput,
Expand All @@ -44,6 +45,7 @@
ReadOutput,
TaskOutput,
ToolResultContent,
WebSearchOutput,
WriteOutput,
)
from ..renderer import (
Expand Down Expand Up @@ -96,6 +98,8 @@
format_task_output,
format_todowrite_input,
format_tool_result_content_raw,
format_websearch_input,
format_websearch_output,
format_write_input,
format_write_output,
render_params_table,
Expand Down Expand Up @@ -302,6 +306,10 @@ def format_ExitPlanModeInput(
"""Format → empty string (no content)."""
return format_exitplanmode_input(input)

def format_WebSearchInput(self, input: WebSearchInput, _: TemplateMessage) -> str:
    """Format → search query block from format_websearch_input (message unused)."""
    query_block = format_websearch_input(input)
    return query_block

def format_ToolUseContent(self, content: ToolUseContent, _: TemplateMessage) -> str:
"""Format → <table class='params'>key | value rows</table>."""
return render_params_table(content.input)
Expand Down Expand Up @@ -342,6 +350,12 @@ def format_ExitPlanModeOutput(
"""Format → status message."""
return format_exitplanmode_output(output)

def format_WebSearchOutput(
    self, output: WebSearchOutput, _: TemplateMessage
) -> str:
    """Format → rendered search result from format_websearch_output (message unused)."""
    rendered_result = format_websearch_output(output)
    return rendered_result

def format_ToolResultContent(
self, output: ToolResultContent, _: TemplateMessage
) -> str:
Expand Down Expand Up @@ -423,6 +437,12 @@ def title_BashInput(self, input: BashInput, message: TemplateMessage) -> str:
"""Title → '💻 Bash <description>'."""
return self._tool_title(message, "💻", input.description)

def title_WebSearchInput(
    self, input: WebSearchInput, message: TemplateMessage
) -> str:
    """Title → '🔎 WebSearch <query>' (built by self._tool_title)."""
    search_query = input.query
    return self._tool_title(message, "🔎", search_query)

def _flatten_preorder(
self, roots: list[TemplateMessage]
) -> list[Tuple[TemplateMessage, str, str, str]]:
Expand Down
58 changes: 58 additions & 0 deletions claude_code_log/html/tool_formatters.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@
TaskOutput,
TodoWriteInput,
ToolResultContent,
WebSearchInput,
WebSearchOutput,
WriteInput,
Comment on lines +46 to 48
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🔴 Critical

🧩 Analysis chain

🏁 Script executed:

# First, find and examine the tool_formatters.py file
find . -name "tool_formatters.py" -type f

Repository: daaain/claude-code-log

Length of output: 107


🏁 Script executed:

# Get the file size to determine how to read it
wc -l $(find . -name "tool_formatters.py" -type f)

Repository: daaain/claude-code-log

Length of output: 111


🏁 Script executed:

# Check for render_markdown_collapsible function
rg -n "def render_markdown_collapsible" -S

Repository: daaain/claude-code-log

Length of output: 132


🏁 Script executed:

# Look for any sanitization/escaping mechanisms
rg -n "bleach|mistune|markdown|sanitize|escape|safe|html" -i -S

Repository: daaain/claude-code-log

Length of output: 50381


🏁 Script executed:

# Search for WebSearch formatter implementation
rg -n "WebSearch|class.*WebSearch" -S

Repository: daaain/claude-code-log

Length of output: 50381


🏁 Script executed:

# Read the WebSearch export lines and formatter implementation
head -n 270 ./claude_code_log/html/tool_formatters.py | tail -n +30

Repository: daaain/claude-code-log

Length of output: 8217


🏁 Script executed:

# Read the render_markdown_collapsible function from utils.py
sed -n '302,340p' ./claude_code_log/html/utils.py

Repository: daaain/claude-code-log

Length of output: 1648


🏁 Script executed:

# Check the _get_markdown_renderer function and escape settings
sed -n '244,265p' ./claude_code_log/html/utils.py

Repository: daaain/claude-code-log

Length of output: 886


🏁 Script executed:

# Look for any URL validation or sanitization in WebSearch formatting
rg -n "url|scheme|javascript|data:" ./claude_code_log/html/tool_formatters.py -i

Repository: daaain/claude-code-log

Length of output: 393


🏁 Script executed:

# Check for any bleach or sanitization after markdown rendering
rg -n "bleach|sanitize|clean|html\.escape" ./claude_code_log/html/ -i

Repository: daaain/claude-code-log

Length of output: 755


🏁 Script executed:

# Check if the rendered HTML from render_markdown goes through any sanitization
rg -n "render_markdown\(" ./claude_code_log/html/ -B2 -A2

Repository: daaain/claude-code-log

Length of output: 1174


🏁 Script executed:

# Look for any HTML sanitization in the broader codebase
rg -n "bleach|HtmlSanitizer|MarkupSafeHTML" .

Repository: daaain/claude-code-log

Length of output: 48


🏁 Script executed:

# Check mistune's handling - see if escape=False means HTML tags are preserved
rg -n "escape.*False" ./claude_code_log/

Repository: daaain/claude-code-log

Length of output: 809


Sanitize WebSearch external content before markdown rendering to prevent XSS injection.

WebSearch summaries, link titles, and URLs originate from external APIs and are currently embedded directly into markdown (in _websearch_as_markdown, lines 233-256) without escaping. The markdown renderer uses mistune with escape=False (line 256 in utils.py), which means HTML tags in the markdown input will be rendered as HTML output. This creates an XSS vector: a malicious API response containing <script> or <img onerror=...> tags in a summary or title would execute in the browser.

Fix: Escape WebSearch external fields (output.summary, link.title) using escape_html() before building the markdown string in _websearch_as_markdown(). Alternatively, enable escape=True in mistune or sanitize the rendered HTML output with bleach after markdown rendering.

🤖 Prompt for AI Agents
In `@claude_code_log/html/tool_formatters.py` around lines 46 - 48,
_websearch_as_markdown currently embeds external WebSearch fields
(WebSearchOutput.output.summary and link.title) directly into markdown which is
later rendered with mistune escape=False; update _websearch_as_markdown to
sanitize/escape those external values before building the markdown (e.g., call
escape_html() on output.summary and link.title) so no raw HTML can pass through,
or alternatively enable escape=True in the mistune renderer or run the rendered
HTML through a sanitizer like bleach; make the change inside the
_websearch_as_markdown function and ensure all uses of output.summary and
link.title are replaced with the escaped/sanitized versions.

WriteOutput,
)
Expand Down Expand Up @@ -210,6 +212,60 @@ def format_exitplanmode_result(content: str) -> str:
return content


# -- WebSearch Tool -----------------------------------------------------------


def format_websearch_input(search_input: WebSearchInput) -> str:
    """Render the body of a WebSearch tool-use message.

    The title already carries the query, truncated once it is longer than
    100 characters, so the body repeats the full query only in that case.

    Args:
        search_input: Typed WebSearchInput with query parameter.

    Returns:
        An empty string for queries of at most 100 characters; otherwise a
        ``<div class="websearch-query">`` containing the HTML-escaped query.
    """
    query_text = search_input.query
    if len(query_text) > 100:
        return f'<div class="websearch-query">{escape_html(query_text)}</div>'
    return ""  # Full query shown in title


def _websearch_as_markdown(output: WebSearchOutput) -> str:
"""Convert WebSearch output to markdown: summary, then links at bottom."""
parts: list[str] = []

# Summary first (the analysis text)
if output.summary:
parts.append(output.summary)

# Links at the bottom after a separator
if output.links:
if parts:
parts.append("") # Blank line before separator
parts.append("---")
parts.append("") # Blank line after separator
for link in output.links:
parts.append(f"- [{link.title}]({link.url})")
elif not output.summary:
# Only show "no results" if there's also no summary
parts.append("*No results found*")

return "\n".join(parts)


def format_websearch_output(output: WebSearchOutput) -> str:
    """Render a WebSearch tool result as collapsible markdown.

    The markdown comes from _websearch_as_markdown (summary text first, then
    the list of links) and is handed to render_markdown_collapsible together
    with the "websearch-results" label.

    Args:
        output: Parsed WebSearchOutput.

    Returns:
        Whatever render_markdown_collapsible produces for that markdown.
    """
    return render_markdown_collapsible(
        _websearch_as_markdown(output), "websearch-results"
    )


# -- TodoWrite Tool -----------------------------------------------------------


Expand Down Expand Up @@ -711,6 +767,7 @@ def format_tool_result_content_raw(tool_result: ToolResultContent) -> str:
"format_multiedit_input",
"format_bash_input",
"format_task_input",
"format_websearch_input",
# Tool output formatters (called by HtmlRenderer.format_{OutputClass})
"format_read_output",
"format_write_output",
Expand All @@ -719,6 +776,7 @@ def format_tool_result_content_raw(tool_result: ToolResultContent) -> str:
"format_task_output",
"format_askuserquestion_output",
"format_exitplanmode_output",
"format_websearch_output",
# Fallback for ToolResultContent
"format_tool_result_content_raw",
# Legacy formatters (still used)
Expand Down
Loading
Loading