diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f86aed..2a6a965 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -160,7 +160,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Highlights -- Hardened redirect handling to revalidate every hop against FetchKit's SSRF policy +- Hardened redirect handling to revalidate every hop against Fetchkit's SSRF policy - Tightened allow/block prefix matching to use parsed URL components instead of raw string prefixes - Added FileSaver trait for saving fetched content to files - Mitigated 6 open threats from threat model diff --git a/README.md b/README.md index a4828c5..fa0e3ce 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ AI-friendly web content fetching tool designed for LLM consumption. Rust library ## Built-in Fetchers -FetchKit routes each request through an ordered fetcher registry. Specialized +Fetchkit routes each request through an ordered fetcher registry. Specialized fetchers match first; the default fetcher handles everything else. - `GitHubCodeFetcher` - GitHub source file URLs (`/blob/...`) @@ -211,14 +211,14 @@ pip install fetchkit ``` ```python -from fetchkit_py import fetch, FetchRequest, FetchKitTool +from fetchkit_py import fetch, FetchRequest, FetchkitTool # Simple fetch response = fetch("https://example.com", as_markdown=True) print(response.content) # With configuration -tool = FetchKitTool( +tool = FetchkitTool( enable_markdown=True, user_agent="MyBot/1.0", allow_prefixes=["https://docs.example.com"] @@ -282,7 +282,7 @@ Errors are returned in the `error` field: ## Security -FetchKit blocks connections to private/reserved IP ranges by default, preventing SSRF attacks when used in server-side or AI agent contexts. +Fetchkit blocks connections to private/reserved IP ranges by default, preventing SSRF attacks when used in server-side or AI agent contexts. **Blocked by default:** loopback, private networks (10.x, 172.16-31.x, 192.168.x), link-local (169.254.x including cloud metadata), IPv6 equivalents, multicast, and other reserved ranges. diff --git a/crates/fetchkit-cli/Cargo.toml b/crates/fetchkit-cli/Cargo.toml index b7c5c06..e9248f0 100644 --- a/crates/fetchkit-cli/Cargo.toml +++ b/crates/fetchkit-cli/Cargo.toml @@ -5,7 +5,7 @@ edition.workspace = true license.workspace = true authors.workspace = true repository.workspace = true -description = "Command line interface for FetchKit web content fetching tool" +description = "Command line interface for Fetchkit web content fetching tool" keywords.workspace = true categories.workspace = true readme = "../../README.md" diff --git a/crates/fetchkit-cli/src/main.rs b/crates/fetchkit-cli/src/main.rs index 0724ace..84d081c 100644 --- a/crates/fetchkit-cli/src/main.rs +++ b/crates/fetchkit-cli/src/main.rs @@ -1,4 +1,4 @@ -//! FetchKit CLI - Command-line interface for fetching web content +//! Fetchkit CLI - Command-line interface for fetching web content //! //! Provides the `fetchkit` binary with subcommands for fetching URLs //! and running an MCP server. @@ -27,7 +27,7 @@ enum OutputFormat { Json, } -/// FetchKit - AI-friendly web content fetching tool +/// Fetchkit - AI-friendly web content fetching tool #[derive(Parser, Debug)] #[command(name = "fetchkit")] #[command(author, version, about, long_about = None)] diff --git a/crates/fetchkit-cli/tests/cli_integration.rs b/crates/fetchkit-cli/tests/cli_integration.rs index e9c604c..9f82e5b 100644 --- a/crates/fetchkit-cli/tests/cli_integration.rs +++ b/crates/fetchkit-cli/tests/cli_integration.rs @@ -148,7 +148,7 @@ fn test_help_flag() { let stdout = String::from_utf8_lossy(&output.stdout); assert!(output.status.success()); - assert!(stdout.contains("fetchkit") || stdout.contains("FetchKit")); + assert!(stdout.contains("fetchkit") || stdout.contains("Fetchkit")); assert!(stdout.contains("fetch") || stdout.contains("mcp")); } diff --git a/crates/fetchkit-python/Cargo.toml b/crates/fetchkit-python/Cargo.toml index 6ca1734..b98b22a 100644 --- a/crates/fetchkit-python/Cargo.toml +++ b/crates/fetchkit-python/Cargo.toml @@ -5,7 +5,7 @@ edition.workspace = true license.workspace = true authors.workspace = true repository.workspace = true -description = "Python bindings for the FetchKit library" +description = "Python bindings for the Fetchkit library" publish = false [lib] diff --git a/crates/fetchkit-python/src/lib.rs b/crates/fetchkit-python/src/lib.rs index debca1e..52f9bcd 100644 --- a/crates/fetchkit-python/src/lib.rs +++ b/crates/fetchkit-python/src/lib.rs @@ -1,19 +1,19 @@ -//! Python bindings for FetchKit +//! Python bindings for Fetchkit //! -//! Exposes the FetchKit tool contract to Python via PyO3. +//! Exposes the Fetchkit tool contract to Python via PyO3. //! //! # Python Usage //! //! ```python -//! from fetchkit_py import FetchKitTool, FetchRequest +//! from fetchkit_py import FetchkitTool, FetchRequest //! -//! tool = FetchKitTool() +//! tool = FetchkitTool() //! response = tool.fetch("https://example.com", as_markdown=True) //! print(response.content) //! ``` use fetchkit::{FetchError, FetchRequest, FetchResponse, HttpMethod, Tool, ToolBuilder}; -use pyo3::exceptions::PyValueError; +use pyo3::exceptions::{PyDeprecationWarning, PyValueError}; use pyo3::prelude::*; /// Convert FetchError to PyErr @@ -190,15 +190,15 @@ impl PyFetchResponse { } } -/// Python wrapper for FetchKit Tool -#[pyclass(name = "FetchKitTool")] -pub struct PyFetchKitTool { +/// Python wrapper for Fetchkit Tool +#[pyclass(name = "FetchkitTool")] +pub struct PyFetchkitTool { inner: Tool, runtime: tokio::runtime::Runtime, } #[pymethods] -impl PyFetchKitTool { +impl PyFetchkitTool { /// Create a new tool with default options #[new] #[allow(clippy::too_many_arguments)] @@ -350,6 +350,65 @@ impl PyFetchKitTool { } } +#[deprecated(note = "Use PyFetchkitTool / Python FetchkitTool; FetchKitTool is deprecated.")] +pub type PyFetchKitTool = PyFetchkitTool; + +/// Deprecated constructor shim for the old Python class spelling. +#[pyfunction(name = "FetchKitTool")] +#[allow(clippy::too_many_arguments)] +#[pyo3(signature = ( + enable_markdown=true, + enable_text=true, + user_agent=None, + allow_prefixes=None, + block_prefixes=None, + max_body_size=None, + block_private_ips=true, + respect_proxy_env=false, + allowed_ports=None, + blocked_hosts=None, + same_host_redirects_only=None, + hardened=false +))] +fn deprecated_fetch_kit_tool( + py: Python<'_>, + enable_markdown: bool, + enable_text: bool, + user_agent: Option, + allow_prefixes: Option>, + block_prefixes: Option>, + max_body_size: Option, + block_private_ips: bool, + respect_proxy_env: bool, + allowed_ports: Option>, + blocked_hosts: Option>, + same_host_redirects_only: Option, + hardened: bool, +) -> PyResult { + let warning = py.get_type::(); + PyErr::warn( + py, + &warning, + c"FetchKitTool is deprecated; use FetchkitTool instead.", + 1, + )?; + + PyFetchkitTool::new( + enable_markdown, + enable_text, + user_agent, + allow_prefixes, + block_prefixes, + max_body_size, + block_private_ips, + respect_proxy_env, + allowed_ports, + blocked_hosts, + same_host_redirects_only, + hardened, + ) +} + /// Fetch a URL using default options (convenience function) #[pyfunction] #[pyo3(signature = (url, method=None, as_markdown=None, as_text=None, content_focus=None, crawl=None, max_pages=None))] @@ -363,7 +422,7 @@ fn fetch( crawl: Option, max_pages: Option, ) -> PyResult { - let tool = PyFetchKitTool::new( + let tool = PyFetchkitTool::new( true, true, None, None, None, None, true, false, None, None, None, false, )?; tool.fetch( @@ -382,7 +441,8 @@ fn fetch( fn fetchkit_py(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_class::()?; m.add_class::()?; - m.add_class::()?; + m.add_class::()?; + m.add_function(wrap_pyfunction!(deprecated_fetch_kit_tool, m)?)?; m.add_function(wrap_pyfunction!(fetch, m)?)?; Ok(()) } diff --git a/crates/fetchkit/examples/fetch_urls.rs b/crates/fetchkit/examples/fetch_urls.rs index 56bbc3a..0220df3 100644 --- a/crates/fetchkit/examples/fetch_urls.rs +++ b/crates/fetchkit/examples/fetch_urls.rs @@ -3,13 +3,13 @@ //! Run with: cargo run -p fetchkit --example fetch_urls //! //! Demonstrates the library API by fetching real URLs and showing -//! how FetchKit handles different content types (HTML, JSON, plain text). +//! how Fetchkit handles different content types (HTML, JSON, plain text). use fetchkit::{FetchRequest, Tool}; #[tokio::main] async fn main() { - println!("FetchKit URL Examples"); + println!("Fetchkit URL Examples"); println!("=====================\n"); let tool = Tool::builder().enable_markdown(true).build(); diff --git a/crates/fetchkit/examples/save_to_file.rs b/crates/fetchkit/examples/save_to_file.rs index 9b52dc8..ad46271 100644 --- a/crates/fetchkit/examples/save_to_file.rs +++ b/crates/fetchkit/examples/save_to_file.rs @@ -9,7 +9,7 @@ use fetchkit::{FetchRequest, LocalFileSaver, Tool}; #[tokio::main] async fn main() { - println!("FetchKit save_to_file Example"); + println!("Fetchkit save_to_file Example"); println!("==============================\n"); let dir = tempfile::tempdir().expect("Failed to create temp dir"); diff --git a/crates/fetchkit/src/client.rs b/crates/fetchkit/src/client.rs index c54cd4d..65b7bef 100644 --- a/crates/fetchkit/src/client.rs +++ b/crates/fetchkit/src/client.rs @@ -1,4 +1,4 @@ -//! HTTP client for FetchKit +//! HTTP client for Fetchkit //! //! This module provides the main entry points for fetching URLs. //! The actual fetch logic is implemented by fetchers in the [`fetchers`](crate::fetchers) module. diff --git a/crates/fetchkit/src/error.rs b/crates/fetchkit/src/error.rs index a209c97..2766e11 100644 --- a/crates/fetchkit/src/error.rs +++ b/crates/fetchkit/src/error.rs @@ -1,4 +1,4 @@ -//! Error types for FetchKit +//! Error types for Fetchkit use thiserror::Error; diff --git a/crates/fetchkit/src/file_saver.rs b/crates/fetchkit/src/file_saver.rs index c0c3c37..93e3e53 100644 --- a/crates/fetchkit/src/file_saver.rs +++ b/crates/fetchkit/src/file_saver.rs @@ -1,4 +1,4 @@ -//! File saving abstractions for FetchKit +//! File saving abstractions for Fetchkit //! //! Consumers implement [`FileSaver`] to control where fetched bytes land: //! - CLI: writes to real filesystem ([`LocalFileSaver`]) diff --git a/crates/fetchkit/src/lib.rs b/crates/fetchkit/src/lib.rs index 81c4325..f5e5789 100644 --- a/crates/fetchkit/src/lib.rs +++ b/crates/fetchkit/src/lib.rs @@ -1,4 +1,4 @@ -//! FetchKit - AI-friendly web content fetching library +//! Fetchkit - AI-friendly web content fetching library //! //! This crate provides a reusable library API for fetching web content, //! with optional HTML to markdown/text conversion optimized for LLM consumption. @@ -53,7 +53,7 @@ //! //! # Fetcher System //! -//! FetchKit uses a pluggable fetcher system where specialized fetchers +//! Fetchkit uses a pluggable fetcher system where specialized fetchers //! handle specific URL patterns. The [`FetcherRegistry`] dispatches //! requests to the appropriate fetcher based on URL matching. //! @@ -116,7 +116,7 @@ pub use types::{ pub use bot_auth::{BotAuthConfig, BotAuthError}; /// Default User-Agent string -pub const DEFAULT_USER_AGENT: &str = "Everruns FetchKit/1.0"; +pub const DEFAULT_USER_AGENT: &str = "Everruns Fetchkit/1.0"; /// Backward-compatible full description string with file-saving enabled. pub const TOOL_DESCRIPTION: &str = diff --git a/crates/fetchkit/src/tool.rs b/crates/fetchkit/src/tool.rs index c21de61..387aec3 100644 --- a/crates/fetchkit/src/tool.rs +++ b/crates/fetchkit/src/tool.rs @@ -1,4 +1,4 @@ -//! Tool builder and toolkit-library contract for FetchKit. +//! Tool builder and toolkit-library contract for Fetchkit. // // DECISION: keep the legacy typed `execute`/`llmtxt` surface as wrappers around the // toolkit-library contract so existing fetchkit callers can migrate incrementally. @@ -100,7 +100,7 @@ pub struct ToolOutput { pub metadata: ToolOutputMetadata, } -/// Builder for configuring the FetchKit tool +/// Builder for configuring the Fetchkit tool /// /// # Examples /// @@ -304,7 +304,7 @@ impl ToolBuilder { /// Control private/reserved IP range blocking (SSRF prevention) /// - /// Enabled by default. When enabled, FetchKit resolves hostnames to IP + /// Enabled by default. When enabled, Fetchkit resolves hostnames to IP /// addresses before connecting and validates that the resolved IP is not /// in a private or reserved range. DNS pinning prevents rebinding attacks. /// @@ -442,7 +442,7 @@ impl ToolBuilder { } } -/// Configured FetchKit tool +/// Configured Fetchkit tool /// /// Created via [`ToolBuilder`]. Provides methods for executing fetch requests, /// retrieving schemas, and accessing tool metadata. diff --git a/crates/fetchkit/src/types.rs b/crates/fetchkit/src/types.rs index 0ab7c10..b0783d2 100644 --- a/crates/fetchkit/src/types.rs +++ b/crates/fetchkit/src/types.rs @@ -1,4 +1,4 @@ -//! Core types for FetchKit +//! Core types for Fetchkit use schemars::JsonSchema; use serde::{Deserialize, Serialize}; diff --git a/crates/fetchkit/tests/integration.rs b/crates/fetchkit/tests/integration.rs index 950625f..7526e1e 100644 --- a/crates/fetchkit/tests/integration.rs +++ b/crates/fetchkit/tests/integration.rs @@ -1,4 +1,4 @@ -//! Integration tests for FetchKit using wiremock +//! Integration tests for Fetchkit using wiremock use fetchkit::{ fetch_with_options, DnsPolicy, FetchError, FetchOptions, FetchRequest, FetcherRegistry, diff --git a/crates/fetchkit/tests/ssrf_security.rs b/crates/fetchkit/tests/ssrf_security.rs index 5d16caa..e2788c9 100644 --- a/crates/fetchkit/tests/ssrf_security.rs +++ b/crates/fetchkit/tests/ssrf_security.rs @@ -1,4 +1,4 @@ -//! SSRF security tests for FetchKit +//! SSRF security tests for Fetchkit //! //! Tests that validate the resolve-then-check DNS policy prevents //! server-side request forgery attacks. These tests verify the threat diff --git a/docs/security.md b/docs/security.md index 8a28dca..6732f86 100644 --- a/docs/security.md +++ b/docs/security.md @@ -1,6 +1,6 @@ # Security Notes -FetchKit is intended to run in agent, server, and cluster environments where URL input may be +Fetchkit is intended to run in agent, server, and cluster environments where URL input may be user-controlled. ## Safe Defaults @@ -18,7 +18,7 @@ For shared VMs, containers, or clusters: - Keep private-IP blocking enabled. - Keep proxy inheritance disabled unless outbound traffic must traverse a trusted proxy. - Use allow-lists where possible instead of relying only on block-lists. -- Apply caller-side rate limits and concurrency limits around FetchKit. +- Apply caller-side rate limits and concurrency limits around Fetchkit. If you need different limits, configure them through `ToolBuilder`: @@ -35,7 +35,7 @@ See [`specs/threat-model.md`](../specs/threat-model.md) for the full threat inve ## Web Bot Authentication -FetchKit optionally supports the [Web Bot Authentication Architecture](https://datatracker.ietf.org/doc/html/draft-meunier-web-bot-auth-architecture), +Fetchkit optionally supports the [Web Bot Authentication Architecture](https://datatracker.ietf.org/doc/html/draft-meunier-web-bot-auth-architecture), which signs outgoing requests with Ed25519 signatures per RFC 9421. This lets origins verify bot identity cryptographically instead of relying on User-Agent strings. diff --git a/examples/langchain_summarize.py b/examples/langchain_summarize.py index 0a81c30..9f57259 100644 --- a/examples/langchain_summarize.py +++ b/examples/langchain_summarize.py @@ -8,14 +8,14 @@ # ] # /// """ -LangChain agent example using FetchKit MCP server for web fetching. +LangChain agent example using Fetchkit MCP server for web fetching. This example creates a LangChain agent that can fetch web content using the -FetchKit MCP tool and summarize it using an LLM. +Fetchkit MCP tool and summarize it using an LLM. Requirements: - OPENAI_API_KEY environment variable set - - FetchKit CLI built: cargo build -p fetchkit-cli --release + - Fetchkit CLI built: cargo build -p fetchkit-cli --release Usage: uv run examples/langchain_summarize.py @@ -40,11 +40,11 @@ async def main(): # URL to summarize url = "https://everruns.com/" - print("Creating LangChain agent with FetchKit MCP tool...") + print("Creating LangChain agent with Fetchkit MCP tool...") print(f"Target URL: {url}") print() - # Create MCP client connected to FetchKit server + # Create MCP client connected to Fetchkit server mcp_client = MultiServerMCPClient( { "fetchkit": { diff --git a/examples/python_fetchkit.py b/examples/python_fetchkit.py index fb3524b..16cc557 100644 --- a/examples/python_fetchkit.py +++ b/examples/python_fetchkit.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -FetchKit Python bindings example. +Fetchkit Python bindings example. Demonstrates the fetchkit_py module: fetching URLs, accessing metadata, and verifying tool contract (schemas, descriptions). @@ -16,13 +16,13 @@ def main(): try: - from fetchkit_py import FetchKitTool, FetchRequest, fetch + from fetchkit_py import FetchkitTool, FetchRequest, fetch except ImportError: print("Error: fetchkit_py module not found.") print("Build it with: maturin develop -m crates/fetchkit-python/Cargo.toml") sys.exit(1) - print("FetchKit Python Example") + print("Fetchkit Python Example") print("========================\n") passed = 0 @@ -30,7 +30,7 @@ def main(): # 1. Tool metadata print("1. Tool metadata") - tool = FetchKitTool() + tool = FetchkitTool() desc = tool.description() llmtxt = tool.llmtxt() schema = tool.input_schema() diff --git a/specs/bot-auth.md b/specs/bot-auth.md index e1fc0f8..afa8811 100644 --- a/specs/bot-auth.md +++ b/specs/bot-auth.md @@ -3,7 +3,7 @@ ## Abstract Optional support for the Web Bot Authentication Architecture -(draft-meunier-web-bot-auth-architecture). When enabled, FetchKit signs +(draft-meunier-web-bot-auth-architecture). When enabled, Fetchkit signs outgoing HTTP requests with Ed25519 signatures per RFC 9421, allowing origins to cryptographically verify bot identity. diff --git a/specs/initial.md b/specs/initial.md index 54bda3d..7a012c9 100644 --- a/specs/initial.md +++ b/specs/initial.md @@ -1,14 +1,14 @@ # Decisions: -# - Spec mirrors current FetchKit tool behavior (no new features) unless noted below. +# - Spec mirrors current Fetchkit tool behavior (no new features) unless noted below. # - Rust is the source of truth: library + CLI + MCP server + Python bindings. # - HTML conversion is built-in (no external HTML conversion deps). # - `FetchRequest` and `FetchResponse` are defined in this crate (no external dependency). -# FetchKit Specification +# Fetchkit Specification ## Abstract -Define a standalone Rust crate named `fetchkit` that implements the existing FetchKit tool +Define a standalone Rust crate named `fetchkit` that implements the existing Fetchkit tool behavior: fetch URL content, optional HTML conversion, strict timeouts, and metadata-only responses for binary content. The crate also ships a CLI, an MCP server, and Python bindings that expose the same tool contract. @@ -17,6 +17,9 @@ that expose the same tool contract. ### Scope +- Product name in prose, generated user-facing text, and new API identifiers is + `Fetchkit`, not `FetchKit`. Existing `FetchKit` identifiers may remain only as + deprecated compatibility shims with warnings. - Provide a reusable library API and a CLI wrapper. - Provide an MCP server exposing the tool. - Provide Python bindings that expose the same tool contract. @@ -228,7 +231,7 @@ Provide a builder to configure tool options, including: ### Crawl Discovery - Crawl discovery is opt-in via `crawl: true` or CLI `--crawl`. -- FetchKit first fetches the seed URL normally, then inspects extracted page links. +- Fetchkit first fetches the seed URL normally, then inspects extracted page links. - Only same-origin HTTP(S) links are eligible: same scheme, normalized host, and port. - Obvious static assets (`.js`, `.css`, images, PDFs, archives) are skipped. - `max_pages` includes the seed page, defaults to 5, and is clamped to 20. @@ -240,7 +243,7 @@ Provide a builder to configure tool options, including: ### SSRF Prevention (DNS Policy) -By default, FetchKit blocks connections to private/reserved IP ranges: +By default, Fetchkit blocks connections to private/reserved IP ranges: - Resolves hostnames to IP addresses before connecting (resolve-then-check). - Validates resolved IPs against blocked ranges (loopback, private, link-local, cloud metadata, carrier-grade NAT, documentation, benchmarking, multicast, broadcast). @@ -252,7 +255,7 @@ By default, FetchKit blocks connections to private/reserved IP ranges: ### HTTP Behavior - User-Agent: configurable via tool builder or CLI/MCP/Python options - (default `Everruns FetchKit/1.0`). + (default `Everruns Fetchkit/1.0`). - Ambient proxy environment variables are ignored by default. - Opt in via `ToolBuilder::respect_proxy_env(true)` or CLI `--allow-env-proxy`. - Accept header: diff --git a/specs/release-process.md b/specs/release-process.md index ba5ab73..e149d2c 100644 --- a/specs/release-process.md +++ b/specs/release-process.md @@ -1,5 +1,5 @@ # Decisions: -# - Spec mirrors Bashkit's agent-driven release flow, adapted to FetchKit's crates. +# - Spec mirrors Bashkit's agent-driven release flow, adapted to Fetchkit's crates. # - GitHub Release creation is the handoff point to publishing; publish retries use `workflow_dispatch`. # - `fetchkit-python` is explicitly out of the crates.io publish flow until PyPI packaging exists. @@ -7,7 +7,7 @@ ## Abstract -Define how FetchKit releases are prepared by a coding agent, reviewed by a human, +Define how Fetchkit releases are prepared by a coding agent, reviewed by a human, and published by GitHub Actions. The process must keep `CHANGELOG.md`, workspace versions, and release automation in sync. @@ -15,7 +15,7 @@ workspace versions, and release automation in sync. ### Versioning -- FetchKit follows Semantic Versioning. +- Fetchkit follows Semantic Versioning. - Version source of truth is `[workspace.package].version` in root `Cargo.toml`. - Internal crate dependency versions must match the workspace version when published crates depend on each other. diff --git a/specs/threat-model.md b/specs/threat-model.md index ce12cf2..92e4aa9 100644 --- a/specs/threat-model.md +++ b/specs/threat-model.md @@ -2,9 +2,9 @@ ## Abstract -Threat model for FetchKit, an AI-friendly web content fetching library. FetchKit is designed +Threat model for Fetchkit, an AI-friendly web content fetching library. Fetchkit is designed to be embedded in AI agent platforms (e.g., Everruns) where untrusted user prompts can -influence which URLs are fetched. This document identifies threats that arise when FetchKit +influence which URLs are fetched. This document identifies threats that arise when Fetchkit runs inside a container or cluster with access to internal network resources, and tracks mitigations implemented in the library. @@ -59,7 +59,7 @@ Verified in this review: │ │ Container / Sandbox │ │ │ │ │ │ │ │ ┌─────────────┐ ┌──────────────────┐ │ │ -│ │ │ AI Agent │────▶│ FetchKit │ │ │ +│ │ │ AI Agent │────▶│ Fetchkit │ │ │ │ │ │ (LLM loop) │ │ (library/CLI/ │ │ │ │ │ │ │ │ MCP server) │ │ │ │ │ └─────────────┘ └───────┬──────────┘ │ │ @@ -93,18 +93,18 @@ Verified in this review: └─────────────────────┘ ``` -**Trust Boundary 1 — Agent to FetchKit:** -The AI agent passes user-influenced URLs to FetchKit. FetchKit must treat all +**Trust Boundary 1 — Agent to Fetchkit:** +The AI agent passes user-influenced URLs to Fetchkit. Fetchkit must treat all URLs as untrusted input. The agent cannot be relied upon to validate URLs since adversarial prompts can manipulate it. **Trust Boundary 2 — Container to Internal Network:** The container typically has network access to internal services (metadata endpoints, -Kubernetes API, databases). FetchKit must prevent requests that cross this boundary +Kubernetes API, databases). Fetchkit must prevent requests that cross this boundary unless explicitly allowed. **Trust Boundary 3 — Cluster to Public Internet:** -Outbound requests to the public internet are the intended use case. FetchKit should +Outbound requests to the public internet are the intended use case. Fetchkit should only allow connections to publicly-routable IP addresses by default. ## 1. Server-Side Request Forgery (TM-SSRF) @@ -126,7 +126,7 @@ only allow connections to publicly-routable IP addresses by default. ### Mitigation Details **TM-SSRF-001 — Resolve-then-check (MITIGATED):** -FetchKit resolves the hostname to IP addresses using the system resolver, validates +Fetchkit resolves the hostname to IP addresses using the system resolver, validates each resolved IP against blocked ranges, and pins the validated IP via `reqwest::ClientBuilder::resolve()` to prevent re-resolution. @@ -143,12 +143,12 @@ Blocked ranges: - Broadcast: `255.255.255.255/32` **TM-SSRF-004 — Numeric IP variants (MITIGATED):** -The `url` crate normalizes IP representations during parsing. FetchKit validates +The `url` crate normalizes IP representations during parsing. Fetchkit validates the resolved `IpAddr` (not the string), so octal/hex/decimal-encoded IPs are caught after normalization. **TM-SSRF-005 — DNS rebinding (MITIGATED):** -After validating the resolved IP, FetchKit uses `reqwest::ClientBuilder::resolve(host, addr)` +After validating the resolved IP, Fetchkit uses `reqwest::ClientBuilder::resolve(host, addr)` to pin the connection to the validated IP. This prevents reqwest from re-resolving the hostname during connection establishment. @@ -159,15 +159,15 @@ does not extract. `is_blocked_ipv6()` now detects both formats, extracts the embedded IPv4, and validates it against the blocked ranges. **TM-SSRF-009 — URL credentials (ACCEPTED):** -FetchKit passes URLs to reqwest as-is. If credentials are embedded in the URL, +Fetchkit passes URLs to reqwest as-is. If credentials are embedded in the URL, they are sent with the request. This is acceptable because: -- FetchKit only supports GET/HEAD (read-only operations) +- Fetchkit only supports GET/HEAD (read-only operations) - The URL comes from the caller who controls what credentials to include - Stripping credentials would break legitimate use cases - **Risk:** Low. Mitigated at the caller level. **TM-SSRF-010 — Redirect to internal resource (MITIGATED):** -Automatic redirects are disabled via `reqwest::redirect::Policy::none()`. FetchKit +Automatic redirects are disabled via `reqwest::redirect::Policy::none()`. Fetchkit manually follows redirects (up to 10 hops) and performs full IP validation (resolve-then-check with DNS pinning) at each hop. Scheme validation is also enforced at each hop, preventing redirects to non-HTTP schemes (e.g., `file://`). @@ -188,10 +188,10 @@ redirect target, not the original host. ### Mitigation Details **TM-NET-001 — HTTP downgrade (ACCEPTED):** -FetchKit validates the scheme at each redirect hop — non-HTTP(S) schemes are +Fetchkit validates the scheme at each redirect hop — non-HTTP(S) schemes are rejected (see TM-INPUT-001). However, HTTPS→HTTP downgrade is still allowed. This is accepted because: -- FetchKit is designed for content fetching, not security-sensitive operations +- Fetchkit is designed for content fetching, not security-sensitive operations - The caller controls which URLs to fetch - Enforcing HTTPS-only would break many legitimate use cases @@ -201,7 +201,7 @@ connection pool state from leaking between requests. This is a defense-in-depth measure. **TM-NET-004 — Proxy environment variables (MITIGATED):** -FetchKit disables ambient `HTTP_PROXY`, `HTTPS_PROXY`, and `NO_PROXY` handling by +Fetchkit disables ambient `HTTP_PROXY`, `HTTPS_PROXY`, and `NO_PROXY` handling by default via `reqwest::ClientBuilder::no_proxy()`. Callers must opt in explicitly via `ToolBuilder::respect_proxy_env(true)` or the CLI `--allow-env-proxy` flag. This prevents inherited container proxy settings from silently bypassing the @@ -275,14 +275,14 @@ Accepted because: | TM-DOS-001 | Unbounded response body | Medium | Configurable `max_body_size` (default 10 MB); truncates with `truncated: true` | MITIGATED | | TM-DOS-002 | Slowloris / slow body | Low | 1-second first-byte timeout; 30-second body timeout | MITIGATED | | TM-DOS-003 | Compressed content bomb (gzip bomb) | Medium | `max_body_size` enforced on decompressed stream; truncates large payloads | MITIGATED | -| TM-DOS-004 | Rapid request flooding via tool | Low | No rate limiting in FetchKit; caller responsibility | **CALLER RISK** | +| TM-DOS-004 | Rapid request flooding via tool | Low | No rate limiting in Fetchkit; caller responsibility | **CALLER RISK** | | TM-DOS-005 | DNS resolution delay | Low | DNS resolution uses system resolver; no explicit timeout on DNS lookup | **ACCEPTED** | | TM-DOS-006 | Memory exhaustion from large HTML conversion | Medium | Conversion input bounded by `max_body_size` (10 MB default) | MITIGATED | ### Mitigation Details **TM-DOS-001 — Unbounded response body (MITIGATED):** -FetchKit enforces a configurable `max_body_size` (default 10 MB) during streaming +Fetchkit enforces a configurable `max_body_size` (default 10 MB) during streaming body reads. When the limit is reached, the response is truncated and `truncated: true` is set in the response. The 30-second body timeout provides additional protection. Configurable via `ToolBuilder::max_body_size()`. @@ -304,14 +304,14 @@ against unbounded responses (TM-DOS-001). |----|--------|----------|------------|--------| | TM-LEAK-001 | Error messages reveal internal network topology | Medium | Error messages include connect/timeout details but not resolved IPs | MITIGATED | | TM-LEAK-002 | DNS resolution errors reveal internal DNS | Low | DNS errors surfaced as connect errors; hostname visible in error | **ACCEPTED** | -| TM-LEAK-003 | Response content leaks internal data | Low | FetchKit returns content as-is; caller must filter sensitive data | **CALLER RISK** | -| TM-LEAK-004 | User-Agent reveals software version | Info | Default UA `Everruns FetchKit/1.0` reveals stack; configurable | **BY DESIGN** | +| TM-LEAK-003 | Response content leaks internal data | Low | Fetchkit returns content as-is; caller must filter sensitive data | **CALLER RISK** | +| TM-LEAK-004 | User-Agent reveals software version | Info | Default UA `Everruns Fetchkit/1.0` reveals stack; configurable | **BY DESIGN** | | TM-LEAK-005 | Timing side-channels (connect time reveals network proximity) | Low | 1-second timeout masks some timing; not fully mitigated | **ACCEPTED** | ### Mitigation Details **TM-LEAK-001 — Error message detail (MITIGATED):** -FetchKit's error types (`FetchError`) use generic messages that don't include +Fetchkit's error types (`FetchError`) use generic messages that don't include resolved IP addresses or internal hostnames. Connect errors say "Failed to connect to server" and the `from_reqwest()` fallback path classifies errors by type (redirect, body, decode) instead of passing through raw reqwest error strings @@ -323,7 +323,7 @@ which could contain hostnames or URL details. |----|--------|----------|------------|--------| | TM-CONV-001 | Script injection in converted markdown | Low | `