diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index aa877c75..1f1f7b60 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -23,27 +23,11 @@ jobs: - name: Checkout uses: actions/checkout@v4 - - name: Setup mdBook - uses: peaceiris/actions-mdbook@v2 - with: - mdbook-version: 'latest' - - - name: Install mdbook-mermaid - run: | - VERSION=$(curl -s https://api.github.com/repos/badboy/mdbook-mermaid/releases/latest | grep tag_name | cut -d '"' -f 4) - curl -sSL "https://github.com/badboy/mdbook-mermaid/releases/download/${VERSION}/mdbook-mermaid-${VERSION}-x86_64-unknown-linux-gnu.tar.gz" | tar -xz - sudo mv mdbook-mermaid /usr/local/bin/ - mdbook-mermaid install . - - - name: Install mdbook-linkcheck - run: | - curl -sSL https://github.com/Michael-F-Bryan/mdbook-linkcheck/releases/latest/download/mdbook-linkcheck.x86_64-unknown-linux-gnu.zip -o linkcheck.zip - unzip -q -o linkcheck.zip - chmod +x mdbook-linkcheck - sudo mv mdbook-linkcheck /usr/local/bin/ - + - name: Setup Rust toolchain + uses: dtolnay/rust-toolchain@stable + - name: Build documentation - run: mdbook build + run: ./scripts/mdbook.sh build - name: Upload artifact uses: actions/upload-pages-artifact@v3 diff --git a/book.toml b/book.toml index d8fff7d9..4ca99396 100644 --- a/book.toml +++ b/book.toml @@ -26,7 +26,7 @@ site-url = "https://coder.github.io/httpjail/" cname = "" mathjax-support = false copy-fonts = true -additional-css = [] +additional-css = ["docs/custom.css"] additional-js = ["mermaid.min.js", "mermaid-init.js"] no-section-label = false fold.enable = true diff --git a/docs/SUMMARY.md b/docs/SUMMARY.md index db3b647c..92a3d246 100644 --- a/docs/SUMMARY.md +++ b/docs/SUMMARY.md @@ -16,6 +16,7 @@ # Advanced +- [Request Body Limiting](./advanced/request-body-limiting.md) - [TLS Interception](./advanced/tls-interception.md) - [DNS Exfiltration](./advanced/dns-exfiltration.md) - [Server Mode](./advanced/server-mode.md) diff --git a/docs/advanced/request-body-limiting.md b/docs/advanced/request-body-limiting.md new file mode 100644 index 00000000..1232d0c6 --- /dev/null +++ b/docs/advanced/request-body-limiting.md @@ -0,0 +1,148 @@ +# Request Body Limiting + +The `max_tx_bytes` feature allows you to limit the total size of HTTP requests sent to upstream servers. + +This is primarily designed for mitigating code exfiltration attacks through covert channels. + +## Size Calculation + +The `max_tx_bytes` limit applies to **complete** HTTP requests, including: + +1. **Request line**: `METHOD /path HTTP/1.1\r\n` +2. **Headers**: Each header as `Name: Value\r\n` +3. **Header separator**: Final `\r\n` between headers and body +4. **Body**: Request body bytes + +## Response Format + +To enable request body limiting, return an object with `max_tx_bytes` in your rule response: + +```javascript +// JavaScript engine +{allow: {max_tx_bytes: 1024}} // Limit to 1KB total request size +``` + +```json +// Line processor engine +{"allow": {"max_tx_bytes": 1024}} +``` + +> **Note**: The `max_tx_bytes` feature is only available in the JavaScript (`--js`) and Line Processor (`--proc`) engines, not in Shell scripts. + +## Behavior + +The limiting behavior depends on whether the request includes a `Content-Length` header: + +### With Content-Length Header + +When the request includes a `Content-Length` header (most standard HTTP clients): + +1. **Early Detection**: httpjail calculates the total request size +2. **Immediate Rejection**: If it exceeds `max_tx_bytes`, the client receives a `413 Payload Too Large` error immediately +3. **No Upstream Contact**: The upstream server is never contacted, preventing unnecessary load +4. **Clear Feedback**: The error message indicates the actual size and limit + +**Example error response:** +``` +HTTP/1.1 413 Payload Too Large +Content-Type: text/plain + +Request body size (5000 bytes) exceeds maximum allowed (1024 bytes) +``` + +### Without Content-Length Header + +When the request uses chunked encoding or doesn't include `Content-Length`: + +1. **Stream Truncation**: The request body is truncated at the limit during streaming +2. **Upstream Receives Partial**: The upstream server receives exactly `max_tx_bytes` total bytes (url + headers + truncated body) +3. **Connection Closes**: The connection terminates after reaching the limit + +## Examples + +### JavaScript Engine - Upload Endpoint Limiting + +```javascript +// Limit upload endpoints to 1KB total request size +const uploadHosts = ['uploads.example.com', 'upload.github.com']; + +uploadHosts.includes(r.host) + ? {allow: {max_tx_bytes: 1024}} + : r.host.endsWith('.example.com') +``` + +### Line Processor Engine - Python Example + +```python +#!/usr/bin/env python3 +import sys, json + +upload_hosts = {'uploads.example.com', 'data.api.com'} + +for line in sys.stdin: + try: + req = json.loads(line) + if req['host'] in upload_hosts: + # Limit upload endpoints to 1KB requests + # Returns 413 error if Content-Length exceeds limit + # Truncates body if no Content-Length header + response = {"allow": {"max_tx_bytes": 1024}} + print(json.dumps(response)) + elif req['host'].endswith('.example.com'): + print("true") + else: + print("false") + except: + print("false") + sys.stdout.flush() +``` + + +## Use Cases + +### 1. Limiting File Uploads + +Prevent users from uploading large files to specific endpoints: + +```javascript +// JavaScript engine +const uploadPaths = ['/upload', '/api/files']; +uploadPaths.some(path => r.path.startsWith(path)) + ? {allow: {max_tx_bytes: 10485760}} // 10MB limit + : true +``` + +### 2. API Cost Control + +Limit request sizes to metered APIs to prevent unexpected costs: + +```javascript +// JavaScript engine +r.host === 'api.expensive-service.com' + ? {allow: {max_tx_bytes: 1024}} // 1KB limit for expensive API + : true +``` + +### 3. Data Exfiltration Prevention + +Prevent large data uploads that might indicate data exfiltration: + +```javascript +// JavaScript engine +const externalHosts = ['pastebin.com', 'transfer.sh', 'file.io']; +externalHosts.some(host => r.host.includes(host)) + ? {allow: {max_tx_bytes: 4096}} // 4KB limit for paste sites + : true +``` + +## Limitations + +- **Shell scripts**: The `max_tx_bytes` feature is not available when using shell script rules (`--shell`) +- **HTTP wire format**: The byte count is based on HTTP wire format, not just the body size +- **Partial uploads**: When truncating (no Content-Length), the upstream server receives incomplete data which may cause application errors + +## See Also + +- [JavaScript Engine](../guide/rule-engines/javascript.md) +- [Line Processor Engine](../guide/rule-engines/line-processor.md) +- [Configuration](../guide/configuration.md) diff --git a/docs/custom.css b/docs/custom.css new file mode 100644 index 00000000..8633b86f --- /dev/null +++ b/docs/custom.css @@ -0,0 +1,22 @@ +/* Minimal CSS to enable multi-line code in tables */ +/* Let mdbook theme handle all other styling */ + +table td pre { + margin: 0; +} + +table td pre code { + display: block; +} + +/* Make
tags work as line breaks in code */ +table td pre code br { + display: block; + content: ""; +} + +/* Keep tables left-aligned - override general.css margin: 0 auto */ +.table-wrapper table { + margin-left: 0; + margin-right: 0; +} diff --git a/docs/guide/rule-engines/javascript.md b/docs/guide/rule-engines/javascript.md index 31827167..1ec13b71 100644 --- a/docs/guide/rule-engines/javascript.md +++ b/docs/guide/rule-engines/javascript.md @@ -41,13 +41,11 @@ allowedHosts.includes(r.host); httpjail --js-file rules.js -- command ``` -## Response Types +## Response Format -Your JavaScript can return: +{{#include ../../includes/response-format-table.md}} -- **Boolean**: `true` to allow, `false` to deny -- **Object with message**: `{allow: false, deny_message: "Custom error"}` -- **Just a message**: `{deny_message: "Blocked"}` (implies deny) +**Examples:** ```javascript // Simple boolean @@ -59,6 +57,9 @@ false // Deny // Conditional with message r.host === 'facebook.com' ? {deny_message: 'Social media blocked'} : true + +// Limit request upload size to 1KB (headers + body) +({allow: {max_tx_bytes: 1024}}) ``` ## Common Patterns diff --git a/docs/guide/rule-engines/line-processor.md b/docs/guide/rule-engines/line-processor.md index fa5bbc14..976488af 100644 --- a/docs/guide/rule-engines/line-processor.md +++ b/docs/guide/rule-engines/line-processor.md @@ -27,11 +27,12 @@ Each request is sent as a single JSON line: ### Response Format -Your processor must respond with one line per request: +Your processor must respond with one line per request. -- **Boolean strings**: `"true"` (allow) or `"false"` (deny) -- **JSON object**: `{"allow": false, "deny_message": "Blocked by policy"}` -- **JSON with message only**: `{"deny_message": "Blocked"}` (implies deny) +{{#include ../../includes/response-format-table.md}} + +**Additional:** +- **Boolean strings**: `"true"` (allow) or `"false"` (deny) - same as boolean - **Any other text**: Treated as deny with that text as the message (e.g., `"Access denied"` becomes a deny with message "Access denied") ## Command Line Usage @@ -56,12 +57,17 @@ httpjail --proc "./filter.sh --strict" -- your-command import sys, json allowed_hosts = {'github.com', 'api.github.com'} +upload_hosts = {'uploads.example.com'} for line in sys.stdin: try: req = json.loads(line) if req['host'] in allowed_hosts: print("true") + elif req['host'] in upload_hosts: + # Limit upload endpoints to 1KB requests + response = {"allow": {"max_tx_bytes": 1024}} + print(json.dumps(response)) else: # Can return JSON for custom messages response = {"allow": False, "deny_message": f"{req['host']} not allowed"} diff --git a/docs/includes/response-format-table.md b/docs/includes/response-format-table.md new file mode 100644 index 00000000..2f0f2e40 --- /dev/null +++ b/docs/includes/response-format-table.md @@ -0,0 +1,9 @@ +| Response Format | Meaning | +|----------------|---------| +| `true` | Allow the request | +| `false` | Deny the request | +| `{allow: true}` | Allow (object form) | +| `{allow: false}` | Deny (object form) | +|
{
allow: false,
deny_message: "Access denied"
}
| Deny with custom message | +| `{deny_message: "Blocked"}` | Deny (message implies deny) | +|
{
allow: {
max_tx_bytes: 1024
}
}
| Allow with [request body limiting](../../advanced/request-body-limiting.md) | diff --git a/scripts/mdbook.sh b/scripts/mdbook.sh new file mode 100755 index 00000000..4edd2ca4 --- /dev/null +++ b/scripts/mdbook.sh @@ -0,0 +1,111 @@ +#!/usr/bin/env bash +set -euo pipefail + +# mdbook.sh - Wrapper script for mdbook that ensures preprocessors are installed +# Usage: ./scripts/mdbook.sh [mdbook-commands...] +# Example: ./scripts/mdbook.sh build +# Example: ./scripts/mdbook.sh serve + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" + +# Colors for output +RED='\033[0;31m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' # No Color + +log_info() { + echo -e "${GREEN}[mdbook.sh]${NC} $*" +} + +log_warn() { + echo -e "${YELLOW}[mdbook.sh]${NC} $*" +} + +log_error() { + echo -e "${RED}[mdbook.sh]${NC} $*" >&2 +} + +# Check if a command exists +command_exists() { + command -v "$1" >/dev/null 2>&1 +} + +# Install mdbook if not present +ensure_mdbook() { + if command_exists mdbook; then + log_info "mdbook is already installed ($(mdbook --version))" + return 0 + fi + + log_warn "mdbook not found, installing..." + if command_exists cargo; then + cargo install mdbook --locked + else + log_error "cargo not found. Please install Rust toolchain first: https://rustup.rs/" + exit 1 + fi +} + +# Install mdbook-mermaid if not present +ensure_mdbook_mermaid() { + if command_exists mdbook-mermaid; then + log_info "mdbook-mermaid is already installed ($(mdbook-mermaid --version 2>&1 | head -1 || echo 'unknown version'))" + return 0 + fi + + log_warn "mdbook-mermaid not found, installing..." + + # Try cargo install first (works on all platforms) + if command_exists cargo; then + cargo install mdbook-mermaid --locked + else + log_error "cargo not found. Please install Rust toolchain first: https://rustup.rs/" + exit 1 + fi + + # Initialize mermaid support in the book + cd "${PROJECT_ROOT}" + mdbook-mermaid install . +} + +# Install mdbook-linkcheck if not present +ensure_mdbook_linkcheck() { + if command_exists mdbook-linkcheck; then + log_info "mdbook-linkcheck is already installed ($(mdbook-linkcheck --version 2>&1 | head -1 || echo 'unknown version'))" + return 0 + fi + + log_warn "mdbook-linkcheck not found, installing..." + + if command_exists cargo; then + cargo install mdbook-linkcheck --locked + else + log_error "cargo not found. Please install Rust toolchain first: https://rustup.rs/" + exit 1 + fi +} + +# Main installation check +main() { + log_info "Checking mdbook prerequisites..." + + ensure_mdbook + ensure_mdbook_mermaid + ensure_mdbook_linkcheck + + log_info "All prerequisites satisfied" + + # Pass through all arguments to mdbook + if [ $# -eq 0 ]; then + log_warn "No command specified, running 'mdbook --help'" + mdbook --help + else + log_info "Running: mdbook $*" + cd "${PROJECT_ROOT}" + mdbook "$@" + fi +} + +main "$@" diff --git a/src/lib.rs b/src/lib.rs index d340b25f..40c97467 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -2,6 +2,7 @@ pub mod dangerous_verifier; pub mod jail; +pub mod limited_body; pub mod macos_keychain; pub mod proxy; pub mod proxy_tls; diff --git a/src/limited_body.rs b/src/limited_body.rs new file mode 100644 index 00000000..250dd0ef --- /dev/null +++ b/src/limited_body.rs @@ -0,0 +1,290 @@ +//! Limited request body wrapper for enforcing byte transmission limits. +//! +//! This module provides `LimitedBody`, a wrapper around Hyper's body types that +//! enforces a maximum byte limit on data transmitted to upstream servers. This is +//! used to implement the `max_tx_bytes` feature in rule responses. +//! +//! # How It Works +//! +//! `LimitedBody` wraps any Hyper `Body` and tracks bytes as frames are polled: +//! +//! 1. **Initialization**: Created with a `max_bytes` limit representing the total +//! bytes allowed for the request body (headers are counted separately by the caller) +//! +//! 2. **Frame Polling**: As frames are polled from the inner body: +//! - Frames within the limit are passed through unchanged +//! - Frames that would exceed the limit are truncated to fit +//! - Once the limit is reached, the stream terminates (returns `None`) +//! +//! 3. **Partial Frames**: If a frame would partially exceed the limit, only the +//! bytes up to the limit are transmitted. For example, with 10 bytes remaining +//! and a 100-byte frame, only the first 10 bytes are sent. +//! +//! # Example Usage +//! +//! ```rust,ignore +//! use httpjail::limited_body::LimitedBody; +//! use http_body_util::BodyExt; +//! +//! // Create a limited body with 1KB limit +//! let limited = LimitedBody::new(original_body, 1024); +//! let boxed = BodyExt::boxed(limited); +//! ``` + +use bytes::Bytes; +use http_body_util::combinators::BoxBody; +use hyper::Error as HyperError; +use hyper::body::{Body, Frame, SizeHint}; +use std::pin::Pin; +use std::task::{Context, Poll}; +use tracing::debug; + +/// A body wrapper that enforces a maximum byte transmission limit. +/// +/// `LimitedBody` wraps another body and tracks the total bytes transmitted, +/// terminating the stream once the limit is reached. This ensures that +/// no more than `max_bytes` are sent to the upstream server. +/// +/// # Behavior +/// +/// - **Within Limit**: Frames are passed through unchanged +/// - **At Limit**: Stream terminates immediately (returns `None`) +/// - **Exceeding Limit**: Frame is truncated to fit remaining bytes +/// - **Non-Data Frames**: Trailers and other non-data frames pass through unchanged +/// +/// # Example +/// +/// ```rust,ignore +/// let body = LimitedBody::new(inner_body, 1024); // Limit to 1KB +/// ``` +pub struct LimitedBody { + /// The wrapped body being limited + inner: BoxBody, + /// Total bytes transmitted so far + bytes_transmitted: u64, + /// Maximum bytes allowed + max_bytes: u64, +} + +impl LimitedBody { + /// Creates a new `LimitedBody` that limits transmission to `max_bytes`. + /// + /// # Arguments + /// + /// * `inner` - The body to wrap + /// * `max_bytes` - Maximum number of bytes to transmit + /// + /// # Note + /// + /// The caller is responsible for accounting for HTTP header size separately. + /// This wrapper only limits the request body bytes. + pub fn new(inner: BoxBody, max_bytes: u64) -> Self { + Self { + inner, + bytes_transmitted: 0, + max_bytes, + } + } +} + +impl Body for LimitedBody { + type Data = Bytes; + type Error = HyperError; + + fn poll_frame( + mut self: Pin<&mut Self>, + cx: &mut Context<'_>, + ) -> Poll, Self::Error>>> { + // Check if we've already reached the limit + if self.bytes_transmitted >= self.max_bytes { + debug!( + bytes_transmitted = self.bytes_transmitted, + max_bytes = self.max_bytes, + "Byte limit reached, terminating stream" + ); + return Poll::Ready(None); + } + + // Poll the inner body for the next frame + match Pin::new(&mut self.inner).poll_frame(cx) { + Poll::Ready(Some(Ok(frame))) => { + // Check if this is a data frame (vs trailers, etc.) + if let Some(data) = frame.data_ref() { + let frame_size = data.len() as u64; + let new_total = self.bytes_transmitted + frame_size; + + if new_total > self.max_bytes { + // This frame would exceed the limit - truncate it + let bytes_remaining = self.max_bytes - self.bytes_transmitted; + debug!( + bytes_transmitted = self.bytes_transmitted, + frame_size = frame_size, + max_bytes = self.max_bytes, + bytes_remaining = bytes_remaining, + "Frame would exceed limit, truncating" + ); + + if bytes_remaining > 0 { + // Send the partial frame that fits + self.bytes_transmitted = self.max_bytes; + let truncated = data.slice(0..bytes_remaining as usize); + Poll::Ready(Some(Ok(Frame::data(truncated)))) + } else { + // No bytes remaining, terminate immediately + Poll::Ready(None) + } + } else { + // Frame fits entirely within the limit + self.bytes_transmitted = new_total; + debug!( + bytes_transmitted = self.bytes_transmitted, + frame_size = frame_size, + "Frame within limit, passing through" + ); + Poll::Ready(Some(Ok(frame))) + } + } else { + // Non-data frame (like trailers), pass through unchanged + Poll::Ready(Some(Ok(frame))) + } + } + Poll::Ready(Some(Err(e))) => Poll::Ready(Some(Err(e))), + Poll::Ready(None) => Poll::Ready(None), + Poll::Pending => Poll::Pending, + } + } + + fn is_end_stream(&self) -> bool { + // Stream ends if we've hit the limit OR the inner body is done + self.bytes_transmitted >= self.max_bytes || self.inner.is_end_stream() + } + + fn size_hint(&self) -> SizeHint { + let bytes_remaining = self.max_bytes.saturating_sub(self.bytes_transmitted); + let inner_hint = self.inner.size_hint(); + + // Our upper bound is the minimum of: + // 1. The inner body's upper bound + // 2. Our remaining byte allowance + let mut hint = SizeHint::new(); + if let Some(inner_upper) = inner_hint.upper() { + hint.set_upper(std::cmp::min(inner_upper, bytes_remaining)); + } else { + hint.set_upper(bytes_remaining); + } + hint + } +} + +#[cfg(test)] +mod tests { + use super::*; + use bytes::Bytes; + use http_body_util::{BodyExt, Full}; + + /// Helper to convert body to bytes vector + async fn body_to_bytes(body: impl Body) -> Vec { + let collected = body.collect().await.unwrap(); + collected.to_bytes().to_vec() + } + + /// Helper to convert Full (Infallible error) to BoxBody with HyperError + fn wrap_body(body: Full) -> BoxBody { + body.map_err(|_e: std::convert::Infallible| { + // This can never happen since Full never errors, but we need the type conversion + unreachable!("Full body never produces errors") + }) + .boxed() + } + + #[tokio::test] + async fn test_limited_body_within_limit() { + // Test: Body smaller than limit passes through unchanged + let data = Bytes::from("Hello, World!"); + let body = Full::new(data.clone()); + let limited = LimitedBody::new(wrap_body(body), 100); + + let result = body_to_bytes(limited).await; + assert_eq!(result, data.to_vec()); + } + + #[tokio::test] + async fn test_limited_body_exact_limit() { + // Test: Body exactly at limit passes through completely + let data = Bytes::from("1234567890"); + let body = Full::new(data.clone()); + let limited = LimitedBody::new(wrap_body(body), 10); + + let result = body_to_bytes(limited).await; + assert_eq!(result, data.to_vec()); + } + + #[tokio::test] + async fn test_limited_body_exceeds_limit() { + // Test: Body larger than limit is truncated + let data = Bytes::from("Hello, World! This is a long message."); + let body = Full::new(data.clone()); + let limited = LimitedBody::new(wrap_body(body), 13); + + let result = body_to_bytes(limited).await; + assert_eq!(result, b"Hello, World!".to_vec()); + assert_eq!(result.len(), 13); + } + + #[tokio::test] + async fn test_limited_body_zero_limit() { + // Test: Zero limit produces empty body + let data = Bytes::from("Hello, World!"); + let body = Full::new(data); + let limited = LimitedBody::new(wrap_body(body), 0); + + let result = body_to_bytes(limited).await; + assert_eq!(result, b"".to_vec()); + } + + #[tokio::test] + async fn test_limited_body_empty_body() { + // Test: Empty body remains empty + let body = Full::new(Bytes::new()); + let limited = LimitedBody::new(wrap_body(body), 100); + + let result = body_to_bytes(limited).await; + assert_eq!(result, b"".to_vec()); + } + + #[tokio::test] + async fn test_limited_body_size_hint() { + // Test: size_hint reflects the limit + let data = Bytes::from("Hello, World! This is a very long message."); + let body = Full::new(data); + let limited = LimitedBody::new(wrap_body(body), 10); + + let hint = limited.size_hint(); + // Upper bound should be 10 (our limit) + assert_eq!(hint.upper(), Some(10)); + } + + #[tokio::test] + async fn test_limited_body_large_to_small() { + // Test: Large body is properly truncated to small limit + let data = Bytes::from("A".repeat(10000)); + let body = Full::new(data); + let limited = LimitedBody::new(wrap_body(body), 50); + + let result = body_to_bytes(limited).await; + assert_eq!(result.len(), 50); + assert_eq!(result, "A".repeat(50).as_bytes()); + } + + #[tokio::test] + async fn test_limited_body_one_byte_limit() { + // Test: One byte limit + let data = Bytes::from("Hello"); + let body = Full::new(data); + let limited = LimitedBody::new(wrap_body(body), 1); + + let result = body_to_bytes(limited).await; + assert_eq!(result, b"H".to_vec()); + assert_eq!(result.len(), 1); + } +} diff --git a/src/proxy.rs b/src/proxy.rs index 966b52d7..e15f31a4 100644 --- a/src/proxy.rs +++ b/src/proxy.rs @@ -55,6 +55,103 @@ pub fn create_connect_403_response_with_context(context: Option) -> Vec< response.into_bytes() } +// Use the limited body module for request size limiting +use crate::limited_body::LimitedBody; + +/// Result of applying byte limit check to a request +pub enum ByteLimitResult { + /// Request is within limit (or no Content-Length), proceed with wrapped body + WithinLimit(Box>>), + /// Request exceeds limit based on Content-Length header + ExceedsLimit { content_length: u64, max_bytes: u64 }, +} + +/// Applies a byte limit to an outgoing request by wrapping its body. +/// +/// This function first checks the Content-Length header as a heuristic to detect +/// requests that would exceed the limit. If Content-Length indicates the request +/// is oversized, it returns `ByteLimitResult::ExceedsLimit` so the caller can +/// reject the request with a 413 error. This prevents the request from hanging +/// and provides immediate feedback to the client. +/// +/// If no Content-Length is present or the request is within limits, the body is +/// wrapped in a `LimitedBody` that enforces truncation as a fallback. +/// +/// # Arguments +/// +/// * `req` - The request to limit (already boxed) +/// * `max_bytes` - Maximum total bytes for the request (headers + body) +/// +/// # Returns +/// +/// `ByteLimitResult` indicating whether to proceed or reject the request +pub fn apply_request_byte_limit( + req: Request>, + max_bytes: u64, +) -> ByteLimitResult { + let (parts, body) = req.into_parts(); + + // Calculate request header size to subtract from max_tx_bytes + // Request line: "GET /path HTTP/1.1\r\n" + let method_str = parts.method.as_str(); + let path_str = parts + .uri + .path_and_query() + .map(|p| p.as_str()) + .unwrap_or("/"); + let request_line_size = format!("{} {} HTTP/1.1\r\n", method_str, path_str).len() as u64; + + // Headers: each header is "name: value\r\n" + let headers_size: u64 = parts + .headers + .iter() + .map(|(name, value)| name.as_str().len() as u64 + 2 + value.len() as u64 + 2) + .sum(); + + // Final "\r\n" separator between headers and body + let total_header_size = request_line_size + headers_size + 2; + + // Check Content-Length as a heuristic to reject oversized requests early + // This both provides convenience (immediate error) and prevents hangs + if let Some(content_length) = parts + .headers + .get(hyper::header::CONTENT_LENGTH) + .and_then(|v| v.to_str().ok()) + .and_then(|s| s.parse::().ok()) + { + let total_size = total_header_size + content_length; + if total_size > max_bytes { + debug!( + content_length = content_length, + header_size = total_header_size, + total_size = total_size, + max_bytes = max_bytes, + "Request exceeds byte limit based on Content-Length" + ); + return ByteLimitResult::ExceedsLimit { + content_length, + max_bytes, + }; + } + } + + // Subtract header size from total limit to get body limit + let body_limit = max_bytes.saturating_sub(total_header_size); + + debug!( + max_tx_bytes = max_bytes, + header_size = total_header_size, + body_limit = body_limit, + "Applying request byte limit" + ); + + let limited_body = LimitedBody::new(body, body_limit); + ByteLimitResult::WithinLimit(Box::new(Request::from_parts( + parts, + BodyExt::boxed(limited_body), + ))) +} + // Shared HTTP/HTTPS client for upstream requests static HTTPS_CLIENT: OnceLock< Client< @@ -457,8 +554,11 @@ pub async fn handle_http_request( .await; match evaluation.action { Action::Allow => { - debug!("Request allowed: {}", full_url); - match proxy_request(req, &full_url).await { + debug!( + "Request allowed: {} (max_tx_bytes: {:?})", + full_url, evaluation.max_tx_bytes + ); + match proxy_request(req, &full_url, evaluation.max_tx_bytes).await { Ok(resp) => Ok(resp), Err(e) => { error!("Proxy error: {}", e); @@ -476,12 +576,38 @@ pub async fn handle_http_request( async fn proxy_request( req: Request, full_url: &str, + max_tx_bytes: Option, ) -> Result>> { // Parse the target URL let target_uri = full_url.parse::()?; // Prepare request for upstream - let new_req = prepare_upstream_request(req, target_uri); + let prepared_req = prepare_upstream_request(req, target_uri.clone()); + + // Apply byte limit to outgoing request if specified, converting to BoxBody + let new_req = if let Some(max_bytes) = max_tx_bytes { + match apply_request_byte_limit(prepared_req, max_bytes) { + ByteLimitResult::WithinLimit(req) => *req, + ByteLimitResult::ExceedsLimit { + content_length, + max_bytes, + } => { + // Request exceeds limit based on Content-Length - reject immediately + let message = format!( + "Request body size ({} bytes) exceeds maximum allowed ({} bytes)", + content_length, max_bytes + ); + return Ok(create_error_response( + StatusCode::PAYLOAD_TOO_LARGE, + &message, + )?); + } + } + } else { + // Convert to BoxBody for consistent types + let (parts, body) = prepared_req.into_parts(); + Request::from_parts(parts, body.boxed()) + }; // Use the shared HTTP/HTTPS client let client = get_client(); @@ -524,7 +650,6 @@ async fn proxy_request( .insert(HTTPJAIL_HEADER, HTTPJAIL_HEADER_VALUE.parse().unwrap()); let boxed_body = body.boxed(); - Ok(Response::from_parts(parts, boxed_body)) } diff --git a/src/proxy_tls.rs b/src/proxy_tls.rs index 918bb455..7043a47c 100644 --- a/src/proxy_tls.rs +++ b/src/proxy_tls.rs @@ -1,6 +1,6 @@ use crate::proxy::{ - HTTPJAIL_HEADER, HTTPJAIL_HEADER_VALUE, create_connect_403_response_with_context, - create_forbidden_response, + HTTPJAIL_HEADER, HTTPJAIL_HEADER_VALUE, apply_request_byte_limit, + create_connect_403_response_with_context, create_forbidden_response, }; use crate::rules::{Action, RuleEngine}; use crate::tls::CertificateManager; @@ -493,7 +493,7 @@ async fn handle_decrypted_https_request( match evaluation.action { Action::Allow => { debug!("Request allowed: {}", full_url); - match proxy_https_request(req, &host).await { + match proxy_https_request(req, &host, evaluation.max_tx_bytes).await { Ok(resp) => Ok(resp), Err(e) => { error!("Proxy error: {}", e); @@ -512,6 +512,7 @@ async fn handle_decrypted_https_request( async fn proxy_https_request( req: Request, host: &str, + max_tx_bytes: Option, ) -> Result>> { // Build the target URL let path = req @@ -525,7 +526,32 @@ async fn proxy_https_request( debug!("Forwarding request to: {}", target_url); // Prepare request for upstream using common function - let new_req = crate::proxy::prepare_upstream_request(req, target_uri); + let prepared_req = crate::proxy::prepare_upstream_request(req, target_uri); + + // Apply byte limit to outgoing request if specified, converting to BoxBody + let new_req = if let Some(max_bytes) = max_tx_bytes { + match apply_request_byte_limit(prepared_req, max_bytes) { + crate::proxy::ByteLimitResult::WithinLimit(req) => *req, + crate::proxy::ByteLimitResult::ExceedsLimit { + content_length, + max_bytes, + } => { + // Request exceeds limit based on Content-Length - reject immediately + let message = format!( + "Request body size ({} bytes) exceeds maximum allowed ({} bytes)", + content_length, max_bytes + ); + return Ok(crate::proxy::create_error_response( + StatusCode::PAYLOAD_TOO_LARGE, + &message, + )?); + } + } + } else { + // Convert to BoxBody for consistent types + let (parts, body) = prepared_req.into_parts(); + Request::from_parts(parts, body.boxed()) + }; // Use the shared HTTP/HTTPS client from proxy module let client = crate::proxy::get_client(); @@ -592,7 +618,6 @@ async fn proxy_https_request( .insert(HTTPJAIL_HEADER, HTTPJAIL_HEADER_VALUE.parse().unwrap()); let boxed_body = body.boxed(); - Ok(Response::from_parts(parts, boxed_body)) } diff --git a/src/rules.rs b/src/rules.rs index 8696956f..5a209e67 100644 --- a/src/rules.rs +++ b/src/rules.rs @@ -21,6 +21,7 @@ pub enum Action { pub struct EvaluationResult { pub action: Action, pub context: Option, + pub max_tx_bytes: Option, } impl EvaluationResult { @@ -28,6 +29,7 @@ impl EvaluationResult { Self { action: Action::Allow, context: None, + max_tx_bytes: None, } } @@ -35,6 +37,7 @@ impl EvaluationResult { Self { action: Action::Deny, context: None, + max_tx_bytes: None, } } @@ -42,6 +45,11 @@ impl EvaluationResult { self.context = Some(context); self } + + pub fn with_max_tx_bytes(mut self, max_tx_bytes: u64) -> Self { + self.max_tx_bytes = Some(max_tx_bytes); + self + } } /// Trait for rule engines that evaluate HTTP requests. diff --git a/src/rules/common.rs b/src/rules/common.rs index bcd2ae22..0bcffe49 100644 --- a/src/rules/common.rs +++ b/src/rules/common.rs @@ -27,12 +27,52 @@ impl RequestInfo { } } +/// Policy for allowing requests +#[derive(Debug, Clone, Serialize, PartialEq)] +#[serde(untagged)] +pub enum AllowPolicy { + /// Simple boolean allow/deny + Bool(bool), + /// Allow with byte transmission limit + Limited { max_tx_bytes: u64 }, +} + +impl<'de> Deserialize<'de> for AllowPolicy { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + use serde::de::Error; + use serde_json::Value; + + let value = Value::deserialize(deserializer)?; + match value { + Value::Bool(b) => Ok(AllowPolicy::Bool(b)), + Value::Object(mut obj) => { + if let Some(max_tx_bytes) = obj.remove("max_tx_bytes") { + let bytes = max_tx_bytes + .as_u64() + .ok_or_else(|| Error::custom("max_tx_bytes must be a number"))?; + Ok(AllowPolicy::Limited { + max_tx_bytes: bytes, + }) + } else { + Err(Error::custom( + "allow object must contain max_tx_bytes field", + )) + } + } + _ => Err(Error::custom("allow must be a boolean or object")), + } + } +} + /// Common response structure for rule engines (proc and v8_js) /// This ensures perfect parity between different evaluation modes #[derive(Debug, Clone, Deserialize, Serialize)] pub struct RuleResponse { #[serde(skip_serializing_if = "Option::is_none")] - pub allow: Option, + pub allow: Option, #[serde(skip_serializing_if = "Option::is_none")] pub deny_message: Option, } @@ -50,13 +90,13 @@ impl RuleResponse { match trimmed { "true" => { return RuleResponse { - allow: Some(true), + allow: Some(AllowPolicy::Bool(true)), deny_message: None, }; } "false" => { return RuleResponse { - allow: Some(false), + allow: Some(AllowPolicy::Bool(false)), deny_message: None, }; } @@ -70,25 +110,30 @@ impl RuleResponse { // Any other output is treated as deny with the output as the message RuleResponse { - allow: Some(false), + allow: Some(AllowPolicy::Bool(false)), deny_message: Some(trimmed.to_string()), } } - /// Convert to evaluation result tuple (allowed, context) + /// Convert to evaluation result tuple (allowed, context, max_tx_bytes) /// Following the rules: /// - If deny_message exists but allow is not set, default to deny /// - Only include context message when denying - pub fn to_evaluation_result(&self) -> (bool, Option) { - let allowed = self.allow.unwrap_or_else(|| { - // If allow is not specified but deny_message exists, default to false - self.deny_message.is_none() - }); - - if allowed { - (true, None) // Never include message when allowing - } else { - (false, self.deny_message.clone()) + /// - max_tx_bytes is returned when allow policy has a byte limit + pub fn to_evaluation_result(&self) -> (bool, Option, Option) { + match &self.allow { + Some(AllowPolicy::Bool(true)) => (true, None, None), + Some(AllowPolicy::Bool(false)) => (false, self.deny_message.clone(), None), + Some(AllowPolicy::Limited { max_tx_bytes }) => (true, None, Some(*max_tx_bytes)), + None => { + // If allow is not specified but deny_message exists, default to deny + let allowed = self.deny_message.is_none(); + if allowed { + (true, None, None) + } else { + (false, self.deny_message.clone(), None) + } + } } } } @@ -101,20 +146,20 @@ mod tests { fn test_rule_response_from_string() { // Test simple boolean strings let resp = RuleResponse::from_string("true"); - assert_eq!(resp.allow, Some(true)); + assert!(matches!(resp.allow, Some(AllowPolicy::Bool(true)))); assert_eq!(resp.deny_message, None); let resp = RuleResponse::from_string("false"); - assert_eq!(resp.allow, Some(false)); + assert!(matches!(resp.allow, Some(AllowPolicy::Bool(false)))); assert_eq!(resp.deny_message, None); // Test JSON with both fields let resp = RuleResponse::from_string(r#"{"allow": true}"#); - assert_eq!(resp.allow, Some(true)); + assert!(matches!(resp.allow, Some(AllowPolicy::Bool(true)))); assert_eq!(resp.deny_message, None); let resp = RuleResponse::from_string(r#"{"allow": false, "deny_message": "blocked"}"#); - assert_eq!(resp.allow, Some(false)); + assert!(matches!(resp.allow, Some(AllowPolicy::Bool(false)))); assert_eq!(resp.deny_message, Some("blocked".to_string())); // Test shorthand (deny_message only implies allow: false) @@ -124,7 +169,7 @@ mod tests { // Test arbitrary string treated as deny message let resp = RuleResponse::from_string("Access denied for security reasons"); - assert_eq!(resp.allow, Some(false)); + assert!(matches!(resp.allow, Some(AllowPolicy::Bool(false)))); assert_eq!( resp.deny_message, Some("Access denied for security reasons".to_string()) @@ -132,7 +177,7 @@ mod tests { // Test whitespace handling let resp = RuleResponse::from_string(" true \n"); - assert_eq!(resp.allow, Some(true)); + assert!(matches!(resp.allow, Some(AllowPolicy::Bool(true)))); assert_eq!(resp.deny_message, None); } @@ -140,34 +185,34 @@ mod tests { fn test_rule_response_to_evaluation_result() { // Allow with no message let resp = RuleResponse { - allow: Some(true), + allow: Some(AllowPolicy::Bool(true)), deny_message: None, }; - assert_eq!(resp.to_evaluation_result(), (true, None)); + assert_eq!(resp.to_evaluation_result(), (true, None, None)); // Allow with message (message should be ignored) let resp = RuleResponse { - allow: Some(true), + allow: Some(AllowPolicy::Bool(true)), deny_message: Some("ignored".to_string()), }; - assert_eq!(resp.to_evaluation_result(), (true, None)); + assert_eq!(resp.to_evaluation_result(), (true, None, None)); // Deny with message let resp = RuleResponse { - allow: Some(false), + allow: Some(AllowPolicy::Bool(false)), deny_message: Some("denied".to_string()), }; assert_eq!( resp.to_evaluation_result(), - (false, Some("denied".to_string())) + (false, Some("denied".to_string()), None) ); // Deny without message let resp = RuleResponse { - allow: Some(false), + allow: Some(AllowPolicy::Bool(false)), deny_message: None, }; - assert_eq!(resp.to_evaluation_result(), (false, None)); + assert_eq!(resp.to_evaluation_result(), (false, None, None)); // Shorthand: deny_message only (implies deny) let resp = RuleResponse { @@ -176,7 +221,7 @@ mod tests { }; assert_eq!( resp.to_evaluation_result(), - (false, Some("blocked".to_string())) + (false, Some("blocked".to_string()), None) ); // Neither field set (defaults to allow) @@ -184,7 +229,7 @@ mod tests { allow: None, deny_message: None, }; - assert_eq!(resp.to_evaluation_result(), (true, None)); + assert_eq!(resp.to_evaluation_result(), (true, None, None)); } #[test] @@ -193,36 +238,68 @@ mod tests { // Case 1: Simple true let resp = RuleResponse::from_string("true"); - assert_eq!(resp.to_evaluation_result(), (true, None)); + assert_eq!(resp.to_evaluation_result(), (true, None, None)); // Case 2: Simple false let resp = RuleResponse::from_string("false"); - assert_eq!(resp.to_evaluation_result(), (false, None)); + assert_eq!(resp.to_evaluation_result(), (false, None, None)); // Case 3: JSON allow let resp = RuleResponse::from_string(r#"{"allow": true}"#); - assert_eq!(resp.to_evaluation_result(), (true, None)); + assert_eq!(resp.to_evaluation_result(), (true, None, None)); // Case 4: JSON deny with message let resp = RuleResponse::from_string(r#"{"allow": false, "deny_message": "Not authorized"}"#); assert_eq!( resp.to_evaluation_result(), - (false, Some("Not authorized".to_string())) + (false, Some("Not authorized".to_string()), None) ); // Case 5: Shorthand deny let resp = RuleResponse::from_string(r#"{"deny_message": "Access restricted"}"#); assert_eq!( resp.to_evaluation_result(), - (false, Some("Access restricted".to_string())) + (false, Some("Access restricted".to_string()), None) ); // Case 6: Plain text message let resp = RuleResponse::from_string("Invalid request"); assert_eq!( resp.to_evaluation_result(), - (false, Some("Invalid request".to_string())) + (false, Some("Invalid request".to_string()), None) + ); + } + + #[test] + fn test_allow_with_max_tx_bytes() { + // Test parsing allow with max_tx_bytes + let resp = RuleResponse::from_string(r#"{"allow": {"max_tx_bytes": 1024}}"#); + assert!(matches!( + resp.allow, + Some(AllowPolicy::Limited { max_tx_bytes: 1024 }) + )); + assert_eq!(resp.deny_message, None); + assert_eq!(resp.to_evaluation_result(), (true, None, Some(1024))); + + // Test parsing allow with large max_tx_bytes + let resp = RuleResponse::from_string(r#"{"allow": {"max_tx_bytes": 10485760}}"#); + assert!(matches!( + resp.allow, + Some(AllowPolicy::Limited { + max_tx_bytes: 10485760 + }) + )); + assert_eq!(resp.to_evaluation_result(), (true, None, Some(10485760))); + + // Test that deny_message is ignored when max_tx_bytes is set + let resp = RuleResponse::from_string( + r#"{"allow": {"max_tx_bytes": 512}, "deny_message": "ignored"}"#, ); + assert!(matches!( + resp.allow, + Some(AllowPolicy::Limited { max_tx_bytes: 512 }) + )); + assert_eq!(resp.to_evaluation_result(), (true, None, Some(512))); } } diff --git a/src/rules/proc.rs b/src/rules/proc.rs index 6ca8cd83..cb3b5ea0 100644 --- a/src/rules/proc.rs +++ b/src/rules/proc.rs @@ -107,7 +107,7 @@ impl ProcRuleEngine { &self, process_guard: &mut Option, json_request: &str, - ) -> Result<(bool, Option), String> { + ) -> Result<(bool, Option, Option), String> { // Ensure we have a running process self.ensure_process_running(process_guard).await?; @@ -165,9 +165,9 @@ impl ProcRuleEngine { // Parse response let rule_response = RuleResponse::from_string(response); - let (allowed, message) = rule_response.to_evaluation_result(); + let (allowed, message, max_tx_bytes) = rule_response.to_evaluation_result(); - Ok((allowed, message)) + Ok((allowed, message, max_tx_bytes)) } Ok(Err(e)) => { error!("Error reading from program: {}", e); @@ -226,13 +226,17 @@ impl ProcRuleEngine { .send_request_to_process(&mut process_guard, &json_request) .await { - Ok((allowed, message)) => { + Ok((allowed, message, max_tx_bytes)) => { if allowed { debug!("ALLOW: {} {} (program allowed)", method, url); - return match message { - Some(msg) => EvaluationResult::allow().with_context(msg), - None => EvaluationResult::allow(), - }; + let mut result = EvaluationResult::allow(); + if let Some(msg) = message { + result = result.with_context(msg); + } + if let Some(bytes) = max_tx_bytes { + result = result.with_max_tx_bytes(bytes); + } + return result; } else { debug!("DENY: {} {} (program denied)", method, url); return match message { diff --git a/src/rules/v8_js.rs b/src/rules/v8_js.rs index 30f1d237..f8ce51b0 100644 --- a/src/rules/v8_js.rs +++ b/src/rules/v8_js.rs @@ -51,12 +51,12 @@ impl V8JsRuleEngine { method: &Method, url: &str, requester_ip: &str, - ) -> (bool, Option) { + ) -> (bool, Option, Option) { let request_info = match RequestInfo::from_request(method, url, requester_ip) { Ok(info) => info, Err(e) => { warn!("Failed to parse request info: {}", e); - return (false, Some("Invalid request format".to_string())); + return (false, Some("Invalid request format".to_string()), None); } }; @@ -64,7 +64,7 @@ impl V8JsRuleEngine { Ok(result) => result, Err(e) => { warn!("JavaScript execution failed: {}", e); - (false, Some("JavaScript execution failed".to_string())) + (false, Some("JavaScript execution failed".to_string()), None) } } } @@ -115,11 +115,12 @@ impl V8JsRuleEngine { } } + #[allow(clippy::type_complexity)] fn execute_with_isolate( isolate: &mut v8::OwnedIsolate, js_code: &str, request_info: &RequestInfo, - ) -> Result<(bool, Option), Box> { + ) -> Result<(bool, Option, Option), Box> { let handle_scope = &mut v8::HandleScope::new(isolate); let context = v8::Context::new(handle_scope, Default::default()); let context_scope = &mut v8::ContextScope::new(handle_scope, context); @@ -175,7 +176,7 @@ impl V8JsRuleEngine { // Use the common RuleResponse parser - exact same logic as proc engine let rule_response = RuleResponse::from_string(&response_str); - let (allowed, message) = rule_response.to_evaluation_result(); + let (allowed, message, max_tx_bytes) = rule_response.to_evaluation_result(); debug!( "JS rule returned {} for {} {}", @@ -188,13 +189,14 @@ impl V8JsRuleEngine { debug!("Deny message: {}", msg); } - Ok((allowed, message)) + Ok((allowed, message, max_tx_bytes)) } + #[allow(clippy::type_complexity)] fn create_and_execute( &self, request_info: &RequestInfo, - ) -> Result<(bool, Option), Box> { + ) -> Result<(bool, Option, Option), Box> { // Create a new isolate for each execution (simpler approach) let mut isolate = v8::Isolate::new(v8::CreateParams::default()); Self::execute_with_isolate(&mut isolate, &self.js_code, request_info) @@ -216,13 +218,13 @@ impl RuleEngineTrait for V8JsRuleEngine { runtime: self.runtime.clone(), }; - let (allowed, context) = tokio::task::spawn_blocking(move || { + let (allowed, context, max_tx_bytes) = tokio::task::spawn_blocking(move || { self_clone.execute(&method_clone, &url_clone, &ip_clone) }) .await .unwrap_or_else(|e| { warn!("Failed to spawn V8 evaluation task: {}", e); - (false, Some("Evaluation failed".to_string())) + (false, Some("Evaluation failed".to_string()), None) }); if allowed { @@ -230,6 +232,9 @@ impl RuleEngineTrait for V8JsRuleEngine { if let Some(ctx) = context { result = result.with_context(ctx); } + if let Some(bytes) = max_tx_bytes { + result = result.with_max_tx_bytes(bytes); + } result } else { let mut result = EvaluationResult::deny(); diff --git a/tests/weak_integration.rs b/tests/weak_integration.rs index 1aa818c9..a726cba4 100644 --- a/tests/weak_integration.rs +++ b/tests/weak_integration.rs @@ -349,3 +349,5 @@ fn test_host_header_security() { // The proc/JS parity tests have been moved to tests/json_parity.rs // which directly tests the rule engines without the fragility of // going through the full httpjail binary and external processes + +// The max_tx_bytes test has been moved to tests/weak_integration_max_tx_bytes.rs diff --git a/tests/weak_integration_max_tx_bytes.rs b/tests/weak_integration_max_tx_bytes.rs new file mode 100644 index 00000000..724a274c --- /dev/null +++ b/tests/weak_integration_max_tx_bytes.rs @@ -0,0 +1,294 @@ +// Test for max_tx_bytes feature using Hyper HTTP server +// Separated into its own file for clarity + +mod common; + +use bytes::Bytes; +use http_body_util::{BodyExt, Full}; +use hyper::body::Incoming; +use hyper::server::conn::http1; +use hyper::service::service_fn; +use hyper::{Request, Response}; +use hyper_util::rt::TokioIo; +use std::process::{Command, Stdio}; +use std::sync::{Arc, Mutex}; +use std::time::Duration; +use tokio::net::TcpListener; + +/// Backend server that counts all incoming request bytes (headers + body) +async fn handle_request( + req: Request, + bytes_counter: Arc>, +) -> Result>, hyper::Error> { + // Calculate request line size: "POST /upload HTTP/1.1\r\n" + let method = req.method().as_str(); + let path = req.uri().path(); + let request_line_size = format!("{} {} HTTP/1.1\r\n", method, path).len(); + + // Calculate headers size: each header is "name: value\r\n" + let headers_size: usize = req + .headers() + .iter() + .map(|(name, value)| name.as_str().len() + 2 + value.len() + 2) + .sum(); + + // Final "\r\n" separator between headers and body + let header_total = request_line_size + headers_size + 2; + + // Read body bytes frame by frame to handle truncated requests + let body = req.into_body(); + let mut body_size = 0usize; + + // Try to read all body frames with timeout on each frame + + let mut body_pin = std::pin::pin!(body); + loop { + match tokio::time::timeout(Duration::from_millis(100), body_pin.as_mut().frame()).await { + Ok(Some(Ok(frame))) => { + if let Some(data) = frame.data_ref() { + body_size += data.len(); + } + } + Ok(Some(Err(_))) => break, // Error reading frame + Ok(None) => break, // End of stream + Err(_) => break, // Timeout - connection likely closed + } + } + + let total_bytes = header_total + body_size; + + // Store the total received bytes + *bytes_counter.lock().unwrap() = total_bytes; + + // Send response + Ok(Response::new(Full::new(Bytes::from("OK")))) +} + +/// Test helper: common backend server setup +async fn setup_backend() -> (TcpListener, u16, Arc>) { + let bytes_counter = Arc::new(Mutex::new(0usize)); + let backend_listener = TcpListener::bind("127.0.0.1:0") + .await + .expect("Failed to bind backend server"); + let backend_port = backend_listener.local_addr().unwrap().port(); + (backend_listener, backend_port, bytes_counter) +} + +/// Test helper: spawn backend server +fn spawn_backend( + backend_listener: TcpListener, + bytes_counter: Arc>, +) -> tokio::task::JoinHandle<()> { + tokio::spawn(async move { + if let Ok((stream, _)) = backend_listener.accept().await { + let io = TokioIo::new(stream); + let service = service_fn(move |req| { + let counter = bytes_counter.clone(); + handle_request(req, counter) + }); + let _ = http1::Builder::new().serve_connection(io, service).await; + } + }) +} + +/// Test helper: start httpjail server +async fn start_httpjail(js_config: &str, proxy_port: u16) -> std::process::Child { + let httpjail_path: &str = env!("CARGO_BIN_EXE_httpjail"); + + let mut httpjail = Command::new(httpjail_path) + .arg("--server") + .arg("--js") + .arg(js_config) + .env("HTTPJAIL_HTTP_BIND", proxy_port.to_string()) + .env("HTTPJAIL_SKIP_KEYCHAIN_INSTALL", "1") + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .expect("Failed to start httpjail server"); + + // Wait for httpjail to start listening + let mut connected = false; + for _ in 0..10 { + tokio::time::sleep(Duration::from_millis(100)).await; + + if let Ok(Some(status)) = httpjail.try_wait() { + let mut stderr = httpjail.stderr.take().unwrap(); + let mut stderr_content = String::new(); + let _ = std::io::Read::read_to_string(&mut stderr, &mut stderr_content); + panic!( + "httpjail server exited early with status: {}\nStderr: {}", + status, stderr_content + ); + } + + if tokio::net::TcpStream::connect(format!("127.0.0.1:{}", proxy_port)) + .await + .is_ok() + { + connected = true; + break; + } + } + + if !connected { + let _ = httpjail.kill(); + panic!( + "httpjail server did not start listening on port {} within 1 second", + proxy_port + ); + } + + httpjail +} + +#[tokio::test] +async fn test_max_tx_bytes_truncates_without_content_length() { + let (backend_listener, backend_port, bytes_counter) = setup_backend().await; + let backend_handle = spawn_backend(backend_listener, bytes_counter.clone()); + + tokio::time::sleep(Duration::from_millis(100)).await; + + let proxy_port = 18080u16; + let mut httpjail = start_httpjail("({allow: {max_tx_bytes: 1024}})", proxy_port).await; + + // Send POST request with large body (5KB) through proxy WITHOUT Content-Length + // This tests the LimitedBody truncation path + let large_body = "X".repeat(5000); + let target_url = format!("http://127.0.0.1:{}/upload", backend_port); + + // Build request without Content-Length header (chunked encoding) + let req = Request::builder() + .method("POST") + .uri(&target_url) + .header("Host", format!("127.0.0.1:{}", backend_port)) + .header("Transfer-Encoding", "chunked") + .body(Full::new(Bytes::from(large_body))) + .unwrap(); + + // Send request through proxy by connecting to proxy address + // Note: For this test we need to manually connect to the proxy + let proxy_stream = tokio::net::TcpStream::connect(format!("127.0.0.1:{}", proxy_port)) + .await + .expect("Failed to connect to proxy"); + + let io = TokioIo::new(proxy_stream); + let (mut sender, conn) = hyper::client::conn::http1::handshake(io).await.unwrap(); + + // Spawn connection handler + tokio::spawn(async move { + let _ = conn.await; + }); + + // Send the request with timeout + let response = tokio::time::timeout(Duration::from_secs(1), sender.send_request(req)).await; + + // Kill httpjail server + let _ = httpjail.kill(); + let _ = httpjail.wait(); + + // Wait for backend to finish (with tight timeout) + let _ = tokio::time::timeout(Duration::from_millis(500), backend_handle).await; + + // Verify the response succeeded (proxy allowed it) + assert!( + response.is_ok(), + "Request should not timeout (got: {:?})", + response + ); + let inner_response = response.unwrap(); + assert!( + inner_response.is_ok(), + "Request should succeed through proxy (got: {:?})", + inner_response + ); + + let bytes_received = *bytes_counter.lock().unwrap(); + println!("Backend server received: {} bytes", bytes_received); + + // Backend server should have received <= 1024 bytes (including headers + body) + assert!( + bytes_received <= 1024, + "Backend should receive at most 1024 bytes, but received {} bytes", + bytes_received + ); + + // Backend server should have received some data (not zero) + assert!(bytes_received > 0, "Backend should have received some data"); +} + +#[tokio::test] +async fn test_max_tx_bytes_rejects_with_content_length() { + let (backend_listener, backend_port, bytes_counter) = setup_backend().await; + let backend_handle = spawn_backend(backend_listener, bytes_counter.clone()); + + tokio::time::sleep(Duration::from_millis(100)).await; + + let proxy_port = 18081u16; + let mut httpjail = start_httpjail("({allow: {max_tx_bytes: 1024}})", proxy_port).await; + + // Send POST request with large body (5KB) WITH Content-Length header + // This tests the early rejection path based on Content-Length + let large_body = "X".repeat(5000); + let target_url = format!("http://127.0.0.1:{}/upload", backend_port); + + let req = Request::builder() + .method("POST") + .uri(&target_url) + .header("Host", format!("127.0.0.1:{}", backend_port)) + .header("Content-Length", large_body.len()) + .body(Full::new(Bytes::from(large_body))) + .unwrap(); + + // Send request through proxy + let proxy_stream = tokio::net::TcpStream::connect(format!("127.0.0.1:{}", proxy_port)) + .await + .expect("Failed to connect to proxy"); + + let io = TokioIo::new(proxy_stream); + let (mut sender, conn) = hyper::client::conn::http1::handshake(io).await.unwrap(); + + tokio::spawn(async move { + let _ = conn.await; + }); + + // Send the request with timeout + let response = tokio::time::timeout(Duration::from_secs(1), sender.send_request(req)) + .await + .expect("Request should not timeout") + .expect("Request should complete"); + + // Kill httpjail server + let _ = httpjail.kill(); + let _ = httpjail.wait(); + + // Backend should not have been contacted + let bytes_received = *bytes_counter.lock().unwrap(); + assert_eq!( + bytes_received, 0, + "Backend should not receive any data when Content-Length exceeds limit" + ); + + // Response should be 413 Payload Too Large + assert_eq!( + response.status(), + hyper::StatusCode::PAYLOAD_TOO_LARGE, + "Should receive 413 Payload Too Large status" + ); + + // Read response body to verify error message + let body_bytes = response + .into_body() + .collect() + .await + .expect("Failed to read response body") + .to_bytes(); + let body_str = String::from_utf8_lossy(&body_bytes); + assert!( + body_str.contains("exceeds maximum allowed"), + "Error message should indicate size limit exceeded, got: {}", + body_str + ); + + // Clean up backend + let _ = tokio::time::timeout(Duration::from_millis(100), backend_handle).await; +}