From 486295ab5a467d8dbc19251fdb1178e9ca736154 Mon Sep 17 00:00:00 2001 From: itzlambda Date: Sun, 5 Oct 2025 18:04:49 +0530 Subject: [PATCH] refactor(middleware): remove retry and circuit breaker logic - Remove RetryPolicy configuration and related retry logic - Remove circuit breaker functionality from middleware stack - Simplify middleware to focus on core features: rate limiting, timeouts, connection pooling, and metrics - Update Cargo.toml to remove retry Tower feature - Update CLAUDE.md to reflect simplified middleware architecture - Remove retry_policy_example.rs and simplify middleware_usage.rs example - Update all example documentation to remove references to retry/circuit breaker features BREAKING CHANGE: Retry and circuit breaker middleware has been removed. Applications relying on these features will need to implement them at the application level or use external libraries. --- CLAUDE.md | 5 +- Cargo.toml | 2 +- crates/rullm-core/examples/README.md | 10 +- .../rullm-core/examples/anthropic_stream.rs | 4 +- crates/rullm-core/examples/basic_usage.rs | 12 +- crates/rullm-core/examples/gemini_stream.rs | 2 +- .../rullm-core/examples/middleware_usage.rs | 89 +---- .../examples/retry_policy_example.rs | 50 --- crates/rullm-core/src/config.rs | 236 +---------- crates/rullm-core/src/error.rs | 28 -- crates/rullm-core/src/lib.rs | 52 +-- crates/rullm-core/src/middleware.rs | 133 +------ crates/rullm-core/src/providers/google.rs | 2 +- crates/rullm-core/src/simple.rs | 12 - crates/rullm-core/src/tests.rs | 372 +----------------- 15 files changed, 70 insertions(+), 939 deletions(-) delete mode 100644 crates/rullm-core/examples/retry_policy_example.rs diff --git a/CLAUDE.md b/CLAUDE.md index 32cd4aa9..763940cd 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -29,12 +29,11 @@ The `openai_compatible` provider is a generic implementation that other provider ### Middleware Stack -The library uses Tower middleware for enterprise features (see `crates/rullm-core/src/middleware.rs`): -- Retry logic with exponential backoff +The library uses Tower middleware (see `crates/rullm-core/src/middleware.rs`): - Rate limiting -- Circuit breakers - Timeouts - Connection pooling +- Logging and metrics Configuration is done via `MiddlewareConfig` and `LlmServiceBuilder`. 
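Editor's note: the BREAKING CHANGE above moves retry (and circuit breaking) to the application layer, so a minimal sketch of caller-side retry around the simplified middleware stack is shown below. It uses only the `LlmServiceBuilder`, `ChatRequestBuilder`, and `ConfigBuilder` APIs that appear in this patch; it additionally assumes `ChatRequest` is `Clone` (the removed middleware retry relied on `request.clone()`) and that `OPENAI_API_KEY` is set. The attempt count and backoff values are illustrative, not library defaults.

```rust
use rullm_core::{ChatRequestBuilder, ConfigBuilder, LlmServiceBuilder, OpenAIProvider};
use std::time::Duration;

/// Sketch of application-level retry now that the middleware no longer retries.
async fn chat_with_retry() -> Result<(), Box<dyn std::error::Error>> {
    let config = ConfigBuilder::openai_from_env()?;
    let provider = OpenAIProvider::new(config)?;

    // Simplified stack: timeout + logging only (no retry, no circuit breaker).
    let mut stack = LlmServiceBuilder::new()
        .timeout(Duration::from_secs(30))
        .logging()
        .build(provider, "gpt-3.5-turbo".to_string());

    let request = ChatRequestBuilder::new()
        .user("Hello, world!")
        .max_tokens(50)
        .build();

    let max_attempts = 3;
    let mut delay = Duration::from_millis(200);
    let mut last_err = None;

    for attempt in 1..=max_attempts {
        match stack.call(request.clone()).await {
            Ok(response) => {
                println!("Response: {}", response.message.content);
                return Ok(());
            }
            Err(err) => {
                eprintln!("Attempt {attempt} failed: {err}");
                last_err = Some(err);
                if attempt < max_attempts {
                    // Plain exponential backoff; tune or swap in a retry crate.
                    tokio::time::sleep(delay).await;
                    delay *= 2;
                }
            }
        }
    }

    Err(last_err.expect("at least one attempt was made").into())
}
```

A caller-side circuit breaker can wrap `stack.call` in the same way, or both concerns can come from an external crate, as the commit message suggests.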
diff --git a/Cargo.toml b/Cargo.toml index b0336069..26ba3f6a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -10,7 +10,7 @@ rust-version = "1.85" [workspace.dependencies] # Core library dependencies tokio = { version = "1", features = ["full"] } -tower = { version = "0.4", features = ["timeout", "retry", "limit", "util"] } +tower = { version = "0.4", features = ["timeout", "limit", "util"] } rand = "0.8" reqwest = { version = "0.11", features = ["json", "stream"] } bytes = "1.0" diff --git a/crates/rullm-core/examples/README.md b/crates/rullm-core/examples/README.md index 56967238..3028f325 100644 --- a/crates/rullm-core/examples/README.md +++ b/crates/rullm-core/examples/README.md @@ -40,7 +40,7 @@ All streaming examples use the `chat_completion_stream` method which returns a ` **Environment:** Requires `OPENAI_API_KEY` -Demonstrates comprehensive OpenAI streaming with: +Demonstrates OpenAI streaming with: - **Simple streaming chat** with real-time token display - **Multi-turn conversations** with context preservation - **Creative writing** with high temperature settings @@ -87,7 +87,7 @@ while let Some(event) = stream.next().await { **Environment:** Requires `ANTHROPIC_API_KEY` -Showcases Claude's capabilities with: +Shows Claude streaming with: - **Philosophical conversations** demonstrating reasoning abilities - **Creative storytelling** with vivid imagery - **Code explanation** with technical accuracy @@ -124,7 +124,7 @@ let mut stream = provider **Environment:** Requires `GOOGLE_API_KEY` -Highlights Gemini's versatility: +Shows Gemini streaming with: - **Technical explanations** with precision - **Creative writing** using experimental models - **Code analysis** and review capabilities @@ -265,7 +265,7 @@ Demonstrates: Key features: - **Environment-based configuration** -- **Custom endpoints** for enterprise setups +- **Custom endpoints** for custom API URLs - **Validation and error handling** - **Health checks** and model availability - **Request builder patterns** from minimal to full-featured @@ -434,7 +434,7 @@ cargo run --example test_all_providers šŸŽ‰ All providers are working correctly! 
``` -This example is perfect for: +Use this example for: - Verifying your API keys work - Testing network connectivity - Validating provider implementations diff --git a/crates/rullm-core/examples/anthropic_stream.rs b/crates/rullm-core/examples/anthropic_stream.rs index 40cb5003..08d427af 100644 --- a/crates/rullm-core/examples/anthropic_stream.rs +++ b/crates/rullm-core/examples/anthropic_stream.rs @@ -183,8 +183,8 @@ async fn main() -> Result<(), Box> { println!("\n\nšŸŽÆ Tips for using Anthropic Claude streaming:"); println!("• Set ANTHROPIC_API_KEY environment variable"); println!("• Use .stream(true) in ChatRequestBuilder"); - println!("• Claude models: haiku (fast), sonnet (balanced), opus (powerful)"); - println!("• Claude excels at reasoning, analysis, and creative writing"); + println!("• Claude models: haiku (fast), sonnet (balanced), opus (largest)"); + println!("• Claude supports reasoning, analysis, and creative writing"); println!("• Lower temperature (0.1-0.4) for factual content"); println!("• Higher temperature (0.7-1.0) for creative content"); diff --git a/crates/rullm-core/examples/basic_usage.rs b/crates/rullm-core/examples/basic_usage.rs index deacdad0..4aab6d33 100644 --- a/crates/rullm-core/examples/basic_usage.rs +++ b/crates/rullm-core/examples/basic_usage.rs @@ -1,4 +1,4 @@ -use rullm_core::{ChatRequestBuilder, LlmError}; +use rullm_core::ChatRequestBuilder; // This example demonstrates the unified interface without actual provider implementations // It shows how the library would be used once provider modules are implemented @@ -33,15 +33,5 @@ async fn main() -> Result<(), Box> { println!("\nThis example shows the unified interface design."); println!("Actual provider implementations will be added in subsequent tasks."); - // Example of error handling - let error_example = LlmError::rate_limit( - "Too many requests", - Some(std::time::Duration::from_secs(60)), - ); - println!("\nError handling example:"); - println!(" Error: {error_example}"); - println!(" Is retryable: {}", error_example.is_retryable()); - println!(" Retry delay: {:?}", error_example.retry_delay()); - Ok(()) } diff --git a/crates/rullm-core/examples/gemini_stream.rs b/crates/rullm-core/examples/gemini_stream.rs index f465e8c4..b46a18d1 100644 --- a/crates/rullm-core/examples/gemini_stream.rs +++ b/crates/rullm-core/examples/gemini_stream.rs @@ -219,7 +219,7 @@ async fn main() -> Result<(), Box> { println!( "• Models: gemini-1.5-flash (fast), gemini-1.5-pro (balanced), gemini-2.0-flash-exp (experimental)" ); - println!("• Gemini excels at reasoning, code analysis, and creative tasks"); + println!("• Gemini supports reasoning, code analysis, and creative tasks"); println!("• Lower temperature (0.1-0.4) for factual/technical content"); println!("• Higher temperature (0.7-1.0) for creative content"); println!("• Use top_p for more controlled randomness"); diff --git a/crates/rullm-core/examples/middleware_usage.rs b/crates/rullm-core/examples/middleware_usage.rs index 2474434c..017d6c0f 100644 --- a/crates/rullm-core/examples/middleware_usage.rs +++ b/crates/rullm-core/examples/middleware_usage.rs @@ -1,6 +1,6 @@ use rullm_core::{ ChatRequestBuilder, ConfigBuilder, LlmServiceBuilder, MiddlewareConfig, OpenAIProvider, - RateLimit, config::RetryPolicy, + RateLimit, }; use std::time::Duration; @@ -14,16 +14,13 @@ async fn main() -> Result<(), Box> { // Example 1: Basic middleware stack with defaults basic_middleware_example().await?; - // Example 2: Custom retry policy with exponential backoff - 
custom_retry_example().await?; - - // Example 3: Production-ready configuration + // Example 2: Configuration with timeouts and rate limiting production_config_example().await?; - // Example 4: Rate-limited and monitored configuration + // Example 3: Rate-limited and monitored configuration rate_limited_example().await?; - // Example 5: Custom middleware configuration + // Example 4: Custom middleware configuration custom_middleware_config_example().await?; Ok(()) @@ -57,83 +54,40 @@ async fn basic_middleware_example() -> Result<(), Box> { Ok(()) } -/// Example 2: Custom retry policy with exponential backoff -async fn custom_retry_example() -> Result<(), Box> { - println!("šŸ”„ Example 2: Custom Retry Policy"); - - let config = ConfigBuilder::openai_from_env()?; - let provider = OpenAIProvider::new(config)?; - - // Create middleware with custom exponential backoff retry policy - let mut middleware_stack = LlmServiceBuilder::new() - .timeout(Duration::from_secs(60)) // 60 second timeout - .retry(RetryPolicy::ExponentialBackoff { - initial_delay_ms: 200, // Start with 200ms - max_delay_ms: 10000, // Cap at 10 seconds - multiplier: 2.5, // Aggressive backoff - jitter: true, // Add randomness - }) - .logging() - .build(provider, "gpt-3.5-turbo".to_string()); - - let request = ChatRequestBuilder::new() - .user("Explain quantum computing in simple terms") - .temperature(0.7) - .max_tokens(150) - .build(); - - let response = middleware_stack.call(request).await?; - - println!("āœ… Response: {}", response.message.content); - println!("šŸ”„ Retry policy: Exponential backoff with jitter\n"); - - Ok(()) -} - -/// Example 3: Production-ready configuration +/// Example 2: Configuration with timeouts and rate limiting async fn production_config_example() -> Result<(), Box> { - println!("šŸ­ Example 3: Production Configuration"); + println!("šŸ­ Example 2: Configuration with Timeouts and Rate Limiting"); let config = ConfigBuilder::openai_from_env()?; let provider = OpenAIProvider::new(config)?; - // Production-ready middleware configuration + // Middleware configuration with timeouts and rate limiting let mut middleware_stack = LlmServiceBuilder::new() .timeout(Duration::from_secs(30)) // Conservative timeout - .retry(RetryPolicy::ApiGuided { - fallback: Box::new(RetryPolicy::ExponentialBackoff { - initial_delay_ms: 100, - max_delay_ms: 5000, - multiplier: 2.0, - jitter: true, - }), - max_api_delay_ms: 30000, // Don't wait more than 30 seconds - retry_headers: vec!["retry-after".to_string(), "x-ratelimit-reset".to_string()], - }) .rate_limit(100, Duration::from_secs(60)) // 100 requests per minute - .logging() // Always log in production - .metrics() // Always collect metrics + .logging() + .metrics() .build(provider, "gpt-4".to_string()); let request = ChatRequestBuilder::new() .system("You are a helpful assistant for a production application.") .user("How can I optimize my database queries?") - .temperature(0.3) // More deterministic for production + .temperature(0.3) // Lower temperature for more deterministic output .max_tokens(300) .build(); let response = middleware_stack.call(request).await?; - println!("āœ… Production response received"); + println!("āœ… Response received"); println!("šŸ“Š Token usage: {}", response.usage.total_tokens); - println!("šŸ›”ļø Configuration: API-guided retry, rate limited, fully monitored\n"); + println!("šŸ›”ļø Configuration: Rate limited, logged and monitored\n"); Ok(()) } -/// Example 4: Rate-limited and monitored configuration +/// Example 3: Rate-limited and 
monitored configuration async fn rate_limited_example() -> Result<(), Box> { - println!("ā±ļø Example 4: Rate Limited Configuration"); + println!("ā±ļø Example 3: Rate Limited Configuration"); let config = ConfigBuilder::openai_from_env()?; let provider = OpenAIProvider::new(config)?; @@ -141,7 +95,6 @@ async fn rate_limited_example() -> Result<(), Box> { // Configuration optimized for rate limiting and monitoring let mut middleware_stack = LlmServiceBuilder::new() .timeout(Duration::from_secs(45)) - .retry(RetryPolicy::Fixed { delay_ms: 1000 }) // Simple fixed delay .rate_limit(50, Duration::from_secs(60)) // Conservative rate limit .logging() .metrics() @@ -184,9 +137,9 @@ async fn rate_limited_example() -> Result<(), Box> { Ok(()) } -/// Example 5: Custom middleware configuration from struct +/// Example 4: Custom middleware configuration from struct async fn custom_middleware_config_example() -> Result<(), Box> { - println!("āš™ļø Example 5: Custom Middleware Configuration"); + println!("āš™ļø Example 4: Custom Middleware Configuration"); let config = ConfigBuilder::openai_from_env()?; let provider = OpenAIProvider::new(config)?; @@ -194,12 +147,6 @@ async fn custom_middleware_config_example() -> Result<(), Box Result<(), Box Result<(), Box> { - println!("=== LLM Retry Policy Examples ===\n"); - - // Example 1: Simple fixed retry - let config = ConfigBuilder::openai_from_env()?.with_fixed_retry(3, 2000); // 3 retries, 2s delay - - println!("1. Fixed Retry Policy:"); - println!(" Max retries: {}", config.max_retries()); - println!(" Policy: {:?}\n", config.retry_policy()); - - // Example 2: Exponential backoff - let config = - ConfigBuilder::openai_from_env()?.with_exponential_backoff(5, 1000, 30000, 2.0, true); - - println!("2. Exponential Backoff:"); - println!(" Max retries: {}", config.max_retries()); - println!(" Policy: {:?}\n", config.retry_policy()); - - // Example 3: API-guided retry (respects rate limit headers) - let fallback = RetryPolicy::ExponentialBackoff { - initial_delay_ms: 1000, - max_delay_ms: 15000, - multiplier: 2.0, - jitter: true, - }; - - let config = ConfigBuilder::openai_from_env()?.with_api_guided_retry(3, fallback, 60000); // Max 60s from API - - println!("3. API-Guided Retry (Smart):"); - println!(" Max retries: {}", config.max_retries()); - println!(" Policy: {:?}\n", config.retry_policy()); - - // Example 4: Smart retry (default - API-guided with good fallback) - let config = ConfigBuilder::openai_from_env()?.with_smart_retry(5); - - println!("4. Smart Retry (Default):"); - println!(" Max retries: {}", config.max_retries()); - println!(" Policy: {:?}\n", config.retry_policy()); - - println!("=== How API-Guided Retry Works ==="); - println!("When a 429 (rate limit) response is received:"); - println!("1. Check for 'Retry-After' header -> use that delay"); - println!("2. Check for 'X-RateLimit-Reset' header -> calculate delay"); - println!("3. If no headers found -> use fallback policy"); - println!("4. 
Respect max_api_delay_ms limit (prevents infinite waits)"); - - Ok(()) -} diff --git a/crates/rullm-core/src/config.rs b/crates/rullm-core/src/config.rs index f2dcd6cd..be8a3488 100644 --- a/crates/rullm-core/src/config.rs +++ b/crates/rullm-core/src/config.rs @@ -1,49 +1,6 @@ use serde::{Deserialize, Serialize}; use std::collections::HashMap; -use std::time::{Duration, SystemTime, UNIX_EPOCH}; - -/// Retry policy configuration -#[derive(Debug, Clone, Serialize, Deserialize)] -pub enum RetryPolicy { - /// Fixed delay between retries - Fixed { delay_ms: u64 }, - /// Exponential backoff with optional jitter - ExponentialBackoff { - initial_delay_ms: u64, - max_delay_ms: u64, - multiplier: f64, - jitter: bool, - }, - /// Respect API-provided retry timing from response headers, with fallback policy - ApiGuided { - /// Fallback policy when no API guidance is available - fallback: Box, - /// Maximum time to wait based on API guidance (prevents indefinite waits) - max_api_delay_ms: u64, - /// Headers to check for retry timing (in order of preference) - retry_headers: Vec, - }, -} - -impl Default for RetryPolicy { - fn default() -> Self { - RetryPolicy::ApiGuided { - fallback: Box::new(RetryPolicy::ExponentialBackoff { - initial_delay_ms: 1000, - max_delay_ms: 30000, - multiplier: 2.0, - jitter: true, - }), - max_api_delay_ms: 60000, // Max 1 minute wait from API guidance - retry_headers: vec![ - "retry-after".to_string(), - "x-ratelimit-reset".to_string(), - "x-ratelimit-reset-after".to_string(), - "reset-time".to_string(), - ], - } - } -} +use std::time::Duration; /// Configuration trait for LLM providers pub trait ProviderConfig: Send + Sync { @@ -59,12 +16,6 @@ pub trait ProviderConfig: Send + Sync { /// Get any additional headers required by the provider fn headers(&self) -> HashMap; - /// Get maximum number of retry attempts - fn max_retries(&self) -> u32; - - /// Get retry policy configuration - fn retry_policy(&self) -> RetryPolicy; - /// Validate the configuration fn validate(&self) -> Result<(), crate::error::LlmError>; } @@ -76,8 +27,6 @@ pub struct HttpProviderConfig { pub base_url: String, pub timeout_seconds: u64, pub headers: HashMap, - pub max_retries: u32, - pub retry_delay_ms: u64, } impl HttpProviderConfig { @@ -87,8 +36,6 @@ impl HttpProviderConfig { base_url: base_url.into(), timeout_seconds: 30, headers: HashMap::new(), - max_retries: 3, - retry_delay_ms: 1000, } } @@ -101,12 +48,6 @@ impl HttpProviderConfig { self.headers.insert(key.into(), value.into()); self } - - pub fn with_retries(mut self, max_retries: u32, delay_ms: u64) -> Self { - self.max_retries = max_retries; - self.retry_delay_ms = delay_ms; - self - } } impl ProviderConfig for HttpProviderConfig { @@ -126,16 +67,6 @@ impl ProviderConfig for HttpProviderConfig { self.headers.clone() } - fn max_retries(&self) -> u32 { - self.max_retries - } - - fn retry_policy(&self) -> RetryPolicy { - RetryPolicy::Fixed { - delay_ms: self.retry_delay_ms, - } - } - fn validate(&self) -> Result<(), crate::error::LlmError> { if self.api_key.is_empty() { return Err(crate::error::LlmError::configuration("API key is required")); @@ -165,8 +96,6 @@ pub struct OpenAICompatibleConfig { pub project: Option, pub base_url: Option, pub timeout_seconds: u64, - pub max_retries: u32, - pub retry_policy: RetryPolicy, } /// Type alias for backwards compatibility @@ -180,8 +109,6 @@ impl OpenAICompatibleConfig { project: None, base_url: None, timeout_seconds: 30, - max_retries: 3, - retry_policy: RetryPolicy::default(), } } @@ -192,8 +119,6 @@ 
impl OpenAICompatibleConfig { project: None, base_url: Some("https://api.groq.com/openai/v1".to_string()), timeout_seconds: 30, - max_retries: 3, - retry_policy: RetryPolicy::default(), } } @@ -204,8 +129,6 @@ impl OpenAICompatibleConfig { project: None, base_url: Some("https://openrouter.ai/api/v1".to_string()), timeout_seconds: 30, - max_retries: 3, - retry_policy: RetryPolicy::default(), } } @@ -223,62 +146,6 @@ impl OpenAICompatibleConfig { self.base_url = Some(base_url.into()); self } - - pub fn with_retry_policy(mut self, retry_policy: RetryPolicy) -> Self { - self.retry_policy = retry_policy; - self - } - - pub fn with_fixed_retry(mut self, max_retries: u32, delay_ms: u64) -> Self { - self.max_retries = max_retries; - self.retry_policy = RetryPolicy::Fixed { delay_ms }; - self - } - - pub fn with_exponential_backoff( - mut self, - max_retries: u32, - initial_delay_ms: u64, - max_delay_ms: u64, - multiplier: f64, - jitter: bool, - ) -> Self { - self.max_retries = max_retries; - self.retry_policy = RetryPolicy::ExponentialBackoff { - initial_delay_ms, - max_delay_ms, - multiplier, - jitter, - }; - self - } - - pub fn with_api_guided_retry( - mut self, - max_retries: u32, - fallback_policy: RetryPolicy, - max_api_delay_ms: u64, - ) -> Self { - self.max_retries = max_retries; - self.retry_policy = RetryPolicy::ApiGuided { - fallback: Box::new(fallback_policy), - max_api_delay_ms, - retry_headers: vec![ - "retry-after".to_string(), - "x-ratelimit-reset".to_string(), - "x-ratelimit-reset-after".to_string(), - "reset-time".to_string(), - ], - }; - self - } - - pub fn with_smart_retry(mut self, max_retries: u32) -> Self { - // Smart retry: API-guided with exponential backoff fallback - self.max_retries = max_retries; - self.retry_policy = RetryPolicy::default(); - self - } } impl ProviderConfig for OpenAICompatibleConfig { @@ -315,14 +182,6 @@ impl ProviderConfig for OpenAICompatibleConfig { headers } - fn max_retries(&self) -> u32 { - self.max_retries - } - - fn retry_policy(&self) -> RetryPolicy { - self.retry_policy.clone() - } - fn validate(&self) -> Result<(), crate::error::LlmError> { if self.api_key.is_empty() { return Err(crate::error::LlmError::configuration("API key is required")); @@ -341,8 +200,6 @@ pub struct AnthropicConfig { pub api_key: String, pub base_url: Option, pub timeout_seconds: u64, - pub max_retries: u32, - pub retry_policy: RetryPolicy, } impl AnthropicConfig { @@ -351,8 +208,6 @@ impl AnthropicConfig { api_key: api_key.into(), base_url: None, timeout_seconds: 30, - max_retries: 3, - retry_policy: RetryPolicy::default(), } } @@ -385,14 +240,6 @@ impl ProviderConfig for AnthropicConfig { headers } - fn max_retries(&self) -> u32 { - self.max_retries - } - - fn retry_policy(&self) -> RetryPolicy { - self.retry_policy.clone() - } - fn validate(&self) -> Result<(), crate::error::LlmError> { if self.api_key.is_empty() { return Err(crate::error::LlmError::configuration( @@ -410,8 +257,6 @@ pub struct GoogleAiConfig { pub api_key: String, pub base_url: Option, pub timeout_seconds: u64, - pub max_retries: u32, - pub retry_policy: RetryPolicy, } impl GoogleAiConfig { @@ -420,8 +265,6 @@ impl GoogleAiConfig { api_key: api_key.into(), base_url: None, timeout_seconds: 30, - max_retries: 3, - retry_policy: RetryPolicy::default(), } } @@ -452,14 +295,6 @@ impl ProviderConfig for GoogleAiConfig { headers } - fn max_retries(&self) -> u32 { - self.max_retries - } - - fn retry_policy(&self) -> RetryPolicy { - self.retry_policy.clone() - } - fn validate(&self) -> Result<(), 
crate::error::LlmError> { if self.api_key.is_empty() { return Err(crate::error::LlmError::configuration( @@ -563,72 +398,3 @@ impl ConfigBuilder { Ok(config) } } - -/// Utility functions for parsing retry timing from HTTP headers -pub mod retry_parsing { - use super::*; - use std::str::FromStr; - - /// Parse retry delay from HTTP response headers - pub fn parse_retry_delay( - headers: &HashMap, - retry_headers: &[String], - max_delay_ms: u64, - ) -> Option { - for header_name in retry_headers { - if let Some(value) = headers.get(header_name) { - if let Some(delay) = parse_single_header(header_name, value, max_delay_ms) { - return Some(delay); - } - } - } - None - } - - fn parse_single_header(header_name: &str, value: &str, max_delay_ms: u64) -> Option { - match header_name.to_lowercase().as_str() { - "retry-after" => parse_retry_after(value, max_delay_ms), - "x-ratelimit-reset" => parse_reset_timestamp(value, max_delay_ms), - "x-ratelimit-reset-after" => parse_seconds(value, max_delay_ms), - "reset-time" => parse_reset_timestamp(value, max_delay_ms), - _ => None, - } - } - - fn parse_retry_after(value: &str, max_delay_ms: u64) -> Option { - // Retry-After can be either seconds or HTTP date - if let Ok(seconds) = u64::from_str(value.trim()) { - let delay_ms = seconds * 1000; - if delay_ms <= max_delay_ms { - return Some(Duration::from_millis(delay_ms)); - } - } - // TODO: Add HTTP date parsing if needed - None - } - - fn parse_reset_timestamp(value: &str, max_delay_ms: u64) -> Option { - if let Ok(timestamp) = u64::from_str(value.trim()) { - let now = SystemTime::now().duration_since(UNIX_EPOCH).ok()?.as_secs(); - - if timestamp > now { - let delay_seconds = timestamp - now; - let delay_ms = delay_seconds * 1000; - if delay_ms <= max_delay_ms { - return Some(Duration::from_millis(delay_ms)); - } - } - } - None - } - - fn parse_seconds(value: &str, max_delay_ms: u64) -> Option { - if let Ok(seconds) = u64::from_str(value.trim()) { - let delay_ms = seconds * 1000; - if delay_ms <= max_delay_ms { - return Some(Duration::from_millis(delay_ms)); - } - } - None - } -} diff --git a/crates/rullm-core/src/error.rs b/crates/rullm-core/src/error.rs index e58af07a..702299cd 100644 --- a/crates/rullm-core/src/error.rs +++ b/crates/rullm-core/src/error.rs @@ -195,34 +195,6 @@ impl LlmError { source: Some(source.into()), } } - - /// Check if the error is retryable - pub fn is_retryable(&self) -> bool { - match self { - LlmError::Network { .. } => true, - LlmError::RateLimit { .. } => true, - LlmError::Timeout { .. } => true, - LlmError::ServiceUnavailable { .. } => true, - LlmError::Api { code, .. } => { - // Some API errors are retryable (e.g., 500, 502, 503, 504) - code.as_ref() - .map(|c| matches!(c.as_str(), "500" | "502" | "503" | "504")) - .unwrap_or(false) - } - _ => false, - } - } - - /// Get retry delay for retryable errors - pub fn retry_delay(&self) -> Option { - match self { - LlmError::RateLimit { retry_after, .. } => *retry_after, - LlmError::Network { .. } => Some(std::time::Duration::from_secs(1)), - LlmError::Timeout { .. } => Some(std::time::Duration::from_millis(500)), - LlmError::ServiceUnavailable { .. } => Some(std::time::Duration::from_secs(5)), - _ => None, - } - } } /// Convert from reqwest errors diff --git a/crates/rullm-core/src/lib.rs b/crates/rullm-core/src/lib.rs index 4775f34a..fca569cf 100644 --- a/crates/rullm-core/src/lib.rs +++ b/crates/rullm-core/src/lib.rs @@ -1,18 +1,17 @@ //! # rullm-core - Rust LLM Library //! -//! 
A high-performance Rust library for interacting with Large Language Models (LLMs). -//! Built with Tower middleware for enterprise-grade reliability, featuring retry logic, -//! rate limiting, circuit breakers, and comprehensive error handling. +//! A Rust library for interacting with Large Language Models (LLMs). +//! Built with Tower middleware, featuring rate limiting and error handling. //! //! ## Features //! -//! - **Multiple LLM Providers** - OpenAI, Anthropic, Google AI -//! - **High Performance** - Built on Tower with connection pooling and async/await -//! - **Enterprise Ready** - Retry logic, rate limiting, circuit breakers, timeouts -//! - **Dual APIs** - Simple string-based API + advanced API with full control -//! - **Real-time Streaming** - Stream responses token by token as they're generated -//! - **Well Tested** - Comprehensive test suite with examples -//! - **Observability** - Built-in metrics, logging, and error handling +//! - Multiple LLM Providers (OpenAI, Anthropic, Google AI) +//! - Tower middleware with connection pooling and async/await +//! - Rate limiting, timeouts, and error handling +//! - Dual APIs: Simple string-based API and advanced API with full control +//! - Streaming support for token-by-token responses +//! - Test suite with examples +//! - Metrics, logging, and error handling //! //! ## Quick Start //! @@ -33,19 +32,19 @@ //! ### Advanced API (Full Control) //! //! ```rust,no_run -//! use rullm_core::{OpenAIConfig, OpenAIProvider, ChatProvider, ChatRequestBuilder, ChatRole}; +//! use rullm_core::{OpenAIConfig, OpenAIProvider, ChatCompletion, ChatRequestBuilder, ChatRole}; //! //! #[tokio::main] //! async fn main() -> Result<(), Box> { //! let config = OpenAIConfig::new("your-api-key"); //! let provider = OpenAIProvider::new(config)?; -//! +//! //! let request = ChatRequestBuilder::new() //! .user("Hello, world!") //! .temperature(0.7) //! .max_tokens(100) //! .build(); -//! +//! //! let response = provider.chat_completion(request, "gpt-3.5-turbo").await?; //! println!("Response: {}", response.message.content); //! Ok(()) @@ -54,7 +53,7 @@ //! //! ## Streaming API Overview //! -//! The streaming API enables real-time token-by-token responses, perfect for interactive +//! The streaming API enables real-time token-by-token responses for interactive //! chat applications and live user experiences. //! //! ### Core Streaming Types @@ -66,13 +65,14 @@ //! ### Basic Streaming Usage //! //! ```rust,no_run -//! use rullm_core::{OpenAIProvider, ChatProvider, ChatRequestBuilder, ChatStreamEvent}; +//! use rullm_core::{OpenAIProvider, OpenAIConfig, ChatCompletion, ChatRequestBuilder, ChatStreamEvent}; //! use futures::StreamExt; //! //! #[tokio::main] //! async fn main() -> Result<(), Box> { -//! let provider = OpenAIProvider::new(/* config */)?; -//! +//! let config = OpenAIConfig::new("your-api-key"); +//! let provider = OpenAIProvider::new(config)?; +//! //! let request = ChatRequestBuilder::new() //! .user("Tell me a story") //! .stream(true) // Enable streaming @@ -104,7 +104,7 @@ //! //! ### Streaming Examples //! -//! The library includes comprehensive streaming examples for each provider: +//! The library includes streaming examples for each provider: //! //! - `openai_stream.rs` - OpenAI GPT models streaming //! - `anthropic_stream.rs` - Anthropic Claude models streaming @@ -127,19 +127,25 @@ //! //! ## Error Handling //! -//! All operations return [`Result`](LlmError) for comprehensive error handling: +//! 
All operations return [`Result`](LlmError) for error handling: //! -//! ```rust,no_run -//! use rullm_core::{LlmError, OpenAIProvider, ChatProvider}; +//! ```rust,ignore +//! use rullm_core::{LlmError, OpenAIProvider, OpenAIConfig, ChatCompletion, ChatRequestBuilder}; //! +//! # async fn example() -> Result<(), Box> { +//! # let config = OpenAIConfig::new("your-api-key"); +//! # let provider = OpenAIProvider::new(config)?; +//! # let request = ChatRequestBuilder::new().user("test").build(); //! match provider.chat_completion(request, "gpt-4").await { //! Ok(response) => println!("Success: {}", response.message.content), -//! Err(LlmError::Authentication(_)) => println!("Invalid API key"), -//! Err(LlmError::RateLimit { retry_after }) => { +//! Err(LlmError::Authentication { .. }) => println!("Invalid API key"), +//! Err(LlmError::RateLimit { retry_after, .. }) => { //! println!("Rate limited, retry after: {:?}", retry_after); //! } //! Err(e) => println!("Other error: {}", e), //! } +//! # Ok(()) +//! # } //! ``` pub mod config; diff --git a/crates/rullm-core/src/middleware.rs b/crates/rullm-core/src/middleware.rs index 6e2dc58f..941174c8 100644 --- a/crates/rullm-core/src/middleware.rs +++ b/crates/rullm-core/src/middleware.rs @@ -1,4 +1,3 @@ -use crate::config::RetryPolicy; use crate::error::LlmError; use crate::types::{ ChatCompletion, ChatRequest, ChatResponse, ChatStreamEvent, StreamConfig, StreamResult, @@ -9,16 +8,6 @@ use metrics::{counter, histogram}; use std::pin::Pin; use std::time::{Duration, Instant}; -/// Retry attempt information for logging and diagnostics -#[derive(Debug, Clone)] -pub struct RetryInfo { - pub attempt: u32, - pub max_retries: u32, - pub delay: Duration, - pub reason: String, - pub response_status: Option, -} - /// Streaming metrics collector #[derive(Debug, Clone)] pub struct StreamingMetrics { @@ -169,33 +158,10 @@ where } } -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_retry_info_creation() { - let retry_info = RetryInfo { - attempt: 2, - max_retries: 3, - delay: Duration::from_millis(200), - reason: "HTTP 500".to_string(), - response_status: Some(500), - }; - - assert_eq!(retry_info.attempt, 2); - assert_eq!(retry_info.max_retries, 3); - assert_eq!(retry_info.delay, Duration::from_millis(200)); - assert_eq!(retry_info.reason, "HTTP 500"); - assert_eq!(retry_info.response_status, Some(500)); - } -} - /// Middleware configuration for LLM providers #[derive(Debug, Clone)] pub struct MiddlewareConfig { pub timeout: Option, - pub retry_policy: Option, pub rate_limit: Option, pub enable_logging: bool, pub enable_metrics: bool, @@ -205,12 +171,6 @@ impl Default for MiddlewareConfig { fn default() -> Self { Self { timeout: Some(Duration::from_secs(30)), - retry_policy: Some(RetryPolicy::ExponentialBackoff { - initial_delay_ms: 100, - max_delay_ms: 5000, - multiplier: 2.0, - jitter: false, - }), rate_limit: None, enable_logging: true, enable_metrics: false, @@ -251,11 +211,6 @@ impl LlmServiceBuilder { self } - pub fn retry(mut self, policy: RetryPolicy) -> Self { - self.middleware_config.retry_policy = Some(policy); - self - } - pub fn rate_limit(mut self, requests_per_period: u64, period: Duration) -> Self { let rate_limit = RateLimit { requests_per_period, @@ -309,12 +264,8 @@ where ); } - // Apply retry logic - let result = if let Some(ref policy) = self.config.retry_policy { - self.call_with_retry(request, policy).await - } else { - self.provider.chat_completion(request, &self.model).await - }; + // Make the request + let result = 
self.provider.chat_completion(request, &self.model).await; // Apply metrics and logging match &result { @@ -351,68 +302,7 @@ where result } - async fn call_with_retry( - &self, - request: ChatRequest, - policy: &RetryPolicy, - ) -> Result { - let max_retries = 3; // Could be configurable - let mut attempt = 0; - - loop { - match self - .provider - .chat_completion(request.clone(), &self.model) - .await - { - Ok(response) => return Ok(response), - Err(error) => { - if attempt >= max_retries || !error.is_retryable() { - return Err(error); - } - - let delay = match policy { - RetryPolicy::Fixed { delay_ms } => Duration::from_millis(*delay_ms), - RetryPolicy::ExponentialBackoff { - initial_delay_ms, - max_delay_ms, - multiplier, - jitter: _, - } => { - let delay_ms = - (*initial_delay_ms as f64 * multiplier.powi(attempt)) as u64; - let delay_ms = std::cmp::min(delay_ms, *max_delay_ms); - Duration::from_millis(delay_ms) - } - RetryPolicy::ApiGuided { fallback, .. } => { - // For simplicity, use fallback policy - match fallback.as_ref() { - RetryPolicy::Fixed { delay_ms } => Duration::from_millis(*delay_ms), - RetryPolicy::ExponentialBackoff { - initial_delay_ms, - max_delay_ms, - multiplier, - jitter: _, - } => { - let delay_ms = (*initial_delay_ms as f64 - * multiplier.powi(attempt)) - as u64; - let delay_ms = std::cmp::min(delay_ms, *max_delay_ms); - Duration::from_millis(delay_ms) - } - RetryPolicy::ApiGuided { .. } => Duration::from_millis(1000), // Fallback - } - } - }; - - tokio::time::sleep(delay).await; - attempt += 1; - } - } - } - } - - /// Streaming version with retry logic only for initial call and metrics tracking + /// Streaming version with metrics tracking pub async fn call_stream( &self, request: ChatRequest, @@ -427,18 +317,11 @@ where ); } - // Apply retry logic only for the initial call - let stream = if let Some(ref _policy) = self.config.retry_policy { - // For streaming, we can't really retry on a Stream, so just call normally - // Retry logic would be complex for streams, so just use direct call for now - self.provider - .chat_completion_stream(request, &self.model, config) - .await - } else { - self.provider - .chat_completion_stream(request, &self.model, config) - .await - }; + // Make the streaming request + let stream = self + .provider + .chat_completion_stream(request, &self.model, config) + .await; let metrics_stream = MetricsStream::new(stream, provider_name); diff --git a/crates/rullm-core/src/providers/google.rs b/crates/rullm-core/src/providers/google.rs index 9c05db82..76c0e598 100644 --- a/crates/rullm-core/src/providers/google.rs +++ b/crates/rullm-core/src/providers/google.rs @@ -479,7 +479,7 @@ impl ChatCompletion for GoogleProvider { }; // Google now returns Server-Sent Events (SSE) for streaming responses. - // Leverage the shared `sse_lines` util to properly parse the stream. + // Use the shared `sse_lines` util to parse the stream. 
let byte_stream = response.bytes_stream(); let mut sse_stream = sse_lines(byte_stream); diff --git a/crates/rullm-core/src/simple.rs b/crates/rullm-core/src/simple.rs index 6eb84007..3f3d667d 100644 --- a/crates/rullm-core/src/simple.rs +++ b/crates/rullm-core/src/simple.rs @@ -29,8 +29,6 @@ pub struct SimpleLlmConfig { pub default_top_p: Option, /// Request timeout pub timeout: Duration, - /// Maximum number of retry attempts - pub max_retries: u32, /// Whether to validate inputs before sending requests pub validate_inputs: bool, } @@ -70,7 +68,6 @@ impl Default for SimpleLlmConfig { default_max_tokens: None, default_top_p: None, timeout: Duration::from_secs(30), - max_retries: 3, validate_inputs: true, } } @@ -136,12 +133,6 @@ impl SimpleLlmConfig { self } - /// Set the maximum retry attempts - pub fn with_max_retries(mut self, max_retries: u32) -> Self { - self.max_retries = max_retries; - self - } - /// Enable or disable input validation pub fn with_validation(mut self, validate: bool) -> Self { self.validate_inputs = validate; @@ -995,7 +986,6 @@ mod tests { fn test_simple_llm_config_default() { let config = SimpleLlmConfig::default(); assert_eq!(config.timeout, Duration::from_secs(30)); - assert_eq!(config.max_retries, 3); assert!(config.validate_inputs); assert_eq!(config.default_models.openai, "gpt-3.5-turbo"); } @@ -1007,7 +997,6 @@ mod tests { .with_max_tokens(2000) .with_top_p(0.9) .with_timeout(Duration::from_secs(60)) - .with_max_retries(5) .with_validation(false) .with_openai_model("gpt-4") .with_anthropic_model("claude-3-sonnet-20240229") @@ -1017,7 +1006,6 @@ mod tests { assert_eq!(config.default_max_tokens, Some(2000)); assert_eq!(config.default_top_p, Some(0.9)); assert_eq!(config.timeout, Duration::from_secs(60)); - assert_eq!(config.max_retries, 5); assert!(!config.validate_inputs); assert_eq!(config.default_models.openai, "gpt-4"); assert_eq!(config.default_models.anthropic, "claude-3-sonnet-20240229"); diff --git a/crates/rullm-core/src/tests.rs b/crates/rullm-core/src/tests.rs index 882b39a6..7bebc085 100644 --- a/crates/rullm-core/src/tests.rs +++ b/crates/rullm-core/src/tests.rs @@ -226,8 +226,8 @@ fn test_openai_config() { config.validate().unwrap(); - // Test invalid config - let invalid_config = OpenAIConfig::new("invalid-key"); + // Test invalid config (empty API key) + let invalid_config = OpenAIConfig::new(""); assert!(invalid_config.validate().is_err()); } @@ -264,37 +264,6 @@ fn test_google_ai_config() { config.validate().unwrap(); } -#[test] -fn test_error_retry_logic() { - let network_error = LlmError::network("Connection failed"); - assert!(network_error.is_retryable()); - assert_eq!( - network_error.retry_delay(), - Some(std::time::Duration::from_secs(1)) - ); - - let rate_limit_error = LlmError::rate_limit( - "Too many requests", - Some(std::time::Duration::from_secs(60)), - ); - assert!(rate_limit_error.is_retryable()); - assert_eq!( - rate_limit_error.retry_delay(), - Some(std::time::Duration::from_secs(60)) - ); - - let auth_error = LlmError::authentication("Invalid API key"); - assert!(!auth_error.is_retryable()); - assert_eq!(auth_error.retry_delay(), None); - - let api_error_retryable = LlmError::api("test", "Server error", Some("503".to_string()), None); - assert!(api_error_retryable.is_retryable()); - - let api_error_not_retryable = - LlmError::api("test", "Bad request", Some("400".to_string()), None); - assert!(!api_error_not_retryable.is_retryable()); -} - #[test] fn test_all_llm_error_variants() { use std::collections::HashMap; @@ -425,66 
+394,6 @@ fn test_error_conversions() { // so we test the error conversion logic indirectly through the provider implementations } -#[test] -fn test_retry_logic_comprehensive() { - // Test all retryable error types - let network_error = LlmError::network("Connection failed"); - assert!(network_error.is_retryable()); - assert_eq!( - network_error.retry_delay(), - Some(std::time::Duration::from_secs(1)) - ); - - let rate_limit_error = - LlmError::rate_limit("Rate limited", Some(std::time::Duration::from_secs(30))); - assert!(rate_limit_error.is_retryable()); - assert_eq!( - rate_limit_error.retry_delay(), - Some(std::time::Duration::from_secs(30)) - ); - - let timeout_error = LlmError::timeout(std::time::Duration::from_secs(10)); - assert!(timeout_error.is_retryable()); - assert_eq!( - timeout_error.retry_delay(), - Some(std::time::Duration::from_millis(500)) - ); - - let service_error = LlmError::service_unavailable("openai"); - assert!(service_error.is_retryable()); - assert_eq!( - service_error.retry_delay(), - Some(std::time::Duration::from_secs(5)) - ); - - // Test retryable API errors (5xx) - let server_error = LlmError::api("test", "Server error", Some("502".to_string()), None); - assert!(server_error.is_retryable()); - - let gateway_timeout = LlmError::api("test", "Gateway timeout", Some("504".to_string()), None); - assert!(gateway_timeout.is_retryable()); - - // Test non-retryable error types - let auth_error = LlmError::authentication("Invalid key"); - assert!(!auth_error.is_retryable()); - assert_eq!(auth_error.retry_delay(), None); - - let config_error = LlmError::configuration("Bad config"); - assert!(!config_error.is_retryable()); - assert_eq!(config_error.retry_delay(), None); - - let validation_error = LlmError::validation("Invalid input"); - assert!(!validation_error.is_retryable()); - assert_eq!(validation_error.retry_delay(), None); - - // Test non-retryable API errors (4xx) - let bad_request = LlmError::api("test", "Bad request", Some("400".to_string()), None); - assert!(!bad_request.is_retryable()); - - let unauthorized = LlmError::api("test", "Unauthorized", Some("401".to_string()), None); - assert!(!unauthorized.is_retryable()); -} - #[test] fn test_provider_specific_error_mapping() { use std::collections::HashMap; @@ -592,34 +501,6 @@ fn test_provider_specific_error_mapping() { &serde_json::Value::String("RATE_LIMIT_EXCEEDED".to_string()) ); } - - // Test retryable server errors (500, 502, 503, 504) - let internal_error = LlmError::api( - "any_provider", - "Internal server error", - Some("500".to_string()), - None, - ); - assert!(internal_error.is_retryable()); - - let bad_gateway = LlmError::api("any_provider", "Bad gateway", Some("502".to_string()), None); - assert!(bad_gateway.is_retryable()); - - let service_unavailable = LlmError::api( - "any_provider", - "Service unavailable", - Some("503".to_string()), - None, - ); - assert!(service_unavailable.is_retryable()); - - // Test non-retryable client errors (400, 401, etc.) 
- let bad_request = LlmError::api("any_provider", "Bad request", Some("400".to_string()), None); - assert!(!bad_request.is_retryable()); - - // Test client error (should not be retryable) - let client_error = LlmError::api("any_provider", "Bad request", Some("400".to_string()), None); - assert!(!client_error.is_retryable()); } #[test] @@ -953,110 +834,11 @@ fn test_anthropic_response_parsing_errors() { // Middleware Tests // ============================================================================= -#[derive(Clone)] -struct MockFailingProvider { - name: &'static str, - fail_count: std::sync::Arc>, - max_failures: u32, -} - -impl MockFailingProvider { - fn new(name: &'static str, max_failures: u32) -> Self { - Self { - name, - fail_count: std::sync::Arc::new(std::sync::Mutex::new(0)), - max_failures, - } - } - - fn failure_count(&self) -> u32 { - *self.fail_count.lock().unwrap() - } -} - -#[async_trait::async_trait] -impl LlmProvider for MockFailingProvider { - fn name(&self) -> &'static str { - self.name - } - - fn aliases(&self) -> &'static [&'static str] { - &[] - } - - fn default_base_url(&self) -> Option<&'static str> { - Some("") - } - - fn env_key(&self) -> &'static str { - "" - } - - async fn available_models(&self) -> Result, LlmError> { - Ok(vec!["test-model".to_string()]) - } - - async fn health_check(&self) -> Result<(), LlmError> { - Ok(()) - } -} - -#[async_trait::async_trait] -impl ChatCompletion for MockFailingProvider { - async fn chat_completion( - &self, - _request: ChatRequest, - model: &str, - ) -> Result { - let mut count = self.fail_count.lock().unwrap(); - if *count < self.max_failures { - *count += 1; - return Err(LlmError::api( - self.name, - "Simulated failure", - Some("500".to_string()), - None, - )); - } - - Ok(ChatResponse { - message: ChatMessage { - role: ChatRole::Assistant, - content: "Success after retries".to_string(), - }, - model: model.to_string(), - usage: TokenUsage { - prompt_tokens: 10, - completion_tokens: 5, - total_tokens: 15, - }, - finish_reason: Some("stop".to_string()), - provider_metadata: None, - }) - } - - async fn chat_completion_stream( - &self, - _request: ChatRequest, - _model: &str, - _config: Option, - ) -> crate::types::StreamResult { - Box::pin(futures::stream::once(async { - Err(LlmError::model("Streaming not implemented")) - })) - } - - async fn estimate_tokens(&self, text: &str, _model: &str) -> Result { - Ok(text.len() as u32 / 4) - } -} - #[test] fn test_middleware_config_default() { let config = MiddlewareConfig::default(); assert_eq!(config.timeout, Some(Duration::from_secs(30))); - assert!(config.retry_policy.is_some()); assert!(config.rate_limit.is_none()); assert!(config.enable_logging); assert!(!config.enable_metrics); @@ -1071,17 +853,12 @@ fn test_middleware_config_custom() { let config = MiddlewareConfig { timeout: Some(Duration::from_secs(45)), - retry_policy: Some(RetryPolicy::Fixed { delay_ms: 1000 }), rate_limit: Some(rate_limit.clone()), enable_logging: false, enable_metrics: true, }; assert_eq!(config.timeout, Some(Duration::from_secs(45))); - assert!(matches!( - config.retry_policy, - Some(RetryPolicy::Fixed { delay_ms: 1000 }) - )); assert!(config.rate_limit.is_some()); assert!(!config.enable_logging); assert!(config.enable_metrics); @@ -1108,7 +885,6 @@ fn test_llm_service_builder_fluent_api() { let middleware_stack = LlmServiceBuilder::new() .timeout(Duration::from_secs(60)) - .retry(RetryPolicy::Fixed { delay_ms: 500 }) .rate_limit(50, Duration::from_secs(30)) .logging() .metrics() @@ -1116,10 +892,6 @@ 
fn test_llm_service_builder_fluent_api() { let config = middleware_stack.config(); assert_eq!(config.timeout, Some(Duration::from_secs(60))); - assert!(matches!( - config.retry_policy, - Some(RetryPolicy::Fixed { delay_ms: 500 }) - )); assert!(config.rate_limit.is_some()); assert!(config.enable_logging); assert!(config.enable_metrics); @@ -1133,12 +905,6 @@ fn test_llm_service_builder_fluent_api() { fn test_llm_service_builder_with_config() { let custom_config = MiddlewareConfig { timeout: Some(Duration::from_secs(20)), - retry_policy: Some(RetryPolicy::ExponentialBackoff { - initial_delay_ms: 200, - max_delay_ms: 4000, - multiplier: 1.5, - jitter: true, - }), rate_limit: None, enable_logging: false, enable_metrics: true, @@ -1170,118 +936,6 @@ async fn test_middleware_stack_basic_call() { assert_eq!(response.usage.total_tokens, 15); } -#[tokio::test] -async fn test_middleware_retry_logic_success() { - // Provider that fails 2 times then succeeds - let provider = MockFailingProvider::new("test", 2); - let mut middleware_stack = LlmServiceBuilder::new() - .retry(RetryPolicy::Fixed { delay_ms: 10 }) // Fast retry for testing - .build(provider.clone(), "test-model".to_string()); - - let request = ChatRequestBuilder::new().user("Test retry logic").build(); - - let start = std::time::Instant::now(); - let response = middleware_stack.call(request).await.unwrap(); - let duration = start.elapsed(); - - assert_eq!(response.message.content, "Success after retries"); - assert_eq!(provider.failure_count(), 2); - // Should have taken at least 20ms (2 retries * 10ms delay) - assert!(duration >= Duration::from_millis(20)); -} - -#[tokio::test] -async fn test_middleware_retry_logic_failure() { - // Provider that always fails - let provider = MockFailingProvider::new("test", 10); // More failures than max retries - let mut middleware_stack = LlmServiceBuilder::new() - .retry(RetryPolicy::Fixed { delay_ms: 10 }) - .build(provider.clone(), "test-model".to_string()); - - let request = ChatRequestBuilder::new() - .user("Test retry exhaustion") - .build(); - - let result = middleware_stack.call(request).await; - - assert!(result.is_err()); - // Should have made initial attempt + 3 retries = 4 total attempts - assert_eq!(provider.failure_count(), 4); - - let error = result.unwrap_err(); - assert!(matches!(error, LlmError::Api { .. 
})); -} - -#[tokio::test] -async fn test_middleware_exponential_backoff() { - let provider = MockFailingProvider::new("test", 3); - let mut middleware_stack = LlmServiceBuilder::new() - .retry(RetryPolicy::ExponentialBackoff { - initial_delay_ms: 10, - max_delay_ms: 100, - multiplier: 2.0, - jitter: false, - }) - .build(provider.clone(), "test-model".to_string()); - - let request = ChatRequestBuilder::new() - .user("Test exponential backoff") - .build(); - - let start = std::time::Instant::now(); - let response = middleware_stack.call(request).await.unwrap(); - let duration = start.elapsed(); - - assert_eq!(response.message.content, "Success after retries"); - assert_eq!(provider.failure_count(), 3); - - // Should have delays of 10ms, 20ms, 40ms = 70ms minimum - assert!(duration >= Duration::from_millis(70)); -} - -#[tokio::test] -async fn test_middleware_api_guided_retry() { - let provider = MockFailingProvider::new("test", 2); - let mut middleware_stack = LlmServiceBuilder::new() - .retry(RetryPolicy::ApiGuided { - fallback: Box::new(RetryPolicy::Fixed { delay_ms: 20 }), - max_api_delay_ms: 1000, - retry_headers: vec!["retry-after".to_string()], - }) - .build(provider.clone(), "test-model".to_string()); - - let request = ChatRequestBuilder::new() - .user("Test API guided retry") - .build(); - - let response = middleware_stack.call(request).await.unwrap(); - - assert_eq!(response.message.content, "Success after retries"); - assert_eq!(provider.failure_count(), 2); -} - -#[tokio::test] -async fn test_middleware_no_retry_policy() { - let provider = MockFailingProvider::new("test", 1); - let middleware_stack = LlmServiceBuilder::new() - // No retry policy - .build(provider.clone(), "test-model".to_string()); - - // Remove retry policy - let mut config = middleware_stack.config().clone(); - config.retry_policy = None; - let mut middleware_stack = - LlmServiceBuilder::with_config(config).build(provider.clone(), "test-model".to_string()); - - let request = ChatRequestBuilder::new().user("Test no retry").build(); - - let result = middleware_stack.call(request).await; - - assert!(result.is_err()); - // Should fail immediately, no retries - assert_eq!(provider.failure_count(), 1); -} - #[tokio::test] async fn test_middleware_logging_and_metrics() { let provider = MockProvider::new("test"); @@ -1329,7 +983,6 @@ fn test_rate_limit_configuration() { async fn test_middleware_error_propagation() { let provider = MockProvider::new("test").with_failure(); let mut middleware_stack = LlmServiceBuilder::new() - .retry(RetryPolicy::Fixed { delay_ms: 1 }) // Fast retry .logging() .build(provider, "test-model".to_string()); @@ -1346,12 +999,6 @@ async fn test_middleware_error_propagation() { fn test_middleware_config_inspection() { let custom_config = MiddlewareConfig { timeout: Some(Duration::from_secs(25)), - retry_policy: Some(RetryPolicy::ExponentialBackoff { - initial_delay_ms: 150, - max_delay_ms: 6000, - multiplier: 2.5, - jitter: true, - }), rate_limit: Some(RateLimit { requests_per_period: 75, period: Duration::from_secs(45), @@ -1371,21 +1018,6 @@ fn test_middleware_config_inspection() { assert!(config.enable_logging); assert!(!config.enable_metrics); - if let Some(RetryPolicy::ExponentialBackoff { - initial_delay_ms, - max_delay_ms, - multiplier, - jitter, - }) = &config.retry_policy - { - assert_eq!(*initial_delay_ms, 150); - assert_eq!(*max_delay_ms, 6000); - assert_eq!(*multiplier, 2.5); - assert!(*jitter); - } else { - panic!("Expected ExponentialBackoff retry policy"); - } - if let 
Some(rate_limit) = &config.rate_limit { assert_eq!(rate_limit.requests_per_period, 75); assert_eq!(rate_limit.period, Duration::from_secs(45));