5 changes: 2 additions & 3 deletions CLAUDE.md
@@ -29,12 +29,11 @@ The `openai_compatible` provider is a generic implementation that other provider

### Middleware Stack

The library uses Tower middleware for enterprise features (see `crates/rullm-core/src/middleware.rs`):
- Retry logic with exponential backoff
The library uses Tower middleware (see `crates/rullm-core/src/middleware.rs`):
- Rate limiting
- Circuit breakers
- Timeouts
- Connection pooling
- Logging and metrics

Configuration is done via `MiddlewareConfig` and `LlmServiceBuilder`.
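
For reference, a minimal sketch of wiring the remaining middleware through `LlmServiceBuilder`, adapted from the builder calls in `middleware_usage.rs` later in this diff (the model name, limits, and prompt are illustrative only):

```rust
use rullm_core::{ChatRequestBuilder, ConfigBuilder, LlmServiceBuilder, OpenAIProvider};
use std::time::Duration;

async fn middleware_sketch() -> Result<(), Box<dyn std::error::Error>> {
    // Provider built from environment configuration (requires OPENAI_API_KEY).
    let config = ConfigBuilder::openai_from_env()?;
    let provider = OpenAIProvider::new(config)?;

    // Compose the remaining middleware: timeout, rate limiting, logging, metrics.
    let mut middleware_stack = LlmServiceBuilder::new()
        .timeout(Duration::from_secs(30))
        .rate_limit(100, Duration::from_secs(60)) // 100 requests per minute
        .logging()
        .metrics()
        .build(provider, "gpt-4".to_string());

    let request = ChatRequestBuilder::new()
        .user("How can I optimize my database queries?")
        .temperature(0.3)
        .max_tokens(300)
        .build();

    let response = middleware_stack.call(request).await?;
    println!("{}", response.message.content);

    Ok(())
}
```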

2 changes: 1 addition & 1 deletion Cargo.toml
@@ -10,7 +10,7 @@ rust-version = "1.85"
[workspace.dependencies]
# Core library dependencies
tokio = { version = "1", features = ["full"] }
tower = { version = "0.4", features = ["timeout", "retry", "limit", "util"] }
tower = { version = "0.4", features = ["timeout", "limit", "util"] }
rand = "0.8"
reqwest = { version = "0.11", features = ["json", "stream"] }
bytes = "1.0"
10 changes: 5 additions & 5 deletions crates/rullm-core/examples/README.md
@@ -40,7 +40,7 @@ All streaming examples use the `chat_completion_stream` method which returns a `

**Environment:** Requires `OPENAI_API_KEY`

Demonstrates comprehensive OpenAI streaming with:
Demonstrates OpenAI streaming with:
- **Simple streaming chat** with real-time token display
- **Multi-turn conversations** with context preservation
- **Creative writing** with high temperature settings
@@ -87,7 +87,7 @@ while let Some(event) = stream.next().await {

**Environment:** Requires `ANTHROPIC_API_KEY`

Showcases Claude's capabilities with:
Shows Claude streaming with:
- **Philosophical conversations** demonstrating reasoning abilities
- **Creative storytelling** with vivid imagery
- **Code explanation** with technical accuracy
@@ -124,7 +124,7 @@ let mut stream = provider

**Environment:** Requires `GOOGLE_API_KEY`

Highlights Gemini's versatility:
Shows Gemini streaming with:
- **Technical explanations** with precision
- **Creative writing** using experimental models
- **Code analysis** and review capabilities
@@ -265,7 +265,7 @@ Demonstrates:

Key features:
- **Environment-based configuration**
- **Custom endpoints** for enterprise setups
- **Custom endpoints** for custom API URLs
- **Validation and error handling**
- **Health checks** and model availability
- **Request builder patterns** from minimal to full-featured
@@ -434,7 +434,7 @@ cargo run --example test_all_providers
🎉 All providers are working correctly!
```

This example is perfect for:
Use this example for:
- Verifying your API keys work
- Testing network connectivity
- Validating provider implementations
4 changes: 2 additions & 2 deletions crates/rullm-core/examples/anthropic_stream.rs
@@ -183,8 +183,8 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
println!("\n\n🎯 Tips for using Anthropic Claude streaming:");
println!("• Set ANTHROPIC_API_KEY environment variable");
println!("• Use .stream(true) in ChatRequestBuilder");
println!("• Claude models: haiku (fast), sonnet (balanced), opus (powerful)");
println!("• Claude excels at reasoning, analysis, and creative writing");
println!("• Claude models: haiku (fast), sonnet (balanced), opus (largest)");
println!("• Claude supports reasoning, analysis, and creative writing");
println!("• Lower temperature (0.1-0.4) for factual content");
println!("• Higher temperature (0.7-1.0) for creative content");

12 changes: 1 addition & 11 deletions crates/rullm-core/examples/basic_usage.rs
@@ -1,4 +1,4 @@
use rullm_core::{ChatRequestBuilder, LlmError};
use rullm_core::ChatRequestBuilder;

// This example demonstrates the unified interface without actual provider implementations
// It shows how the library would be used once provider modules are implemented
@@ -33,15 +33,5 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
println!("\nThis example shows the unified interface design.");
println!("Actual provider implementations will be added in subsequent tasks.");

// Example of error handling
let error_example = LlmError::rate_limit(
"Too many requests",
Some(std::time::Duration::from_secs(60)),
);
println!("\nError handling example:");
println!(" Error: {error_example}");
println!(" Is retryable: {}", error_example.is_retryable());
println!(" Retry delay: {:?}", error_example.retry_delay());

Ok(())
}
2 changes: 1 addition & 1 deletion crates/rullm-core/examples/gemini_stream.rs
@@ -219,7 +219,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
println!(
"• Models: gemini-1.5-flash (fast), gemini-1.5-pro (balanced), gemini-2.0-flash-exp (experimental)"
);
println!("• Gemini excels at reasoning, code analysis, and creative tasks");
println!("• Gemini supports reasoning, code analysis, and creative tasks");
println!("• Lower temperature (0.1-0.4) for factual/technical content");
println!("• Higher temperature (0.7-1.0) for creative content");
println!("• Use top_p for more controlled randomness");
89 changes: 17 additions & 72 deletions crates/rullm-core/examples/middleware_usage.rs
@@ -1,6 +1,6 @@
use rullm_core::{
ChatRequestBuilder, ConfigBuilder, LlmServiceBuilder, MiddlewareConfig, OpenAIProvider,
RateLimit, config::RetryPolicy,
RateLimit,
};
use std::time::Duration;

@@ -14,16 +14,13 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
// Example 1: Basic middleware stack with defaults
basic_middleware_example().await?;

// Example 2: Custom retry policy with exponential backoff
custom_retry_example().await?;

// Example 3: Production-ready configuration
// Example 2: Configuration with timeouts and rate limiting
production_config_example().await?;

// Example 4: Rate-limited and monitored configuration
// Example 3: Rate-limited and monitored configuration
rate_limited_example().await?;

// Example 5: Custom middleware configuration
// Example 4: Custom middleware configuration
custom_middleware_config_example().await?;

Ok(())
@@ -57,91 +57,47 @@ async fn basic_middleware_example() -> Result<(), Box<dyn std::error::Error>> {
Ok(())
}

/// Example 2: Custom retry policy with exponential backoff
async fn custom_retry_example() -> Result<(), Box<dyn std::error::Error>> {
println!("🔄 Example 2: Custom Retry Policy");

let config = ConfigBuilder::openai_from_env()?;
let provider = OpenAIProvider::new(config)?;

// Create middleware with custom exponential backoff retry policy
let mut middleware_stack = LlmServiceBuilder::new()
.timeout(Duration::from_secs(60)) // 60 second timeout
.retry(RetryPolicy::ExponentialBackoff {
initial_delay_ms: 200, // Start with 200ms
max_delay_ms: 10000, // Cap at 10 seconds
multiplier: 2.5, // Aggressive backoff
jitter: true, // Add randomness
})
.logging()
.build(provider, "gpt-3.5-turbo".to_string());

let request = ChatRequestBuilder::new()
.user("Explain quantum computing in simple terms")
.temperature(0.7)
.max_tokens(150)
.build();

let response = middleware_stack.call(request).await?;

println!("✅ Response: {}", response.message.content);
println!("🔄 Retry policy: Exponential backoff with jitter\n");

Ok(())
}

/// Example 3: Production-ready configuration
/// Example 2: Configuration with timeouts and rate limiting
async fn production_config_example() -> Result<(), Box<dyn std::error::Error>> {
println!("🏭 Example 3: Production Configuration");
println!("🏭 Example 2: Configuration with Timeouts and Rate Limiting");

let config = ConfigBuilder::openai_from_env()?;
let provider = OpenAIProvider::new(config)?;

// Production-ready middleware configuration
// Middleware configuration with timeouts and rate limiting
let mut middleware_stack = LlmServiceBuilder::new()
.timeout(Duration::from_secs(30)) // Conservative timeout
.retry(RetryPolicy::ApiGuided {
fallback: Box::new(RetryPolicy::ExponentialBackoff {
initial_delay_ms: 100,
max_delay_ms: 5000,
multiplier: 2.0,
jitter: true,
}),
max_api_delay_ms: 30000, // Don't wait more than 30 seconds
retry_headers: vec!["retry-after".to_string(), "x-ratelimit-reset".to_string()],
})
.rate_limit(100, Duration::from_secs(60)) // 100 requests per minute
.logging() // Always log in production
.metrics() // Always collect metrics
.logging()
.metrics()
.build(provider, "gpt-4".to_string());

let request = ChatRequestBuilder::new()
.system("You are a helpful assistant for a production application.")
.user("How can I optimize my database queries?")
.temperature(0.3) // More deterministic for production
.temperature(0.3) // Lower temperature for more deterministic output
.max_tokens(300)
.build();

let response = middleware_stack.call(request).await?;

println!("✅ Production response received");
println!("✅ Response received");
println!("📊 Token usage: {}", response.usage.total_tokens);
println!("🛡️ Configuration: API-guided retry, rate limited, fully monitored\n");
println!("🛡️ Configuration: Rate limited, logged and monitored\n");

Ok(())
}

/// Example 4: Rate-limited and monitored configuration
/// Example 3: Rate-limited and monitored configuration
async fn rate_limited_example() -> Result<(), Box<dyn std::error::Error>> {
println!("⏱️ Example 4: Rate Limited Configuration");
println!("⏱️ Example 3: Rate Limited Configuration");

let config = ConfigBuilder::openai_from_env()?;
let provider = OpenAIProvider::new(config)?;

// Configuration optimized for rate limiting and monitoring
let mut middleware_stack = LlmServiceBuilder::new()
.timeout(Duration::from_secs(45))
.retry(RetryPolicy::Fixed { delay_ms: 1000 }) // Simple fixed delay
.rate_limit(50, Duration::from_secs(60)) // Conservative rate limit
.logging()
.metrics()
@@ -184,22 +184,16 @@ async fn rate_limited_example() -> Result<(), Box<dyn std::error::Error>> {
Ok(())
}

/// Example 5: Custom middleware configuration from struct
/// Example 4: Custom middleware configuration from struct
async fn custom_middleware_config_example() -> Result<(), Box<dyn std::error::Error>> {
println!("⚙️ Example 5: Custom Middleware Configuration");
println!("⚙️ Example 4: Custom Middleware Configuration");

let config = ConfigBuilder::openai_from_env()?;
let provider = OpenAIProvider::new(config)?;

// Define custom middleware configuration
let middleware_config = MiddlewareConfig {
timeout: Some(Duration::from_secs(20)),
retry_policy: Some(RetryPolicy::ExponentialBackoff {
initial_delay_ms: 500,
max_delay_ms: 8000,
multiplier: 1.8,
jitter: false,
}),
rate_limit: Some(RateLimit {
requests_per_period: 25,
period: Duration::from_secs(60),
@@ -225,9 +225,7 @@ async fn custom_middleware_config_example() -> Result<(), Box<dyn std::error::Er
"📊 Response length: {} characters",
response.message.content.len()
);
println!(
"⚙️ Configuration: Custom timeouts, exponential backoff (no jitter), 25 req/min limit\n"
);
println!("⚙️ Configuration: Custom timeouts, 25 req/min limit\n");

// Display the configuration details
let config = middleware_stack.config();
50 changes: 0 additions & 50 deletions crates/rullm-core/examples/retry_policy_example.rs

This file was deleted.
