# RefLex LLM - OpenAI Integration Example

This notebook demonstrates how to use RefLex LLM specifically with OpenAI endpoints, including configuration, fallback capabilities, and best practices.

## Installation

## Best Practices for OpenAI Integration

In [None]:
import reflex_llms

class OpenAIBestPractices:
    """Demonstrates best practices for OpenAI integration with RefLex.

    Wraps the client returned by ``reflex_llms.get_openai_client`` and keeps
    running totals of the requests completed and tokens reported through it.

    Attributes:
        client: The active client; replaced by a fallback client when a rate
            limit error is detected.
        request_count: Number of chat completion requests that completed.
        total_tokens: Sum of ``usage.total_tokens`` over completed requests
            whose response reported usage.
    """

    def __init__(self):
        self.client = reflex_llms.get_openai_client(from_file="reflex_openai.json")
        self.request_count = 0
        self.total_tokens = 0

    def _record_usage(self, response):
        """Count one completed request and add its token usage, if reported."""
        self.request_count += 1
        # The ``usage`` attribute may exist but be None; checking only for the
        # attribute's presence would crash on an otherwise successful response.
        usage = getattr(response, "usage", None)
        tokens = getattr(usage, "total_tokens", None)
        if tokens is not None:
            self.total_tokens += tokens

    @staticmethod
    def _is_rate_limit_error(error):
        """Return True when *error* looks like a rate limit rejection."""
        # NOTE(review): assumes client errors expose the HTTP status as
        # ``status_code`` (429 = Too Many Requests); the message check is kept
        # so errors without that attribute are still recognised.
        if getattr(error, "status_code", None) == 429:
            return True
        return "rate_limit" in str(error).lower()

    def rate_limited_request(self, messages, model="gpt-3.5-turbo", **kwargs):
        """Make a chat completion request with basic rate limiting awareness.

        Pauses for one second before every request that follows a multiple of
        ten completed requests. If the request fails with a rate limit error,
        the RefLex cache is cleared, a client preferring ``reflex`` over
        ``openai`` is obtained, and the request is sent once more.

        Args:
            messages: Chat messages passed through to the client.
            model: Model name passed through to the client.
            **kwargs: Extra keyword arguments for ``chat.completions.create``.

        Returns:
            The response object returned by the client.

        Raises:
            Exception: Any non rate limit error from the first attempt, or any
                error raised by the fallback attempt.
        """
        import time

        # Simple rate limiting (adjust based on your OpenAI tier)
        if self.request_count > 0 and self.request_count % 10 == 0:
            print("Rate limiting: brief pause...")
            time.sleep(1)

        # Only the network call sits in the try body so that bookkeeping
        # errors are never mistaken for request failures.
        try:
            response = self.client.chat.completions.create(
                model=model,
                messages=messages,
                **kwargs
            )
        except Exception as e:
            if not self._is_rate_limit_error(e):
                raise
            print("Rate limit hit, switching to fallback...")
            reflex_llms.clear_cache()
            self.client = reflex_llms.get_openai_client(
                preference_order=["reflex", "openai"]
            )
            response = self.client.chat.completions.create(
                model=model,
                messages=messages,
                **kwargs
            )

        # Requests served by the fallback client count toward the statistics too.
        self._record_usage(response)
        return response

    def optimized_prompt(self, user_query):
        """Send *user_query* with a fixed system prompt and sampling settings.

        Args:
            user_query: The user's question as a string.

        Returns:
            The response object from ``rate_limited_request``.
        """
        system_prompt = """
You are a helpful AI assistant. Be concise but comprehensive.
If you're unsure about something, say so clearly.
Structure your responses with clear sections when appropriate.
"""

        messages = [
            {"role": "system", "content": system_prompt.strip()},
            {"role": "user", "content": user_query}
        ]

        return self.rate_limited_request(
            messages=messages,
            model="gpt-3.5-turbo",
            max_tokens=300,
            temperature=0.7
        )

    def batch_process(self, queries, batch_size=5):
        """Process multiple queries in batches, pausing briefly between batches.

        A failure on one query is recorded in its result entry and does not
        stop the remaining queries.

        Args:
            queries: Sequence of query strings.
            batch_size: Number of queries per batch; must be at least 1.

        Returns:
            A list with one dict per query, in input order. Successful entries
            have ``query``, ``response`` and ``success=True``; failed entries
            have ``query``, ``error`` and ``success=False``.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1.
        """
        import time

        # A negative step would make range() empty and silently drop every query.
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        results = []

        for start in range(0, len(queries), batch_size):
            for query in queries[start:start + batch_size]:
                try:
                    content = self.optimized_prompt(query).choices[0].message.content
                except Exception as e:
                    results.append({
                        "query": query,
                        "error": str(e),
                        "success": False
                    })
                else:
                    results.append({
                        "query": query,
                        "response": content,
                        "success": True
                    })

            # Brief pause between batches (skipped after the last one)
            if start + batch_size < len(queries):
                time.sleep(0.5)

        return results

    def get_stats(self):
        """Return request and token totals plus the currently selected provider."""
        return {
            "requests_made": self.request_count,
            "total_tokens": self.total_tokens,
            "current_provider": reflex_llms.get_selected_provider()
        }

# Demonstrate the helper class end to end
bp = OpenAIBestPractices()

# Single request through the fixed system prompt
response = bp.optimized_prompt("What are the key advantages of using OpenAI models?")
answer_preview = response.choices[0].message.content[:100]
print(f"Optimized response: {answer_preview}...")

# Several questions handled in one batch_process call
test_queries = [
    "What is machine learning?",
    "Explain neural networks briefly",
    "What is the difference between AI and ML?",
]

batch_results = bp.batch_process(test_queries)
print(f"\nBatch processing results: {len(batch_results)} queries processed")
for result in batch_results:
    query_preview = result['query'][:30]
    if not result["success"]:
        # Failed entries carry an "error" string instead of a "response"
        print(f"❌ {query_preview}... -> Error: {result['error'][:30]}...")
        continue
    print(f"✅ {query_preview}... -> {result['response'][:50]}...")

# Report the counters accumulated by the calls above
stats = bp.get_stats()
print(f"\nUsage Statistics: {stats}")

## Summary

This notebook demonstrated:

1. **OpenAI Integration** - Primary OpenAI configuration with API keys
2. **Intelligent Fallback** - Automatic switching to local AI when OpenAI is unavailable
3. **Model Flexibility** - Testing multiple OpenAI models (GPT-3.5 Turbo, GPT-4, GPT-4o, GPT-4o mini)
4. **Production Setup** - Enterprise-ready OpenAI deployment with monitoring
5. **Best Practices** - Rate limiting, prompt optimization, and batch processing
6. **Streaming Support** - Real-time response streaming capabilities
7. **Cost Management** - Usage tracking and cost estimation features

Key benefits for OpenAI users:
- **Reliability** - Never lose AI capability even during OpenAI outages
- **Cost Control** - Fallback to local AI during rate limits or budget constraints
- **Performance** - Optimized request handling and batch processing
- **Flexibility** - Easy switching between different OpenAI models and configurations
- **Monitoring** - Comprehensive health checks and usage tracking


In [None]:
!pip install reflex-llms

## OpenAI Setup

### Environment Variables for OpenAI

In [None]:
import os

# OpenAI API credentials
# Keep a key that is already exported in the environment and only fall back to
# the placeholder when none is set, so running this cell never overwrites a
# real credential. Prefer exporting OPENAI_API_KEY before starting the
# notebook over pasting a real key into this cell.
os.environ.setdefault('OPENAI_API_KEY', 'your-openai-api-key')
# Optional: Custom OpenAI base URL for enterprise
# os.environ['OPENAI_BASE_URL'] = 'https://enterprise-api.openai.com/v1'

print("OpenAI environment setup complete")

## Basic OpenAI Usage

In [None]:
import reflex_llms

# Ask RefLex for a client that tries OpenAI first and the local provider second
client = reflex_llms.get_openai_client(preference_order=["openai", "reflex"])

# The client is called the same way as the OpenAI client
greeting = {"role": "user", "content": "Hello! I'm using OpenAI through RefLex."}
response = client.chat.completions.create(
    model="gpt-3.5-turbo",
    messages=[greeting],
    max_tokens=100,
)

reply_text = response.choices[0].message.content
print(f"Response: {reply_text}")
print(f"Using provider: {reflex_llms.get_selected_provider()}")

## OpenAI-Specific Configuration

In [None]:
import reflex_llms

# Restrict the client to OpenAI only, with an explicit endpoint and timeout
client = reflex_llms.get_openai_client(
    timeout=10.0,
    openai_base_url="https://api.openai.com/v1",  # Standard or custom endpoint
    preference_order=["openai"],
)

selected_provider = reflex_llms.get_selected_provider()
print(f"OpenAI client configured with provider: {selected_provider}")

## OpenAI Configuration File Example

Create a `reflex_openai.json` configuration file optimized for OpenAI (the later cells load it via `from_file="reflex_openai.json"`):

In [None]:
import json

# Map each OpenAI model name to the local model listed for it
fallback_model_map = {
    "gpt-3.5-turbo": "llama3.2:3b",
    "gpt-4": "llama3.1:8b",
    "gpt-4o": "llama3.1:70b",
    "gpt-4o-mini": "gemma3:2b",
    "text-embedding-ada-002": "nomic-embed-text",
}

# Settings for the local fallback server
fallback_server = {
    "port": 8080,
    "container_name": "openai-fallback-server",
    "model_mappings": {
        "minimal_setup": True,
        "minimal_model_mapping": fallback_model_map,
    },
}

# OpenAI-focused configuration: OpenAI first, local server second
openai_config = {
    "preference_order": ["openai", "reflex"],
    "timeout": 15.0,
    "openai": {"base_url": "https://api.openai.com/v1"},
    "reflex_server_config": fallback_server,
}

# Serialise once so the saved file and the printed text are the same string
config_text = json.dumps(openai_config, indent=2)
with open('reflex_openai.json', 'w') as f:
    f.write(config_text)

print("OpenAI configuration saved to reflex_openai.json")
print(config_text)

## Chat Application with OpenAI Fallback

In [None]:
import reflex_llms

def openai_chat_with_fallback(user_input="Hello from OpenAI!"):
    """Send one chat message, retrying once after re-selecting the provider.

    The client is loaded from ``reflex_openai.json``. If the first request
    raises, the RefLex cache is cleared, the client is loaded again, and the
    same conversation is sent once more so the caller receives a real reply
    rather than a status message.

    Args:
        user_input: Text of the single user message to send.

    Returns:
        The assistant's reply text from whichever attempt succeeded.

    Raises:
        Exception: Whatever the retry raises if the second attempt also fails.
    """
    # Load from OpenAI config file
    client = reflex_llms.get_openai_client(from_file="reflex_openai.json")

    provider = reflex_llms.get_selected_provider()
    print(f"Chat ready! Using {provider}")

    conversation = [{"role": "user", "content": user_input}]

    def request_reply(active_client):
        """Send the conversation through *active_client* and return the reply text."""
        # OpenAI uses standard model names
        response = active_client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=conversation,
            max_tokens=150
        )
        return response.choices[0].message.content

    try:
        ai_response = request_reply(client)
    except Exception as e:
        print(f"Error with {provider}: {e}")
        print("Attempting fallback...")

        # Clear cache and try fallback
        reflex_llms.clear_cache()
        client = reflex_llms.get_openai_client(from_file="reflex_openai.json")

        provider = reflex_llms.get_selected_provider()
        print(f"Switched to: {provider}")

        # Actually re-send the request; previously the switch was reported as
        # if it were the model's answer.
        ai_response = request_reply(client)

    print(f"You: {user_input}")
    print(f"AI ({provider}): {ai_response}")

    return ai_response

# Run the chat helper once and report the size of the text it returned
response = openai_chat_with_fallback(
    "Explain the benefits of using OpenAI models"
)
response_length = len(response)
print(f"\nResponse length: {response_length} characters")

## Advanced OpenAI Model Usage

In [None]:
import reflex_llms

def test_multiple_openai_models():
    """Send a short greeting request to several model names and collect outcomes.

    Returns:
        A dict keyed by model name, in the order tried. Each value has
        ``success`` and ``provider``, plus ``response`` on success or
        ``error`` (the exception text) on failure.
    """
    client = reflex_llms.get_openai_client(from_file="reflex_openai.json")

    def probe(model_name):
        """Issue one request for *model_name* and describe how it went."""
        prompt = [{"role": "user", "content": f"Say hello using {model_name}"}]
        try:
            completion = client.chat.completions.create(
                model=model_name,
                messages=prompt,
                max_tokens=30,
            )
            return {
                "success": True,
                "response": completion.choices[0].message.content,
                "provider": reflex_llms.get_selected_provider(),
            }
        except Exception as exc:
            return {
                "success": False,
                "error": str(exc),
                "provider": reflex_llms.get_selected_provider(),
            }

    # The last two entries were marked "Will fallback if not available"
    candidate_models = ("gpt-3.5-turbo", "gpt-4o-mini", "gpt-4", "gpt-4o")

    return {name: probe(name) for name in candidate_models}

# Run the model sweep and print one line per model
model_results = test_multiple_openai_models()

print("OpenAI Model Test Results:")
print("===========================")
for model, result in model_results.items():
    if not result["success"]:
        print(f"❌ {model}: {result['error'][:50]}...")
        continue
    provider_name = result['provider']
    print(f"✅ {model} ({provider_name}): {result['response'][:50]}...")

## OpenAI Embeddings with Fallback

In [None]:
import reflex_llms

def get_openai_embeddings(texts):
    """Generate one embedding per text, retrying once per text after a failure.

    Each text is embedded with ``text-embedding-ada-002`` in its own request.
    If a request raises, the RefLex cache is cleared, the client is reloaded
    from ``reflex_openai.json``, and that text is retried once; the reloaded
    client is then used for the remaining texts.

    Args:
        texts: A single string or a list of strings.

    Returns:
        A dict with ``embeddings`` (list of vectors, input order),
        ``provider`` (provider selected when the function returns),
        ``count`` and ``dimension`` (length of the first vector, 0 if none).

    Raises:
        Exception: Whatever the retry raises if it also fails.
    """
    client = reflex_llms.get_openai_client(from_file="reflex_openai.json")

    if isinstance(texts, str):
        texts = [texts]

    embedding_model = "text-embedding-ada-002"

    def embed(active_client, text):
        """Request the embedding vector for *text* from *active_client*."""
        response = active_client.embeddings.create(
            model=embedding_model,
            input=text
        )
        return response.data[0].embedding

    embeddings = []

    for text in texts:
        try:
            vector = embed(client, text)
        except Exception as e:
            # Look the provider up at failure time: a value captured before
            # the loop is stale once an earlier text has triggered a switch.
            print(f"Embedding failed with {reflex_llms.get_selected_provider()}: {e}")
            # Try fallback
            reflex_llms.clear_cache()
            client = reflex_llms.get_openai_client(from_file="reflex_openai.json")
            print(f"Retrying with {reflex_llms.get_selected_provider()}...")

            vector = embed(client, text)
        embeddings.append(vector)

    return {
        "embeddings": embeddings,
        "provider": reflex_llms.get_selected_provider(),
        "count": len(embeddings),
        "dimension": len(embeddings[0]) if embeddings else 0
    }

# Sample sentences to embed
sample_texts = [
    "OpenAI provides state-of-the-art language models",
    "RefLex enables seamless fallback to local AI servers",
    "GPT models excel at natural language understanding",
    "Local AI deployment reduces dependency on cloud services",
]

try:
    result = get_openai_embeddings(sample_texts)
    print(f"Generated {result['count']} embeddings using {result['provider']}")
    print(f"Embedding dimension: {result['dimension']}")

    # Compare the first two vectors when at least two were produced
    vectors = result['embeddings']
    if len(vectors) >= 2:
        import math

        def cosine_similarity(a, b):
            """Return the cosine of the angle between vectors *a* and *b*."""
            numerator = sum(p * q for p, q in zip(a, b))
            norm_a = math.sqrt(sum(p * p for p in a))
            norm_b = math.sqrt(sum(q * q for q in b))
            return numerator / (norm_a * norm_b)

        similarity = cosine_similarity(vectors[0], vectors[1])
        print(f"Similarity between first two texts: {similarity:.3f}")

except Exception as e:
    print(f"Embedding generation failed: {e}")

## Streaming Responses Example

In [None]:
import reflex_llms

def stream_openai_response(prompt="Tell me about artificial intelligence"):
    """Stream a chat completion to stdout and return the assembled text.

    The client is loaded from ``reflex_openai.json``. Content deltas are
    printed as they arrive. If creating or reading the stream raises, the same
    client is used for one non-streaming request instead and its text is
    printed and returned.

    Args:
        prompt: Text of the single user message to send.

    Returns:
        The full response text (streamed or non-streamed).

    Raises:
        Exception: Whatever the non-streaming request raises if it also fails.
    """
    client = reflex_llms.get_openai_client(from_file="reflex_openai.json")

    print(f"Streaming response from {reflex_llms.get_selected_provider()}...")
    print("=" * 50)

    try:
        stream = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=200,
            stream=True
        )

        pieces = []
        for chunk in stream:
            # Some chunks carry no choices (for example a trailing usage-only
            # chunk); indexing them would abort a healthy stream.
            if not chunk.choices:
                continue
            content = chunk.choices[0].delta.content
            if content is None:
                continue
            print(content, end="", flush=True)
            pieces.append(content)

        full_response = "".join(pieces)

        print("\n" + "=" * 50)
        print(f"Stream complete. Total characters: {len(full_response)}")

        return full_response

    except Exception as e:
        print(f"Streaming failed: {e}")
        print("Falling back to regular completion...")

        # Fallback to non-streaming
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            max_tokens=200
        )

        fallback_response = response.choices[0].message.content
        print(fallback_response)
        return fallback_response

# Stream one sample answer and keep the assembled text
streaming_result = stream_openai_response(prompt=(
    "Explain the advantages of using OpenAI models in production applications"
))

## OpenAI Production Setup

In [None]:
import reflex_llms
from reflex_llms.server import ReflexServerConfig, ModelMapping

class OpenAIManager:
    """OpenAI manager with a local fallback, configured per environment.

    ``"development"`` prefers the local ``reflex`` provider; every other
    environment value gets the production configuration, which prefers
    ``openai``.

    Attributes:
        environment: Environment name given to the constructor.
        client: Client returned by ``reflex_llms.get_openai_client``.
    """

    # Rough per-token USD pricing as of 2024 (check OpenAI pricing for
    # current rates). Listed per 1000 tokens and divided down to one token.
    _PRICING = {
        "gpt-3.5-turbo": {"input": 0.0015 / 1000, "output": 0.002 / 1000},
        "gpt-4": {"input": 0.03 / 1000, "output": 0.06 / 1000},
        "gpt-4o": {"input": 0.005 / 1000, "output": 0.015 / 1000},
        "gpt-4o-mini": {"input": 0.00015 / 1000, "output": 0.0006 / 1000}
    }

    def __init__(self, environment="production"):
        self.environment = environment
        self.client = None
        self._setup_openai_client()

    def _setup_openai_client(self):
        """Build the fallback server config and (re)create ``self.client``."""

        # NOTE(review): the local model tags below are passed through as
        # written; confirm each one exists in the model registry the fallback
        # server pulls from.
        if self.environment == "development":
            # Development: prefer local for cost savings
            fallback_config = ReflexServerConfig(
                port=11434,
                container_name="dev-openai-fallback",
                model_mappings=ModelMapping(
                    minimal_setup=True,
                    minimal_model_mapping={
                        "gpt-3.5-turbo": "llama3.2:3b",
                        "gpt-4o-mini": "gemma3:2b"
                    }
                )
            )
            preference = ["reflex", "openai"]

        else:
            # Production: prefer OpenAI for quality
            fallback_config = ReflexServerConfig(
                host="0.0.0.0",
                port=8080,
                container_name="prod-openai-fallback",
                model_mappings=ModelMapping(
                    minimal_setup=False,
                    model_mapping={
                        "gpt-3.5-turbo": "llama3.2:7b",
                        "gpt-4": "llama3.1:70b",
                        "gpt-4o": "llama3.1:405b",
                        "gpt-4o-mini": "gemma3:27b",
                        "text-embedding-ada-002": "nomic-embed-text"
                    }
                )
            )
            preference = ["openai", "reflex"]

        self.client = reflex_llms.get_openai_client(
            preference_order=preference,
            timeout=30.0 if self.environment == "production" else 10.0,
            reflex_server_config=fallback_config
        )

        provider = reflex_llms.get_selected_provider()
        print(f"OpenAI Manager ({self.environment}) ready using: {provider}")

        if provider == "reflex" and self.environment == "production":
            print("⚠️  Production using local fallback - check OpenAI connectivity")

    def _with_fallback(self, request):
        """Run *request*; on any error rebuild the client and run it once more.

        Args:
            request: Zero-argument callable that reads ``self.client`` when
                called, so the retry uses the rebuilt client.

        Returns:
            Whatever *request* returns.

        Raises:
            Exception: Whatever the second attempt raises if it also fails.
        """
        try:
            return request()
        except Exception as e:
            print(f"OpenAI request failed: {e}")
            # Attempt fallback
            reflex_llms.clear_cache()
            self._setup_openai_client()
            return request()

    def chat_completion(self, messages, model="gpt-3.5-turbo", **kwargs):
        """OpenAI chat completion with automatic fallback.

        Args:
            messages: Chat messages passed through to the client.
            model: Model name passed through to the client.
            **kwargs: Extra keyword arguments for ``chat.completions.create``.

        Returns:
            The response object returned by the client.
        """
        return self._with_fallback(
            lambda: self.client.chat.completions.create(
                model=model,
                messages=messages,
                **kwargs
            )
        )

    def get_embeddings(self, text, model="text-embedding-ada-002"):
        """OpenAI embeddings with fallback.

        Uses the same retry path as ``chat_completion`` so the documented
        fallback actually happens.

        Args:
            text: Input passed through as ``input`` to ``embeddings.create``.
            model: Embedding model name.

        Returns:
            The response object returned by the client.
        """
        return self._with_fallback(
            lambda: self.client.embeddings.create(
                model=model,
                input=text
            )
        )

    def stream_completion(self, messages, model="gpt-3.5-turbo", **kwargs):
        """Start a streaming chat completion (no fallback is attempted).

        Returns:
            Whatever the client returns for ``stream=True``.
        """
        return self.client.chat.completions.create(
            model=model,
            messages=messages,
            stream=True,
            **kwargs
        )

    def health_check(self):
        """Summarise ``reflex_llms.get_module_status()`` for this manager.

        Returns:
            A dict with ``provider``, ``openai_primary``,
            ``fallback_available`` (False when the status has no
            ``reflex_server_running`` key), ``config_cached`` and
            ``environment``.
        """
        status = reflex_llms.get_module_status()

        health = {
            "provider": status["selected_provider"],
            "openai_primary": status["selected_provider"] == "openai",
            "fallback_available": status.get("reflex_server_running", False),
            "config_cached": status["has_cached_config"],
            "environment": self.environment
        }

        return health

    def cost_estimate(self, tokens_input, tokens_output, model="gpt-3.5-turbo"):
        """Estimate OpenAI API costs (rough estimates).

        Args:
            tokens_input: Number of prompt tokens.
            tokens_output: Number of completion tokens.
            model: Model name to look up in the pricing table.

        Returns:
            ``{"estimated_cost", "model", "note"}`` for a known model, or
            ``{"error": ...}`` when the model has no pricing entry.
        """
        if model in self._PRICING:
            rates = self._PRICING[model]
            cost = (tokens_input * rates["input"] +
                    tokens_output * rates["output"])
            return {"estimated_cost": cost, "model": model, "note": "Rough estimate"}
        else:
            return {"error": "Model pricing not available"}

    def cleanup(self):
        """Stop the fallback server if RefLex reports it is the one in use."""
        if reflex_llms.is_using_reflex():
            reflex_llms.stop_reflex_server()
            print("Fallback server stopped")

# Usage examples
# Every manager created below is tracked so the finally block can run its
# cleanup even when a later step (setup, health check, printing) raises.
_created_managers = []
try:
    print("Setting up Development Environment:")
    dev_ai = OpenAIManager("development")
    _created_managers.append(dev_ai)

    print("\nSetting up Production Environment:")
    prod_ai = OpenAIManager("production")
    _created_managers.append(prod_ai)

    # Test different models
    try:
        # Development test
        dev_response = dev_ai.chat_completion(
            messages=[{"role": "user", "content": "Hello from development!"}],
            model="gpt-3.5-turbo",
            max_tokens=30
        )
        print(f"\nDev response: {dev_response.choices[0].message.content}")

        # Production test
        prod_response = prod_ai.chat_completion(
            messages=[{"role": "user", "content": "Hello from production!"}],
            model="gpt-4o-mini",
            max_tokens=30
        )
        print(f"Prod response: {prod_response.choices[0].message.content}")

    except Exception as e:
        print(f"Test failed: {e}")

    # Health checks
    dev_health = dev_ai.health_check()
    prod_health = prod_ai.health_check()

    print(f"\nDevelopment Health: {dev_health}")
    print(f"Production Health: {prod_health}")

    # Cost estimation example
    cost_estimate = prod_ai.cost_estimate(100, 50, "gpt-4o-mini")
    print(f"\nCost estimate: ${cost_estimate.get('estimated_cost', 0):.6f}")

finally:
    # Cleanup, in creation order (development first, then production)
    for _manager in _created_managers:
        _manager.cleanup()

## OpenAI Monitoring and Troubleshooting