# OpenAI Compatible API Lab

**Estimated time:** 90–120 minutes

---

## Learning Objectives

- Understand the OpenAI API compatibility standard
- Configure clients for multiple providers
- Implement streaming responses
- Handle errors gracefully
- Use structured outputs and tools

## Setup

In [None]:
# Install dependencies if needed
# !pip install openai

import os
import json
import time
from openai import OpenAI, RateLimitError, APIError, APIConnectionError, AuthenticationError, BadRequestError

print("Imports successful")

## Part 1: Provider Configuration

Configure multiple providers for testing.

In [None]:
# Provider configurations
# FREE providers (no credit card required) - recommended for learning
# Get your API keys:
# - Groq: https://console.groq.com/keys
# - OpenRouter: https://openrouter.ai/keys

PROVIDERS = {
    # Groq: ultra-fast inference with a generous free tier.
    # Docs: https://console.groq.com/docs/openai
    "groq": {
        "base_url": "https://api.groq.com/openai/v1",
        "api_key": os.getenv("GROQ_API_KEY", "your-groq-api-key"),
        "default_model": "llama-3.3-70b-versatile",
    },

    # OpenRouter: 100+ models, with a free tier available.
    # Docs: https://openrouter.ai/docs/quickstart
    # Free models carry a ":free" suffix in their model id.
    "openrouter": {
        "base_url": "https://openrouter.ai/api/v1",
        "api_key": os.getenv("OPENROUTER_API_KEY", "your-openrouter-api-key"),
        "default_model": "meta-llama/llama-3.3-70b-instruct:free",
    },

    # Optional providers: uncomment an entry to enable it. Each entry needs
    # the same three keys as above: "base_url", "api_key", "default_model".

    # Ollama (local) - requires Ollama running on localhost
    # See documentation for setup instructions
    # "ollama": {
    #     "base_url": "http://localhost:11434/v1",
    #     "api_key": "ollama",
    #     "default_model": "llama3.2"
    # },

    # LiteLLM proxy - requires running LiteLLM proxy server
    # "litellm": {
    #     "base_url": "http://localhost:4000/v1",
    #     "api_key": os.environ.get("LITELLM_KEY", "sk-xxxx"),
    #     "default_model": "gpt-3.5-turbo"
    # },

    # Vercel AI Gateway
    # "vercel": {
    #     "base_url": "https://ai-gateway.vercel.sh/v1",
    #     "api_key": os.environ.get("AI_GATEWAY_API_KEY"),
    #     "default_model": "openai/gpt-4o-mini"
    # },
}

def get_client(provider_name: str) -> OpenAI:
    """Build an ``OpenAI`` client pointed at the named provider.

    The provider's entry is looked up in ``PROVIDERS`` and its ``base_url``
    and ``api_key`` are handed to the client constructor.

    Raises:
        ValueError: If ``PROVIDERS`` has no (non-empty) entry for
            ``provider_name``.
    """
    settings = PROVIDERS.get(provider_name)
    if settings:
        return OpenAI(
            base_url=settings["base_url"],
            api_key=settings["api_key"],
        )
    raise ValueError(f"Unknown provider: {provider_name}")

# Summarize the active configuration and where to obtain keys.
print(f"Configured providers: {list(PROVIDERS)}")
print(
    "\nTo get API keys:",
    "  Groq: https://console.groq.com/keys",
    "  OpenRouter: https://openrouter.ai/keys",
    sep="\n",
)

## Part 2: List Models

Explore available models from each provider.

In [None]:
def list_models(provider_name: str) -> list:
    """Return the model ids reported by a provider.

    Any exception raised while fetching or reading the listing is printed
    and an empty list is returned instead. Errors from ``get_client`` (for
    example an unknown provider) are not caught here.
    """
    client = get_client(provider_name)
    try:
        available = client.models.list().data
        return [entry.id for entry in available]
    except Exception as err:
        print(f"Error: {err}")
        return []

# Show at most the first ten model ids per provider.
for provider in PROVIDERS:
    print(f"\n=== {provider.upper()} ===")
    models = list_models(provider)
    if not models:
        continue
    print(f"Found {len(models)} models:")
    for model_id in models[:10]:
        print(f"  - {model_id}")
    remaining = len(models) - 10
    if remaining > 0:
        print(f"  ... and {remaining} more")

## Part 3: Basic Chat Completions

Make simple requests to different providers.

In [None]:
def chat(provider_name: str, prompt: str, **kwargs) -> dict:
    """Send a single-turn chat completion request.

    Args:
        provider_name: Key into ``PROVIDERS``.
        prompt: Text sent as the only user message.
        **kwargs: Extra arguments forwarded to
            ``client.chat.completions.create``. A ``model`` entry overrides
            the provider's ``default_model``.

    Returns:
        A dict with ``content`` and ``finish_reason`` from the first choice,
        and ``usage`` as a plain dict (empty when the provider reported no
        usage data).

    Raises:
        KeyError: If ``provider_name`` is not in ``PROVIDERS``.
    """
    config = PROVIDERS[provider_name]
    client = get_client(provider_name)
    model = kwargs.pop("model", config["default_model"])

    response = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        **kwargs
    )

    choice = response.choices[0]
    # `usage` is optional in the response type; some OpenAI-compatible
    # providers omit it, so do not call model_dump() on None.
    usage = response.usage

    return {
        "content": choice.message.content,
        "finish_reason": choice.finish_reason,
        "usage": usage.model_dump() if usage is not None else {},
    }

# Send the same arithmetic question to every configured provider.
for provider in PROVIDERS:
    print(f"\n=== {provider.upper()} ===")
    result = chat(provider, "What is 2 + 2?", max_tokens=50)
    print(
        f"Response: {result['content']}",
        f"Tokens: {result['usage']}",
        sep="\n",
    )

## Part 4: Temperature and Parameters

Experiment with temperature and other parameters.

In [None]:
def test_temperature(provider_name: str, prompt: str, temperatures: list) -> dict:
    """Run one prompt at several temperatures.

    Each temperature triggers a separate request to the provider's default
    model, capped at 50 output tokens. Returns a dict mapping each
    temperature to the text of the first choice.
    """
    settings = PROVIDERS[provider_name]
    api = get_client(provider_name)

    def _sample(temperature):
        # One completion at the given sampling temperature.
        completion = api.chat.completions.create(
            model=settings["default_model"],
            messages=[{"role": "user", "content": prompt}],
            temperature=temperature,
            max_tokens=50,
        )
        return completion.choices[0].message.content

    return {temperature: _sample(temperature) for temperature in temperatures}

# Compare outputs for the first configured provider across temperatures.
provider = list(PROVIDERS)[0]
prompt = "Write a one-sentence story about a robot."
temperatures = [0.0, 0.5, 1.0, 1.5]

print(f"Prompt: {prompt}\n")
results = test_temperature(provider, prompt, temperatures)

for temp in results:
    print(f"Temperature {temp}:\n  {results[temp]}\n")

## Part 5: Streaming Responses

Process responses in real-time.

In [None]:
def stream_chat(provider_name: str, prompt: str, **kwargs):
    """Stream a chat completion, printing text as it arrives.

    Args:
        provider_name: Key into ``PROVIDERS``.
        prompt: Text sent as the only user message.
        **kwargs: Extra arguments forwarded to
            ``client.chat.completions.create``. A ``model`` entry overrides
            the provider's ``default_model``.

    Returns:
        The concatenation of every content fragment received.

    Raises:
        KeyError: If ``provider_name`` is not in ``PROVIDERS``.
    """
    config = PROVIDERS[provider_name]
    client = get_client(provider_name)
    model = kwargs.pop("model", config["default_model"])

    stream = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        stream=True,
        **kwargs
    )

    pieces = []
    for chunk in stream:
        # A chunk may carry an empty `choices` list (for example a
        # usage-only chunk); indexing [0] on it would raise IndexError.
        if not chunk.choices:
            continue
        text = chunk.choices[0].delta.content
        if text:
            pieces.append(text)
            # Flush so each fragment is shown immediately.
            print(text, end="", flush=True)

    print()  # Newline at end
    return "".join(pieces)

# Stream a short reply from the first configured provider.
provider = list(PROVIDERS)[0]
print(f"Streaming from {provider}:\n")
content = stream_chat(
    provider,
    "Tell me a short joke about programming.",
    max_tokens=100,
)

## Part 6: Error Handling

Implement robust error handling with retries.

In [None]:
def safe_chat(provider_name: str, messages: list, max_retries: int = 3, **kwargs) -> dict:
    """Make a chat request with error handling and retries.

    Rate-limit errors are retried after an exponential backoff
    (1s, 2s, 4s, ...); connection errors and other API errors are retried
    after one second. Authentication and bad-request errors are returned
    immediately, since retrying cannot fix them.

    Args:
        provider_name: Key into ``PROVIDERS``.
        messages: Chat messages in OpenAI format.
        max_retries: Maximum number of attempts.
        **kwargs: Extra arguments forwarded to
            ``client.chat.completions.create``. A ``model`` entry overrides
            the provider's ``default_model``.

    Returns:
        On success: ``{"success": True, "content", "usage", "attempts"}``,
        where ``usage`` is an empty dict if the provider reported none.
        On failure: ``{"success": False, "error", "attempts"}``.

    Raises:
        KeyError: If ``provider_name`` is not in ``PROVIDERS``.
    """
    config = PROVIDERS[provider_name]
    client = get_client(provider_name)
    # Resolve the model once, outside the loop. Popping it per attempt would
    # drop a caller-supplied override after the first try, so every retry
    # would silently fall back to the provider default.
    model = kwargs.pop("model", config["default_model"])

    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model=model,
                messages=messages,
                **kwargs
            )

        except RateLimitError:
            wait = 2 ** attempt
            print(f"Rate limited, waiting {wait}s (attempt {attempt + 1})")
            time.sleep(wait)

        except APIConnectionError as e:
            print(f"Connection error: {e}")
            time.sleep(1)

        except AuthenticationError as e:
            return {"success": False, "error": f"Auth error: {e}", "attempts": attempt + 1}

        except BadRequestError as e:
            return {"success": False, "error": f"Bad request: {e}", "attempts": attempt + 1}

        # Must come last: the handlers above catch more specific errors.
        except APIError as e:
            print(f"API error: {e}")
            time.sleep(1)

        else:
            # `usage` is optional in the response type; guard against None.
            usage = response.usage
            return {
                "success": True,
                "content": response.choices[0].message.content,
                "usage": usage.model_dump() if usage is not None else {},
                "attempts": attempt + 1
            }

    return {"success": False, "error": "Max retries exceeded", "attempts": max_retries}

# Exercise safe_chat against the first configured provider.
provider = list(PROVIDERS)[0]
result = safe_chat(
    provider,
    [{"role": "user", "content": "Hello!"}],
    max_tokens=20,
)
print(f"Success: {result['success']}")
print(f"Attempts: {result['attempts']}")
if not result['success']:
    print(f"Error: {result['error']}")
else:
    print(f"Response: {result['content']}")

## Part 7: Structured Outputs

Request JSON-formatted responses.

In [None]:
def get_json_response(provider_name: str, prompt: str, **kwargs) -> dict:
    """Request a JSON response and try to parse it.

    A system message instructs the model to reply with JSON only; the reply
    text is then parsed with ``json.loads``.

    Args:
        provider_name: Key into ``PROVIDERS``.
        prompt: Text sent as the user message.
        **kwargs: Extra arguments forwarded to
            ``client.chat.completions.create``. A ``model`` entry overrides
            the provider's ``default_model``.

    Returns:
        ``{"success": True, "data": <parsed>, "raw": <text>}`` when parsing
        succeeds, otherwise ``{"success": False, "error": <str>, "raw": ...}``.

    Raises:
        KeyError: If ``provider_name`` is not in ``PROVIDERS``.
    """
    config = PROVIDERS[provider_name]
    client = get_client(provider_name)
    model = kwargs.pop("model", config["default_model"])

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "Respond with valid JSON only. No markdown, no explanation."},
            {"role": "user", "content": prompt}
        ],
        **kwargs
    )

    content = response.choices[0].message.content

    # The message content is optional; json.loads(None) would raise a
    # TypeError that the JSONDecodeError handler below does not catch.
    if content is None:
        return {"success": False, "error": "Response contained no text content", "raw": content}

    # Try to parse as JSON
    try:
        data = json.loads(content)
    except json.JSONDecodeError as e:
        return {"success": False, "error": str(e), "raw": content}
    return {"success": True, "data": data, "raw": content}

# Ask the first configured provider for a small JSON document.
provider = list(PROVIDERS)[0]
result = get_json_response(
    provider,
    'List 3 programming languages as JSON: {"languages": [{"name": string, "year": int}]}',
)

if not result['success']:
    print(f"Failed to parse: {result['error']}")
    print(f"Raw: {result['raw']}")
else:
    print("Parsed JSON:")
    print(json.dumps(result['data'], indent=2))

## Part 8: Multi-Turn Conversation

Maintain context across multiple messages.

In [None]:
class Conversation:
    """Manage a multi-turn conversation.

    The full message history (system prompt first) is kept in
    ``self.messages`` and sent with every request, so the model sees the
    earlier turns.
    """

    def __init__(self, provider_name: str, system: str = "You are a helpful assistant."):
        """Create a conversation bound to one provider.

        Args:
            provider_name: Key into ``PROVIDERS``.
            system: Content of the initial system message.
        """
        self.provider_name = provider_name
        self.config = PROVIDERS[provider_name]
        self.client = get_client(provider_name)
        self.messages = [{"role": "system", "content": system}]

    def send(self, user_input: str, **kwargs) -> str:
        """Send a message and get a response.

        On success both the user message and the assistant reply are
        appended to the history. If the request fails, the user message is
        removed again and the exception is re-raised, so a retry does not
        duplicate the turn.

        Args:
            user_input: Text of the user message.
            **kwargs: Extra arguments forwarded to
                ``client.chat.completions.create``. A ``model`` entry
                overrides the provider's ``default_model``.

        Returns:
            The content of the first choice's message.
        """
        model = kwargs.pop("model", self.config["default_model"])
        self.messages.append({"role": "user", "content": user_input})

        try:
            response = self.client.chat.completions.create(
                model=model,
                messages=self.messages,
                **kwargs
            )
            assistant_msg = response.choices[0].message.content
        except Exception:
            # Roll back the unanswered user message to keep history consistent.
            self.messages.pop()
            raise

        self.messages.append({"role": "assistant", "content": assistant_msg})

        return assistant_msg

    def history(self) -> list:
        """Return a shallow copy of the message list.

        The list is new, but the message dicts are shared with the
        conversation.
        """
        return self.messages.copy()

    def clear(self, system: str = None):
        """Reset the history to a single system message.

        Args:
            system: New system prompt. When ``None`` (the default) the
                current system prompt is kept. An empty string is honored
                as an explicit empty prompt.
        """
        if system is None:
            system = self.messages[0]["content"]
        self.messages = [{"role": "system", "content": system}]

# Run a short three-turn exchange, then dump the stored history.
provider = list(PROVIDERS)[0]
conv = Conversation(provider, "You are a helpful math tutor. Be concise.")

for question in (
    "What is 5 * 7?",
    "What about 5 * 8?",
    "What was my first question?",
):
    print(f"User: {question}")
    print(f"Assistant: {conv.send(question)}\n")

print("=== Full History ===")
for msg in conv.history():
    print(f"{msg['role']}: {msg['content']}")

## Part 9: Multi-Provider Client

Build a client that can switch between providers.

In [None]:
class MultiProviderClient:
    """Hold one ``OpenAI`` client per provider and route requests by name."""

    def __init__(self, providers: dict):
        """Create a client for every entry in ``providers``.

        Each entry must supply ``base_url`` and ``api_key``; ``chat`` also
        reads ``default_model``.
        """
        self.providers = providers
        self._clients = {
            name: OpenAI(base_url=cfg["base_url"], api_key=cfg["api_key"])
            for name, cfg in providers.items()
        }

    def chat(self, provider_name: str, messages: list, **kwargs) -> str:
        """Run a chat completion on the chosen provider.

        A ``model`` keyword overrides the provider's ``default_model``; all
        other keywords are forwarded to the completion call. Returns the
        content of the first choice.

        Raises:
            ValueError: If no client exists for ``provider_name``.
        """
        try:
            api = self._clients[provider_name]
        except KeyError:
            raise ValueError(f"Unknown provider: {provider_name}") from None

        settings = self.providers[provider_name]
        chosen_model = kwargs.pop("model", settings["default_model"])

        completion = api.chat.completions.create(
            model=chosen_model,
            messages=messages,
            **kwargs
        )
        return completion.choices[0].message.content

    def list_providers(self) -> list:
        """List the names of the configured providers."""
        return [*self.providers]

# Send one request through every provider; report failures and keep going.
multi_client = MultiProviderClient(PROVIDERS)
print("Available providers:", multi_client.list_providers())

for provider in multi_client.list_providers():
    try:
        reply = multi_client.chat(
            provider,
            [{"role": "user", "content": "Say 'hello' in one word."}],
            max_tokens=10,
        )
        print(f"{provider}: {reply}")
    except Exception as exc:
        print(f"{provider}: Error - {exc}")

## Exercises

1. **Provider Comparison**: Make identical requests to multiple providers and compare responses.
2. **Streaming with Error Handling**: Combine streaming with retry logic.
3. **Cost Tracker**: Build a wrapper that tracks token usage and estimates costs.
4. **Fallback Chain**: Implement automatic fallback from one provider to another on failure.

In [None]:
# Exercise: Cost Tracker

class CostTracker:
    """Track API usage and estimate costs.

    Token counts are accumulated across calls to ``track``; cost is computed
    from per-1,000-token prices for input and output tokens.
    """

    def __init__(self, input_cost_per_1k: float = 0.0005, output_cost_per_1k: float = 0.0015):
        """Start with zero tokens and the given per-1,000-token prices."""
        self.input_cost_per_1k = input_cost_per_1k
        self.output_cost_per_1k = output_cost_per_1k
        self.total_input_tokens = 0
        self.total_output_tokens = 0

    def track(self, response):
        """Add the token counts from ``response.usage`` to the totals.

        Responses without usage data (``usage`` missing or ``None``) are
        ignored rather than raising ``AttributeError``.
        """
        usage = getattr(response, "usage", None)
        if usage is None:
            return
        self.total_input_tokens += usage.prompt_tokens
        self.total_output_tokens += usage.completion_tokens

    def get_cost(self) -> float:
        """Return the estimated total cost of all tracked tokens."""
        input_cost = (self.total_input_tokens * self.input_cost_per_1k) / 1000
        output_cost = (self.total_output_tokens * self.output_cost_per_1k) / 1000
        return input_cost + output_cost

    def report(self) -> dict:
        """Return token totals and the estimated cost as a dict."""
        return {
            "total_input_tokens": self.total_input_tokens,
            "total_output_tokens": self.total_output_tokens,
            "total_tokens": self.total_input_tokens + self.total_output_tokens,
            "estimated_cost": self.get_cost()
        }

# Make three small requests and report the accumulated usage.
tracker = CostTracker()

provider = list(PROVIDERS)[0]
client = get_client(provider)
config = PROVIDERS[provider]

for _ in range(3):
    response = client.chat.completions.create(
        model=config["default_model"],
        messages=[{"role": "user", "content": "Say hello"}],
        max_tokens=20,
    )
    tracker.track(response)

print("Usage Report:")
usage_report = tracker.report()
print(json.dumps(usage_report, indent=2))