# My LLM Client Implementation


On my journey of developing smart chatbots, i ran into rate limits while experimenting with open ai. This was after I had spent hours setting up my system. I knew that was not the end of the world. I then decided to implement a generalized way to program LLMs. That way I could retain cloud llms with different price points while incorporating local models like Ollama. I named my chatbot TUTI which translates to parrot. So much fun! So, here we go

## Overview
In this notebook, I implement a multi-provider LLM client with error handling and retry logic.

## Setup
Firstly, I install dependencies and set up API keys for the different LLM providers.



### install dependencies 

In [16]:
# Install required packages (run this cell once, then comment it out)
# %pip install openai

# %pip install openai # installs the OpenAI API client.
# %pip install python-dotenv # installs the dotenv package for loading environment variables.

# %pip install openai google-generativeai anthropic tenacity python-dotenv

# %pip install ollama  # installs the Ollama API client.

# Download an Ollama model (run this cell once, then comment it out)
# !ollama pull llama3:8b  # Good balance of size and quality

# %pip install panel # for GUI
# %pip install jupyter_bokeh






### import libraries and set up api keys

In [17]:
# Import everything needed
import os
from enum import Enum
from typing import List, Dict, Optional
from dataclasses import dataclass

import openai
from openai import OpenAI as OpenAIClient
import google.generativeai as genai
import anthropic
from tenacity import retry, wait_exponential, stop_after_attempt, retry_if_exception_type

import time

import panel as pn  # GUI

import ollama
# import json


# Set API keys (i load from .env file)
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')
os.environ['DEEPSEEK_API_KEY'] = os.getenv('DEEPSEEK_API_KEY')  
os.environ['GOOGLE_API_KEY'] = os.getenv('GOOGLE_API_KEY')

# ... etc

### data classes and enums

In [18]:
# Defining data structures i will use
class LLMProvider(Enum):
    OPENAI = "openai"
    DEEPSEEK = "deepseek"
    GOOGLE = "google"
    ANTHROPIC = "anthropic"
    OLLAMA = "ollama" #Added Ollama as a provider for local models

@dataclass
class LLMResponse:
    content: str
    provider: LLMProvider
    model: str
    usage: Optional[Dict] = None
    latency: Optional[float] = None
    local: bool = False  #Flag for local vs cloud because now i have ollama


### main LLM client class

In [19]:
class ProfessionalLLMClient:
    """Main client class to interact with various LLM providers."""
    
    def __init__(self):
        self.clients = {}
        self.available_providers = []
        self._initialize_all_clients()

    
    def _initialize_all_clients(self):
        """Initialize all available API clients."""
        provider_configs = {
            # LLMProvider.OPENAI: {
            #     'env_var': 'OPENAI_API_KEY',
            #     'init_func': self._init_openai,
            #     'requires_key': True  #Flag for key requirement
            # },
            # LLMProvider.DEEPSEEK: {
            #     'env_var': 'DEEPSEEK_API_KEY', 
            #     'init_func': self._init_deepseek,
            #     'requires_key': True
            # },
            # LLMProvider.GOOGLE: {
            #     'env_var': 'GOOGLE_API_KEY',
            #     'init_func': self._init_google,
            #     'requires_key': True
            # },
            # LLMProvider.ANTHROPIC: {
            #     'env_var': 'ANTHROPIC_API_KEY',
            #     'init_func': self._init_anthropic,
            #     'requires_key': True
            # },
            LLMProvider.OLLAMA: {
                'env_var': None,  # No environment variable needed
                'init_func': self._init_ollama,
                'requires_key': False  #No API key required!
            }
        }
        
        for provider, config in provider_configs.items():
            try:
                if config['requires_key']:
                    # For providers that need API keys
                    api_key = os.getenv(config['env_var'])
                    if api_key and not api_key.startswith('your-'):
                        config['init_func'](api_key)
                        self.available_providers.append(provider)
                        print(f"‚úÖ {provider.value.upper()} client initialized successfully")
                    else:
                        print(f"‚ö†Ô∏è  Skipping {provider.value}: API key not found")
                else:
                    # For providers that don't need API keys (like Ollama)
                    config['init_func'](None)  # Pass None or whatever the init expects
                    self.available_providers.append(provider)
                    print(f"‚úÖ {provider.value.upper()} client initialized successfully")
                    
            except Exception as e:
                print(f"‚ùå Failed to initialize {provider.value}: {e}")
    

    def _init_ollama(self, api_key=None):
        """Initialize Ollama client - no API key needed!"""
        try:
            # Test if Ollama is running
            ollama.list()
            self.clients[LLMProvider.OLLAMA] = ollama
            print("Ollama is running and ready!")
        except Exception as e:
            print(f"Ollama not available: {e}")
            raise
    
    def _init_openai(self, api_key: str):
        """Initialize OpenAI client."""
        self.clients[LLMProvider.OPENAI] = OpenAIClient(api_key=api_key)
    
    def _init_deepseek(self, api_key: str):
        """Initialize DeepSeek client."""
        self.clients[LLMProvider.DEEPSEEK] = OpenAIClient(
            api_key=api_key,
            base_url="https://api.deepseek.com/v1"
        )
    
    def _init_google(self, api_key: str):
        """Initialize Google client."""
        genai.configure(api_key=api_key)
        self.clients[LLMProvider.GOOGLE] = genai
    
    def _init_anthropic(self, api_key: str):
        """Initialize Anthropic client."""
        self.clients[LLMProvider.ANTHROPIC] = anthropic.Anthropic(api_key=api_key)
    
    @retry(
        retry=retry_if_exception_type((
            openai.RateLimitError,
            anthropic.RateLimitError,
            openai.APIConnectionError,
            openai.APIError
        )),
        wait=wait_exponential(multiplier=1, min=2, max=30),
        stop=stop_after_attempt(3),
        reraise=True
    )
    def chat_completion(
        self,
        provider: LLMProvider,
        messages: List[Dict[str, str]],
        model: Optional[str] = None,
        temperature: float = 0.7,
        max_tokens: Optional[int] = None,
        **kwargs
    ) -> LLMResponse:
        
        """
        Chat completion with comprehensive error handling.
        
        Args:
            provider: The LLM provider to use
            messages: List of message dictionaries
            model: Specific model name (uses default if None)
            temperature: Creativity control (0.0 to 1.0)
            max_tokens: Maximum tokens to generate
            
        Returns:
            LLMResponse object with structured data
        """
        
        if provider not in self.clients:
            raise ValueError(f"Provider {provider.value} not available")
        
        # Set default models
        default_models = {
            LLMProvider.OPENAI: "gpt-3.5-turbo",
            LLMProvider.DEEPSEEK: "deepseek-chat",
            LLMProvider.GOOGLE: "gemini-1.0-pro",  # Updated from gemini-pro
            LLMProvider.ANTHROPIC: "claude-3-haiku-20240307",
            LLMProvider.OLLAMA: "llama3:8b"  #Added default model for Ollama
        }
        model = model or default_models[provider]
        
        start_time = time.time()
        
        try:
            if provider == LLMProvider.OLLAMA:
                # Convert messages to Ollama format
                ollama_messages = []
                for msg in messages:
                    ollama_messages.append({
                        'role': msg['role'],
                        'content': msg['content']
                    })
                
                # Make Ollama API call
                response = self.clients[provider].chat(
                    model=model,
                    messages=ollama_messages,
                    options={
                        'temperature': temperature,
                        'num_predict': max_tokens or 512,  # Ollama uses num_predict
                    }
                )
                
                content = response['message']['content']
                usage = {
                    'prompt_tokens': response.get('prompt_eval_count', 0),
                    'completion_tokens': response.get('eval_count', 0),
                    'total_tokens': response.get('prompt_eval_count', 0) + response.get('eval_count', 0)
                }
            elif provider in [LLMProvider.OPENAI, LLMProvider.DEEPSEEK]:
                response = self.clients[provider].chat.completions.create(
                    model=model,
                    messages=messages,
                    temperature=temperature,
                    max_tokens=max_tokens,
                    **kwargs
                )
                content = response.choices[0].message.content
                usage = {
                    'prompt_tokens': response.usage.prompt_tokens,
                    'completion_tokens': response.usage.completion_tokens,
                    'total_tokens': response.usage.total_tokens
                } if hasattr(response, 'usage') and response.usage else None
                
            elif provider == LLMProvider.GOOGLE:
                # UPDATED: Google Gemini API format
                conversation = []
                for msg in messages:
                    if msg["role"] == "system":
                        # For system messages, prepend to first user message
                        if conversation and conversation[-1]["role"] == "user":
                            conversation[-1]["parts"][0] = f"{msg['content']}\n\n{conversation[-1]['parts'][0]}"
                        else:
                            # If no user message yet, store system for later
                            system_content = msg["content"]
                    else:
                        role = "user" if msg["role"] == "user" else "model"
                        conversation.append({"role": role, "parts": [msg["content"]]})
                
                # Create the model
                model_obj = self.clients[provider].GenerativeModel(
                    model_name=model,
                    generation_config={
                        "temperature": temperature,
                        "max_output_tokens": max_tokens or 2048,
                        "top_p": kwargs.get("top_p", 0.95),
                        "top_k": kwargs.get("top_k", 40),
                    }
                )
                
                # Start chat and send message
                chat = model_obj.start_chat(history=conversation[:-1] if len(conversation) > 1 else [])
                response = chat.send_message(conversation[-1]["parts"][0] if conversation else "")
                
                content = response.text
                usage = {
                    'prompt_tokens': response.usage_metadata.prompt_token_count if hasattr(response, 'usage_metadata') else None,
                    'completion_tokens': response.usage_metadata.candidates_token_count if hasattr(response, 'usage_metadata') else None,
                    'total_tokens': response.usage_metadata.total_token_count if hasattr(response, 'usage_metadata') else None
                }

                
            elif provider == LLMProvider.ANTHROPIC:
                # Convert to Anthropic's format
                system_content = None
                anthropic_messages = []
                
                for msg in messages:
                    if msg["role"] == "system":
                        system_content = msg["content"]
                    else:
                        role = "user" if msg["role"] == "user" else "assistant"
                        anthropic_messages.append({"role": role, "content": msg["content"]})
                
                response = self.clients[provider].messages.create(
                    model=model,
                    messages=anthropic_messages,
                    system=system_content,
                    temperature=temperature,
                    max_tokens=max_tokens or 1024,
                    **kwargs
                )
                content = response.content[0].text
                usage = {
                    'input_tokens': response.usage.input_tokens,
                    'output_tokens': response.usage.output_tokens
                }

            
                
                
            latency = time.time() - start_time
            
            return LLMResponse(
                content=content,
                provider=provider,
                model=model,
                usage=usage,
                latency=latency
            )
            
        except Exception as e:
            print(f"Error with {provider.value} API: {str(e)}")
            raise
    
    def get_available_providers(self) -> List[LLMProvider]:
        """Get list of successfully initialized providers."""
        return self.available_providers
    
    def get_provider_models(self, provider: LLMProvider) -> List[str]:
        """Get available models for a provider."""
        model_lists = {
            LLMProvider.OPENAI: ["gpt-4-turbo", "gpt-4", "gpt-3.5-turbo", "gpt-3.5-turbo-16k"],
            LLMProvider.DEEPSEEK: ["deepseek-chat", "deepseek-coder"],
            LLMProvider.GOOGLE: [
                "gemini-1.0-pro",      # General purpose
                "gemini-1.0-pro-001",  # Specific version
                "gemini-pro",           # Some accounts might still have this
                "gemini-1.5-pro-latest" # Latest 1.5 pro (if you have access)
            ],
            LLMProvider.ANTHROPIC: [
                "claude-3-opus-20240229", 
                "claude-3-sonnet-20240229", 
                "claude-3-haiku-20240307"
            ],
            LLMProvider.OLLAMA: self._get_ollama_models()  # Dynamic list!
            
        }
        return model_lists.get(provider, [])
    
    def _get_ollama_models(self) -> List[str]:
        """Get list of available Ollama models."""
        try:
            result = self.clients[LLMProvider.OLLAMA].list()
            return [model['name'] for model in result['models']]
        except:
            return ["llama3:8b", "llama3:70b", "mixtral:8x7b", "gemma:7b", "phi3:mini"]

    def batch_process(
        self,
        prompts: List[str],
        provider: LLMProvider,
        **kwargs
    ) -> List[LLMResponse]:
        """Process multiple prompts with the same settings."""
        results = []
        for prompt in prompts:
            messages = [{"role": "user", "content": prompt}]
            results.append(self.chat_completion(provider, messages, **kwargs))
        return results

### initializing my LLM client

In [20]:
# Create an instance of my llm client
llm_client = ProfessionalLLMClient()

# Check what providers are available
print("Available providers:", [p.value for p in llm_client.get_available_providers()])

Ollama is running and ready!
‚úÖ OLLAMA client initialized successfully
Available providers: ['ollama']


## Usage Examples
Here's how to use the client for different providers

### testing my individual providers

#### deepseek / openai

In [7]:
# Testing my LLM providers, one at a time
test_messages = [
    {"role": "user", "content": "Explain quantum computing to me like i am a farmer"}
]

try:
    response = llm_client.chat_completion(LLMProvider.DEEPSEEK, test_messages)
    print("DeepSeek Response:", response.content)
    print(f"Latency: {response.latency:.2f}s")
except Exception as e:
    print(f"Error: {e}")

Error: Provider deepseek not available


#### google gemini

In [8]:
# Test Google Gemini specifically with different models
test_messages = [{"role": "user", "content": "Hello! What's the weather like today?"}]

# Try available Google models
google_models = llm_client.get_provider_models(LLMProvider.GOOGLE)
print("Available Google models:", google_models)

for model in google_models:
    try:
        print(f"\nüß™ Testing Google model: {model}")
        response = llm_client.chat_completion(
            LLMProvider.GOOGLE, 
            test_messages, 
            model=model
        )
        print(f"‚úÖ Success with {model}: {response.content[:100]}...")
        break  # Stop after first successful model
    except Exception as e:
        print(f"‚ùå {model} failed: {e}")

Available Google models: ['gemini-1.0-pro', 'gemini-1.0-pro-001', 'gemini-pro', 'gemini-1.5-pro-latest']

üß™ Testing Google model: gemini-1.0-pro
‚ùå gemini-1.0-pro failed: Provider google not available

üß™ Testing Google model: gemini-1.0-pro-001
‚ùå gemini-1.0-pro-001 failed: Provider google not available

üß™ Testing Google model: gemini-pro
‚ùå gemini-pro failed: Provider google not available

üß™ Testing Google model: gemini-1.5-pro-latest
‚ùå gemini-1.5-pro-latest failed: Provider google not available


#### ollama

In [None]:
print("\nüß™ Testing Ollama...")
try:
    test_messages = [{"role": "user", "content": "Explain quantum computing simply"}]
    response = llm_client.chat_completion(LLMProvider.OLLAMA, test_messages)
    print(f"‚úÖ Ollama response: {response.content[:50]}...")
except Exception as e:
    print(f"‚ùå Ollama test failed: {e}")


üß™ Testing Ollama...
‚úÖ Ollama response: The fascinating world of quantum computing!

**Cla...


## Comparison Function
This function compares the performance of different providers, comparing parameters like latency, token usage, and the quality of response content. Please try it for yourself :) In an effort not to bias your own analyses, I will reserve my findings to myself :)

In [10]:
# Compare multiple providers
def compare_providers_professional(prompt: str):
    """Professional comparison of all available providers."""
    messages = [{"role": "user", "content": prompt}]
    results = {}
    
    for provider in llm_client.get_available_providers():
        try:
            print(f"üß™ Testing {provider.value}...")
            response = llm_client.chat_completion(provider, messages, temperature=0.7)
            results[provider.value] = {
                'content': response.content,
                'latency': response.latency,
                'tokens': response.usage
            }
            print(f"‚úÖ {provider.value} completed in {response.latency:.2f}s")
            
        except Exception as e:
            results[provider.value] = f"Error: {str(e)}"
            print(f"‚ùå {provider.value} failed: {e}")
        
        print("-" * 50)
    
    return results

# Run comparison
comparison_results = compare_providers_professional(
    "What are the main benefits of machine learning in healthcare?"
)

üß™ Testing ollama...
‚úÖ ollama completed in 162.89s
--------------------------------------------------


### Analyze Results

In [11]:
# Display results in a nice format
for provider, data in comparison_results.items():
    if isinstance(data, dict):
        print(f"\nüéØ {provider.upper()}:")
        print(f"‚è±Ô∏è Latency: {data['latency']:.2f}s")
        print(f"üìä Tokens: {data['tokens']}")
        print(f"üí¨ Response: {data['content'][:200]}...")
    else:
        print(f"\n‚ùå {provider.upper()}: {data}")
    print("‚éØ" * 60)


üéØ OLLAMA:
‚è±Ô∏è Latency: 162.89s
üìä Tokens: {'prompt_tokens': 21, 'completion_tokens': 512, 'total_tokens': 533}
üí¨ Response: Machine learning (ML) has numerous benefits in healthcare, including:

1. **Improved disease diagnosis**: ML algorithms can analyze medical images, laboratory results, and electronic health records to...
‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ‚éØ


## Experimenting with Batch Processing
This enables us to ask the LLM provider multiple prompts and print out the model's performance results

In [130]:
# Process multiple prompts at once
prompts = [
    "Explain blockchain technology like I'm seven",
    "What is the capital of Uganda?",
    "How does pollination work?"
]

print("Processing batch of prompts with Ollama...")
batch_results = llm_client.batch_process(
    prompts=prompts,
    provider=LLMProvider.OLLAMA,
    temperature=0.3
)

for i, result in enumerate(batch_results):
    print(f"\nüìù Prompt {i+1}: {prompts[i][:30]}...")
    print(f"üí° Response: {result.content[:100]}...")
    print(f"‚è±Ô∏è  Latency: {result.latency:.2f}s")

Processing batch of prompts with DeepSeek...

üìù Prompt 1: Explain blockchain technology...
üí° Response: The fascinating world of blockchain technology!

Blockchain is a decentralized, digital ledger techn...
‚è±Ô∏è  Latency: 145.99s

üìù Prompt 2: What is the capital of France?...
üí° Response: The capital of France is Paris....
‚è±Ô∏è  Latency: 3.56s

üìù Prompt 3: How does photosynthesis work?...
üí° Response: Photosynthesis is the process by which plants, algae, and some bacteria convert light energy from th...
‚è±Ô∏è  Latency: 157.48s


# OrderBot

In this OrderBot, we automate the collection of user prompts and assistant responses. This OrderBot will take orders at a pizza restaurant

In [35]:
def collect_messages(_):
    prompt = inp.value_input
    inp.value = ''
    context.append({'role':'user', 'content':f"{prompt}"})
    response = llm_client.chat_completion(provider=LLMProvider.OLLAMA, messages=context) 
    context.append({'role':'assistant', 'content':f"{response.content}"})
    panels.append(
        pn.Row('User:', pn.pane.Markdown(prompt, width=600)))
    panels.append(
        pn.Row('Assistant:', pn.pane.Markdown(response.content, width=600)))
 
    return pn.Column(*panels)



We go ahead and set up the user interface to collect user prompts and assistant responses

In [36]:
import panel as pn  # GUI
pn.extension()

panels = [] # collect display 

context = [ {'role':'system', 'content':"""
You are OrderBot, an automated service to collect orders for a pizza restaurant. \
You first greet the customer, then collects the order, \
and then asks if it's a pickup or delivery. \
You wait to collect the entire order, then summarize it and check for a final \
time if the customer wants to add anything else. \
If it's a delivery, you ask for an address. \
Finally you collect the payment.\
Make sure to clarify all options, extras and sizes to uniquely \
identify the item from the menu.\
You respond in a short, very conversational friendly style. \
The menu includes \
pepperoni pizza  12.95, 10.00, 7.00 \
cheese pizza   10.95, 9.25, 6.50 \
eggplant pizza   11.95, 9.75, 6.75 \
fries 4.50, 3.50 \
greek salad 7.25 \
Toppings: \
extra cheese 2.00, \
mushrooms 1.50 \
sausage 3.00 \
canadian bacon 3.50 \
AI sauce 1.50 \
peppers 1.00 \
Drinks: \
coke 3.00, 2.00, 1.00 \
sprite 3.00, 2.00, 1.00 \
bottled water 5.00 \
"""} ]  # accumulate messages


inp = pn.widgets.TextInput(value="Hi", placeholder='Enter text here‚Ä¶')
button_conversation = pn.widgets.Button(name="Chat!")

interactive_conversation = pn.bind(collect_messages, button_conversation)

dashboard = pn.Column(
    inp,
    pn.Row(button_conversation),
    pn.panel(interactive_conversation, loading_indicator=True, height=300),
)

dashboard

BokehModel(combine_events=True, render_bundle={'docs_json': {'3ca312c0-00ed-419e-b6cc-a4a9fd7bc630': {'version‚Ä¶

Now we go ahead and collect the user's order and process it 

In [37]:
messages =  context.copy()
messages.append(
{'role':'system', 'content':'create a json summary of the previous food order. Itemize the price for each item\
 The fields should be 1) pizza, include size 2) list of toppings 3) list of drinks, include size   4) list of sides include size  5)total price '},    
)
 #The fields should be 1) pizza, price 2) list of toppings 3) list of drinks, include size include price  4) list of sides include size include price, 5)total price '},    

response = llm_client.chat_completion(LLMProvider.OLLAMA, messages=messages, temperature=0)
print(response)

LLMResponse(content="You've got a few options for where to get your Medium Cheese Pizza:\n\nWould you like to pick it up from our store, or have it delivered to your doorstep?\n\nIf you choose delivery, I'll need an address to make sure we bring it right to you. Just let me know!", provider=<LLMProvider.OLLAMA: 'ollama'>, model='llama3:8b', usage={'prompt_tokens': 675, 'completion_tokens': 60, 'total_tokens': 735}, latency=26.801153898239136, local=False)
