In [10]:
import asyncio
import json
from openai import AsyncOpenAI
from ollama import AsyncClient
from pydantic import BaseModel
from typing import Optional
import time

import os
from dotenv import load_dotenv
load_dotenv()


# Define the structured output model
class ResponseModel(BaseModel):
    """Schema that the JSON replies from both providers are parsed into.

    The field names mirror the keys requested in the prompt, so a reply that
    omits one of them (or uses an incompatible type) fails validation.
    """

    # The answer to the query (the prompt asks for the capital city).
    answer: str
    # Self-reported confidence; the prompt asks for a float between 0 and 1,
    # but no range check is enforced here.
    confidence: float
    # Model name or source as reported by the model itself, not by this code.
    source: str

# Configuration
# The OpenAI key is read from the environment (load_dotenv() above may have
# populated it from a local .env file); it is None when the variable is unset.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
# Ollama endpoint. Set the OLLAMA_HOST environment variable to point at a
# different server; otherwise the remote RunPod proxy URL below is used.
OLLAMA_HOST = os.environ.get(
    "OLLAMA_HOST", "https://zk8ufz7qvz0kxi-11434.proxy.runpod.net/"
)
OLLAMA_MODEL = "llama3.1:8b"
OPENAI_MODEL = "gpt-4o-mini"

# Initialize clients (one shared async client per provider)
openai_client = AsyncOpenAI(api_key=OPENAI_API_KEY)
ollama_client = AsyncClient(host=OLLAMA_HOST)

# Prompt sent verbatim to both APIs; the requested fields match ResponseModel.
PROMPT = """
Given the query: "What is the capital of France?", provide a structured response with the following fields:
- answer: The capital city.
- confidence: A float between 0 and 1 indicating confidence in the answer.
- source: The model name or source of the response.

Return the response in JSON format.
"""

async def call_openai() -> Optional[ResponseModel]:
    """Ask the OpenAI chat model for a JSON answer and validate it.

    Returns:
        A ``ResponseModel`` built from the model's JSON reply, or ``None`` if
        the request, the JSON decoding or the validation raised; in that case
        the error is printed rather than propagated.
    """
    try:
        system_msg = {
            "role": "system",
            "content": "You are a helpful assistant that returns structured JSON responses.",
        }
        user_msg = {"role": "user", "content": PROMPT}
        completion = await openai_client.chat.completions.create(
            model=OPENAI_MODEL,
            messages=[system_msg, user_msg],
            response_format={"type": "json_object"},
        )
        # Only the first choice is used.
        raw_json = completion.choices[0].message.content
        fields = json.loads(raw_json)
        return ResponseModel(**fields)
    except Exception as exc:
        print(f"OpenAI error: {exc}")
        return None

async def call_ollama() -> Optional[ResponseModel]:
    """Ask the Ollama chat model for a JSON answer and validate it.

    Returns:
        A ``ResponseModel`` built from the model's JSON reply, or ``None`` if
        the request, the JSON decoding or the validation raised; in that case
        the error is printed rather than propagated.
    """
    try:
        response = await ollama_client.chat(
            model=OLLAMA_MODEL,
            messages=[
                {"role": "system", "content": "Return responses in structured JSON format."},
                {"role": "user", "content": PROMPT}
            ],
            # `format` is a top-level argument of chat(); placing it inside
            # `options` (which carries model/sampling parameters) does not
            # switch the server into JSON mode.
            format="json",
        )
        content = response["message"]["content"]
        return ResponseModel(**json.loads(content))
    except Exception as e:
        print(f"Ollama error: {e}")
        return None



In [15]:
async def main():
    """Query both providers concurrently, print each result and the elapsed time.

    A provider is reported as failed when its coroutine returned ``None`` or
    when ``asyncio.gather`` captured an exception for it; in the latter case
    the exception is included in the failure message.
    """
    # perf_counter is monotonic, so the measured interval cannot be skewed by
    # system clock adjustments the way time.time() can.
    start_time = time.perf_counter()

    # Pair each provider label with its coroutine so the label used in the
    # report cannot drift from the order passed to gather().
    providers = [("OpenAI", call_openai()), ("Ollama", call_ollama())]

    # Run API calls concurrently
    responses = await asyncio.gather(
        *(task for _, task in providers), return_exceptions=True
    )

    # Process results (gather preserves the input order)
    for (label, _), response in zip(providers, responses):
        if isinstance(response, ResponseModel):
            print(f"Response from {response.source}:")
            print(f"Answer: {response.answer}, Confidence: {response.confidence}")
        else:
            # Surface exceptions captured by return_exceptions=True instead of
            # dropping them silently.
            detail = f" ({response!r})" if isinstance(response, BaseException) else ""
            print(f"Failed to get response from {label}{detail}")

    elapsed_time = time.perf_counter() - start_time
    print(f"Total execution time: {elapsed_time:.2f} seconds")





In [18]:
# Run the comparison; top-level `await` is used directly in this notebook cell.
await main()

Response from GPT-3.5:
Answer: Paris, Confidence: 1.0
Response from Knowledge Graph:
Answer: Paris, Confidence: 1.0
Total execution time: 1.91 seconds
