In [None]:
# imports
import os
from dotenv import load_dotenv
from openai import OpenAI


In [None]:
# constants: model identifiers passed as `model=` to the chat-completion calls

# Model name requested from the local Ollama server.
MODEL_LLAMA = "llama3.2:1b"

# Model name requested through the OpenRouter-backed client.
# NOTE(review): OpenRouter model IDs are usually provider-prefixed
# (e.g. "openai/gpt-4o-mini") - confirm this bare ID is accepted.
MODEL_GPT = "gpt-4o-mini"

In [None]:
# set up environment

# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# OpenRouter client for GPT-4o-mini
# Despite its name, OPENAI_API_KEY holds the OpenRouter key; the name is kept
# unchanged so anything else referring to it keeps working.
OPENAI_API_KEY = os.getenv("OPENROUTER_API_KEY")
if not OPENAI_API_KEY:
    # Fail fast with an actionable message. Passing api_key=None would let the
    # SDK fall back to the OPENAI_API_KEY environment variable (sending the
    # wrong key to OpenRouter) or raise a less specific error.
    raise RuntimeError(
        "OPENROUTER_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )

openai = OpenAI(
    api_key=OPENAI_API_KEY,
    base_url="https://openrouter.ai/api/v1",
    # Extra headers sent with every request made through this client.
    default_headers={
        "HTTP-Referer": "http://localhost:8888",
        "X-Title": "OpenAI Playground",
    },
)

# Local Ollama client for Llama (run: ollama run llama3.2:1b)
# The api_key value is a placeholder string; the client requires some value.
ollama = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama")

In [None]:
# here is the question; type over this to ask something new

# System prompt sent as the "system" message to each model.
# (Fixed typo: "helpful assist" -> "helpful assistant".)
system_prompt = """
You are a helpful assistant that answers technical questions. You take the questions from the user and answer them in a way that is easy to understand.
"""

# The user question sent as the "user" message to each model.
question = "What is the difference between OSI and TCP/IP models?"


In [None]:
# Get gpt-4o-mini to answer, with streaming
print("\nGPT-4o-Mini Response:\n")
stream = openai.chat.completions.create(
    model=MODEL_GPT,
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": question},
    ],
    stream=True,
)

# Print each text fragment as soon as it arrives.
for chunk in stream:
    # A streamed chunk can carry an empty `choices` list (for example a
    # usage-only final chunk); indexing [0] on it would raise IndexError.
    if not chunk.choices:
        continue
    delta_text = chunk.choices[0].delta.content
    # Chunks with no text (None or "") are skipped.
    if delta_text:
        print(delta_text, end="", flush=True)

In [None]:
# Ask the Llama model (via the local Ollama client) the same question.
# This call is not streamed: the full reply is printed once it returns.
print("\nLlama 3.2 Response:\n")

response = ollama.chat.completions.create(
    messages=[
        dict(role="system", content=system_prompt),
        dict(role="user", content=question),
    ],
    model=MODEL_LLAMA,
)

# Show the text of the first returned choice.
print(response.choices[0].message.content)