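# Ollama via OpenAI API

This notebook calls a local Ollama server through the OpenAI Python client, first with a synchronous request and then with two asynchronous requests awaited together.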

In [1]:
from openai import OpenAI
from pprint import pprint

In [2]:
def _msg(role, content):
    # simple helper function to create a message object
    return {"role": role, "content": content}

def system(content):
    """Return a chat message dict whose role is "system" and whose content is ``content``."""
    message = _msg(role="system", content=content)
    return message

def user(content):
    """Return a chat message dict whose role is "user" and whose content is ``content``."""
    message = _msg(role="user", content=content)
    return message

def assistant(content):
    """Return a chat message dict whose role is "assistant" and whose content is ``content``."""
    message = _msg(role="assistant", content=content)
    return message

In [3]:
# Seed conversation: a system prompt, one completed question/answer exchange,
# and a final user question that is still waiting for a reply.
system_prompt = "You are a helpful assistant that can answer questions and help with tasks."

chat_history = [
    system(system_prompt),
    user("What is the capital of France?"),
    assistant("The capital of France is Paris."),
    user("What is the capital of Germany?"),
]

In [4]:
# Create an OpenAI client that talks to Ollama's API instead of the default host.
client = OpenAI(
    api_key="ollama",  # dummy value: the client requires a key, Ollama does not use it
    base_url="http://localhost:11434/v1",  # Ollama's default local endpoint
)

# Send the full conversation so far and keep the response object for the
# following cells.
completion = client.chat.completions.create(
    model="llama3.2:3b",
    messages=chat_history,
)

In [5]:
# Display the full response object returned by the chat completion call.
completion

ChatCompletion(id='chatcmpl-73', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='The capital of Germany is Berlin.', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None))], created=1754722119, model='llama3.2:3b', object='chat.completion', service_tier=None, system_fingerprint='fp_ollama', usage=CompletionUsage(completion_tokens=8, prompt_tokens=70, total_tokens=78, completion_tokens_details=None, prompt_tokens_details=None))

In [6]:
# Take the reply text from the first choice and record it as the assistant's
# turn in the conversation.
# NOTE: this mutates chat_history in place, so re-running this cell appends
# the same reply a second time.
reply_text = completion.choices[0].message.content
chat_history.append(assistant(reply_text))


In [7]:
# Pretty-print the conversation to confirm the assistant's reply was appended.
pprint(chat_history)

[{'content': 'You are a helpful assistant that can answer questions and help '
             'with tasks.',
  'role': 'system'},
 {'content': 'What is the capital of France?', 'role': 'user'},
 {'content': 'The capital of France is Paris.', 'role': 'assistant'},
 {'content': 'What is the capital of Germany?', 'role': 'user'},
 {'content': 'The capital of Germany is Berlin.', 'role': 'assistant'}]


# Asynchronous Calls

In [8]:
from openai import AsyncOpenAI
import asyncio

In [9]:
# Async variant of the client, using the same local endpoint and placeholder key.
# Note: this rebinds `client`, replacing the synchronous client created earlier.
client = AsyncOpenAI(
    api_key="ollama",
    base_url="http://localhost:11434/v1",  # Ollama's default local endpoint
)

# Rebuild the conversation from scratch; this rebinds `chat_history`, so the
# assistant reply appended earlier is no longer part of it.
system_prompt = "You are a helpful assistant that can answer questions and help with tasks."

chat_history = [
    system(system_prompt),
    user("What is the capital of France?"),
    assistant("The capital of France is Paris."),
    user("What is the capital of Germany?"),
]


In [10]:
# Both requests use exactly the same arguments, so define them once.
request_params = {
    "model": "llama3.2:3b",
    "messages": chat_history,
    "max_tokens": 100,
}

# The calls are not awaited here; the two resulting awaitables are awaited
# together in the next cell.
# NOTE(review): these are presumably single-use awaitables, so re-run this cell
# before re-running the gather cell - confirm.
completion_1 = client.chat.completions.create(**request_params)
completion_2 = client.chat.completions.create(**request_params)

In [11]:
# Await both pending requests together; gather returns their results as a list
# in the same order the awaitables were passed in.
completions = await asyncio.gather(completion_1, completion_2)

In [12]:
# Display both response objects returned by the gathered requests.
completions

[ChatCompletion(id='chatcmpl-176', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='The capital of Germany is Berlin.', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None))], created=1754722120, model='llama3.2:3b', object='chat.completion', service_tier=None, system_fingerprint='fp_ollama', usage=CompletionUsage(completion_tokens=8, prompt_tokens=70, total_tokens=78, completion_tokens_details=None, prompt_tokens_details=None)),
 ChatCompletion(id='chatcmpl-284', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='The capital of Germany is Berlin.', refusal=None, role='assistant', annotations=None, audio=None, function_call=None, tool_calls=None))], created=1754722120, model='llama3.2:3b', object='chat.completion', service_tier=None, system_fingerprint='fp_ollama', usage=CompletionUsage(completion_tokens=8, prompt_tokens=70, total_tokens=78