# Basic Completion and Embedding Examples

## Completion


In [6]:
# Copyright (c) 2024 Microsoft Corporation.
# Licensed under the MIT License

import os
from collections.abc import AsyncIterator, Iterator

from dotenv import load_dotenv
from graphrag_llm.completion import LLMCompletion, create_completion
from graphrag_llm.config import AuthMethod, ModelConfig
from graphrag_llm.types import LLMCompletionChunk, LLMCompletionResponse

# Load settings from a .env file (if one is present) before reading them below.
load_dotenv()

api_key = os.getenv("GRAPHRAG_API_KEY")

# A single environment variable supplies both the model name and the Azure
# deployment name for the completion model.
completion_model_name = os.getenv("GRAPHRAG_MODEL", "gpt-4o")

# Use key-based auth when a key is configured; otherwise fall back to
# Azure managed identity.
completion_auth_method = (
    AuthMethod.ApiKey if api_key else AuthMethod.AzureManagedIdentity
)

model_config = ModelConfig(
    model_provider="azure",
    model=completion_model_name,
    azure_deployment_name=completion_model_name,
    api_base=os.getenv("GRAPHRAG_API_BASE"),
    api_version=os.getenv("GRAPHRAG_API_VERSION", "2025-04-01-preview"),
    api_key=api_key,
    auth_method=completion_auth_method,
)
llm_completion: LLMCompletion = create_completion(model_config)

# `completion` is typed as returning either a complete response or an iterator
# of chunks, so both shapes are handled explicitly below.
response: LLMCompletionResponse | Iterator[LLMCompletionChunk] = (
    llm_completion.completion(
        messages="What is the capital of France?",
    )
)

if isinstance(response, Iterator):
    # Streaming response: print each chunk's text as it arrives.
    for chunk in response:
        print(chunk.choices[0].delta.content or "", end="", flush=True)
else:
    # Non-streaming response
    print(response.choices[0].message.content)
    # Or alternatively, access via the content property
    # This is equivalent to the above line, getting the content of the first choice
    print(response.content)

    # The JSON dump belongs to the non-streaming branch: a chunk iterator is not
    # expected to provide `model_dump_json`, and it has already been consumed
    # by the loop above.
    print("Full Response:")
    print(response.model_dump_json(indent=2))

ModuleNotFoundError: No module named 'graphrag_llm'

## Async Completion


In [3]:
response: LLMCompletionResponse = await llm_completion.completion_async(
    messages="What is the capital of France?",
)  # type: ignore
print(response.content)

The capital of France is Paris.


## Streaming Completion


In [4]:
# Request a streamed answer instead of a single complete response.
response = llm_completion.completion(
    messages="What is the capital of France?",
    stream=True,
)

# Only iterate when an iterator of chunks actually came back.
if isinstance(response, Iterator):
    for chunk in response:
        # A chunk's delta content may be empty/None; print nothing in that case.
        text_piece = chunk.choices[0].delta.content or ""
        print(text_piece, end="", flush=True)

The capital of France is Paris.

## Async Streaming Completion


In [5]:
response = await llm_completion.completion_async(
    messages="What is the capital of France?",
    stream=True,
)

if isinstance(response, AsyncIterator):
    # Streaming response
    async for chunk in response:
        print(chunk.choices[0].delta.content or "", end="", flush=True)

The capital of France is Paris.

## Completion Arguments

The completion API adheres to the litellm completion API and thus the OpenAI SDK API. The `messages` parameter can be one of the following:

- `str`: Raw string for the prompt.
- `list[dict[str, Any]]`: A list of dicts in the form `{"role": "user|system|...", "content": "..."}`
- `list[ChatCompletionMessageParam]`: A list of OpenAI `ChatCompletionMessageParam`. `graphrag_llm.utils` provides a `CompletionMessagesBuilder` to help construct these objects. See the message builder notebook for more details on using `CompletionMessagesBuilder`.


In [6]:
from graphrag_llm.utils import (
    CompletionMessagesBuilder,
)

# Form 1: a raw string prompt.
response1: LLMCompletionResponse = llm_completion.completion(
    messages="What is the capital of France?"
)  # type: ignore
print(response1.content)

# Form 2: a list of role/content message dicts.
user_turn = {"role": "user", "content": "What is the capital of France?"}
response2: LLMCompletionResponse = llm_completion.completion(
    messages=[user_turn]
)  # type: ignore
print(response2.content)

# Form 3: a multi-turn conversation assembled with the builder.
# Each step rebinds `builder` to the call's return value, which is equivalent
# to chaining the calls.
builder = CompletionMessagesBuilder()
builder = builder.add_system_message(
    "You are a helpful assistant that likes to talk like a pirate. Respond as if you are a pirate using pirate speak."
)
builder = builder.add_user_message("Is pluto a planet? Respond with a yes or no.")
builder = builder.add_assistant_message("Aye, matey! Pluto be a planet in me book.")
builder = builder.add_user_message(
    "Are you sure? I want the truth. Can you elaborate?"
)
messages = builder.build()

response3: LLMCompletionResponse = llm_completion.completion(messages=messages)  # type: ignore
print(response3.content)

The capital of France is Paris.
The capital of France is Paris.
Arrr, ye got me there, matey! Truth be, back in 2006, them fancy scallywags at the International Astronomical Union be sayin' Pluto ain't a full-fledged planet no more. They be callin' it a "dwarf planet" now. So, officially, she be a dwarf planet, savvy?


## Embedding


In [7]:
from graphrag_llm.embedding import LLMEmbedding, create_embedding
from graphrag_llm.types import LLMEmbeddingResponse

# Resolve the embedding model name once so the model and its Azure deployment
# name stay in sync when only GRAPHRAG_EMBEDDING_MODEL is set (mirroring the
# completion config, where one variable drives both fields).
embedding_model_name = os.getenv("GRAPHRAG_EMBEDDING_MODEL", "text-embedding-3-small")

embedding_config = ModelConfig(
    model_provider="azure",
    model=embedding_model_name,
    # GRAPHRAG_LLM_EMBEDDING_MODEL is still honored as an explicit override for
    # the deployment name; without it the deployment follows the model name
    # instead of silently reverting to the hard-coded default.
    azure_deployment_name=os.getenv(
        "GRAPHRAG_LLM_EMBEDDING_MODEL", embedding_model_name
    ),
    api_base=os.getenv("GRAPHRAG_API_BASE"),
    api_version=os.getenv("GRAPHRAG_API_VERSION", "2025-04-01-preview"),
    api_key=api_key,
    # Use key-based auth when a key is configured; otherwise managed identity.
    auth_method=AuthMethod.AzureManagedIdentity if not api_key else AuthMethod.ApiKey,
)

llm_embedding: LLMEmbedding = create_embedding(embedding_config)

# Embed several strings in one call.
texts_to_embed = ["Hello world", "How are you?"]
embeddings_batch: LLMEmbeddingResponse = llm_embedding.embedding(input=texts_to_embed)

# Print only the first three components of each vector to keep the output short.
for embedding in embeddings_batch.embeddings:
    print(embedding[:3])

[-0.002078542485833168, -0.04908587411046028, 0.020946789532899857]
[0.027567066252231598, -0.026544300839304924, -0.027091361582279205]


### First Embedding

`.embedding` batches by default: it takes a list of strings to embed. When embedding a single string, use `.first_embedding` on the response to obtain the first (and only) embedding.


In [9]:
# A single string is still passed as a one-element list.
single_text = "This is a single input string for embedding."
embedding_response = llm_embedding.embedding(input=[single_text])

# `first_embedding` is read from the response; show its first three components.
print(embedding_response.first_embedding[:3])

[0.05073608458042145, 0.003799507161602378, 0.019212841987609863]


## Async Embedding


In [8]:
embeddings_batch = await llm_embedding.embedding_async(
    input=["Hello world", "How are you?"]
)

for embedding in embeddings_batch.embeddings:
    print(embedding[0:3])

[-0.002078542485833168, -0.04908587411046028, 0.020946789532899857]
[0.027567066252231598, -0.026544300839304924, -0.027091361582279205]
