Following this [guide](https://python.useinstructor.com/examples/ollama/), we can use Ollama together with Instructor to constrain Ollama's responses to a Pydantic model.

In [3]:
from openai import OpenAI
from pydantic import BaseModel, Field
from typing import List
import instructor

In [10]:
class Character(BaseModel):
    # Schema that the completion requests below pass as `response_model`;
    # each reply is parsed into these three fields.
    # Documented with `#` comments rather than a docstring so that the JSON
    # schema pydantic generates for this model stays exactly as it was.
    name: str
    age: int
    # `...` marks the field as required (no default value).
    fact: List[str] = Field(
        ...,
        description="A list of facts about the character",
    )


In [11]:
# OpenAI-compatible client pointed at the local Ollama endpoint, wrapped by
# instructor in JSON mode so requests can carry a `response_model`.
client = instructor.from_openai(
    OpenAI(
        api_key="null",  # placeholder string, not a real credential
        base_url="http://localhost:11434/v1",
    ),
    mode=instructor.Mode.JSON,
)

In [13]:
# Send a single user prompt to gemma2:9b; `response_model` asks the client to
# return the reply parsed as a `Character`.
resp = client.chat.completions.create(
    response_model=Character,
    model="gemma2:9b",
    messages=[
        {"role": "user", "content": "Tell me about the Harry Potter"},
    ],
)

In [15]:
# Show the parsed result as JSON with 2-space indentation.
print(resp.model_dump_json(indent=2))

{
  "name": "Harry Potter",
  "age": 11,
  "fact": [
    "He is an orphan raised by his aunt, uncle, and cousin.",
    "He learns on his eleventh birthday that he is a wizard and is destined to attend Hogwarts School of Witchcraft and Wizardry.",
    "He is famous in the wizarding world for surviving Lord Voldemort's killing curse as a baby."
  ]
}


In [16]:
# Repeat the request with llama3:instruct, keeping the same prompt and
# `response_model` as the earlier gemma2:9b request. `resp` is overwritten.
resp = client.chat.completions.create(
    response_model=Character,
    model="llama3:instruct",
    messages=[
        {"role": "user", "content": "Tell me about the Harry Potter"},
    ],
)

In [18]:
# Show the llama3:instruct result as JSON with 2-space indentation.
print(resp.model_dump_json(indent=2))

{
  "name": "Harry James Potter",
  "age": 37,
  "fact": [
    "Some important facts about Harry Potter"
  ]
}
