In [1]:
# Built-in library
import asyncio
import json
import logging
import re
import warnings
from pathlib import Path
from pprint import pprint
from typing import Annotated, Any, Iterable, Literal, Optional, Type, Union

# Standard imports
import nest_asyncio
import numpy as np
import numpy.typing as npt
import pandas as pd
import polars as pl
from rich.console import Console
from rich.theme import Theme

# Named styles (hex colours) made available to the shared rich console below.
custom_theme = Theme(
    {
        "white": "#FFFFFF",  # Bright white
        "info": "#00FF00",  # Bright green
        "warning": "#FFD700",  # Bright gold
        "error": "#FF1493",  # Deep pink
        "success": "#00FFFF",  # Cyan
        "highlight": "#FF4500",  # Orange-red
    }
)
# Console instance used by later cells to pretty-print API responses.
console = Console(theme=custom_theme)

# Visualization
# import matplotlib.pyplot as plt

# NumPy settings
# Print array floats with 4 digits of precision.
np.set_printoptions(precision=4)

# Pandas settings
# Raise the display limits so long/wide frames and long cell text are not truncated.
pd.options.display.max_rows = 1_000
pd.options.display.max_columns = 1_000
pd.options.display.max_colwidth = 600

# Polars settings
# Show up to 1,000 characters per string value and up to 1,000 table columns.
pl.Config.set_fmt_str_lengths(1_000)
pl.Config.set_tbl_cols(n=1_000)

# Silence UserWarning messages raised from modules whose name starts with "numpy".
warnings.filterwarnings("ignore", category=UserWarning, module="numpy")

# Black code formatter (Optional)
# NOTE(review): needs the `lab_black` IPython extension to be installed;
# presumably this line errors on a kernel without it — confirm or drop it.
%load_ext lab_black

# auto reload imports
# Mode 2: re-import edited modules before each cell execution.
%load_ext autoreload
%autoreload 2

In [2]:
def go_up_from_current_directory(*, go_up: int = 1) -> None:
    """Put an ancestor of the current working directory at the front of ``sys.path``.

    Params:
    -------
    go_up: int, default=1
        Number of directory levels to climb from the current working directory.
        ``0`` (or a negative value) selects the working directory itself.

    Returns:
    --------
    None
        The resolved absolute path is printed and becomes ``sys.path[0]``.
    """
    import os
    import sys

    # Anchor explicitly on the working directory. The previous
    # `os.path.dirname(__name__)` always evaluated to "" (``__name__`` is a
    # module name, not a file path), so it only worked by accident.
    levels = [os.pardir] * go_up  # empty list when go_up <= 0
    target = os.path.abspath(os.path.join(os.getcwd(), *levels))

    # Re-running the cell must not pile up duplicate entries: drop any existing
    # occurrence, then give the directory top priority again.
    sys.path[:] = [entry for entry in sys.path if entry != target]
    sys.path.insert(0, target)
    print(target)

In [3]:
# Put the parent directory on sys.path *before* the imports below.
# NOTE(review): presumably `schemas` and `settings` live in that parent
# directory — confirm against the repository layout.
go_up_from_current_directory(go_up=1)


from schemas import ModelEnum  # noqa: E402
from settings import refresh_settings  # noqa: E402

# Settings object; later cells read API keys and base URLs from it.
settings = refresh_settings()

/Users/neidu/Desktop/Projects/Personal/My_Projects/AI-Tutorials


### Define Schemas (Pydantic Models)

In [None]:
from openai import AsyncOpenAI
from pydantic import BaseModel, BeforeValidator, Field


def string_constraints(value: str) -> str:
    """Trim surrounding whitespace and title-case string input.

    Anything that is not a ``str`` is handed back unchanged.
    """
    if not isinstance(value, str):
        return value
    trimmed = value.strip()
    return trimmed.title()


# `str` type whose raw input is passed through `string_constraints` before validation.
ConstrainedStr = Annotated[str, BeforeValidator(string_constraints)]


# One extracted person record. Documented with `#` comments on purpose: a class
# docstring would be emitted as "description" in the generated JSON schema.
class Person(BaseModel):
    # String fields are stripped and title-cased by `string_constraints`.
    name: ConstrainedStr = Field(description="The name of the person")
    role: ConstrainedStr = Field(description="The role of the person")
    salary: float = Field(description="The yearly salary of the person")
    organisation: ConstrainedStr = Field(description="The organisation the person works for")


# Top-level response wrapper: a list of `Person` records under the key "persons".
class Persons(BaseModel):
    persons: list[Person]

### Prompts

In [5]:
# System prompt template. `{json_schema}` is filled in with str.format() by the
# callers, which is why the literal braces further down are escaped as `{{}}`.
# NOTE(review): `/no_think` is presumably a model-specific switch to disable
# reasoning output — confirm which models honour it.
# NOTE(review): the "Strings: null" default looks like it conflicts with the
# non-nullable `str` fields in the schemas defined in this notebook — confirm.
system_msg: str = """
<system>
/no_think
<role>
You are a data extraction assistant. Extract information from the provided text and return ONLY a 
valid JSON object that matches this exact schema:

<schema>
{json_schema}
</schema>
</role>

<guidelines>
- Return only valid JSON - no explanations, markdown, or additional text
- Extract data precisely as it appears in the source text
- Do not include fields not present in the schema
- For missing required fields, use these defaults:
  * Numbers: 0
  * Strings: null
  * Booleans: false
  * Arrays: []
  * Objects: {{}}
- Preserve original data types and formatting where possible
- If text contains ambiguous information, choose the most likely interpretation
</guidelines>

<output_format>
Valid JSON object only
</output_format>

</system>
"""

In [None]:
def _strip_think_blocks(text: str) -> str:
    """Drop ``<think>...</think>`` blocks (tags and content) plus any stray tags."""
    without_blocks = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
    return without_blocks.replace("<think>", "").replace("</think>", "").strip()


async def get_structured_response(
    client: Optional[AsyncOpenAI],
    message: str,
    model: str,
    response_model: Type[BaseModel],
) -> tuple[BaseModel, Any] | tuple[dict[str, Any], dict[str, str]]:
    """Request a schema-constrained completion and validate it with Pydantic.

    Parameters
    ----------
    client : Optional[AsyncOpenAI]
        The OpenAI-compatible async client used for the API call. ``None`` is
        reported through the error tuple.
    message : str
        The user message to process
    model : str
        The name of the model to use
    response_model : Type[BaseModel]
        The Pydantic model class whose JSON schema constrains the output and
        which validates the parsed response.

    Returns
    -------
    tuple[BaseModel, Any] | tuple[dict[str, Any], dict[str, str]]
        On success ``(structured_output, raw_response)``: the validated
        ``response_model`` instance and the complete API response.
        On any failure ``(error_dict, error_info)`` where ``error_dict`` is
        ``{"status": "error", "raw_response": None}`` and ``error_info`` is
        ``{"status": "error", "error": <message>}``. Exceptions are never
        propagated to the caller.
    """
    try:
        if client is None:
            raise ValueError("client must not be None")

        json_schema: dict[str, Any] = response_model.model_json_schema()
        raw_response = await client.chat.completions.create(  # type: ignore
            model=model,
            messages=[
                {
                    "role": "system",
                    # Serialise as real JSON; formatting the dict directly would
                    # embed a Python repr (single quotes, True/None) in a prompt
                    # that asks the model to follow a JSON schema.
                    "content": system_msg.format(
                        json_schema=json.dumps(json_schema, indent=2)
                    ),
                },
                {"role": "user", "content": message},
            ],
            # The Chat Completions API expects the schema nested under the
            # "json_schema" key together with a name.
            response_format={
                "type": "json_schema",
                "json_schema": {
                    "name": response_model.__name__,
                    "schema": json_schema,
                    "strict": True,
                },
            },
            temperature=0,
            seed=42,
            extra_body={"enable_thinking": False},  # Disable thinking
        )

        content = raw_response.choices[0].message.content
        if content is None:
            raise ValueError("Model returned no message content")

        # Remove reasoning text entirely; stripping only the tags would leave
        # the reasoning in front of the JSON and break json.loads.
        _value: str = _strip_think_blocks(content)
        structured_output = response_model.model_validate(json.loads(_value))
        return (structured_output, raw_response)

    except Exception as e:
        # Deliberate best-effort: report the failure instead of raising so a
        # notebook run keeps going.
        return {"status": "error", "raw_response": None}, {
            "status": "error",
            "error": str(e),
        }

In [8]:
model = ModelEnum.LLAMA_3p2_3B_INSTRUCT_REMOTE.value
message: str = (
    "Michael Scott is a software engineer at Anery Limited. He has 4 years "
    "of experience. He currently earns $100,000 a year. James Kayode has been "
    "an instructor at the Centre For Artificial Intelligence for the last 5 years."
    "He earns an estimated $50,000 a year although he has not disclosed his salary. "
    "At 27 years old, Adaugo is a student at the University of Lagos who believes in "
    "fighting for justice and equality. "
)
aclient = AsyncOpenAI(
    api_key=settings.OPENROUTER_API_KEY.get_secret_value(),
    base_url=settings.OPENROUTER_URL,
    timeout=60,
)
structured_output, raw_response = await get_structured_response(
    message=message, model=model, response_model=Persons, client=aclient
)
console.print(raw_response)
print("----" * 50)
console.print(structured_output)

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------


### Using Ollama

In [9]:
# Minimal single-person schema for the Ollama example below. `#` comments are
# used because a class docstring would end up in the generated JSON schema.
class MySchema(BaseModel):
    name: str = Field(description="The name of the person")
    age: int = Field(description="The age of the person")
    occupation: str = Field(description="The occupation of the person")


# Single-person example sent to the endpoint configured in settings.OLLAMA_URL.
message: str = "John is a 30-year-old software engineer"
# `response` holds the whole 2-tuple returned by get_structured_response.
response = await get_structured_response(
    message=message,
    model="qwen2.5:3b",
    response_model=MySchema,
    client=AsyncOpenAI(
        api_key=settings.OLLAMA_API_KEY.get_secret_value(),
        base_url=settings.OLLAMA_URL,
    ),
)
console.print(response)

# Putting It Together

In [None]:
from dataclasses import dataclass


@dataclass
class StructuredLLMResponse:
    """Schema-constrained completions against an OpenAI-compatible endpoint.

    Parameters
    ----------
    api_key : str
        The API key for authentication
    base_url : str
        The base URL for the API endpoint
    model : str
        The name of the model to use
    """

    api_key: str
    base_url: str
    model: str

    @staticmethod
    def _strip_think_blocks(text: str) -> str:
        """Drop ``<think>...</think>`` blocks (tags and content) plus any stray tags."""
        without_blocks = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
        return without_blocks.replace("<think>", "").replace("</think>", "").strip()

    async def get_structured_response(
        self, message: str, response_model: Type[BaseModel]
    ) -> tuple[BaseModel, Any] | tuple[dict[str, str], dict[str, Any]]:
        """Request a schema-constrained completion and validate it with Pydantic.

        Parameters
        ----------
        message : str
            The user message to send to the API.
        response_model : Type[BaseModel]
            The Pydantic model class whose JSON schema constrains the output
            and which validates the parsed response.

        Returns
        -------
        tuple[BaseModel, Any] | tuple[dict[str, str], dict[str, Any]]
            On success ``(structured_output, raw_response)``: the validated
            ``response_model`` instance and the complete API response.
            On any failure ``(error_dict, error_info)`` where ``error_dict`` is
            ``{"status": "error", "error": <message>}`` and ``error_info`` is
            ``{"status": "error", "raw_response": None}``. Exceptions are never
            propagated to the caller.
        """
        try:
            json_schema: dict[str, Any] = response_model.model_json_schema()

            # A client is created per call; the context manager closes its
            # HTTP resources once the request has completed.
            async with AsyncOpenAI(
                api_key=self.api_key,
                base_url=self.base_url,
            ) as aclient:
                raw_response = await aclient.chat.completions.create(  # type: ignore
                    model=self.model,
                    messages=[
                        {
                            "role": "system",
                            # Serialise as real JSON rather than a Python dict
                            # repr, since the prompt asks for JSON that matches
                            # this schema.
                            "content": system_msg.format(
                                json_schema=json.dumps(json_schema, indent=2)
                            ),
                        },
                        {"role": "user", "content": message},
                    ],
                    # The Chat Completions API expects the schema nested under
                    # the "json_schema" key together with a name.
                    response_format={
                        "type": "json_schema",
                        "json_schema": {
                            "name": response_model.__name__,
                            "schema": json_schema,
                            "strict": True,
                        },
                    },
                    temperature=0,
                    seed=42,
                )

            content = raw_response.choices[0].message.content
            if content is None:
                raise ValueError("Model returned no message content")

            # Remove reasoning text entirely; stripping only the tags would
            # leave the reasoning in front of the JSON and break json.loads.
            _value = self._strip_think_blocks(content)
            structured_output = response_model.model_validate(json.loads(_value))
            return (structured_output, raw_response)

        except Exception as e:
            # Deliberate best-effort: report the failure instead of raising so
            # a notebook run keeps going.
            return (
                {"status": "error", "error": str(e)},
                {"status": "error", "raw_response": None},
            )

In [11]:
model = ModelEnum.LLAMA_3p2_3B_INSTRUCT_REMOTE.value
message: str = (
    "Michael Scott is a software engineer at Anery Limited. He has 4 years "
    "of experience. He currently earns $100,000 a year. James Kayode has been "
    "an instructor at the Centre For Artificial Intelligence for the last 5 years."
    "He earns an estimated $50,000 a year although he has not disclosed his salary. "
    "At 27 years old, Adaugo is a student at the University of Lagos who believes in "
    "fighting for justice and equality. "
)

llm = StructuredLLMResponse(
    api_key=settings.OPENROUTER_API_KEY.get_secret_value(),
    base_url=settings.OPENROUTER_URL,
    model=model,
)
structured_output, raw_response = await llm.get_structured_response(
    message=message,
    response_model=Persons,
)
console.print(raw_response)
print("----" * 50)
console.print(structured_output)

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------


In [12]:
# One bank transaction. `#` comments are used because a class docstring would
# end up in the generated JSON schema.
class Transaction(BaseModel):
    # Optional: stays None when no identifier is supplied.
    transaction_id: str | None = Field(
        default=None, description="The unique identifier for the transaction"
    )
    # Plain string; the description asks for ISO format but nothing here enforces it.
    date: str = Field(description="The date of the transaction in ISO format")
    amount: float = Field(description="The amount of the transaction")
    description: str = Field(description="The description of the transaction")

# Top-level response wrapper: a list of `Transaction` records under "transactions".
class Transactions(BaseModel):
    transactions: list[Transaction]


message: str = """
The following are transactions from a bank account: 
    1: On May 20, 2024, at 10:00 AM UTC, an amount of €55.75 was spent on groceries at SuperMart.
    2: On May 21, 2024, at 2:30 PM UTC, €12.50 was spent on coffee at Daily Grind.
    3 (ID: T003): On May 22, 2024, at 9:15 AM UTC, €120.00 was spent on dinner at The Bistro.
    4 (ID: T004): Also on May 22, 2024, at 6:00 PM UTC, €35.00 was spent on movie tickets.
    5: On May 23, 2024, at 11:45 AM UTC, spent €8.99 on a book from an online store.
    6 (ID: T006): On May 24, 2024, at 8:00 AM UTC, a rent payment of €450.00 was made.
    7: Also on May 24, 2024, at 4:20 PM UTC, €25.50 was spent on a gas station refill.
    8 (ID: T008): On May 25, 2024, at 1:10 PM UTC, €60.00 was spent on a haircut.
    9 (ID: T009): On May 26, 2024, at 7:00 AM UTC, €15.00 was spent on bus fare.
    10: On May 26, 2024, at 8:00 PM UTC, €75.20 was spent on an online subscription renewal."
"""


structured_output, raw_response = await llm.get_structured_response(
    message=message, response_model=Transactions
)
# console.print(raw_response)
print("----" * 50)
console.print(structured_output)

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------


In [15]:
# Same transactions prompt, sent to the endpoint configured in
# settings.OLLAMA_URL with a different model.
ollama_llm = StructuredLLMResponse(
    api_key=settings.OLLAMA_API_KEY.get_secret_value(),
    base_url=settings.OLLAMA_URL,
    model="Qwen3-finNER-4B-reasoning:v1",
)
# `message` is the transactions text defined in the earlier cell, so that cell
# must have been run first.
structured_output, raw_response = await ollama_llm.get_structured_response(
    message=message,
    response_model=Transactions,
)
console.print(raw_response)
print("----" * 50)
console.print(structured_output)

--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
