In [None]:
from typing import get_type_hints, get_args, get_origin, Annotated, Literal, List
import msgspec

def _annotation_description(metadata) -> str:
    """Return the first description found in ``Annotated`` metadata, or ``""``."""
    for item in metadata:
        if isinstance(item, str):
            return item
        # Metadata objects (msgspec.Meta, for instance) may expose the text
        # through a ``description`` attribute instead of being a plain string.
        nested = getattr(item, "description", None)
        if isinstance(nested, str) and nested:
            return nested
    return ""


def struct_to_llm_schema(cls) -> dict:
    """Build a compact, prompt-friendly schema from the annotations of *cls*.

    Each annotated field of *cls* becomes one entry of the returned dict:

    * ``Literal[...]`` and ``List[Literal[...]]`` map to the list of allowed
      values (both forms produce the same flat list).
    * ``str``, ``int``, ``float`` and ``bool`` map to ``"string"``,
      ``"integer"``, ``"float"`` and ``"boolean"``.
    * Any other annotation maps to ``str(annotation)``.

    When a field is declared as ``Annotated[T, ...]`` and its metadata holds
    a description (a plain string, or an object with a string ``description``
    attribute), the entry is ``{"type": ..., "description": ...}``; otherwise
    the entry is the bare type value.

    Args:
        cls: Any class whose annotations ``typing.get_type_hints`` can
            resolve.

    Returns:
        A dict mapping field names to their schema entries, in declaration
        order.
    """
    type_hints = get_type_hints(cls, include_extras=True)
    schema = {}

    for field_name, annotated_type in type_hints.items():
        if get_origin(annotated_type) is Annotated:
            base_type, *annotations = get_args(annotated_type)
            description = _annotation_description(annotations)
        else:
            base_type = annotated_type
            description = ""

        origin = get_origin(base_type)
        args = get_args(base_type)

        if origin is Literal:
            field_type = list(args)
        elif origin is list and args and get_origin(args[0]) is Literal:
            # ``args`` is empty for a bare ``List``; without the guard the
            # ``args[0]`` lookup raised IndexError.
            field_type = list(get_args(args[0]))
        elif base_type is str:
            field_type = "string"
        elif base_type is int:
            field_type = "integer"
        elif base_type is float:
            field_type = "float"
        elif base_type is bool:
            field_type = "boolean"
        else:
            field_type = str(base_type)

        if description:
            schema[field_name] = {
                "type": field_type,
                "description": description,
            }
        else:
            schema[field_name] = field_type

    return schema


class DocumentExtraction(msgspec.Struct):
    """Fields extracted from a document.

    Passed to ``struct_to_llm_schema`` to produce the schema and used as the
    ``type=`` target of ``msgspec.json.decode``.
    """
    # The string inside Annotated[...] is what struct_to_llm_schema reports
    # as the field's "description".
    introduction: Annotated[str, "An introduction extracted from the document"]
    architecture_overview: Annotated[str, "The architecture overview"]
    # List whose items are limited to the listed literal values; carries no
    # description metadata.
    communication_protocols: List[Literal["gRPC", "https", "REST/JSON"]]


# Show the schema generated from DocumentExtraction's annotations.
print(struct_to_llm_schema(DocumentExtraction))


{'introduction': {'type': 'string', 'description': 'An introduction extracted from the document'}, 'architecture_overview': {'type': 'string', 'description': 'The architecture overview'}, 'communication_protocols': ['gRPC', 'https', 'REST/JSON']}


In [5]:
# Decode a sample JSON payload into a typed DocumentExtraction instance.
result = msgspec.json.decode("{\"introduction\": \"This document outlines the design and implementation details of the Acme Microservice Framework.\", \"architecture_overview\": \"The system follows a modular microservices architecture, comprising of independent services that communicate via defined APIs. Each service is containerized using Docker and deployed using Kubernetes.\", \"communication_protocols\": [\"gRPC\", \"REST/JSON\"]}", type=DocumentExtraction)

In [6]:
# Display the decoded struct.
print(result)

DocumentExtraction(introduction='This document outlines the design and implementation details of the Acme Microservice Framework.', architecture_overview='The system follows a modular microservices architecture, comprising of independent services that communicate via defined APIs. Each service is containerized using Docker and deployed using Kubernetes.', communication_protocols=['gRPC', 'REST/JSON'])
