In [1]:
import textwrap
from typing import cast

import langextract as lx
from IPython.display import HTML
from langextract.data import AnnotatedDocument

In [2]:
# 1. Instructions that tell the model what to extract and how
prompt = textwrap.dedent(
    """\
    Extract characters, emotions, and relationships in order of appearance.
    Use exact text for extractions. Do not paraphrase or overlap entities.
    Provide meaningful attributes for each entity to add context."""
)

# 2. A single worked example (few-shot guidance): one source sentence paired
#    with the three extractions expected from it. Each row of the table below
#    is (extraction class, exact text span, attributes).
examples = [
    lx.data.ExampleData(
        text="ROMEO. But soft! What light through yonder window breaks? It is the east, and Juliet is the sun.",
        extractions=[
            lx.data.Extraction(
                extraction_class=label,
                extraction_text=span,
                attributes=attrs,
            )
            for label, span, attrs in (
                ("character", "ROMEO", {"emotional_state": "wonder"}),
                ("emotion", "But soft!", {"feeling": "gentle awe"}),
                ("relationship", "Juliet is the sun", {"type": "metaphor"}),
            )
        ],
    )
]

In [3]:
# Sample passage that the extraction is run against
input_text = "Lady Juliet gazed longingly at the stars, her heart aching for Romeo"

# Talk to the local server: the provider is named explicitly as
# `OpenAILanguageModel`, and the endpoint URL and API key are passed through
# as provider keyword arguments.
config = lx.factory.ModelConfig(
    provider="OpenAILanguageModel",
    provider_kwargs={
        "base_url": "http://localhost:8000/v1/",
        "api_key": "llama-cpp",
    },
)


# Run the extraction using the prompt and example defined in the previous
# cell. Output fencing, schema constraints and debug output are all disabled.
result = lx.extract(
    text_or_documents=input_text,
    config=config,
    prompt_description=prompt,
    examples=examples,
    use_schema_constraints=False,
    fence_output=False,
    debug=False,
)

In [4]:
# `cast` is a static-typing hint only: it returns its argument unchanged at
# runtime. It narrows the type of `result` for the type checker, since
# `lx.extract` was called with a single string above.
result = cast(AnnotatedDocument, result)

# Build the HTML visualization of the extractions.
# NOTE(review): per the langextract docs, `lx.visualize` can return either an
# `IPython.display.HTML` object or a plain HTML string depending on the
# environment -- confirm against the installed version. A cast would not
# convert a string, and `display` would then show the markup as text, so wrap
# a bare string explicitly to guarantee it is rendered.
html_content = lx.visualize(result)
if isinstance(html_content, str):
    html_content = HTML(html_content)

display(html_content)