In [1]:
import langextract as lx
import textwrap

from IPython.display import HTML
from langextract.data import AnnotatedDocument
from typing import cast

In [2]:
# 1. Task description handed to the model: what to extract and the rules
#    the extractions must follow.
prompt = textwrap.dedent("""\
    Extract characters, emotions, and relationships in order of appearance.
    Use exact text for extractions. Do not paraphrase or overlap entities.
    Provide meaningful attributes for each entity to add context.""")

# 2. A single worked example showing the expected output shape.
#    Each row below is (extraction_class, extraction_text, attributes); every
#    extraction_text is a verbatim substring of the example sentence, and the
#    rows are listed in the order the spans occur in that sentence.
examples = [
    lx.data.ExampleData(
        text="ROMEO. But soft! What light through yonder window breaks? It is the east, and Juliet is the sun.",
        extractions=[
            lx.data.Extraction(
                extraction_class=entity_class,
                extraction_text=span,
                attributes=attrs,
            )
            for entity_class, span, attrs in (
                ("character", "ROMEO", {"emotional_state": "wonder"}),
                ("emotion", "But soft!", {"feeling": "gentle awe"}),
                ("relationship", "Juliet is the sun", {"type": "metaphor"}),
            )
        ],
    )
]

In [3]:
# Sentence the extraction is run against.
input_text = "Lady Juliet gazed longingly at the stars, her heart aching for Romeo"

# Backend selection: a model served at a local endpoint. The recorded log
# output of this cell shows the "gemma3:1b" id being handled by
# OllamaLanguageModel.
# NOTE(review): fence_output / use_schema_constraints are both switched off
# for this backend — confirm against the langextract docs before changing.
#
# Alternative kept for reference: to use the hosted Gemini model instead,
# pass model_id="gemini-2.5-flash" and omit the other backend settings.
_local_backend = {
    "model_id": "gemma3:1b",
    "model_url": "http://localhost:11434",
    "fence_output": False,
    "use_schema_constraints": False,
}

# Run the extraction with the prompt and few-shot example defined earlier.
result = lx.extract(
    text_or_documents=input_text,
    prompt_description=prompt,
    examples=examples,
    **_local_backend,
)

DEBUG:absl:Registered GeminiLanguageModel with patterns ['^gemini'] at priority 10
DEBUG:absl:Registered OllamaLanguageModel with patterns ['^gemma', '^llama', '^mistral', '^mixtral', '^phi', '^qwen', '^deepseek', '^command-r', '^starcoder', '^codellama', '^codegemma', '^tinyllama', '^wizardcoder', '^gpt-oss', '^meta-llama/[Ll]lama', '^google/gemma', '^mistralai/[Mm]istral', '^mistralai/[Mm]ixtral', '^microsoft/phi', '^Qwen/', '^deepseek-ai/', '^bigcode/starcoder', '^codellama/', '^TinyLlama/', '^WizardLM/'] at priority 10
DEBUG:absl:Registered OpenAILanguageModel with patterns ['^gpt-4', '^gpt4\\.', '^gpt-5', '^gpt5\\.'] at priority 10
2025-08-19 08:33:05,362 - langextract.debug - DEBUG - [langextract.inference] CALL: BaseLanguageModel.__init__(self=<OllamaLanguageModel>, constraint=Constraint(co...NONE: 'none'>), kwargs={})
2025-08-19 08:33:05,363 - langextract.debug - DEBUG - [langextract.inference] RETURN: BaseLanguageModel.__init__ -> None (0.0 ms)
2025-08-19 08:33:05,363 - langex

[92m✓[0m Extraction processing complete



INFO:absl:Finalizing annotation for document ID doc_3c5fdb17.
INFO:absl:Document annotation completed.


[92m✓[0m Extracted [1m3[0m entities ([1m3[0m unique types)
  [96m•[0m Time: [1m3.59s[0m
  [96m•[0m Speed: [1m19[0m chars/sec
  [96m•[0m Chunks: [1m1[0m


In [4]:
# Optional (currently disabled — uncomment to use): save the results to a JSONL file
# lx.io.save_annotated_documents([result], output_name="extraction_results.jsonl", output_dir=".")

# Generate the visualization from the file
# html_content: HTML = lx.visualize("extraction_results.jsonl")
# with open("visualization.html", "w") as f:
#     if hasattr(html_content, "data"):
#         f.write(html_content.data)  # For Jupyter/Colab
#     else:
#         f.write(html_content)

In [5]:
# Narrow the static type of `result` for type checkers. `cast` performs no
# runtime conversion or validation, so `result` is the same object as before.
# NOTE(review): presumably needed because lx.extract's declared return type
# is broader than a single AnnotatedDocument — confirm.
result = cast(AnnotatedDocument, result)

# Build the visualization. The disabled export cell earlier in this notebook
# handles both an object exposing `.data` and a plain string, so the return
# value is not guaranteed to be an IPython `HTML` object. A `cast` would not
# change that at runtime, and displaying a bare string shows raw markup
# instead of rendering it, so wrap anything that is not already `HTML`.
html_content = lx.visualize(result)
if not isinstance(html_content, HTML):
    html_content = HTML(html_content)

display(html_content)