In [3]:
import langextract as lx
import textwrap
# Properties, Relations, and Data properties
# 1. Define the prompt and extraction rules
prompt = textwrap.dedent("""\
    Extract Classes in order of appearance.
    Use exact text for extractions. Do not paraphrase or overlap entities.
    Provide meaningful attributes for each entity to add context.""")

# 2. Provide a high-quality example to guide the model
examples = [
    lx.data.ExampleData(
        text="A Nd3 +-doped transparent oxyfluoride glass ceramic containing Sr5(PO4)3F nanocrystals was prepared by melt quenching technique and subsequent thermal treatment. " \
        "The phase and morphology of Sr5(PO4)3F nanocrystals were investigated by X-ray diffraction and transmission electron microscopy, respectively. " \
        "The volume fraction of Sr5(PO4)3F nanocrystals in the glass ceramic is about 12% and the fraction of Nd3 + ions incorporated in the Sr5(PO4)3F nanocrystals is about 15%. " \
        "The peak absorption cross-section increases to 145% at 806 nm and the full width at half maximum for the band around 806 nm decreases from 12.5 to 11 nm after the crystallization process. " \
        "The 1059 nm peak stimulated emission cross-section increases from 2.27 × 10- 20 to 3.07 × 10- 20 cm2 and the effective width for this band decreases from 35 to 30 nm after the crystallization process. " \
        "The improvement of spectroscopic properties indicates that the glass ceramic is potentially applicable as a 1.06 μm laser material. © 2013 Elsevier B.V.",
        extractions=[
            lx.data.Extraction(
                extraction_class="Classes",
                extraction_text="A Nd3+-doped transparent oxyfluoride glass ceramic containing Sr5(PO4)3F nanocrystals was prepared by melt quenching technique and subsequent thermal treatment. The phase and morphology of Sr5(PO4)3F nanocrystals were investigated by X-ray diffraction and transmission electron microscopy, respectively. The volume fraction of Sr5(PO4)3F nanocrystals in the glass ceramic is about 12% and the fraction of Nd3+ ions incorporated in the Sr5(PO4)3F nanocrystals is about 15%. The peak absorption cross-section increases to 145% at 806 nm and the full width at half maximum for the band around 806 nm decreases from 12.5 to 11 nm after the crystallization process. The 1059 nm peak stimulated emission cross-section increases from 2.27 × 10-20 to 3.07 × 10-20 cm2 and the effective width for this band decreases from 35 to 30 nm after the crystallization process. The improvement of spectroscopic properties indicates that the glass ceramic is potentially applicable as a 1.06 μm laser material.",
                attributes={
                    "GlassCeramic": "transparent oxyfluoride glass ceramic",
                    "OxyfluorideGlassCeramic": "transparent oxyfluoride glass ceramic",
                    "DopedMaterial": "Nd3+-doped transparent oxyfluoride glass ceramic",
                    "DopantIon": "Nd3+",
                    "Nanocrystal": "Sr5(PO4)3F nanocrystals",
                    "CrystallinePhase": "Sr5(PO4)3F",
                    "ChemicalCompound": "Sr5(PO4)3F",
                    "ProcessingMethod": "prepared by melt quenching technique and subsequent thermal treatment",
                    "MeltQuenching": "melt quenching technique",
                    "ThermalTreatment": "subsequent thermal treatment",
                    "CrystallizationProcess": "after the crystallization process",
                    "CharacterizationTechnique": "investigated by X-ray diffraction and transmission electron microscopy",
                    "XRD": "X-ray diffraction",
                    "TEM": "transmission electron microscopy",
                    "SpectroscopicProperty": "peak absorption cross-section ... full width at half maximum ... peak stimulated emission cross-section ... effective width",
                    "AbsorptionBand": "band around 806 nm",
                    "EmissionBand": "The 1059 nm peak stimulated emission cross-section",
                    "LaserMaterial": "potentially applicable as a 1.06 μm laser material",
                    "Application": "potentially applicable as a 1.06 μm laser material"
}
            ),
        ]
    )
]

# The input text to be processed
input_text = "A pulsed laser deposited SiO2/Ag/ZnO/Ag/TiO2 multilayer structure is studied to enhance the light trapping capability of thin-film solar cell. Structural and optical properties of structure are studied with scanning electron microscopy, x-ray diffraction, photoluminescence and UV–visible spectroscopy. Proposed geometry improves the extinction spectra and quenches photoluminescence in comparison to TiO2/Ag and SiO2/Ag/ZnO geometry. Finite-difference time-domain (FDTD) simulations indicate a promising effect of the proposed geometries on thin-film solar cells. Twofold enhancement in total quantum efficiency of an optimized multilayer plasmonic graded-index thin-film solar cell is observed in comparison to the pristine solar cell. Results suggest a more concerted study of multilayer plasmonic nanostructures with graded-index anti-reflection coatings to improve the performance of thin-film photovoltaic devices."

# 3. Perform the extraction using a local Ollama model

result = lx.extract(
    text_or_documents=input_text,
    prompt_description=prompt,
    examples=examples,
    model_id="gemma3:4b",  # Automatically selects Ollama provider
    model_url="http://localhost:11434",
    fence_output=False,
    use_schema_constraints=False
)


[94m[1mLangExtract[0m: Processing [00:00]

[94m[1mLangExtract[0m: Processing, current=[92m920[0m chars, processed=[92m0[0m chars:  [00:12]


In [4]:
print(result)

AnnotatedDocument(extractions=[Extraction(extraction_class='Classes', extraction_text='A pulsed laser deposited SiO2/Ag/ZnO/Ag/TiO2 multilayer structure is studied to enhance the light trapping capability of thin-film solar cell. Structural and optical properties of structure are studied with scanning electron microscopy, x-ray diffraction, photoluminescence and UV–visible spectroscopy.', char_interval=CharInterval(start_pos=0, end_pos=302), alignment_status=<AlignmentStatus.MATCH_EXACT: 'match_exact'>, extraction_index=1, group_index=0, description=None, attributes={'MaterialStructure': 'SiO2/Ag/ZnO/Ag/TiO2 multilayer structure', 'DepositionMethod': 'pulsed laser deposited', 'SolarCellApplication': 'to enhance the light trapping capability of thin-film solar cell', 'CharacterizationTechnique': 'studied with scanning electron microscopy, x-ray diffraction, photoluminescence and UV–visible spectroscopy'}), Extraction(extraction_class='Classes', extraction_text='Proposed geometry improve

In [7]:
lx.io.save_annotated_documents([result], output_name="extraction_results.jsonl")

html_content = lx.visualize("test_output/extraction_results.jsonl")

with open("visualization.html", "w", encoding="utf-8") as f:
    # In notebooks, html_content may be an object with .data
    f.write(html_content.data if hasattr(html_content, "data") else html_content)


[94m[1mLangExtract[0m: Saving to [92mtest_output\extraction_results.jsonl[0m: 1 docs [00:00, 945.73 docs/s]

[92m✓[0m Saved [1m1[0m documents to [92mtest_output\extraction_results.jsonl[0m



[94m[1mLangExtract[0m: Loading [92mtest_output\extraction_results.jsonl[0m: 100%|█████████▉| 3.38k/3.38k [00:00<00:00, 1.83MB/s]

[92m✓[0m Loaded [1m1[0m documents from [92mtest_output\extraction_results.jsonl[0m



