#### Install the required libraries

In [None]:
%pip install langextract

#### Import the libraries and set up the dotenv file with LANGEXTRACT_API_KEY, which uses the GEMINI API

In [1]:
import textwrap
import langextract as lx
from dotenv import load_dotenv
import os

load_dotenv()

True

#### Define the prompt with the extraction requirements

In [None]:
prompt = textwrap.dedent("""\
Extract the company name, specific financial metrics, and market sentiment from the text.
    Use exact text for extractions. Do not paraphrase or overlap entities.
    Provide meaningful attributes for each entity to add context.
    - For companies, include the stock ticker.
    - For financial metrics, specify the type and value.
    - For market sentiment, classify it as 'bullish', 'bearish', or 'neutral'.""")

#### Provide a high-quality example to guide the model

In [None]:
examples = [
    lx.data.ExampleData(
        text=(
            "AlphaTech (AT) announced a quarterly profit of $2.5 billion, exceeding analyst expectations"
            " and signaling a strongly bullish trend for the sector."
        ),
        extractions=[
            lx.data.Extraction(
                extraction_class="company",
                extraction_text="AlphaTech",
                attributes={"stock_ticker": "AT"},
            ),
            lx.data.Extraction(
                extraction_class="financial_metric",
                extraction_text="quarterly profit of $2.5 billion",
                attributes={"metric_type": "profit", "value": "$2.5 billion"}
            ),
            lx.data.Extraction(
                extraction_class="market_sentiment",
                extraction_text="strongly bullish trend",
                attributes={"sentiment": "bullish"}
            ),
        ],
    )
]

#### Provide an input for processing and execute the extraction process 

In [5]:
input_text = (
    "Global Dynamics Inc. (GDI) reported a staggering quarterly revenue of $15 billion, \
    but its stock dipped 2%, leading to a neutral but cautious market outlook."
)
result = lx.extract(
    text_or_documents=input_text,
    prompt_description=prompt,
    examples=examples,
    model_id="gemini-2.5-pro",
)

[94m[1mLangExtract[0m: model=[92mgemini-2.5-pro[0m, current=[92m161[0m chars, processed=[92m161[0m chars:  [00:07]

[92m✓[0m Extraction processing complete
[92m✓[0m Extracted [1m4[0m entities ([1m3[0m unique types)
  [96m•[0m Time: [1m7.62s[0m
  [96m•[0m Speed: [1m21[0m chars/sec
  [96m•[0m Chunks: [1m1[0m





#### Save the results in a JSONL file

In [6]:
lx.io.save_annotated_documents([result], output_name="/Users/sachintripathi/Documents/Py_files/LangExtract/extraction_results.jsonl")

[94m[1mLangExtract[0m: Saving to [92mextraction_results.jsonl[0m: 1 docs [00:00, 517.30 docs/s]

[92m✓[0m Saved [1m1[0m documents to [92mextraction_results.jsonl[0m





#### Visualise the results 

In [7]:
html_content = lx.visualize("extraction_results.jsonl")
html_content

[94m[1mLangExtract[0m: Loading [92mextraction_results.jsonl[0m: 100%|██████████| 1.39k/1.39k [00:00<00:00, 2.01MB/s]

[92m✓[0m Loaded [1m1[0m documents from [92mextraction_results.jsonl[0m



