<a href="https://colab.research.google.com/github/amalsalilan/B3-Developing-Named-Entity-Recognition-NER-Models-for-Financial-Data-Extraction-/blob/Isha/financial_clause.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install langextract -q

In [None]:
import getpass
import os
import textwrap

import langextract as lx

# 1. Describe the extraction task. The backslash right after the opening
#    quotes keeps the prompt from starting with an empty line.
prompt = textwrap.dedent("""\
    Extract the financial data clause from the document using the exact text.
""")


def _financial_clause_example(text, attributes):
    """Build one few-shot example whose single extraction spans all of `text`.

    Args:
        text: The example clause. It is used both as the example document and
            as the verbatim `extraction_text`.
        attributes: Mapping of attribute name to value for the extraction.

    Returns:
        An `lx.data.ExampleData` holding one `financial_clause` extraction.
    """
    return lx.data.ExampleData(
        text=text,
        extractions=[
            lx.data.Extraction(
                extraction_class="financial_clause",
                extraction_text=text,
                attributes=attributes,
            )
        ],
    )


# 2. Provide a few high-quality examples to guide the model.
#    Every example carries the same attribute keys, in the same order, so the
#    schema shown to the model is uniform. A value that a clause does not state
#    is filled with an explicit "Not specified" / "None specified" placeholder
#    rather than leaving the key out.
examples = [
    _financial_clause_example(
        "All invoices shall be paid within thirty (30) days of receipt.",
        {
            "payment_terms": "30 days",
            "amount": "Not specified",
            "late_fee": "None specified",
            "currency": "Not specified",
            "responsible_party": "Client",
            "condition": "invoice receipt",
        },
    ),
    _financial_clause_example(
        "Late payments shall accrue interest at 1.5% per month until fully paid.",
        {
            "payment_terms": "Late fee applies",
            "amount": "Not specified",
            "late_fee": "1.5% per month",
            "currency": "Not specified",
            "responsible_party": "Client",
            "condition": "late payment",
        },
    ),
    _financial_clause_example(
        "The Service Provider shall receive an advance payment of INR 50,000 prior to commencement of services.",
        {
            "payment_terms": "Advance",
            "amount": "INR 50,000",
            "late_fee": "None specified",
            "currency": "INR",
            "responsible_party": "Client",
            "condition": "before service start",
        },
    ),
]

In [None]:
# 3. Input document text
# A sample service agreement used as the extraction target. The three sentences
# under "2. Payment Terms" are the same sentences used as the few-shot examples;
# the remaining sections contain no payment wording.
input_text = """
SERVICE AGREEMENT

This Service Agreement (“Agreement”) is made between FinEdge Analytics Pvt. Ltd. (“Service Provider”)
and BluePeak Capital Advisors (“Client”) as of March 10, 2025.

1. Scope of Services
The Service Provider agrees to deliver quarterly financial risk reports, compliance reviews,
and portfolio performance analytics as requested by the Client.

2. Payment Terms
All invoices shall be paid within thirty (30) days of receipt.
Late payments shall accrue interest at 1.5% per month until fully paid.
The Service Provider shall receive an advance payment of INR 50,000 prior to commencement of services.

3. Confidentiality
Both parties agree to maintain confidentiality of proprietary information.

4. Termination
Either party may terminate this Agreement by providing sixty (60) days’ written notice.

5. Governing Law
This Agreement shall be governed by and construed in accordance with the laws of India.
"""

In [None]:
# Run the extraction
# The API key is taken from the LANGEXTRACT_API_KEY environment variable and,
# if that is unset or empty, requested interactively without echoing it. This
# keeps the credential out of the notebook source and its version history.
# Any key that was ever committed in plain text should be revoked and replaced.
api_key = os.environ.get("LANGEXTRACT_API_KEY") or getpass.getpass("Gemini API key: ")

result = lx.extract(
    text_or_documents=input_text,
    prompt_description=prompt,
    examples=examples,
    api_key=api_key,
    model_id="gemini-2.5-flash",
)

In [None]:
# Save the results to a JSONL file
lx.io.save_annotated_documents([result], output_name="extraction_results.jsonl", output_dir=".")

# Generate the visualization from the file
html_content = lx.visualize("extraction_results.jsonl")

# In Jupyter/Colab the returned object exposes the markup on `.data`;
# otherwise the return value is written as-is.
html_text = html_content.data if hasattr(html_content, "data") else html_content

# Write as UTF-8 explicitly: without `encoding`, open() uses the platform's
# default codec, which can fail or corrupt non-ASCII characters in the page.
with open("visualization.html", "w", encoding="utf-8") as f:
    f.write(html_text)

[94m[1mLangExtract[0m: Saving to [92mextraction_results.jsonl[0m: 1 docs [00:00, 556.94 docs/s]

[92m✓[0m Saved [1m1[0m documents to [92mextraction_results.jsonl[0m



[94m[1mLangExtract[0m: Loading [92mextraction_results.jsonl[0m: 100%|██████████| 2.48k/2.48k [00:00<00:00, 3.66MB/s]

[92m✓[0m Loaded [1m1[0m documents from [92mextraction_results.jsonl[0m





In [None]:
# Bare last expression: the notebook displays the visualization object as this cell's output.
html_content