In [45]:
!pip install langextract



[94m[1mLangExtract[0m: Saving to [92mextraction_results.json[0m: 0 docs [08:12, ? docs/s]
[94m[1mLangExtract[0m: Saving to [92mextraction_results.json[0m: 0 docs [02:12, ? docs/s]
[94m[1mLangExtract[0m: Saving to [92mextraction_results.json[0m: 0 docs [09:30, ? docs/s]


In [9]:
import os
from getpass import getpass

# The API key is taken from the LANGEXTRACT_API_KEY environment variable.
# Never store a literal key in the notebook: reuse the value already present in
# the environment and only fall back to a hidden interactive prompt when it is
# missing or empty.
# NOTE(review): any key that was ever committed in this notebook must be treated
# as leaked and revoked/rotated with the provider.
if not os.environ.get("LANGEXTRACT_API_KEY"):
    os.environ["LANGEXTRACT_API_KEY"] = getpass("LANGEXTRACT_API_KEY: ")

In [10]:
import langextract as lx
import textwrap

In [66]:
# Instruction text passed to the extractor: the entity families to pull out of
# the document and the rules each extraction must follow. The literal is indented
# for readability; textwrap.dedent strips that common margin again.
prompt = textwrap.dedent(
    """\
    Extract entities related to Party & Identification Information, Monetary Values, and Dates & Time Periods.
    Follow these rules:
    1. Use exact text spans from the input, no paraphrasing.
    2. Do not overlap entities.
    3. Provide meaningful attributes to add context.
    4. Categories:
       - Party & Identification Information:
         • Entity Names: Company, institution, counterparty, regulator
         • Identifiers: Tax ID, GST/VAT, PAN, CIN, registration numbers
         • Account Numbers: Bank account, loan account, investment account IDs
       - Monetary Values:
         • Principal Amounts: Loan value, invoice total, transaction amount
         • Fees & Charges: Service fees, late fees, management charges
         • Interest Rates: Fixed/floating, annual %, APR, benchmark reference (LIBOR, SOFR)
         • Taxes: VAT, GST, withholding tax
         • Penalties/Fines: Early termination fee, default charges
       - Dates & Time Periods:
         • Effective Date, Maturity Date, Due Dates, Tenure/Duration
         • Historical Dates: Transaction date, invoice date, settlement date
    """
)



# Few-shot example that shows the model the expected output shape.
# Every `extraction_text` below is a verbatim span of the example `text` and the
# extractions are listed in order of first appearance, which is what the prompt
# ("Use exact text spans from the input") asks of the model as well.
# Classes that have no matching span in this text (PERSON, PERCENT,
# ACCOUNT_NUMBER) are intentionally not demonstrated here; to teach them, add a
# further lx.data.ExampleData whose text really contains such values.
# NOTE(review): this example text largely repeats the notebook's input document;
# consider a different sample so the result is not simply copied from the example.
examples = [
    lx.data.ExampleData(
        text=
        'This Agreement is entered into on the 5th day of July, 2024, between Alpha Technologies Pvt. Ltd., a company incorporated under the Companies Act, 2013 and having its registered office at #210, MG Road, Mumbai, India, (hereinafter referred to as the "Service Provider"), and Delta Innovations Inc., located at 455 Market Street, San Francisco, CA 94105,\n\n(hereinafter referred to as the "Client"). The Agreement becomes effective on August 1, 2024.\n\nThe total contract value is USD 125,000.00, inclusive of applicable taxes and fees. The contract shall remain in effect for a period of 12 months unless terminated earlier in accordance with the provisions herein. The termination clause states that either party may terminate the agreement with 30 days\' written notice under justifiable circumstances.\n\nThis Agreement shall be governed by and construed in accordance with the laws of the State of\n\nCalifornia, United States. The governing law clause ensures that any dispute arising out of or in connection with this Agreement shall be subject to the exclusive jurisdiction of the courts located in San Francisco County, California.\n\nContract Reference No: CON-ALD-20240705',

        extractions=[
            lx.data.Extraction(
                extraction_class="DATE",
                extraction_text="5th day of July, 2024",
                attributes={"type": "agreement_date"}
            ),
            lx.data.Extraction(
                extraction_class="ORG",
                extraction_text="Alpha Technologies Pvt. Ltd.",
                attributes={"role": "service provider"}
            ),
            lx.data.Extraction(
                extraction_class="LOCATION",
                extraction_text="Mumbai",
                attributes={"type": "registered office"}
            ),
            lx.data.Extraction(
                extraction_class="ROLE",
                extraction_text="Service Provider",
                attributes={"relation": "party providing the services"}
            ),
            lx.data.Extraction(
                extraction_class="ORG",
                extraction_text="Delta Innovations Inc.",
                attributes={"role": "client"}
            ),
            lx.data.Extraction(
                extraction_class="LOCATION",
                extraction_text="San Francisco",
                attributes={"type": "client address"}
            ),
            lx.data.Extraction(
                extraction_class="ROLE",
                extraction_text="Client",
                attributes={"relation": "party receiving the services"}
            ),
            lx.data.Extraction(
                extraction_class="DATE",
                extraction_text="August 1, 2024",
                attributes={"type": "effective_date"}
            ),
            lx.data.Extraction(
                extraction_class="MONEY",
                extraction_text="USD 125,000.00",
                attributes={"currency": "USD", "purpose": "total contract value"}
            ),
            lx.data.Extraction(
                extraction_class="DATE",
                extraction_text="12 months",
                attributes={"type": "contract_tenure"}
            ),
            lx.data.Extraction(
                extraction_class="DATE",
                extraction_text="30 days",
                attributes={"type": "termination_notice_period"}
            ),
            lx.data.Extraction(
                extraction_class="CONTRACT_REFERENCE",
                extraction_text="CON-ALD-20240705",
                attributes={"type": "contract reference number"}
            ),
        ]
    )
]



In [67]:
# Contract excerpt to analyse. Each source line is its own literal; lines are
# separated by "\n" and the text has no trailing newline.
input_text = (
    "This Agreement is entered into on the 5th day of July, 2024,\n"
    "between Alpha Technologies Pvt. Ltd., a company incorporated under the Companies Act, 2013\n"
    "and having its registered office at #210, MG Road, Mumbai, India,\n"
    '(hereinafter referred to as the "Service Provider"), and Delta Innovations Inc.,\n'
    "located at 455 Market Street, San Francisco, CA 94105,\n"
    '(hereinafter referred to as the "Client").\n'
    "The Agreement becomes effective on August 1, 2024.\n"
    "The total contract value is USD 125,000.00, inclusive of applicable taxes and fees.\n"
    "Contract Reference No: CON-ALD-20240705"
)

In [68]:
# Run the extraction: the model named by `model_id` receives the task prompt,
# the few-shot examples and the document text, and the outcome is kept in `result`.
result = lx.extract(
    model_id="gemini-2.5-flash",
    examples=examples,
    prompt_description=prompt,
    text_or_documents=input_text,
)

In [69]:
# Persist the single annotated document as extraction_results.json in the
# current working directory.
lx.io.save_annotated_documents(
    [result],
    output_dir=".",
    output_name="extraction_results.json",
)

[94m[1mLangExtract[0m: Saving to [92mextraction_results.json[0m: 1 docs [00:00, 895.26 docs/s]

[92m✓[0m Saved [1m1[0m documents to [92mextraction_results.json[0m





In [70]:
# Build the visualization from the saved results file and keep a standalone
# copy on disk as visualization.html.
html_content = lx.visualize("extraction_results.json")
# Write with an explicit UTF-8 encoding: the platform default encoding may not be
# able to represent non-ASCII characters contained in the generated markup.
with open("visualization.html", "w", encoding="utf-8") as f:
    if hasattr(html_content, 'data'):
        f.write(html_content.data)  # For Colab/Jupyter: markup is held in `.data`
    else:
        f.write(html_content)  # otherwise the returned value is written as-is

[94m[1mLangExtract[0m: Loading [92mextraction_results.json[0m: 100%|██████████| 2.79k/2.79k [00:00<00:00, 3.78MB/s]

[92m✓[0m Loaded [1m1[0m documents from [92mextraction_results.json[0m





In [71]:
from IPython.display import display, HTML

# Show the visualization inline. When the object exposes a `.data` attribute its
# value is rendered; otherwise the object itself is passed to HTML().
html_markup = getattr(html_content, 'data', html_content)
display(HTML(html_markup))
