In [None]:
import spacy
import pandas as pd
import matplotlib.pyplot as plt
from spacy.matcher import Matcher

# Load spaCy's small pre-trained English pipeline.
MODEL_NAME = "en_core_web_sm"
nlp = spacy.load(MODEL_NAME)

# Sample contract text. The value starts and ends with a newline, exactly as
# a triple-quoted literal opened and closed on their own lines would.
document_text = (
    "\n"
    "This agreement is made between ABC Corp and XYZ Ltd on January 1, 2025.\n"
    "The contract is valid for a period of two years, ending on December 31, 2026.\n"
    "ABC Corp shall provide software development services to XYZ Ltd.\n"
    "Payment terms are net 30 days from invoice date.\n"
)

# Run the full pipeline over the text; later steps read entities, sentences
# and tokens from this single Doc.
doc = nlp(document_text)

# Collect every named entity found in the document as (text, label) pairs
# and tabulate them.
entities = [(span.text, span.label_) for span in doc.ents]
df_entities = pd.DataFrame(data=entities, columns=["Entity", "Label"])

# Show the heading and the table (one print call, same output as two).
print("\nNamed Entities:", df_entities, sep="\n")

# Extract key contract clauses: keep each sentence whose text mentions one of
# these keywords (case-insensitive substring match).
CLAUSE_KEYWORDS = ("agreement", "contract")

important_phrases = []
for sent in doc.sents:
    lowered = sent.text.lower()
    if any(keyword in lowered for keyword in CLAUSE_KEYWORDS):
        important_phrases.append(sent.text)

print("\nKey Contract Clauses:")
for clause in important_phrases:
    print(clause)

# Rule-based matching for payment terms.
#
# The pattern accepts phrasings such as:
#   "Payment terms are net 30 days"
#   "Payment terms: net 30"
#   "Payment terms net 45 days"
# The original pattern allowed only an optional punctuation token between
# "terms" and "net", so the sample sentence ("Payment terms are net 30 days")
# never matched because of the intervening "are".
matcher = Matcher(nlp.vocab)
pattern = [
    {"LOWER": "payment"},
    {"LOWER": "terms"},
    {"LOWER": {"IN": ["are", "is"]}, "OP": "?"},  # optional copula
    {"IS_PUNCT": True, "OP": "?"},                # optional ":" / "-" etc.
    {"LOWER": "net"},
    {"IS_DIGIT": True},
    {"LOWER": {"IN": ["day", "days"]}, "OP": "?"},  # optional unit
]
# greedy="LONGEST" keeps only the longest span when the optional trailing
# unit would otherwise yield both "... net 30" and "... net 30 days".
matcher.add("PAYMENT_TERMS", [pattern], greedy="LONGEST")

matches = matcher(doc)
payment_terms = [doc[start:end].text for _match_id, start, end in matches]

print("\nPayment Terms:")
for term in payment_terms:
    print(term)

# Visualization of named entities: bar chart of how many entities were found
# per label, drawn on an explicitly created Axes.
label_counts = df_entities["Label"].value_counts()

fig, ax = plt.subplots(figsize=(8, 4))
label_counts.plot(kind="bar", color="skyblue", ax=ax)
ax.set_title("Named Entity Distribution")
ax.set_xlabel("Entity Type")
ax.set_ylabel("Count")
plt.show()