# Financial Document Summarization with SmolAgent & LLM Guardrails

**This notebook extracts text from a financial PDF, summarizes it using SmolAgent's CodeAgent, performs self-evaluation, and validates accuracy using a locally hosted OpenAI-compatible LLM.**

## 📌 Step 1: Install Required Libraries

In [None]:
!pip install smolagent requests openai pypdf

## 📌 Step 2: Import Dependencies & Set Up Local LLM

In [None]:
import openai
import requests
import json
import pypdf  # For PDF text extraction
from smolagent import CodeAgent

# Configure OpenAI-compatible local LLM.
# The endpoint setting was renamed between SDK generations: openai<1.0 reads
# `api_base`, openai>=1.0 reads `base_url`. Both are set so the notebook keeps
# targeting the local server whichever version `pip install openai` resolved to.
openai.api_base = "http://127.0.0.1:1234/v1"  # Local model (legacy attribute name)
openai.base_url = openai.api_base  # Same endpoint under its openai>=1.0 name
openai.api_key = "sk-local"  # Dummy key since local models don't need authentication

## 📌 Step 3: Extract Text from a PDF Document

In [None]:
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF and join it with newlines.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        A single string with the text of each page separated by a newline.
        Pages for which extraction yields nothing (an empty or falsy result)
        are skipped, so an image-only document produces an empty string.
    """
    page_texts = []
    with open(pdf_path, "rb") as file:
        pdf_reader = pypdf.PdfReader(file)
        for page in pdf_reader.pages:
            # Extract once per page: extraction is the expensive step, and
            # filtering and collecting in one comprehension ran it twice.
            page_text = page.extract_text()
            if page_text:
                page_texts.append(page_text)
    return "\n".join(page_texts)

# Location of the report to summarize; point this at your own PDF.
pdf_path = "financial_report.pdf"
financial_document = extract_text_from_pdf(pdf_path)

# Show only the first 500 characters as a quick check of the extraction.
document_preview = financial_document[:500]
print("Extracted Document Snippet:\n", document_preview)

## 📌 Step 4: Create SmolAgent for Summarization

In [None]:
# Agent instance that the following cell runs to produce the summary.
summarization_agent = CodeAgent(
    name="FinancialSummarizer",
    llm="gpt-4",
)

## 📌 Step 5: Generate a Summary Using SmolAgent

In [None]:
# Only the first 2000 characters of the report are embedded in the prompt.
document_excerpt = financial_document[:2000]

prompt = f"""
Write a Python script that extracts key financial figures (e.g., revenue, net income, EBITDA) and risk disclosures
from the following financial report and summarizes them concisely:

{document_excerpt}
"""

# Run the agent on the prompt and display whatever it returns.
summary_code = summarization_agent.run(prompt)
print("Generated Code:\n", summary_code)

## 📌 Step 6: Self-Evaluate the Generated Summary

In [None]:
def self_evaluate_summary(summary, source_text):
    """Run cheap heuristic checks on a generated summary.

    Args:
        summary: The generated summary text.
        source_text: The document text the summary was produced from.

    Returns:
        A ``(passed, checks)`` tuple. ``checks`` maps each check name to a
        bool, and ``passed`` is True only when every check is True.
    """
    # Normalize case once so every check compares on the same footing.
    summary_lower = summary.lower()
    source_lower = source_text.lower()
    financial_keywords = ("revenue", "net income", "ebitda", "profit")

    checks = {
        "contains_financials": any(keyword in summary_lower for keyword in financial_keywords),
        "contains_risk_factors": "risk" in summary_lower,
        # True when the summary stays under the 250-word budget.
        "word_count": len(summary.split()) < 250,
        # At least one of the first ten summary words must occur in the source.
        # Compared case-insensitively, like the keyword checks, so a difference
        # in capitalization alone does not fail the check.
        "matches_source": any(word in source_lower for word in summary_lower.split()[:10]),
    }
    return all(checks.values()), checks

# Apply the heuristic checks to the agent output and report the outcome.
evaluation_result = self_evaluate_summary(summary_code, financial_document)
self_eval_passed, self_eval_checks = evaluation_result
print("\nSelf-Evaluation Passed:", self_eval_passed)
print("Self-Evaluation Checks:", self_eval_checks)

## 📌 Step 7: Use an LLM Guardrail to Verify Accuracy

In [None]:
def guardrail_evaluate(summary, source_text, model="gpt-4"):
    """Ask the configured LLM to rate a summary against its source document.

    Works with both generations of the ``openai`` package: the client
    interface introduced in 1.0 is used when present, otherwise the legacy
    ``openai.ChatCompletion`` call is made.

    Args:
        summary: The summary to evaluate.
        source_text: The source document; only its first 3000 characters are
            included in the prompt.
        model: Model name passed to the chat completion endpoint.

    Returns:
        The text content of the first choice in the model's reply.
    """
    prompt = f"""
    Evaluate this financial summary against the source document.
    Summary:
    {summary}
    
    Source:
    {source_text[:3000]}
    
    Return a confidence score (0-100) and an explanation.
    """
    messages = [{"role": "user", "content": prompt}]

    if hasattr(openai, "OpenAI"):
        # openai>=1.0 removed openai.ChatCompletion; requests go through a
        # client object. Honour whichever endpoint attribute the notebook set.
        endpoint = getattr(openai, "base_url", None) or getattr(openai, "api_base", None)
        client = openai.OpenAI(base_url=endpoint, api_key=openai.api_key)
        response = client.chat.completions.create(model=model, messages=messages)
        return response.choices[0].message.content

    # Legacy (openai<1.0) module-level interface.
    response = openai.ChatCompletion.create(model=model, messages=messages)
    return response["choices"][0]["message"]["content"]

# Have the LLM judge the summary against the source, then display its reply.
guardrail_score = guardrail_evaluate(
    summary=summary_code,
    source_text=financial_document,
)
print("\n🔍 Guardrail Score:", guardrail_score)