<a href="https://colab.research.google.com/github/amalks02/finwise-genai-capstone/blob/task-06-summarization/summarizer_checkpoint.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# =================== Task 6: Summarization Engine (Gemini) ===================

# Install dependencies (run once)
!pip install langchain langchain-google-genai PyPDF2

# Imports
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.document_loaders import PyPDFLoader, TextLoader
from langchain.chains.summarize import load_summarize_chain
import os

# Import config
from config import SAMPLE_DOCS_PATH, GEMINI_API_KEY

# =================== Set Gemini API Key in Environment ===================
# The Gemini client picks the key up from GOOGLE_API_KEY. Fail early with a
# clear message: assigning None to os.environ raises an opaque TypeError, and
# an empty key only surfaces later as an authentication failure.
if not GEMINI_API_KEY:
    raise ValueError(
        "GEMINI_API_KEY is empty; set it in config.py before running this cell."
    )
os.environ["GOOGLE_API_KEY"] = GEMINI_API_KEY

# =================== Load Documents ===================
# Build paths with os.path.join so the result is correct whether or not
# SAMPLE_DOCS_PATH ends with a path separator (plain string concatenation
# would silently yield e.g. "docskyc_report.pdf").
pdf_path = os.path.join(SAMPLE_DOCS_PATH, "kyc_report.pdf")

# PDF Example
pdf_loader = PyPDFLoader(pdf_path)
documents = pdf_loader.load()

# Text Example (uncomment if needed)
# text_loader = TextLoader(os.path.join(SAMPLE_DOCS_PATH, "credit_risk.txt"))
# documents = text_loader.load()

# An empty document list gives the chain nothing to summarize; stop here
# with an explicit error instead of calling the LLM.
if not documents:
    raise ValueError(f"No content could be loaded from {pdf_path}")

# =================== Initialize Gemini LLM ===================
# temperature=0 keeps the summary as deterministic as the model allows.
# NOTE(review): "gemini-pro" is an older model id; confirm it is still served
# for this API key and switch to a current model id if requests are rejected.
llm = ChatGoogleGenerativeAI(
    model="gemini-pro",
    temperature=0
)

# =================== Setup Summarization Chain ===================
# map_reduce summarizes each loaded document separately and then combines
# the partial summaries into one.
summarize_chain = load_summarize_chain(llm=llm, chain_type="map_reduce")

# =================== Generate Summary ===================
# Chain.run() is deprecated; invoke() takes the documents under the
# "input_documents" key and returns the summary text under "output_text".
result = summarize_chain.invoke({"input_documents": documents})
summary = result["output_text"]

# =================== Output ===================
print("===== Document Summary =====")
print(summary)

# =================== Save Summary ===================
summary_path = os.path.join(SAMPLE_DOCS_PATH, "summary_output.txt")
with open(summary_path, "w", encoding="utf-8") as f:
    f.write(summary)