## PDF Summarization using LangGraph & Google Generative AI

### Load Libraries and Environment Variables

In [None]:
import google.generativeai as genai
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter


import os
from dotenv import load_dotenv
# Load variables from the local ``.config`` dotenv file into the process
# environment, then read the Google API key back out of it.
load_dotenv(dotenv_path='.config')
# ``None`` when the variable is not defined in the environment.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
# Hand the key to the google.generativeai client used by the nodes below.
genai.configure(api_key=GOOGLE_API_KEY)

### Define Nodes

In [2]:
def pdf_preprocessing(uploaded_pdf):
    """Extract the text of a PDF and split it into overlapping chunks.

    Args:
        uploaded_pdf: The PDF source handed straight to ``PdfReader``
            (this notebook passes a file path string).

    Returns:
        The chunks returned by ``RecursiveCharacterTextSplitter.split_text``
        with ``chunk_size=500`` and ``chunk_overlap=20``.
    """
    reader = PdfReader(uploaded_pdf)
    # Build the document text in one pass instead of repeated ``+=``.
    # ``or ""`` keeps a page with no extractable text (e.g. a result of
    # ``None`` from the PDF library) from raising ``TypeError`` on
    # concatenation; each page still contributes its trailing newline.
    pdf_content = "".join(
        f"{page.extract_text() or ''}\n" for page in reader.pages
    )
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=20,
    )
    return text_splitter.split_text(pdf_content)

In [3]:
def summarize_text(text):
    """Ask the Gemini model for a heading-wise summary of ``text``.

    Args:
        text: The content to summarize. Either a single string or a
            list/tuple of chunks such as the one ``pdf_preprocessing``
            returns; a sequence is joined with newlines before prompting.

    Returns:
        The text of the first part of the first candidate in the model's
        response.

    Raises:
        IndexError: If the response has no candidates or the first
            candidate has no parts (unchanged from the original access
            path).
    """
    # In this notebook's graph the previous node passes a list of chunks.
    # Interpolating a list into the f-string would embed its Python repr
    # (brackets, quotes, escaped newlines) in the prompt, so flatten it to
    # plain text first. Chunks were split with a small overlap, so a few
    # characters may repeat at the joins.
    if isinstance(text, (list, tuple)):
        text = "\n".join(map(str, text))

    model = genai.GenerativeModel("gemini-2.0-flash-exp")
    prompt = f"Extract Key Features and summarize the headingwise and sub-headingwise main content of the PDF:\n{text}"
    response = model.generate_content(prompt)
    return response.candidates[0].content.parts[0].text

### Graph

In [None]:
from langgraph.graph import Graph

# Node labels, named once so the registrations and the wiring below always
# refer to the same strings.
PREPROCESS_NODE = "PDF PREPROCESSING"
SUMMARIZE_NODE = "TEXT SUMMARIZATION"

graph = Graph()

# Register the two processing steps under their labels.
graph.add_node(PREPROCESS_NODE, pdf_preprocessing)
graph.add_node(SUMMARIZE_NODE, summarize_text)

# Linear pipeline: preprocessing feeds summarization.
graph.add_edge(PREPROCESS_NODE, SUMMARIZE_NODE)

# Execution starts at preprocessing and ends after summarization.
graph.set_entry_point(PREPROCESS_NODE)
graph.set_finish_point(SUMMARIZE_NODE)

# Compile the graph definition into the runnable object used below.
app = graph.compile()

from IPython.display import Image,display
# Render the compiled graph as a Mermaid PNG and show it inline in the notebook.
graph_png = app.get_graph().draw_mermaid_png()
display(Image(graph_png))

### Evaluation

In [None]:
# Path of the PDF file to summarize ("***.pdf" appears to be a placeholder;
# point it at a real file before running).
pdf_path = "***.pdf"

# Run the compiled graph on the PDF and print what the final node returns.
summary = app.invoke(pdf_path)
print(summary)