In [1]:
!pip install langchain-community pypdf



In [2]:
import getpass
import os
import ast
import json
import traceback
from typing import TypedDict, List, Dict

# Ask for the Google AI Studio key interactively only when the environment
# does not already provide one, so the key never has to be written in the notebook.
if os.environ.get("GOOGLE_API_KEY") is None:
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Enter your Google AI Studio API Key: ")

from langchain_google_genai import ChatGoogleGenerativeAI
from langgraph.graph import START, StateGraph, END
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.messages import SystemMessage, HumanMessage


# -------------------------------------------------
#                1. STATE FORMAT
# -------------------------------------------------
class ResearchState(TypedDict):
    """State dictionary passed to StateGraph and shared by all node functions."""
    # Path handed to PyPDFLoader by pdf_loader_node.
    file_path: str
    # The user's question/instruction; read by planner_node and final_answer_node.
    user_prompt: str
    # Result of PyPDFLoader.load(), or a one-element list holding an error
    # message string when loading failed (see pdf_loader_node).
    pdf_pages: list
    # Section title -> section text as produced by splitter_agent_node; on
    # failure it holds fallback/diagnostic keys instead.
    split_sections: dict
    # Numbered plan text written by planner_node.
    plan: str
    # Final response text written by final_answer_node.
    final_answer: str


# -------------------------------------------------
#                     LLM
# -------------------------------------------------
# Single chat-model client shared by the splitter, planner and final-answer
# nodes (each calls llm.invoke). The API key is read from GOOGLE_API_KEY,
# which is set earlier in this cell.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0.7
)


# -------------------------------------------------
#                Utility
# -------------------------------------------------
def safe_join_pages(pages, max_chars=12000):
    """Concatenate page texts into one string, truncated to ``max_chars``.

    Args:
        pages: Sequence of page objects. Each item's ``page_content``
            attribute is used when present, otherwise ``str(item)``.
        max_chars: Maximum number of characters to return.

    Returns:
        The page texts joined by blank lines and cut to ``max_chars``.
        An empty string for falsy ``pages``. If joining fails for any
        reason, the truncated ``str(pages)`` is returned instead.
    """
    if not pages:
        return ""

    try:
        texts = []
        for page in pages:
            # str(page) is the fallback for items without a page_content attribute.
            texts.append(getattr(page, "page_content", str(page)))
        return "\n\n".join(texts)[:max_chars]
    except Exception:
        # Best effort: never raise from here, fall back to the container's repr.
        return str(pages)[:max_chars]


# -------------------------------------------------
#                2.   NODES
# -------------------------------------------------

# ------------ PDF LOADER ------------
def pdf_loader_node(state: ResearchState):
    """Load the PDF referenced by ``state["file_path"]``.

    Args:
        state: Current graph state; only ``file_path`` is read
            (defaults to an empty string when absent).

    Returns:
        A state update ``{"pdf_pages": ...}``. On success the value is
        whatever ``PyPDFLoader(path).load()`` returns. On any exception the
        value is a one-element list containing an error message string, so
        this node never raises.
    """
    print("\n[AGENT] pdf_loader running...")
    source_path = state.get("file_path", "")

    try:
        documents = PyPDFLoader(source_path).load()
        print("[pdf_loader] Loaded PDF successfully.")
        update = {"pdf_pages": documents}
    except Exception as e:
        print("[pdf_loader] ERROR loading PDF.")
        # The error text takes the place of the page list.
        update = {"pdf_pages": [f"Error loading PDF: {str(e)}"]}

    return update



import re

def clean_string(s: str) -> str:
    """Reduce ``s`` to a conservative ASCII character set.

    Every character other than ASCII letters, digits, space, comma,
    period and dash is replaced by a space; runs of whitespace are then
    collapsed to a single space and the ends are trimmed.

    Args:
        s: Text to sanitize.

    Returns:
        The sanitized text (possibly empty).
    """
    disallowed = re.compile(r"[^A-Za-z0-9 ,.\-]")
    whitespace_run = re.compile(r"\s+")

    # Replace rather than delete, so words separated only by punctuation stay apart.
    spaced = disallowed.sub(" ", s)
    return whitespace_run.sub(" ", spaced).strip()


def _try_parse_literal(text):
    """Parse ``text`` as a Python literal, then as JSON.

    Returns an ``(ok, value)`` pair so that a successfully parsed ``None``
    can be told apart from a parse failure.
    """
    for parser in (ast.literal_eval, json.loads):
        try:
            return True, parser(text)
        except Exception:
            continue
    return False, None


def _outermost_braces(text):
    """Return the span from the first ``{`` to the last ``}``, or ``None`` if there is none."""
    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end <= start:
        return None
    return text[start:end + 1]


def _splitter_fallback(joined, raw, reason):
    """Build the state update used when the LLM output cannot be used as a section dict."""
    return {
        "split_sections": {
            "FullText": joined,
            "parse_error": reason,
            "raw_llm": raw[:2000]
        }
    }


def splitter_agent_node(state: ResearchState):
    """Ask the LLM to split the loaded PDF text into named subsections.

    The joined page text (truncated to 15000 characters by
    ``safe_join_pages``) is sent to the LLM, whose reply is expected to be a
    dictionary literal. The reply is parsed with ``ast.literal_eval`` and then
    ``json.loads``. If both fail on the raw reply, the outermost ``{...}``
    span is retried, which recovers replies wrapped in Markdown code fences
    or surrounded by commentary.

    Args:
        state: Current graph state; only ``pdf_pages`` is read.

    Returns:
        A state update ``{"split_sections": ...}`` whose value is one of:
        - the sanitized ``{title: text}`` mapping (keys and values passed
          through ``clean_string``, empty keys replaced by ``"Section"``,
          duplicate keys suffixed with ``"_2"``);
        - a fallback dict with ``FullText``, ``parse_error`` and ``raw_llm``
          when the reply could not be parsed or was not a dict;
        - ``{"error": ...}`` when the LLM call itself (or anything else in
          the node) raised.
    """
    print("\n[AGENT] splitter running...")
    print("[LLM] Splitter agent calling LLM...")

    pages = state.get("pdf_pages", [])
    joined = safe_join_pages(pages, max_chars=15000)

    system = """
You are a PDF Subsection Splitter Agent.

Rules:
- Split the paper into **10 to 15 subsections**.
- Output MUST be a **valid Python dictionary literal**.
- You are not allowed to miss any content from the paper.
- Keys must be **simple strings**: letters, numbers, spaces only.
- Values must be **simple text strings** with no special characters.
- No markdown or bullet points.
- No commentary.
- Return ONLY the dictionary literal.
"""

    try:
        msg = [
            SystemMessage(content=system),
            HumanMessage(content=f"PDF CONTENT:\n{joined}")
        ]
        response = llm.invoke(msg)
        raw = response.content.strip()

        # -------- TRY PYTHON DICT / JSON PARSE ----------
        ok, parsed = _try_parse_literal(raw)
        if not ok:
            # Second chance only: replies that already parsed keep their
            # original interpretation; fenced or annotated replies get
            # reduced to their outermost {...} span and are retried.
            candidate = _outermost_braces(raw)
            if candidate is not None and candidate != raw:
                ok, parsed = _try_parse_literal(candidate)

        if not ok:
            print("[splitter] FAILED to parse output.")
            return _splitter_fallback(joined, raw, "Could not parse splitter output")

        if not isinstance(parsed, dict):
            print("[splitter] Parsed output not a dict.")
            return _splitter_fallback(joined, raw, "Output was not a dict")

        # -------- SANITIZE KEYS + VALUES ----------
        cleaned = {}

        for k, v in parsed.items():
            k_clean = clean_string(str(k))
            v_clean = clean_string(str(v))

            if not k_clean:
                k_clean = "Section"

            # Distinct raw keys can collapse to the same cleaned key; keep both.
            while k_clean in cleaned:
                k_clean += "_2"

            cleaned[k_clean] = v_clean

        print("[splitter] CLEANED keys:", list(cleaned.keys()))
        return {"split_sections": cleaned}

    except Exception as e:
        print("[splitter] ERROR:", str(e))
        return {"split_sections": {"error": f"Splitter agent error: {str(e)}"}}



# ------------ PLANNER ------------
def planner_node(state: ResearchState):
    """Ask the LLM for a numbered plan to answer the user's prompt.

    Args:
        state: Current graph state; ``split_sections`` and ``user_prompt``
            are read.

    Returns:
        A state update ``{"plan": text}``. The text is the stripped LLM
        reply, a fixed placeholder when the reply is empty, or an error
        message when anything in the call raised.
    """
    print("\n[AGENT] planner running...")
    print("[LLM] Planner agent calling LLM...")

    user_prompt = state.get("user_prompt", "")
    section_map = state.get("split_sections", {})

    system = (
        "You are a Planning Agent.\n"
        "Use the provided section dictionary to make a step-by-step plan to answer the user query.\n"
        "Write the plan in clear numbered steps."
    )

    try:
        # Only the first 10 sections, each capped at 800 characters, are shown to the planner.
        preview = {}
        for title, body in list(section_map.items())[:10]:
            if isinstance(body, str):
                preview[title] = body[:800]
            else:
                preview[title] = str(body)

        human_text = f"SECTIONS:\n{preview}\n\nUSER PROMPT:\n{user_prompt}"
        conversation = [
            SystemMessage(content=system),
            HumanMessage(content=human_text)
        ]

        reply = llm.invoke(conversation)
        plan_text = reply.content.strip()
        print("[planner] LLM call completed.")
        if not plan_text:
            plan_text = "Planner returned empty plan."
        return {"plan": plan_text}

    except Exception as e:
        print("[planner] ERROR:", str(e))
        return {"plan": f"Planner error: {str(e)}"}



# ------------ FINAL ANSWER AGENT ------------
def final_answer_node(state: ResearchState):
    """Produce the final answer by having the LLM follow the plan.

    Args:
        state: Current graph state; ``user_prompt``, ``split_sections`` and
            ``plan`` are read.

    Returns:
        A state update ``{"final_answer": text}``. The text is the stripped
        LLM reply, a fixed placeholder when the reply is empty, or an error
        message when anything in the call raised.
    """
    print("\n[AGENT] final_answer running...")
    print("[LLM] Final answer agent calling LLM...")

    plan = state.get("plan", "")
    section_map = state.get("split_sections", {})
    user_prompt = state.get("user_prompt", "")

    system = (
        "You are the Final Solution Agent.\n"
        "Your job is to produce the final answer STRICTLY by following the step-by-step plan.\n"
        "Mandatory rules:\n"
        "- Use ONLY the subsection dictionary from the Splitter Agent.\n"
        "- For every step, explicitly cite which subsections the content was extracted from.\n"
        "- If a step cannot be fulfilled from provided sections, state that the paper does not provide enough information.\n"
        "- Provide clear, concise, academically precise explanations.\n"
        "- The final answer MUST be written in numbered steps, one per step of the plan."
    )

    try:
        # Up to 20 sections, each capped at 1500 characters, are passed as context.
        preview = {}
        for title, body in list(section_map.items())[:20]:
            preview[title] = body[:1500] if isinstance(body, str) else str(body)

        human_text = (
            f"PLAN:\n{plan}\n\n"
            f"MAIN CONTENT (Subsections from Splitter Agent):\n{preview}\n\n"
            f"USER QUERY:\n{user_prompt}"
        )
        conversation = [
            SystemMessage(content=system),
            HumanMessage(content=human_text)
        ]

        reply = llm.invoke(conversation)
        answer_text = reply.content.strip()
        print("[final_answer] LLM call completed.")
        if not answer_text:
            answer_text = "Final agent returned empty result."
        return {"final_answer": answer_text}

    except Exception as e:
        print("[final_answer] ERROR:", str(e))
        return {"final_answer": f"Final agent error: {str(e)}"}



# ------------ SCHEDULER ------------
def scheduler_node(state: ResearchState):
    """Pass-through step: logs a message and contributes no state changes.

    Args:
        state: Current graph state (not read).

    Returns:
        An empty dict, i.e. no state updates.
    """
    print("\n[SCHEDULER] Deciding next agent...")
    no_updates = {}
    return no_updates



# -------------------------------------------------
#               3. BUILD GRAPH
# -------------------------------------------------
# Nodes listed in execution order; the same order drives the linear edges below.
pipeline_steps = [
    ("pdf_loader", pdf_loader_node),
    ("scheduler", scheduler_node),
    ("splitter", splitter_agent_node),
    ("planner", planner_node),
    ("final_answer", final_answer_node),
]

builder = StateGraph(ResearchState)

for step_name, step_fn in pipeline_steps:
    builder.add_node(step_name, step_fn)

# START -> pdf_loader -> scheduler -> splitter -> planner -> final_answer -> END
route = [START] + [step[0] for step in pipeline_steps] + [END]
for edge_from, edge_to in zip(route, route[1:]):
    builder.add_edge(edge_from, edge_to)

graph = builder.compile()

Enter your Google AI Studio API Key: ··········


In [3]:
# Collect the inputs for this run interactively.
pdf_path = input("Enter PDF path: ").strip()
user_cmd = input("Enter your prompt/instruction: ").strip()

# Initial graph state. Only keys declared on ResearchState are supplied:
# the previous extra "figures" entry was not part of the schema and no node used it.
state: ResearchState = {
    "file_path": pdf_path,
    "user_prompt": user_cmd,
    "pdf_pages": [],
    "split_sections": {},
    "plan": "",
    "final_answer": ""
}

try:
    print("\n\n===== PIPELINE START =====")
    result = graph.invoke(state)
    print("===== PIPELINE END =====\n")

    print("\n" + "="*50)
    print("SECTION SPLITTER OUTPUT")
    print("="*50)
    print(result.get("split_sections", {}))

    print("\n" + "="*50)
    print("FINAL ANSWER")
    print("="*50)
    print(result.get("final_answer", ""))

except Exception:
    # Show the full traceback instead of aborting the cell.
    traceback.print_exc()

Enter PDF path: /content/Video Depth Anything.pdf
Enter your prompt/instruction: What is this paper doing? Tell in 50 words


===== PIPELINE START =====

[AGENT] pdf_loader running...
[pdf_loader] Loaded PDF successfully.

[SCHEDULER] Deciding next agent...

[AGENT] splitter running...
[LLM] Splitter agent calling LLM...
[splitter] CLEANED keys: ['Header and Visual Overview', 'Abstract', 'Introduction Monocular Depth Estimation Progress', 'Introduction Limitations of Existing Models', 'Introduction Video Depth Anything Proposed Solution', 'Introduction Experimental Results and Contributions', 'Related Work Monocular Depth Estimation', 'Related Work Consistent Video Depth Estimation', 'Video Depth Anything Overall Method', 'Architecture Overview and Joint Training', 'Architecture Depth Anything V2 Encoder', 'Architecture Spatiotemporal Head', 'Temporal Gradient Matching Loss Introduction']

[AGENT] planner running...
[LLM] Planner agent calling LLM...
[planner] LLM call completed.

[AGE

In [4]:
# Inputs for this run.
pdf_path = "/content/Video Depth Anything.pdf"
user_cmd = "Tell me the overall summary of the paper"

# Initial graph state. Only keys declared on ResearchState are supplied:
# the previous extra "figures" entry was not part of the schema and no node used it.
state: ResearchState = {
    "file_path": pdf_path,
    "user_prompt": user_cmd,
    "pdf_pages": [],
    "split_sections": {},
    "plan": "",
    "final_answer": ""
}

try:
    print("\n\n===== PIPELINE START =====")
    result = graph.invoke(state)
    print("===== PIPELINE END =====\n")

    print("\n" + "="*50)
    print("SECTION SPLITTER OUTPUT")
    print("="*50)
    print(result.get("split_sections", {}))

    print("\n" + "="*50)
    print("FINAL ANSWER")
    print("="*50)
    print(result.get("final_answer", ""))

except Exception:
    # Show the full traceback instead of aborting the cell.
    traceback.print_exc()



===== PIPELINE START =====

[AGENT] pdf_loader running...
[pdf_loader] Loaded PDF successfully.

[SCHEDULER] Deciding next agent...

[AGENT] splitter running...
[LLM] Splitter agent calling LLM...
[splitter] CLEANED keys: ['ArXiv Info and Figure 1', 'Abstract', 'Introduction Monocular Depth Estimation Progress', 'Introduction Existing Approaches and Limitations', 'Introduction Video Depth Anything Core Idea', 'Introduction Technical Design of Video Depth Anything', 'Introduction Experimental Results and Contributions Summary', 'Related Work Monocular Depth Estimation Overview', 'Related Work Consistent Video Depth Estimation Overview', 'Video Depth Anything Model Introduction', 'Architecture Overview and Training Strategy', 'Architecture Depth Anything V2 Encoder Details', 'Architecture Spatiotemporal Head STH Details', 'Temporal Gradient Matching Loss Introduction', 'Temporal Gradient Matching Loss OPW Description']

[AGENT] planner running...
[LLM] Planner agent calling LLM...
[pla

In [5]:
# Inputs for this run.
pdf_path = "/content/Video Depth Anything.pdf"
user_cmd = "What is the objective of this paper?"

# Initial graph state. Only keys declared on ResearchState are supplied:
# the previous extra "figures" entry was not part of the schema and no node used it.
state: ResearchState = {
    "file_path": pdf_path,
    "user_prompt": user_cmd,
    "pdf_pages": [],
    "split_sections": {},
    "plan": "",
    "final_answer": ""
}

try:
    print("\n\n===== PIPELINE START =====")
    result = graph.invoke(state)
    print("===== PIPELINE END =====\n")

    print("\n" + "="*50)
    print("SECTION SPLITTER OUTPUT")
    print("="*50)
    print(result.get("split_sections", {}))

    print("\n" + "="*50)
    print("FINAL ANSWER")
    print("="*50)
    print(result.get("final_answer", ""))

except Exception:
    # Show the full traceback instead of aborting the cell.
    traceback.print_exc()



===== PIPELINE START =====

[AGENT] pdf_loader running...
[pdf_loader] Loaded PDF successfully.

[SCHEDULER] Deciding next agent...

[AGENT] splitter running...
[LLM] Splitter agent calling LLM...
[splitter] CLEANED keys: ['Title and Authors', 'Figure 1 Description', 'Abstract', 'Introduction Background and Problem', 'Introduction Existing Approaches', 'Introduction Our Solution', 'Introduction Methodology Highlights', 'Introduction Experimental Summary', 'Contributions', 'Related Work Monocular Depth Estimation', 'Related Work Consistent Video Depth Estimation', 'Video Depth Anything Overview', 'Video Depth Anything Architecture Encoder', 'Video Depth Anything Architecture Spatiotemporal Head', 'Video Depth Anything Temporal Gradient Matching Loss']

[AGENT] planner running...
[LLM] Planner agent calling LLM...
[planner] LLM call completed.

[AGENT] final_answer running...
[LLM] Final answer agent calling LLM...
[final_answer] LLM call completed.
===== PIPELINE END =====


SECTION S

In [6]:
# Inputs for this run.
pdf_path = "/content/Video Depth Anything.pdf"
user_cmd = "Tell me the section-wise summary of the paper"

# Initial graph state. Only keys declared on ResearchState are supplied:
# the previous extra "figures" entry was not part of the schema and no node used it.
state: ResearchState = {
    "file_path": pdf_path,
    "user_prompt": user_cmd,
    "pdf_pages": [],
    "split_sections": {},
    "plan": "",
    "final_answer": ""
}

try:
    print("\n\n===== PIPELINE START =====")
    result = graph.invoke(state)
    print("===== PIPELINE END =====\n")

    print("\n" + "="*50)
    print("SECTION SPLITTER OUTPUT")
    print("="*50)
    print(result.get("split_sections", {}))

    print("\n" + "="*50)
    print("FINAL ANSWER")
    print("="*50)
    print(result.get("final_answer", ""))

except Exception:
    # Show the full traceback instead of aborting the cell.
    traceback.print_exc()



===== PIPELINE START =====

[AGENT] pdf_loader running...
[pdf_loader] Loaded PDF successfully.

[SCHEDULER] Deciding next agent...

[AGENT] splitter running...
[LLM] Splitter agent calling LLM...
[splitter] CLEANED keys: ['Paper Metadata', 'Visual Overview Figure 1', 'Abstract Summary', 'Introduction to Monocular Depth Estimation Challenges', 'Existing Approaches and Limitations', 'Video Depth Anything Proposed Solution', 'Experimental Results and Performance Summary', 'Our Contributions', 'Related Work Monocular Depth Estimation', 'Related Work Consistent Video Depth Estimation', 'Video Depth Anything Overview', 'Architecture Depth Anything V2 Encoder', 'Architecture Spatiotemporal Head STH', 'Temporal Gradient Matching Loss Introduction']

[AGENT] planner running...
[LLM] Planner agent calling LLM...
[planner] LLM call completed.

[AGENT] final_answer running...
[LLM] Final answer agent calling LLM...
[final_answer] LLM call completed.
===== PIPELINE END =====


SECTION SPLITTER O

In [7]:
# Inputs for this run.
pdf_path = "/content/Video Depth Anything.pdf"
user_cmd = "Summarize the architecture in 2 lines"

# Initial graph state. Only keys declared on ResearchState are supplied:
# the previous extra "figures" entry was not part of the schema and no node used it.
state: ResearchState = {
    "file_path": pdf_path,
    "user_prompt": user_cmd,
    "pdf_pages": [],
    "split_sections": {},
    "plan": "",
    "final_answer": ""
}

try:
    print("\n\n===== PIPELINE START =====")
    result = graph.invoke(state)
    print("===== PIPELINE END =====\n")

    print("\n" + "="*50)
    print("SECTION SPLITTER OUTPUT")
    print("="*50)
    print(result.get("split_sections", {}))

    print("\n" + "="*50)
    print("FINAL ANSWER")
    print("="*50)
    print(result.get("final_answer", ""))

except Exception:
    # Show the full traceback instead of aborting the cell.
    traceback.print_exc()



===== PIPELINE START =====

[AGENT] pdf_loader running...
[pdf_loader] Loaded PDF successfully.

[SCHEDULER] Deciding next agent...

[AGENT] splitter running...
[LLM] Splitter agent calling LLM...
[splitter] CLEANED keys: ['Paper Identification', 'Figure 1 Visual Summary', 'Abstract', 'Introduction Problem Statement', 'Introduction Existing Solutions', 'Our Approach Overview', 'Key Technical Components of Our Approach', 'Introduction Experimental Results and Contributions', 'Related Work Monocular Depth Estimation', 'Related Work Consistent Video Depth Estimation', 'Video Depth Anything Overview', 'Architecture Depth Anything V2 Encoder', 'Architecture Spatiotemporal Head', 'Temporal Gradient Matching Loss Introduction']

[AGENT] planner running...
[LLM] Planner agent calling LLM...
[planner] LLM call completed.

[AGENT] final_answer running...
[LLM] Final answer agent calling LLM...
[final_answer] LLM call completed.
===== PIPELINE END =====


SECTION SPLITTER OUTPUT
{'Paper Identif

In [8]:
# Inputs for this run.
pdf_path = "/content/Video Depth Anything.pdf"
user_cmd = "What figures and tables are present in the paper"

# Initial graph state. Only keys declared on ResearchState are supplied:
# the previous extra "figures" entry was not part of the schema and no node used it.
state: ResearchState = {
    "file_path": pdf_path,
    "user_prompt": user_cmd,
    "pdf_pages": [],
    "split_sections": {},
    "plan": "",
    "final_answer": ""
}

try:
    print("\n\n===== PIPELINE START =====")
    result = graph.invoke(state)
    print("===== PIPELINE END =====\n")

    print("\n" + "="*50)
    print("SECTION SPLITTER OUTPUT")
    print("="*50)
    print(result.get("split_sections", {}))

    print("\n" + "="*50)
    print("FINAL ANSWER")
    print("="*50)
    print(result.get("final_answer", ""))

except Exception:
    # Show the full traceback instead of aborting the cell.
    traceback.print_exc()



===== PIPELINE START =====

[AGENT] pdf_loader running...
[pdf_loader] Loaded PDF successfully.

[SCHEDULER] Deciding next agent...

[AGENT] splitter running...
[LLM] Splitter agent calling LLM...
[splitter] CLEANED keys: ['arXiv Info and Figure 1', 'Abstract', 'Introduction Monocular Depth Challenges', 'Introduction Related Work and Problem', 'Introduction Video Depth Anything Solution', 'Introduction Experimental Results and Contributions', 'Related Work Monocular Depth Estimation', 'Related Work Consistent Video Depth Estimation', 'Video Depth Anything Overview', 'Architecture Joint Training and Encoder', 'Architecture Spatiotemporal Head', 'Temporal Gradient Matching Loss']

[AGENT] planner running...
[LLM] Planner agent calling LLM...
[planner] LLM call completed.

[AGENT] final_answer running...
[LLM] Final answer agent calling LLM...
[final_answer] LLM call completed.
===== PIPELINE END =====


SECTION SPLITTER OUTPUT
{'arXiv Info and Figure 1': 'arXiv 2501.12375v3 cs.CV 15 Ju

In [9]:
# Inputs for this run (an off-topic prompt, unrelated to the PDF).
pdf_path = "/content/Video Depth Anything.pdf"
user_cmd = "Tell a random bedtime story in short"

# Initial graph state. Only keys declared on ResearchState are supplied:
# the previous extra "figures" entry was not part of the schema and no node used it.
state: ResearchState = {
    "file_path": pdf_path,
    "user_prompt": user_cmd,
    "pdf_pages": [],
    "split_sections": {},
    "plan": "",
    "final_answer": ""
}

try:
    print("\n\n===== PIPELINE START =====")
    result = graph.invoke(state)
    print("===== PIPELINE END =====\n")

    print("\n" + "="*50)
    print("SECTION SPLITTER OUTPUT")
    print("="*50)
    print(result.get("split_sections", {}))

    print("\n" + "="*50)
    print("FINAL ANSWER")
    print("="*50)
    print(result.get("final_answer", ""))

except Exception:
    # Show the full traceback instead of aborting the cell.
    traceback.print_exc()



===== PIPELINE START =====

[AGENT] pdf_loader running...
[pdf_loader] Loaded PDF successfully.

[SCHEDULER] Deciding next agent...

[AGENT] splitter running...
[LLM] Splitter agent calling LLM...
[splitter] CLEANED keys: ['arXiv Info', 'Figure 1 Description', 'Abstract', 'Introduction Problem and Motivation', 'Introduction Our Approach Details', 'Introduction Results and Contributions', 'Related Work Monocular Depth Estimation', 'Related Work Consistent Video Depth Estimation', 'Video Depth Anything Overview', 'Architecture Depth Anything V2 Encoder', 'Architecture Spatiotemporal Head', 'Figure 2 Overall Pipeline', 'Temporal Gradient Matching Loss Introduction', 'Temporal Gradient Matching Loss OPW Loss']

[AGENT] planner running...
[LLM] Planner agent calling LLM...
[planner] LLM call completed.

[AGENT] final_answer running...
[LLM] Final answer agent calling LLM...
[final_answer] LLM call completed.
===== PIPELINE END =====


SECTION SPLITTER OUTPUT
{'arXiv Info': 'arXiv 2501.123