In [None]:
import os
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI

load_dotenv()

llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash", temperature=0.7)


In [None]:
llm.invoke("hello")

In [None]:
from typing import TypedDict,List,Dict,Optional,Literal

In [None]:
class RetryState(TypedDict):
    rag:int
    web_search:int
    synthesis:int

In [None]:
class RetrievalResult(TypedDict):
    source: Literal["rag", "web"]
    title: str
    content: str
    url: Optional[str]
    score: float

In [None]:
from typing import TypedDict,List,Dict,Optional,Literal

class AgentState(TypedDict):
    user_query:str

    confidence_score:float
    retrieval_mode:Literal['rag','web','both','none']
    research_relevant:bool
    answer_mode:Literal['grounded','direct','refuse']

    rag_results:Optional[List[RetrievalResult]]
    web_search_results:Optional[List[RetrievalResult]]
    merged_results:Optional[List[RetrievalResult]]

    retries:RetryState
    max_retries:RetryState

    failure_response:Optional[str]
    response:Optional[str]





In [None]:
from langgraph.graph import StateGraph, END

graph = StateGraph(AgentState)

graph.add_node("planner", planner_node)
graph.add_node("rag", rag_node)
graph.add_node("web_search", websearch_node)
graph.add_node("confidence_checker", confidence_checker)
graph.add_node("evaluator", evaluator_node)
graph.add_node("summarizer", summarizer_node)
graph.add_node("failure_node", failure_node)

graph.set_entry_point("planner")

# Planner decides retrieval
graph.add_conditional_edges(
    "planner",
    planner_router,
    {
        "rag": "rag",
        "web": "web_search",
        "both": ["rag", "web_search"],
        "none": "failure_node"
    }
)

# Retrieval → confidence
graph.add_edge("rag", "confidence_checker")
graph.add_edge("web_search", "confidence_checker")

# Confidence → evaluation
graph.add_edge("confidence_checker", "evaluator")

# Evaluation controls loop
graph.add_conditional_edges(
    "evaluator",
    evaluation_router,
    {
        "planner": "planner",
        "failed": "failure_node",
        "enough": "summarizer"
    }
)

graph.add_edge("summarizer", END)
graph.add_edge("failure_node", END)


<h1>Planner Node</h1>

In [None]:
from typing import Literal
from pydantic import BaseModel, Field
from langchain_core.output_parsers import PydanticOutputParser
from langchain_classic.prompts import ChatPromptTemplate

class PlannerOutput(BaseModel):
    research_relevant: bool
    retrieval_mode: Literal["rag", "web", "both", "none"]
    answer_mode: Literal["grounded","direct","refuse"]
    confidence: float = Field(ge=0.0, le=1.0)


parser = PydanticOutputParser(pydantic_object=PlannerOutput)


PLANNER_PROMPT = """You are a planning module inside an AI research assistant.

You DO NOT answer the user.
You DO NOT explain your reasoning.
You ONLY decide how the system should respond.

You have access to:
- A large internal vector store containing AI / ML / NLP / LLM / Systems research papers, surveys, benchmarks, and technical articles.
- A web search tool for retrieving recent, external, or missing research information.

Your job is to analyze the user query and output a structured decision describing:
1. Whether the query is research-related
2. How information should be retrieved (if at all)
3. How the answer should be produced
4. How confident you are in these decisions

--------------------------------
1. RESEARCH RELEVANCE
--------------------------------
A query IS research-related if it involves:
- Research papers, surveys, or academic work
- Models, algorithms, architectures, or methods
- Benchmarks, evaluations, comparisons, or ablations
- Technical analysis of AI/ML/NLP/LLMs/Systems

A query is NOT research-related if it involves:
- Casual conversation or chit-chat
- Opinions, jokes, or storytelling
- Personal advice or life questions
- Shopping, travel, or entertainment
- General trivia without technical depth

Set:
- research_relevant = true or false

--------------------------------
2. RETRIEVAL MODE
--------------------------------
If research_relevant = false:
- retrieval_mode MUST be "none"

If research_relevant = true, choose exactly ONE:

Use "rag" if:
- The topic is established or well-documented
- Surveys, classic papers, or known methods are sufficient
- Internal vector store is likely enough

Use "web" if:
- The query explicitly asks for recent, latest, or current work
- The topic involves fast-moving developments
- External or up-to-date sources are required

Use "both" if:
- Foundational research exists internally
- AND recent updates, comparisons, or new papers may be required

Rules:
- Do NOT assume freshness unless explicitly requested
- Prefer internal knowledge when possible
- Be conservative: choose "both" only when clearly necessary
- Never hallucinate missing information

--------------------------------
3. ANSWER MODE
--------------------------------
Decide how the answer should be produced:

Use "direct" ONLY if:
- The question is simple, factual, and well-established
- The answer is unlikely to change over time
- No citations, verification, or freshness are required

Use "grounded" if:
- The query is research-related
- Evidence, papers, or verification are expected
- Even if you know the answer, it should be supported by sources

Use "refuse" if:
- The query is not research-related
- Or answering would require speculation or unsupported claims

Rules:
- For research questions, default to "grounded" unless clearly trivial
- Do NOT choose "direct" just because you know the answer
- Research assistants prefer evidence over memory

--------------------------------
4. CONSISTENCY RULES (STRICT)
--------------------------------
These rules MUST ALWAYS hold:

- If research_relevant = false:
  - retrieval_mode MUST be "none"
  - answer_mode MUST be "refuse"

- If answer_mode = "direct":
  - retrieval_mode MUST be "none"

- If answer_mode = "grounded":
  - retrieval_mode MUST NOT be "none"

--------------------------------
5. CONFIDENCE
--------------------------------
Provide a confidence score between 0.0 and 1.0 indicating how certain you are that:
- research_relevant
- retrieval_mode
- answer_mode

are all correct.

Confidence interpretation:
- ≥ 0.8 → clear and unambiguous decision
- 0.5–0.8 → some uncertainty, retries may be useful
- < 0.5 → high uncertainty

--------------------------------
OUTPUT FORMAT (STRICT)
--------------------------------
Output ONLY valid JSON that conforms exactly to the provided schema.
Do NOT include explanations, comments, or extra text.

"""


prompt=ChatPromptTemplate([
    ("system",PLANNER_PROMPT+"\n\n{format_instructions}"),
    ("user",'{user_query}')
]).partial(format_instructions=parser.get_format_instructions())

chain = prompt | llm |parser

response=chain.invoke({"user_query":"who are ypou"})

response

<h1>RAG INGESTION

In [None]:
TOPICS = {
    "computer_vision": "computer vision",
    "nlp": "natural language processing",
    "deep_learning": "deep learning",
    "transformers": "transformer models",
    "diffusion": "diffusion models",
    "reinforcement_learning": "reinforcement learning"
}


In [None]:
# ingest.py
import os
import json
import arxiv
import fitz
import time
import uuid
import urllib.error
from tqdm import tqdm
from sentence_transformers import SentenceTransformer
from rank_bm25 import BM25Okapi
from qdrant_client import QdrantClient
from qdrant_client.models import PointStruct, VectorParams, Distance

# ---------------- CONFIG ----------------
TARGET_PAPER_COUNT = 100
RESULTS_PER_TOPIC = 40   # oversample because some will fail

DATA_DIR = "data"
PDF_DIR = os.path.join(DATA_DIR, "pdfs")
META_DIR = os.path.join(DATA_DIR, "metadata")

COLLECTION_NAME = "arxiv_ai_research_papers"
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"

TOPICS = {
    "computer_vision": "computer vision",
    "nlp": "natural language processing",
    "deep_learning": "deep learning",
    "transformers": "transformer models",
    "diffusion": "diffusion models",
    "reinforcement_learning": "reinforcement learning"
}

# ---------------- SETUP ----------------
os.makedirs(PDF_DIR, exist_ok=True)
os.makedirs(META_DIR, exist_ok=True)

embedder = SentenceTransformer(EMBED_MODEL)

qdrant = QdrantClient(path='./qdrant_data')

if COLLECTION_NAME not in [c.name for c in qdrant.get_collections().collections]:
    qdrant.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=VectorParams(
            size=embedder.get_sentence_embedding_dimension(),
            distance=Distance.COSINE
        )
    )

# ---------------- HELPERS ----------------
def extract_text(pdf_path: str) -> str:
    try:
        doc = fitz.open(pdf_path)
        return "\n".join(page.get_text() for page in doc)
    except Exception:
        return ""

def safe_download(paper, pdf_path, retries=3):
    for attempt in range(retries):
        try:
            paper.download_pdf(filename=pdf_path)
            return True
        except (urllib.error.HTTPError, urllib.error.URLError) as e:
            time.sleep(2)
        except Exception:
            return False
    return False

# ---------------- INGEST ----------------
points = []
bm25_corpus = []
bm25_ids = []
seen_ids = set()

successful_papers = 0

for topic, query in TOPICS.items():
    if successful_papers >= TARGET_PAPER_COUNT:
        break

    search = arxiv.Search(
        query=query,
        max_results=RESULTS_PER_TOPIC,
        sort_by=arxiv.SortCriterion.Relevance
    )

    for paper in tqdm(search.results(), desc=f"Ingesting {topic}"):
        if successful_papers >= TARGET_PAPER_COUNT:
            break

        arxiv_id = paper.get_short_id()
        if arxiv_id in seen_ids:
            continue

        seen_ids.add(arxiv_id)

        pdf_path = os.path.join(PDF_DIR, f"{arxiv_id}.pdf")
        meta_path = os.path.join(META_DIR, f"{arxiv_id}.json")

        # ---- SAFE PDF DOWNLOAD
        downloaded = safe_download(paper, pdf_path)
        if not downloaded:
            continue

        # ---- METADATA
        metadata = {
            "arxiv_id": arxiv_id,
            "title": paper.title,
            "authors": [a.name for a in paper.authors],
            "summary": paper.summary,
            "published": paper.published.isoformat(),
            "updated": paper.updated.isoformat(),
            "categories": paper.categories,
            "topic": topic,
            "pdf_path": pdf_path,
            "arxiv_url": paper.entry_id
        }

        with open(meta_path, "w", encoding="utf-8") as f:
            json.dump(metadata, f, indent=2)

        # ---- TEXT EXTRACTION
        text = extract_text(pdf_path)
        if len(text.strip()) < 500:
            continue  # trash PDFs

        # ---- EMBEDDING
        embedding = embedder.encode(text, normalize_embeddings=True)

        tokens = text.lower().split()
        bm25_corpus.append(tokens)
        bm25_ids.append(arxiv_id)

        point_id = uuid.uuid5(
            uuid.NAMESPACE_URL,
            f"https://arxiv.org/abs/{arxiv_id}"
        )

        points.append(
            PointStruct(
                id=str(point_id),  # ✅ valid UUID
                vector=embedding.tolist(),
                payload=metadata  # contains arxiv_id safely
            )
        )


        successful_papers += 1

# ---------------- STORE ----------------
qdrant.upsert(
    collection_name=COLLECTION_NAME,
    points=points
)

with open(os.path.join(DATA_DIR, "bm25.json"), "w") as f:
    json.dump({"ids": bm25_ids, "corpus": bm25_corpus}, f)

print(f"SUCCESS: Downloaded and indexed {successful_papers} papers.")


In [None]:
import json
from rank_bm25 import BM25Okapi

# ---- Load BM25 data
with open("data/bm25.json", "r") as f:
    data = json.load(f)

corpus = data["corpus"]
ids = data["ids"]

bm25 = BM25Okapi(corpus)

# ---- Query
query = "diffusion model image generation"
query_tokens = query.lower().split()

scores = bm25.get_scores(query_tokens)
top_k = sorted(
    zip(ids, scores),
    key=lambda x: x[1],
    reverse=True
)[:5]

print("BM25 RESULTS:")
for arxiv_id, score in top_k:
    print(arxiv_id, score)


In [None]:
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient

COLLECTION = "arxiv_ai_research_papers"

client = QdrantClient(path='./qdrant_data')
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

query = "transformer architectures for large language models"
query_vector = embedder.encode(query, normalize_embeddings=True).tolist()

response = client.query_points(
    collection_name=COLLECTION,
    query=query_vector,
    limit=5,
    with_payload=True
)

print("QDRANT RESULTS:")
for p in response.points:
    print(p.payload["arxiv_id"], p.payload["title"])


<h1> RAG RETRIEVAL

In [None]:
import json
from rank_bm25 import BM25Okapi
from sentence_transformers import SentenceTransformer
from qdrant_client import QdrantClient

COLLECTION = "arxiv_ai_research_papers"

# ---- Load BM25
with open("data/bm25.json") as f:
    data = json.load(f)

bm25 = BM25Okapi(data["corpus"])
ids = data["ids"]

# ---- Init
client = QdrantClient(path='./qdrant_data')
embedder = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

query = "diffusion models for computer vision"
tokens = query.lower().split()

# ---- BM25 stage
bm25_scores = bm25.get_scores(tokens)
bm25_top = sorted(
    zip(ids, bm25_scores),
    key=lambda x: x[1],
    reverse=True
)[:30]

bm25_ids = set(i for i, _ in bm25_top)

# ---- Dense stage
query_vector = embedder.encode(query, normalize_embeddings=True).tolist()

dense = client.query_points(
    collection_name=COLLECTION,
    query=query_vector,
    limit=20,
    with_payload=True
)

print("HYBRID RESULTS:")
for p in dense.points:
    if p.payload["arxiv_id"] in bm25_ids:
        print(p.payload["arxiv_id"], p.payload["title"])


<h1> Websearch API

In [None]:
from dotenv import load_dotenv
load_dotenv()
import os

In [None]:
from tavily import TavilyClient
client = TavilyClient(os.environ.get("TAVILY_API_KEY"))
response = client.search(
    query="What are the latest developments in quantum computing?"
)
print(response)

In [None]:
from typing import Literal, Optional
from pydantic import BaseModel

class RagRetrievalResult(BaseModel):
    source: Literal["rag"] = "rag"

    arxiv_id: str
    title: str
    abstract: Optional[str]

    bm25_score: float
    dense_score: float
    relevance_score: float

    url: str


In [None]:
from typing import List

def rag_retrieve(query: str) -> List[RagRetrievalResult]:
    tokens = query.lower().split()

    # ---- BM25 stage
    bm25_scores = bm25.get_scores(tokens)
    bm25_top = sorted(
        zip(ids, bm25_scores),
        key=lambda x: x[1],
        reverse=True
    )[:30]

    bm25_map = {i: score for i, score in bm25_top}

    # ---- Dense stage
    query_vector = embedder.encode(
        query,
        normalize_embeddings=True
    ).tolist()

    dense = client.query_points(
        collection_name=COLLECTION,
        query=query_vector,
        limit=20,
        with_payload=True,
        with_vectors=False
    )

    results: List[RagRetrievalResult] = []

    for p in dense.points:
        arxiv_id = p.payload.get("arxiv_id")

        # enforce hybrid intersection
        if arxiv_id not in bm25_map:
            continue

        bm25_score = bm25_map[arxiv_id]
        dense_score = p.score

        # simple but effective fusion
        relevance_score = 0.6 * dense_score + 0.4 * min(bm25_score / 10.0, 1.0)

        results.append(
            RagRetrievalResult(
                arxiv_id=arxiv_id,
                title=p.payload.get("title", ""),
                abstract=p.payload.get("abstract"),
                bm25_score=bm25_score,
                dense_score=dense_score,
                relevance_score=relevance_score,
                url=f"https://arxiv.org/abs/{arxiv_id}"
            )
        )

    # sort by final relevance
    results.sort(key=lambda r: r.relevance_score, reverse=True)

    return results


In [None]:
from typing import Literal, Optional
from pydantic import BaseModel

class WebRetrievalResult(BaseModel):
    source: Literal["web"] = "web"

    title: str
    content: str
    url: str

    relevance_score: float


In [None]:
from typing import List
from tavily import TavilyClient

tavily = TavilyClient(os.environ.get("TAVILY_API_KEY"))

def web_search(query: str, k: int = 8) -> List[WebRetrievalResult]:
    response = tavily.search(
        query=query,
        max_results=k
    )

    results: List[WebRetrievalResult] = []

    for r in response.get("results", []):
        results.append(
            WebRetrievalResult(
                title=r.get("title", ""),
                content=r.get("content", ""),
                url=r.get("url"),
                relevance_score=float(r.get("score", 0.5))
            )
        )

    return results


In [None]:
from app.graph.graph import graph

app = graph.compile()

initial_state = {
    "user_query": "What is gradient descent?",
    "rag_results": [],
    "web_search_results": [],
    "evidence_score": None,
    "retries": {
        "rag": 0,
        "web": 0,
        "synthesis": 0
    },
}

result = app.invoke(initial_state)

print(result["final_answer"])


  from .autonotebook import tqdm as notebook_tqdm


In [None]:

import atexit
from app.llm.qdrant import close_qdrant_client
app = graph.compile()

initial_state = {
    "user_query": "Tell me about the recent updates in the architercture of transformers",
    "rag_results": [],
    "web_search_results": [],
    "evidence_score": None,
    "retries": {
        "rag": 0,
        "web_search": 0,
        "synthesis": 0
    },
    "max_retries": {
        "rag": 2,
        "web_search": 2,
        "synthesis": 2
    },
    
}


for event in app.stream(
    initial_state,
    stream_mode="updates"
):
    print(event)

atexit.register(close_qdrant_client)


NameError: name 'graph' is not defined

In [1]:
from app.graph.graph import graph
app = graph.compile()

print(app.get_graph().draw_mermaid())


  from .autonotebook import tqdm as notebook_tqdm


---
config:
  flowchart:
    curve: linear
---
graph TD;
	__start__([<p>__start__</p>]):::first
	planner(planner)
	rag(rag)
	web_search(web_search)
	both_retrieval(both_retrieval)
	merge(merge)
	evidence_checker(evidence_checker)
	evaluator(evaluator)
	reducer(reducer)
	summarizer(summarizer)
	degraded_summarizer(degraded_summarizer)
	failure_node(failure_node)
	direct_answer(direct_answer)
	__end__([<p>__end__</p>]):::last
	__start__ --> planner;
	both_retrieval --> rag;
	both_retrieval --> web_search;
	evaluator --> reducer;
	evidence_checker --> evaluator;
	merge --> evidence_checker;
	planner -. &nbsp;both&nbsp; .-> both_retrieval;
	planner -. &nbsp;direct&nbsp; .-> direct_answer;
	planner -. &nbsp;fail&nbsp; .-> failure_node;
	planner -.-> rag;
	planner -. &nbsp;web&nbsp; .-> web_search;
	rag --> merge;
	reducer -. &nbsp;retry_both&nbsp; .-> both_retrieval;
	reducer -. &nbsp;degrade_answer&nbsp; .-> degraded_summarizer;
	reducer -. &nbsp;fail&nbsp; .-> failure_node;
	reducer -. &n

In [2]:
import atexit
from app.llm.qdrant import close_qdrant_client

initial_state = {
    "user_query": "Tell me about the recent updates in the architercture of transformers",
    "rag_results": [],
    "web_search_results": [],
    "evidence_score": None,
    "retries": {
        "rag": 0,
        "web_search": 0,
        "synthesis": 0
    },
    "max_retries": {
        "rag": 2,
        "web_search": 2,
        "synthesis": 2
    },
    
}


for event in app.stream(
    initial_state,
    stream_mode="updates"
):
    print(event)

atexit.register(close_qdrant_client)

{'planner': {'research_relevant': True, 'retrieval_mode': 'both', 'answer_mode': 'grounded'}}
{'both_retrieval': {'user_query': 'Tell me about the recent updates in the architercture of transformers', 'retrieval_mode': 'both', 'answer_mode': 'grounded', 'research_relevant': True, 'rag_results': [], 'web_search_results': [], 'evidence_score': None, 'retries': {'rag': 0, 'web_search': 0, 'synthesis': 0}, 'max_retries': {'rag': 2, 'web_search': 2, 'synthesis': 2}}}
{'rag': {'rag_results': [RagRetrievalResult(source_type='rag', source_id='arxiv', arxiv_id='2212.14538v2', title='Transformer in Transformer as Backbone for Deep Reinforcement Learning', url='https://arxiv.org/abs/2212.14538v2', abstract=None, text=None, year=2022, version='v2', bm25_score=14.880436083286108, dense_score=0.4103936795247529, relevance_score=0.6462362077148518), RagRetrievalResult(source_type='rag', source_id='arxiv', arxiv_id='2110.08975v2', title='Deep Transfer Learning & Beyond: Transformer Language Models in 

<function app.llm.qdrant.close_qdrant_client()>

In [None]:
print(result.keys())

In [None]:
print("ANSWER MODE:", result["answer_mode"])
print("RETRIEVAL MODE:", result["retrieval_mode"])


In [None]:
result['web_search_results']

In [None]:
result['rag_results']

In [None]:
from app.tools.web_search import web_search
web_search("recent upgrades in transformer architecture")