# Imports

In [1]:
import os
import re
import shutil
import threading
from pathlib import Path
from typing import Any, List, Optional, Sequence, Type, Union, Annotated
from langchain.chains import ConversationalRetrievalChain
from langchain.chains.conversational_retrieval.base import BaseConversationalRetrievalChain
from langchain.chains.query_constructor.base import AttributeInfo
from pydantic import BaseModel, Field 
from langchain_community.document_loaders import DirectoryLoader, PyMuPDFLoader
from langchain_community.vectorstores import Chroma
from langchain_core.language_models import BaseLanguageModel
from langchain_core.vectorstores import VectorStore
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.tools import BaseTool

# Building the Retriever

In [2]:
def educated_retriever(
    llm: BaseLanguageModel,
    metadata_field_info: Sequence[Union[AttributeInfo, dict]],
    document_content: str,
    vectordb: VectorStore,
    chain_type: str = "stuff",
) -> BaseConversationalRetrievalChain:
    """
    Assemble a conversational QA chain on top of a self-querying retriever.

    A SelfQueryRetriever is created from ``llm`` and ``vectordb`` using the
    supplied document description and metadata field definitions. That
    retriever is then handed to ``ConversationalRetrievalChain.from_llm``,
    configured with ``chain_type`` and set to return its source documents.

    Args:
        llm: Language model used both by the retriever and by the QA chain.
        metadata_field_info: Descriptions of the filterable metadata fields.
        document_content: Short description of what the documents contain.
        vectordb: Vector store the retriever searches.
        chain_type: Chain type forwarded to the conversational chain.

    Returns:
        The configured conversational retrieval chain.
    """
    self_query_retriever = SelfQueryRetriever.from_llm(
        llm,
        vectordb,
        document_contents=document_content,
        metadata_field_info=metadata_field_info,
        verbose=True,
    )
    chain_options = {
        "chain_type": chain_type,
        "return_source_documents": True,
    }
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=self_query_retriever,
        **chain_options,
    )

In [3]:
class RAGSearchInput(BaseModel):
    """Input schema for the RAG search tool"""
    # Free-text question; the only required argument.
    query: str = Field(..., description="The query to search for in the research papers")
    # Optional override of the directory that holds the PDFs.
    papers_path: Optional[str] = Field(default=None, description="Path to the research papers directory")
    # Optional exact-match metadata filters.
    subject: Optional[str] = Field(default=None, description="Subject area (e.g., 'Artificial Intelligence')")
    topic: Optional[str] = Field(default=None, description="Topic within the subject (e.g., 'Agentic AI', 'Finetuning')")
    year: Optional[int] = Field(default=None, description="Publication year of the paper")
    # Mirrors the `k` parameter of RAGSearchTool._run so that schema-driven
    # callers can control how many chunks are retrieved.
    k: int = Field(default=10, ge=1, description="Number of document chunks to retrieve")
    chain_type: Optional[str] = Field(default="stuff", description="Chain type for the QA system")

In [4]:
class RAGSearchTool(BaseTool):
    # Tool that answers questions over a directory of research-paper PDFs that
    # are indexed in a persisted Chroma collection.

    # --- Public tool fields -------------------------------------------------
    name: Annotated[str, Field(description="Name of the tool")] = "research_paper_search"
    description: Annotated[str, Field(description="Description of the tool")] = """
    Search through research papers using an educated RAG approach.
    Uses metadata filtering (subject, topic, year, paper_title) and conversational capabilities 
    to provide relevant information from AI research papers.
    """
    # Schema describing the arguments the tool accepts.
    args_schema: Type[BaseModel] = RAGSearchInput
    # Directory scanned recursively for PDFs when the index is built.
    default_papers_path: Path
    # Directory in which the Chroma database is persisted.
    persist_directory: Path
    # Name of the Chroma collection used for the index.
    collection_name: str = "research_papers"

    # --- Internal state (underscore-prefixed, filled in later) ---------------
    _qa_chain: Any = None  # conversational retrieval chain, built in _initialize_components
    _vectordb: Any = None  # Chroma store, set by _load_or_build_vectordb
    _embeddings: Any = None  # OpenAIEmbeddings instance, created in __init__
    _init_lock: Any = None  # threading.Lock created in __init__; held while components are built
    _llm: Any = None  # ChatOpenAI instance, created in _initialize_components

    def __init__(
        self,
        default_papers_path: Optional[Path] = None,
        persist_directory: Optional[Path] = None,
        collection_name: str = "research_papers",
    ):
        """
        Resolve the papers and persistence directories and set up internal state.

        When a directory is not supplied, it falls back to a location under
        ``Build Up Phase/Rag`` four levels above the current working
        directory (``Papers`` for the PDFs, ``VectorDB`` for the index).
        The persistence directory is always resolved to an absolute path.
        """
        # Root of the fallback locations, derived from the working directory.
        rag_root = (
            Path(os.getcwd()).resolve().parent.parent.parent.parent
            / "Build Up Phase"
            / "Rag"
        )

        if default_papers_path:
            papers_dir = default_papers_path
        else:
            papers_dir = rag_root / "Papers"

        if persist_directory:
            store_dir = Path(persist_directory)
        else:
            store_dir = Path(rag_root / "VectorDB")
        store_dir = store_dir.resolve()

        # The declared fields are populated through the base-class constructor.
        super().__init__(
            default_papers_path=papers_dir,
            persist_directory=store_dir,
            collection_name=collection_name,
        )

        # Underscore-prefixed state is assigned only after the base class is initialised.
        self._embeddings = OpenAIEmbeddings()
        self._init_lock = threading.Lock()

    def _extract_paper_metadata(self, file_path: Path, base_path: Path) -> dict:
        """
        Extract metadata from research paper filename and path.
        Expected filename format: {paper_title} - {year} - {description}.pdf
        Expected path structure: Papers/Subject/Topic/filename.pdf
        """
        metadata = {
            "subject": "Artificial Intelligence",  # Default for now
            "topic": None,
            "paper_title": None,
            "year": None,
            "file_name": file_path.name,
            "file_path": str(file_path),
        }
        
        # Extract subject and topic from directory structure
        try:
            rel_path = file_path.relative_to(base_path)
            parts = rel_path.parts
            # Expected: Subject/Topic/filename.pdf
            if len(parts) >= 2:
                metadata["subject"] = parts[0]  # e.g., "Artificial Intelligence"
            if len(parts) >= 3:
                metadata["topic"] = parts[1]  # e.g., "Agentic AI", "Finetuning"
        except ValueError:
            pass
        
        # Parse filename: {title} - {year} - {description}.pdf
        filename_stem = file_path.stem  # Remove .pdf extension
        
        # Split by " - " to get parts
        parts = [p.strip() for p in filename_stem.split(" - ")]
        
        if len(parts) >= 1:
            metadata["paper_title"] = parts[0]
        
        if len(parts) >= 2:
            # Try to extract year from second part
            year_match = re.search(r"(\d{4})", parts[1])
            if year_match:
                metadata["year"] = int(year_match.group(1))
            else:
                # If no year in second part, it might be part of title or description
                metadata["paper_title"] = f"{parts[0]} - {parts[1]}"
        
        return metadata

    def _load_or_build_vectordb(self):
        """
        Populate ``self._vectordb``, reusing a persisted index when one exists.

        Order of preference:
          1. An already loaded store is kept as is.
          2. If ``chroma.sqlite3`` exists in ``persist_directory``, the
             persisted collection is opened.
          3. Otherwise every PDF below ``default_papers_path`` is loaded,
             enriched with paper metadata, split into chunks and indexed
             into a new persisted collection.

        Raises:
            ValueError: If an index has to be built but no PDF documents are
                found, so that an empty index is not created.
        """
        if self._vectordb is not None:
            return

        db_file = self.persist_directory / "chroma.sqlite3"
        if db_file.exists():
            self._vectordb = Chroma(
                persist_directory=str(self.persist_directory),
                collection_name=self.collection_name,
                embedding_function=self._embeddings,
            )
            return

        loader = DirectoryLoader(
            str(self.default_papers_path),
            glob="**/*.pdf",
            loader_cls=PyMuPDFLoader,
        )
        raw_docs = loader.load()
        if not raw_docs:
            raise ValueError(
                f"No PDF documents found under {self.default_papers_path}"
            )

        for d in raw_docs:
            p = Path(d.metadata["source"])

            # Research paper metadata derived from the filename and path.
            paper_metadata = self._extract_paper_metadata(p, self.default_papers_path)

            # Path relative to the papers root, or the bare filename when the
            # file is not located below it.
            try:
                relpath = str(p.relative_to(self.default_papers_path))
            except ValueError:
                relpath = p.name
            paper_metadata.update({"doc_id": p.stem, "relpath": relpath})

            # Unknown fields (e.g. topic/year of None) are left out instead of
            # being stored as None; Chroma documents str/int/float/bool
            # metadata values only.
            d.metadata.update(
                {key: value for key, value in paper_metadata.items() if value is not None}
            )

        splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=250)
        split_docs = splitter.split_documents(raw_docs)

        self.persist_directory.mkdir(parents=True, exist_ok=True)
        self._vectordb = Chroma.from_documents(
            documents=split_docs,
            embedding=self._embeddings,
            collection_name=self.collection_name,
            persist_directory=str(self.persist_directory),
        )

    def _initialize_components(self):
        """
        Build the vector store, the LLM and the QA chain on first use.

        Returns immediately when the QA chain already exists. Otherwise the
        components are created while holding ``_init_lock``; the chain is
        checked once more after the lock is acquired so that a caller that
        waited on the lock does not build everything a second time.
        """
        if self._qa_chain is not None:
            return

        # (name, type, description) of each metadata attribute described to
        # the self-query retriever.
        field_specs = (
            (
                "subject",
                "string",
                "Subject area of the research paper (e.g., 'Artificial Intelligence', 'Communication Systems', 'Security')",
            ),
            (
                "topic",
                "string",
                "Specific topic within the subject (e.g., 'Agentic AI', 'Finetuning', 'Hierarchical Reasoning Models')",
            ),
            ("paper_title", "string", "Title of the research paper"),
            ("year", "integer", "Publication year of the research paper"),
            ("file_name", "string", "The filename of the PDF document"),
        )

        with self._init_lock:
            if self._qa_chain is not None:
                return

            self._load_or_build_vectordb()
            metadata_field_info = [
                AttributeInfo(name=field_name, type=field_type, description=field_description)
                for field_name, field_type, field_description in field_specs
            ]
            self._llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")
            self._qa_chain = educated_retriever(
                llm=self._llm,
                metadata_field_info=metadata_field_info,
                document_content="Research papers on Artificial Intelligence topics including Agentic AI, Finetuning, and Hierarchical Reasoning Models",
                vectordb=self._vectordb,
                chain_type="stuff",
            )

    def _build_metadata_filter(self, subject: Optional[str], topic: Optional[str], year: Optional[int]) -> Optional[dict]:
        """
        Build a Chroma-compatible metadata filter from the provided parameters.
        Uses $and to combine multiple conditions.
        """
        conditions = []
        
        if subject:
            conditions.append({"subject": {"$eq": subject}})
        if topic:
            conditions.append({"topic": {"$eq": topic}})
        if year:
            conditions.append({"year": {"$eq": year}})
        
        if not conditions:
            return None
        elif len(conditions) == 1:
            return conditions[0]
        else:
            return {"$and": conditions}

    def _generate_answer_from_docs(self, query: str, docs: List) -> str:
        """Generate an answer from retrieved documents using the LLM."""
        if not docs:
            return "No documents found matching the specified filters."
        
        context = "\n\n---\n\n".join([doc.page_content for doc in docs])
        
        from langchain_core.prompts import ChatPromptTemplate
        
        prompt = ChatPromptTemplate.from_template(
            """Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context:
{context}

Question: {question}

Answer:"""
        )
        
        chain = prompt | self._llm
        response = chain.invoke({"context": context, "question": query})
        return response.content.strip()

    def _format_sources(self, docs: List) -> List[str]:
        """Format source documents into a list of source strings."""
        sources = []
        for i, doc in enumerate(docs, 1):
            md = doc.metadata or {}
            parts = [f"[{i}]"]
            
            if md.get("paper_title"):
                parts.append(f"'{md.get('paper_title')}'")
            if md.get("year"):
                parts.append(f"({md.get('year')})")
            if md.get("topic"):
                parts.append(f"[{md.get('topic')}]")
            if md.get("subject"):
                parts.append(f"- {md.get('subject')}")
            
            page = md.get("page")
            if page is not None:
                parts.append(f"p.{page + 1}")
            
            sources.append(" ".join(parts))
        return sources

    def _run(
        self,
        query: str,
        papers_path: Optional[str] = None,
        subject: Optional[str] = None,
        topic: Optional[str] = None,
        year: Optional[int] = None,
        k: int = 10,
        chain_type: Optional[str] = None,
    ) -> str:
        """
        Execute a RAG search and return the answer followed by its sources.

        When at least one of ``subject``, ``topic`` or ``year`` is given, the
        vector store is queried directly with an explicit metadata filter so
        the filter is guaranteed to be applied. Without filters the question
        goes through the SelfQueryRetriever-backed QA chain.

        Args:
            query: Question to answer.
            papers_path: Optional papers directory. If it differs from the
                current one, the cached components are dropped and the
                persisted index directory is deleted so the index is rebuilt.
            subject: Exact-match subject filter.
            topic: Exact-match topic filter.
            year: Exact-match publication-year filter.
            k: Number of chunks to retrieve.
            chain_type: Accepted because the input schema declares it, so a
                caller supplying it no longer triggers a TypeError. The QA
                chain is built with "stuff" in ``_initialize_components``;
                this value is currently not applied.

        Returns:
            ``"Answer: ...\\n\\nSources:\\n..."`` on success, or
            ``"Error: <repr>"`` if retrieval or answer generation raises.
            Exceptions raised while the components are being initialised are
            not caught here and propagate to the caller.
        """
        if papers_path:
            new_path = Path(papers_path)
            if self.default_papers_path != new_path:
                self.default_papers_path = new_path
                self._qa_chain = None
                self._vectordb = None
                # The persisted index belongs to the previous directory.
                if self.persist_directory.exists():
                    shutil.rmtree(self.persist_directory)

        self._initialize_components()

        # Build explicit metadata filter for Chroma
        metadata_filter = self._build_metadata_filter(subject, topic, year)

        def render(answer: str, docs: List) -> str:
            # Shared output format for both retrieval paths.
            sources = self._format_sources(docs)
            return f"Answer: {answer}\n\nSources:\n" + ("\n".join(sources) if sources else "(none)")

        try:
            if metadata_filter:
                # Explicit filters: bypass the SelfQueryRetriever and search
                # the vector store directly.
                docs = self._vectordb.similarity_search(
                    query,
                    k=k,
                    filter=metadata_filter,
                )
                return render(self._generate_answer_from_docs(query, docs), docs)

            # No explicit filters: let the SelfQueryRetriever interpret the query.
            retr = self._qa_chain.retriever
            orig_kwargs = retr.search_kwargs.copy()
            try:
                retr.search_kwargs["k"] = k
                # `invoke` replaces the deprecated direct call of the chain.
                response = self._qa_chain.invoke({"question": query, "chat_history": []})
                answer = (response.get("answer") or "").strip()
                return render(answer, response.get("source_documents") or [])
            finally:
                # Undo the temporary `k` override on the shared retriever.
                retr.search_kwargs = orig_kwargs

        except Exception as e:
            return f"Error: {e!r}"

# Trying the Retriever

In [5]:
# Initialize the RAG tool with explicit paths for notebook usage.
# NOTE: these are absolute, machine-specific Windows paths; adjust them when
# running the notebook on another machine.
papers_path = Path(r"c:\Users\User\Desktop\llms\Project\Research Assistant Multi Agent System\Build Up Phase\Rag\Papers")
vectordb_path = Path(r"c:\Users\User\Desktop\llms\Project\Research Assistant Multi Agent System\Build Up Phase\Rag\VectorDB")

# Module-level tool instance; the later cells and `_research_probe_fn` use it.
rag_tool = RAGSearchTool(
    default_papers_path=papers_path,
    persist_directory=vectordb_path,
    collection_name="research_papers"
)
print("RAG tool initialized")

RAG tool initialized


In [6]:
# Query 2: Filter by topic - Finetuning.
# `_run` is called directly; because a topic is given, it searches the vector
# store with an explicit metadata filter.
result = rag_tool._run("What is QLoRA and how does it help with fine-tuning LLMs?", topic="Finetuning")
print(result)

Answer: QLoRA is an efficient fine-tuning approach designed to reduce memory usage, enabling the fine-tuning of large language models (LLMs) with billions of parameters on consumer-grade GPUs. Specifically, it allows for the fine-tuning of a 65 billion parameter model on a single 48GB GPU while maintaining performance comparable to full 16-bit fine-tuning. QLoRA achieves this by backpropagating gradients through a frozen, 4-bit quantized pretrained language model into Low Rank Adapters (LoRA).

The method introduces several innovations to save memory without sacrificing performance, including:

1. **4-bit NormalFloat (NF4)**: A new data type that is optimal for normally distributed weights.
2. **Double Quantization**: This technique reduces the average memory footprint by quantizing the quantization constants.
3. **Paged Optimizers**: These help manage memory spikes during the training process.

QLoRA has been shown to outperform previous models on benchmarks like Vicuna, achieving hig

In [7]:
# Query 3: Filter by year - 2025 papers only (retry with updated metadata extraction).
# Only chunks whose `year` metadata equals 2025 are retrieved.
result = rag_tool._run("What are the latest advances in hierarchical reasoning and agentic AI?", year=2025)
print(result)

Answer: I don't know.

Sources:
[1] 'AI Agents vs Agentic AI, A Conceptual Taxonomy Applications and Challenges' (2025) [Agentic AI] - Artificial Intelligence p.27
[2] 'AI Agents vs Agentic AI, A Conceptual Taxonomy Applications and Challenges' (2025) [Agentic AI] - Artificial Intelligence p.1
[3] 'AI Agents vs Agentic AI, A Conceptual Taxonomy Applications and Challenges' (2025) [Agentic AI] - Artificial Intelligence p.3
[4] 'AI Agents vs Agentic AI, A Conceptual Taxonomy Applications and Challenges' (2025) [Agentic AI] - Artificial Intelligence p.24
[5] 'AI Agents vs Agentic AI, A Conceptual Taxonomy Applications and Challenges' (2025) [Agentic AI] - Artificial Intelligence p.3
[6] 'AI Agents vs Agentic AI, A Conceptual Taxonomy Applications and Challenges' (2025) [Agentic AI] - Artificial Intelligence p.3
[7] 'AI Agents vs Agentic AI, A Conceptual Taxonomy Applications and Challenges' (2025) [Agentic AI] - Artificial Intelligence p.27
[8] 'AI Agents vs Agentic AI, A Conceptual Taxon

In [8]:
# Query 4: Filter by topic - Hierarchical Reasoning Models.
# The topic string must match the stored `topic` metadata exactly ($eq filter).
result = rag_tool._run("How do small neural networks solve complex reasoning tasks?", topic="Hierarchical Reasoning Models")
print(result)

Answer: Small neural networks solve complex reasoning tasks through approaches like recursive hierarchical reasoning and deep supervision. These methods allow the networks to process information at different frequencies and improve their predictions iteratively. For instance, the Hierarchical Reasoning Model (HRM) uses two small networks that operate at high and low frequencies to generate different latent features, which are then used to refine answers. Additionally, deep supervision enhances performance by providing multiple feedback steps during training, allowing the model to learn from its mistakes and improve accuracy. The Tiny Recursive Model (TRM) further simplifies this process by using a single small network with fewer layers, achieving better generalization while minimizing overfitting. This combination of recursion and efficient architecture enables small networks to tackle complex reasoning tasks effectively.

Sources:
[1] 'Less is More, Recursive Reasoning with Tiny Netwo

In [9]:
# Query 5: Question about RAG and sentiment analysis.
# No subject/topic/year filter is given, so `_run` answers through the
# SelfQueryRetriever-backed QA chain instead of a direct filtered search.
result = rag_tool._run("How can RAG be combined with fine-tuning for sentiment analysis?")
print(result)

  response = self._qa_chain({"question": query, "chat_history": []})


  response = self._qa_chain({"question": query, "chat_history": []})


Answer: RAG (Retrieval-Augmented Generation) can be combined with fine-tuning for sentiment analysis by integrating the retrieval of relevant documents with the generation of sentiment classifications. The process typically involves the following steps:

1. **Document Retrieval**: RAG first identifies relevant documents from a vector database based on a given query. This is done using a retriever component that finds the most pertinent information to provide context for the sentiment analysis task.

2. **Combining Inputs**: The retrieved documents are then concatenated with the original query to form a combined input. This enriched input provides the language model with additional context that is relevant to the sentiment being analyzed.

3. **Fine-Tuning**: The language model (such as Llama-2, Llama-3, GPT-3.5 Turbo, or GPT-4o Mini) is fine-tuned on domain-specific data, such as financial reviews. Techniques like LoRA (Low-Rank Adaptation) and quantization can be used to efficiently a

In [10]:
# Query 6: Filter by year - 2024 papers only.
# Only chunks whose `year` metadata equals 2024 are retrieved.
result = rag_tool._run("What fine-tuning techniques are discussed?", year=2024)
print(result)

Answer: The fine-tuning techniques discussed include:

1. **Dynamic Modification of Hyper-parameters**: This involves adjusting parameters such as the number of epochs, batch size, and learning rate based on dataset properties.

2. **Chain-of-Thought Reasoning and ReAct**: These methods are used to align model outputs with task objectives, allowing the models to capture nuanced sentiment cues.

3. **Low-Rank Adaptation (LoRA)**: This technique allows for parameter-efficient fine-tuning by selectively updating a subset of model weights.

4. **Quantisation**: Specifically, four-bit quantisation is used to reduce resource needs while retaining model performance.

5. **Prompt Engineering**: Tailored prompts are designed to aid the models' interpretation processes, enhancing their ability to manage regional idioms and sentiment subtleties.

6. **Few-shot Learning**: This approach is employed to help the models adapt to specific tasks with minimal examples.

7. **Cross-Entropy Loss Function*

# Building the tool

In [21]:
from typing import Literal, Dict
from langchain.tools import StructuredTool


# ============================================================================
# Pydantic Models for Research RAG Tool (Agent Interface)
# ============================================================================

class SourceReference(BaseModel):
    """A reference to a source document."""

    # Every field is optional: a chunk may lack any of these metadata entries.
    paper_title: Optional[str] = Field(
        default=None,
        description="Title of the research paper",
    )
    year: Optional[int] = Field(
        default=None,
        description="Publication year",
    )
    topic: Optional[str] = Field(
        default=None,
        description="Topic area",
    )
    subject: Optional[str] = Field(
        default=None,
        description="Subject area",
    )
    page: Optional[int] = Field(
        default=None,
        description="Page number",
    )


class ResearchProbeResponse(BaseModel):
    """Structured response from the research RAG tool."""

    # Required fields.
    topic: str = Field(
        ...,
        description="The searched topic",
    )
    response: str = Field(
        ...,
        description="Answer in markdown format",
    )
    query: str = Field(
        ...,
        description="Original query",
    )

    # Fields with defaults.
    category: str = Field(
        default="General",
        description="Category: 'Agentic AI', 'Finetuning', 'Hierarchical Reasoning Models', 'General', or 'Not Found'",
    )
    sources: List[SourceReference] = Field(
        default_factory=list,
        description="Source references",
    )
    confidence: float = Field(
        default=0.0,
        ge=0.0,
        le=1.0,
        description="Confidence score (0-1)",
    )
    filters_applied: Dict[str, Any] = Field(
        default_factory=dict,
        description="Applied filters",
    )


class ResearchProbeArgs(BaseModel):
    """Input arguments for the research paper probe tool."""

    # The question itself is the only required argument.
    query: str = Field(
        ...,
        description="The research question to answer",
    )

    # Optional exact-match metadata filters.
    topic: Optional[str] = Field(
        default=None,
        description="Topic filter: 'Agentic AI', 'Finetuning', 'Hierarchical Reasoning Models'",
    )
    subject: Optional[str] = Field(
        default=None,
        description="Subject filter (e.g., 'Artificial Intelligence')",
    )
    year: Optional[int] = Field(
        default=None,
        description="Publication year filter",
        ge=1900,
        le=2100,
    )

    # Retrieval size, limited to the range 1-50.
    k: int = Field(
        default=10,
        ge=1,
        le=50,
        description="Number of documents to retrieve",
    )


# ============================================================================
# Core Tool Function (Direct metadata access - no parsing needed)
# ============================================================================

def _format_probe_source(index: int, source: "SourceReference") -> str:
    """Render one source as a numbered markdown line, leaving out unknown fields."""
    parts = [f"{index}. *{source.paper_title}*"]
    if source.year is not None:
        parts.append(f"({source.year})")
    if source.page is not None:
        parts.append(f"p.{source.page}")
    if source.topic:
        parts.append(f"[{source.topic}]")
    return " ".join(parts)


def _research_probe_fn(
    query: str,
    topic: Optional[str] = None,
    subject: Optional[str] = None,
    year: Optional[int] = None,
    k: int = 10
) -> Dict[str, Any]:
    """
    Search the research papers and return a structured response.

    Documents are fetched straight from the vector store of the module-level
    ``rag_tool`` (with a metadata filter when any of topic/subject/year is
    given), an answer is generated from them, and the result is packaged as
    a dumped ``ResearchProbeResponse``.

    Args:
        query: Research question to answer.
        topic: Optional exact-match topic filter.
        subject: Optional exact-match subject filter.
        year: Optional exact-match publication-year filter.
        k: Number of chunks to retrieve.

    Returns:
        A dict with the keys ``topic``, ``category``, ``response``
        (markdown), ``sources``, ``confidence``, ``query`` and
        ``filters_applied``. Any exception is reported in the same shape,
        with category "Not Found", confidence 0.0 and the error text in
        ``response``.
    """
    filters_applied = {key: val for key, val in [("topic", topic), ("subject", subject), ("year", year)] if val}
    category = topic if topic else "General"
    # Short label taken from the question; used on success and on error alike.
    topic_label = query.split("?")[0][:50]

    try:
        # Initialize and get docs directly from vector store
        rag_tool._initialize_components()
        metadata_filter = rag_tool._build_metadata_filter(subject, topic, year)

        search_kwargs: Dict[str, Any] = {"k": k}
        if metadata_filter:
            search_kwargs["filter"] = metadata_filter
        docs = rag_tool._vectordb.similarity_search(query, **search_kwargs)

        # Generate answer
        answer = rag_tool._generate_answer_from_docs(query, docs)

        # Build sources directly from document metadata (no parsing!)
        sources = []
        for doc in docs:
            md = doc.metadata or {}
            page = md.get("page")
            sources.append(SourceReference(
                paper_title=md.get("paper_title"),
                year=md.get("year"),
                topic=md.get("topic"),
                subject=md.get("subject"),
                # The stored page index is shifted by one for display.
                page=page + 1 if page is not None else None,
            ))
            # Without a topic filter, the first source that carries a topic
            # decides the category.
            if not topic and md.get("topic") and category == "General":
                category = md.get("topic")

        # The model may write the apostrophe in either form.
        lowered_answer = (answer or "").lower()
        answer_unknown = "don't know" in lowered_answer or "don\u2019t know" in lowered_answer

        # Confidence: 0.1 per source plus 0.3 for a real answer, capped at 1.
        # With no sources at all the "no documents" message is not evidence,
        # so confidence stays at zero.
        if sources:
            confidence = min(1.0, len(sources) * 0.1 + (0.3 if answer and not answer_unknown else 0.0))
        else:
            confidence = 0.0
        if (not sources or answer_unknown) and not topic:
            category = "Not Found"

        # Format markdown response
        md_response = f"## Answer\n\n{answer or '*No answer available.*'}\n"
        source_lines = [
            _format_probe_source(i, s)
            for i, s in enumerate(sources, 1)
            if s.paper_title
        ]
        if source_lines:
            md_response += "\n## Sources\n\n" + "\n".join(source_lines)

        return ResearchProbeResponse(
            topic=topic_label, category=category, response=md_response,
            sources=sources, confidence=confidence, query=query, filters_applied=filters_applied
        ).model_dump()

    except Exception as e:
        return ResearchProbeResponse(
            topic=topic_label, category="Not Found", response=f"## Error\n\n{str(e)}",
            sources=[], confidence=0.0, query=query, filters_applied=filters_applied
        ).model_dump()


# ============================================================================
# StructuredTool for Agent Registration
# ============================================================================

# Wrap `_research_probe_fn` as a StructuredTool: arguments are described by
# `ResearchProbeArgs`, and the description below is the text shown to the agent.
research_probe = StructuredTool.from_function(
    name="research_paper_probe",
    description="""Search AI research papers to answer questions.

Filters:
- topic: 'Agentic AI', 'Finetuning', 'Hierarchical Reasoning Models'
- year: Publication year (e.g., 2024, 2025)
- subject: Subject area (e.g., 'Artificial Intelligence')

Returns: topic, category, response (markdown), sources, confidence (0-1)""",
    func=_research_probe_fn,
    args_schema=ResearchProbeArgs,
)

# Notebook feedback that the cell ran to completion.
print("Research RAG Tool defined successfully!")

Research RAG Tool defined successfully!


In [22]:
# Test the tool through its public `invoke` interface, combining a topic and
# a year filter and limiting retrieval to five chunks.
result = research_probe.invoke({
    "query": "What are the main techniques for fine-tuning LLMs?",
    "topic": "Finetuning",
    "year": 2024,
    "k": 5
})

# The tool returns a plain dict (the dumped ResearchProbeResponse).
print(f"Category: {result['category']}")
print(f"Confidence: {result['confidence']:.2f}")
print(f"\n{result['response']}")

Category: Finetuning
Confidence: 0.80

## Answer

The main techniques for fine-tuning LLMs include:

1. **Dynamic Modification of Hyper-parameters**: This involves adjusting parameters such as the number of epochs, batch size, and learning rate based on dataset properties.

2. **Use of Domain-Specific Data**: Fine-tuning is performed using datasets that reflect the specific language and cultural peculiarities of the target domain, such as customer reviews in the financial industry.

3. **Targeted Prompts**: Prompts designed for Few-shot Learning, Chain-of-Thought reasoning, and ReAct are used to align model outputs with task objectives.

4. **Low-Rank Adaptation (LoRA)**: This technique is employed to improve model performance in sentiment analysis tasks.

5. **Quantisation**: This method is used to enhance the efficiency of the models during fine-tuning.

6. **Cross-Entropy Loss Function**: This function is utilized to reduce classification mistakes during the training process.

These