<a href="https://colab.research.google.com/github/goelnikhils-lgtm/languagemodels/blob/main/DynamicFunctionCalling_AGENT_MCP_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Third-party packages for this notebook, one `pip install` per package.
# Imported by the cells visible in this notebook: pypdf, tiktoken, faiss-cpu, openai.
# NOTE(review): langchain-core, langchain-community, pypdf2 and langgraph are not
# imported by any visible cell - confirm they are still needed.
# NOTE(review): a later cell imports from `langchain.embeddings.openai`, but there
# is no explicit install of `langchain` here - confirm that import resolves.
!pip install pypdf
!pip install langchain-core
!pip install langchain-community
!pip install tiktoken
!pip install faiss-cpu
!pip install openai
!pip install pypdf2
!pip install langgraph

# Reference: https://abvijaykumar.medium.com/hands-on-agentic-rag-2-3-agentic-reranking-rag-773b04cf4cdd

In [None]:
#agentic RAG
from typing import List
import tiktoken
# Shared tokenizer for the notebook; "cl100k_base" is a known tiktoken encoding name.
ENC = tiktoken.get_encoding("cl100k_base")


def num_tokens(text: str) -> int:
  """Return the number of tokens ``ENC`` produces for ``text``."""
  token_ids = ENC.encode(text)
  return len(token_ids)

In [None]:
from pypdf import PdfReader

class PDFLoaderAgent():
  """Read a PDF and split its text into overlapping, token-sized chunks.

  Attributes:
    chunk_size: Maximum number of tokens in one chunk.
    chunk_overlap: Number of tokens shared by two consecutive chunks.
    verbose: Stored as given; not read by this class.
  """

  def __init__(self,chunk_size=500,chunk_overlap=50,verbose=False):
    self.chunk_size=chunk_size
    self.chunk_overlap=chunk_overlap
    self.verbose=verbose

  def load_and_split(self,pdf_path:str) -> List[str]:
    """Extract the text of every page and return it as token-window chunks.

    Args:
      pdf_path: Path of the PDF file to read.

    Returns:
      Decoded text chunks in document order. Each chunk holds at most
      ``chunk_size`` tokens and starts ``chunk_size - chunk_overlap`` tokens
      after the previous one. An empty list when no text could be extracted.

    Raises:
      ValueError: If ``chunk_overlap`` is negative or not smaller than
        ``chunk_size`` (the window would never advance).
    """
    stride = self.chunk_size - self.chunk_overlap
    if self.chunk_overlap < 0 or stride <= 0:
      raise ValueError(
        "chunk_overlap must satisfy 0 <= chunk_overlap < chunk_size "
        f"(got chunk_size={self.chunk_size}, chunk_overlap={self.chunk_overlap})"
      )

    reader = PdfReader(pdf_path)
    # extract_text() may return None/"" for image-only pages; treat those as empty.
    full_text = "\n".join(page.extract_text() or "" for page in reader.pages)
    tokens = ENC.encode(full_text)

    chunks = []
    total = len(tokens)
    for start in range(0, total, stride):
      end = min(start + self.chunk_size, total)
      chunks.append(ENC.decode(tokens[start:end]))
      if end == total:
        # The window already reached the last token; another step would only
        # repeat a tail that is fully contained in this chunk.
        break
    return chunks

In [None]:
#Embedding Agent
from langchain.embeddings.openai import OpenAIEmbeddings
import faiss
import openai
import numpy as np

class EmbeddingAgent():
  """Embed texts through the OpenAI embeddings endpoint and store them in FAISS.

  Attributes:
    dim: Dimensionality of the vectors held by ``index``.
    index: ``faiss.IndexFlatL2`` that receives the embedded texts.
  """

  def __init__(self,dim=1536):
    self.dim = dim
    # IndexFlatL2 is an exact, brute-force L2 index (not an inverted-file/IVF one).
    self.index = faiss.IndexFlatL2(self.dim)

  def embed(self,texts:List[str]) ->List[List[float]]:
    """Return one embedding vector per entry of ``texts``.

    An empty ``texts`` returns ``[]`` without calling the API.
    """
    if not texts:
      return []
    # openai>=1.0 removed `openai.Embedding`; `openai.embeddings.create` is the
    # 1.x call and matches the `openai.chat.completions` usage in this notebook.
    response = openai.embeddings.create(input=texts,model=EMBED_MODEL)
    return [item.embedding for item in response.data]

  def add_to_index(self,texts:List[str]):
    """Embed ``texts`` and append the vectors to ``index``.

    Does nothing when ``texts`` is empty.

    Raises:
      ValueError: If the returned embeddings are not ``dim``-dimensional.
    """
    if not texts:
      return
    vecs = np.asarray(self.embed(texts), dtype="float32")
    if vecs.ndim != 2 or vecs.shape[1] != self.dim:
      raise ValueError(
        f"expected embeddings of shape (n, {self.dim}), got {vecs.shape}"
      )
    self.index.add(vecs)

In [None]:
#Retrieval Agent
class RetrievalAgent():
  """Retrieve several alternative context sets for a query from a vector index.

  Attributes:
    index: Object exposing ``search(vectors, k)`` that returns
      ``(distances, ids)``, as a FAISS index does.
    embedding_agent: Object exposing ``embed(list_of_texts)``. When ``None``
      an ``EmbeddingAgent`` is created on first use.
  """

  def __init__(self,index,embedding_agent=None):
    self.index = index
    self.embedding_agent = embedding_agent

  def retrieve_candidates(self,query,texts,n_candidates=3,k=5):
    """Return ``n_candidates`` lists of retrieved texts for ``query``.

    The query embedding is perturbed with small Gaussian noise before each
    search so the candidate lists can differ from one another.

    Args:
      query: Question text to embed.
      texts: Chunk texts, positionally aligned with the rows of ``index``.
      n_candidates: Number of perturbed searches to run.
      k: Number of neighbours requested per search.

    Returns:
      A list with ``n_candidates`` entries, each a list of at most ``k`` texts.
    """
    if self.embedding_agent is None:
      # `embed` is an instance method, so an instance is required.
      self.embedding_agent = EmbeddingAgent()
    base_emb = np.asarray(self.embedding_agent.embed([query])[0])

    candidates=[]
    for _ in range(n_candidates):
      noise = np.random.normal(0,0.01,base_emb.shape)
      perturbed = (base_emb + noise).astype("float32").reshape(1,-1)
      _, ids = self.index.search(perturbed,k)
      # FAISS pads missing neighbours with -1; without the lower bound that
      # would silently pick texts[-1].
      retrieved = [texts[j] for j in ids[0] if 0 <= j < len(texts)]
      candidates.append(retrieved)
    return candidates

In [None]:
#QA Agent
class QAAgent():
  """Answer a question from retrieved context with an OpenAI chat model.

  Attributes:
    model: Model name passed to ``openai.chat.completions.create``.
  """

  def __init__(self,model = "CHAT_MODEL"):
    # NOTE(review): the default is the literal string "CHAT_MODEL", not a
    # constant holding a model name - pass a real model name, or confirm a
    # CHAT_MODEL constant was intended here.
    self.model = model

  def answer(self,question,context):
    """Return the model's answer to ``question`` given ``context``.

    Args:
      question: The user question.
      context: Iterable of text chunks to ground the answer.

    Returns:
      The stripped answer text; ``""`` when the response carries no content.
    """
    # Put the delimiter on its own line so it is not glued to the end of a chunk.
    context_str = "\n---\n".join(context)
    prompt = ("You are an expert assistant. Use the following context to answer the question.\n\n"
              f"Context:\n{context_str}\n\n"
              f"Question: {question}\n"
              "Answer:")
    resp = openai.chat.completions.create(
      model=self.model,
      messages=[{"role": "system", "content":prompt}],
      temperature=0.2,
      max_tokens=500
    )
    # `content` is optional in the API response; avoid calling .strip() on None.
    content = resp.choices[0].message.content
    return (content or "").strip()

  def answer_parallel(self,question,candidate_contexts):
    """Answer ``question`` once per context set, concurrently.

    Returns:
      Answers in the same order as ``candidate_contexts``.
    """
    from concurrent.futures import ThreadPoolExecutor
    from functools import partial
    # Threads suit this fan-out: each call spends its time waiting on the API.
    with ThreadPoolExecutor() as executor:
      return list(executor.map(partial(self.answer, question),candidate_contexts))

In [None]:
#Ranking Agent
class RankingAgent():
  """Ask an OpenAI chat model to pick the best of several candidate answers.

  Attributes:
    model: Model name passed to ``openai.chat.completions.create``.
  """

  def __init__(self, model = "CHAT_MODEL"):
    # NOTE(review): the default is the literal string "CHAT_MODEL", not a
    # constant holding a model name - confirm which was intended.
    self.model = model

  def rank(self,question,candidate_answers,candidate_contexts):
    """Select the best candidate answer.

    Args:
      question: The user question.
      candidate_answers: Non-empty sequence of answers, one per candidate.
      candidate_contexts: Context chunk lists, aligned with ``candidate_answers``.

    Returns:
      ``(answer, candidate_idx)``: the chosen answer text and its zero-based
      index. Falls back to the first candidate when the model reply cannot be
      parsed or names a candidate that does not exist.

    Raises:
      ValueError: If ``candidate_answers`` is empty.
    """
    import re

    if not candidate_answers:
      raise ValueError("candidate_answers must not be empty")

    print("[Ranking Agent] All candidates contexts and answers")
    for idx,(ctx,ans) in enumerate(zip(candidate_contexts,candidate_answers),1):
      print(f"\n Candidate #{idx} Context: \n{ctx}\n Answer: {ans}")
      for chunk in ctx:
        print(f"\n Chunk: {chunk}")
      print(f"Candidate #{idx} Answer: {ans}")

    # NOTE(review): both prompt parts are placeholders. The real prompt must
    # include the question and candidates and request the reply format parsed
    # below ("Candidate #N", " Reason: ...", "Best Answer:").
    ranking_prompt = "...." #detailed LLM Prompt
    summary = "...."
    full_prompt = ranking_prompt + summary
    resp = openai.chat.completions.create(
      model=self.model,
      messages=[{"role": "system", "content":full_prompt}],
      temperature=0.2,
      max_tokens=500
    )
    # `content` is optional in the API response; avoid calling .strip() on None.
    response_text = (resp.choices[0].message.content or "").strip()
    print("[Ranking Agent] Ranking Response" + response_text)

    m = re.search(r"Candidate #(\d+)\s*\n Reason:([^\n]*)\n+Best Answer:\n(.+)",response_text,re.DOTALL)
    if m:
      candidate_idx = int(m.group(1)) - 1
      # The index comes from model output, so check it before trusting it.
      if 0 <= candidate_idx < len(candidate_answers):
        reason = m.group(2).strip()
        best_answer = m.group(3).strip()
        print(f"Selected Candidates #{candidate_idx+1}. Reason : {reason}")
        return best_answer , candidate_idx
      print(f"Ranking output named unknown candidate #{candidate_idx+1} , returning first candidate")
    else:
      print("Could not parse ranking output , returning first candidate")
    return candidate_answers[0] , 0

In [None]:
#RAG Orchestrator
class RAGOrchestrator():
  """Wire the loader, embedding, retrieval, QA and ranking agents into one pipeline.

  Attributes:
    text_chunks: Texts of all ingested chunks, positionally aligned with the
      rows of ``embedding_agent.index``.
    retriver: Retrieval agent used by ``query``; ``None`` until ``ingest`` ran.
    n_candidates: Number of candidate context sets retrieved per query.
    k: Number of chunks requested per candidate.
  """

  def __init__(self,n_candidates =3, k=5):
    self.loader_agent = PDFLoaderAgent()
    self.embedding_agent = EmbeddingAgent()
    self.retrieval_agent = RetrievalAgent(self.embedding_agent.index)
    self.qa_agent = QAAgent()
    self.ranking_agent = RankingAgent()
    self.text_chunks = []
    self.retriver = None
    self.n_candidates = n_candidates
    self.k = k

  def ingest(self,pdf_path):
    """Load ``pdf_path``, index its chunks and enable querying.

    May be called several times; chunks accumulate across calls.
    """
    new_chunks = self.loader_agent.load_and_split(pdf_path)
    if new_chunks:
      self.embedding_agent.add_to_index(new_chunks)
      # The index only ever grows, so the text list must grow with it;
      # replacing it would misalign index row ids and texts on a second ingest.
      self.text_chunks.extend(new_chunks)
    # retrieval_agent already wraps embedding_agent.index, so reuse it.
    self.retriver = self.retrieval_agent

  def query(self,question):
    """Answer ``question`` from the ingested documents.

    Raises:
      RuntimeError: If called before ``ingest``.
    """
    if self.retriver is None:
      raise RuntimeError("No document ingested yet; call ingest() before query().")
    candidates = self.retriver.retrieve_candidates(question,self.text_chunks,n_candidates=self.n_candidates,k=self.k)
    candidate_answers = self.qa_agent.answer_parallel(question,candidates)
    final_answer , _ = self.ranking_agent.rank(question,candidate_answers,candidates)
    return final_answer
