In [None]:
%pip install langgraph langchain transformers sentence-transformers torch
%pip install -qU langchain-chroma
%pip install huggingface_hub[hf_xet]

In [2]:
from typing import TypedDict, List
from langgraph.graph import StateGraph, END
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFacePipeline, HuggingFaceEmbeddings
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from sentence_transformers import SentenceTransformer
import numpy as np

In [3]:
# Defining agent state
class AgentState(TypedDict):
    user_query: str
    retrieved_docs: List[str]
    final_answer: str
    needs_retrieval: bool

# Load Hugging Face LLM
model_id = "microsoft/phi-2"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

hf_pipeline = pipeline("text-generation",
                       model=model,
                       tokenizer=tokenizer,
                       max_new_tokens = 300,
                       temperature = 0.2
                       )

llm = HuggingFacePipeline(pipeline=hf_pipeline)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.36s/it]


In [4]:
# Define Embedding Model
embedder = HuggingFaceEmbeddings(model_name = "all-MiniLM-L6-v2")

# Provide the existing chroma vector store path
VECTOR_DB_PATH = "../Data/vectorstore"
vector_store = Chroma(
    collection_name= "pdf_documents",  # Provide your collection name. Not sure of collection name, use the below code
    # print(vector_store._collection.count())
    persist_directory= VECTOR_DB_PATH,
    embedding_function= embedder
)




In [5]:
results = vector_store.similarity_search("What is machine learning", k=1)
print(results[0].page_content)


ARCH2021.1 Shapiro_Machine Learning ... 00e6 1 
Machine Learning: what is it and what are its components? 
-- some preliminary observations1 
 
Arnold F. Shapiro 
Penn State University, Smeal College of Business, University Park, PA 16802, USA 
Abstract 
This article focuses on conceptualizing machine learning (ML) concepts.  The general topics 
covered are supervised learning based on regression and classification, unsupervised 
learning based on clustering and dimensionality reduction, and rei


In [6]:
def retrieve_node(state: AgentState):
    """
    This node is responsible ONLY for retrieval.
    It does not generate answers.
    """

    # Extract the user query from agent state
    query = state["user_query"]

    # Ask vector store for semantically similar chunks
    results = vector_store.similarity_search(
        query=query,
        k=3
    )

    # Extract raw text from Document objects
    retrieved_texts = [doc.page_content for doc in results]

    # Return partial state update (LangGraph merges state)
    return {
        "retrieved_docs": retrieved_texts
    }

In [14]:
# Now we will create Decision node. Here it will check whether the RAG itself is enough or other tool needs to be called.
def decide_node(state: AgentState):
    prompt = f"""
    User request:
    {state['user_query']}
    Do you need to retrieve external documents to answer this?
    Answer only YES or NO.
    """

    response = llm.invoke(prompt).strip().upper()

    needs_retrieval = "YES" in response

    return {
        "needs_retrieval": needs_retrieval
    }


In [20]:
def answer_node(state: AgentState):
    context = "\n\n".join(state.get("retrieved_docs", []))

    prompt = f"""
Answer the question strictly using the context below.

Context:
{context}

Question:
{state['user_query']}

Rules:
- If the answer is not present in the context, reply exactly:
  "I do not have enough information to answer this."
- Do NOT repeat the question.
- Do NOT repeat the context.
- Give a concise answer.
"""

    response = llm.invoke(prompt)

    # HuggingFace models return full text; extract only the answer
    answer = response.split("Rules:")[-1].strip()

    return {
        "final_answer": answer
    }


In [21]:
# Build the LangGraph
graph=StateGraph(AgentState)
graph.add_node("decide", decide_node)
graph.add_node("retrieve", retrieve_node)
graph.add_node("answer", answer_node)

<langgraph.graph.state.StateGraph at 0x29d7c58c710>

In [22]:
def route_after_decision(state: AgentState):
    if state["needs_retrieval"]:
        return "retrieve"
    else:
        return "answer"

In [23]:
# Now we will define control flow
graph.set_entry_point("decide")
graph.add_conditional_edges(
    "decide",
    route_after_decision,
    {
        "retrieve": "retrieve",
        "answer": "answer"
    }
)
graph.add_edge("retrieve", "answer")
graph.add_edge("answer", END)

<langgraph.graph.state.StateGraph at 0x29d7c58c710>

In [24]:
# Compile and Run
agent = graph.compile()
result = agent.invoke(
    {
        "user_query": "What is machine Learning?"
    }
)
print(result["final_answer"])

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


- If the answer is not present in the context, reply exactly:
  "I do not have enough information to answer this."
- Do NOT repeat the question.
- Do NOT repeat the context.
- Give a concise answer.
- Do NOT use any external resources.

Answer:
Machine Learning is a field that combines concepts and results from various disciplines such as statistics, artificial intelligence, philosophy, information theory, biology, cognitive science, computational complexity, and control theory. It aims to develop algorithms and models that enable computers to learn from data and make predictions or decisions without being explicitly programmed.

Explanation:
Machine Learning is a multidisciplinary field that draws on concepts and results from various disciplines. It involves developing algorithms and models that allow computers to learn from data and make predictions or decisions without being explicitly programmed. This field combines ideas from statistics, artificial intelligence, philosophy, inform