In [4]:
pip install -U langchain-community

Collecting langchain-community
  Downloading langchain_community-0.4.1-py3-none-any.whl.metadata (3.0 kB)
Collecting langchain-classic<2.0.0,>=1.0.0 (from langchain-community)
  Downloading langchain_classic-1.0.1-py3-none-any.whl.metadata (4.2 kB)
Collecting requests<3.0.0,>=2.32.5 (from langchain-community)
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting dataclasses-json<0.7.0,>=0.6.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7.0,>=0.6.7->langchain-community)
  Downloading marshmallow-3.26.2-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7.0,>=0.6.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting langchain-core<2.0.0,>=1.0.1 (from langchain-community)
  Downloading langchain_core-1.2.7-py3-none-any.whl.metadata (3.7 kB)
Collecting langch

In [14]:
!pip install pypdf



In [21]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.6 kB)
Downloading faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (23.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.8/23.8 MB[0m [31m20.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.13.2


In [24]:
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# ---- Configuration: everything a reader might want to tune ----
PDF_PATH = "/content/Workshop 5 FAQ_V1.pdf"   # source document for the knowledge base
INDEX_DIR = "vectorstore"                     # folder the FAISS index is saved to (relative to cwd)
EMBEDDING_MODEL = "BAAI/bge-base-en-v1.5"     # must match the model rag.py loads the index with
CHUNK_SIZE = 500                              # passed to RecursiveCharacterTextSplitter
CHUNK_OVERLAP = 50                            # passed to RecursiveCharacterTextSplitter

# Load the FAQ PDF into LangChain documents. `docs` is a plain list
# accumulator, so results from further loaders can be appended the same way.
docs = []
docs += PyPDFLoader(PDF_PATH).load()

# Split the loaded documents into overlapping chunks for retrieval.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP
)
chunks = splitter.split_documents(docs)

# Fail fast with a readable message: without this guard an empty chunk list
# (e.g. a PDF with no extractable text) only surfaces later as an opaque
# error inside the vector-store build.
if not chunks:
    raise ValueError(
        f"No text chunks were produced from {PDF_PATH!r}; "
        "check that the file exists and contains extractable text."
    )

# Same embedding configuration as rag.py, so query vectors are comparable
# with the vectors stored in the index.
embeddings = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL,
    encode_kwargs={"normalize_embeddings": True}
)

# Embed every chunk, build the FAISS index and persist it to disk.
db = FAISS.from_documents(chunks, embeddings)
db.save_local(INDEX_DIR)

print("✅ Knowledge base indexed")




✅ Knowledge base indexed


In [25]:
%%writefile rag.py
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings

from pathlib import Path

# Location of the saved FAISS index. The notebook writes both this module and
# the "vectorstore" folder into its working directory, so resolving the folder
# next to this file keeps the two in agreement without hard-coding an absolute
# "/content/..." path (on Colab it still evaluates to "/content/vectorstore").
VECTOR_DIR = str(Path(__file__).resolve().parent / "vectorstore")

# Must be the same embedding model and settings that were used when the index
# was built; otherwise query vectors are not comparable with the stored ones.
embeddings = HuggingFaceEmbeddings(
    model_name="BAAI/bge-base-en-v1.5",
    encode_kwargs={"normalize_embeddings": True}
)

# SECURITY NOTE: allow_dangerous_deserialization=True opts in to loading
# pickled data from VECTOR_DIR. That is acceptable here only because the index
# was produced locally by this notebook; never point this at an untrusted
# folder.
db = FAISS.load_local(
    VECTOR_DIR,
    embeddings,
    allow_dangerous_deserialization=True
)

def retrieve(query: str, k: int = 3):
    """Run a similarity search against the loaded index.

    Args:
        query: Text to search for.
        k: Number of results requested from ``db.similarity_search``.

    Returns:
        A list of dicts, one per hit, holding the chunk text under
        ``"content"`` and the hit's ``"source"`` metadata entry
        (``"unknown"`` when the metadata has no such key).
    """
    hits = []
    for match in db.similarity_search(query, k=k):
        hits.append(
            {
                "content": match.page_content,
                "source": match.metadata.get("source", "unknown"),
            }
        )
    return hits


Writing rag.py


In [26]:
%%writefile tools.py
from rag import retrieve

# MCP-style tool (tool + contract)
def search_kb(query: str, k: int = 3):
    """Search the knowledge base for entries relevant to ``query``.

    Thin tool wrapper around ``rag.retrieve``.

    Args:
        query: Text to search for.
        k: Number of results to request. Defaults to 3, the same default
            ``retrieve`` uses, so existing one-argument calls behave exactly
            as before.

    Returns:
        Whatever ``retrieve`` returns for ``query`` and ``k``.
    """
    return retrieve(query, k=k)


Writing tools.py


In [33]:
%%writefile graph.py
from typing import TypedDict, List
from langgraph.graph import StateGraph, END
from transformers import pipeline
from langchain_community.llms import HuggingFacePipeline
from tools import search_kb

# --------------------
# STATE SCHEMA
# --------------------
class AgentState(TypedDict, total=False):
    """Shared state passed between the graph nodes.

    Declared with ``total=False`` because the state is filled in step by
    step: the caller supplies only ``question`` and each node contributes its
    own key afterwards, so no single dict is required to carry every key.
    """

    # The user's question; supplied by the caller when the graph is invoked.
    question: str
    # Retrieved knowledge-base entries; written by the "retrieve" node.
    docs: List[dict]
    # Raw answer text produced by the LLM; written by the "draft" node.
    draft: str
    # Draft plus the appended list of sources; written by the "cite" node.
    final: str

# --------------------
# LLM
# --------------------
# Hugging Face text2text-generation pipeline for the google/flan-t5-base
# checkpoint. max_length is forwarded to the pipeline unchanged (presumably
# the cap on generated length -- confirm against the transformers docs).
_MODEL_ID = "google/flan-t5-base"
llm_pipeline = pipeline(task="text2text-generation", model=_MODEL_ID, max_length=256)

# LangChain wrapper around the pipeline; the graph nodes call llm.invoke(prompt).
llm = HuggingFacePipeline(pipeline=llm_pipeline)

# --------------------
# GRAPH NODES
# --------------------
def retrieve_node(state: AgentState):
    """Graph node: look up knowledge-base entries for the current question.

    Passes ``state["question"]`` to ``search_kb`` and returns the result as a
    partial state update under the ``"docs"`` key.
    """
    question = state["question"]
    matches = search_kb(question)
    return {"docs": matches}

def draft_node(state: AgentState):
    """Graph node: ask the LLM for an answer grounded in the retrieved docs.

    Joins the ``"content"`` of every entry in ``state["docs"]`` with newlines,
    embeds that context and ``state["question"]`` in the prompt, and returns
    the model output as a partial state update under the ``"draft"`` key.
    """
    passages = [entry["content"] for entry in state["docs"]]
    context = "\n".join(passages)
    # The prompt text starts at column 0 on purpose: any indentation inside
    # the triple-quoted string would become part of the prompt.
    prompt = f"""
Answer using ONLY the context below.

Context:
{context}

Question:
{state["question"]}
"""
    answer = llm.invoke(prompt)
    return {"draft": answer}

def cite_node(state: AgentState):
    """Graph node: append a de-duplicated "Sources" section to the draft.

    Reads ``state["draft"]`` and the ``"source"`` field of every entry in
    ``state["docs"]``. Each distinct source is listed once, in the order it
    first appears in ``state["docs"]``, one per line.

    Returns:
        A partial state update ``{"final": <draft + sources section>}``.
    """
    # dict.fromkeys removes duplicates while keeping first-seen order. A set
    # has no guaranteed iteration order for strings, so the citation list
    # could change from one run to the next.
    sources = list(dict.fromkeys(d["source"] for d in state["docs"]))
    final = state["draft"] + "\n\nSources:\n" + "\n".join(sources)
    return {"final": final}

# --------------------
# BUILD GRAPH
# --------------------
# Linear three-step workflow: retrieve -> draft -> cite -> END.
graph = StateGraph(AgentState)

# Register each node under the name the edges below refer to.
for node_name, node_fn in (
    ("retrieve", retrieve_node),
    ("draft", draft_node),
    ("cite", cite_node),
):
    graph.add_node(node_name, node_fn)

# Execution starts at "retrieve" and follows the edges in order.
graph.set_entry_point("retrieve")
for upstream, downstream in (
    ("retrieve", "draft"),
    ("draft", "cite"),
    ("cite", END),
):
    graph.add_edge(upstream, downstream)

# Compiled, invokable application imported by the notebook.
app = graph.compile()


Overwriting graph.py


In [44]:
from graph import app

# Run the compiled graph for a single question; only "question" is supplied,
# the remaining state keys are filled in by the graph nodes.
user_question = "How to access the workshop"
result = app.invoke({"question": user_question})

# Heading followed by the cited answer stored under "final".
print("\n Final Answer:\n", result["final"], sep="\n")



 Final Answer:

Register for the workshop using the "Join event" button.

Sources:
/content/Workshop 5 FAQ_V1.pdf
