# Setup

## Install necessary libraries

In [1]:
! pip install langchain_community tiktoken langchainhub langchain langchain-google-genai
! pip install -qU langchain-huggingface sentence_transformers chromadb  langchain-qdrant qdrant_client

Collecting langchain_community
  Downloading langchain_community-0.3.22-py3-none-any.whl.metadata (2.4 kB)
Collecting tiktoken
  Downloading tiktoken-0.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Collecting langchainhub
  Downloading langchainhub-0.1.21-py3-none-any.whl.metadata (659 bytes)
Collecting langchain-google-genai
  Downloading langchain_google_genai-2.1.3-py3-none-any.whl.metadata (4.7 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.9.1-py3-none-any.whl.metadata (3.8 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting types-requests<3.0.0.0,>=2.31.0.2 (from langchainhub)
  Downloading types_requests-2.32.0.20250328-py3-none-any.whl.metadata (2.3 kB)
Collec

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/67.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m67.3/67.3 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m345.7/345.7 kB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m18.3/18.3 MB[0m [31m88.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m74.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m94.9/94.9 kB[0m [31m7.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m327.7/327.7 kB[0m [31m20.5 MB/s[0m eta [36m0:00:

## Insert API key


You will need first to get an API key from [Google AI Studio](https://aistudio.google.com/app/apikey).

In [3]:

import os

GOOGLE_API_KEY = "AIzaSyBE_q_5jLjIHUPXbJ_mIhCULthfzVZfSRA"
os.environ["GOOGLE_API_KEY"] = GOOGLE_API_KEY

#or run this
#from google.colab import userdata
#from google.colab import drive
#os.environ["GOOGLE_API_KEY"] = userdata.get("GOOGLE_API_KEY")

# Indexing


## 1. Load Data

### From your files

In [1]:
!pip install unstructured
!pip install "unstructured[pdf]"



In [2]:
import os
from google.colab import files
import shutil

# Create the folder
folder_path = os.path.join("/content/", "uploaded_files")
os.makedirs(folder_path, exist_ok=True)  # Create if it doesn't exist

# Upload files
uploaded = files.upload()

# Move uploaded files to the folder
for filename, data in uploaded.items():
  source_path = os.path.join("/content/", filename)  # Path to uploaded file
  destination_path = os.path.join(folder_path, filename)
  shutil.move(source_path, destination_path)  # Move the file

Saving MYY601-6-GR.pdf to MYY601-6-GR.pdf


In [3]:
from langchain_community.document_loaders import DirectoryLoader

loader = DirectoryLoader(folder_path)
docs = loader.load()
len(docs)

1

### From URLs

In [4]:
import bs4

from langchain_community.document_loaders import WebBaseLoader

# Load Documents
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs=dict(
        parse_only=bs4.SoupStrainer(
            class_=("post-content", "post-title", "post-header")
        )
    ),
)
docs = loader.load()



In [5]:
len(docs)

1

## 2. Use a Text Splitter to Split Documents

In [6]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=256, chunk_overlap=0)
splits = text_splitter.split_documents(docs)

In [7]:
len(splits)

241

## 3.Embed the documents and store them

In [8]:
from langchain_huggingface import HuggingFaceEmbeddings
from sentence_transformers import SentenceTransformer
#sentence-transformers/all-MiniLM-L6-v2
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    multi_process=True,
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": True},  # Set `True` for cosine similarity

)
print(f"Model's maximum sequence length: {SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2').max_seq_length}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Model's maximum sequence length: 256


In [9]:
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams

client = QdrantClient(
    path="/content/vector_store_folder"
    #":memory:"
    # you can use :memory: mode for fast and light-weight experiments,
)
client.create_collection(
    collection_name="workshop_collection",
    vectors_config=VectorParams(size=384, distance=Distance.COSINE),
)

vector_store = QdrantVectorStore(
    client=client,
    collection_name="workshop_collection",
    embedding=embedding_model
)
vector_store.add_documents(documents=splits)


['a449a4af9b47429abe375744db04d0a0',
 'c31cf77e83d841ca809292bae5837514',
 'f09dd2c4ac914a3cae8704a361cd7e61',
 '1398c76217d24c118bae7f1818ca6ecb',
 '1ed2124a00684ca3b736b3e161890037',
 '3bf56f9135f040889655cbbc5fad8be8',
 '6dc7205cc1c64bf095bdca4c750416a6',
 'acfa7e5778d74ea4a32ea16c9cea8145',
 'bd33ca3adc0245948af86c2a6f25b963',
 '18e536fd621d48d196d6e179774d0200',
 '9a570f78549e498592e4b9d59c0dfa3e',
 'a8ff5e1970f44c34a44ed9ffdaefcfba',
 '3211cfe8ecb2418c99dd344f64a8e5dc',
 '133a1c9d42af45b3b44251baa5496ca3',
 'eb889fa79c2c4a01b8af804ecb8b8a64',
 'd7a753f1b5f5492daec67edf1f3eb8bd',
 '1910d90109e54d578e7cbbdc04386b5d',
 '127ba3f5a1b8442d8d140391247e55c7',
 'ede5847c9c8049279cdd27e3673e2761',
 'e71c9ae6611b47979274d7b0acd929fb',
 '766aa90c11364c4bb1109dd29fc12706',
 '5e6650470ffc4eb69e0c5ed30c746cd1',
 '5afb1892fad24d988f5d0df42c176465',
 'cd24a3fbf7d8401f892ae69fe1125f79',
 '6ecfe3d2daf64e01ae7476ef50aa06bd',
 '083462d630084cd0ba00f247aaa519ba',
 '168fc1d8fe2c42caae04a0b18f8c976b',
 

### In case qdrant doesnt work, use this:

In [None]:
vector_store = Chroma.from_documents(documents=splits,
                                    embedding=embedding_model)

## Retrieval

In [10]:
user_query= "What are the three core parts of an agent?" #"Which system became the first AI to earn an IMO medal?"

In [11]:
retrieved_docs = vector_store.similarity_search_with_score(query=user_query, k=5)

In [12]:
retrieved_docs

[(Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', '_id': 'b14d08194dd249ebb5c6ecf238dc9dbf', '_collection_name': 'workshop_collection'}, page_content='Relationships between agents and observations of one agent by another are all taken into consideration for planning and reacting.\nEnvironment information is present in a tree structure.'),
  0.5085810981960195),
 (Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', '_id': '3bf56f9135f040889655cbbc5fad8be8', '_collection_name': 'workshop_collection'}, page_content='Subgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.'),
  0.4987262826216345),
 (Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', '_id': '53f2fa21e5b543c79231091d7eee3aec', '_collection_name': 'workshop_collection'}, page_content='Fig. 13. The generative agent architecture. (Image sour

In [13]:
print("Content:",retrieved_docs[0][0].page_content)
print("Metadata",retrieved_docs[0][0].metadata)
print("Similarity score",retrieved_docs[0][1])

Content: Relationships between agents and observations of one agent by another are all taken into consideration for planning and reacting.
Environment information is present in a tree structure.
Metadata {'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/', '_id': 'b14d08194dd249ebb5c6ecf238dc9dbf', '_collection_name': 'workshop_collection'}
Similarity score 0.5085810981960195


In [14]:
retriever = vector_store.as_retriever(search_kwargs={"k": 5})
docs = retriever.get_relevant_documents(user_query)
print(docs[0].page_content)

  docs = retriever.get_relevant_documents(user_query)


Relationships between agents and observations of one agent by another are all taken into consideration for planning and reacting.
Environment information is present in a tree structure.


In [19]:
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import ChatPromptTemplate


# Prompt

template = """You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.If the provided context doesn't contain the answer, answer from your knowledge but say that you do,else just say that you don't know, don't try to make up an answer.
Question: {question}
Context: {context}
"""

prompt = ChatPromptTemplate.from_template(template)

In [20]:
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash-lite",#"gemini-2.0-flash",#"gemini-2.5-flash-preview-04-17",#
    temperature=0,
    # max_tokens=None,
    # timeout=None,
    # max_retries=2,
)

In [22]:
from langchain.schema.runnable import RunnablePassthrough

# Post-processing
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)


# Chain
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Question
response=rag_chain.invoke(user_query)

ERROR:grpc._plugin_wrapping:AuthMetadataPluginCallback "<google.auth.transport.grpc.AuthMetadataPlugin object at 0x7d0298210810>" raised exception!
Traceback (most recent call last):
  File "/usr/local/lib/python3.11/dist-packages/google/auth/compute_engine/credentials.py", line 126, in refresh
    self._retrieve_info(request)
  File "/usr/local/lib/python3.11/dist-packages/google/auth/compute_engine/credentials.py", line 99, in _retrieve_info
    info = _metadata.get_service_account_info(
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/google/auth/compute_engine/_metadata.py", line 338, in get_service_account_info
    return get(request, path, params={"recursive": "true"})
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/google/auth/compute_engine/_metadata.py", line 263, in get
    raise exceptions.TransportError(
google.auth.exceptions.TransportError: ("Failed to retrieve http:/

KeyboardInterrupt: 

In [None]:
print(response)

I'm sorry, but the provided text does not explicitly state the three core parts of an agent.


## Evaluating the pipeline

In [None]:
!pip install ragas

In [None]:
from ragas import EvaluationDataset

# sample_queries = ["What are the three core parts of an agent?",
#              "Which hypothesis says a single reward can be enough for intelligence?",
#              "Which system became the first AI to earn an IMO medal?",
#              "What are the two basic multi-agent orchestration patterns?",
#              "List the three steps in the guardrail-setup heuristic."
#             ]

# #If you are not interested in the context_recall metric, you don’t need to provide the ground_truths information.
# expected_responses = ["Model, Tools, Instructions",
#                 "'Reward is Enough' hypothesis",
#                 "AlphaProof",
#                 "Manager pattern and Decentralised pattern",
#                 "1 Focus on privacy & safety 2 Add guardrails for real-world edge cases 3 Tune for both security and user experience"
#                  ]
# #for sample website
sample_queries = ["What three components sit alongside the LLM “brain” in an autonomous agent system?",
             "What are the two main kinds of memory an agent maintains?",
             "Which prompting method tells the model to 'think step by step'?",
             "Which framework fuses reasoning traces with discrete actions inside an agent?",
             "Generative Agents score memories on three factors; name them."
            ]

#If you are not interested in the context_recall metric, you don’t need to provide the ground_truths information.
expected_responses = ["Planning, Memory, Tool use",
                "Short-term memory and Long-term memory",
                "Chain of Thought (CoT)",
                "ReAct",
                "Recency, Importance, Relevance"
                 ]

dataset = []

for query, reference in zip(sample_queries, expected_responses):
    relevant_docs = retriever.invoke(query)
    response = rag_chain.invoke(query)
    dataset.append(
        {
            "user_input": query,
            "retrieved_contexts": [rdoc.page_content for rdoc in relevant_docs],
            "response": response,
            "reference": reference,
        }
    )

evaluation_dataset = EvaluationDataset.from_list(dataset)

In [None]:
from ragas import evaluate
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import LLMContextRecall, Faithfulness, FactualCorrectness

evaluator_llm = LangchainLLMWrapper(llm)

result = evaluate(
    dataset=evaluation_dataset,
    metrics=[LLMContextRecall(), Faithfulness(), FactualCorrectness()],
    llm=evaluator_llm,
)



In [None]:
print(result)


{'context_recall': 0.2667, 'faithfulness': 0.5139, 'factual_correctness(mode=f1)': 0.4480}
