In [None]:
# A Practical Guide to Building Local RAG Applications with LangChain

### Retrieval augmented generation (RAG) encompasses a family of systems that extend conventional language models, large and otherwise (LLMs), to incorporate context based on retrieved knowledge from a document base, thereby leading to more truthful and relevant responses being generated upon user queries

In [None]:
pip install langchain langchain_community faiss-cpu sentence-transformers transformers

Collecting langchain_community
  Downloading langchain_community-0.3.20-py3-none-any.whl.metadata (2.4 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.8.1-py3-none-any.whl.metadata (3.5 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-set

In [None]:
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate
from transformers import pipeline
from langchain_community.vectorstores import FAISS

In [None]:
import os
import urllib.request
import zipfile

zip_url = "https://github.com/gakudo-ai/open-datasets/raw/refs/heads/main/asia_documents.zip"
zip_path = "asia_documents.zip"
extract_folder = "asia_txt_files"

print("Downloading zip file...")
urllib.request.urlretrieve(zip_url, zip_path)
print("Download complete!")

print("Extracting files...")
os.makedirs(extract_folder, exist_ok=True)
with zipfile.ZipFile(zip_path, "r") as zip_ref:
    zip_ref.extractall(extract_folder)

print(f"Files extracted to: {extract_folder}")

print("Extracted files:")
print(os.listdir(extract_folder))

Downloading zip file...
Download complete!
Extracting files...
Files extracted to: asia_txt_files
Extracted files:
['Thailand.txt', 'South_Korea.txt', 'Mongolia.txt', 'Vietnam.txt', 'Japan.txt', 'Philippines.txt', 'Taiwan.txt', 'Indonesia.txt', 'Malaysia.txt']


In [None]:
import os
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings

folder_path = "asia_txt_files"

documents = []
for filename in os.listdir(folder_path):
    if filename.endswith(".txt"):
        file_path = os.path.join(folder_path, filename)
        loader = TextLoader(file_path)
        documents.extend(loader.load())

text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=100)
docs = text_splitter.split_documents(documents)

embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

vectorstore = FAISS.from_documents(docs, embedding_model)
retriever = vectorstore.as_retriever()

  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from transformers import pipeline

llm_pipeline = pipeline("text-generation", model="gpt2", device=0, max_new_tokens=200)
llm = HuggingFacePipeline(pipeline=llm_pipeline)

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Device set to use cpu
  llm = HuggingFacePipeline(pipeline=llm_pipeline)


In [None]:
prompt_template = "Answer the following question based on the provided context: {context}\n\nQuestion: {query}\nAnswer:"

prompt = PromptTemplate(input_variables=["query", "context"], template=prompt_template)

llm_chain = LLMChain(llm=llm, prompt=prompt)

retrieval_qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    verbose=True
)

  llm_chain = LLMChain(llm=llm, prompt=prompt)


In [None]:
def truncate_to_max_tokens(text, max_tokens=500):
    tokens = text.split()
    if len(tokens) > max_tokens:
        return " ".join(tokens[:max_tokens])
    return text

In [13]:
query = "What are the best Asian cuisine dishes?"

# IMPORTANT: using only the top-1 document by default
retrieved_docs = retriever.get_relevant_documents(query)[:1]
context = " ".join([doc.page_content for doc in retrieved_docs])
context = truncate_to_max_tokens(context, max_tokens=500)
response = retrieval_qa.run(query)
print("Answer:", response)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.




[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m
Answer: Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

Vietnam is a Southeast Asian country known for its rich history, diverse landscapes, and delicious cuisine. Hanoi and Ho Chi Minh City are its major urban centers, each with a unique character. Ha Long Bay’s limestone karsts and the Mekong Delta’s floating markets are famous geographical highlights. Vietnamese culture is deeply influenced by Confucian values, French colonial heritage, and indigenous traditions.

Thailand is a Southeast Asian country famous for its tropical beaches, ornate temples, and bustling street food culture. Bangkok, the capital, is known for its vibrant nightlife and historical sites like the Grand Palace and Wat Arun. Northern Thailand features mountainous landscapes and cultural cities like Chiang Mai, while the south o