# LangChain for RAG Workflows


In [1]:
pip install langchain-community requests langchain langchain-core langchain-openai pypdf chromadb


Collecting langchain-community
  Downloading langchain_community-0.3.29-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain-openai
  Downloading langchain_openai-0.3.33-py3-none-any.whl.metadata (2.4 kB)
Collecting pypdf
  Downloading pypdf-6.0.0-py3-none-any.whl.metadata (7.1 kB)
Collecting chromadb
  Downloading chromadb-1.0.21-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.3 kB)
Collecting requests
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting dataclasses-json<0.7,>=0.6.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting langchain-core
  Downloading langchain_core-0.3.76-py3-none-any.whl.metadata (3.7 kB)
Collecting pybase64>=1.4.1 (from chromadb)
  Downloading pybase64-1.4.2-cp312-cp312-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl.metadata (8.7 kB)
Collecting posthog<6.0.0,>=2.4.0 (from chromadb)
  Downloading posthog-5.4.0-py3-

In [2]:
# 🔑 Setup OpenAI API key in Colab
#
# Resolution order:
#   1. an optional .env file stored in Google Drive,
#   2. otherwise an interactive, non-echoing prompt.
# The cell raises RuntimeError if no key is available afterwards.
import getpass
import os
from dotenv import load_dotenv

# 1. Try to load from a .env file in Google Drive (optional)
if os.path.exists("/content/drive/MyDrive/.env"):
    load_dotenv("/content/drive/MyDrive/.env")

# 2. If still not set, prompt user to enter securely
if not os.getenv("OPENAI_API_KEY"):
    # Strip surrounding whitespace: a pasted key frequently carries a trailing
    # space or newline, which would otherwise be stored as part of the key.
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API Key: ").strip()

# 3. Verify
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError("❌ OPENAI_API_KEY not found. Please enter it manually above.")
else:
    # NOTE(review): this echoes the first 8 characters of the key into the
    # saved cell output — consider dropping it before sharing the notebook.
    print("✅ OpenAI API key loaded. Starts with:", api_key[:8], "...")


Enter your OpenAI API Key: ··········
✅ OpenAI API key loaded. Starts with: sk-proj- ...


In [3]:
## Data Ingestion
# Build a loader for the plain-text book; nothing is read until `.load()` is called.
from langchain_community.document_loaders import TextLoader

text_path = 'alice_in_wonderland.txt'
loader = TextLoader(text_path)
loader


<langchain_community.document_loaders.text.TextLoader at 0x7879ce3acc20>

In [4]:
# Run the text loader built in the previous cell and keep the result in `docs`.
# NOTE: `docs` is reassigned further down when the PDF is loaded.
docs = loader.load()
# Bare name as the last expression renders the loaded result as the cell output.
docs




In [5]:
import os
from dotenv import load_dotenv

# Pick up variables from a .env file, if load_dotenv() finds one.
load_dotenv()

# os.environ only accepts strings, so assigning the result of os.getenv()
# directly raises TypeError when the variable is missing. Check first and
# fail with an explicit message instead.
api_key = os.getenv('OPENAI_API_KEY')
if api_key is None:
    raise RuntimeError("OPENAI_API_KEY is not set. Add it to a .env file or export it before running this cell.")
os.environ['OPENAI_API_KEY'] = api_key


In [6]:
# Web-based loader
from langchain_community.document_loaders import WebBaseLoader
import bs4

# Only parse the elements carrying the post's title, header and content classes.
post_sections = bs4.SoupStrainer(
    class_=("post-title","post-content","post-header")
)

# Load the content of the HTML page (splitting and indexing are not done in this cell).
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": post_sections},
)

text_documents = loader.load()




In [7]:
# Display the documents loaded from the web page.
# NOTE(review): this renders the full page text in the output — consider previewing a slice instead.
text_documents


[Document(metadata={'source': 'https://lilianweng.github.io/posts/2023-06-23-agent/'}, page_content='\n\n      LLM Powered Autonomous Agents\n    \nDate: June 23, 2023  |  Estimated Reading Time: 31 min  |  Author: Lilian Weng\n\n\nBuilding agents with LLM (large language model) as its core controller is a cool concept. Several proof-of-concepts demos, such as AutoGPT, GPT-Engineer and BabyAGI, serve as inspiring examples. The potentiality of LLM extends beyond generating well-written copies, stories, essays and programs; it can be framed as a powerful general problem solver.\nAgent System Overview#\nIn a LLM-powered autonomous agent system, LLM functions as the agent’s brain, complemented by several key components:\n\nPlanning\n\nSubgoal and decomposition: The agent breaks down large tasks into smaller, manageable subgoals, enabling efficient handling of complex tasks.\nReflection and refinement: The agent can do self-criticism and self-reflection over past actions, learn from mistake

In [8]:
## PDF reader
# Rebinds `loader` and `docs`: from here on both refer to the PDF, not the text file.
from langchain_community.document_loaders import PyPDFLoader

pdf_path = '/content/Alice_in_Wonderland.pdf'
loader = PyPDFLoader(pdf_path)
docs = loader.load()


In [9]:
# Display the documents loaded from the PDF.
# NOTE(review): this renders every loaded Document in full — consider previewing a slice instead.
docs


[Document(metadata={'producer': 'BookVirtual Corp. Patents Pending.', 'creator': 'BookVirtual Digital Works', 'creationdate': '2000', 'keywords': "Carroll, Alice, Wonderland, children's, 1865; v.1.2", 'title': "Alice's Adventures in Wonderland", 'moddate': '2000-11-27T16:31:36-08:00', 'subject': "children's literature", 'author': 'Lewis Carroll; BkV0000010; Bkslr0000001; ISBN<n/a>;', 'source': '/content/Alice_in_Wonderland.pdf', 'total_pages': 105, 'page': 0, 'page_label': '1'}, page_content='BY LEWIS CARROLL ILLUSTRATED BY JOHN TENNIEL\nNAVIGATE\nA LICE ’S\nAdventures in W onderland\nCONTROL\nCLOSE THE BOOK\nTURN THE PAGE\nThe world’ s\nmost precise\nreplica\nof the world’ s \nmost famous\nchildren’ s book!\nIn 1998, Peter Zelchenko\nbegan a project for Volume-\nOne Publishing: to create an\nexact digital replica of Lewis\nCarroll’s ﬁrst edition of Alice.\nWorking with the original\n1865 edition and numerous\nother editions at the Newberry\nLibrary in Chicago, Zelchenko\ncreated a dig

In [10]:
# NOTE(review): newer LangChain releases expose this class from the separate
# `langchain_text_splitters` package — confirm this import path against the installed version.
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter settings, named so they are easy to find and tune.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 200

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)

# Split the PDF documents into chunks and preview the first five.
documents = text_splitter.split_documents(docs)
documents[:5]


[Document(metadata={'producer': 'BookVirtual Corp. Patents Pending.', 'creator': 'BookVirtual Digital Works', 'creationdate': '2000', 'keywords': "Carroll, Alice, Wonderland, children's, 1865; v.1.2", 'title': "Alice's Adventures in Wonderland", 'moddate': '2000-11-27T16:31:36-08:00', 'subject': "children's literature", 'author': 'Lewis Carroll; BkV0000010; Bkslr0000001; ISBN<n/a>;', 'source': '/content/Alice_in_Wonderland.pdf', 'total_pages': 105, 'page': 0, 'page_label': '1'}, page_content='BY LEWIS CARROLL ILLUSTRATED BY JOHN TENNIEL\nNAVIGATE\nA LICE ’S\nAdventures in W onderland\nCONTROL\nCLOSE THE BOOK\nTURN THE PAGE\nThe world’ s\nmost precise\nreplica\nof the world’ s \nmost famous\nchildren’ s book!\nIn 1998, Peter Zelchenko\nbegan a project for Volume-\nOne Publishing: to create an\nexact digital replica of Lewis\nCarroll’s ﬁrst edition of Alice.\nWorking with the original\n1865 edition and numerous\nother editions at the Newberry\nLibrary in Chicago, Zelchenko\ncreated a dig

In [11]:
# Display every chunk produced by the splitter (the previous cell already previews the first five).
documents


[Document(metadata={'producer': 'BookVirtual Corp. Patents Pending.', 'creator': 'BookVirtual Digital Works', 'creationdate': '2000', 'keywords': "Carroll, Alice, Wonderland, children's, 1865; v.1.2", 'title': "Alice's Adventures in Wonderland", 'moddate': '2000-11-27T16:31:36-08:00', 'subject': "children's literature", 'author': 'Lewis Carroll; BkV0000010; Bkslr0000001; ISBN<n/a>;', 'source': '/content/Alice_in_Wonderland.pdf', 'total_pages': 105, 'page': 0, 'page_label': '1'}, page_content='BY LEWIS CARROLL ILLUSTRATED BY JOHN TENNIEL\nNAVIGATE\nA LICE ’S\nAdventures in W onderland\nCONTROL\nCLOSE THE BOOK\nTURN THE PAGE\nThe world’ s\nmost precise\nreplica\nof the world’ s \nmost famous\nchildren’ s book!\nIn 1998, Peter Zelchenko\nbegan a project for Volume-\nOne Publishing: to create an\nexact digital replica of Lewis\nCarroll’s ﬁrst edition of Alice.\nWorking with the original\n1865 edition and numerous\nother editions at the Newberry\nLibrary in Chicago, Zelchenko\ncreated a dig

In [12]:
## Vector Embedding And Vector Store
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

# Embedding model handed to the vector store; it is constructed with default settings.
embeddings = OpenAIEmbeddings()

# Build a Chroma store from the chunked documents.
# NOTE(review): re-running this cell may add the same chunks to the store again — confirm before re-executing.
db = Chroma.from_documents(documents=documents, embedding=embeddings)


In [13]:
# Ask the vector store for chunks similar to the question, then print the first one returned.
query = "Who is Alice?"
retrieved_results = db.similarity_search(query)

top_hit = retrieved_results[0]
print(top_hit.page_content)


backs was the same as the rest of the pack,
she could not tell whether they were gardeners,
or soldiers, or courtiers, or three of her own
children.
“How should I know ?” said Alice, surprised
at her own courage. “It ’s no business of mine.”
The Queen turned crimson with fury, and,
after glaring at her for a moment like a wild
beast, began screaming, “ Off with her head !
Off—”
Fit Page Full Screen On/Off Close Book
Navigate Control Internet
