In [None]:
import os
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv


In [25]:
# Define paths
file_path = "C:\\Users\\ahmad\\OneDrive\\Desktop\\Langchian AI Agents\\4_Rag\\Documents\\lord_of_the_rings.txt"  # Text file location
persistent_directory = "db/chroma_db"             # Folder to store embeddings
# Make sure the vector store folder exists
os.makedirs(persistent_directory, exist_ok=True)

In [26]:
# Load the text file
loader = TextLoader(file_path)
documents = loader.load()

**Chunks Overlap**

The overlap between the two sets of covers is the set of all covers that are common to both
sets. This is the set of all covers that are in both sets of covers.
If we set overlap 0 it means that there are no common covers between the two sets of covers.
if we set it to 50 it means that 50% of the covers in the first set are common to the second set of covers.

In [27]:
# Split the document into chunks
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
chunks = splitter.split_documents(documents)

print(f"Loaded and split into {len(chunks)} chunks")

Created a chunk of size 1623, which is longer than the specified 1000
Created a chunk of size 1315, which is longer than the specified 1000
Created a chunk of size 1058, which is longer than the specified 1000
Created a chunk of size 1345, which is longer than the specified 1000
Created a chunk of size 1329, which is longer than the specified 1000
Created a chunk of size 1997, which is longer than the specified 1000
Created a chunk of size 1418, which is longer than the specified 1000
Created a chunk of size 1107, which is longer than the specified 1000
Created a chunk of size 1200, which is longer than the specified 1000
Created a chunk of size 1233, which is longer than the specified 1000
Created a chunk of size 1195, which is longer than the specified 1000
Created a chunk of size 1055, which is longer than the specified 1000
Created a chunk of size 1505, which is longer than the specified 1000
Created a chunk of size 1355, which is longer than the specified 1000
Created a chunk of s

Loaded and split into 43 chunks


In [28]:
#  Create embeddings using Google model
load_dotenv()
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [29]:
# Save to Chroma vector store
db = Chroma.from_documents(chunks, embeddings, persist_directory=persistent_directory)

print("Embeddings created and saved in Chroma DB!")

Embeddings created and saved in Chroma DB!


In [30]:
# Load from existing Chroma DB
db = Chroma(
    persist_directory=persistent_directory,
    embedding_function=embeddings
)

# Search with a sample question
results = db.similarity_search("Who is Frodo?", k=2)
for doc in results:
    print(doc.page_content)

On a quiet evening in the Shire, Gandalf came to Frodo in Hobbiton and revealed the terrible truth about the One Ring. They sat in the warmth of Bag End, and Gandalf laid out the importance of Frodoâ€™s role in the fate of Middle-earth
On a quiet evening in the Shire, Gandalf came to Frodo in Hobbiton and revealed the terrible truth about the One Ring. They sat in the warmth of Bag End, and Gandalf laid out the importance of Frodoâ€™s role in the fate of Middle-earth


# **Part 2**

In [31]:
# Use current working directory
current_dir = os.getcwd()
vector_db_folder = os.path.join(current_dir, "db", "chroma_db")

In [32]:
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
db = Chroma(persist_directory=vector_db_folder, embedding_function=embeddings)

In [None]:
# Define the user's question
query = "Where does Gandalf meet Frodo?"

retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)
relevant_docs = retriever.invoke(query)

# Display the relevant results with metadata
print("Relevant Documents")
for i, doc in enumerate(relevant_docs, 1):
    print(f"Document {i}:\n{doc.page_content}\n")
    if doc.metadata:
        print(f"Source: {doc.metadata.get('source', 'Unknown')}\n")


--- Relevant Documents ---
Document 1:
Gandalf had been a friend to the Bagginses for many years, and he came to Hobbiton to visit Frodo one summer day. He found him sitting outside Bag End, the home of the Baggins family. It was here that Gandalf first spoke to Frodo about the dangers of the One Ring

Frodo was surprised to see Gandalf arriving at his doorstep in Hobbiton, for he had not expected the wizard for some time. The conversation they began was far more serious than any previous meeting, as Gandalf had urgent news regarding the Ring that Frodo had inherited.

Source: C:\Users\ahmad\OneDrive\Desktop\Langchian AI Agents\4_Rag\Documents\lord_of_the_rings.txt

Document 2:
Gandalf had been a friend to the Bagginses for many years, and he came to Hobbiton to visit Frodo one summer day. He found him sitting outside Bag End, the home of the Baggins family. It was here that Gandalf first spoke to Frodo about the dangers of the One Ring

Frodo was surprised to see Gandalf arriving at 