In [1]:
import os
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from dotenv import load_dotenv
from langchain_groq import ChatGroq
load_dotenv()

True

In [2]:
llm=ChatGroq(model='llama-3.1-70b-versatile')

In [3]:
os.getcwd()

'c:\\Users\\varsh\\Documents\\AI\\LLM\\RAG_youtube_langchain\\code'

In [4]:
books_dir = r'C:\Users\varsh\Documents\AI\LLM\RAG_youtube_langchain\books'
db_dir = os.path.join(r'C:\Users\varsh\Documents\AI\LLM\RAG_youtube_langchain', "db")
persistent_directory = os.path.join(db_dir, "chroma_db_with_metadata")

print(f"Books directory: {books_dir}")
print(f"Persistent directory: {persistent_directory}")

Books directory: C:\Users\varsh\Documents\AI\LLM\RAG_youtube_langchain\books
Persistent directory: C:\Users\varsh\Documents\AI\LLM\RAG_youtube_langchain\db\chroma_db_with_metadata


In [5]:
books_dir

'C:\\Users\\varsh\\Documents\\AI\\LLM\\RAG_youtube_langchain\\books'

In [6]:
if not os.path.exists(persistent_directory):
    print("Persistent directory does not exist. Initializing vector store...")

    # Ensure the books directory exists
    if not os.path.exists(books_dir):
        raise FileNotFoundError(
            f"The directory {books_dir} does not exist. Please check the path."
        )
    book_files = [f for f in os.listdir(books_dir) if f.endswith(".txt")]
    # print(book_files)
    
    
    documents=[]
    for bookfile in book_files:
        file_path=os.path.join(books_dir,bookfile)
        loader = TextLoader(file_path, encoding='utf-8')
        book_docs=loader.load()
        # print(book_docs)
        for doc in book_docs:
            # print(doc)
            # break
            doc.metadata = {"source": bookfile}
            documents.append(doc)
            print(documents)
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    docs = text_splitter.split_documents(documents)
    
    print("\n--- Document Chunks Information ---")
    print(f"Number of document chunks: {len(docs)}")
    print("\n--- Creating embeddings ---")
    model_name="sentence-transformers/all-mpnet-base-v2"
    model_kwargs={"device": "cpu"}
    embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)
    print("\n--- Finished creating embeddings ---")

    # Create the vector store and persist it
    print("\n--- Creating and persisting vector store ---")
    db = Chroma.from_documents(
        docs, embeddings, persist_directory=persistent_directory)
    print("\n--- Finished creating and persisting vector store ---")

else:
    print("Vector store already exists. No need to initialize.")

Vector store already exists. No need to initialize.


In [8]:
model_name="sentence-transformers/all-mpnet-base-v2"
model_kwargs={"device": "cpu"}
embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)
db = Chroma(persist_directory=persistent_directory,
            embedding_function=embeddings)
query = "How is Watson related to Sherlock Holmes?"

# Retrieve relevant documents based on the query
retriever = db.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 3, "score_threshold": 0.3},
)
relevant_docs = retriever.invoke(query)

# 3. Similarity Score Threshold
# This method retrieves documents that exceed a certain similarity score threshold.
# 'score_threshold' sets the minimum similarity score a document must have to be considered relevant.
# Use this when you want to ensure that only highly relevant documents are retrieved, filtering out less relevant ones.

# Display the relevant results with metadata
print("\n--- Relevant Documents ---")
for i, doc in enumerate(relevant_docs, 1):
    print(f"Document {i}:\n{doc.page_content}\n")
    print(f"Source: {doc.metadata['source']}\n")

  embeddings = HuggingFaceEmbeddings(model_name=model_name, model_kwargs=model_kwargs)
  from tqdm.autonotebook import tqdm, trange
  db = Chroma(persist_directory=persistent_directory,



--- Relevant Documents ---
Document 1:
I had called upon my friend, Mr. Sherlock Holmes, one day in the
 autumn of last year and found him in deep conversation with a very
 stout, florid-faced, elderly gentleman with fiery red hair. With an
 apology for my intrusion, I was about to withdraw when Holmes pulled
 me abruptly into the room and closed the door behind me.

“You could not possibly have come at a better time, my dear Watson,” he
said cordially.

“I was afraid that you were engaged.”

“So I am. Very much so.”

“Then I can wait in the next room.”

“Not at all. This gentleman, Mr. Wilson, has been my partner and helper
in many of my most successful cases, and I have no doubt that he will
be of the utmost use to me in yours also.”

The stout gentleman half rose from his chair and gave a bob of
greeting, with a quick little questioning glance from his small
fat-encircled eyes.

Source: adventures_of_sherlock_holmes.txt

Document 2:
One night—it was on the twentieth of March, 1888—

In [9]:
relevant_docs

[Document(metadata={'source': 'adventures_of_sherlock_holmes.txt'}, page_content='I had called upon my friend, Mr. Sherlock Holmes, one day in the\n autumn of last year and found him in deep conversation with a very\n stout, florid-faced, elderly gentleman with fiery red hair. With an\n apology for my intrusion, I was about to withdraw when Holmes pulled\n me abruptly into the room and closed the door behind me.\n\n“You could not possibly have come at a better time, my dear Watson,” he\nsaid cordially.\n\n“I was afraid that you were engaged.”\n\n“So I am. Very much so.”\n\n“Then I can wait in the next room.”\n\n“Not at all. This gentleman, Mr. Wilson, has been my partner and helper\nin many of my most successful cases, and I have no doubt that he will\nbe of the utmost use to me in yours also.”\n\nThe stout gentleman half rose from his chair and gave a bob of\ngreeting, with a quick little questioning glance from his small\nfat-encircled eyes.'),
 Document(metadata={'source': 'adventur

In [10]:
retriever = db.as_retriever(
    search_type="similarity", #uses cosine similarity
    search_kwargs={"k": 3},
)
relevant_docs = retriever.invoke(query)

# 1. Similarity Search
# This method retrieves documents based on vector similarity.
# It finds the most similar documents to the query vector based on cosine similarity.
# Use this when you want to retrieve the top k most similar documents.

In [11]:
relevant_docs

[Document(metadata={'source': 'adventures_of_sherlock_holmes.txt'}, page_content='I had called upon my friend, Mr. Sherlock Holmes, one day in the\n autumn of last year and found him in deep conversation with a very\n stout, florid-faced, elderly gentleman with fiery red hair. With an\n apology for my intrusion, I was about to withdraw when Holmes pulled\n me abruptly into the room and closed the door behind me.\n\n“You could not possibly have come at a better time, my dear Watson,” he\nsaid cordially.\n\n“I was afraid that you were engaged.”\n\n“So I am. Very much so.”\n\n“Then I can wait in the next room.”\n\n“Not at all. This gentleman, Mr. Wilson, has been my partner and helper\nin many of my most successful cases, and I have no doubt that he will\nbe of the utmost use to me in yours also.”\n\nThe stout gentleman half rose from his chair and gave a bob of\ngreeting, with a quick little questioning glance from his small\nfat-encircled eyes.'),
 Document(metadata={'source': 'adventur

In [26]:
# 2. Max Marginal Relevance (MMR)
# This method balances between selecting documents that are relevant to the query and diverse among themselves.
# 'fetch_k' specifies the number of documents to initially fetch based on similarity.
# 'lambda_mult' controls the diversity of the results: 1 for minimum diversity, 0 for maximum.
# Use this when you want to avoid redundancy and retrieve diverse yet relevant documents.
# Note: Relevance measures how closely documents match the query.
# Note: Diversity ensures that the retrieved documents are not too similar to each other,
#       providing a broader range of information.


retriever = db.as_retriever(
    search_type="mmr", #uses cosine similarity
    search_kwargs={"k": 3, "fetch_k": 20, "lambda_mult": 0.5},
)
relevant_docs = retriever.invoke(query)

In [27]:
relevant_docs

[Document(metadata={'source': 'adventures_of_sherlock_holmes.txt'}, page_content='I had called upon my friend, Mr. Sherlock Holmes, one day in the\n autumn of last year and found him in deep conversation with a very\n stout, florid-faced, elderly gentleman with fiery red hair. With an\n apology for my intrusion, I was about to withdraw when Holmes pulled\n me abruptly into the room and closed the door behind me.\n\n“You could not possibly have come at a better time, my dear Watson,” he\nsaid cordially.\n\n“I was afraid that you were engaged.”\n\n“So I am. Very much so.”\n\n“Then I can wait in the next room.”\n\n“Not at all. This gentleman, Mr. Wilson, has been my partner and helper\nin many of my most successful cases, and I have no doubt that he will\nbe of the utmost use to me in yours also.”\n\nThe stout gentleman half rose from his chair and gave a bob of\ngreeting, with a quick little questioning glance from his small\nfat-encircled eyes.'),
 Document(metadata={'source': 'adventur

In [30]:
import os
from dotenv import load_dotenv
from langchain_community.vectorstores import Chroma
from langchain_core.messages import HumanMessage, SystemMessage

# Load the existing vector store with the embedding function
db = Chroma(persist_directory=persistent_directory,
            embedding_function=embeddings)

# Define the user's question
query = "Summarize the information in the document mentioning Sherlock Holmes?"

# Retrieve relevant documents based on the query
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 1},
)
relevant_docs = retriever.invoke(query)

# Display the relevant results with metadata
print("\n--- Relevant Documents ---")
for i, doc in enumerate(relevant_docs, 1):
    print(f"Document {i}:\n{doc.page_content}\n")

# Combine the query and the relevant document contents
combined_input = (
    "Here are some documents that might help answer the question: "
    + query
    + "\n\nRelevant Documents:\n"
    + "\n\n".join([doc.page_content for doc in relevant_docs])
    + "\n\nPlease provide an answer based only on the provided documents. If the answer is not found in the documents, respond with 'I'm not sure'."
)

# Define the messages for the model
messages = [
    SystemMessage(content="You are a helpful assistant."),
    HumanMessage(content=combined_input),
]

# Invoke the model with the combined input
result = llm.invoke(messages)

# Display the full result and content only
print("\n--- Generated Response ---")
# print("Full result:")
# print(result)
print("Content only:")
print(result.content)


--- Relevant Documents ---
Document 1:
It was a cold morning of the early spring, and we sat after breakfast
on either side of a cheery fire in the old room at Baker Street. A
thick fog rolled down between the lines of dun-coloured houses, and the
opposing windows loomed like dark, shapeless blurs through the heavy
yellow wreaths. Our gas was lit and shone on the white cloth and
glimmer of china and metal, for the table had not been cleared yet.
Sherlock Holmes had been silent all the morning, dipping continuously
into the advertisement columns of a succession of papers until at last,
having apparently given up his search, he had emerged in no very sweet
temper to lecture me upon my literary shortcomings.


--- Generated Response ---
Content only:
The document mentions Sherlock Holmes as a main character in the scene. He was silent all morning, searching through advertisement columns in various papers. Eventually, he stopped searching and lectured the narrator about their literary sho

In [31]:
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_community.vectorstores import Chroma
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder


# Load the existing vector store with the embedding function
db = Chroma(persist_directory=persistent_directory, embedding_function=embeddings)

# Create a retriever for querying the vector store
# `search_type` specifies the type of search (e.g., similarity)
# `search_kwargs` contains additional arguments for the search (e.g., number of results to return)
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)


# Contextualize question prompt
# This system prompt helps the AI understand that it should reformulate the question
# based on the chat history to make it a standalone question
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question "
    "which might reference context in the chat history, "
    "formulate a standalone question which can be understood "
    "without the chat history. Do NOT answer the question, just "
    "reformulate it if needed and otherwise return it as is."
)

# Create a prompt template for contextualizing questions
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

# Create a history-aware retriever
# This uses the LLM to help reformulate the question based on chat history
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)

# Answer question prompt
# This system prompt helps the AI understand that it should provide concise answers
# based on the retrieved context and indicates what to do if the answer is unknown
qa_system_prompt = (
    "You are an assistant for question-answering tasks. Use "
    "the following pieces of retrieved context to answer the "
    "question. If you don't know the answer, just say that you "
    "don't know. Use three sentences maximum and keep the answer "
    "concise."
    "\n\n"
    "{context}"
)

# Create a prompt template for answering questions
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder("chat_history"),
        ("human", "{input}"),
    ]
)

# Create a chain to combine documents for question answering
# `create_stuff_documents_chain` feeds all retrieved context into the LLM
question_answer_chain = create_stuff_documents_chain(llm, qa_prompt)

# Create a retrieval chain that combines the history-aware retriever and the question answering chain
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)


# Function to simulate a continual chat
def continual_chat():
    print("Start chatting with the AI! Type 'exit' to end the conversation.")
    chat_history = []  # Collect chat history here (a sequence of messages)
    while True:
        query = input("You: ")
        if query.lower() == "exit":
            break
        # Process the user's query through the retrieval chain
        result = rag_chain.invoke({"input": query, "chat_history": chat_history})
        # Display the AI's response
        print(f"AI: {result['answer']}")
        # Update the chat history
        chat_history.append(HumanMessage(content=query))
        chat_history.append(SystemMessage(content=result["answer"]))


# Main function to start the continual chat
if __name__ == "__main__":
    continual_chat()

Start chatting with the AI! Type 'exit' to end the conversation.
AI: Your question seems to be missing. What would you like to ask about the provided context?
AI: Sherlock Holmes is a brilliant and analytical detective who uses his extraordinary powers of observation and reasoning to solve crimes and unravel mysteries. He is the main character in a series of stories and has a unique ability to transform himself into different characters and personas to aid in his investigations.
AI: Sherlock Holmes is a brilliant and analytical detective who uses his extraordinary powers of observation and reasoning to solve crimes and unravel mysteries. 

As for the story, the provided context appears to be the beginning of a collection of stories called "The Adventures of Sherlock Holmes" by Arthur Conan Doyle, but the specific story being referred to is not explicitly mentioned. However, it seems the narrator is about to share a remarkable case that was partially solved by Sherlock Holmes.


In [15]:
print(f'Chroma vectorstore created with: {db._collection.count()} documents')

Chroma vectorstore created with: 2066 documents


In [17]:
# Get one vector and find how many dimensions it has
collection=db._collection
sample_embedding=collection.get(limit=1,include=['embeddings'])['embeddings'][0]
dimensions=len(sample_embedding)
print(f'the vectors have {dimensions:,} dimensions')

the vectors have 768 dimensions


In [21]:
# collection.get(limit=1,include=['embeddings'])['embeddings'][0]