#### Implementation of Maximal Marginal Relevance

In [1]:
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chat_models import init_chat_model
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain

In [3]:
import os
from dotenv import load_dotenv

# Read variables from a local .env file (if one exists) into the process environment.
load_dotenv()

# os.getenv returns None when the variable is absent, and assigning None into
# os.environ raises an opaque TypeError. Check first and fail with a message
# that tells the reader what to fix.
groq_api_key = os.getenv('GROQ_API_KEY')
if not groq_api_key:
    raise RuntimeError(
        "GROQ_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )
os.environ['GROQ_API_KEY'] = groq_api_key

In [5]:
# Read the source text with an explicit encoding so decoding does not depend
# on the platform's default locale.
loader = TextLoader('langchain_data.txt', encoding='utf-8')
raw_documents = loader.load()

# Split into 300-character chunks; consecutive chunks share 50 characters.
splitter = RecursiveCharacterTextSplitter(chunk_size=300,chunk_overlap=50)
chunks = splitter.split_documents(raw_documents)

# Stop here if nothing was produced, so the problem is reported at its source
# rather than in a later cell.
if not chunks:
    raise ValueError("No chunks were produced from 'langchain_data.txt'; is the file empty?")

# Preview only the first few chunks instead of dumping the whole list.
chunks[:3]



[Document(metadata={'source': 'langchain_data.txt'}, page_content='LangChain is an open-source framework for building applications with large language models (LLMs), providing modular tools for tasks like integrating with external data sources, creating retrieval-augmented generation (RAG) pipelines, managing conversation memory, and connecting to various LLM'),
 Document(metadata={'source': 'langchain_data.txt'}, page_content="memory, and connecting to various LLM providers. Retrieval-Augmented Generation (RAG) is a technique that enhances an LLM's knowledge by retrieving relevant information from an external knowledge base, such as documents or a vector database, and then providing that information as context to the LLM"),
 Document(metadata={'source': 'langchain_data.txt'}, page_content='providing that information as context to the LLM to generate a more accurate and contextually relevant response. In a typical LangChain RAG pipeline, documents are loaded, split into chunks, convert

In [6]:
# Embedding model used to vectorise every chunk.
embedding_model = HuggingFaceEmbeddings(
    model='all-miniLM-L6-v2',
)

# Embed the chunks and index them in a FAISS vector store.
vector_store = FAISS.from_documents(
    documents=chunks,
    embedding=embedding_model,
)

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
# Expose the vector store as a retriever that uses Maximal Marginal Relevance
# ('mmr') search and is configured with k=3.
retriever = vector_store.as_retriever(search_type='mmr', search_kwargs=dict(k=3))

In [8]:
# Display the retriever's repr to confirm the search type and search kwargs it was configured with.
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7faee73f7cb0>, search_type='mmr', search_kwargs={'k': 3})

In [9]:
from langchain.prompts import PromptTemplate

# Prompt with two placeholders: {context} and {input}.
prompt = PromptTemplate.from_template(
    template="""
Answer the question based on the context provided.
                                      
                                      Context:
                                      {context}
                                      
                                      Question:{input}
                                      """,
)

# Chat model selected with a single "provider:model" identifier string.
llm = init_chat_model(model='groq:gemma2-9b-it')

In [10]:
# Chain that combines the retrieved documents with the prompt and calls the LLM.
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Full RAG pipeline: the retriever feeds the document chain above.
rag_chain = create_retrieval_chain(retriever, document_chain)

In [11]:
# Show the composed chain's repr to inspect the runnables it was assembled from.
rag_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7faee73f7cb0>, search_type='mmr', search_kwargs={'k': 3}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template='\nAnswer the question based on the context provided.\n\n                                      Context:\n                                      {context}\n\n                                      Question:{input}\n                                 

In [13]:
# The chain takes the question under the 'input' key.
query = {'input':'LLM retrieve context from?'}
response = rag_chain.invoke(query)

# Double-quote the f-string so the inner ['answer'] lookup also parses on
# Python versions before 3.12, and print the answer without the stray leading
# comma the old literal emitted.
print(f"Answer:\n{response['answer']}")

Answer:
,According to the text, an LLM retrieves context from a **vector database**. 

Here's the relevant excerpt:

"when a user query is received, the system structures this retrieved context along with the original query into a prompt, which is then passed to an LLM for final answer generation. This ... vector database..." 


Let me know if you have any other questions! 



In [14]:
# The chain takes the question under the 'input' key.
query = {"input":"What is the term 'Vector'"}
response = rag_chain.invoke(query)

# Double-quote the f-string so the inner ['answer'] lookup also parses on
# Python versions before 3.12, and print the answer without the stray leading
# comma the old literal emitted.
print(f"Answer:\n{response['answer']}")

Answer:
,According to the text, a vector is a **numerical representation of text, images, or other data created by an embedding model**.  



In [15]:
# Inspect the full result of the last invocation, not just the 'answer' field.
response

{'input': "What is the term 'Vector'",
 'context': [Document(id='910525fb-f80c-4f90-a4fa-93fd6f052903', metadata={'source': 'langchain_data.txt'}, page_content='a complete system.A vector store is a specialized database optimized for storing and searching high-dimensional vectors, which are numerical representations of text, images, or other data created by an embedding model. This differs significantly from traditional databases that rely on keyword or'),
  Document(id='724ab339-eae4-464e-b8cf-7aaddce2d432', metadata={'source': 'langchain_data.txt'}, page_content='"neighboring" document chunks, providing the factual context needed to generate an accurate and grounded response.LangChain acts as the orchestration layer that brings all the necessary components of a RAG pipeline together, providing a standard interface for working with a wide array of vector'),
  Document(id='fec0e40f-008f-41d7-96aa-e1e78e615285', metadata={'source': 'langchain_data.txt'}, page_content='their core applica