In [None]:
import os

In [2]:
def get_file_contents(filename):
    """ Given a filename,
        return the contents of that file
    """
    try:
        with open(filename, 'r') as f:
            # It's assumed our file contains a single line,
            # with our API key
            return f.read().strip()
    except FileNotFoundError:
        print("'%s' file not found" % filename)

In [None]:
filename = "..\\GoogleAPIKey.txt"
os.environ['GOOGLE_API_KEY'] = get_file_contents(filename)
filename_grokKey = "..\\GroqAPIKey.txt"
os.environ['GROQ_API_KEY'] = get_file_contents(filename_grokKey)

### Retrievers

#### vector store base retriever

In [None]:
from langchain_community.document_loaders import TextLoader
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [None]:
embeddings_model = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [21]:
#Load the documents and split it into chunks, embed each chunk  and load it into vector store
raw_documents = TextLoader("..\\RAGFiles\\LangchainRetrieval.txt").load()
text_splitter = CharacterTextSplitter(chunk_size=300,chunk_overlap=20)
documents = text_splitter.split_documents(raw_documents)
db =  Chroma.from_documents(documents,embeddings_model)

Created a chunk of size 376, which is longer than the specified 300
Created a chunk of size 412, which is longer than the specified 300
Created a chunk of size 497, which is longer than the specified 300
Created a chunk of size 426, which is longer than the specified 300
Created a chunk of size 389, which is longer than the specified 300
Created a chunk of size 760, which is longer than the specified 300


In [22]:
retreivers = db.as_retriever()

In [23]:
query="What is text embedding and how does langchain help in doing it"
docs = retreivers.invoke(query)

In [24]:
docs

[Document(id='97faa254-46a5-40c8-94b3-384f4a6a7843', metadata={'source': '..\\RAGFiles\\LangchainRetrieval.txt'}, page_content='Text embedding models\nAnother key part of retrieval is creating embeddings for documents. Embeddings capture the semantic meaning of the text, allowing you to quickly and efficiently find other pieces of a text that are similar. LangChain provides integrations with over 25 different embedding providers and methods, from open-source to proprietary API, allowing you to choose the one best suited for your needs. LangChain provides a standard interface, allowing you to easily swap between models.'),
 Document(id='5e6c177d-5f80-42f3-8884-ea19882391be', metadata={'source': '..\\RAGFiles\\LangchainRetrieval.txt'}, page_content='Text embedding models\nAnother key part of retrieval is creating embeddings for documents. Embeddings capture the semantic meaning of the text, allowing you to quickly and efficiently find other pieces of a text that are similar. LangChain pr

In [26]:
template = """
    Answer the question based only on the following context:
    {context}
    Question:{question}"""

prompt = ChatPromptTemplate.from_template(template)
model= ChatGoogleGenerativeAI(model='gemini-1.5-pro-001')

In [27]:
def format_docs(docs):
    return "\n\n".join([d.page_content for d in docs])

In [28]:
chain = (
    {"context":retreivers|format_docs, "question":RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

In [30]:
print(chain.invoke("What is text embedding and how does langchain help in doing it"))

Text embedding is the process of capturing the semantic meaning of text and representing it as a vector (a list of numbers). This allows for quick and efficient searching for similar pieces of text. 

LangChain assists in this process by providing:

* **Integrations:**  It connects with over 25 different embedding providers, both open-source and proprietary, giving users flexibility and choice.
* **Standard Interface:**  This makes it easy to switch between different embedding models without needing to rewrite large portions of code. 


#### Number of relevant similar documents

In [31]:
retreivers = db.as_retriever(search_kwargs={"k":1})

In [32]:
query="What is text embedding and how does langchain help in doing it"
docs = retreivers.invoke(query)

In [33]:
docs

[Document(id='5e6c177d-5f80-42f3-8884-ea19882391be', metadata={'source': '..\\RAGFiles\\LangchainRetrieval.txt'}, page_content='Text embedding models\nAnother key part of retrieval is creating embeddings for documents. Embeddings capture the semantic meaning of the text, allowing you to quickly and efficiently find other pieces of a text that are similar. LangChain provides integrations with over 25 different embedding providers and methods, from open-source to proprietary API, allowing you to choose the one best suited for your needs. LangChain provides a standard interface, allowing you to easily swap between models.')]

#### Score Threshold

In [45]:
retreivers = db.as_retriever(search_type="similarity_score_threshold",search_kwargs={"score_threshold":0.70}) 
#0.50 4 documents retreived
#0.80 No documents retreived
#0.75 No documents retreived
#0.70 3 documents retreived

In [46]:
query="What is text embedding and how does langchain help in doing it"
docs = retreivers.invoke(query)

In [47]:
docs

[Document(id='97faa254-46a5-40c8-94b3-384f4a6a7843', metadata={'source': '..\\RAGFiles\\LangchainRetrieval.txt'}, page_content='Text embedding models\nAnother key part of retrieval is creating embeddings for documents. Embeddings capture the semantic meaning of the text, allowing you to quickly and efficiently find other pieces of a text that are similar. LangChain provides integrations with over 25 different embedding providers and methods, from open-source to proprietary API, allowing you to choose the one best suited for your needs. LangChain provides a standard interface, allowing you to easily swap between models.'),
 Document(id='5e6c177d-5f80-42f3-8884-ea19882391be', metadata={'source': '..\\RAGFiles\\LangchainRetrieval.txt'}, page_content='Text embedding models\nAnother key part of retrieval is creating embeddings for documents. Embeddings capture the semantic meaning of the text, allowing you to quickly and efficiently find other pieces of a text that are similar. LangChain pr