In [None]:
## Data Ingestion
# Load the local text file "speech.txt" through TextLoader and keep the
# loaded result in `text_documents`.
from langchain_community.document_loaders import TextLoader

loader = TextLoader("speech.txt")
text_documents = loader.load()

# Last expression of the cell: shown as the cell output.
text_documents

In [None]:
import os

from dotenv import load_dotenv

# Read variables from a .env file (if one is found) so that later
# os.getenv() calls can see them.
load_dotenv()

# Read the Gemini API key from the environment. The key itself is never
# printed: notebook outputs are saved with the file and would leak the secret.
google_key = os.getenv("GEMINI_API_KEY")
# Report only whether the key is present, not its value.
print("GEMINI_API_KEY loaded" if google_key else "GEMINI_API_KEY is not set")

In [None]:
# Web-based loader
import bs4
from langchain_community.document_loaders import WebBaseLoader

# Load the HTML page, restricting BeautifulSoup parsing to elements whose
# CSS class is one of "post-title", "post-content" or "post-header".
loader = WebBaseLoader(
    web_path="https://lilianweng.github.io/posts/2023-06-23-agent/",
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-title", "post-content", "post-header")
        )
    },
)

# NOTE: this rebinds `loader` and `text_documents` used by the text-file cell.
text_documents = loader.load()
text_documents

In [None]:
## PDF reader
# Load "attention.pdf" through PyPDFLoader; the loaded result is kept in
# `pdf` (consumed by the text-splitting cell) and shown as the cell output.

from langchain_community.document_loaders import PyPDFLoader
loader = PyPDFLoader("attention.pdf")
pdf = loader.load()
pdf

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Split the loaded PDF into chunks (chunk_size=1000, chunk_overlap=200)
# and keep the resulting chunks in `documents`.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
documents = text_splitter.split_documents(pdf)

# Preview only the first five chunks.
documents[:5]


In [None]:
## Vector Embedding and Vector Store
from langchain_community.vectorstores import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Embedding model, authenticated with the key read from the environment
# in the earlier cell (`google_key`).
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/text-embedding-004",
    google_api_key=google_key,
)

# Only the first 20 chunks are indexed in the Chroma store.
db = Chroma.from_documents(documents[:20], embeddings)

In [None]:
## CHROMA Vector Database
# Run a similarity search against the Chroma store (`db`) and display the
# text of the first returned document. `query` is reused by the FAISS cell.
query = "An attention function can be described as mapping a query"
result = db.similarity_search(query)
result[0].page_content

In [None]:
### FAISS Vector Database
from langchain_community.vectorstores import FAISS

# Index the first 30 chunks in a FAISS store and run the same `query`
# against it.
db1 = FAISS.from_documents(documents[:30], embeddings)
result1 = db1.similarity_search(query)

# Display the FAISS hit (`result1`); `result` holds the earlier Chroma
# search output and would show the wrong store's answer here.
result1[0].page_content


In [None]:
from langchain_community.llms import Ollama

## Create the Ollama LLM wrapper configured for the "llama2" model
llm = Ollama(model="llama2")

# Display the configured LLM object.
llm

In [None]:
# Design Chatprompt Template
# The template has two placeholders: {context} (wrapped in <context> tags)
# and {input} (the question).

from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_template("""
Answer the following question based only on the provided context. 
Think step by step before providing a detailed answer. 
I will tip you $1000 if the user finds the answer helpful. 
<context>
{context}
</context>
Question: {input}""")

In [None]:
## Chain Introduction
## Create Stuff Document Chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Combine the LLM and the chat prompt into a single chain; the result is
# later handed to create_retrieval_chain as the document-combining step.
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

In [None]:
"""
Retrievers: A retriever is an interface that returns documents given
 an unstructured query. It is more general than a vector store.
 A retriever does not need to be able to store documents, only to 
 return (or retrieve) them. Vector stores can be used as the backbone
 of a retriever, but there are other types of retrievers as well. 
 https://python.langchain.com/docs/modules/data_connection/retrievers/   
"""

# Expose the Chroma store (`db`) through the retriever interface.
retriever = db.as_retriever()

# Display the retriever object.
retriever

In [None]:
"""
Retrieval chain: This chain takes in a user inquiry, which is then
passed to the retriever to fetch relevant documents. Those documents
(and original inputs) are then passed to an LLM to generate a response.
https://python.langchain.com/docs/modules/chains/
"""
from langchain.chains import create_retrieval_chain

# Wire the retriever to the document chain: retrieved documents are passed
# on to `document_chain` to produce the answer.
retrieval_chain = create_retrieval_chain(retriever, document_chain)

In [None]:
# Run the retrieval chain for one question; the result is kept in `response`.
response = retrieval_chain.invoke(
    {"input": "What happens Due to the reduced dimension of each head"}
)

In [None]:
# Display the value stored under the "answer" key of the chain's response.
response["answer"]