In [None]:
# Data ingestion: read a local text file through LangChain's TextLoader.
from langchain_community.document_loaders import TextLoader

loader = TextLoader(file_path="speech.txt")
text_documents = loader.load()

# Last expression of the cell, so the notebook displays the loaded documents.
text_documents

In [None]:
# Configuration: pull secrets from a local .env file into the process environment.
import os
from dotenv import load_dotenv

load_dotenv()

# os.environ only accepts str values, so assigning the result of os.getenv()
# directly raises an opaque TypeError when the variable is missing. Check it
# explicitly and fail with a message that says what to fix.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this notebook."
    )
os.environ["OPENAI_API_KEY"] = openai_api_key

In [None]:
# Web-based loader: fetch an HTML page and load its content as documents.
from langchain_community.document_loaders import WebBaseLoader
import bs4

# parse_only hands BeautifulSoup a strainer built from the listed CSS classes
# (post title, header and content).
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-title", "post-content", "post-header"),
        ),
    },
)
# NOTE: this rebinds `loader` and `text_documents` from the text-file cell above.
text_documents = loader.load()


In [None]:
# Display whatever `text_documents` currently holds (the most recently run loader cell wins).
text_documents

In [None]:
# PDF ingestion: load a local PDF file through PyPDFLoader.
from langchain_community.document_loaders import PyPDFLoader, WebBaseLoader

loader = PyPDFLoader(file_path="attention.pdf")
docs = loader.load()

In [None]:
# Data transformation: split the loaded PDF documents into overlapping chunks.
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
documents = text_splitter.split_documents(docs)

# Preview only the first five chunks instead of dumping the whole list.
documents[:5]

In [None]:
# Vector embeddings and vector store:
# text chunks are embedded into vectors, and those vectors are kept in a vector store.
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma, VectorStore

# Only the first 20 chunks are indexed.
db = Chroma.from_documents(
    documents=documents[:20],
    embedding=OpenAIEmbeddings(),
)

In [None]:
# Chroma vector database: run a similarity search and show the first hit's text.
query = "who are the autors of attention is all you need research paper"
result = db.similarity_search(query=query)
result[0].page_content

In [None]:
# FAISS vector database: index the same first 20 chunks in a second store (`db1`).
from langchain_community.vectorstores import FAISS

db1 = FAISS.from_documents(
    documents=documents[:20],
    embedding=OpenAIEmbeddings(),
)


In [None]:
# Query the FAISS store built in the previous cell.
# The original searched `db` (the Chroma store), so the FAISS index `db1`
# was never actually exercised.
query = "An attention function can be described as mapping a query"
result = db1.similarity_search(query)

# Show the text of the closest matching chunk.
result[0].page_content

In [None]:
# LLM: instantiate the Ollama wrapper configured for the "llama2" model.
from langchain_community.llms import Ollama

llm = Ollama(model="llama2")

# Display the configured LLM object.
llm

In [None]:
# Chat prompt template with two placeholders:
#   {context} - filled with the retrieved documents
#   {input}   - the user's question
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template(
    template="""
Answer the following question based only on the provided context. 
Think step by step before providing a detailed answer. 
I will tip you $1000 if the user finds the answer helpful. 
<context>
{context}
</context>
Question: {input}"""
)

In [None]:
# Chain: a sequence of calls to an LLM, built here with a constructor helper.
# create_stuff_documents_chain takes documents, puts them into the prompt,
# and then passes that prompt to the LLM.
from langchain.chains.combine_documents import create_stuff_documents_chain

document_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=prompt,
)



In [None]:

# The bare string below is a note only; it is not assigned or displayed.
"""
Retrievers: A retriever is an interface that returns documents given
 an unstructured query. It is more general than a vector store.
 A retriever does not need to be able to store documents, only to 
 return (or retrieve) them. Vector stores can be used as the backbone
 of a retriever, but there are other types of retrievers as well. 
 https://python.langchain.com/docs/modules/data_connection/retrievers/   
"""

# Expose the Chroma store (`db`) through the retriever interface.
retriever = db.as_retriever()

# Display the retriever object.
retriever

In [None]:
# Creating the retrieval chain: this chain takes the user query and passes it to the retriever
# to fetch relevant documents. Those documents are then passed to the LLM to generate a response.
# User query -> Retriever (vector store) -> LLM (prompt built by the stuff-documents chain) -> response

In [None]:
# Retrieval chain: combine the retriever with the document chain.
from langchain.chains import create_retrieval_chain

retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)


In [17]:
# Run the retrieval chain; the question is supplied under the "input" key.
response = retrieval_chain.invoke(
    {"input": "An attention function can be described as mapping a query"}
)

In [None]:
# Show only the "answer" entry of the chain's response.
response["answer"]