In [1]:
# data ingestion
from langchain_community.document_loaders import TextLoader
loader = TextLoader("speech.txt")
text_documents = loader.load()
text_documents

[Document(metadata={'source': 'speech.txt'}, page_content='The world must be made safe for democracy. Its peace must be planted upon the tested foundations of political liberty. We have no selfish ends to serve. We desire no conquest, no dominion. We seek no indemnities for ourselves, no material compensation for the sacrifices we shall freely make. We are but one of the champions of the rights of mankind. We shall be satisfied when those rights have been made as secure as the faith and the freedom of nations can make them.\n\nJust because we fight without rancor and without selfish object, seeking nothing for ourselves but what we shall wish to share with all free peoples, we shall, I feel confident, conduct our operations as belligerents without passion and ourselves observe with proud punctilio the principles of right and of fair play we profess to be fighting for.\n\nâ€¦\n\nIt will be all the easier for us to conduct ourselves as belligerents in a high spirit of right and fairness 

In [None]:
import os 
from dotenv import load_dotenv
load_dotenv()

os.environ['OPENAI_API_KEY']= os.getenv("OPENAI_API_KEY")

python-dotenv could not parse statement starting at line 1
python-dotenv could not parse statement starting at line 4
python-dotenv could not parse statement starting at line 5


In [None]:
# web based loader 

from langchain_community.document_loaders import WebBaseLoader
import bs4

## load, chunk and index the content of html page

loader=WebBaseLoader(web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/"),
                     bs_kwargs=dict(parse_only=bs4.SoupStrainer(
                         class_=("post-title","post-content","post-header")

                     )))

text_document=loader.load()
text_document 

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [None]:
## Pdf reader
from langchain_community.document_loaders import PyPDFLoader
loader = PyPDFLoader('attention.pdf')
docs = loader.load()

docs


In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 200 )
documents = text_splitter.split_documents(docs)
documents[:5]

In [None]:
# vector embedding technique
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma

db = Chroma.from_documents(documents[:15], OpenAIEmbeddings())




In [None]:
# vector database 
query = "who are the authors of the attention is all you need research paper"
result = db.similarity_search(query)
result[0].page_content


In [None]:
# vector database 
query = "what is attention is all you need research paper"
result = db.similarity_search(query)
result[0].page_content

In [None]:
## FAISS vector database
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS

db1 = FAISS.from_documents(documents[:30], OpenAIEmbeddings())

In [None]:
## faiss vector database 
query1 = "what is attention is all you need research paper"
result1 = db1.similarity_search(query1)
result1[0].page_content

In [None]:
from langchain_community.llms import Ollama

llm = Ollama(model = "llama2")
llm

In [None]:
## Design ChatPrompt Template
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_template("""
Answer the following question based only on the provided context. 
Think step by step before providing a detailed answer. 
I will tip you $1000 if the user finds the answer helpful. 
<context>
{context}
</context>
Question: {input}""")

In [None]:
## Chain Introduction
## Create Stuff Docment Chain

from langchain.chains.combine_documents import create_stuff_documents_chain

document_chain=create_stuff_documents_chain(llm,prompt)

In [None]:
"""
Retrievers: A retriever is an interface that returns documents given
 an unstructured query. It is more general than a vector store.
 A retriever does not need to be able to store documents, only to 
 return (or retrieve) them. Vector stores can be used as the backbone
 of a retriever, but there are other types of retrievers as well. 
 https://python.langchain.com/docs/modules/data_connection/retrievers/   
"""

retriever=db.as_retriever()
retriever

In [None]:
"""
Retrieval chain:This chain takes in a user inquiry, which is then
passed to the retriever to fetch relevant documents. Those documents 
(and original inputs) are then passed to an LLM to generate a response
https://python.langchain.com/docs/modules/chains/
"""
from langchain.chains import create_retrieval_chain
retrieval_chain=create_retrieval_chain(retriever,document_chain)

In [None]:
response=retrieval_chain.invoke({"input":"Scaled Dot-Product Attention"})

In [None]:
response['answer']