In [1]:
from langchain_community.document_loaders import PyPDFLoader
loader = PyPDFLoader('rag/deep learning.pdf')
docs = loader.load()
docs

[Document(page_content='The Little Book\nof\nDeep Learning\nFrançois Fleuret\n', metadata={'source': 'rag/deep learning.pdf', 'page': 0}),
 Document(page_content='François Fleuret is a professor of computer sci-\nence at the University of Geneva, Switzerland.\nThe cover illustration is a schematic of the\nNeocognitron by Fukushima [1980], a key an-\ncestor of deep neural networks.\nThis ebook is formatted to fit on a phone screen.', metadata={'source': 'rag/deep learning.pdf', 'page': 1}),
 Document(page_content='Contents\nContents 5\nList of figures 7\nForeword 8\nI Foundations 10\n1 Machine Learning 11\n1.1 Learning from data . . . . . . . 12\n1.2 Basis function regression . . . . 14\n1.3 Under and overfitting . . . . . . 16\n1.4 Categories of models . . . . . . 18\n2 Efficient Computation 20\n2.1 GPUs, TPUs, and batches . . . . 21\n2.2 Tensors . . . . . . . . . . . . . . 23\n3 Training 25\n3.1 Losses . . . . . . . . . . . . . . 26\n3.2 Autoregressive models . . . . . 30\n3.3 Gradien

In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=20)
text_splitter.split_documents(docs)[:5]

[Document(page_content='The Little Book\nof\nDeep Learning\nFrançois Fleuret', metadata={'source': 'rag/deep learning.pdf', 'page': 0}),
 Document(page_content='François Fleuret is a professor of computer sci-\nence at the University of Geneva, Switzerland.\nThe cover illustration is a schematic of the\nNeocognitron by Fukushima [1980], a key an-\ncestor of deep neural networks.\nThis ebook is formatted to fit on a phone screen.', metadata={'source': 'rag/deep learning.pdf', 'page': 1}),
 Document(page_content='Contents\nContents 5\nList of figures 7\nForeword 8\nI Foundations 10\n1 Machine Learning 11\n1.1 Learning from data . . . . . . . 12\n1.2 Basis function regression . . . . 14\n1.3 Under and overfitting . . . . . . 16\n1.4 Categories of models . . . . . . 18\n2 Efficient Computation 20\n2.1 GPUs, TPUs, and batches . . . . 21\n2.2 Tensors . . . . . . . . . . . . . . 23\n3 Training 25\n3.1 Losses . . . . . . . . . . . . . . 26\n3.2 Autoregressive models . . . . . 30\n3.3 Gradient 

In [3]:
documents = text_splitter.split_documents(docs)
documents

[Document(page_content='The Little Book\nof\nDeep Learning\nFrançois Fleuret', metadata={'source': 'rag/deep learning.pdf', 'page': 0}),
 Document(page_content='François Fleuret is a professor of computer sci-\nence at the University of Geneva, Switzerland.\nThe cover illustration is a schematic of the\nNeocognitron by Fukushima [1980], a key an-\ncestor of deep neural networks.\nThis ebook is formatted to fit on a phone screen.', metadata={'source': 'rag/deep learning.pdf', 'page': 1}),
 Document(page_content='Contents\nContents 5\nList of figures 7\nForeword 8\nI Foundations 10\n1 Machine Learning 11\n1.1 Learning from data . . . . . . . 12\n1.2 Basis function regression . . . . 14\n1.3 Under and overfitting . . . . . . 16\n1.4 Categories of models . . . . . . 18\n2 Efficient Computation 20\n2.1 GPUs, TPUs, and batches . . . . 21\n2.2 Tensors . . . . . . . . . . . . . . 23\n3 Training 25\n3.1 Losses . . . . . . . . . . . . . . 26\n3.2 Autoregressive models . . . . . 30\n3.3 Gradient 

In [4]:
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS

db = FAISS.from_documents(documents[:30],OpenAIEmbeddings())

  warn_deprecated(


In [5]:
db

<langchain_community.vectorstores.faiss.FAISS at 0x1cad0e10050>

In [6]:
query = "The techniques involved come originally from"
result = db.similarity_search(query)
result[0].page_content

'Chapter 1\nMachine Learning\nDeep learn ing belongs historically to the larger\nfield of statistical machine learn ing, as it funda-\nmentally concerns methods that are able to learn\nrepresentations from data. The techniques in-\nvolved come originally from artificialneuralnet-\nworks, and the “deep” qualifier highlights that\nmodels are long compositions of mappings, now\nknown to achieve greater performance.\nThe modularity, versatility, and scalability of\ndeep models have resulted in a plethora of spe-\ncific mathematical methods and software devel-\nopment tools, establishing deep learning as a\ndistinct and vast technical field.\n11'

In [10]:
import os
from dotenv import load_dotenv
from langchain_community.llms.huggingface_hub import HuggingFaceHub
load_dotenv()

huggingfacehub_api_token = os.environ['HUGGINGFACE_API_KEY']

llm = HuggingFaceHub(repo_id="gpt2", huggingfacehub_api_token=huggingfacehub_api_token)

In [11]:
# Design chatprompt template
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_template("""
Answer the following question based only on the provided context. 
Think step by step before providing a detailed answer. 
I will tip you $1000 if the user finds the answer helpful. 
<context>
{context}
</context>
Question: {input}""")

In [12]:
# chain introduction
# create stuff document chain

from langchain.chains.combine_documents import create_stuff_documents_chain

document_chain = create_stuff_documents_chain(llm,prompt)

In [13]:
"""
Retrievers: A retriever is an interface that returns documents given
 an unstructured query. It is more general than a vector store.
 A retriever does not need to be able to store documents, only to 
 return (or retrieve) them. Vector stores can be used as the backbone
 of a retriever, but there are other types of retrievers as well. 
 https://python.langchain.com/docs/modules/data_connection/retrievers/   
"""
retriever = db.as_retriever()
retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001CAD0E10050>)

In [14]:
## retreiver chain
"""
Retrieval chain:This chain takes in a user inquiry, which is then
passed to the retriever to fetch relevant documents. Those documents 
(and original inputs) are then passed to an LLM to generate a response
https://python.langchain.com/docs/modules/chains/
"""

from langchain.chains import create_retrieval_chain
retrieval_chain = create_retrieval_chain(retriever,document_chain)

In [15]:
response = retrieval_chain.invoke({"input":"The techniques involved come originally from"})

In [16]:
response['answer']

'Human: \nAnswer the following question based only on the provided context. \nThink step by step before providing a detailed answer. \nI will tip you $1000 if the user finds the answer helpful. \n<context>\nChapter 1\nMachine Learning\nDeep learn ing belongs historically to the larger\nfield of statistical machine learn ing, as it funda-\nmentally concerns methods that are able to learn\nrepresentations from data. The techniques in-\nvolved come originally from artificialneuralnet-\nworks, and the “deep” qualifier highlights that\nmodels are long compositions of mappings, now\nknown to achieve greater performance.\nThe modularity, versatility, and scalability of\ndeep models have resulted in a plethora of spe-\ncific mathematical methods and software devel-\nopment tools, establishing deep learning as a\ndistinct and vast technical field.\n11\n\nFrançois Fleuret is a professor of computer sci-\nence at the University of Geneva, Switzerland.\nThe cover illustration is a schematic of the