In [1]:
import os
from langchain.chains import RetrievalQA
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

In [3]:
import os
import time
import warnings

from dotenv import load_dotenv

# Load variables from a .env file via python-dotenv before anything else
# in the notebook needs them.
load_dotenv()

# Suppress warnings so they do not clutter the cell outputs.
warnings.filterwarnings("ignore")


In [4]:
# Load the source PDF; `documents` holds whatever the loader returns.
print("Loading PDF...")
pdf_path = "data/first aid.pdf"
loader = PyPDFLoader(pdf_path)
documents = loader.load()


Loading PDF...


In [5]:
# Start the clock before splitting: the elapsed time printed further down
# therefore covers both the chunking step and building the vector store.
split_start_time = time.time()

# Split the loaded documents into chunks (chunk_size=1000, chunk_overlap=0).
print("Splitting documents...")
text_splitter = CharacterTextSplitter(chunk_overlap=0, chunk_size=1000)
texts = text_splitter.split_documents(documents)
chunk_count = len(texts)
print("Split into {} chunks".format(chunk_count))

# Choose the embedding backend used to vectorise the chunks.
embeddings = OpenAIEmbeddings()
print("Loading embeddings...")

# Create the vector store that is used as the search index.
db = Chroma.from_documents(texts, embeddings)
elapsed_seconds = time.time() - split_start_time
print("Loading took {0}s".format(elapsed_seconds))

print("Done loading embeddings")

Splitting documents...
Split into 47 chunks
Loading embeddings...
Loading took 9.885992765426636s
Done loading embeddings


In [6]:
print("Indexing...")
# Expose the vector store through a retriever interface
# (similarity search, k=2).
retriever_search_kwargs = {"k": 2}
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs=retriever_search_kwargs,
)
print("Done indexing")

# Assemble the question-answering chain on top of the retriever.
print("Creating QA chain...")
qa_chain_options = dict(
    llm=OpenAI(),
    chain_type="map_reduce",
    retriever=retriever,
    return_source_documents=True,  # ask the chain to return the retrieved documents too
    verbose=True,
)
qa = RetrievalQA.from_chain_type(**qa_chain_options)
print("Done creating QA chain")

Indexing...
Done indexing
Creating QA chain...
Done creating QA chain


In [7]:
# Run the QA chain on a sample question and keep its output in `answer`.
question = "My friend has a broken leg, what should I do?"
answer = qa(question)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [10]:
# Display the chain output's "result" entry (trailing expression, so the
# notebook renders it).
# NOTE(review): the saved output under this cell looks like a list of Document
# objects rather than an answer string; re-run to confirm what is shown here.
answer["result"]


[Document(page_content='31 | P a g e  \n Actions  \n\uf0b7 Provide support to the injured area  \n\uf0b7 Expose the site of the injury  \n\uf0b7 Treat any wounds  \n\uf0b7 Immobilize effectively  \n\uf0b7 Reassure and monitor  \n \nSteady and support the injured part immediately, so that no movement is possible. This stops \nfurther injury and helps to stop the bleeding . This can be done by bandages or by using splints \n(support) where available.  \n \nUsing Bandages  \n \n\uf0b7 Usually it is enough to use the other ( uninjured ) limb or the body of the patient as the \nsplint  (support) . The upper limb can be supported by the body, the lower limb by the \nother limb (Provided that also is not fractured) most fractures (except forearm) can be \nimmobilized \nthus.  Do not \napply \nbandage on \nthe site of \nfracture . \n \n\uf0b7 The \nbandaging should be fairly firm so that there is no movement of the fractured ends; but \nnot too tight in which case the circulation of blood to t