In [15]:
import os

import dotenv
import pinecone
import langchain
from pinecone import init
from pinecone import Pinecone
from langchain.llms import OpenAI
from langchain.vectorstores import Chroma, pinecone
# LangChain's Pinecone vector store under an unambiguous alias: the bare name
# `Pinecone` imported above is the pinecone client class, not the vector store.
from langchain.vectorstores import Pinecone as PineconeVectorStore
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains.question_answering import load_qa_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredPDFLoader, OnlinePDFLoader, PyPDFLoader

# Load variables via python-dotenv so the os.environ lookups for the API keys
# later in this notebook can find them (presumably from a local .env file).
dotenv.load_dotenv()

### Load your data

In [16]:
# Build a loader for the source PDF; the path is relative to the notebook's
# working directory. Nothing is read until `loader.load()` is called.
loader = PyPDFLoader("./ppl.pdf") # Add your PDF here

In [17]:
# Read the PDF into a list of documents; each item exposes its text as
# `.page_content` (used by the summary and chunking cells below).
data = loader.load()

In [18]:
# How many documents the loader produced.
print(f'You have {len(data)} document(s) in your data')

# Report the size of one sample document. This notebook samples index 1 (the
# second entry); fall back to index 0 when only one document was loaded, and
# to a count of 0 when nothing was loaded, so the cell never raises IndexError.
sample_index = min(1, len(data) - 1)
sample_chars = len(data[sample_index].page_content) if data else 0
print(f'There are {sample_chars} characters in your document')

You have 20 document(s) in your data
There are 4028 characters in your document


### Chunk your data up into smaller documents

In [19]:
# Cut the loaded documents into chunks of at most 2000 characters, with no
# overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=2000,
)
texts = text_splitter.split_documents(documents=data)

In [20]:
# Show how many chunks the splitter produced.
chunk_count = len(texts)
print(f'Now you have {chunk_count} documents')

Now you have 52 documents


### Create embeddings of your documents to get ready for semantic search

In [22]:
# Read the OpenAI and Pinecone credentials from the process environment.
# The two API keys are None when unset; the Pinecone environment falls back
# to 'us-west1-gcp-free'.

OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')
PINECONE_API_ENV = os.getenv('PINECONE_API_ENV', 'us-west1-gcp-free')

In [23]:
# Embedding model handed to the vector store below.
# The key lives in OPENAI_API_KEY (read from the environment above) and must be
# passed by keyword; the lowercase name `openai_api_key` is not defined anywhere
# in this notebook and raised NameError.
embeddings = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

In [None]:
# Authenticate against Pinecone before any index operation.
# `init` is the function imported directly from the `pinecone` package. The
# module-level name `pinecone` cannot be used here: the import cell's
# `from langchain.vectorstores import Chroma, pinecone` rebinds it to
# LangChain's wrapper module, which is not the client.
# NOTE(review): pinecone-client 3.x and later replace `init()` with a
# `Pinecone(api_key=...)` client object — confirm which version is installed.
init(
    api_key=PINECONE_API_KEY,
    # Use the configured environment (defaults to 'us-west1-gcp-free') instead
    # of repeating the literal, so the PINECONE_API_ENV variable is honoured.
    environment=PINECONE_API_ENV,
)

# Name of the Pinecone index the chunk embeddings are stored in.
index_name = "pdf"

In [None]:
# Embed every chunk's text and store the vectors in the `index_name` index.
# `PineconeVectorStore` is LangChain's Pinecone vector store (aliased in the
# import cell); the bare `Pinecone` name is the pinecone client class and does
# not provide `from_texts`.
chunk_texts = [chunk.page_content for chunk in texts]
docsearch = PineconeVectorStore.from_texts(chunk_texts, embeddings, index_name=index_name)

In [None]:
# Run a similarity search against the vector store for a sample question.
query = "what should i do EPISTAXIS?"
docs = docsearch.similarity_search(query=query)

In [None]:
# Preview the first 450 characters of the first search result.
top_hit = docs[0]
print(top_hit.page_content[:450])

Ear and Nose Diseases 187
EPISTAXIS
Examine the patient and ascertain the site of bleeding. If
bleeding is from Little’s area, insert a cotton wool soakedwith 4 per cent lignocaine and 1 in 1000 solution of
adrenaline and squeeze the end of the nose for few
minutes. If bleeding recurs, bleeding points should besealed by application of chemical or electrical cautery.When bleeding is from nasal mucosa, e.g. hypertension,pressure can be put by passi


### Query those docs to get your answer back

In [None]:
# Completion model with temperature 0, authenticated with the key read from
# the environment earlier in the notebook.
llm = OpenAI(
    openai_api_key=OPENAI_API_KEY,
    temperature=0,
)

# Question-answering chain over retrieved documents, using the "stuff" chain type.
chain = load_qa_chain(llm=llm, chain_type="stuff")

In [None]:
# Retrieve the chunks most similar to the question that the QA chain will answer.
query = "What is meant by normative inquiry?"
docs = docsearch.similarity_search(query=query)

In [None]:
# Answer `query` using the retrieved `docs` as context; as the cell's last
# expression, the returned answer is displayed as the cell output.
chain.run(input_documents=docs, question=query)

' Warm the patient with blankets and give hot soup/coffee. Remove all coverings from injured parts. Gradual rewarming with water or air. Analgesics for pain, i.e. Novalgin. Give Tetanus toxoid 1 cc. stat. Antibiotics in open wounds, Septran DS 1 bd × 5days. No dressings to be applied.'