In [2]:
from langchain import PromptTemplate
from langchain_pinecone import PineconeVectorStore
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
# from langchain.vecteorstores import Pinecone 
from pinecone import Pinecone,ServerlessSpec
from langchain.document_loaders import DirectoryLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
import os
from tqdm.autonotebook import tqdm
import sys
sys.path.append('../../')

  from tqdm.autonotebook import tqdm


In [3]:
ROOT_DIR = os.path.abspath('..')

In [4]:
PINECONE_INDEX_NAME = "medical-chatbot"

In [5]:
os.environ['PINECONE_API_KEY'] = "YOUR_API_KEY"

In [6]:
def load_data(data_path):
    loader = DirectoryLoader(data_path,glob='*.pdf',loader_cls=PyPDFLoader)
    data = loader.load()
    return data

In [7]:
data_path = os.path.join(ROOT_DIR, 'data')
data = load_data(data_path)

In [8]:
def text_split(data):
    splitter = RecursiveCharacterTextSplitter(chunk_size = 500, chunk_overlap= 20)
    text_chunks = splitter.split_documents(data)
    return text_chunks

In [9]:
text_chunks = text_split(data)
print(len(text_chunks))

6983


In [10]:
def download_huggingface_embedding():
    embeddings = HuggingFaceEmbeddings(model_name = 'sentence-transformers/all-MiniLM-L6-v2')
    return embeddings

In [11]:
embeddings = download_huggingface_embedding()

In [33]:
vectorstore_from_docs = PineconeVectorStore.from_documents(
        text_chunks,
        index_name=PINECONE_INDEX_NAME,
        embedding=embeddings
    )

In [12]:
docsearch = PineconeVectorStore.from_existing_index(PINECONE_INDEX_NAME,embeddings)

In [13]:
query = "What are Allergies ?"
docs = docsearch.asimilarity_search(query,k=3)
print(docs)

<coroutine object VectorStore.asimilarity_search at 0x0000022439B05840>


In [14]:
prompt_template = """
Use the given information context to give appropriate answer for the user's question.
If you don't know the answer, just say that you know the answer, but don't make up an answer.
Context: {context}
Question: {question}
Only return the appropriate answer and nothing else.
Helpful answer:
"""

In [15]:
prompt = PromptTemplate(template=prompt_template,input_variables=["context","question"])
chain_type_kwargs = {"prompt":prompt}

In [27]:
prompt

PromptTemplate(input_variables=['1+2', 'context', 'question'], template="\nUse the given information context to give appropriate answer for the user's question.\nIf you don't know the answer, just say that you know the answer, but don't make up an answer.\nContext: {context}\nQuestion: {question} {1+2}\nOnly return the appropriate answer and nothing else.\nHelpful answer:\n")

In [16]:
config = {'max_new_tokens': 512, 'temperature': 0.8}

llm = CTransformers(model='TheBloke/Llama-2-7B-Chat-GGML',model_file='llama-2-7b-chat.ggmlv3.q4_0.bin',model_type='llama' ,config=config)

Fetching 1 files: 100%|██████████| 1/1 [00:00<?, ?it/s]
Fetching 1 files: 100%|██████████| 1/1 [00:00<?, ?it/s]


In [17]:
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=docsearch.as_retriever(search_kwargs={'k': 2}),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs
)


In [21]:
qa.invoke("What is Acne?")

KeyboardInterrupt: 