## Experiments done before the modular implementation

### Imports

In [11]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from pinecone import Pinecone
from langchain_pinecone import PineconeVectorStore
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers

### Load the environment

In [12]:
# Load variables from a .env file into the process environment via python-dotenv.
# The Pinecone cell further down reads PINECONE_API_KEY and PINECONE_API_ENV
# from os.environ, so this has to run first.
from dotenv import load_dotenv

load_dotenv()

True

In [13]:
# Extract the data from the PDF

def load_pdf(data):
    """Load the PDF files of a directory into documents.

    A ``DirectoryLoader`` is pointed at ``data`` with the ``*.pdf`` glob and
    ``PyPDFLoader`` as the per-file loader class; whatever its ``load()``
    call returns is handed back unchanged.
    """
    return DirectoryLoader(data, glob="*.pdf", loader_cls=PyPDFLoader).load()

In [14]:
# Load the PDFs from the relative "data/" directory using load_pdf.
extracted_data = load_pdf("data/")
# extracted_data  # uncomment to inspect the loaded documents (the output may be very long)

In [15]:
#Creates text chunks from extracted data
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into overlapping text chunks.

    Args:
        extracted_data: The documents to split (in this notebook, the value
            returned by ``load_pdf``).
        chunk_size: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_size``. Defaults to 500, the value previously hard-coded.
        chunk_overlap: Forwarded to ``RecursiveCharacterTextSplitter`` as
            ``chunk_overlap``. Defaults to 20, the value previously hard-coded.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_documents`` returns
        for ``extracted_data``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [16]:
# Split the loaded documents into chunks and report how many were produced.
text_chunks = text_split(extracted_data)
print("length of my chunk:", len(text_chunks))

length of my chunk: 8447


In [17]:
# Next, convert the chunks into vector embeddings so that they can be stored in Pinecone
def download_huggingface_embeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"):
    """Create the HuggingFace embedding model used for indexing and querying.

    Args:
        model_name: Model identifier passed to ``HuggingFaceEmbeddings``.
            Defaults to ``sentence-transformers/all-MiniLM-L6-v2``, the model
            that was previously hard-coded.

    Returns:
        The constructed ``HuggingFaceEmbeddings`` instance.

    NOTE(review): a different model may produce vectors of a different
    length; the Pinecone index dimension presumably has to match — confirm
    before changing the default.
    """
    return HuggingFaceEmbeddings(model_name=model_name)

In [18]:
# Instantiate the embedding model once; the Pinecone cells below reuse this object.
embeddings = download_huggingface_embeddings()

In [19]:
# Sanity check: embed a short string and print the length of the resulting vector.
# The recorded run printed 384 for sentence-transformers/all-MiniLM-L6-v2.
# NOTE(review): the Pinecone index dimension presumably has to equal this value — confirm.
query_result = embeddings.embed_query("Hello world")
print("Length", len(query_result))

Length 384


In [21]:
# Create a Pinecone client and store one embedding per text chunk in the index.
import os

index_name = "med-chatbot"

# Keep a handle on the client (it used to be constructed and thrown away) so
# the target index can be verified before any embedding work starts.
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"),
              environment=os.environ.get("PINECONE_API_ENV"))

# Fail early with an actionable message instead of a late
# "ProtocolError: Failed to connect; did you specify the correct index name?"
if index_name not in pc.list_indexes().names():
    raise ValueError(
        f"Pinecone index {index_name!r} was not found for this API key; "
        "create it first (the embedding probe in this notebook reported "
        "384-dimensional vectors) or correct index_name."
    )

# from_documents embeds each chunk's page_content just like the previous
# from_texts call did, and additionally uploads each chunk's metadata, which
# the QA chain needs for return_source_documents to be informative.
# NOTE(review): re-running this cell presumably upserts every chunk again
# under new ids — confirm, and guard it if the index is already populated.
docsearch = PineconeVectorStore.from_documents(text_chunks, embeddings, index_name=index_name)

In [22]:
# Smoke test: attach to the existing index and fetch the three chunks that
# rank as most similar to a sample question.
query = "What are Allergies"
docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)
docs = docsearch.similarity_search(query, k=3)
print("Result", docs)

Result [Document(page_content='ORGANIZATIONS\nAmerican Academy of Ophthalmology. 655 Beach Street, PO\nBox 7424, San Francisco, CA 94120-7424. <http://www.eyenet.org>.KEY TERMS\nAllergen —A substance capable of inducing an\nallergic response.\nAllergic reaction —An immune system reaction to\na substance in the environment; symptomsinclude rash, inflammation, sneezing, itchy wateryeyes, and runny nose.\nConjunctiva —The mucous membrane that covers\nthe white part of the eyes and lines the eyelids.'), Document(page_content='Although environmental medicine is gaining more\nrespect within conventional medicine, detoxificationKEY TERMS\nAllergen —A foreign substance, such as mites in\nhouse dust or animal dander, that wheninhaled,causes the airways to narrow and pro-duces symptoms of asthma.\nAntibody —A protein, also called immunoglobu-\nlin, produced by immune system cells to removeantigens (the foreign substances that trigger theimmune response).\nFibromyalgia —A condition of debilitatin

In [23]:
# Raw prompt text for the QA chain. {context} and {question} are the two
# input variables declared on the PromptTemplate built from this string.
prompt_template = """
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [24]:
# Wrap the raw prompt text in a PromptTemplate and package it as the extra
# keyword arguments that are handed to the QA chain.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
chain_kwargs = {"prompt": prompt}

In [25]:
# Load the Llama model file from model/ through the CTransformers wrapper.
# The LLM is not what finds the information; it is used to phrase a better
# answer from the context that retrieval supplies.
# NOTE(review): temperature 0.8 looks high for a prompt that tells the model
# not to make up answers — consider lowering it.

llm = CTransformers(model="model/llama-2-7b-chat.ggmlv3.q4_0.bin",
                    model_type="llama",
                    config={'max_new_tokens': 512,
                            'temperature': 0.8})

In [26]:
# Display the wrapper's repr to confirm the model path, model type and config in use.
llm

CTransformers(client=<ctransformers.llm.LLM object at 0x00000220C4E16DC0>, model='model/llama-2-7b-chat.ggmlv3.q4_0.bin', model_type='llama', config={'max_new_tokens': 512, 'temperature': 0.8})

In [27]:
# Display the vector store's default retriever (inspection only; the result is not kept).
docsearch.as_retriever()

VectorStoreRetriever(tags=['PineconeVectorStore', 'HuggingFaceEmbeddings'], vectorstore=<langchain_pinecone.vectorstores.PineconeVectorStore object at 0x00000220FA26FD60>)

In [28]:
# Assemble the retrieval QA chain: a "stuff" chain around the LLM, fed by a
# retriever over the Pinecone store configured with k=2, using the custom
# prompt and returning the source documents alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=docsearch.as_retriever(search_kwargs={"k": 2}),
    chain_type_kwargs=chain_kwargs,
    return_source_documents=True,
)

In [29]:
# Interactive question loop.
# Type "exit" or "quit" (or interrupt the kernel / send EOF) to leave the loop;
# previously the only way out was an unhandled exception.
while True:
    try:
        user_input = input("Input Prompt:").strip()
    except (EOFError, KeyboardInterrupt):
        # Kernel interrupt or closed stdin: end the session without a traceback.
        break

    if user_input.lower() in {"exit", "quit"}:
        break
    if not user_input:
        # Nothing was typed; ask again instead of sending an empty query.
        continue

    # .invoke() replaces the direct call qa({...}), which emits a deprecation warning.
    result = qa.invoke({"query": user_input})
    print("Response : ", result["result"])

  warn_deprecated(


ProtocolError: Failed to connect; did you specify the correct index name?