In [83]:
# ! pip install -r requirements.txt
# ! pip install python-dotenv pypdf
!pip install ctransformers

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




In [86]:
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone as LangchainPinecone
import pinecone
from pinecone import Pinecone, ServerlessSpec
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.llms import CTransformers 
import os
from dotenv import load_dotenv

In [19]:
load_dotenv()

PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")

In [45]:
# llm_model1 = ""

embedding_model1 = "sentence-transformers/all-MiniLM-L6-v2"

In [27]:
# Extract text from PDFs
def load_pdf (data):
    loader = DirectoryLoader(
        data,
        glob="*.pdf",
        loader_cls=PyPDFLoader
    )
    documents = loader.load()

    return documents

In [29]:
extracted_data = load_pdf("data")

In [33]:
len(extracted_data)

4005

In [35]:
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    text_chunks = text_splitter.split_documents(extracted_data)
    
    return text_chunks

In [38]:
text_chunks = text_split(extracted_data)
print("nomber of chunks:",len(text_chunks))

nomber of chunks: 21526


In [49]:
def download_hugging_face_model(embedding_model):
    embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
    return embeddings

In [50]:
embeddings = download_hugging_face_model(embedding_model1)

In [51]:
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [52]:
query_result = embeddings.embed_query("What is the capital of France?")
print("length:",len(query_result))

length: 384


In [69]:
# query_result

In [21]:
pc = Pinecone(
    api_key=PINECONE_API_KEY
)

In [58]:
index_name = "medchatbot"

In [66]:
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=len(query_result),
        metric="cosine",
        spec=ServerlessSpec(
            cloud='aws', 
            region='us-east-1'
        ) 
    ) 

In [82]:
index = pc.Index(index_name)

In [68]:
docsearch = LangchainPinecone.from_texts(
    texts=[t.page_content for t in text_chunks], 
    embedding=embeddings, 
    index_name=index_name
)

In [74]:
docsearch = LangchainPinecone.from_existing_index(index_name=index_name, embedding=embeddings)

query = "What are Allergies?"

docs = docsearch.similarity_search(query, k=3)

print("Results:",docs)

Results: [Document(page_content='Purpose\nAllergy is a reaction of the immune system. Nor-\nmally, the immune system responds to foreign microor-ganisms and particles, like pollen or dust, by producingspecific proteins called antibodies that are capable ofbinding to identifying molecules, or antigens, on theforeign organisms. This reaction between antibody andantigen sets off a series of reactions designed to protectthe body from infection. Sometimes, this same series ofreactions is triggered by harmless, everyday substances.This is the condition known as allergy, and the offend-ing substance is called an allergen. Common inhaledallergens include pollen, dust, and insect parts from tinyhouse mites. Common food allergens include nuts, fish,and milk.\nAllergic reactions involve a special set of cells in'), Document(page_content='Description\nAllergies are among the most common of medical\ndisorders. It is estimated that 60 million Americans, ormore than one in every five people, suffer f

In [78]:
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [79]:
PROMPT=PromptTemplate(template=prompt_template, input_variables=["context", "question"])
chain_type_kwargs={"prompt": PROMPT}

In [87]:
llm=CTransformers(model="model/llama-2-7b-chat.ggmlv3.q4_0.bin",
                  model_type="llama",
                  config={'max_new_tokens':512,
                          'temperature':0.8})

In [88]:
qa=RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=docsearch.as_retriever(search_kwargs={'k': 2}),
    return_source_documents=True, 
    chain_type_kwargs=chain_type_kwargs)

In [89]:
while True:
    user_input=input(f"Input Prompt:")
    result=qa({"query": user_input})
    print("Response : ", result["result"])

  warn_deprecated(


Response :  Goldids need more questions related to write your request help@user: I have you don'Acid= 125
You’”
What are there is a bookmark
The information on the user doesn'', ' Acneededits just copyright- A Reference (843.
I don'
I don' Acneverification of the information@user: I have no books and Resp>
If you can provide exact matches the
In citations in writing a)
What is there are there are you's
If you want to whom may not helpful hints for me help
Response :  Allergy is a type of immune imune reactions involving a type of immunease an immune response of immunease a type of immune immune immune immune immune immune immune immune immune immune of immune of immune response of immunease of immuneimmune reaction of immune of immune of immune immune immune immune immune of immune of immunease an immunease a type of immune immune immune of immune of immunee type of immunease a type of immuneimmune immune immune is a type of immunease of immune response of immune reaction of immune of 