In [1]:
print("test")

test


In [2]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone
import pinecone
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers

  from tqdm.autonotebook import tqdm


In [3]:
PINECONE_API_KEY= "9d0dc972-d588-4b73-ad05-e41f033dc14c"

In [4]:
def load_pdf(data):
    loader = DirectoryLoader(data,
                    glob="*.pdf",
                    loader_cls= PyPDFLoader)
    
    documents = loader.load()

    return documents

In [5]:
extracted_data = load_pdf("data/")

In [6]:
#text chunks
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    text_chunks = text_splitter.split_documents(extracted_data)

    return text_chunks

In [7]:
text_chunks = text_split(extracted_data)
print(len(text_chunks))

7020


In [8]:
#download embedding model
def download_hugging_face_embeddings():
    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embeddings

In [9]:
embeddings = download_hugging_face_embeddings()

In [10]:
embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [11]:
e_vector = embeddings.embed_query("Hello world")
print("Length", len(e_vector))

Length 384


In [12]:
#to avoid naming conflict with another Pinecone
from langchain.vectorstores import Pinecone as plcone
import os

In [13]:
os.environ["PINECONE_API_KEY"] = PINECONE_API_KEY

In [14]:
docsearch = plcone.from_texts([t.page_content for t in text_chunks], embeddings, index_name="project")

In [15]:
docsearch.as_retriever()

VectorStoreRetriever(tags=['Pinecone', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.pinecone.Pinecone object at 0x000002353E056DF0>)

In [18]:
aller_doc = docsearch.similarity_search("Tell me about sulphur allergy", k=3)

In [19]:
aller_doc

[Document(page_content='medicine into a fine spray that can be inhaled.\nSulfite —A type of preservative that causes allergic\nreactions in some people.edness, drowsiness, headache , sweating, fast or pound-\ning heartbeat, muscle cramps or twitches, nausea, vomit-ing, diarrhea , sleep problems and weakness also may\noccur and do not need medical attention unless they donot go away or they interfere with normal activities.\nMore serious side effects are not common, but may'),
 Document(page_content='other asthma medicines. The physician must determinethe proper amount of time between doses.\nSome bronchodilator products contain sulfites, that\ntrigger an allergic reaction in certain people. Anyonewho has a sulfite allergy should read the label carefullyor check with a physician or pharmacist before using abronchodilator. Call a physician immediately if any ofthese signs of an allergic reaction to sulfite occur:\n• bluish coloration of the skin\n• flushed or red face or skin'),
 Documen

In [20]:
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [21]:
PROMPT=PromptTemplate(template=prompt_template, input_variables=["context", "question"])

#prompt is passed via this parameter
chain_type_kwargs={"prompt": PROMPT}

In [22]:
llm=CTransformers(model="model/llama-2-7b-chat.ggmlv3.q4_0.bin",
                  model_type="llama",
                  config={'max_new_tokens':512,
                          'temperature':0.5})

In [23]:
qa=RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=docsearch.as_retriever(search_kwargs={'k': 2}),
    return_source_documents=True, 
    chain_type_kwargs=chain_type_kwargs)

In [24]:
user_input=input(f"Input Prompt:")
result=qa({"query": user_input})
print("Response : ", result["result"])

  warn_deprecated(


Response :  Acne is a common skin disease that affects nearly 17 million people in the United States, usually beginning at puberty and worsening during adolescence. Up to 20% of women develop mild acne, and it can also be found in newborns. The sebaceous glands lie just beneath the skin's surface, and hormonal changes during adolescence can cause them to produce excess oil, leading to acne.


In [25]:
res = qa.invoke("Give some information common cold and fever")

In [26]:
res

{'query': 'Give some information common cold and fever',
 'result': 'The common cold and fever are two different conditions. A common cold is a viral infection that affects the upper respiratory system, causing symptoms such as runny nose, congestion, cough, sore throat, and fatigue. Fever, on the other hand, is a temporary increase in body temperature, usually caused by an infection or inflammation. It can also be caused by other factors such as exposure to cold temperatures or certain medications.',
 'source_documents': [Document(page_content='acute infections. It tends to be mild and short-lived, anddisappears without treatment.\nChronic cold antibody hemolytic anemia is most\ncommon in women and most often affects those who areover 40 and who have arthritis. This condition usuallylasts for a lifetime, generally causing few symptoms.However, exposure to cold temperatures can accelerate\nGALE ENCYCLOPEDIA OF MEDICINE 2 181AnemiasGEM - 0001 to 0432 - A  10/22/03 1:42 PM  Page 181'),
 