In [44]:
print("OK")

OK


In [45]:
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings

from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain.prompts import PromptTemplate
from langchain.llms import CTransformers
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [46]:
def load_pdf(data):
    loader=DirectoryLoader(data,
                    glob="*.pdf",
                    loader_cls=PyPDFLoader)
    documents=loader.load()
    return documents

In [None]:
extracted_pdf = load_pdf("Data/")


In [None]:
def text_split(extracted_pdf):
   text_spliter= RecursiveCharacterTextSplitter(chunk_size=500,chunk_overlap=20)
   text_chunks=text_spliter.split_documents(extracted_pdf)
   
   return text_chunks

In [None]:
text_chunks=text_split(extracted_pdf)
print("length of my chunk;",len(text_chunks))

length of my chunk; 5860


In [None]:
#download embedding model
def download_huggingfaceembedding():
    embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
    return embedding

In [None]:
embedding = download_huggingfaceembedding()

In [None]:
embedding

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={})

In [None]:
query_result = embedding.embed_query("Hello World!")
print("Length:", len(query_result))
query_result

Length: 384


[-0.020386816933751106,
 0.025280792266130447,
 -0.0005662452895194292,
 0.011615419760346413,
 -0.03798845037817955,
 -0.11998119205236435,
 0.04170941933989525,
 -0.02085712179541588,
 -0.05900676175951958,
 0.024232570081949234,
 0.0621202290058136,
 0.06767992675304413,
 0.033100228756666183,
 -0.010369383729994297,
 -0.03121573105454445,
 -0.032733216881752014,
 -0.0021117650903761387,
 0.009261962957680225,
 -0.12476455420255661,
 0.011236815713346004,
 0.03904544934630394,
 0.054402485489845276,
 -0.0028255314100533724,
 0.044556282460689545,
 -0.08542022109031677,
 -0.02287369966506958,
 0.039140552282333374,
 0.03604685142636299,
 -0.03212680667638779,
 -0.06425869464874268,
 0.05812907963991165,
 0.04669088125228882,
 0.08061561733484268,
 -0.007734259124845266,
 -0.02208320051431656,
 0.06713147461414337,
 -0.04504144564270973,
 -0.10212123394012451,
 0.001264391466975212,
 0.04680197685956955,
 0.02639591135084629,
 -0.06990957260131836,
 -0.04453347250819206,
 -0.006901960

In [None]:
import faiss
from langchain.vectorstores import FAISS


In [None]:
def store_embeddings_faiss(embeddings, text_chunks):
    vector_store = FAISS.from_documents(text_chunks, embeddings)
    vector_store.save_local("faiss_index")
    print("FAISS database saved successfully!")

store_embeddings_faiss(embedding, text_chunks)


FAISS database saved successfully!


In [None]:
vector_store = FAISS.load_local("faiss_index", embedding)


In [None]:
query = "What are allergies?"

# Perform similarity search
docs = vector_store.similarity_search(query=query, k=3)

# Print results
for i, doc in enumerate(docs):
    print(f"Result {i+1}:\n{doc.page_content}\n")

Result 1:
reaction. Allergic rhinitis is characterized by an itchy,
runny nose, often with a scratchy or irritated throat due
to post-nasal drip. Inflammation of the thin membrane
covering the eye (allergic conjunctivitis) causes redness,
irritation, and increased tearing in the eyes. Asthma caus-
es wheezing, coughing, and shortness of breath. Symp-
toms of food allergies depend on the tissues most sensi-
tive to the allergen and whether the allergen spread sys-

Result 2:
reactions is triggered by harmless, everyday substances.
This is the condition known as allergy, and the offend-
ing substance is called an allergen. Common inhaled
allergens include pollen, dust, and insect parts from tiny
house mites. Common food allergens include nuts, fish,
and milk.
Allergic reactions involve a special set of cells in
the immune system known as mast cells. Mast cells
serve as guards in the tissues where the body meets the

Result 3:
Purpose
Allergy is a reaction of the immune system. Nor-
mally

In [None]:
prompt_template = """
Use the following piece of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to generate any random answer from your own

Context:{context}
Question:{question}

Only return the helpful answer and nothing else
helpful answer:
"""

In [None]:
PROMPT = PromptTemplate(template=prompt_template, input_variables=["context","question"])
chain_type_kwargs = {"prompt":PROMPT}

In [None]:
llm = CTransformers(model="model/llama-2-7b-chat.ggmlv3.q4_0.bin",
                    model_type="llama",
                    config={"max_new_tokens":512,
                            'temperature':0.8})

In [None]:
retriever = vector_store.as_retriever(search_kwargs={"k": 2})

In [None]:
chain_type_kwargs = {"verbose": True}

In [None]:
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs
)

In [None]:
while True:
    user_input = input(f"Input Prompt: ")
    result = qa({"query":user_input})
    print("Response:", result["result"])



[1m> Entering new  chain...[0m


[1m> Entering new  chain...[0m
Prompt after formatting:
[32;1m[1;3mUse the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

threatening condition. Characteristic signs of the disease
include fever, rash, diarrhea, liver problems, and a com-
promised immune system.
Approximately 25–50% of bone marrow transplant
recipients develop long-term complications. Chronic graft
versus host disease symptoms include skin changes such as
dryness, altered pigmentation, and thickening; abnormal
liver function tests; dry mouth and eyes; infections; and
weight loss. Other long-term complications include

not function properly.
Symptoms
The symptoms of the above disorders include:
• Systemic lupus erythematosus. Symptoms include
fever, chills, fatigue, weight loss, skin rashes (particu-
larly the classic “butterfly” rash on the face), vasculitis,
polyarthralg

KeyboardInterrupt: 