In [7]:
import os
import dotenv
dotenv.load_dotenv()
pinecone_api_key = os.environ.get('PINECONE_API_KEY')
pinecone_region = os.environ.get('PINECONE_REGION')

In [8]:
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Pinecone as PC
import pinecone
from langchain.document_loaders import PyMuPDFLoader, DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.llms import CTransformers

In [9]:
# Extract the data from pdf
def load_pdf(data):
    loader = DirectoryLoader(data, glob='*.pdf', loader_cls=PyMuPDFLoader)
    documents = loader.load()
    return documents

In [10]:
extracted_data = load_pdf('data/')

In [11]:
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=20)
    text_chunks = text_splitter.split_documents(extracted_data)
    return text_chunks

In [12]:
text_chunks = text_split(extracted_data)

In [13]:
model = 'sentence-transformers/all-MiniLM-L6-v2'

In [14]:
def download_hugging_face_embeddings(model):
    embeddings = HuggingFaceEmbeddings(model_name=model)
    return embeddings


In [15]:
embeddings =  download_hugging_face_embeddings(model)



In [16]:
pc = pinecone.Pinecone(api_key=pinecone_api_key)
index_name = 'llama-test'
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=1536,
        metric="cosine"
    )

In [17]:
docsearch = PC.from_texts([t.page_content for t in text_chunks], embeddings, index_name=index_name)

In [18]:
# Semantic search
docsearch = PC.from_existing_index(index_name, embeddings)
query = 'What is biology?'
docs = docsearch.similarity_search(query, k=3)
print(docs)

[Document(page_content='Clinica l a na tomy is the stu dy of the ma croscopic\nstru ctu re a nd fu nction of the body\na s it rela tes to the pra ctice of\nmedicine a nd other hea lth sciences.\ntahir99-VRG & vip.persianss.ir'), Document(page_content='Clinica l a na tomy is the stu dy of the ma croscopic\nstru ctu re a nd fu nction of the body\na s it rela tes to the pra ctice of\nmedicine a nd other hea lth sciences.\ntahir99-VRG & vip.persianss.ir'), Document(page_content='Positions of Uterus\nAnteversion is the term used to describe the forward bend-\ning of the uterus on the long axis of the vagina. Anteflexion\nis the term used to describe the forward bending of the body\nof the uterus on the cervix.\nSupports of the Uterus\nThe main supports of the uterus are the following:\n• The pelvic diaphragm (levatores ani and the coccygeus\nmuscles and their fascia).\n• The perineal body (a fibromuscular structure in the \nperineum supported by the levatores ani muscles).')]


In [19]:
prompt_template = """
    Use the following pieces of the information to answer the User's question.
    If you don't know the answer, just say that you don't know, don't try to make up the random answer.

    context: {context}
    question: {question}

    Only return the helpful answer bellow nothing else
    Helpful answer:
"""

In [20]:
promt = PromptTemplate(template=prompt_template, input_variables=['context', 'question'])
chain_type_kwargs = {"prompt": promt}

In [21]:
llm = CTransformers(
    model='TheBloke/Llama-2-7B-Chat-GGML',
    model_type='llama',
    config={
        'max_new_tokens': 512,
        'temperature': 0.8
    }
)

Fetching 1 files: 100%|██████████| 1/1 [00:00<00:00, 8338.58it/s]
Fetching 1 files: 100%|██████████| 1/1 [00:00<00:00, 5433.04it/s]


In [22]:
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=docsearch.as_retriever(search_kwargs={'k': 2}),
    return_source_documents=True,
    chain_type_kwargs=chain_type_kwargs
)

In [23]:
while True:
    user_input=input(f"Input Prompt: ")
    result = qa({"query": user_input})
    print("Response: ", result['result'])

  warn_deprecated(


Response:       Yes, deep facial spaces and the deep fascia are connected to each other by bands that connect the different areas.


Number of tokens (513) exceeded maximum context length (512).
Number of tokens (514) exceeded maximum context length (512).
Number of tokens (515) exceeded maximum context length (512).
Number of tokens (516) exceeded maximum context length (512).
Number of tokens (517) exceeded maximum context length (512).
Number of tokens (518) exceeded maximum context length (512).
Number of tokens (519) exceeded maximum context length (512).
Number of tokens (520) exceeded maximum context length (512).
Number of tokens (521) exceeded maximum context length (512).
Number of tokens (522) exceeded maximum context length (512).
Number of tokens (523) exceeded maximum context length (512).
Number of tokens (524) exceeded maximum context length (512).
Number of tokens (525) exceeded maximum context length (512).
Number of tokens (526) exceeded maximum context length (512).
Number of tokens (527) exceeded maximum context length (512).
Number of tokens (528) exceeded maximum context length (512).
Number o

Response:       The patient's pain was likely caused by inflammation
of tissue or organs in the right iliac region. This can
be a symptom of appendicitis, which is an inflammation
of the appendix. Other possible causes of pain in this
region include perforated hernia, abscesses, and
inflammation of other abdominal organs.

    Helpful answer: The patient's pain was likely caused by inflammation of tissue or organs in the right iliac region. This can be a symptom of appendicitis, which is an inflammation of the appendix. Other possible causes of pain in this region include perforated hernia, abscesses, and inflammation of other abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom abdom