In [1]:
from langchain.embeddings import OllamaEmbeddings, HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.vectorstores import Pinecone, Chroma, FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import PyPDFLoader, DirectoryLoader
from langchain_community.llms import CTransformers, Ollama
from langchain_pinecone import PineconeVectorStore
import pinecone

  from tqdm.autonotebook import tqdm


In [2]:
import os

from dotenv import load_dotenv

# Read a .env file (if one is found) so the os.getenv lookups in the next cell can see its values.
load_dotenv()

True

In [3]:
# Settings pulled from the process environment; each one is None when the variable is unset.
LANGCHAIN_API_KEY = os.environ.get("LANGCHAIN_API_KEY")
LANGCHAIN_PROJECT = os.environ.get("LANGCHAIN_PROJECT")
PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
PINECONE_API_ENV = os.environ.get("PINECONE_API_ENV")

In [4]:
#DATA INGESTION
def load_pdf(data):
    """Load the PDF files found in a directory.

    Args:
        data: Path of the directory handed to ``DirectoryLoader``; files
            matching the ``*.pdf`` glob are read with ``PyPDFLoader``.

    Returns:
        Whatever ``DirectoryLoader.load()`` returns for that directory.
    """
    return DirectoryLoader(data, glob='*.pdf', loader_cls=PyPDFLoader).load()

In [29]:
# Raw string: in a normal literal the Windows separators form invalid escape
# sequences (\M, \m, \d), which Python reports with a warning and plans to
# reject. The resulting path value is unchanged.
extracted_data = load_pdf(data=r"D:\My_Coding_Files\medical-chatbot-using-llama2\data")

  extracted_data = load_pdf(data="D:\My_Coding_Files\medical-chatbot-using-llama2\data")


In [30]:
# Number of entries returned by load_pdf.
len(extracted_data)

4005

In [31]:
#DATA TRANSFORMATION
#CREATING TEXT CHUNKS
def text_split(extracted_data, chunk_size=500, chunk_overlap=20):
    """Split loaded documents into smaller chunks.

    Args:
        extracted_data: Documents to split (e.g. the output of ``load_pdf``).
        chunk_size: ``chunk_size`` forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 500, the value
            that was previously hard-coded.
        chunk_overlap: ``chunk_overlap`` forwarded to the splitter.
            Defaults to 20, the value that was previously hard-coded.

    Returns:
        The result of ``split_documents`` on ``extracted_data``.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(extracted_data)

In [32]:
# Chunk the loaded PDF content; `docs` is what gets embedded and indexed below.
docs = text_split(extracted_data)

In [33]:
# Number of chunks produced by text_split.
len(docs)

42820

In [10]:
#Use embedding model
def hugging_face_embed_docs():
    """Build a ``HuggingFaceEmbeddings`` instance for the
    ``sentence-transformers/all-MiniLM-L6-v2`` model and return it."""
    return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

def ollama_embed_docs():
    """Build an ``OllamaEmbeddings`` instance for the ``llama2`` model and return it."""
    return OllamaEmbeddings(model="llama2")

In [11]:
# Two embedding back-ends are instantiated; the FAISS index further down is
# built from `hugging_embeddings` only.
hugging_embeddings = hugging_face_embed_docs()
embeddings = ollama_embed_docs()



In [12]:
# Display the Ollama embeddings object to inspect its configuration.
embeddings

OllamaEmbeddings(base_url='http://localhost:11434', model='llama2', embed_instruction='passage: ', query_instruction='query: ', mirostat=None, mirostat_eta=None, mirostat_tau=None, num_ctx=None, num_gpu=None, num_thread=None, repeat_last_n=None, repeat_penalty=None, temperature=None, stop=None, tfs_z=None, top_k=None, top_p=None, show_progress=False, headers=None, model_kwargs=None)

In [34]:
# Display the HuggingFace embeddings object to inspect its configuration.
hugging_embeddings

HuggingFaceEmbeddings(client=SentenceTransformer(
  (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Normalize()
), model_name='sentence-transformers/all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [35]:
# Embed every chunk with the HuggingFace model and build a FAISS vector store from them.
vector_store = FAISS.from_documents(
    documents=docs,
    embedding=hugging_embeddings,
)

In [36]:
# Display the vector store object to confirm it was created.
vector_store

<langchain_community.vectorstores.faiss.FAISS at 0x1a2ca415520>

In [37]:
# Retrieval sanity check: top-3 similarity-search results for a sample question.
sample_ans = vector_store.similarity_search(
    query="What is an allergy",
    k=3,
)
sample_ans

[Document(page_content='ORGANIZATIONS\nAmerican Academy of Ophthalmology. 655 Beach Street, PO\nBox 7424, San Francisco, CA 94120-7424. <http://www.\neyenet.org>.KEY TERMS\nAllergen —A substance capable of inducing an\nallergic response.\nAllergic reaction —An immune system reaction to\na substance in the environment; symptoms\ninclude rash, inflammation, sneezing, itchy watery\neyes, and runny nose.\nConjunctiva —The mucous membrane that covers\nthe white part of the eyes and lines the eyelids.', metadata={'source': 'D:\\My_Coding_Files\\medical-chatbot-using-llama2\\data\\Medical_book_2.pdf', 'page': 659}),
 Document(page_content='GALE ENCYCLOPEDIA OF MEDICINE 2 2591Physical allergy', metadata={'source': 'D:\\My_Coding_Files\\medical-chatbot-using-llama2\\data\\Medical_book_4.pdf', 'page': 297}),
 Document(page_content='When thisoccurs, an allergy develops against the offending sub-stance (an allergen.)', metadata={'source': 'D:\\My_Coding_Files\\medical-chatbot-using-llama2\\data\\M

In [38]:
# Prompt text for the QA chain. {context} and {question} are the two
# placeholders declared as input variables when the PromptTemplate is built.
prompt_template = """
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}
Only return the helpful answer below and nothing else
Helpful answer : 
"""

In [39]:
# Wrap the raw prompt text in a PromptTemplate and package it the way
# RetrievalQA.from_chain_type expects it (under the "prompt" key).
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)
chain_type_kwargs = dict(prompt=PROMPT)

In [41]:
# llm = CTransformers(model = r"C:\Users\Arjo\.ollama\models\blobs\sha256-8934d96d3f08982e95922b2b7a2c626a1fe873d7c3b06e8e56d7bc0a1fef9246", model_type='llama2', config={'max_new_tokens' : 512, 'temperature' : 0.8})

In [42]:
# LLM served through Ollama.
# NOTE(review): num_predict presumably caps the number of generated tokens — confirm against the Ollama docs.
llm = Ollama(
    model="phi3:mini",
    num_predict=1024,
    temperature=0.8,
)

In [43]:
# Assemble the question-answering chain: the FAISS retriever (k=2) supplies
# the context, and the custom prompt is passed through chain_type_kwargs.
# Source documents are returned alongside the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vector_store.as_retriever(search_kwargs={'k': 2}),
    chain_type='stuff',
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=True,
)

In [28]:
# Interactive question/answer loop over the RetrievalQA chain.
# Type "exit" or "quit" to stop. Ctrl+C / EOF at the prompt also ends the
# loop cleanly instead of leaving a KeyboardInterrupt traceback in the cell.
while True:
    try:
        user_input = input("Input Prompt : ").strip()
    except (EOFError, KeyboardInterrupt):
        break
    if user_input.lower() in {"exit", "quit"}:
        break
    if not user_input:
        # Nothing was typed; ask again rather than querying the chain with an empty string.
        continue
    result = qa({'query' : user_input})
    print("Response : ", result['result'])

Response :   An allergy is a reaction of the immune system to certain substances, called allergens, that are typically harmless. It can cause symptoms such as an itchy, scratchy nose, eyes, and throat, which are common in allergic rhinitis. The specific allergens vary from person to person. If you don't know more details about the process or causes of allergies beyond this general definition, I may not have enough information to provide additional insights.
Response :   Some treatments for solving alcoholism include addressing associated nutritional deficiencies, maintaining a diet adequate in proteins and carbohydrates, as well as behavioral measures and social supports. Additionally, stress-relief methods like massage, meditation, and hypnotherapy may also be beneficial. It's essential to help the patient overcome alcohol addiction for effective treatment.
Response :   Some treatments to solve alcoholism include dealing with and relieving stress through methods like massage, meditati

KeyboardInterrupt: 