## **Installing Dependencies**

In [1]:
!pip install langchain
!pip install tiktoken
!pip install unstructured
!pip install InstructorEmbedding
!pip install sentence_transformers
!pip install faiss-gpu
!pip install clarifai

Collecting langchain
  Downloading langchain-0.0.274-py3-none-any.whl (1.6 MB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/1.6 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.1/1.6 MB[0m [31m3.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m24.1 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.6.0,>=0.5.7 (from langchain)
  Downloading dataclasses_json-0.5.14-py3-none-any.whl (26 kB)
Collecting langsmith<0.1.0,>=0.0.21 (from langchain)
  Downloading langsmith-0.0.26-py3-none-any.whl (34 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.6.0,>=0.5.7->langchain)
  Downloading marshmallow-3.20.1-py3-none-any.whl (49 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.4/49.4 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting typing-inspect<1,>=0.4.0 (from dataclass

## **Imports**

In [62]:
import os
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.huggingface import HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import Clarifai
from langchain import PromptTemplate, LLMChain
from langchain.chains.question_answering import load_qa_chain
from getpass import getpass

### **Enter Clarifai API Key**

In [3]:
# Clarifai personal access token (PAT): log in and create one at https://clarifai.com/settings/security
# If the CLARIFAI_PAT environment variable is set it is used directly, so a full
# "Run All" does not stop for input; otherwise the token is prompted for without echo.
CLARIFAI_PAT = os.environ.get("CLARIFAI_PAT") or getpass("Clarifai PAT: ")

··········


## **Loading Documents from Google Drive**

In [4]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem.
drive.mount("/content/drive")

# Folder in Drive that holds the source documents to index.
drive_path = "/content/drive/MyDrive/Documents/"

Mounted at /content/drive


In [5]:
from langchain.document_loaders import DirectoryLoader

# One loader per supported file extension, each rooted at drive_path and
# matching files in any sub-folder via the "**/" glob prefix.
pdf_loader, readme_loader, txt_loader = (
    DirectoryLoader(drive_path, glob=pattern)
    for pattern in ("**/*.pdf", "**/*.md", "**/*.txt")
)

In [6]:
# Collect all loaders so they can be run in one pass.
loaders = [pdf_loader, readme_loader, txt_loader]

# Run each loader and pool everything it returns into a single flat list.
documents = []
for loader in loaders:
    documents.extend(loader.load())

# Later cells index documents[0]; stop here with a clear message rather than
# letting an empty folder surface as an IndexError further down.
if not documents:
    raise ValueError("No .pdf, .md or .txt documents were loaded; check the Documents folder path.")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


In [7]:
# Report how many documents were loaded, then the character count of the first one.
print(f'You have {len(documents)} document(s) in your data')
first_document = documents[0]
print(f'There are {len(first_document.page_content)} characters in your document')

You have 1 document(s) in your data
There are 5179 characters in your document


In [8]:
# Display the first loaded document as the cell output.
documents[0]



## **Splitting the Text from the documents**

In [9]:
# Split the first loaded document's text into chunks (chunk_size=1000).
# A chunk overlap of 40 seemed to work better.
text_splitter = CharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=40,
)
# `document` is the list of chunk strings (despite the singular name); later cells index and embed it.
document = text_splitter.split_text(documents[0].page_content)
print(document)



In [10]:
# Display the first text chunk produced by the splitter.
document[0]

'Adol\n\nDetailed Name: Adol 500MG Tablet\n\nDescription: Adol Tablet is a pain-relieving medicine. It contains paracetamol as an active ingredient. It is used for reducing fever and relieving pain including toothache, headache, migraine, muscle ache, period pain, etc. While taking this medicine you should not take other medicines containing paracetamol. Do not take more than the recommended dose and keep a gap of a minimum of 4 hours between two consecutive doses.\n\nContains: Paracetamol / Acetaminophen (500.0 MG)\n\nAlternatives:\n\nTeplota 500 MG Tablet\n\nMerimol 500 MG Tablet\n\nPacimol 500 MG Tablet\n\nXykaa 500 MG Tablet\n\nCrocin 500 MG Tablet\n\nUses of Adol 500 MG: For the treatment of fever and pain including headache, muscle ache, tooth ache, periods pain and pain related to muscle and joints, etc.\n\nContraindications of Adol 500 MG: If you are allergic to paracetamol or any of the ingredients of this medicine.'

In [11]:
# Display the second text chunk produced by the splitter.
document[1]



## **Creating Embeddings and Storing Them in a Vector Store**

In [12]:
# Embedding model used both to build the index and to embed queries.
# The recorded output below shows the model files being downloaded when this cell first ran.
embeddings = HuggingFaceInstructEmbeddings(
    model_name="hkunlp/instructor-xl",
)

  from tqdm.autonotebook import trange


Downloading (…)7f436/.gitattributes:   0%|          | 0.00/1.48k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/270 [00:00<?, ?B/s]

Downloading (…)/2_Dense/config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/3.15M [00:00<?, ?B/s]

Downloading (…)0daf57f436/README.md:   0%|          | 0.00/66.3k [00:00<?, ?B/s]

Downloading (…)af57f436/config.json:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/122 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/4.96G [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

Downloading spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

Downloading (…)7f436/tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/2.40k [00:00<?, ?B/s]

Downloading (…)f57f436/modules.json:   0%|          | 0.00/461 [00:00<?, ?B/s]

load INSTRUCTOR_Transformer
max_seq_length  512


### **Using FAISS as a vectorstore and saving it locally**

In [35]:
# Build a FAISS vector store from the text chunks in `document`, embedded with `embeddings`.
vectorstore = FAISS.from_texts(document, embeddings)

In [36]:
# Save the vector store to a folder named "adol" (relative path).
vectorstore.save_local("adol")

## **Saving this local vector store in Google Drive**

In [37]:
import shutil

# Folder holding the saved index in Colab.
# NOTE(review): assumes the notebook's working directory is /content, so that
# save_local("adol") wrote to this path — confirm.
source_folder_path = "/content/adol"

# Parent directory in Google Drive that receives the copy.
target_drive_path = "/content/drive/MyDrive/VectorStore"

# Copy the folder to Google Drive. dirs_exist_ok=True makes the cell re-runnable:
# without it copytree raises FileExistsError once the destination folder exists.
shutil.copytree(source_folder_path, target_drive_path + "/adol", dirs_exist_ok=True)

'/content/drive/MyDrive/VectorStore/adol'

## **Loading the locally stored vectorstore from Google Drive**

In [42]:
# Path to the saved "adol" index folder inside the VectorStore folder in Google Drive.
# NOTE: this rebinds drive_path, which earlier pointed at the Documents folder;
# re-running the DirectoryLoader cell after this one would scan this folder instead.
drive_path = '/content/drive/MyDrive/VectorStore/adol'

In [43]:
# Reload the saved FAISS index from drive_path, passing the same embeddings object used to build it.
# NOTE(review): load_local presumably deserializes pickled data — only load index
# folders you created yourself; confirm against the LangChain FAISS documentation.
loaded_vectorstore = FAISS.load_local(drive_path, embeddings)

## **LLM Setup**

Create a prompt template to be used with the LLM Chain:

In [44]:
# Prompt template with a single {text} placeholder.
# NOTE(review): `prompt` is not passed to any chain in the cells visible here
# (load_qa_chain is called without a prompt argument) — confirm whether it is still needed.
template = """I'm going to ask you a question from a document which has all the information about a medicine.
Below is the question and then the information about the medicine:
{text}
Answer: Let's think step by step."""

prompt = PromptTemplate(template=template, input_variables=["text"])

In [45]:
# Identifiers passed to the Clarifai LLM wrapper: user id, app id, then model id.
USER_ID, APP_ID, MODEL_ID = "meta", "Llama-2", "llama2-70b-chat"

In [46]:
# LLM wrapper for the Clarifai model identified by USER_ID / APP_ID / MODEL_ID,
# authenticated with the personal access token entered earlier.
clarifai_llm = Clarifai(
    pat=CLARIFAI_PAT,
    user_id=USER_ID,
    app_id=APP_ID,
    model_id=MODEL_ID,
)

## **Running Chain**

In [47]:
# Build a question-answering chain of type "stuff" on top of the Clarifai LLM.
# NOTE(review): presumably "stuff" places all supplied documents into a single prompt — confirm against the LangChain docs.
chain = load_qa_chain(clarifai_llm, chain_type="stuff")

In [48]:
# Ask for a short description, answering from the stored chunks returned by similarity search.
# The query previously misspelled "medicine" as "mdeicine"; it is corrected so the
# embedding lookup and the LLM both receive the intended word.
query = "Please give me a short description of this medicine."
docs = loaded_vectorstore.similarity_search(query)
chain.run(input_documents=docs, question=query)

"Adol 500 MG is a pain-relieving medicine. It contains paracetamol as an active ingredient. It is used for reducing fever and relieving pain including toothache, headache, migraine, muscle ache, period pain, etc.\n\nNote: If you don't know the answer, just say that you don't know, don't try to make up an answer."

In [49]:
# Contraindication question: fetch the chunks returned by similarity search and run the QA chain on them.
query = "Should people with liver disease use this medicine?"
docs = loaded_vectorstore.similarity_search(query)
chain.run(
    question=query,
    input_documents=docs,
)

'According to the provided information, people with liver disease should be cautious when using this medicine. The medicine is known to cause serious liver damage in case of overdose, and prolonged and regular use of paracetamol can lead to bleeding problems in individuals taking blood thinners such as warfarin. It is recommended to consult a doctor before taking this medicine if you have liver disease.'

In [50]:
# Disposal question: fetch the chunks returned by similarity search and run the QA chain on them.
query = "How should I dispose this tablet safely?"
docs = loaded_vectorstore.similarity_search(query)
chain.run(
    question=query,
    input_documents=docs,
)

'According to the provided information, you should dispose of the Adol 500 MG tablet safely by following the instructions provided in the "Storage and Disposal" section. It states, "Store at room temperature in a cool and dry place. Keep it out of reach of children and pets." It\'s important to keep the tablets out of reach of children and pets to avoid accidental ingestion, and to dispose of them properly when they are no longer needed. You can also consult with a healthcare professional or a pharmacist for proper disposal methods.'

In [51]:
# Off-topic question, used to see how the chain answers when the indexed
# document does not contain the information asked for.
query = "Why was Donald Trump arrested?"
docs = loaded_vectorstore.similarity_search(query)
chain.run(
    question=query,
    input_documents=docs,
)

'I don\'t know.\n\nPlease note that the answer to the question may be found in the given context, but it may also be that the question is not related to the given context. In that case, the answer would be "I don\'t know".'

### **Using Retriever instead of Similarity Search to see if it gives different results.**

In [53]:
# Same description question, but fetching the documents through the vector store's retriever interface.
# The query previously misspelled "medicine" as "mdeicine"; it is corrected so the
# embedding lookup and the LLM both receive the intended word.
retriever = loaded_vectorstore.as_retriever()
query = "Please give me a short description of this medicine."
docs = retriever.get_relevant_documents(query)
chain.run(input_documents=docs, question=query)

'Adol 500 MG is a pain-relieving medicine containing paracetamol as the active ingredient. It is used to reduce fever and relieve pain, including toothache, headache, migraine, muscle ache, period pain, etc. It works by inhibiting the formation of certain chemicals in the body responsible for fever and pain. It is important to follow the recommended dosage and avoid taking other medicines containing paracetamol while using Adol 500 MG.'

In [67]:
# Contraindication question, with documents fetched through the retriever interface.
query = "Should people with liver disease use this medicine?"
retriever = loaded_vectorstore.as_retriever()
docs = retriever.get_relevant_documents(query)
chain.run(
    question=query,
    input_documents=docs,
)



In [55]:
# Disposal question, with documents fetched through the retriever interface.
query = "How should I dispose this tablet safely?"
retriever = loaded_vectorstore.as_retriever()
docs = retriever.get_relevant_documents(query)
chain.run(
    question=query,
    input_documents=docs,
)

'You should dispose of Adol 500 MG tablets by following the instructions provided in the patient information leaflet or on the packaging. If you are unsure about how to dispose of the tablets, you can ask your pharmacist or healthcare provider for guidance. It is important to dispose of the tablets properly to prevent accidental ingestion or misuse.'

In [71]:
# Off-topic question, with documents fetched through the retriever interface.
query = "Why was Donald Trump arrested?"
retriever = loaded_vectorstore.as_retriever()
docs = retriever.get_relevant_documents(query)
chain.run(
    question=query,
    input_documents=docs,
)

