In [None]:
!pip install sentence_transformers
!pip install llama_index
!pip install llama-index-llms-huggingface
!pip install llama-index-embeddings-langchain

In [None]:
!pip install torch torchvision

In [17]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core.prompts.prompts import SimpleInputPrompt

In [None]:
# Additional llama_index integration packages (OpenAI, Azure OpenAI, Nebula graph store).
# NOTE(review): none of these packages is imported in the visible cells of this
# notebook — confirm they are still needed before keeping this cell.
%pip install llama-index-llms-openai
%pip install llama-index-embeddings-openai
%pip install llama-index-graph-stores-nebula
%pip install llama-index-llms-azure-openai

In [19]:
import json

# Load the raw medical-dialog dataset from disk.
# The encoding is pinned explicitly: without it `open()` uses the platform's
# default encoding, which is not UTF-8 on every system and can fail on or
# mis-decode non-ASCII characters in the dialogue text.
with open("./Datasets/medical_dialog_dataset/en_medical_dialog.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Work on the first 20,000 entries only.
data = data[0:20000]

In [15]:
# Number of entries currently held in `data`.
# NOTE(review): the recorded output of this cell (50000) does not match the
# 20000-entry slice applied when the dataset is loaded, and the execution
# counts are out of order — the output looks stale; re-run to confirm.
len(data)

50000

In [21]:
import multiprocessing
from functools import partial

def process_entry(entry):
    """Flatten one dialogue record into a single tagged text block.

    Args:
        entry: Mapping providing the string fields "Description", "Patient"
            and "Doctor".

    Returns:
        A string of the form "<Patient>" + Description + Patient + "<Doctor>"
        + Doctor, terminated by two newline characters. Description and
        Patient are joined with no separator between them.
    """
    pieces = (
        "<Patient>",
        entry["Description"],
        entry["Patient"],
        "<Doctor>",
        entry["Doctor"],
        "\n\n",
    )
    return "".join(pieces)

# Format every dialogue in parallel, using one worker process per CPU core.
# Adjust the number of processes according to your system's capabilities.
num_processes = multiprocessing.cpu_count()

# `pool.map` blocks until all results are available and the `with` block shuts
# the pool down on exit, so explicit close()/join() calls are unnecessary.
with multiprocessing.Pool(processes=num_processes) as pool:
    processed_data = pool.map(process_entry, data)

# Concatenate the formatted dialogues into one text blob.
refined_data = "".join(processed_data)

file_path = "./Datasets/medical_dialog_dataset/refined_data/final_dataset.txt"
# Write as UTF-8 explicitly so the result does not depend on the platform's
# default encoding (which may be unable to encode some characters).
with open(file_path, "w", encoding="utf-8") as file1:
    file1.write(refined_data)

print("Data written to:", file_path)


Data written to: ./Datasets/medical_dialog_dataset/refined_data/final_dataset.txt


In [22]:
!huggingface-cli login --token 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

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /home/hrudayte.akkalad/.cache/huggingface/token
Login successful


In [23]:
# System prompt passed to the LLM.
# The literal is delimited by exactly three quotes on each side; an extra
# opening quote would become part of the prompt text itself.
system_prompt = """

You are a QA Assistant. Your goal is to answer questions as accurately as possible based on the instructions and context provided.
"""

# Template that wraps each user query; `{query_str}` is the placeholder for the query text.
# NOTE(review): the `<|USER|>` / `<|ASSISTANT|>` markers do not look like the
# `[INST] ... [/INST]` chat format associated with Llama-2 chat models —
# confirm this template matches the model used in this notebook.
query_wrapper_prompt = SimpleInputPrompt("<|USER|>{query_str}<|ASSISTANT|>")

In [6]:
import torch

# Instantiate the Llama-2 7B chat model through llama_index's HuggingFace wrapper.
llm = HuggingFaceLLM(
    context_window = 4096,
    # Upper bound on generated tokens per answer.
    # NOTE(review): presumably the reason the recorded answers in this notebook
    # stop mid-sentence — confirm and raise if complete answers are needed.
    max_new_tokens=256,
    # Sampling is disabled; NOTE(review): with `do_sample=False` the
    # `temperature` value is presumably ignored — confirm.
    generate_kwargs={"temperature": 0.0, "do_sample": False},
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    tokenizer_name = "meta-llama/Llama-2-7b-chat-hf",
    model_name="meta-llama/Llama-2-7b-chat-hf",
    device_map = "auto",
    # NOTE(review): the recorded output of this cell warns that `load_in_8bit`
    # is deprecated and asks for a `BitsAndBytesConfig` passed via
    # `quantization_config` instead — TODO migrate.
    model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": True}
)

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.
Loading checkpoint shards: 100%|██████████| 2/2 [00:48<00:00, 24.10s/it]


In [None]:
!pip install langchain

In [24]:
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.core import ServiceContext
from llama_index.embeddings.langchain import LangchainEmbedding

# Embedding model: a LangChain HuggingFace embedder for
# "sentence-transformers/all-mpnet-base-v2", wrapped in llama_index's
# LangchainEmbedding adapter.
hf_embedder = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
embed_model = LangchainEmbedding(hf_embedder)

In [26]:
# Read every file in the refined-data directory (where final_dataset.txt is
# written earlier in this notebook) into a list of documents.
docs = SimpleDirectoryReader("./Datasets/medical_dialog_dataset/refined_data/").load_data()

In [27]:
# Bundle the chunk size, the local LLM and the embedding model into one
# context object that is handed to the index builder.
# NOTE(review): the recorded output of this cell contains a truncated warning
# pointing at this call — presumably a deprecation warning for ServiceContext;
# confirm against the installed llama_index version.
service_context = ServiceContext.from_defaults(
    chunk_size=1024,
    llm=llm,
    embed_model=embed_model
)

  service_context = ServiceContext.from_defaults(


In [28]:
# Sanity check: the recorded output shows `docs` is a plain Python list.
type(docs)

list

In [29]:
# Build the vector index over the loaded documents, using the LLM/embedding
# configuration held in `service_context`.
index=VectorStoreIndex.from_documents(docs, service_context=service_context)

In [33]:
# Persist the index's storage context to disk so it can be reloaded later.
# NOTE(review): the directory name says "29k" while the dataset cell keeps
# 20000 entries — confirm the name reflects what is actually stored.
index.storage_context.persist(persist_dir="./VectorStores/medical_dialog_29k/")

In [39]:
from llama_index.core import StorageContext, load_index_from_storage

# Rebuild the storage context from the directory the index was persisted to.
storage_context = StorageContext.from_defaults(persist_dir="./VectorStores/medical_dialog_29k/")

# Reload the index with the same LLM / embedding configuration it was built
# with. If `service_context` is omitted, the reloaded index falls back to the
# library's global defaults, so queries would no longer be embedded and
# answered by the models that this notebook configured.
loaded_index = load_index_from_storage(storage_context, service_context=service_context)

In [40]:
# Create a query engine on top of the index that was reloaded from disk.
query_engine = loaded_index.as_query_engine()

In [42]:
import os
# Set the TOKENIZERS_PARALLELISM environment variable to 'false' for this process.
# NOTE(review): presumably meant to silence the HuggingFace tokenizers fork
# warning; this cell sits after the cells that load the models — confirm
# whether it needs to run before them to take effect.
os.environ['TOKENIZERS_PARALLELISM'] = 'false'


In [None]:
!pip install chromadb

In [None]:
import chromadb
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext

# Load the refined dialogue text produced earlier in this notebook.
# The directory under ./VectorStores/ holds a persisted index, not source
# documents, so it must not be fed to the document reader.
documents = SimpleDirectoryReader("./Datasets/medical_dialog_dataset/refined_data/").load_data()

# Initialize a Chroma client that stores its data under ./chroma_db.
db = chromadb.PersistentClient(path="./chroma_db")

# Create the collection, or reuse it if it already exists.
chroma_collection = db.get_or_create_collection("quickstart")

# Use the Chroma collection as the vector store behind the storage context.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the index with the same LLM / embedding configuration as the first
# index in this notebook, instead of the library's global defaults.
index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context, service_context=service_context
)

# Create a query engine and run a sample query.
query_engine = index.as_query_engine()
response = query_engine.query("diarrhea with headache and stomach pain")
print(response)

In [None]:
# Run a sample symptom query against whichever `query_engine` is currently bound.
response = query_engine.query("diarrhea with headache and stomach pain")

In [36]:
# Print the answer to the sample query.
# NOTE(review): the recorded output ends mid-sentence — presumably cut off by
# the generation length limit; confirm.
print(response)

Hi there! I'm just an AI, I don't have personal experiences, but I'm here to help you with your query.

Based on the context information provided, it seems like you are experiencing some discomforts like diarrhea, headache, and stomach pain. I understand that it can be quite uncomfortable and concerning.

Firstly, let me suggest that you should stay hydrated by drinking plenty of fluids, especially water. Dehydration can exacerbate diarrhea and other symptoms, so it's essential to replenish your body's fluids. You can also try drinking electrolyte-rich beverages like coconut water or sports drinks to help replace lost electrolytes.

In terms of managing your headache, you can try over-the-counter pain relievers like paracetamol or ibuprofen. However, please ensure that you follow the recommended dosage and consult with a medical professional if the pain persists or worsens.

Regarding your stomach pain, it's possible that you may have a


In [40]:
# Display the `response` attribute of the query result (shown as a string in
# the recorded output).
response.response

"Hi there! I'm here to help you with your query. Based on the information provided, it seems like you're experiencing some discomforts that could be related to a few different things.\n\nFirstly, diarrhea can be caused by a variety of factors, such as food poisoning, viral infections, or even a change in diet. If you've recently eaten something that didn't agree with you, it could be the culprit. However, if the diarrhea persists, it's always a good idea to consult with a medical professional to rule out any underlying conditions.\n\nRegarding the headache and stomach pain, it's possible that they could be related to the diarrhea or another underlying condition. Headaches can be caused by a variety of factors, including tension, migraines, or even sinus pressure. Stomach pain can also be caused by a variety of factors, including digestive issues, inflammation, or even a stomach ulcer.\n\nIn any case, I would recommend that you consult with a medical professional to get a proper diagnos

In [9]:
import pickle

# Assuming `index` is the VectorStoreIndex object created from `docs` and `service_context`
# NOTE(review): the recorded run of this cell failed with
# "NameError: name 'index' is not defined", i.e. it was executed in a kernel
# where the index had not been built.
# NOTE(review): this notebook also persists the index via
# `index.storage_context.persist(...)`; pickling is a second, separate
# serialization path — confirm which one is intended.

# Save the index to a file
index_file_path = "./index_medicare_110k.pkl"
with open(index_file_path, "wb") as file:
    pickle.dump(index, file)

print("Index saved to:", index_file_path)

# Load the index from the saved file
# SECURITY: `pickle.load` can execute arbitrary code; only load files that
# this notebook itself produced.
with open(index_file_path, "rb") as file:
    loaded_index = pickle.load(file)

print("Index loaded successfully")

NameError: name 'index' is not defined

In [26]:
# from nltk.translate.bleu_score import sentence_bleu


dataset_file_path = "./Datasets/medicare_dataset/refined_data/refined_medicare_test.txt"  # Replace with the path to your dataset file

# List to store patient queries and doctor responses
dataset = []

# Read the evaluation file; the encoding is pinned so the result does not
# depend on the platform's default encoding.
with open(dataset_file_path, "r", encoding="utf-8") as file:
    lines = file.readlines()

# Keep at most the first 50 lines.
lines = lines[:50]

# Show the last retained line. After the slice the valid indices are 0..49 at
# most, so indexing position 50 would always raise IndexError.
if lines:
    print(lines[-1])





In [35]:
# Build (patient_query, doctor_response) pairs from the tagged lines.
eval_data = []

for line in lines:
    # Skip blank separator lines (including whitespace-only ones).
    if not line.strip():
        continue

    # Split once at the first "<Doctor>" marker.
    patient_part, marker, doctor_part = line.partition("<Doctor>")
    if not marker:
        # No "<Doctor>" marker on this line (e.g. a continuation line of a
        # multi-line message): there is no response to pair, so skip it
        # rather than fail with an IndexError.
        continue

    # The patient side still carries its "<Patient>" tag; the doctor side is
    # everything after the marker (including the trailing newline, if any).
    eval_data.append((patient_part.replace("<Patient>", ""), doctor_part))

In [48]:
from nltk.translate.bleu_score import sentence_bleu

def generate_response(patient_query):
    """Answer a patient query with the notebook-level `query_engine`.

    Args:
        patient_query: The query text passed to `query_engine.query`.

    Returns:
        The `response` attribute of the object returned by the query engine.
    """
    result = query_engine.query(patient_query)
    return result.response

# Whitespace-split tokens of a sample doctor answer.
# NOTE(review): `toks` is not used in the visible cells — presumably intended
# as the reference for `sentence_bleu`; confirm. The answer text also appears
# unrelated to the `patient` query below.
toks = "Dear patient Here are the possibilities of what you might have.1)PhlebitisPhlebitis means inflammation of the veins, and can cause redness, itching, irritation, pain, and swelling. A simple Doppler can rule this out.2Blood clot in the lifeblood clots in the leg can become very dangerous, symptoms include swelling, redness, tenderness in the leg. Coagulation profile with an angiography may be required3)Cellulitis: Initial stage. Only can be clinically ruled out Hope this helped".split()
# Sample patient query used to smoke-test the query engine.
patient = "Ive had a cold which started on Christmas eve but appeared to be getting better over the following week. However I now have what I think may be sinusitis - pain in the head, yellow mucus from the nose and stuffiness-squeaking from the sinuses. Will this go away on its own or should I see my GP?"
print(generate_response(patient))


 Thank you for reaching out to me. I'm just an AI, I don't have personal opinions or emotions, but I'm here to help you with your query.

Based on the information provided, it seems that you may be experiencing sinusitis, which can be caused by a viral or bacterial infection. While it's possible for sinusitis to clear up on its own, it's important to consult with a medical professional to determine the cause and appropriate treatment.

Your GP can perform a thorough examination and may recommend further tests, such as a nasal endoscopy or CT scan, to determine the cause of your symptoms. They may also prescribe antibiotics or other medications to help manage your symptoms.

In the meantime, there are some things you can do to help manage your symptoms:

1. Stay hydrated by drinking plenty of fluids, such as water, tea, or soup.
2. Use a humidifier to add moisture to the air, which can help to thin out mucus and make it easier to breathe.
3. Apply warm compress


In [12]:
import pickle
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext

# Step 1: Load the index object from the .pkl file
# SECURITY: `pickle.load` can execute arbitrary code; only load files that
# were produced by a trusted source.
# NOTE(review): no visible cell writes "./index_medical_dialog_50k.pkl" —
# confirm where this file comes from.
with open("./index_medical_dialog_50k.pkl", "rb") as file:
    index = pickle.load(file)

In [16]:
import pickle
import json

# Load the object from the pickle file
# SECURITY: `pickle.load` can execute arbitrary code; only load trusted files.
with open('./index_medical_dialog_50k.pkl', 'rb') as f:
    index_object = pickle.load(f)

# Convert the object to a dictionary
# NOTE(review): the recorded run of this cell failed here with
# "AttributeError: 'VectorStoreIndex' object has no attribute 'to_dict'",
# so the JSON file below is never written as the cell stands.
index_dict = index_object.to_dict()

# Save the dictionary to a JSON file
with open('./index.json', 'w') as f:
    json.dump(index_dict, f)


AttributeError: 'VectorStoreIndex' object has no attribute 'to_dict'

In [13]:
# Create a query engine from whichever object `index` is currently bound to.
# NOTE(review): the recorded run of this cell failed with
# "AttributeError: _llm" — presumably `index` was the unpickled object at that
# point; confirm.
query_engine = index.as_query_engine()

AttributeError: _llm

In [None]:
# Run the sample symptom query against the current `query_engine`
# (same query text as the earlier sample-query cells).
response = query_engine.query("diarrhea with headache and stomach pain")