
## Load Model

In [1]:
# Use the %pip magic for every install so the packages are installed into the
# environment of the running kernel (a `!pip` shell call may target another one).
%pip install -U datasets
%pip install transformers accelerate peft trl bitsandbytes

Collecting datasets
  Downloading datasets-2.16.1-py3-none-any.whl (507 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m507.1/507.1 kB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.8,>=0.3.0 (from datasets)
  Downloading dill-0.3.7-py3-none-any.whl (115 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.3/115.3 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
Collecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
INFO: pip is looking at multiple versions of multiprocess to determine which version is compatible with other requirements. This could take a while.
  Downloading multiprocess-0.70.15-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m13.3 MB/s[0m eta [36m0:00:00[0m
Installing collected pa

In [2]:
import os
from getpass import getpass

# Never store an access token in the notebook itself. Reuse HF_TOKEN when the
# environment already provides it, and otherwise prompt for it without echoing
# or saving the value in the cell source or output.
# NOTE: any token that was ever saved in a shared copy of this notebook must be
# revoked at https://huggingface.co/settings/tokens and replaced.
if not os.environ.get("HF_TOKEN"):
    os.environ["HF_TOKEN"] = getpass("Hugging Face access token: ")

In [3]:
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
)

In [4]:
# 4-bit quantisation settings passed to the model loader.
bnb_config = BitsAndBytesConfig(
    # Keep the model weights in 4-bit precision.
    load_in_4bit=True,
    # Quantise the quantisation parameters as well (double quantisation).
    bnb_4bit_use_double_quant=True,
    # Weights are stored in 4 bits, but computation runs in 16-bit floats
    # for better accuracy.
    bnb_4bit_compute_dtype=torch.float16,
    # "nf4" is the 4-bit format introduced in the QLoRA paper.
    bnb_4bit_quant_type="nf4",
)

In [5]:
# Hugging Face Hub repository ids used by this notebook. At this point both
# names hold plain strings; `medmatch_model` is rebound to the loaded model
# object by the model-loading cell further down.
base_model = "meta-llama/Llama-2-7b-chat-hf"
medmatch_model = "typosonlr/llama-2-7b-chat-MEDMATCH_0"

# The tokenizer is loaded from the base model repository, not the MedMatch one.
tokenizer = AutoTokenizer.from_pretrained(base_model, use_fast=True)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/1.62k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

In [6]:
# `medmatch_model` holds the Hub repo id (a string) until this cell rebinds it
# to the loaded model. The type guard keeps the cell safe to re-run: without
# it, a second run would pass the already-loaded model object as the repo id.
if isinstance(medmatch_model, str):
    medmatch_model = AutoModelForCausalLM.from_pretrained(
        medmatch_model,
        quantization_config=bnb_config,
        # The "" key addresses the whole model; 0 is the target device index.
        device_map={"": 0},
    )

config.json:   0%|          | 0.00/661 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/3 [00:00<?, ?it/s]

model-00001-of-00003.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

model-00002-of-00003.safetensors:   0%|          | 0.00/4.95G [00:00<?, ?B/s]

model-00003-of-00003.safetensors:   0%|          | 0.00/3.59G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/183 [00:00<?, ?B/s]

In [7]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Database of doctors
Schema of the database:
Table Name: Doctors

Columns:

    id (INTEGER): Primary key, auto-incremented for unique identification.
    name (TEXT): Full name of the doctor.
    specialization (TEXT): Field of medicine the doctor specializes in.
    contact_number (TEXT): Phone number of the doctor.
    email (TEXT): Email address of the doctor.
    office_address (TEXT): Address of the doctor's office or clinic.
    experience_years (INTEGER): Number of years the doctor has been in practice.
    education (TEXT): Academic qualifications of the doctor.
    languages_spoken (TEXT): Languages the doctor is fluent in.
    availability (TEXT): Office hours and appointment availability.


This schema is designed to store comprehensive information about doctors, allowing users to make informed decisions when selecting a healthcare provider. Most columns are TEXT; only `id` and `experience_years` are INTEGER.


In [8]:
import sqlite3

In [9]:
class Doctor:
    """Plain container that gathers one doctor's details into a dict.

    The resulting ``doctor_data`` dict uses the same keys as the named
    placeholders of ``DoctorDatabase.insert_doctor``.
    """

    def __init__(self, name, specialization, contact_number, email=None,
                 office_address=None,
                 experience_years=None, education=None,
                 languages_spoken=None, availability=None):
        # Only the first three fields are required; the rest default to None.
        self.doctor_data = dict(
            name=name,
            specialization=specialization,
            contact_number=contact_number,
            email=email,
            office_address=office_address,
            experience_years=experience_years,
            education=education,
            languages_spoken=languages_spoken,
            availability=availability,
        )

class DoctorDatabase:
    """Thin wrapper around a SQLite database holding a single ``Doctors`` table."""

    def __init__(self, db_name='/content/drive/MyDrive/AI_IT255/doctors_database.db'):
        """Open (or create) the database file and make sure the table exists."""
        connection = sqlite3.connect(db_name)
        self.conn = connection
        self.cursor = connection.cursor()
        self.create_table()

    def create_table(self):
        """Create the ``Doctors`` table unless it is already present."""
        ddl = '''
            CREATE TABLE IF NOT EXISTS Doctors (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL,
                specialization TEXT NOT NULL,
                contact_number TEXT NOT NULL,
                email TEXT,
                office_address TEXT,
                experience_years INTEGER,
                education TEXT,
                languages_spoken TEXT,
                availability TEXT
            )
        '''
        self.cursor.execute(ddl)
        self.conn.commit()

    def insert_doctor(self, doctor_data):
        """Insert one doctor and commit.

        ``doctor_data`` is a mapping that supplies a value for every named
        placeholder in the statement below.
        """
        insert_sql = '''
            INSERT INTO Doctors (
                name, specialization, contact_number, email, office_address,
                experience_years,
                education, languages_spoken, availability
            )
            VALUES (
                :name, :specialization, :contact_number, :email,
                :office_address, :experience_years,
                :education, :languages_spoken, :availability
            )
        '''
        self.cursor.execute(insert_sql, doctor_data)
        self.conn.commit()

    def get_doctors_by_specialization(self, specialization):
        """Return all rows whose ``specialization`` equals the given value.

        Rows are returned as a list of tuples in table column order; the list
        is empty when nothing matches.
        """
        select_sql = '''
            SELECT * FROM Doctors
            WHERE specialization = ?
        '''
        self.cursor.execute(select_sql, (specialization,))
        return self.cursor.fetchall()

    def close_connection(self):
        """Close the underlying SQLite connection."""
        self.conn.close()





In [10]:
# %pip installs into the environment of the running kernel.
%pip install Faker

from faker import Faker
import random

def create_fake_doctor_entries(doctor_db, num_entries=200, seed=None):
    """Insert ``num_entries`` synthetic doctors into ``doctor_db``.

    Args:
        doctor_db: Object exposing ``insert_doctor(doctor_data)``; it is called
            once per generated entry.
        num_entries: Number of fake doctors to generate.
        seed: Optional seed. When given, both the Faker instance and the
            random number generator used for ``experience_years`` are seeded,
            so the same seed produces the same entries. When omitted, the
            behaviour is unchanged: Faker is unseeded and the global
            ``random`` module is used.
    """
    fake = Faker()
    if seed is None:
        rng = random
    else:
        fake.seed_instance(seed)
        rng = random.Random(seed)

    # Specializations that fake doctors are drawn from (19 entries).
    prominent_specializations = [
        'Internal Medicine', 'Cardiology', 'Orthopedics', 'Dermatology', 'Pediatrics',
        'Ophthalmology', 'Obstetrics and Gynecology', 'Neurology', 'Gastroenterology',
        'Endocrinology', 'Urology', 'Pulmonology', 'Nephrology', 'Oncology',
        'Rheumatology', 'Psychiatry', 'Dentistry', 'ENT (Otolaryngology)', 'General Surgery'
    ]

    for _ in range(num_entries):
        doctor_data = {
            'name': fake.name(),
            'specialization': fake.random_element(elements=prominent_specializations),
            'contact_number': fake.phone_number(),
            'email': fake.email(),
            'office_address': fake.address(),
            'experience_years': rng.randint(1, 30),
            'education': fake.random_element(elements=('MChB', 'DO', 'MD', 'MBBS')),
            'languages_spoken': fake.random_element(elements=('English', 'Hindi', 'Kannada', 'Tulu')),
            'availability': fake.random_element(elements=('Monday', 'Wednesday', 'Friday', 'Tuesday')),
        }
        doctor_db.insert_doctor(doctor_data)


Collecting Faker
  Downloading Faker-23.1.0-py3-none-any.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m30.1 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Faker
Successfully installed Faker-23.1.0


In [12]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [13]:
    # Seed the doctors table with synthetic rows. Every run of this cell adds
    # another 200 rows to the same database file.
    doctor_db = DoctorDatabase()
    try:
        create_fake_doctor_entries(doctor_db, num_entries=200)
    finally:
        # Release the SQLite connection even if seeding fails part-way.
        doctor_db.close_connection()

# Prompting

We try to maintain a balance between helpfulness and safety.
The instructions are designed with the different scenarios and the user's safety in mind. The assistant only ever suggests over-the-counter medicines; for everything else, the user is redirected to the nearest suitable specialist based on the preliminary diagnosis.

In [14]:
def get_medmatch_response(prompt, max_new_tokens=2000):
    """Generate the MedMatch model's reply to ``prompt`` and return it as text.

    Relies on the notebook-level ``tokenizer``, ``medmatch_model`` and
    ``device``.

    Args:
        prompt: Full prompt string passed to the tokenizer.
        max_new_tokens: Forwarded unchanged to ``medmatch_model.generate``.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    encoded = tokenizer(prompt, return_tensors="pt").to(device)
    generated = medmatch_model.generate(**encoded, max_new_tokens=max_new_tokens)
    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)

In [None]:
# Read the user's query from standard input (no prompt text is shown).
user_input = input()
# Build the prompt: the system instructions sit between <<SYS>> and <</SYS>>,
# and the user's query is placed after them, before the closing [/INST] tag.
# print_final_response later uses "[/INST]" to cut the prompt off the output.
prompt_initial = f'''[INST] <<SYS>>
 You are providing preliminary information as a medical assistant.
 Respond to the query referring to the
 user as "you", avoid answering in third person.
 Offer a preliminary diagnosis based on the user's query.
 Do not suggest prescription medicines; only mention over-the-counter options if necessary.
 Answer in bullet points
 Emphasize that this information is not a replacement for professional medical advice.
 If it is an emergency case, include emergency medical contacts for India.
 Mandatorily include the medical specialization at the end of the answer in one word.
<</SYS>>
{user_input} [/INST]  '''


# Run the model with the default max_new_tokens of get_medmatch_response.
output = get_medmatch_response(prompt_initial)

# Script to fetch doctor information
The script fetches the doctor information from the database based on the medical specialization that was identified in the preliminary diagnosis

In [None]:
def _specialization_candidates(phrase):
    """Yield ``phrase`` as written, then with edge punctuation trimmed."""
    import string

    yield phrase
    # Light trim first so a name such as "ENT (Otolaryngology)" keeps its
    # parentheses when it is only followed by sentence punctuation.
    yield phrase.strip(".,;:!?*\"'")
    yield phrase.strip(string.punctuation)


def _first_matching_specialization(words, doctors_db, max_phrase_words):
    """Return ``(doctors, phrase)`` for the first phrase with doctors, else ``([], None)``."""
    tried = set()  # avoid repeating a query for text that was already looked up
    for start in range(len(words)):
        longest = min(max_phrase_words, len(words) - start)
        # Longest phrase first, so "Internal Medicine" is tried before "Internal".
        for length in range(longest, 0, -1):
            phrase = " ".join(words[start:start + length])
            for candidate in _specialization_candidates(phrase):
                if not candidate or candidate in tried:
                    continue
                tried.add(candidate)
                doctors = doctors_db.get_doctors_by_specialization(candidate)
                if doctors:
                    return list(doctors), candidate
    return [], None


def fetch_specialist_list(paragraph, doctors_db, max_phrase_words=4):
    """Find the first specialization mentioned in ``paragraph`` that has doctors.

    The text is scanned left to right. At each position, phrases of up to
    ``max_phrase_words`` words are looked up, both as written and with
    surrounding punctuation removed, so multi-word specializations and words
    followed by punctuation (e.g. "Dermatology.") can match. The lookup itself
    is delegated to ``doctors_db.get_doctors_by_specialization``.

    Args:
        paragraph: Text to scan (for example the model's response).
        doctors_db: Object exposing ``get_doctors_by_specialization(text)`` and
            ``close_connection()``.
        max_phrase_words: Longest phrase, in words, that is tried as a
            specialization name. Values below 1 are treated as 1.

    Returns:
        ``(doctors, specialization)`` for the first match, where ``doctors`` is
        the list returned by the database and ``specialization`` is the text
        that matched; ``(None, None)`` when nothing matches.

    Note:
        ``doctors_db`` is always closed before returning, including when a
        lookup raises, so the caller must not reuse it afterwards.
    """
    words = paragraph.split()
    try:
        found_doctors, medical_area = _first_matching_specialization(
            words, doctors_db, max(1, max_phrase_words)
        )
    finally:
        # Close the connection
        doctors_db.close_connection()

    if found_doctors:
        return found_doctors, medical_area
    return None, medical_area

In [None]:
def print_final_response(medmatch_response, doctor_list=None, speciality=None):
    """Print the model's answer followed by the matching doctors, if any.

    Args:
        medmatch_response: Text returned by the model. Everything up to and
            including the first "[/INST]" marker is removed before printing;
            when the marker is absent, the whole text is printed.
        doctor_list: Rows from the ``Doctors`` table (index 1 is the name,
            index 9 the availability). ``None`` or an empty list prints the
            "no suitable specialists" message instead.
        speciality: Specialization name shown in the heading above the list.
    """
    marker = "[/INST]"
    marker_position = medmatch_response.find(marker)
    if marker_position == -1:
        # No marker: keep the full text rather than slicing from a bogus offset.
        filtered_response = medmatch_response.strip()
    else:
        # Extract text after [/INST]
        filtered_response = medmatch_response[marker_position + len(marker):].strip()
    print(filtered_response)

    if not doctor_list:
        print("\n\nBased on the diagnosis, no suitable specialists found in the database, please visit your nearest general physician")
        return

    print(f"\n\nBased on the diagnosis, here are the doctors from our database that specialise in {speciality}")

    # Labels in table column order, starting at column index 1 (index 0 is the id).
    labels = (
        "- Name:",
        "  Specialization:",
        "  Contact Number:",
        "  Email:",
        "  Office Address:",
        "  Experience (Years):",
        "  Education:",
        "  Languages Spoken:",
        "  Availability:",
    )
    for doctor in doctor_list:
        for column, label in enumerate(labels, start=1):
            print(label, doctor[column])
        print("\n")

In [None]:
# `output` is the model response produced by the prompt cell above.
medmatch_response = output
# print(medmatch_response)
# A new DoctorDatabase is opened for this lookup because
# fetch_specialist_list closes the connection it is given before returning.
doctors_db = DoctorDatabase()
doctor_list, speciality = fetch_specialist_list(medmatch_response, doctors_db)
# Print the answer and, when a specialization matched, the doctors found for it.
print_final_response(medmatch_response, doctor_list, speciality)



Preliminary Diagnosis:

Based on your query, it seems that you are experiencing flaky rashes and itchy skin. This could be caused by several factors, including allergies, eczema, or psoriasis. However, without conducting a thorough physical examination and gathering additional information, it is difficult to provide a definitive diagnosis.

Preliminary Recommendation:

If the rashes are causing significant discomfort or are not improving with over-the-counter treatments, it may be best to consult a dermatologist for further evaluation and treatment.

Additional Information:

It is essential to note that a flaky rash could be a symptom of a more severe underlying condition, such as psoriasis or eczema. If left untreated, these conditions can lead to complications, including skin infections and increased risk of developing mental health disorders.

In case of an emergency, contact the following emergency medical services in India:

* National Emergency Medical Services: 108
* Apollo Hosp

In [None]:
import sqlite3

class SpecialistDatabase:
    """SQLite-backed lookup from a medical specialization to its specialist title.

    Each row of the ``Specialists`` table pairs a specialization
    (e.g. "Dermatology") with a specialist title (e.g. "Dermatologist").
    """

    def __init__(self, db_path='/content/drive/MyDrive/AI_IT255/SPECIALISTS.db'):
        """Open (or create) the database at ``db_path`` and ensure the table exists."""
        self.conn = sqlite3.connect(db_path)
        self.cursor = self.conn.cursor()
        self.create_table()

    def create_table(self):
        """Create the ``Specialists`` table unless it is already present."""
        self.cursor.execute('''
            CREATE TABLE IF NOT EXISTS Specialists (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                specialization TEXT NOT NULL,
                specialist TEXT NOT NULL
            )
        ''')
        self.conn.commit()

    def get_specialization_for_word(self, word):
        """Return the specialist title stored for the specialization ``word``.

        Despite its name, this method matches ``word`` against the
        ``specialization`` column and returns the ``specialist`` column of the
        first matching row, or ``None`` when no row matches.
        """
        query = '''
            SELECT specialist FROM Specialists
            WHERE specialization = ?
        '''
        self.cursor.execute(query, (word,))
        result = self.cursor.fetchone()
        # Only one column is selected, so the value is at index 0.
        return result[0] if result else None

    def insert_specialist(self, specialization, specialist):
        """Store a (specialization, specialist) pair and commit.

        A pair that is already present is not inserted again, so re-running
        the seeding code does not duplicate rows.
        """
        query = '''
            INSERT INTO Specialists (specialization, specialist)
            SELECT ?, ?
            WHERE NOT EXISTS (
                SELECT 1 FROM Specialists
                WHERE specialization = ? AND specialist = ?
            )
        '''
        self.cursor.execute(
            query, (specialization, specialist, specialization, specialist)
        )
        self.conn.commit()

    def close_connection(self):
        """Close the underlying SQLite connection."""
        self.conn.close()



if __name__ == "__main__":
    # Sample usage
    specialist_db = SpecialistDatabase()

    # List of specializations and corresponding specialists
    specializations_and_specialists = [
        ("Allergy and Immunology", "Allergist/Immunologist"),
        ("Anesthesiology", "Anesthesiologist"),
        ("Cardiology", "Cardiologist"),
        ("Dermatology", "Dermatologist"),
        ("Emergency Medicine", "Emergency Medicine Physician"),
        ("Endocrinology", "Endocrinologist"),
        ("Family Medicine", "Family Medicine Physician"),
        ("Gastroenterology", "Gastroenterologist"),
        ("General Surgery", "General Surgeon"),
        ("Geriatrics", "Geriatrician"),
        ("Hematology", "Hematologist"),
        ("Infectious Disease", "Infectious Disease Specialist"),
        ("Internal Medicine", "Internist/Internal Medicine Physician"),
        ("Neonatology", "Neonatologist"),
        ("Nephrology", "Nephrologist"),
        ("Neurology", "Neurologist"),
        ("Obstetrics and Gynecology", "Obstetrician-Gynecologist (OB/GYN)"),
        ("Oncology", "Oncologist"),
        ("Ophthalmology", "Ophthalmologist"),
        ("Orthopedic Surgery", "Orthopedic Surgeon"),
        ("Otolaryngology (ENT)", "Otolaryngologist (Ear, Nose, and Throat Specialist)"),
        ("Pain Medicine", "Pain Management Specialist"),
        ("Pathology", "Pathologist"),
        ("Pediatric Cardiology", "Pediatric Cardiologist"),
        ("Pediatric Surgery", "Pediatric Surgeon"),
        ("Pediatrics", "Pediatrician"),
        ("Physical Medicine and Rehabilitation", "Physical Medicine and Rehabilitation Physician (Physiatrist)"),
        ("Plastic Surgery", "Plastic Surgeon"),
        ("Podiatry", "Podiatrist"),
        ("Psychiatry", "Psychiatrist"),
        ("Pulmonology", "Pulmonologist"),
        ("Radiology", "Radiologist"),
        ("Rheumatology", "Rheumatologist"),
        ("Sleep Medicine", "Sleep Medicine Specialist"),
        ("Sports Medicine", "Sports Medicine Physician"),
        ("Thoracic Surgery", "Thoracic Surgeon"),
        ("Urology", "Urologist"),
        ("Vascular Surgery", "Vascular Surgeon"),
        ("Anatomic Pathology", "Anatomic Pathologist"),
        ("Clinical Pathology", "Clinical Pathologist"),
        ("Dental Medicine", "Dentist"),
        ("Dermatopathology", "Dermatopathologist"),
        ("Emergency Medical Services", "Emergency Medical Services Physician"),
        ("Medical Genetics", "Medical Geneticist"),
        ("Neurological Surgery", "Neurosurgeon"),
        ("Nuclear Medicine", "Nuclear Medicine Physician"),
        ("Pain Management", "Pain Management Specialist"),
        ("Surgical Oncology", "Surgical Oncologist"),
        ("Transplant Hepatology", "Transplant Hepatologist"),
        ("Undersea and Hyperbaric Medicine", "Undersea and Hyperbaric Medicine Specialist"),
        ("Vascular Neurology", "Vascular Neurologist"),
        ("Adolescent Medicine", "Adolescent Medicine Specialist"),
        ("Critical Care Medicine", "Critical Care Medicine Specialist"),
        ("Hospice and Palliative Medicine", "Hospice and Palliative Medicine Specialist"),
        ("Medical Biochemical Genetics", "Medical Biochemical Geneticist"),
        ("Medical Genetics and Genomics", "Medical Geneticist/Genomic Medicine Specialist"),
        ("Molecular Genetic Pathology", "Molecular Genetic Pathologist"),
        ("Clinical Biochemical Genetics", "Clinical Biochemical Geneticist"),
        ("Clinical Cytogenetics", "Clinical Cytogeneticist"),
        ("Clinical Genetics (MD)", "Clinical Geneticist (MD)"),
        ("Clinical Molecular Genetics", "Clinical Molecular Geneticist"),
        ("Neuropathology", "Neuropathologist"),
        ("Pediatric Anesthesiology", "Pediatric Anesthesiologist"),
        ("Pediatric Cardiology", "Pediatric Cardiologist"),
        ("Pediatric Critical Care Medicine", "Pediatric Critical Care Medicine Specialist"),
        ("Pediatric Emergency Medicine", "Pediatric Emergency Medicine Specialist"),
        ("Pediatric Endocrinology", "Pediatric Endocrinologist"),
        ("Pediatric Gastroenterology", "Pediatric Gastroenterologist"),
        ("Pediatric Hematology-Oncology", "Pediatric Hematologist-Oncologist"),
        ("Pediatric Infectious Diseases", "Pediatric Infectious Disease Specialist"),
        ("Pediatric Nephrology", "Pediatric Nephrologist"),
        ("Pediatric Pulmonology", "Pediatric Pulmonologist"),
        ("Pediatric Rheumatology", "Pediatric Rheumatologist"),
        ("Pediatric Surgery", "Pediatric Surgeon"),
        ("Pediatrics", "Pediatrician"),
        ("Geriatric Medicine", "Geriatrician"),
        ("Sleep Medicine", "Sleep Medicine Specialist"),
        ("Sports Medicine", "Sports Medicine Physician"),
        ("Interventional Cardiology", "Interventional Cardiologist"),
        ("Clinical Cardiac Electrophysiology", "Clinical Cardiac Electrophysiologist"),
        ("Colon and Rectal Surgery", "Colorectal Surgeon"),
        ("Female Pelvic Medicine and Reconstructive Surgery", "Urogynecologist"),
        ("Gynecologic Oncology", "Gynecologic Oncologist"),
        ("Hand Surgery", "Hand Surgeon"),
        ("Hospice and Palliative Medicine", "Hospice and Palliative Medicine Specialist"),
        ("Medical Toxicology", "Medical Toxicologist"),
        ("Neonatal-Perinatal Medicine", "Neonatologist"),
        ("Neurodevelopmental Disabilities", "Neurodevelopmental Disabilities Specialist"),
        ("Orthopedic Sports Medicine", "Orthopedic Sports Medicine Specialist"),
        ("Pain Medicine", "Pain Management Specialist"),
        ("Pediatric Allergy/Immunology", "Pediatric Allergist/Immunologist"),
        ("Pediatric Cardiology", "Pediatric Cardiologist"),
        ("Pediatric Endocrinology", "Pediatric Endocrinologist"),
        ("Pediatric Gastroenterology", "Pediatric Gastroenterologist"),
        ("Pediatric Hematology-Oncology", "Pediatric Hematologist-Oncologist"),
        ("Pediatric Infectious Diseases", "Pediatric Infectious Disease Specialist"),
        ("Pediatric Nephrology", "Pediatric Nephrologist"),
        ("Pediatric Pulmonology", "Pediatric Pulmonologist"),
        ("Pediatric Rheumatology", "Pediatric Rheumatologist")
    ]

    # Insert specializations and specialists into the database. The list above
    # repeats several pairs, so dict.fromkeys is used to keep only the first
    # occurrence of each pair while preserving the list order.
    try:
        for specialization, specialist in dict.fromkeys(specializations_and_specialists):
            specialist_db.insert_specialist(specialization, specialist)
    finally:
        # Close the connection, even if an insert fails part-way.
        specialist_db.close_connection()
