# Named Entity Recognition (NER)
* Named Entity Recognition (NER) is a fundamental task in Natural Language Processing (NLP) that involves identifying and classifying entities (such as persons, locations, organizations, dates, and more) in text. This tutorial will guide you through the basics of NER, its applications, and how to implement it using Python.


### What is Named Entity Recognition (NER)?
* NER is the process of detecting and categorizing named entities in a given text. Named entities typically belong to categories such as:

* Person (e.g., "Elon Musk")
* Location (e.g., "New York", "Mount Everest")
* Organization (e.g., "Google", "NASA")
* Date & Time (e.g., "March 1, 2025", "5 PM")
* Monetary Values (e.g., "$100", "€50,000")
* Percentages (e.g., "10%")


# Using spaCy
* spaCy is a fast NLP library that provides a pre-trained NER model.


In [None]:
!pip install spacy
# python -m spacy download en_core_web_sm




In [None]:
import spacy

# Load spaCy's small English pipeline by its package name
nlp = spacy.load("en_core_web_sm")

# Sentence to analyse
text = "Google was founded by Larry Page and Sergey Brin in 1998 in California."

# Run the pipeline; the resulting Doc exposes the detected entities as doc.ents
doc = nlp(text)

# Report every entity span together with its label string
for entity in doc.ents:
    entity_text, entity_label = entity.text, entity.label_
    print(f"Entity: {entity_text}, Label: {entity_label}")


Entity: Google, Label: ORG
Entity: Larry Page, Label: PERSON
Entity: Sergey Brin, Label: PERSON
Entity: 1998, Label: DATE
Entity: California, Label: GPE


In [None]:
# List each entity with its label and spaCy's description of that label
for entity in doc.ents:
    label = entity.label_
    description = spacy.explain(label)
    print(f"Entity: {entity.text}, Label: {label},Explain_label:{description}")


Entity: Google, Label: ORG,Explain_label:Companies, agencies, institutions, etc.
Entity: Larry Page, Label: PERSON,Explain_label:People, including fictional
Entity: Sergey Brin, Label: PERSON,Explain_label:People, including fictional
Entity: 1998, Label: DATE,Explain_label:Absolute or relative dates or periods
Entity: California, Label: GPE,Explain_label:Countries, cities, states


### Visualize the entities with displaCy


In [None]:
from spacy import displacy
# Render the entities of `doc` with displaCy's entity ("ent") visualizer
displacy.render(doc,style="ent")

### Extend entities with a custom pipeline component


In [None]:
import spacy

# Load spaCy's small English pipeline by its package name
nlp = spacy.load("en_core_web_sm")

# Sentence whose person name is preceded by a title ("Dr.")
text = "Dr. Alex Smith chaired first board meeting of Acme Corp Inc."

# Run the pipeline; the resulting Doc exposes the detected entities as doc.ents
doc = nlp(text)

# Report every entity span together with its label string
for entity in doc.ents:
    entity_text, entity_label = entity.text, entity.label_
    print(f"Entity: {entity_text}, Label: {entity_label}")


Entity: Alex Smith, Label: PERSON
Entity: first, Label: ORDINAL
Entity: Acme Corp Inc., Label: ORG


In [None]:
import spacy
from spacy.language import Language
from spacy.tokens import Span

# Load a new English pipeline instance; the custom component is added to it below
nlp = spacy.load("en_core_web_sm")

@Language.component("expand_person_entities")
def expand_person_entities(doc):
    """Widen PERSON entities so they include a directly preceding title.

    For every entity in ``doc.ents`` labelled ``PERSON`` that does not start
    at token 0, the token immediately before it is inspected. If that token's
    text is one of the recognised titles, the entity is replaced by a span
    that starts one token earlier and keeps the same label. All other
    entities are kept unchanged and in their original order.

    Args:
        doc: The document being processed by the pipeline.

    Returns:
        The same ``doc`` with ``doc.ents`` reassigned.
    """
    titles = ("Dr", "Dr.", "Mr", "Mr.", "Ms", "Ms.")
    updated = []
    for entity in doc.ents:
        # `and` short-circuits, so doc[entity.start - 1] is only read for
        # PERSON entities that actually have a preceding token.
        has_title = (
            entity.label_ == "PERSON"
            and entity.start != 0
            and doc[entity.start - 1].text in titles
        )
        if has_title:
            entity = Span(doc, entity.start - 1, entity.end, label=entity.label)
        updated.append(entity)
    doc.ents = updated
    return doc

# Register the custom component so it runs right after the "ner" step
nlp.add_pipe("expand_person_entities", after="ner")

# Re-run the sample sentence and show (text, label) for every entity
doc = nlp("Dr. Alex Smith chaired first board meeting of Acme Corp Inc.")
entity_pairs = [(span.text, span.label_) for span in doc.ents]
print(entity_pairs)


[('Dr. Alex Smith', 'PERSON'), ('first', 'ORDINAL'), ('Acme Corp Inc.', 'ORG')]


## Add patterns for medical keywords


In [None]:
import spacy
from spacy.matcher import Matcher
import json

# Sample diseases (for illustration, use real disease list of 500 items)
disease_list = [
    "covid 19", "diabetes", "cancer", "hypertension", "pneumonia",
    "asthma", "flu", "tuberculosis", "malaria", "hepatitis", "Alzheimer's",
    "Parkinson's", "arthritis", "leukemia", "stroke", "epilepsy", "schizophrenia",
    "heart disease", "kidney failure", "liver cirrhosis", "AIDS", "obesity",
    # Add more diseases here, ideally from a curated list of 500 diseases
]

# Create patterns from the disease list: one token spec per
# whitespace-separated word of each disease name.
# The Matcher's LOWER attribute is compared against the lowercased token
# text, so the pattern value must itself be lowercase; otherwise mixed-case
# entries such as "AIDS" could never match any token.
# NOTE(review): names containing an apostrophe (e.g. "Alzheimer's") are
# presumably split into several tokens by spaCy's tokenizer, so their
# single-token pattern may still not match - confirm against the tokenizer.
disease_patterns = [
    [{"LOWER": term.lower()} for term in disease.split()]
    for disease in disease_list
]

# Load the spaCy model
nlp = spacy.load("en_core_web_sm")

# Initialize the matcher on the pipeline's vocabulary
matcher = Matcher(nlp.vocab)

# Register every disease pattern under the single match key "DISEASE"
matcher.add("DISEASE", disease_patterns)

# Function to extract diseases from text
def extract_diseases(text):
    """Return the text of every span in *text* that the matcher reports.

    Uses the module-level ``nlp`` pipeline to process the text and the
    module-level ``matcher`` to find matches. The result lists the matched
    span texts in the order the matcher yields them.
    """
    parsed = nlp(text)
    return [parsed[begin:stop].text for _key, begin, stop in matcher(parsed)]

# Example text containing two entries from disease_list
text = "The patient was diagnosed with diabetes and covid 19."

# Extract diseases from the text and print the matched strings
diseases = extract_diseases(text)
print("Extracted Diseases:", diseases)


Extracted Diseases: ['diabetes', 'covid 19']


In [None]:
import spacy
import json
import os
from spacy.matcher import Matcher

def save_model_with_patterns(nlp, patterns, save_dir):
    """
    Save the spaCy model along with matcher patterns.

    The patterns are serialized to JSON before anything is written, so a
    non-serializable pattern raises up front instead of leaving a saved model
    next to a truncated ``matcher_patterns.json``.

    Args:
        nlp: Object exposing ``to_disk(path)`` (here, the loaded spaCy pipeline).
        patterns: JSON-serializable matcher patterns, e.g. a dict mapping a
            label to a list of token patterns.
        save_dir: Directory that receives the model files and
            ``matcher_patterns.json``.

    Raises:
        TypeError: If ``patterns`` contains a value ``json`` cannot serialize.
    """
    # Serialize first: fail fast, before any file is created or overwritten.
    serialized_patterns = json.dumps(patterns)

    # Save the spaCy model
    nlp.to_disk(save_dir)

    # Save matcher patterns as a JSON file next to the model. The encoding is
    # pinned so the file does not depend on the platform's default locale.
    patterns_file = os.path.join(save_dir, "matcher_patterns.json")
    with open(patterns_file, "w", encoding="utf-8") as f:
        f.write(serialized_patterns)

    print(f"Model and patterns saved at: {save_dir}")

# Dictionary to store patterns with labels
medical_patterns = {
    "DISEASE": [
    # Core disease entries
    [{"LOWER": "covid"}, {"LOWER": "19"}],
    [{"LOWER": "diabetes"}],
    [{"LOWER": "cancer"}],
    [{"LOWER": "hypertension"}],
    [{"LOWER": "pneumonia"}],
    [{"LOWER": "asthma"}],
    [{"LOWER": "flu"}],
    [{"LOWER": "tuberculosis"}],
    [{"LOWER": "malaria"}],
    [{"LOWER": "hepatitis"}],
    [{"LOWER": "alzheimer's"}],
    [{"LOWER": "parkinson's"}],
    [{"LOWER": "arthritis"}],
    [{"LOWER": "leukemia"}],
    [{"LOWER": "stroke"}],
    [{"LOWER": "epilepsy"}],
    [{"LOWER": "schizophrenia"}],
    [{"LOWER": "heart"}, {"LOWER": "disease"}],
    [{"LOWER": "kidney"}, {"LOWER": "failure"}],
    [{"LOWER": "liver"}, {"LOWER": "cirrhosis"}],
    [{"LOWER": "aids"}],
    [{"LOWER": "obesity"}],
    [{"LOWER": "chronic"}, {"LOWER": "obstructive"}, {"LOWER": "pulmonary"}, {"LOWER": "disease"}],
    [{"LOWER": "hiv"}],
    [{"LOWER": "sepsis"}],
    [{"LOWER": "rheumatoid"}, {"LOWER": "arthritis"}],
    [{"LOWER": "depression"}],
    [{"LOWER": "schizoaffective"}, {"LOWER": "disorder"}],
    [{"LOWER": "bipolar"}, {"LOWER": "disorder"}],
    [{"LOWER": "dementia"}],
    [{"LOWER": "liver"}, {"LOWER": "cancer"}],
    [{"LOWER": "cirrhosis"}],
    [{"LOWER": "multiple"}, {"LOWER": "sclerosis"}],
    [{"LOWER": "fibromyalgia"}],
    [{"LOWER": "chronic"}, {"LOWER": "kidney"}, {"LOWER": "disease"}],
    [{"LOWER": "kidney"}, {"LOWER": "stones"}],
    [{"LOWER": "cystic"}, {"LOWER": "fibrosis"}],
    [{"LOWER": "ulcerative"}, {"LOWER": "colitis"}],
    [{"LOWER": "crohn's"}, {"LOWER": "disease"}],
    [{"LOWER": "ms"}],
    [{"LOWER": "tetanus"}],
    [{"LOWER": "herpes"}],
    [{"LOWER": "hepatitis"}, {"LOWER": "b"}],
    [{"LOWER": "hepatitis"}, {"LOWER": "c"}],
    [{"LOWER": "sickle"}, {"LOWER": "cell"}, {"LOWER": "anemia"}],
    [{"LOWER": "lupus"}],
    [{"LOWER": "gout"}],
    [{"LOWER": "eczema"}],
    [{"LOWER": "psoriasis"}],
    [{"LOWER": "skin"}, {"LOWER": "cancer"}],
    [{"LOWER": "melanoma"}],
    [{"LOWER": "prostate"}, {"LOWER": "cancer"}],
    [{"LOWER": "breast"}, {"LOWER": "cancer"}],
    [{"LOWER": "ovarian"}, {"LOWER": "cancer"}],
    [{"LOWER": "lung"}, {"LOWER": "cancer"}],
    [{"LOWER": "colorectal"}, {"LOWER": "cancer"}],
    [{"LOWER": "pancreatic"}, {"LOWER": "cancer"}],
    [{"LOWER": "bladder"}, {"LOWER": "cancer"}],
    [{"LOWER": "stomach"}, {"LOWER": "cancer"}],
    [{"LOWER": "esophageal"}, {"LOWER": "cancer"}],
    [{"LOWER": "oral"}, {"LOWER": "cancer"}],
    [{"LOWER": "kidney"}, {"LOWER": "cancer"}],
    [{"LOWER": "lymphoma"}],
    [{"LOWER": "gastrointestinal"}, {"LOWER": "cancer"}],
    [{"LOWER": "cholangitis"}],
    [{"LOWER": "hemochromatosis"}],
    [{"LOWER": "endometriosis"}],
    [{"LOWER": "polycystic"}, {"LOWER": "ovary"}, {"LOWER": "syndrome"}],
    [{"LOWER": "prostate"}, {"LOWER": "enlargement"}],
    [{"LOWER": "epileptic"}, {"LOWER": "seizures"}],
    [{"LOWER": "neurofibromatosis"}],
    [{"LOWER": "huntington's"}, {"LOWER": "disease"}],
    [{"LOWER": "amyotrophic"}, {"LOWER": "lateral"}, {"LOWER": "sclerosis"}],
    [{"LOWER": "spondylitis"}],
    [{"LOWER": "osteoporosis"}],
    [{"LOWER": "ankylosing"}, {"LOWER": "spondylitis"}],
    [{"LOWER": "gerd"}],
    [{"LOWER": "acid"}, {"LOWER": "reflux"}],
    [{"LOWER": "gallstones"}],
    [{"LOWER": "peptic"}, {"LOWER": "ulcer"}],
    [{"LOWER": "heart"}, {"LOWER": "attack"}],
    [{"LOWER": "arrhythmia"}],
    [{"LOWER": "angina"}],
    [{"LOWER": "congestive"}, {"LOWER": "heart"}, {"LOWER": "failure"}],
    [{"LOWER": "peripheral"}, {"LOWER": "artery"}, {"LOWER": "disease"}],
    [{"LOWER": "myocardial"}, {"LOWER": "infarction"}],
    [{"LOWER": "cardiomyopathy"}],
    [{"LOWER": "brain"}, {"LOWER": "aneurysm"}],
    [{"LOWER": "meningitis"}],
    [{"LOWER": "encephalitis"}],
    [{"LOWER": "brain"}, {"LOWER": "tumor"}],
    [{"LOWER": "cerebral"}, {"LOWER": "palsy"}],
    [{"LOWER": "autism"}],
    [{"LOWER": "addison's"}, {"LOWER": "disease"}],
    [{"LOWER": "graves'"}, {"LOWER": "disease"}],
    [{"LOWER": "goiter"}],
    [{"LOWER": "hyperthyroidism"}],
    [{"LOWER": "hypothyroidism"}],
    [{"LOWER": "menopause"}],
    [{"LOWER": "testicular"}, {"LOWER": "cancer"}],
    [{"LOWER": "endometrial"}, {"LOWER": "cancer"}],
    [{"LOWER": "gallbladder"}, {"LOWER": "cancer"}],
    [{"LOWER": "inflammatory"}, {"LOWER": "bowel"}, {"LOWER": "disease"}],
    [{"LOWER": "peptic"}, {"LOWER": "ulcer"}, {"LOWER": "disease"}],
    [{"LOWER": "alcoholism"}],
    [{"LOWER": "smoking"}],
    [{"LOWER": "drug"}, {"LOWER": "addiction"}],
    [{"LOWER": "anxiety"}, {"LOWER": "disorder"}],
    [{"LOWER": "panic"}, {"LOWER": "disorder"}],
    [{"LOWER": "bulimia"}, {"LOWER": "nervosa"}],
    [{"LOWER": "anorexia"}, {"LOWER": "nervosa"}],
    [{"LOWER": "insomnia"}],
    [{"LOWER": "restless"}, {"LOWER": "leg"}, {"LOWER": "syndrome"}],
    [{"LOWER": "sleep"}, {"LOWER": "apnea"}],
    [{"LOWER": "snoring"}],
    [{"LOWER": "narcolepsy"}],
    [{"LOWER": "autism"}, {"LOWER": "spectrum"}, {"LOWER": "disorder"}],
    [{"LOWER": "chronic"}, {"LOWER": "fatigue"}, {"LOWER": "syndrome"}],
    [{"LOWER": "chronic"}, {"LOWER": "pain"}],
    [{"LOWER": "hearing"}, {"LOWER": "loss"}],
    [{"LOWER": "glaucoma"}],
    [{"LOWER": "cataracts"}],
    [{"LOWER": "macular"}, {"LOWER": "degeneration"}],
    [{"LOWER": "diabetic"}, {"LOWER": "retinopathy"}],
    [{"LOWER": "retinitis"}, {"LOWER": "pigmentosa"}],
    [{"LOWER": "fibroids"}],
    [{"LOWER": "endometrial"}, {"LOWER": "hyperplasia"}],
    [{"LOWER": "ectopic"}, {"LOWER": "pregnancy"}],
    [{"LOWER": "menorrhagia"}],
    [{"LOWER": "dysmenorrhea"}],
    [{"LOWER": "polycystic"}, {"LOWER": "ovarian"}, {"LOWER": "syndrome"}],
    [{"LOWER": "vaginitis"}],
    [{"LOWER": "vulvodynia"}],
    [{"LOWER": "miscarriage"}],
    [{"LOWER": "pelvic"}, {"LOWER": "inflammatory"}, {"LOWER": "disease"}],
    [{"LOWER": "prostatitis"}],
    [{"LOWER": "testicular"}, {"LOWER": "torsion"}],
    [{"LOWER": "impotence"}],
    [{"LOWER": "erectile"}, {"LOWER": "dysfunction"}],
    [{"LOWER": "premature"}, {"LOWER": "ejaculation"}],
    [{"LOWER": "benign"}, {"LOWER": "prostatic"}, {"LOWER": "hyperplasia"}],
    [{"LOWER": "male"}, {"LOWER": "infertility"}],
    [{"LOWER": "female"}, {"LOWER": "infertility"}],
    [{"LOWER": "abnormal"}, {"LOWER": "uterine"}, {"LOWER": "bleeding"}],
    [{"LOWER": "vulvar"}, {"LOWER": "cancer"}],
    [{"LOWER": "vaginal"}, {"LOWER": "cancer"}],
    [{"LOWER": "cervical"}, {"LOWER": "cancer"}],
    [{"LOWER": "uterine"}, {"LOWER": "fibroids"}],
    [{"LOWER": "pelvic"}, {"LOWER": "floor"}, {"LOWER": "disorders"}],
    [{"LOWER": "urinary"}, {"LOWER": "tract"}, {"LOWER": "infections"}],
    [{"LOWER": "urinary"}, {"LOWER": "incontinence"}],
    [{"LOWER": "kidney"}, {"LOWER": "infections"}],
    [{"LOWER": "liver"}, {"LOWER": "diseases"}],
    [{"LOWER": "renal"}, {"LOWER": "failure"}],
    [{"LOWER": "polyuria"}],
    [{"LOWER": "oliguria"}],
    [{"LOWER": "hypoalbuminemia"}],
    [{"LOWER": "nephrotic"}, {"LOWER": "syndrome"}],
    [{"LOWER": "proteinuria"}],
    [{"LOWER": "glomerulonephritis"}],
    [{"LOWER": "dialysis"}],

    # Additional disease entries
    [{"LOWER": "lyme"}, {"LOWER": "disease"}],
    [{"LOWER": "dengue"}, {"LOWER": "fever"}],
    [{"LOWER": "yellow"}, {"LOWER": "fever"}],
    [{"LOWER": "zika"}, {"LOWER": "virus"}],
    [{"LOWER": "ebola"}],
    [{"LOWER": "rocky"}, {"LOWER": "mountain"}, {"LOWER": "spotted"}, {"LOWER": "fever"}],
    [{"LOWER": "cholera"}],
    [{"LOWER": "typhoid"}],
    [{"LOWER": "measles"}],
    [{"LOWER": "mumps"}],
    [{"LOWER": "rubella"}],
    [{"LOWER": "chickenpox"}],
    [{"LOWER": "shingles"}],
    [{"LOWER": "whooping"}, {"LOWER": "cough"}],
    [{"LOWER": "diphtheria"}],
    [{"LOWER": "leprosy"}],
    [{"LOWER": "bell's"}, {"LOWER": "palsy"}],
    [{"LOWER": "guillain-barré"}, {"LOWER": "syndrome"}],
    [{"LOWER": "tourette"}, {"LOWER": "syndrome"}],
    [{"LOWER": "down"}, {"LOWER": "syndrome"}],
    [{"LOWER": "sickle"}, {"LOWER": "cell"}, {"LOWER": "disease"}],
    [{"LOWER": "thalassemia"}],
    [{"LOWER": "hemophilia"}],
    [{"LOWER": "marfan"}, {"LOWER": "syndrome"}],
    [{"LOWER": "sjögren's"}, {"LOWER": "syndrome"}],
    [{"LOWER": "hashimoto's"}, {"LOWER": "thyroiditis"}],
    [{"LOWER": "celiac"}, {"LOWER": "disease"}],
    [{"LOWER": "type"}, {"LOWER": "1"}, {"LOWER": "diabetes"}],
    [{"LOWER": "myasthenia"}, {"LOWER": "gravis"}],
    [{"LOWER": "aortic"}, {"LOWER": "aneurysm"}],
    [{"LOWER": "deep"}, {"LOWER": "vein"}, {"LOWER": "thrombosis"}],
    [{"LOWER": "pulmonary"}, {"LOWER": "embolism"}],
    [{"LOWER": "atherosclerosis"}],
    [{"LOWER": "coronary"}, {"LOWER": "artery"}, {"LOWER": "disease"}],
    [{"LOWER": "irritable"}, {"LOWER": "bowel"}, {"LOWER": "syndrome"}],
    [{"LOWER": "diverticulitis"}],
    [{"LOWER": "non-alcoholic"}, {"LOWER": "fatty"}, {"LOWER": "liver"}, {"LOWER": "disease"}],
    [{"LOWER": "ptsd"}],
    [{"LOWER": "ocd"}],
    [{"LOWER": "adhd"}],
    [{"LOWER": "borderline"}, {"LOWER": "personality"}, {"LOWER": "disorder"}],
    [{"LOWER": "creutzfeldt-jakob"}, {"LOWER": "disease"}],
    [{"LOWER": "kawasaki"}, {"LOWER": "disease"}],
    [{"LOWER": "ehlers-danlos"}, {"LOWER": "syndrome"}],
    [{"LOWER": "acute"}, {"LOWER": "lymphoblastic"}, {"LOWER": "leukemia"}],
    [{"LOWER": "chronic"}, {"LOWER": "myeloid"}, {"LOWER": "leukemia"}],
    [{"LOWER": "thyroid"}, {"LOWER": "cancer"}],
    [{"LOWER": "nasopharyngeal"}, {"LOWER": "carcinoma"}],
    [{"LOWER": "bone"}, {"LOWER": "cancer"}],
    [{"LOWER": "basal"}, {"LOWER": "cell"}, {"LOWER": "carcinoma"}],
    [{"LOWER": "squamous"}, {"LOWER": "cell"}, {"LOWER": "carcinoma"}],
    [{"LOWER": "acute"}, {"LOWER": "kidney"}, {"LOWER": "injury"}],
    [{"LOWER": "nephrolithiasis"}],
    [{"LOWER": "osteomyelitis"}],
    [{"LOWER": "tendonitis"}],
    [{"LOWER": "bursitis"}],
    [{"LOWER": "strep"}, {"LOWER": "throat"}],
    [{"LOWER": "sinusitis"}],
    [{"LOWER": "tonsillitis"}],
    [{"LOWER": "otitis"}, {"LOWER": "media"}],
    [{"LOWER": "conjunctivitis"}],
    [{"LOWER": "impetigo"}],
    [{"LOWER": "cellulitis"}],
    [{"LOWER": "abscess"}]
],
    "MEDICATION": [
    # Core medication entries
    [{"LOWER": "aspirin"}],
    [{"LOWER": "metformin"}],
    [{"LOWER": "ibuprofen"}],
    [{"LOWER": "paracetamol"}],
    [{"LOWER": "acetaminophen"}],
    [{"LOWER": "amoxicillin"}],
    [{"LOWER": "simvastatin"}],
    [{"LOWER": "losartan"}],
    [{"LOWER": "lisinopril"}],
    [{"LOWER": "hydrochlorothiazide"}],
    [{"LOWER": "prednisone"}],
    [{"LOWER": "atorvastatin"}],
    [{"LOWER": "omeprazole"}],
    [{"LOWER": "insulin"}],
    [{"LOWER": "clopidogrel"}],
    [{"LOWER": "warfarin"}],
    [{"LOWER": "metoprolol"}],
    [{"LOWER": "salbutamol"}],
    [{"LOWER": "enalapril"}],
    [{"LOWER": "hydrocodone"}],
    [{"LOWER": "tramadol"}],
    [{"LOWER": "zolpidem"}],
    [{"LOWER": "gabapentin"}],
    [{"LOWER": "fluoxetine"}],
    [{"LOWER": "sertraline"}],
    [{"LOWER": "diazepam"}],
    [{"LOWER": "lorazepam"}],
    [{"LOWER": "alprazolam"}],
    [{"LOWER": "cetirizine"}],  # Fixed typo ("cetrizine" → "cetirizine")
    [{"LOWER": "loratadine"}],
    [{"LOWER": "diphenhydramine"}],
    [{"LOWER": "amitriptyline"}],
    [{"LOWER": "doxycycline"}],
    [{"LOWER": "metronidazole"}],
    [{"LOWER": "azithromycin"}],
    [{"LOWER": "clarithromycin"}],
    [{"LOWER": "fluconazole"}],
    [{"LOWER": "tamsulosin"}],
    [{"LOWER": "morphine"}],
    [{"LOWER": "furosemide"}],
    [{"LOWER": "spironolactone"}],
    [{"LOWER": "acetazolamide"}],
    [{"LOWER": "dantrolene"}],
    [{"LOWER": "prednisolone"}],
    [{"LOWER": "etoposide"}],
    [{"LOWER": "tamoxifen"}],
    [{"LOWER": "methotrexate"}],
    [{"LOWER": "cyclophosphamide"}],
    [{"LOWER": "sildenafil"}],
    [{"LOWER": "tadalafil"}],
    [{"LOWER": "verapamil"}],
    [{"LOWER": "diltiazem"}],
    [{"LOWER": "amlodipine"}],
    [{"LOWER": "nifedipine"}],
    [{"LOWER": "lansoprazole"}],
    [{"LOWER": "pantoprazole"}],
    [{"LOWER": "ranitidine"}],
    [{"LOWER": "levothyroxine"}],
    [{"LOWER": "hydrocortisone"}],
    [{"LOWER": "carbamazepine"}],
    [{"LOWER": "phenytoin"}],
    [{"LOWER": "clonazepam"}],
    [{"LOWER": "topiramate"}],
    [{"LOWER": "lamotrigine"}],
    [{"LOWER": "rosuvastatin"}],
    [{"LOWER": "pravastatin"}],
    [{"LOWER": "pioglitazone"}],
    [{"LOWER": "glimepiride"}],
    [{"LOWER": "glipizide"}],
    [{"LOWER": "chlorpheniramine"}],
    [{"LOWER": "mupirocin"}],
    [{"LOWER": "betamethasone"}],
    [{"LOWER": "triamcinolone"}],
    [{"LOWER": "hydroxychloroquine"}],
    [{"LOWER": "rituximab"}],
    [{"LOWER": "methadone"}],
    [{"LOWER": "naloxone"}],
    [{"LOWER": "buprenorphine"}],
    [{"LOWER": "ibandronate"}],
    [{"LOWER": "alendronate"}],
    [{"LOWER": "denosumab"}],
    [{"LOWER": "teriparatide"}],
    [{"LOWER": "bisacodyl"}],
    [{"LOWER": "loperamide"}],
    [{"LOWER": "diphenoxylate"}],
    [{"LOWER": "penicillin"}],
    [{"LOWER": "cephalexin"}],
    [{"LOWER": "ciclosporin"}],
    [{"LOWER": "risperidone"}],
    [{"LOWER": "quetiapine"}],
    [{"LOWER": "olanzapine"}],
    [{"LOWER": "aripiprazole"}],
    [{"LOWER": "clozapine"}],
    [{"LOWER": "citalopram"}],
    [{"LOWER": "escitalopram"}],
    [{"LOWER": "paroxetine"}],
    [{"LOWER": "venlafaxine"}],
    [{"LOWER": "duloxetine"}],
    [{"LOWER": "bupropion"}],
    [{"LOWER": "mirtazapine"}],
    [{"LOWER": "modafinil"}],
    [{"LOWER": "levodopa"}],
    [{"LOWER": "carbidopa"}],
    [{"LOWER": "ropinirole"}],
    [{"LOWER": "pramipexole"}],
    [{"LOWER": "bromocriptine"}],
    [{"LOWER": "amantadine"}],
    [{"LOWER": "valproate"}],
    [{"LOWER": "phenobarbital"}],
    [{"LOWER": "zonisamide"}],
    [{"LOWER": "ethosuximide"}],
    [{"LOWER": "vigabatrin"}],
    [{"LOWER": "clobazam"}],
    [{"LOWER": "pregabalin"}],
    [{"LOWER": "fentanyl"}],
    [{"LOWER": "oxycodone"}],
    [{"LOWER": "hydromorphone"}],
    [{"LOWER": "codeine"}],
    [{"LOWER": "oxytocin"}],
    [{"LOWER": "pentoxifylline"}],
    [{"LOWER": "tacrolimus"}],
    [{"LOWER": "mycophenolate"}],
    [{"LOWER": "hydroxyurea"}],
    [{"LOWER": "azathioprine"}],
    [{"LOWER": "lecithin"}],
    [{"LOWER": "alpha-lipoic"}, {"LOWER": "acid"}],
    [{"LOWER": "coenzyme"}, {"LOWER": "q10"}],
    [{"LOWER": "omega"}, {"LOWER": "3"}, {"LOWER": "fatty"}, {"LOWER": "acids"}],
    [{"LOWER": "zinc"}],
    [{"LOWER": "magnesium"}],
    [{"LOWER": "iron"}],
    [{"LOWER": "folic"}, {"LOWER": "acid"}],
    [{"LOWER": "calcium"}],
    [{"LOWER": "vitamin"}, {"LOWER": "d"}],
    [{"LOWER": "vitamin"}, {"LOWER": "c"}],
    [{"LOWER": "vitamin"}, {"LOWER": "b12"}],
    [{"LOWER": "probiotic"}],
    [{"LOWER": "n-acetylcysteine"}],
    [{"LOWER": "silymarin"}],
    [{"LOWER": "turmeric"}],
    [{"LOWER": "ginger"}],
    [{"LOWER": "ginseng"}],
    [{"LOWER": "glucosamine"}],
    [{"LOWER": "chondroitin"}],
    [{"LOWER": "omega-3"}],

    # New additions (expanded list)
    [{"LOWER": "montelukast"}],
    [{"LOWER": "celecoxib"}],
    [{"LOWER": "naproxen"}],
    [{"LOWER": "indomethacin"}],
    [{"LOWER": "meloxicam"}],
    [{"LOWER": "sumatriptan"}],
    [{"LOWER": "rizatriptan"}],
    [{"LOWER": "propranolol"}],
    [{"LOWER": "carvedilol"}],
    [{"LOWER": "nebivolol"}],
    [{"LOWER": "finasteride"}],
    [{"LOWER": "dutasteride"}],
    [{"LOWER": "tadalafil"}],
    [{"LOWER": "vardenafil"}],
    [{"LOWER": "adalimumab"}],
    [{"LOWER": "infliximab"}],
    [{"LOWER": "etanercept"}],
    [{"LOWER": "secukinumab"}],
    [{"LOWER": "ustekinumab"}],
    [{"LOWER": "insulin"}, {"LOWER": "glargine"}],
    [{"LOWER": "insulin"}, {"LOWER": "aspart"}],
    [{"LOWER": "empagliflozin"}],
    [{"LOWER": "dapagliflozin"}],
    [{"LOWER": "canagliflozin"}],
    [{"LOWER": "semaglutide"}],
    [{"LOWER": "liraglutide"}],
    [{"LOWER": "dulaglutide"}],
    [{"LOWER": "sitagliptin"}],
    [{"LOWER": "saxagliptin"}],
    [{"LOWER": "linagliptin"}],
    [{"LOWER": "acarbose"}],
    [{"LOWER": "miglitol"}],
    [{"LOWER": "colchicine"}],
    [{"LOWER": "allopurinol"}],
    [{"LOWER": "febuxostat"}],
    [{"LOWER": "terbinafine"}],
    [{"LOWER": "itraconazole"}],
    [{"LOWER": "voriconazole"}],
    [{"LOWER": "acyclovir"}],
    [{"LOWER": "valacyclovir"}],
    [{"LOWER": "oseltamivir"}],
    [{"LOWER": "zanamivir"}],
    [{"LOWER": "ribavirin"}],
    [{"LOWER": "sofosbuvir"}],
    [{"LOWER": "ledipasvir"}],
    [{"LOWER": "tenofovir"}],
    [{"LOWER": "emtricitabine"}],
    [{"LOWER": "rilpivirine"}],
    [{"LOWER": "darunavir"}],
    [{"LOWER": "raltegravir"}],
    [{"LOWER": "dolutegravir"}],
    [{"LOWER": "levetiracetam"}],
    [{"LOWER": "brivaracetam"}],
    [{"LOWER": "perampanel"}],
    [{"LOWER": "lacosamide"}],
    [{"LOWER": "rufinamide"}],
    [{"LOWER": "eszopiclone"}],
    [{"LOWER": "ramelteon"}],
    [{"LOWER": "suvorexant"}],
    [{"LOWER": "melatonin"}],
    [{"LOWER": "doxepin"}],
    [{"LOWER": "trazodone"}],
    [{"LOWER": "doxazosin"}],
    [{"LOWER": "prazosin"}],
    [{"LOWER": "tamsulosin"}],
    [{"LOWER": "silodosin"}],
    [{"LOWER": "alfuzosin"}],
    [{"LOWER": "tamsulosin"}],
    [{"LOWER": "tolterodine"}],
    [{"LOWER": "oxybutynin"}],
    [{"LOWER": "solifenacin"}],
    [{"LOWER": "darifenacin"}],
    [{"LOWER": "fesoterodine"}],
    [{"LOWER": "mirabegron"}],
    [{"LOWER": "vibegron"}],
    [{"LOWER": "bethanechol"}],
    [{"LOWER": "pilocarpine"}],
    [{"LOWER": "timolol"}],
    [{"LOWER": "latanoprost"}],
    [{"LOWER": "dorzolamide"}],
    [{"LOWER": "brimonidine"}],
    [{"LOWER": "travoprost"}],
    [{"LOWER": "bimatoprost"}],
    [{"LOWER": "tafluprost"}],
    [{"LOWER": "cyclosporine"}],
    [{"LOWER": "tacrolimus"}],
    [{"LOWER": "pimecrolimus"}],
    [{"LOWER": "crisaborole"}],
    [{"LOWER": "dupilumab"}],
    [{"LOWER": "omalizumab"}],
    [{"LOWER": "mepolizumab"}],
    [{"LOWER": "reslizumab"}],
    [{"LOWER": "benralizumab"}],
    [{"LOWER": "tezepelumab"}],
    [{"LOWER": "trastuzumab"}],
    [{"LOWER": "pertuzumab"}],
    [{"LOWER": "bevacizumab"}],
    [{"LOWER": "cetuximab"}],
    [{"LOWER": "panitumumab"}],
    [{"LOWER": "rituximab"}],
    [{"LOWER": "obinutuzumab"}],
    [{"LOWER": "ibrutinib"}],
    [{"LOWER": "acalabrutinib"}],
    [{"LOWER": "zanubrutinib"}],
    [{"LOWER": "venetoclax"}],
    [{"LOWER": "midostaurin"}],
    [{"LOWER": "gilteritinib"}],
    [{"LOWER": "ivosidenib"}],
    [{"LOWER": "enasidenib"}],
    [{"LOWER": "tretinoin"}],
    [{"LOWER": "isotretinoin"}],
    [{"LOWER": "adapalene"}],
    [{"LOWER": "clindamycin"}],
    [{"LOWER": "erythromycin"}],
    [{"LOWER": "minocycline"}],
    [{"LOWER": "doxycycline"}],
    [{"LOWER": "tetracycline"}],
    [{"LOWER": "azelaic"}, {"LOWER": "acid"}],
    [{"LOWER": "benzoyl"}, {"LOWER": "peroxide"}],
    [{"LOWER": "salicylic"}, {"LOWER": "acid"}],
    [{"LOWER": "coal"}, {"LOWER": "tar"}],
    [{"LOWER": "calcipotriene"}],
    [{"LOWER": "tazarotene"}],
    [{"LOWER": "efalizumab"}],
    [{"LOWER": "ixekizumab"}],
    [{"LOWER": "brodalumab"}],
    [{"LOWER": "guselkumab"}],
    [{"LOWER": "tildrakizumab"}],
    [{"LOWER": "risankizumab"}],
    [{"LOWER": "mirikizumab"}],
    [{"LOWER": "apremilast"}],
    [{"LOWER": "tofacitinib"}],
    [{"LOWER": "upadacitinib"}],
    [{"LOWER": "baricitinib"}],
    [{"LOWER": "deucravacitinib"}],
    [{"LOWER": "ruxolitinib"}],
    [{"LOWER": "fedratinib"}],
    [{"LOWER": "pacritinib"}],
    [{"LOWER": "momelotinib"}],
    [{"LOWER": "nilotinib"}],
    [{"LOWER": "dasatinib"}],
    [{"LOWER": "bosutinib"}],
    [{"LOWER": "ponatinib"}],
    [{"LOWER": "imatinib"}],
    [{"LOWER": "sunitinib"}],
    [{"LOWER": "sorafenib"}],
    [{"LOWER": "pazopanib"}],
    [{"LOWER": "axitinib"}],
    [{"LOWER": "cabozantinib"}],
    [{"LOWER": "lenvatinib"}],
    [{"LOWER": "regorafenib"}],
    [{"LOWER": "vandetanib"}],
    [{"LOWER": "tivozanib"}],
    [{"LOWER": "everolimus"}],
    [{"LOWER": "temsirolimus"}],
    [{"LOWER": "sirolimus"}],
    [{"LOWER": "alpelisib"}],
    [{"LOWER": "copanlisib"}],
    [{"LOWER": "idelalisib"}],
    [{"LOWER": "duvelisib"}],
    [{"LOWER": "buparlisib"}],
    [{"LOWER": "sonidegib"}],
    [{"LOWER": "vismodegib"}],
    [{"LOWER": "talazoparib"}],
    [{"LOWER": "olaparib"}],
    [{"LOWER": "rucaparib"}]
],
   "TREATMENT": [
    # Core treatment entries
    [{"LOWER": "surgery"}],
    [{"LOWER": "chemotherapy"}],
    [{"LOWER": "radiation"}, {"LOWER": "therapy"}],
    [{"LOWER": "physical"}, {"LOWER": "therapy"}],
    [{"LOWER": "immunotherapy"}],
    [{"LOWER": "hormone"}, {"LOWER": "therapy"}],
    [{"LOWER": "stem"}, {"LOWER": "cell"}, {"LOWER": "therapy"}],
    [{"LOWER": "dialysis"}],
    [{"LOWER": "transplant"}],
    [{"LOWER": "blood"}, {"LOWER": "transfusion"}],
    [{"LOWER": "antibiotic"}, {"LOWER": "therapy"}],
    [{"LOWER": "pain"}, {"LOWER": "management"}],
    [{"LOWER": "vaccination"}],
    [{"LOWER": "radiotherapy"}],
    [{"LOWER": "laser"}, {"LOWER": "therapy"}],
    [{"LOWER": "counseling"}],
    [{"LOWER": "psychotherapy"}],
    [{"LOWER": "acupuncture"}],
    [{"LOWER": "chiropractic"}],
    [{"LOWER": "blood"}, {"LOWER": "pressure"}, {"LOWER": "management"}],
    [{"LOWER": "physical"}, {"LOWER": "rehabilitation"}],
    [{"LOWER": "nutritional"}, {"LOWER": "therapy"}],
    [{"LOWER": "gene"}, {"LOWER": "therapy"}],
    [{"LOWER": "massage"}, {"LOWER": "therapy"}],
    [{"LOWER": "occupational"}, {"LOWER": "therapy"}],
    [{"LOWER": "wound"}, {"LOWER": "care"}],
    [{"LOWER": "psychological"}, {"LOWER": "therapy"}],
    [{"LOWER": "laser"}, {"LOWER": "surgery"}],
    [{"LOWER": "treatment"}, {"LOWER": "management"}],
    [{"LOWER": "vitamin"}, {"LOWER": "supplement"}],
    [{"LOWER": "chemoprevention"}],
    [{"LOWER": "hypothermia"}],
    [{"LOWER": "neurostimulation"}],
    [{"LOWER": "electrotherapy"}],
    [{"LOWER": "oxygen"}, {"LOWER": "therapy"}],
    [{"LOWER": "insulin"}, {"LOWER": "therapy"}],
    [{"LOWER": "lifestyle"}, {"LOWER": "modification"}],
    [{"LOWER": "pulmonary"}, {"LOWER": "rehabilitation"}],
    [{"LOWER": "surgical"}, {"LOWER": "intervention"}],
    [{"LOWER": "anesthesia"}],
    [{"LOWER": "antiviral"}, {"LOWER": "treatment"}],
    [{"LOWER": "hospice"}, {"LOWER": "care"}],
    [{"LOWER": "supportive"}, {"LOWER": "care"}],
    [{"LOWER": "home"}, {"LOWER": "care"}],
    [{"LOWER": "prophylactic"}, {"LOWER": "treatment"}],
    [{"LOWER": "allergy"}, {"LOWER": "treatment"}],
    [{"LOWER": "antidepressant"}, {"LOWER": "medication"}],
    [{"LOWER": "antipsychotic"}, {"LOWER": "medication"}],
    [{"LOWER": "mood"}, {"LOWER": "stabilizer"}],
    [{"LOWER": "hypoglycemic"}, {"LOWER": "therapy"}],
    [{"LOWER": "anticoagulation"}],
    [{"LOWER": "blood"}, {"LOWER": "glucose"}, {"LOWER": "monitoring"}],
    [{"LOWER": "anti-inflammatory"}, {"LOWER": "treatment"}],
    [{"LOWER": "antidiabetic"}, {"LOWER": "medication"}],
    [{"LOWER": "antiemetic"}, {"LOWER": "medication"}],
    [{"LOWER": "antihypertensive"}, {"LOWER": "therapy"}],
    [{"LOWER": "antifungal"}, {"LOWER": "treatment"}],
    [{"LOWER": "immunosuppressive"}, {"LOWER": "therapy"}],
    [{"LOWER": "probiotics"}],
    [{"LOWER": "hormonal"}, {"LOWER": "treatment"}],
    [{"LOWER": "medical"}, {"LOWER": "management"}],
    [{"LOWER": "painkillers"}],
    [{"LOWER": "tinctures"}],
    [{"LOWER": "herbal"}, {"LOWER": "remedies"}],
    [{"LOWER": "homeopathy"}],
    [{"LOWER": "audiology"}, {"LOWER": "treatment"}],
    [{"LOWER": "orthopedic"}, {"LOWER": "treatment"}],
    [{"LOWER": "surgical"}, {"LOWER": "removal"}],
    [{"LOWER": "cardiac"}, {"LOWER": "treatment"}],
    [{"LOWER": "diabetic"}, {"LOWER": "education"}],
    [{"LOWER": "preventive"}, {"LOWER": "care"}],
    [{"LOWER": "rehabilitation"}],
    [{"LOWER": "mental"}, {"LOWER": "health"}, {"LOWER": "therapy"}],
    [{"LOWER": "maternity"}, {"LOWER": "care"}],
    [{"LOWER": "palliative"}, {"LOWER": "care"}],
    [{"LOWER": "medication"}, {"LOWER": "management"}],

    # New additions
    [{"LOWER": "targeted"}, {"LOWER": "therapy"}],
    [{"LOWER": "photodynamic"}, {"LOWER": "therapy"}],
    [{"LOWER": "plasma"}, {"LOWER": "exchange"}],
    [{"LOWER": "cryotherapy"}],
    [{"LOWER": "brachytherapy"}],
    [{"LOWER": "proton"}, {"LOWER": "therapy"}],
    [{"LOWER": "cognitive"}, {"LOWER": "behavioral"}, {"LOWER": "therapy"}],
    [{"LOWER": "dialectical"}, {"LOWER": "behavior"}, {"LOWER": "therapy"}],
    [{"LOWER": "speech"}, {"LOWER": "therapy"}],
    [{"LOWER": "respiratory"}, {"LOWER": "therapy"}],
    [{"LOWER": "hyperbaric"}, {"LOWER": "oxygen"}, {"LOWER": "therapy"}],
    [{"LOWER": "biofeedback"}],
    [{"LOWER": "art"}, {"LOWER": "therapy"}],
    [{"LOWER": "music"}, {"LOWER": "therapy"}],
    [{"LOWER": "ayurvedic"}, {"LOWER": "medicine"}],
    [{"LOWER": "naturopathy"}],
    [{"LOWER": "minimally"}, {"LOWER": "invasive"}, {"LOWER": "surgery"}],
    [{"LOWER": "robotic"}, {"LOWER": "surgery"}],
    [{"LOWER": "laparoscopic"}, {"LOWER": "surgery"}],
    [{"LOWER": "orthopedic"}, {"LOWER": "rehabilitation"}],
    [{"LOWER": "neurological"}, {"LOWER": "rehabilitation"}],
    [{"LOWER": "cardiac"}, {"LOWER": "rehabilitation"}],
    [{"LOWER": "fertility"}, {"LOWER": "treatment"}],
    [{"LOWER": "geriatric"}, {"LOWER": "care"}],
    [{"LOWER": "continuous"}, {"LOWER": "positive"}, {"LOWER": "airway"}, {"LOWER": "pressure"}],
    [{"LOWER": "symptom"}, {"LOWER": "management"}],
    [{"LOWER": "car"}, {"LOWER": "t-cell"}, {"LOWER": "therapy"}],
    [{"LOWER": "cancer"}, {"LOWER": "screening"}]
],
    "BODY_PART": [
    [{"LOWER": "lungs"}],
    [{"LOWER": "kidney"}],
    [{"LOWER": "brain"}],
    [{"LOWER": "liver"}],
    [{"LOWER": "stomach"}],
    [{"LOWER": "intestine"}],
    [{"LOWER": "pancreas"}],
    [{"LOWER": "bladder"}],
    [{"LOWER": "spleen"}],
    [{"LOWER": "skin"}],
    [{"LOWER": "muscle"}],
    [{"LOWER": "bone"}],
    [{"LOWER": "spine"}],
    [{"LOWER": "esophagus"}],
    [{"LOWER": "trachea"}],
    [{"LOWER": "diaphragm"}],
    [{"LOWER": "gallbladder"}],
    [{"LOWER": "urethra"}],
    [{"LOWER": "rectum"}],
    [{"LOWER": "anus"}],
    [{"LOWER": "cornea"}],
    [{"LOWER": "iris"}],
    [{"LOWER": "pupil"}],
    [{"LOWER": "retina"}],
    [{"LOWER": "eyebrow"}],
    [{"LOWER": "eyelash"}],
    [{"LOWER": "ear"}],
    [{"LOWER": "nose"}],
    [{"LOWER": "throat"}],
    [{"LOWER": "liver"}],
    [{"LOWER": "gland"}],
    [{"LOWER": "thyroid"}],
    [{"LOWER": "adrenal"}, {"LOWER": "gland"}],
    [{"LOWER": "pituitary"}, {"LOWER": "gland"}],
    [{"LOWER": "pineal"}, {"LOWER": "gland"}],
    [{"LOWER": "ovary"}],
    [{"LOWER": "testicle"}],
    [{"LOWER": "uterus"}],
    [{"LOWER": "vagina"}],
    [{"LOWER": "penis"}],
    [{"LOWER": "clitoris"}],
    [{"LOWER": "cervix"}],
    [{"LOWER": "mammary"}, {"LOWER": "gland"}],
    [{"LOWER": "peritoneum"}],
    [{"LOWER": "pharynx"}],
    [{"LOWER": "coccyx"}],
    [{"LOWER": "scalp"}],
    [{"LOWER": "jaw"}],
    [{"LOWER": "chin"}],
    [{"LOWER": "cheek"}],
    [{"LOWER": "forehead"}],
    [{"LOWER": "temple"}],
    [{"LOWER": "lip"}],
    [{"LOWER": "tooth"}],
    [{"LOWER": "tongue"}],
    [{"LOWER": "femur"}],
    [{"LOWER": "patella"}],
    [{"LOWER": "fibula"}],
    [{"LOWER": "tibia"}],
    [{"LOWER": "pelvis"}],
    [{"LOWER": "radius"}],
    [{"LOWER": "ulna"}],
    [{"LOWER": "carpal"}],
    [{"LOWER": "metacarpal"}],
    [{"LOWER": "phalanges"}],
    [{"LOWER": "clavicle"}],
    [{"LOWER": "scapula"}],
    [{"LOWER": "sternum"}],
    [{"LOWER": "rib"}],
    [{"LOWER": "vertebra"}],
    [{"LOWER": "ilium"}],
    [{"LOWER": "ischium"}],
    [{"LOWER": "pubis"}],
    [{"LOWER": "fibula"}],
    [{"LOWER": "tibia"}],
    [{"LOWER": "patella"}],
    [{"LOWER": "calcaneus"}],
    [{"LOWER": "metatarsal"}],
    [{"LOWER": "phalanges"}],
    [{"LOWER": "sacrum"}],
    [{"LOWER": "coccyx"}],
    [{"LOWER": "rectus"}, {"LOWER": "abdominis"}],
    [{"LOWER": "deltoid"}],
    [{"LOWER": "biceps"}],
    [{"LOWER": "triceps"}],
    [{"LOWER": "quadriceps"}],
    [{"LOWER": "hamstring"}],
    [{"LOWER": "gastrocnemius"}],
    [{"LOWER": "soleus"}],
    [{"LOWER": "pectoralis"}],
    [{"LOWER": "trapezius"}],
    [{"LOWER": "latissimus"}, {"LOWER": "dorsi"}],
    [{"LOWER": "gluteus"}, {"LOWER": "maximus"}],
    [{"LOWER": "gluteus"}, {"LOWER": "medius"}],
    [{"LOWER": "gluteus"}, {"LOWER": "minimus"}]
  ],

    "MEDICAL_PROCEDURE": [
    # Your original entries
    [{"LOWER": "mri"}],
    [{"LOWER": "ct"}, {"LOWER": "scan"}],
    [{"LOWER": "ultrasound"}],
    [{"LOWER": "biopsy"}],
    [{"LOWER": "endoscopy"}],
    [{"LOWER": "x-ray"}],
    [{"LOWER": "blood"}, {"LOWER": "test"}],
    [{"LOWER": "ecg"}],
    [{"LOWER": "ekg"}],
    [{"LOWER": "surgical"}, {"LOWER": "procedure"}],
    [{"LOWER": "lumbar"}, {"LOWER": "puncture"}],
    [{"LOWER": "colonoscopy"}],
    [{"LOWER": "laparoscopy"}],
    [{"LOWER": "angiography"}],
    [{"LOWER": "cardiac"}, {"LOWER": "catheterization"}],
    [{"LOWER": "aspiration"}],
    [{"LOWER": "surgical"}, {"LOWER": "resection"}],
    [{"LOWER": "organ"}, {"LOWER": "transplant"}],
    [{"LOWER": "stent"}, {"LOWER": "placement"}],
    [{"LOWER": "pacemaker"}, {"LOWER": "implantation"}],
    [{"LOWER": "arthroscopy"}],
    [{"LOWER": "skin"}, {"LOWER": "graft"}],
    [{"LOWER": "laser"}, {"LOWER": "surgery"}],
    [{"LOWER": "dialysis"}],
    [{"LOWER": "bone"}, {"LOWER": "marrow"}, {"LOWER": "transplant"}],
    [{"LOWER": "endovenous"}, {"LOWER": "laser"}, {"LOWER": "treatment"}],
    [{"LOWER": "nephrectomy"}],
    [{"LOWER": "hysterectomy"}],
    [{"LOWER": "tonsillectomy"}],
    [{"LOWER": "cholecystectomy"}],
    [{"LOWER": "appendectomy"}],
    [{"LOWER": "cesarean"}, {"LOWER": "section"}],
    [{"LOWER": "vasectomy"}],
    [{"LOWER": "circumcision"}],
    [{"LOWER": "tracheotomy"}],
    [{"LOWER": "biopsy"}, {"LOWER": "sampling"}],
    [{"LOWER": "skin"}, {"LOWER": "test"}],
    [{"LOWER": "allergy"}, {"LOWER": "test"}],
    [{"LOWER": "sputum"}, {"LOWER": "culture"}],
    [{"LOWER": "urinalysis"}],
    [{"LOWER": "tuberculosis"}, {"LOWER": "test"}],
    [{"LOWER": "chest"}, {"LOWER": "x-ray"}],
    [{"LOWER": "hba1c"}, {"LOWER": "test"}],
    [{"LOWER": "mammogram"}],
    [{"LOWER": "pap"}, {"LOWER": "smear"}],
    [{"LOWER": "bone"}, {"LOWER": "density"}, {"LOWER": "test"}],
    [{"LOWER": "cystoscopy"}],
    [{"LOWER": "colposcopy"}],
    [{"LOWER": "pulmonary"}, {"LOWER": "function"}, {"LOWER": "test"}],
    [{"LOWER": "electromyography"}],
    [{"LOWER": "pet"}, {"LOWER": "scan"}],
    [{"LOWER": "neurological"}, {"LOWER": "examination"}],
    [{"LOWER": "vaginal"}, {"LOWER": "examination"}],
    [{"LOWER": "prostate"}, {"LOWER": "exam"}],
    [{"LOWER": "fecal"}, {"LOWER": "occult"}, {"LOWER": "blood"}],
    [{"LOWER": "pft"}],
    [{"LOWER": "elisa"}, {"LOWER": "test"}],
    [{"LOWER": "liver"}, {"LOWER": "function"}, {"LOWER": "test"}],
    [{"LOWER": "kidney"}, {"LOWER": "function"}, {"LOWER": "test"}],
    [{"LOWER": "thyroid"}, {"LOWER": "function"}, {"LOWER": "test"}],
    [{"LOWER": "preoperative"}, {"LOWER": "assessment"}],
    [{"LOWER": "epidural"}, {"LOWER": "injection"}],
    [{"LOWER": "spinal"}, {"LOWER": "tap"}],
    [{"LOWER": "thoracentesis"}],
    [{"LOWER": "paracentesis"}],
    [{"LOWER": "sputum"}, {"LOWER": "analysis"}],
    [{"LOWER": "endometrial"}, {"LOWER": "biopsy"}],
    [{"LOWER": "laparotomy"}],
    [{"LOWER": "colorectal"}, {"LOWER": "screening"}],
    [{"LOWER": "blood"}, {"LOWER": "culture"}],
    [{"LOWER": "urine"}, {"LOWER": "culture"}],
    [{"LOWER": "genetic"}, {"LOWER": "testing"}],
    [{"LOWER": "molecular"}, {"LOWER": "testing"}],
    [{"LOWER": "electroencephalogram"}],
    [{"LOWER": "computed"}, {"LOWER": "tomography"}],
    [{"LOWER": "biomarker"}, {"LOWER": "testing"}],
    [{"LOWER": "genetic"}, {"LOWER": "screening"}],
    [{"LOWER": "clinical"}, {"LOWER": "examination"}],

    # New additions (expanded)
    [{"LOWER": "fluoroscopy"}],
    [{"LOWER": "bone"}, {"LOWER": "scan"}],
    [{"LOWER": "myelogram"}],
    [{"LOWER": "bronchoscopy"}],
    [{"LOWER": "esophagogastroduodenoscopy"}],
    [{"LOWER": "sigmoidoscopy"}],
    [{"LOWER": "coronary"}, {"LOWER": "angiogram"}],
    [{"LOWER": "percutaneous"}, {"LOWER": "coronary"}, {"LOWER": "intervention"}],
    [{"LOWER": "cardiac"}, {"LOWER": "ablation"}],
    [{"LOWER": "defibrillator"}, {"LOWER": "implantation"}],
    [{"LOWER": "arthroplasty"}],
    [{"LOWER": "joint"}, {"LOWER": "replacement"}],
    [{"LOWER": "dialysis"}, {"LOWER": "catheter"}, {"LOWER": "insertion"}],
    [{"LOWER": "lithotripsy"}],
    [{"LOWER": "hemodialysis"}],
    [{"LOWER": "peritoneal"}, {"LOWER": "dialysis"}],
    [{"LOWER": "intravenous"}, {"LOWER": "therapy"}],
    [{"LOWER": "plasmapheresis"}],
    [{"LOWER": "radiation"}, {"LOWER": "therapy"}],
    [{"LOWER": "brachytherapy"}],
    [{"LOWER": "cryoablation"}],
    [{"LOWER": "biomarker"}, {"LOWER": "panel"}],
    [{"LOWER": "troponin"}, {"LOWER": "test"}],
    [{"LOWER": "lipid"}, {"LOWER": "panel"}],
    [{"LOWER": "coagulation"}, {"LOWER": "tests"}],
    [{"LOWER": "allergy"}, {"LOWER": "prick"}, {"LOWER": "test"}],
    [{"LOWER": "patch"}, {"LOWER": "testing"}],
    [{"LOWER": "stress"}, {"LOWER": "test"}],
    [{"LOWER": "tilt-table"}, {"LOWER": "test"}],
    [{"LOWER": "holter"}, {"LOWER": "monitor"}],
    [{"LOWER": "endoscopic"}, {"LOWER": "retrograde"}, {"LOWER": "cholangiopancreatography"}],
    [{"LOWER": "virtual"}, {"LOWER": "colonoscopy"}],
    [{"LOWER": "coronary"}, {"LOWER": "artery"}, {"LOWER": "bypass"}, {"LOWER": "graft"}],
    [{"LOWER": "lobectomy"}],
    [{"LOWER": "thyroidectomy"}],
    [{"LOWER": "mastectomy"}],
    [{"LOWER": "amputation"}],
    [{"LOWER": "cataract"}, {"LOWER": "surgery"}],
    [{"LOWER": "lasik"}],
    [{"LOWER": "root"}, {"LOWER": "canal"}],
    [{"LOWER": "dental"}, {"LOWER": "implant"}],
    [{"LOWER": "rhinoplasty"}],
    [{"LOWER": "liposuction"}],
    [{"LOWER": "bi-level"}, {"LOWER": "positive"}, {"LOWER": "airway"}, {"LOWER": "pressure"}]
]
}

# Load spaCy's small English pipeline. The "en_core_web_sm" package must
# already be installed (e.g. `python -m spacy download en_core_web_sm`).
nlp = spacy.load("en_core_web_sm")

# Hand the pipeline and the `medical_patterns` dictionary defined above to
# save_model_with_patterns, together with the target directory.
# NOTE(review): the path looks like a mounted Google Drive folder — confirm
# the drive is mounted before running this cell.
save_model_with_patterns(nlp, medical_patterns, "/content/drive/MyDrive/NLP/NER/saved_model_with_medical_patterns")

# python -m spacy download en_core_web_sm


Model and patterns saved at: /content/drive/MyDrive/NLP/NER/saved_model_with_medical_patterns


## Find the medical keywords


In [None]:
import spacy
import json
from spacy.matcher import Matcher
import os

def load_model_with_patterns(load_dir):
    """
    Load a saved spaCy pipeline and build a Matcher from its pattern file.

    Args:
        load_dir: Directory containing the serialized spaCy pipeline and a
            ``matcher_patterns.json`` file that maps each label to a list of
            token patterns.

    Returns:
        A ``(nlp, matcher)`` tuple: the loaded pipeline and a ``Matcher``
        bound to its vocabulary with every label's patterns registered.

    Errors raised by ``spacy.load``, ``open`` or ``json.load`` (missing
    directory, missing or malformed pattern file) propagate to the caller.
    """
    # Load the spaCy model
    nlp = spacy.load(load_dir)

    # Load matcher patterns from the saved file. The encoding is pinned so the
    # JSON is read the same way on every platform, matching the Streamlit
    # loaders later in this notebook.
    patterns_file = os.path.join(load_dir, "matcher_patterns.json")
    with open(patterns_file, "r", encoding="utf-8") as f:
        medical_patterns = json.load(f)

    # Register all patterns of a label in a single call; labels with an empty
    # pattern list are skipped, just as the per-pattern loop skipped them.
    matcher = Matcher(nlp.vocab)
    for label, patterns in medical_patterns.items():
        if patterns:
            matcher.add(label, patterns)

    print("Model and patterns loaded successfully.")

    return nlp, matcher

# Restore the pipeline and the keyword matcher that were saved earlier.
nlp, matcher = load_model_with_patterns("/content/drive/MyDrive/NLP/NER/saved_model_with_medical_patterns")

# Run the pipeline on a sample sentence.
# Alternative sample: "Dr. Alex Smith chaired first board meeting of Acme Corp Inc."
doc = nlp("The Alex has diabetes and takes Metformin. They had an MRI scan for a brain tumor.")

# Entities found by the pipeline's own NER component.
for entity in doc.ents:
    print(f"Entity: {entity.text}, Label: {entity.label_}")

# Keyword hits from the rule-based matcher; each match id is resolved back to
# its label string through the shared vocabulary.
matches = matcher(doc)
for key, first, last in matches:
    keyword = doc[first:last].text
    print(f"Keyword: {keyword}, Label: {nlp.vocab.strings[key]}")


Model and patterns loaded successfully.
Entity: Alex, Label: PERSON
Entity: Metformin, Label: PERSON
Keyword: diabetes, Label: DISEASE
Keyword: Metformin, Label: MEDICATION
Keyword: MRI, Label: MEDICAL_PROCEDURE
Keyword: brain, Label: BODY_PART
Keyword: brain tumor, Label: DISEASE


### Build a Streamlit app that classifies medical terms: disease, medication, medical procedure, and body part


In [None]:
# Install Streamlit (used by app.py below) and pyngrok (used to open the tunnel).
!pip install streamlit pyngrok



In [None]:
from pyngrok import ngrok

# Register the ngrok authtoken with pyngrok.
# NOTE(review): the string below is an empty placeholder — paste your own
# authtoken between the quotes before running; presumably the later
# ngrok.connect() call fails without a valid token. Avoid committing it.
ngrok.set_auth_token('')

In [None]:
%writefile app.py
import streamlit as st
import spacy
import json
from spacy.matcher import Matcher
import os
import pandas as pd

# Function to load the spaCy model and matcher patterns
def load_model_with_patterns(load_dir):
    """Load the spaCy pipeline in ``load_dir`` and build a Matcher from its pattern file.

    Model-load and pattern-file failures are shown as Streamlit messages
    instead of being raised.

    Returns:
        ``(nlp, matcher)`` on success, ``(nlp, None)`` when
        ``matcher_patterns.json`` is missing or cannot be read, and
        ``(None, None)`` when the pipeline itself cannot be loaded.
    """
    try:
        pipeline = spacy.load(load_dir)
        model_name = pipeline.meta.get('name', 'Unknown')
        model_version = pipeline.meta.get('version', 'Unknown')
        st.success(f"Loaded model: {model_name} (version {model_version})")
    except Exception as load_error:
        st.error(f"Error loading model: {load_error}")
        return None, None

    # The pattern file is optional: without it only the pipeline is returned.
    pattern_path = os.path.join(load_dir, "matcher_patterns.json")
    if not os.path.exists(pattern_path):
        st.warning("Pattern file not found. Only model will be used.")
        return pipeline, None

    try:
        with open(pattern_path, "r", encoding="utf-8") as handle:
            patterns_by_label = json.load(handle)
    except Exception as read_error:
        st.error(f"Error loading matcher patterns: {read_error}")
        return pipeline, None

    # Register every token pattern under its label, skipping labels the
    # matcher already knows.
    keyword_matcher = Matcher(pipeline.vocab)
    for label, token_patterns in patterns_by_label.items():
        if label in keyword_matcher:
            continue
        for token_pattern in token_patterns:
            keyword_matcher.add(label, [token_pattern])

    st.success("Model and patterns loaded successfully.")
    return pipeline, keyword_matcher

# Streamlit UI
st.title("Medical Text NER & Keyword Matcher")

# Streamlit re-executes this script on every interaction, so the loaded
# pipeline and matcher are kept in session state to survive reruns.
if "nlp" not in st.session_state:
    st.session_state.nlp = None
    st.session_state.matcher = None

load_dir = st.text_input("Enter the model directory path:", "/content/drive/MyDrive/NLP/NER/saved_model_with_medical_patterns")

if st.button("Load Model"):
    loaded_nlp, loaded_matcher = load_model_with_patterns(load_dir)
    if loaded_nlp:
        st.session_state.nlp = loaded_nlp
        st.session_state.matcher = loaded_matcher
        st.success("Model loaded into memory! Now you can analyze multiple texts.")

# Text to analyse
user_input = st.text_area("Enter medical text:", "The Alex has diabetes and takes Metformin. They had an MRI scan for a brain tumor.")

if st.button("Analyze Text"):
    nlp = st.session_state.nlp
    if nlp is None:
        st.error("Please load the model first.")
    else:
        matcher = st.session_state.matcher
        doc = nlp(user_input)

        # Rows from the pipeline's NER component come first ...
        rows = [(ent.text, ent.label_, "NER") for ent in doc.ents]

        # ... followed by the keyword hits, when a matcher with rules is available.
        if matcher:
            for match_id, start, end in matcher(doc):
                rows.append((doc[start:end].text, nlp.vocab.strings[match_id], "Matcher"))

        # Show everything in one table
        table = pd.DataFrame(rows, columns=["Text", "Label", "Source"])
        st.write("### Extracted Entities & Matched Keywords")
        st.dataframe(table)


UsageError: Line magic function `%writefile` not found (But cell magic `%%writefile` exists, did you mean that instead?).


In [None]:
!pip install fitz



In [None]:
%%writefile app.py
import streamlit as st
import spacy
import json
import requests
import fitz  # PyMuPDF
from bs4 import BeautifulSoup
from spacy.matcher import Matcher
import os
import pandas as pd


# Function to extract text from a website URL
def extract_text_from_url(url):
    """Download ``url`` and return the text BeautifulSoup extracts from its HTML.

    The request times out after 5 seconds. On any failure (network error,
    HTTP error status, parsing problem) the error is shown with ``st.error``
    and an empty string is returned.
    """
    try:
        page = requests.get(url, timeout=5)
        page.raise_for_status()  # treat 4xx/5xx answers as failures
        parsed = BeautifulSoup(page.text, "html.parser")
        page_text = parsed.get_text(separator=" ", strip=True)
    except Exception as fetch_error:
        st.error(f"Error fetching URL: {fetch_error}")
        page_text = ""
    return page_text

# Function to load the spaCy model and matcher
def load_model_with_patterns(load_dir):
    """Load the spaCy pipeline in ``load_dir`` and build a Matcher from its pattern file.

    Model-load and pattern-file failures are shown as Streamlit messages
    instead of being raised.

    Returns:
        ``(nlp, matcher)`` on success, ``(nlp, None)`` when
        ``matcher_patterns.json`` is missing or cannot be read, and
        ``(None, None)`` when the pipeline itself cannot be loaded.
    """
    try:
        pipeline = spacy.load(load_dir)
        model_name = pipeline.meta.get('name', 'Unknown')
        model_version = pipeline.meta.get('version', 'Unknown')
        st.success(f"Loaded model: {model_name} (version {model_version})")
    except Exception as load_error:
        st.error(f"Error loading model: {load_error}")
        return None, None

    # The pattern file is optional: without it only the pipeline is returned.
    pattern_path = os.path.join(load_dir, "matcher_patterns.json")
    if not os.path.exists(pattern_path):
        st.warning("Pattern file not found. Only model will be used.")
        return pipeline, None

    try:
        with open(pattern_path, "r", encoding="utf-8") as handle:
            patterns_by_label = json.load(handle)
    except Exception as read_error:
        st.error(f"Error loading matcher patterns: {read_error}")
        return pipeline, None

    # Register every token pattern under its label, skipping labels the
    # matcher already knows.
    keyword_matcher = Matcher(pipeline.vocab)
    for label, token_patterns in patterns_by_label.items():
        if label in keyword_matcher:
            continue
        for token_pattern in token_patterns:
            keyword_matcher.add(label, [token_pattern])

    st.success("Model and patterns loaded successfully.")
    return pipeline, keyword_matcher

# Streamlit UI
st.title("Medical Text NER & Keyword Matcher")


def extract_text_from_pdf(uploaded_file):
    """Return the text of every page of an uploaded PDF, or "" on failure.

    ``uploaded_file`` is the object returned by ``st.file_uploader``; its raw
    bytes are handed to PyMuPDF directly. Any error while opening or reading
    the document is shown with ``st.error``.
    """
    try:
        with fitz.open(stream=uploaded_file.getvalue(), filetype="pdf") as pdf:
            return "\n".join(page.get_text() for page in pdf)
    except Exception as e:
        st.error(f"Error reading PDF: {e}")
        return ""


# Streamlit re-runs this script on every widget interaction, so anything that
# must outlive a single button click is kept in st.session_state.
if "nlp" not in st.session_state:
    st.session_state.nlp = None
    st.session_state.matcher = None
if "url_text" not in st.session_state:
    st.session_state.url_text = ""

load_dir = st.text_input("Enter the model directory path:", "/content/drive/MyDrive/NLP/NER/saved_model_with_medical_patterns")

if st.button("Load Model"):
    nlp, matcher = load_model_with_patterns(load_dir)
    if nlp:
        st.session_state.nlp = nlp
        st.session_state.matcher = matcher
        st.success("Model loaded into memory! Now you can analyze multiple texts.")

# Select input type
input_option = st.radio("Select input method:", ["Manual Text", "Website URL", "PDF Upload"])

# Get input text
text = ""

if input_option == "Manual Text":
    text = st.text_area("Enter text:", "The Alex has diabetes and takes Metformin. They had an MRI scan for a brain tumor.")

elif input_option == "Website URL":
    url = st.text_input("Enter website URL:")
    if st.button("Extract Text from URL"):
        # A button is only True during the run triggered by its own click, so
        # the extracted text is stored; otherwise it would be gone again by
        # the time "Analyze Text" is pressed.
        st.session_state.url_text = extract_text_from_url(url)
    text = st.session_state.url_text
    if text:
        st.text_area("Extracted Text:", text, height=200)

elif input_option == "PDF Upload":
    uploaded_pdf = st.file_uploader("Upload a PDF file:", type=["pdf"])
    if uploaded_pdf is not None:
        text = extract_text_from_pdf(uploaded_pdf)
        if text:
            st.text_area("Extracted Text:", text, height=200)

# Analyze text
if st.button("Analyze Text"):
    if st.session_state.nlp is None:
        st.error("Please load the model first.")
    elif not text.strip():
        st.error("No text provided. Enter text, URL, or upload a PDF.")
    else:
        nlp = st.session_state.nlp
        matcher = st.session_state.matcher
        doc = nlp(text)

        # Extract Named Entities
        results = [(ent.text, ent.label_, "NER") for ent in doc.ents]

        # Extract Matcher results (skipped when no matcher with rules is loaded)
        if matcher:
            for match_id, start, end in matcher(doc):
                label = nlp.vocab.strings[match_id]
                span = doc[start:end]
                results.append((span.text, label, "Matcher"))

        # Convert to DataFrame for display
        df = pd.DataFrame(results, columns=["Text", "Label", "Source"])
        st.write("### Extracted Entities & Matched Keywords")
        st.dataframe(df)


Overwriting app.py


In [None]:
from pyngrok import ngrok
import os

# Open an ngrok tunnel that forwards a public URL to local port 8501.
# NOTE(review): 8501 is presumably the port Streamlit listens on by default;
# the port is not passed to `streamlit run` below — confirm they agree.
tunnel = ngrok.connect(8501)

# Start the Streamlit app through the shell; the trailing `&` puts it in the
# background so this cell is not blocked by the server process.
os.system('streamlit run app.py &')

tunnel  # Last expression of the cell: the notebook displays the tunnel object, including its public URL

<NgrokTunnel: "https://2499-35-187-144-65.ngrok-free.app" -> "http://localhost:8501">