In [1]:
import pandas as pd
import numpy as np
import pickle
import seaborn as sns

from matplotlib import pyplot as plt
from medcat.cat import CAT
import re
from nltk.corpus import stopwords
import nltk
nltk.download('stopwords')

  from tqdm.autonotebook import tqdm, trange
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\BILAL\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [2]:
# Load the MedCAT model pack and create `cat`, the CAT instance (the main
# medcat class for concept annotation) that later cells call to extract entities.
# The model pack path is relative, so it is resolved against the current working directory.

model_pack_path = 'medcat/mc_modelpack_snomed_int_16_mar_2022_25be3857ba34bdd5'
cat = CAT.load_model_pack(model_pack_path)

Found an existing unziped model pack at: medcat\mc_modelpack_snomed_int_16_mar_2022_25be3857ba34bdd5, the provided zip will not be touched.
{
  "Model ID": "25be3857ba34bdd5",
  "Last Modifed On": "16 March 2022",
  "History (from least to most recent)": [
    "a474096eb4566638",
    "009617d7ff372682"
  ],
  "Description": "SNOMED INT enriched with UMLS and trained unsupervised on MIMIC-III",
  "Source Ontology": "SnomedCT_InternationalRF2_PRODUCTION_20210131T120000Z",
  "Location": "MedCAT Rosalind machine, available for public download from https://github.com/CogStack/MedCAT",
  "MetaCAT models": {
    "Status": "Detects is a concept Affirmed or Negated/Hypothetical"
  },
  "Basic CDB Stats": {
    "Number of concepts": 354448,
    "Number of names": 2049216,
    "Number of concepts that received training": 29674,
    "Number of seen training examples in total": 20585988,
    "Average training examples per concept": 693.7382220125362
  },
  "Performance": {
    "ner": {},
    "meta"

In [5]:
example = ' \nName:  ___                 Unit No:   ___\n \nAdmission Date:  ___              Discharge Date:   ___\n \nDate of Birth:  ___             Sex:   M\n \nService: ORTHOPAEDICS\n \nAllergies: \nerythromycin base\n \nAttending: ___.\n \nChief Complaint:\nLeft ankle fracture\n \nMajor Surgical or Invasive Procedure:\nORIF left ankle ___\n \nHistory of Present Illness:\n___ yr old man fall down 3 stairs while holding lumbar at \nconstruction\njob, resulting in immediate L ankle pain. ED ankle fracture, \nsurgery performed\n\n \nPast Medical History:\nPrevious R ankle fracture\nRLE cellulitis\nOsteoarthritis\n\n \nSocial History:\n___\nFamily History:\nN/A\n \nPhysical Exam:\nAOXO3\nsplint intact, positive CSM\nPain well controlled, no N/V\n\n \nPertinent Results:\n___ 08:28PM   WBC-9.5 RBC-4.49* HGB-13.7 HCT-41.0 MCV-91 \nMCH-30.5 MCHC-33.4 RDW-13.8 RDWSD-46.7*\n___ 08:28PM   GLUCOSE-96 UREA N-13 CREAT-0.7 SODIUM-145 \nPOTASSIUM-4.1 CHLORIDE-108 TOTAL CO2-24 ANION GAP-13\n___ 08:28PM   CALCIUM-9.0 PHOSPHATE-3.5 MAGNESIUM-1.9\n \nBrief Hospital Course:\nTaken to operating room and underwent surgical fixation of ankle \nfracture on ___. No complications. Post op doing well, pain \nwell controlled. Cleared by ___ to go home without services\n \nMedications on Admission:\nThe Preadmission Medication list is accurate and complete.\n1. Multivitamins 1 TAB PO DAILY \n2. Naproxen 500 mg PO Q8H:PRN Pain - Mild \n\n \nDischarge Medications:\n1.  Acetaminophen 1000 mg PO Q8H \nRX *acetaminophen 500 mg 2 tablet(s) by mouth every eight (8) \nhours Disp #*40 Tablet Refills:*0 \n2.  Aspirin 325 mg PO DAILY \nRX *aspirin 325 mg 1 tablet(s) by mouth once a day Disp #*14 \nTablet Refills:*0 \n3.  Docusate Sodium 100 mg PO BID \nRX *docusate sodium 100 mg 1 capsule(s) by mouth twice a day \nDisp #*30 Capsule Refills:*0 \n4.  Ondansetron 4 mg PO Q8H:PRN severe nausea \nRX *ondansetron 4 mg 1 tablet(s) by mouth every six (6) hours \nDisp #*8 Tablet Refills:*0 \n5.  
OxyCODONE (Immediate Release) ___ mg PO Q4H:PRN Pain - \nModerate \nRX *oxycodone 5 mg ___ tablet(s) by mouth every four (4) hours- \n(6) hours Disp #*60 Tablet Refills:*0 \n6.  Multivitamins 1 TAB PO DAILY  \n\n \nDischarge Disposition:\nHome\n \nDischarge Diagnosis:\nleft ankle fracture, s/p ORIF ___\n \nDischarge Condition:\nMental Status: Clear and coherent.\nLevel of Consciousness: Alert and interactive.\nActivity Status: Ambulatory - Independent.\n \nDischarge Instructions:\nNWB to LLE\nKeep splint dry, in place\nElevate LLE\nFollow up in ___ clinic in 2 weeks\n \nFollowup Instructions:\n___\n'

In [93]:
# NLTK's English stop words plus a few extra tokens that
# process_clinical_note should also strip from the notes.
medical_stopwords = stopwords.words("english") + ['speaking', 'none', 'time', 'flush']

def process_clinical_note(clinical_note, stop_words=None):
    """Reduce a free-text clinical note to a '. '-joined string of cleaned lines.

    Processing steps, in order:

    1. Drop unwanted sections: every line from one containing a listed
       header up to (not including) the next blank line.
    2. Delete a fixed set of heading phrases wherever they occur
       (case-insensitive).
    3. Drop every line that contains a negation ("no X", "not X",
       "did not have X").
    4. Remove ``key:`` tokens and every character that is not an ASCII
       letter or whitespace.
    5. Remove stop words, discard lines that end up empty, and join the
       remaining lines with '. '.

    Parameters
    ----------
    clinical_note : str or None
        Raw note text with one statement per line. ``None`` is treated as
        an empty note.
    stop_words : iterable of str, optional
        Lower-case words to remove. When omitted, the module-level
        ``medical_stopwords`` list is used.

    Returns
    -------
    str
        The cleaned lines joined with '. ' ('' when nothing survives).
    """
    if clinical_note is None:
        return ''

    if stop_words is None:
        stop_words = medical_stopwords
    # Set membership avoids scanning the whole stop-word list for every word.
    stop_word_set = set(stop_words)

    # Headers whose section (up to the next blank line) is discarded.
    section_headers = [
        "Name:",
        "Unit No:",
        "Admission Date:",
        "Discharge Date:",
        "Date of Birth:",
        "Sex:",
        "Service:",
        "Allergies:",
        "Attending:",
        "Past Medical History:",
        "Social History:",
        "Family History:",
        "Vitals:",
        "Pertinent Results:",
        "Medications on Admission:",
        "Discharge Medications:",
        "Discharge Disposition:",
        "Discharge Condition:",
        "Discharge Instructions:",
        "Followup Instructions:"
    ]

    # Step 1: keep only the lines that sit outside an unwanted section.
    kept_lines = []
    in_excluded_section = False
    for line in clinical_note.split('\n'):
        if any(header in line for header in section_headers):
            in_excluded_section = True
        elif line.strip() == "":
            # Blank lines separate sections, so they end the exclusion.
            in_excluded_section = False

        if not in_excluded_section:
            kept_lines.append(line)

    final_note = '\n '.join(kept_lines)

    # Step 2: strip heading phrases from the remaining text. These are
    # removed anywhere they appear, not only at the start of a line.
    heading_patterns = [
        r'chief complaint',
        r'history of present illness',
        r'Major Surgical or Invasive Procedure',
        r'physical exam',
        r'brief hospital course',
        r'Discharge',

        r'completed by',
    ]
    for pattern in heading_patterns:
        final_note = re.sub(pattern, '', final_note, flags=re.IGNORECASE)

    # Step 3: drop lines containing a negation. The leading \b is required:
    # without it "amino acid" matches as "no acid" and the line is lost.
    negation = re.compile(r'\b(?:no|not|did\s+not\s+have)\s+\w+', re.IGNORECASE)
    affirmed_lines = [
        line for line in final_note.split('\n') if not negation.search(line)
    ]

    # Step 4: remove "key:" tokens, then everything except letters/whitespace.
    # Characters are deleted, not replaced, so "C/O" becomes "CO".
    cleaned_note = re.sub(r'\w+:', '', '\n'.join(affirmed_lines), flags=re.IGNORECASE)
    cleaned_note = re.sub(r'[^a-zA-Z\s]', '', cleaned_note)

    # Step 5: per line, remove stop words and skip lines left empty.
    sentences = []
    for raw_line in cleaned_note.split('\n'):
        stripped = raw_line.strip()
        if not stripped:
            continue
        remaining = ' '.join(
            word for word in stripped.split() if word.lower() not in stop_word_set
        )
        if remaining != '':
            sentences.append(remaining)

    return '. '.join(sentences)

In [94]:
sample = """19YRS OLD FEMALE
CP CHILD
REFRRED FROM OPD 
FOR TACHYPNEA

ALREADY FOLLOWING IN OPD 
LOST TO FOLLOW SINCE 2 MONTHS
C/O DRY COUGH SINCE 8 MONTHS
WT LOSS SINCE 3 MONTH
FEVER ON/OFF SINCE 3 MONTHS

H/O TB CONTACT +VE

O/E
YOUNG ALERT EMACIATED FEMALE
VITALLY STABLE
TACHYPNEIC
MAINTAINING SATURATION
CHEST B/L CREPTS

ADV
XRAY CHEST
====================="""

In [95]:
processed_example = process_clinical_note(sample)
processed_example

'YRS OLD FEMALE. CP CHILD. REFRRED OPD. TACHYPNEA. ALREADY FOLLOWING OPD. LOST FOLLOW SINCE MONTHS. CO DRY COUGH SINCE MONTHS. WT LOSS SINCE MONTH. FEVER ONOFF SINCE MONTHS. HO TB CONTACT. OE. YOUNG ALERT EMACIATED FEMALE. VITALLY STABLE. TACHYPNEIC. MAINTAINING SATURATION. CHEST BL CREPTS. ADV. XRAY CHEST'

In [14]:
# Accumulator for extraction results: later cells append one entry to each
# list per processed note and then turn the dict into a DataFrame.
icd10_data = {
    # 'hadm_id': [],
    'icd10_codes': [],
    'text': []
}



In [15]:
# Get the entities from the text
all_entities = cat.get_entities(processed_example)

# Distinct, non-empty ICD-10 codes attached to any detected entity
# (a set removes duplicates automatically).
icd_codes = {
    code
    for entity_data in all_entities['entities'].values()
    for code in entity_data.get('icd10', [])
    if code
}

# NOTE: this cell appends to icd10_data, so running it again adds a
# duplicate row; re-run the cell that creates icd10_data first.
# icd10_data['hadm_id'].append('dummy_example')
# sorted() gives a reproducible order; plain set iteration order for strings
# can change between kernel sessions.
icd10_data['icd10_codes'].append(sorted(icd_codes))
icd10_data['text'].append(processed_example)

In [96]:
# Create a DataFrame from the processed data
df_icd10 = pd.DataFrame(icd10_data)
df_icd10

Unnamed: 0,icd10_codes,text
0,"[R06.8, R05, R50.9, E43, R63.4]","[YRS OLD FEMALE, CP CHILD, REFRRED OPD, TACHYP..."


In [89]:
def reformat(code):
    """Return the first three characters of `code` after removing every '.'.

    For example 'R06.8' becomes 'R06'; inputs shorter than three
    characters are returned without their dots.
    """
    return code.replace('.', '')[:3]

In [18]:
df_icd10['icd10_codes'] = df_icd10['icd10_codes'].apply(lambda lst: [reformat(x) for x in lst])
df_icd10

Unnamed: 0,icd10_codes,text
0,"[R06., R05., R50., E43., R63.]",YRS OLD FEMALE. CP CHILD. REFRRED OPD. TACHYPN...


In [21]:
processed_example = process_clinical_note(sample)
processed_example

['YRS OLD FEMALE',
 'CP CHILD',
 'REFRRED OPD',
 'TACHYPNEA',
 'ALREADY FOLLOWING OPD',
 'LOST FOLLOW SINCE MONTHS',
 'CO DRY COUGH SINCE MONTHS',
 'WT LOSS SINCE MONTH',
 'FEVER ONOFF SINCE MONTHS',
 'HO TB CONTACT',
 'OE',
 'YOUNG ALERT EMACIATED FEMALE',
 'VITALLY STABLE',
 'TACHYPNEIC',
 'MAINTAINING SATURATION',
 'CHEST BL CREPTS',
 'ADV',
 'XRAY CHEST']

In [39]:
processed_example

['YRS OLD FEMALE',
 'CP CHILD',
 'REFRRED OPD',
 'TACHYPNEA',
 'ALREADY FOLLOWING OPD',
 'LOST FOLLOW SINCE MONTHS',
 'CO DRY COUGH SINCE MONTHS',
 'WT LOSS SINCE MONTH',
 'FEVER ONOFF SINCE MONTHS',
 'HO TB CONTACT',
 'OE',
 'YOUNG ALERT EMACIATED FEMALE',
 'VITALLY STABLE',
 'TACHYPNEIC',
 'MAINTAINING SATURATION',
 'CHEST BL CREPTS',
 'ADV',
 'XRAY CHEST']

In [90]:
def predict_codes_using_Medcat(cat, processed_example, categories_description):
    """Annotate sentences with MedCAT and return the ICD-10 codes found.

    Parameters
    ----------
    cat : object
        Annotator exposing ``get_entities(text)``; the result must hold an
        ``'entities'`` mapping whose values may carry an ``'icd10'`` list.
    processed_example : str or iterable of str
        Sentences to annotate. A single string (the form
        ``process_clinical_note`` returns) is split on '. ' first;
        iterating it directly would annotate one character at a time.
    categories_description : pandas.DataFrame
        Lookup table with an ``icd_code`` column; its other columns are
        joined onto the result.

    Returns
    -------
    pandas.DataFrame
        One row per (sentence, code) hit with columns ``support`` (the
        sentence), ``label`` (the code passed through ``reformat``) and the
        lookup table's remaining columns. Codes absent from the lookup keep
        their row with missing values. Empty, but with the same columns,
        when no code is found.
    """
    print("predicting codes")

    if isinstance(processed_example, str):
        sentences = [part for part in processed_example.split('. ') if part.strip()]
    else:
        sentences = list(processed_example)

    # Collect plain records and build the frame once: DataFrame.append no
    # longer exists in pandas >= 2.0 and row-wise growth is quadratic.
    records = []
    for sentence in sentences:
        all_entities = cat.get_entities(sentence)
        for entity_data in all_entities['entities'].values():
            for code in entity_data.get('icd10', []):
                if code:  # skip empty-string codes
                    records.append({'support': sentence, 'label': reformat(code)})

    # Explicit columns keep 'label' present even when nothing was found,
    # so the merge below cannot fail on a missing key.
    results = pd.DataFrame(records, columns=['support', 'label'])

    final_results = pd.merge(
        results,
        categories_description,
        left_on='label',
        right_on='icd_code',
        how='left',
    )
    return final_results.drop(columns=['icd_code'])

In [91]:
categories_description = pd.read_csv('categories.csv')

In [103]:
sample = """19YRS OLD FEMALE
CP CHILD
REFRRED FROM OPD 
FOR TACHYPNEA

ALREADY FOLLOWING IN OPD 
LOST TO FOLLOW SINCE 2 MONTHS
C/O DRY COUGH SINCE 8 MONTHS
WT LOSS SINCE 3 MONTH
FEVER ON/OFF SINCE 3 MONTHS

H/O TB CONTACT +VE

O/E
YOUNG ALERT EMACIATED FEMALE
VITALLY STABLE
TACHYPNEIC
MAINTAINING SATURATION
CHEST B/L CREPTS

ADV
XRAY CHEST
====================="""


In [104]:
processed_example = process_clinical_note(sample)
processed_example

['YRS OLD FEMALE',
 'CP CHILD',
 'REFRRED OPD',
 'TACHYPNEA',
 'ALREADY FOLLOWING OPD',
 'LOST FOLLOW SINCE MONTHS',
 'CO DRY COUGH SINCE MONTHS',
 'WT LOSS SINCE MONTH',
 'FEVER ONOFF SINCE MONTHS',
 'HO TB CONTACT',
 'OE',
 'YOUNG ALERT EMACIATED FEMALE',
 'VITALLY STABLE',
 'TACHYPNEIC',
 'MAINTAINING SATURATION',
 'CHEST BL CREPTS',
 'ADV',
 'XRAY CHEST']

In [105]:
predict_codes_using_Medcat(cat, processed_example, categories_description)

predicting codes
support    object
label      object
dtype: object


Unnamed: 0,support,label,description
0,TACHYPNEA,R06,Abnormalities of breathing
1,CO DRY COUGH SINCE MONTHS,R05,Cough
2,FEVER ONOFF SINCE MONTHS,R50,Fever of other and unknown origin
3,YOUNG ALERT EMACIATED FEMALE,E43,Unspecified severe protein-calorie malnutrition
4,YOUNG ALERT EMACIATED FEMALE,R63,Symptoms and signs concerning food and fluid i...
5,TACHYPNEIC,R06,Abnormalities of breathing


In [112]:
sample_3 = """48 yrs female  with  Hep c  (treated) , DM( following Diabetic clinic )/HTN IHD
echo  EF  30 %
presented with c/o:
shortness of breath a rest and on mild exertion 4 days
orthopnea,pnd+ve
b/l pedal oedema
o/e
chest:b/l decrease breath sound
cvs:s1+s2
abdomen:non-tender
cns:intact
plan:
cbc,u/c/e,cxe,ecg,top I
CARDIOLOGY PLAN
admit patient u/c/o Dr.Ahmed noor in CCU Bed 2"""

In [113]:
processed_example = process_clinical_note(sample_3)
predict_codes_using_Medcat(cat, processed_example, categories_description)

predicting codes
support    object
label      object
dtype: object


Unnamed: 0,support,label,description
0,yrs female Hep c treated DM following Diabetic...,B18,Chronic viral hepatitis
1,yrs female Hep c treated DM following Diabetic...,E14,
2,yrs female Hep c treated DM following Diabetic...,I25,Chronic ischemic heart disease
3,bl pedal oedema,R60,"Edema, not elsewhere classified"
4,bl decrease breath sound,R09,Other symptoms and signs involving the circula...


In [114]:
sample_4 = """NKCM 
HX OF RTA ON BIKE 
FALL OVER BACK 
18 WEEK GA 
PRESENTED WITH SUPERFICIAL LACERTAION OVER OCCIPITAL REGION WITH HEMATOMA FORMATION 
VERTIGO AFTER RTA
NO HX OF ALOC , EVENT AMNESIA 
NO HX OF ABD , CHEST TRAUMA
NO LIMITATION OF MOVMENT AT ANY JOINT
NO OBVIOUS SWELLING 
O/E;
BP134/85MMHG 
HR100/MIN 
GCS 15/15
PLAN;
INJ ATS IMX STAT
INJ FALGAN IVX STAT
U/S FOR FWB
PATIENT WENT LAMA FROM U/S DEP NEVER CAME BACK FOR REFEREA LETTER CONSIDER LAMA"""

In [116]:
processed_example = process_clinical_note(sample_4)
predict_codes_using_Medcat(cat, processed_example, categories_description)

predicting codes
support    object
label      object
dtype: object


Unnamed: 0,support,label,description
0,VERTIGO RTA,R42,Dizziness and giddiness


In [117]:
sample_5 = """48 yr old female hx of cva right sided residual weakness fc -4 k/c of dm on insulin 70/30 , ihd ,ef 30 % came to er wth c/o 
wound on right foot --4 months 
black discoloartion of wound --3 months 
no hx of fever , or discharge 
also complain of orthopnea-- 1.5 month
o/e
bp-145/91mmhg
pulse--92/min
spo2--87
dry gangrene involving big toe of right foot , hyperemia till mid foot , no tenderness observed 
chest :b/l crepts more at bases 
abd : soft,nontender
plan
xray foot 
cxr ecg
cbc suce 
rx
bp90/60 pulse weak , lasix did not give pt is not in much distress 
inj falgan iv 
inj clindamycin 600mg iv
ecg 2 pvcs , no acute st-t changes
trop i--303 last on dc was 5711 , seen by cardiology on call as well , no acute cardiology issue at present 
gs team consulted for diabetic foot 
needs admission for stablisation , no beds availble 
refer out"""

In [118]:
processed_example = process_clinical_note(sample_5)
predict_codes_using_Medcat(cat, processed_example, categories_description)

predicting codes
support    object
label      object
dtype: object


Unnamed: 0,support,label,description
0,yr old female hx cva right sided residual weak...,I64,
1,yr old female hx cva right sided residual weak...,R53,Malaise and fatigue
2,also complain orthopnea month,R06,Abnormalities of breathing
3,gs team consulted diabetic foot,E14,


In [119]:
sample_6 = """nkcm 
h/o trauma to right ar, block fell over the patient's right arm about 1.5 hour back 
presented with c/o severe pain in right elbow 
Patient seen a/c to ATLS Guidelines 
A- intact 
B- normal 
C- normal
D- deep open wound at right elbow, tenderness, no rom, distal pulses palpible 
plan 
inj. kinz 3 mg iv stat
inj. ats im stat
inj. augmention1.2 gm iv atd 
xray rt elbow
ortho notes
20 yr od male 
hx of brick fell over arm while sleeping 3 hr back 
a/c:
rt arm pain
o/e:
rt elbow lat aspect deep puncture wound with abrasion 
pain and tender at rt elbow
movement at elbow restricted and painful 
distal movvement intact 
distal nvb intact
rt hand dominant 

x-ray rt proximal ulna and olecranon fracture rt side 

plan:
need urgent debridement and intervention admission 
no ortho bed available 
refer out"""

In [120]:
processed_example = process_clinical_note(sample_6)
predict_codes_using_Medcat(cat, processed_example, categories_description)

predicting codes
support    object
label      object
dtype: object


Unnamed: 0,support,label,description
0,ho trauma right ar block fell patients right a...,T14,Injury of unspecified body region
1,presented co severe pain right elbow,R52,"Pain, unspecified"
2,rt arm pain,M79,"Other and unspecified soft tissue disorders, n..."
3,rt elbow lat aspect deep puncture wound abrasion,T14,Injury of unspecified body region
4,pain tender rt elbow,R52,"Pain, unspecified"
5,movement elbow restricted painful,R52,"Pain, unspecified"
6,xray rt proximal ulna olecranon fracture rt side,S52,Fracture of forearm


In [129]:
sample_9 = """NKCM
YOUNG MALE
HERE WITH C/O LOOSE STOOLS SINCE 4 DAYS
INITIALLY 4-5 EPISODES/DAY NOW 7-8 EPISODES SINCE 1 DAY
ASS WITH BLEEDING PR
NO ANY H/O NAUSEA/VOMITING OR FEVER

O/E
YOUNG ALERT MALE
WITH VITALS
BP 140/83
P 80
T 37
RR 22
SPO2 98
CHEST CLEAR NVB
ABD SOFT NON TENDER
DEHYDRATION MILD
DRE UNREMARKABLE

ADV
LABS
INJ N/S 500CC IV STAT
VBGS
===================

LABS SHOW TLC 12000
OTHERS WNR
PT CLINICALLY AND VITALLY SATBLE

D/W DR ADEEL
DC ON ORAL TAB NOVIDAT 500 MG 
TAB FLAGYL 400 MG FOR 5 DAYS 
ORS SCAHET
SMECTA SACHET
FNF"""

In [130]:
processed_example = process_clinical_note(sample_9)
predict_codes_using_Medcat(cat, processed_example, categories_description)

predicting codes
support    object
label      object
dtype: object


Unnamed: 0,support,label,description
0,CO LOOSE STOOLS SINCE DAYS,R19,Other symptoms and signs involving the digesti...
1,ASS BLEEDING PR,K62,Other diseases of anus and rectum
2,ABD SOFT NON TENDER,R19,Other symptoms and signs involving the digesti...
3,DEHYDRATION MILD,E86,Volume depletion


In [131]:
sample_10 = """32 FEMALE,
K/C OF UNCONTROLED D.M ( HbA1C FROM OUT SIDE 11)
C/O FEVER WITH CHILLS FOR 2 DAYS, DYSURIA FOR SAME DURATION,
CAME TO ER IN MORNING WAS INVESTIGATED AND FOUND TO HAVE UTI WITH HYPONETRIMIA,
NEEDED ADMISSION IN HDU BUT AS NO BEDS AVAILABLE, SO WAS REFERED OUT,
O/E:
BP:101/62 MMHG
HR:123/MINT
O2SAT:98%
TEMP: 102*F
RBS:340mg/dl,
CHEST: CLEAR,
ABDOMEN: FLABBY, SOFT, MILD TENDERNESS IN LOWER ABDOMEN,
G.MED REVIEW WITH RESIDENT,
ADVISED TO SEND LABS,
SYMPTOMATIC TREATMENT,
ADMIT IN HDU."""

In [132]:
processed_example = process_clinical_note(sample_10)
predict_codes_using_Medcat(cat, processed_example, categories_description)

predicting codes
support    object
label      object
dtype: object


Unnamed: 0,support,label,description
0,CO FEVER CHILLS DAYS DYSURIA DURATION,R50,Fever of other and unknown origin
1,CO FEVER CHILLS DAYS DYSURIA DURATION,R30,Pain associated with micturition
2,CAME ER MORNING INVESTIGATED FOUND UTI HYPONET...,N39,Other disorders of urinary system
3,FLABBY SOFT MILD TENDERNESS LOWER ABDOMEN,R52,"Pain, unspecified"
