This notebook searches patient notes for mentions of chronic conditions.

In [None]:
import pandas as pd
import csv

# Load the patient note log. Malformed rows are skipped with a warning instead
# of aborting the load: `on_bad_lines='warn'` is the replacement for
# `error_bad_lines=False`, which was deprecated in pandas 1.3 and removed in 2.0.
# NOTE(review): `parse_dates=True` only asks pandas to try parsing the index, so
# the `noteDate` column is presumably still read as text — confirm whether it
# should be parsed explicitly.
df = pd.read_csv('./patient_note_log.csv', parse_dates=True, engine='python', on_bad_lines='warn')

# (rows, columns) of the loaded frame
df.shape

In [2]:
# !aws s3 cp s3://patient-data-v1/patient_note_log.csv ./patient_note_log.csv
# !aws s3 cp s3://patient-data-v1/Chronic_condns.csv ./chronic_condns.csv

In [None]:
!pip install -U spacy

In [None]:
!python -m spacy download en_core_web_sm

In [5]:
import spacy
from spacy.matcher import PhraseMatcher
import pandas as pd
nlp = spacy.load('en_core_web_sm')

# Phrase matcher bound to the pipeline's vocabulary. No `attr` is given, so it
# compares the verbatim token text.
# NOTE(review): that makes matching case-sensitive — a note that writes
# "diabetes" will not match the master entry "Diabetes". Confirm this is intended.
phrase_matcher = PhraseMatcher(nlp.vocab)

# Chronic-condition master data: the `category` column of chronic_condns.csv
chrcond_lst = pd.read_csv('./chronic_condns.csv')['category'].tolist()
print(chrcond_lst)

# Patterns only need to be tokenized for text matching, so build them with
# `make_doc` instead of running the tagger/parser/NER on every phrase.
chrcond_patterns = [nlp.make_doc(cond) for cond in chrcond_lst]

# Current signature is add(key, docs); the add(key, None, *docs) form is the
# legacy spaCy v2 calling convention.
phrase_matcher.add('ChronicCond', chrcond_patterns)

['Acquired Hypothyroidism', 'Acute Myocardial Infarction', 'Alzheimers Disease', 'Alzheimers Disease, Related Disorders, or Senile Dementia', 'Anemia', 'Asthma', 'Atrial Fibrillation', 'Benign Prostatic Hyperplasia', 'Cancer, Colorectal', 'Cancer, Endometrial', 'Cancer, Breast', 'Cancer, Lung', 'Cancer, Prostate', 'Severe Vision Reduction', 'Chronic Kidney Disease', 'Chronic Obstructive Pulmonary Disease', 'Depression', 'Diabetes', 'Glaucoma', 'Heart Failure', 'Hip or Pelvic Fracture', 'Lipid Metabolism Disorder', 'Hypertension', 'Ischemic Heart Disease', 'Osteoporosis', 'Rheumatoid Arthritis or Osteoarthritis', 'Stroke or Transient Ischemic Attack', 'ADHD, Conduct Disorders, and Hyperkinetic Syndrome', 'Alcohol Use Disorders', 'Anxiety Disorders', 'Autism Spectrum Disorders', 'Bipolar Disorder', 'Cerebral Palsy', 'Cystic Fibrosis and Other Metabolic Developmental Disorders', 'Depressive Disorders', 'Drug Use Disorders', 'Epilepsy', 'Fibromyalgia, Chronic Pain and Fatigue', 'HIV or AID

In [8]:
# Preview the first rows of the loaded note log.
# NOTE(review): the rendered output includes `mrn` values and note text —
# clear this cell's output before sharing or committing the notebook.
df.head()

Unnamed: 0,id,mrn,noteDate,note_text
0,3,10743972,2022-01-10 12:10:00,INSTRUCTIONS FOR YOU:<br />Your Care Provider ...
1,4,3546328,2022-01-10 12:39:00,<br />Outpatient Procedure Nursing Follow up:...
2,5,10602162,2022-01-10 12:46:00,<br />\t\tPage 1<br /> <br />Discharge Reconc...
3,6,10315049,2022-01-10 11:04:00,Bronx Care Allergy and Asthma Center:<br />Enc...
4,10,3336997,2022-01-10 10:21:00,<br />Encounter Type:<br /> Encounter Type: ...


In [None]:
def seq_to_csv(seq):
    """Join the strings in ``seq`` into a single comma-separated string.

    Args:
        seq: Iterable of strings.

    Returns:
        The items separated by ``','`` with no trailing comma, or ``''`` when
        ``seq`` is empty.

    Raises:
        TypeError: If an item is not a string.
    """
    # NOTE(review): items are not quoted or escaped, so a phrase that itself
    # contains a comma (the master list has entries such as 'Cancer, Breast')
    # cannot be told apart from two separate items in the result.
    # str.join replaces the manual loop, which re-copied the growing string on
    # every iteration and then had to trim the trailing comma.
    return ','.join(seq)

def search_chrcond(note):
    """Return the chronic-condition phrases found in ``note`` as one string.

    Args:
        note: Text of a patient note. ``None`` and any float (which covers the
            NaN pandas uses for a missing value) are treated as "no text".

    Returns:
        The distinct matched phrases, sorted and joined with ``','`` by
        ``seq_to_csv``; ``''`` when the note is missing or nothing matched.
    """
    if (note is None) or isinstance(note, float):
        return ''
    doc = nlp(note)
    # A set keeps each matched phrase once, however often it occurs in the note.
    found = {doc[start:end].text for _match_id, start, end in phrase_matcher(doc)}
    # Sort before joining: iteration order of a set of strings differs between
    # interpreter runs, which made the exported column non-reproducible.
    return seq_to_csv(sorted(found))

In [None]:
# Run the phrase search over every note and store the comma-separated result
# alongside the note in a new `chronic_cond` column.
note_conditions = df['note_text'].map(search_chrcond)
df['chronic_cond'] = note_conditions
df

In [None]:
# Keep only the notes in which at least one chronic condition was matched
# (an empty string in `chronic_cond` means no match).
has_condition = df['chronic_cond'].ne('')
df_chronic = df[has_condition]
df_chronic

In [None]:
# Write the notes that matched a chronic condition to chronic_patient.csv;
# index=False leaves the DataFrame index out of the file.
df_chronic.to_csv('./chronic_patient.csv', index=False)

In [None]:
# First five rows of the column at position 3. Going by the header shown for
# df.head() (position 0 is `Unnamed: 0`), that is `noteDate`.
# NOTE(review): confirm whether `note_text` (position 4) was the intended column.
df.iloc[0:5, 3]