# Discharge Summary Extraction
### Author: Divya Veerapaneni MS4, Ong Lab
### Description: This ipynb extracts hospitalization data from discharge summaries as follows:
#### - EVD placement, Bolt placement, emergent surgery, mechanism of trauma, discharge exam, discharge condition, etc.
### Input: Extracted_Discharge_Summary_Parameters - discharge summaries
### Output: discharge summaries with extracted data

In [1]:
#import statements
import os
import pandas as pd
import numpy as np
import csv
from datetime import datetime 
import seaborn as sns
import matplotlib.pyplot as plt
from os import path
from scipy.stats import f_oneway
import datetime
import warnings
import statistics
warnings.filterwarnings("ignore")

In [3]:
#input file
# Load the discharge summaries from the Excel workbook into discharge_df.
# NOTE: the path below is an absolute, machine-specific location; adjust it
# before running the notebook elsewhere.
file_path = '/Users/divs/Box/1-BMC Smartguards/10-Processing and Visualization/8-TBI Pupillometry/Results/Extracted_Discharge_Summary_Parameters.xlsx'
discharge_df = pd.read_excel(file_path)

# Extract EVD Data

In [4]:
def extract_evd_info(row):
    """Flag whether a discharge summary mentions an EVD / external drain.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) whose 'Consolidate'
            entry holds the discharge-summary text.

    Returns:
        'yes' if any EVD keyword occurs in the note (case-insensitive),
        otherwise 'no'. A non-string note (e.g. NaN from an empty cell)
        yields 'no'.
    """
    note = row['Consolidate']
    if not isinstance(note, str):
        # Empty spreadsheet cells arrive as NaN (a float); nothing to search.
        return 'no'
    # Compare in lowercase so 'EVD', 'Evd' and 'External Drain' all match.
    lowered = note.lower()
    keywords = ('evd', 'external drain')
    return 'yes' if any(key in lowered for key in keywords) else 'no'
           
# Label every discharge summary with the EVD flag, then show the yes/no tally.
discharge_df['evd'] = discharge_df.apply(extract_evd_info, axis=1)
discharge_df.evd.value_counts()

no     134
yes      3
Name: evd, dtype: int64

# Extract Bolt Data

In [5]:
def extract_bolt_info(row):
    """Flag whether a discharge summary mentions a bolt / ICP monitor.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) whose 'Consolidate'
            entry holds the discharge-summary text.

    Returns:
        'yes' if any bolt/ICP keyword occurs in the note (case-insensitive),
        otherwise 'no'. A non-string note (e.g. NaN from an empty cell)
        yields 'no'.
    """
    note = row['Consolidate']
    if not isinstance(note, str):
        # Empty spreadsheet cells arrive as NaN (a float); nothing to search.
        return 'no'
    # Compare in lowercase so 'Bolt', 'ICP', 'Icp monitor', ... all match.
    lowered = note.lower()
    keywords = ('bolt', 'icp monitor', 'icp')
    return 'yes' if any(key in lowered for key in keywords) else 'no'
           
# Label every discharge summary with the bolt/ICP flag, then show the tally.
discharge_df['bolt'] = discharge_df.apply(extract_bolt_info, axis=1)
# Writing back to the input workbook is left disabled.
#discharge_df.to_excel(file_path)
discharge_df.bolt.value_counts()

no     116
yes     21
Name: bolt, dtype: int64

# Extract Surgery

In [6]:
def extract_surgery_info(row):
    """Extract the 'Major Procedures' section of a discharge summary.

    The section runs from the first 'Major Procedures' occurrence up to the
    next 'Last Recorded Vitals' marker. When no such marker follows the
    heading, the text after 'Major Procedures:' up to 200 characters past
    the start of the heading is returned instead.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) whose 'Consolidate'
            entry holds the discharge-summary text.

    Returns:
        The procedure text with the section heading removed and the
        "no procedures" boilerplate sentence collapsed to 'None'; the
        string 'None' when the note has no 'Major Procedures' section or is
        not a string (e.g. NaN from an empty cell).
    """
    note = row['Consolidate']
    if not isinstance(note, str):
        return 'None'

    start_index = note.find('Major Procedures')
    if start_index == -1:
        return 'None'

    # Look for the end marker only after the heading, so an earlier
    # 'Last Recorded Vitals' elsewhere in the note cannot hide the real one.
    end_index = note.find('Last Recorded Vitals', start_index)
    if end_index == -1:
        # No terminating marker: fall back to a fixed-size window.
        return note[start_index + len('Major Procedures:'): start_index + 200]

    surgery_info = note[start_index:end_index]
    # The original replaced the misspelled sentence twice; the correctly
    # spelled variant is handled here as well (presumably the intent of the
    # duplicated line - confirm against the note template).
    boilerplate_sentences = (
        'No Major Procedures or Diagnositic Tests were completed during your hospitalization',
        'No Major Procedures or Diagnostic Tests were completed during your hospitalization',
    )
    for sentence in boilerplate_sentences:
        surgery_info = surgery_info.replace(sentence, 'None')
    return surgery_info.replace('Major Procedures and Diagnostic Tests', '')

# Store the extracted procedures text; the next cell reads this column.
discharge_df['surgery_info'] = discharge_df.apply(extract_surgery_info, axis=1)
# Optional narrowing to MRN + surgery_info, left disabled.
# discharge_df = discharge_df[['MRN', 'surgery_info']]

# Extract Emergent Surgery Info

In [7]:
def extract_crani_info(row):
    """Flag whether the extracted surgery text mentions a cranial procedure.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) whose 'surgery_info'
            entry holds the procedures text.

    Returns:
        'yes' if any keyword occurs in the text (case-insensitive),
        otherwise 'no'. A non-string value yields 'no'.
    """
    note = row['surgery_info']
    if not isinstance(note, str):
        return 'no'
    # The keywords are lowercase, so the note is lowercased before matching;
    # otherwise capitalised procedure names such as 'Craniotomy' are missed.
    lowered = note.lower()
    keywords = (
        'craniectomy',
        'craniotomy',
        'subdural evacuation',
        'sdh evacuation',
        'crani',
        'epidural',
    )
    return 'yes' if any(key in lowered for key in keywords) else 'no'
           
# Label each row from its surgery_info text (requires the surgery cell above to have run).
discharge_df['crani_surgery'] = discharge_df.apply(extract_crani_info, axis=1)

# Extract Type of Injury (Penetrating vs. Blunt)

In [8]:
def extract_type_injury(row):
    """Classify the injury in a discharge summary as penetrating or blunt.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) whose 'Consolidate'
            entry holds the discharge-summary text.

    Returns:
        'penetrating' if any keyword occurs in the note (case-insensitive),
        otherwise 'blunt'. A non-string note (e.g. NaN from an empty cell)
        falls through to the default 'blunt'.
    """
    note = row['Consolidate']
    if not isinstance(note, str):
        # Nothing to search; keep the function's default label.
        return 'blunt'
    # Compare in lowercase so 'GSW', 'Gunshot', 'Penetrating', ... all match.
    # NOTE(review): 'shot' is a broad substring and negated phrases such as
    # 'non-penetrating' also match - confirm this is acceptable.
    lowered = note.lower()
    keywords = ('gsw', 'penetrating', 'gunshot', 'bullet', 'shot')
    return 'penetrating' if any(key in lowered for key in keywords) else 'blunt'

# Label each discharge summary as 'penetrating' or 'blunt'.
discharge_df['type_of_injury'] = discharge_df.apply(extract_type_injury, axis=1)

# Extract Discharge Exams

In [10]:
def extract_exam(row):
    """Extract the discharge-exam section of a discharge summary.

    The section starts at the first "Discharge Exam" occurrence and ends
    just before the next "Disposition" that follows it; when no
    "Disposition" follows, the rest of the note is returned.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) whose 'Consolidate'
            entry holds the discharge-summary text.

    Returns:
        The section text including the "Discharge Exam" heading, or '' when
        the note is not a string or has no such heading.
    """
    text = row['Consolidate']
    if not isinstance(text, str):
        return ''

    exam_ind = text.find("Discharge Exam")
    if exam_ind == -1:
        return ''

    # Search for the end marker only after the heading: an earlier
    # "Disposition" elsewhere in the note must not disable truncation.
    disc_cond_ind = text.find("Disposition", exam_ind)
    if disc_cond_ind == -1:
        return text[exam_ind:]
    return text[exam_ind:disc_cond_ind]

# Store the extracted exam section; extract_general later reads the 'Exam' column.
discharge_df['Exam'] = discharge_df.apply(extract_exam, axis=1)

# Extract Discharge Conditions


In [11]:
def extract_dc_condition(row):
    """Extract the text following "Discharged Condition" in a summary.

    The text starts two characters after the heading (skipping what is
    presumably a ': ' separator - confirm against the note template) and
    ends just before the next "Disposition" that follows the heading; when
    no "Disposition" follows, the rest of the note is returned.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) whose 'Consolidate'
            entry holds the discharge-summary text.

    Returns:
        The condition text, or '' when the note is not a string or has no
        "Discharged Condition" heading.
    """
    heading = "Discharged Condition"
    text = row['Consolidate']
    if not isinstance(text, str):
        return ''

    heading_ind = text.find(heading)
    if heading_ind == -1:
        return ''

    content_start = heading_ind + len(heading) + 2
    # Search for the end marker only after the heading: an earlier
    # "Disposition" elsewhere in the note must not disable truncation.
    disposition_ind = text.find("Disposition", heading_ind)
    if disposition_ind == -1:
        return text[content_start:]
    return text[content_start:disposition_ind]

# Store the extracted discharge-condition text as a new column.
discharge_df['Discharge Condition'] = discharge_df.apply(extract_dc_condition, axis=1) 

# Extract Weight Bearing Restrictions

In [None]:
def extract_weight_bearing(row):
    """Extract the text following 'Weight Bearing Restrictions' in a summary.

    The text starts two characters after the heading (skipping what is
    presumably a ': ' separator - confirm against the note template) and
    ends just before the next 'Discharged Condition' that follows the
    heading; when none follows, the rest of the note is returned.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) whose 'Consolidate'
            entry holds the discharge-summary text.

    Returns:
        The restrictions text, or '' when the note is not a string or has
        no 'Weight Bearing Restrictions' heading.
    """
    keyword = 'Weight Bearing Restrictions'
    text = row['Consolidate']
    if not isinstance(text, str):
        return ''

    keyword_ind = text.find(keyword)
    if keyword_ind == -1:
        return ''

    content_start = keyword_ind + len(keyword) + 2
    # Search for the end marker only after the heading: an earlier
    # 'Discharged Condition' elsewhere in the note must not disable truncation.
    end_ind = text.find('Discharged Condition', keyword_ind)
    if end_ind == -1:
        return text[content_start:]
    return text[content_start:end_ind]

# apply(..., axis=1) returns one value per row (a Series), so the result is
# stored as a new column instead of replacing the whole table.
discharge_df['Weight Bearing Restrictions'] = discharge_df.apply(extract_weight_bearing, axis=1)
# Later cells refer to the table as discharge_note_df; bind that name to the
# same DataFrame so the column lookup below and extract_general both work.
discharge_note_df = discharge_df
discharge_note_df['Weight Bearing Restrictions']

# Extract General Condition

In [None]:
def helper_extract_general(text, mrn):
    """Extract the 'General:' entry from an exam text.

    The entry runs from the first 'General:' up to (not including) the next
    ':' after it, so it may end with the label of the following entry. When
    no later ':' exists, the first 100 characters starting at 'General:' are
    returned instead.

    Args:
        text: Exam text to search; non-string values are treated as missing.
        mrn: Unused; kept so existing callers keep working.

    Returns:
        The extracted text including the 'General:' label, or '' when the
        text is not a string or has no 'General:' label.
    """
    keyword = 'General:'
    if not isinstance(text, str):
        return ''

    start = text.find(keyword)
    if start == -1:
        return ''

    # Search for the next colon after the label itself. Passing the start
    # offset to find() keeps the result an absolute index into text, which
    # avoids the original's missing len(keyword) offset (a truncated slice).
    next_colon = text.find(':', start + len(keyword))
    if next_colon == -1:
        # No following entry: fall back to a fixed-size window.
        return text[start:start + 100]
    return text[start:next_colon]

def extract_general(df):
    """Add a 'General' column derived from each row's 'Exam' text.

    Every row's 'Exam' and 'MRN' values are passed to
    helper_extract_general; the results are written to df['General'] in row
    order. The frame is modified in place and also returned.
    """
    df['General'] = [
        helper_extract_general(record['Exam'], record['MRN'])
        for _, record in df.iterrows()
    ]
    return df

# Add the 'General' column (extract_general reads the 'Exam' and 'MRN' columns), then display it.
discharge_note_df = extract_general(discharge_note_df)  
discharge_note_df['General']