In [1]:
%load_ext autoreload
%autoreload 2
import pandas as pd
import os

from mimic_helper_fs import get_ids_with_icd_codes, get_ids_with_kws, get_coocurring_symptoms_cpt_codes
from mimic_helper_fs import get_coocurring_symptoms_codes, get_coocurring_symptoms_kws

pd.set_option('max_rows', 500)
pd.set_option('display.width', 500)
pd.set_option('display.max_colwidth', 80)

# IPV codes + keyword sets

In [2]:
# National Hospital Ambulatory  Survey 2020 
# ICD-9 codes used: codes found in the cause of injury (rape [E960.1], spouse abuse [E967.3]) and 
#  diagnosis (adult abuse [995.80–995.83, 995.85], history of violence [V15.41, V15.42], and counseling [V61.10, V61.11]) fields
nhas_ipv_codes = ['E9601', 'E9673', '99580', '99581', '99582', '99583', '99585', 'V1541', 'V1542', 'V6110', 'V6111']

# Oregon ED code paper 2008
# Primary ICD-9 codes used: 
    #  E967.3 (battering by intimate partner), 995.81 (adult physical abuse), 995.80 (adult maltreatment)
# Secondary ICD-9 codes used:
    # E967.3, E960.0 (unarmed fight/brawl), 995.81 (adult physical abuse),  E968.0-E968.9 (assault by other unspecified means)
    # 995.83 (Adult sexual abuse), V61.10 (marital/partner  counseling), E962.0-E966 (assault: poison, ahnging, etc)
    # E967.9 (battering by unspecified person), E967.1 (battering by  other specified person), 995.85 adult abuse/neglect
    # V61.11 (IPV counseling), E961.0 assault by corrosive caustic substance
primary_codes = ['E9673', '99581', '99580']
secondary_codes = ['E9673', 'E9600',  '99581',  'E9680', 'E9681','E9682', 'E9683','E9684', 'E9685','E9686', 'E9687', 'E9688', 'E9689',  '99583', 
'V6110', 'E9620', 'E966', 'E9679', 'E9671', 'V6111',  '99585','E9610']
oregon_ipv_codes = list(set(primary_codes + secondary_codes))

    
# United States ED survey 2014
# ICD-9 codes used:  E967.3 (battering by intimate partner)
# Estimated p(y) = .02% 
used_ipv_codes = ['E9673']

code_sets = [('NHAS Study', nhas_ipv_codes), ('Oregon Study', oregon_ipv_codes), ('US ED Study', used_ipv_codes)]
kw_sets = [['adult physical abuse', 'adult abuse'], 
                ['adult physical abuse', 'adult abuse', 'assault'],
                ['adult physical abuse', 'adult abuse', 'maltreatment']]

# Analyzing CPT codes

In [3]:
cpt_codes = []
# https://s3.amazonaws.com/fwvcorp/wp-content/uploads/20160429154502/Preventive-Medicine-Service-Codes-and-privacy-principles.pdf
# 99381-99397

In [4]:
def column_names_to_lower(df):
    df.columns =  df.columns.str.lower()
    return df
data_path = '/home/t-dshanmugam/physionet.org/files/mimiciii/1.4/'
# Convert all filenames to lowercase for compatibility
files = os.listdir(data_path)
for file in files:        
    os.rename(data_path + file, data_path + file.lower())

english_names = pd.read_csv(data_path + 'd_icd_diagnoses.csv.gz')
cpt_code_names = pd.read_csv(data_path + 'd_cpt.csv.gz')

cpt_events = pd.read_csv(data_path + 'cptevents.csv.gz')
diagnoses = pd.read_csv(data_path + 'diagnoses_icd.csv.gz')

english_names = column_names_to_lower(english_names)
diagnoses = column_names_to_lower(diagnoses)
cpt_events = column_names_to_lower(cpt_events)

  interactivity=interactivity, compiler=compiler, result=result)


# What CPT codes are typically assigned to IPV patients?

In [37]:
# proportion is > 1 since some patients receive CPT codes in "sequence". ticket_sequence_id in this database doesn't make
# a ton of sense to me.
# also subsection_header isn't that informative but i couldn't find a direct CPT code --> normal text translator
ids, sub_d, value_counts = get_coocurring_symptoms_cpt_codes(diagnoses, cpt_events, 'hadm_id', nhas_ipv_codes, print_output=False)
cpt_codes = list(value_counts.index)
value_counts['subsection_header'] = [cpt_events[cpt_events['cpt_number'] == cpt_code]['subsectionheader'].iloc[0] for cpt_code in cpt_codes]
print(value_counts)

         # rows  # rows/# IDs            subsection_header
99232.0     198      3.473684  Hospital inpatient services
99233.0     107      1.877193  Hospital inpatient services
99291.0     104      1.824561       Critical care services
99231.0      66      1.157895  Hospital inpatient services
94003.0      49      0.859649                    Pulmonary
99254.0      22      0.385965                Consultations
99239.0      20      0.350877  Hospital inpatient services
99223.0      18      0.315789  Hospital inpatient services
99255.0      11      0.192982                Consultations
90801.0       9      0.157895                   Psychiatry
99253.0       9      0.157895                Consultations
99238.0       8      0.140351  Hospital inpatient services
99251.0       8      0.140351                Consultations
36556.0       6      0.105263        Cardiovascular system
99222.0       6      0.105263  Hospital inpatient services
90935.0       5      0.087719                     Dialys

In [38]:
ids, sub_d, value_counts = get_coocurring_symptoms_cpt_codes(diagnoses, cpt_events, 'hadm_id', oregon_ipv_codes, print_output=False)
cpt_codes = list(value_counts.index)
value_counts['subsection_header'] = [cpt_events[cpt_events['cpt_number'] == cpt_code]['subsectionheader'].iloc[0] for cpt_code in cpt_codes]
print(value_counts)

         # rows  # rows/# IDs            subsection_header
94003.0     401      1.623482                    Pulmonary
99232.0     334      1.352227  Hospital inpatient services
99231.0     215      0.870445  Hospital inpatient services
99291.0     214      0.866397       Critical care services
99233.0     139      0.562753  Hospital inpatient services
99254.0      35      0.141700                Consultations
99253.0      33      0.133603                Consultations
94002.0      29      0.117409                    Pulmonary
99223.0      28      0.113360  Hospital inpatient services
99239.0      21      0.085020  Hospital inpatient services
99238.0      20      0.080972  Hospital inpatient services
99252.0      18      0.072874                Consultations
99255.0      15      0.060729                Consultations
99222.0      15      0.060729  Hospital inpatient services
36556.0      14      0.056680        Cardiovascular system
32551.0      11      0.044534           Respiratory syst

In [39]:
ids, sub_d, value_counts = get_coocurring_symptoms_cpt_codes(diagnoses, cpt_events, 'hadm_id', used_ipv_codes, print_output=False)
cpt_codes = list(value_counts.index)
value_counts['subsection_header'] = [cpt_events[cpt_events['cpt_number'] == cpt_code]['subsectionheader'].iloc[0] for cpt_code in cpt_codes]
print(value_counts)

         # rows  # rows/# IDs            subsection_header
99232.0      11           5.5  Hospital inpatient services
99291.0       2           1.0       Critical care services
94003.0       1           0.5                    Pulmonary
99223.0       1           0.5  Hospital inpatient services
99238.0       1           0.5  Hospital inpatient services
99254.0       1           0.5                Consultations
99231.0       1           0.5  Hospital inpatient services


In [36]:
# The CPT code range recommended since it refers to an external preventative consultation?
len(cpt_events[(cpt_events['cpt_number'] > 99381) & (cpt_events['cpt_number'] < 99397)])


0