In [1]:
%load_ext autoreload 
%autoreload 2
import pandas as pd
import os

from mimic_helper_fs import get_ids_with_icd_codes, get_ids_with_kws
from mimic_helper_fs import get_coocurring_symptoms_codes, get_coocurring_symptoms_kws

# Display options for the wide co-occurrence tables printed further down.
# The fully qualified option name is required: the bare pattern 'max_rows' is
# ambiguous in recent pandas releases (it also matches 'styler.render.max_rows')
# and raises OptionError ("Pattern matched multiple keys").
pd.set_option('display.max_rows', 500)
pd.set_option('display.width', 500)
pd.set_option('display.max_colwidth', 80)

# Comparison of IPV coding strategies

This notebook looks at three ICD-9 code sets and three keyword sets for identifying intimate partner violence (IPV), and compares the co-occurring diagnoses for each set. The CPT code strategy is covered in a different notebook.

In [2]:
# --- National Hospital Ambulatory Survey 2020 -------------------------------
# ICD-9 codes taken from the cause-of-injury fields (rape [E960.1], spouse abuse
# [E967.3]) and from the diagnosis fields (adult abuse [995.80-995.83, 995.85],
# history of violence [V15.41, V15.42], counseling [V61.10, V61.11]).
# Codes are written without the decimal point.
nhas_ipv_codes = [
    'E9601', 'E9673',
    '99580', '99581', '99582', '99583', '99585',
    'V1541', 'V1542',
    'V6110', 'V6111',
]

# --- Oregon ED code paper 2008 ----------------------------------------------
# Primary ICD-9 codes:
#   E967.3 (battering by intimate partner), 995.81 (adult physical abuse),
#   995.80 (adult maltreatment)
# Secondary ICD-9 codes:
#   E967.3, E960.0 (unarmed fight/brawl), 995.81 (adult physical abuse),
#   E968.0-E968.9 (assault by other unspecified means),
#   995.83 (adult sexual abuse), V61.10 (marital/partner counseling),
#   E962.0-E966 (assault: poison, hanging, etc.),
#   E967.9 (battering by unspecified person),
#   E967.1 (battering by other specified person), 995.85 (adult abuse/neglect),
#   V61.11 (IPV counseling), E961.0 (assault by corrosive caustic substance)
#
# NOTE(review): E968.0-E968.9 is expanded to all ten codes below, but the
# E962.0-E966 range only contributes its two endpoints ('E9620', 'E966') --
# confirm whether the intermediate codes were meant to be included.
# NOTE(review): 'E9610' carries a fourth digit; confirm this code actually
# occurs in the diagnoses table (it may be stored as 'E961').
primary_codes = ['E9673', '99581', '99580']
secondary_codes = (
    ['E9673', 'E9600', '99581']
    + ['E968%d' % last_digit for last_digit in range(10)]  # E968.0 .. E968.9
    + ['99583', 'V6110', 'E9620', 'E966', 'E9679', 'E9671', 'V6111', '99585', 'E9610']
)
# Union of both lists with duplicates removed (element order is arbitrary).
oregon_ipv_codes = list(set(primary_codes + secondary_codes))

# --- United States ED survey 2014 -------------------------------------------
# ICD-9 codes used: E967.3 (battering by intimate partner)
# Estimated p(y) = .02%
used_ipv_codes = ['E9673']

# (display name, code list) pairs iterated over by the comparison cells.
code_sets = [
    ('NHAS Study', nhas_ipv_codes),
    ('Oregon Study', oregon_ipv_codes),
    ('US ED Study', used_ipv_codes),
]

# Keyword sets: the two shared base phrases, alone and with one extra term each.
kw_sets = [
    ['adult physical abuse', 'adult abuse'] + extra_terms
    for extra_terms in ([], ['assault'], ['maltreatment'])
]

In [3]:
def column_names_to_lower(df):
    """Lowercase every column label of ``df`` in place and return ``df``.

    The frame passed in is modified (its ``columns`` attribute is reassigned);
    the same object is returned so the call can be used in an assignment.
    """
    lowered_labels = df.columns.str.lower()
    df.columns = lowered_labels
    return df

# Location of the MIMIC-III v1.4 CSV files. You will likely need to change this
# for the notebook to work: set the MIMIC_DATA_PATH environment variable rather
# than editing the cell. The default is the original author's local path.
data_path = os.environ.get('MIMIC_DATA_PATH',
                           '/home/t-dshanmugam/physionet.org/files/mimiciii/1.4/')
# Guarantee a trailing separator so that `data_path + filename` keeps working.
data_path = os.path.join(data_path, '')

# Convert all filenames to lowercase for compatibility with the lowercase names
# read below. Files that are already lowercase are skipped, so a second run
# performs no renames at all.
files = os.listdir(data_path)
for file in files:
    lowered_name = file.lower()
    if lowered_name != file:
        os.rename(os.path.join(data_path, file), os.path.join(data_path, lowered_name))

english_names = pd.read_csv(os.path.join(data_path, 'd_icd_diagnoses.csv.gz'))
diagnoses = pd.read_csv(os.path.join(data_path, 'diagnoses_icd.csv.gz'))

english_names = column_names_to_lower(english_names)
diagnoses = column_names_to_lower(diagnoses)

# Attach the ICD dictionary columns to every diagnosis row. The inner join drops
# diagnosis rows whose code is missing from the dictionary table;
# validate='many_to_one' makes pandas raise if the dictionary has duplicate codes.
old_len = len(diagnoses)
diagnoses = pd.merge(diagnoses,
                     english_names,
                     how='inner',
                     on=['icd9_code'],
                     validate='many_to_one')
# Only warn when the join actually lost rows (this also avoids dividing by zero
# on an empty table). The percentage printed is the share of rows retained.
if len(diagnoses) != old_len:
    print("Warning: not all diagnoses are in ICD table: old len %i, new len %i (%2.3f%%)" %
        (old_len, len(diagnoses), 100*len(diagnoses)/old_len))



In [20]:
# Count how many distinct IDs each code set / keyword set flags, and report that
# count as a fraction p(y) of all distinct IDs in the diagnoses table.
id_type = 'subject_id'
n_total_ids = len(set(diagnoses[id_type]))

for ipv_code_name, ipv_codes in code_sets:
    matched_ids = get_ids_with_icd_codes(diagnoses, id_type, ipv_codes)
    n_patients = len(matched_ids)
    p_y = n_patients/n_total_ids
    print("# IPV patients using %s codes: " % ipv_code_name, n_patients, '\t p(y): ', p_y)

for kw_set in kw_sets:
    matched_ids = get_ids_with_kws(diagnoses, id_type, kw_set)
    n_patients = len(matched_ids)
    p_y = n_patients/n_total_ids
    print("# IPV patients w/ kwds [%s]: " % ".".join(kw_set), n_patients, '\t p(y): ', p_y)

# 5 patients were admitted twice for IPV under NHAS Study codes
# 13 patients were admitted twice for IPV under Oregon Study codes
# NOTE(review): the recorded runs show 247 hadm_ids vs 244 subject_ids for the
# Oregon codes, i.e. 3 extra admissions rather than 13 -- confirm the figure above.

# Output recorded from a previous run:
# IPV patients using NHAS Study codes:  52 	 p(y):  0.0011178708859126772
# IPV patients using Oregon Study codes:  244 	 p(y):  0.005245394156974869
# IPV patients using US ED Study codes:  2 	 p(y):  4.2995034073564504e-05
# IPV patients w/ kwds [adult physical abuse.adult abuse]:  15 	 p(y):  0.0003224627555517338
# IPV patients w/ kwds [adult physical abuse.adult abuse.assault]:  234 	 p(y):  0.005030418986607047
# IPV patients w/ kwds [adult physical abuse.adult abuse.maltreatment]:  15 	 p(y):  0.0003224627555517338


In [15]:
# Diagnoses co-occurring with the NHAS Study code set, keyed by hadm_id.
ids, sub_d = get_coocurring_symptoms_codes(
    diagnoses,
    id_type='hadm_id',
    codes=nhas_ipv_codes,
)

# Codes: 11, hadm_id. Total IDs: 57; total diagnoses: 999
                                                                                  # rows  # rows/# IDs
History of physical abuse                                                             42      0.736842
Tobacco use disorder                                                                  17      0.298246
Depressive disorder, not elsewhere classified                                         17      0.298246
Unspecified essential hypertension                                                    13      0.228070
Posttraumatic stress disorder                                                         12      0.210526
Other chronic pain                                                                    11      0.192982
Esophageal reflux                                                                      9      0.157895
Acute posthemorrhagic anemia                                                           9      0.157895
Anxiety state, 

In [16]:
# Diagnoses co-occurring with the Oregon Study code set, keyed by hadm_id.
ids, sub_d = get_coocurring_symptoms_codes(
    diagnoses,
    id_type='hadm_id',
    codes=oregon_ipv_codes,
)

# Codes: 23, hadm_id. Total IDs: 247; total diagnoses: 2077
                                                         # rows  # rows/# IDs
Assault by cutting and piercing instrument                   72      0.291498
Unarmed fight or brawl                                       67      0.271255
Assault by unspecified means                                 53      0.214575
Alcohol abuse, unspecified                                   40      0.161943
Assault by striking by blunt or thrown object                36      0.145749
Unspecified essential hypertension                           33      0.133603
Closed fracture of other facial bones                        31      0.125506
Closed fracture of malar and maxillary bones                 29      0.117409
Closed fracture of nasal bones                               28      0.113360
Alcohol withdrawal                                           23      0.093117
Tobacco use disorder                                         23      0.093117
Acut

In [17]:
# Diagnoses co-occurring with the US ED Study code set, keyed by hadm_id.
# Only 2 IDs match here (N = 2), so the ratios below carry little information.
ids, sub_d = get_coocurring_symptoms_codes(
    diagnoses,
    id_type='hadm_id',
    codes=used_ipv_codes,
)

# Codes: 1, hadm_id. Total IDs: 2; total diagnoses: 38
                                                                                  # rows  # rows/# IDs
Perpetrator of child and adult abuse, by spouse or partner                             2           1.0
Assault by unspecified means                                                           1           0.5
Hypoglycemia, unspecified                                                              1           0.5
Depressive disorder, not elsewhere classified                                          1           0.5
Proteus (mirabilis) (morganii) infection in conditions classified elsewhere a...       1           0.5
Personal history of tobacco use                                                        1           0.5
Other and unspecified manifestations of thiamine deficiency                            1           0.5
Adult maltreatment, unspecified                                                        1           0.5
Tobacco use disord

In [22]:
# Diagnoses co-occurring with the base keyword set.
# NOTE(review): this cell keys on subject_id while the other co-occurrence
# cells key on hadm_id -- confirm the difference is intentional.
ids, sub_d = get_coocurring_symptoms_kws(
    diagnoses,
    id_type='subject_id',
    query=['adult physical abuse', 'adult abuse'],
)

Query: ['adult physical abuse', 'adult abuse'], subject_id. Total IDs: 15; total diagnoses: 363
                                                                                  # rows  # rows/# IDs
Urinary tract infection, site not specified                                            7      0.466667
Perpetrator of child and adult abuse, by father, stepfather, or boyfriend              6      0.400000
Tobacco use disorder                                                                   6      0.400000
Diabetes mellitus without mention of complication, type II or unspecified typ...       5      0.333333
Acute respiratory failure                                                              5      0.333333
Unspecified essential hypertension                                                     5      0.333333
Depressive disorder, not elsewhere classified                                          5      0.333333
Acute kidney failure, unspecified                                               

In [23]:
# Diagnoses co-occurring with the base keywords plus 'assault', keyed by hadm_id.
ids, sub_d = get_coocurring_symptoms_kws(
    diagnoses,
    id_type='hadm_id',
    query=['adult physical abuse', 'adult abuse', 'assault'],
)

Query: ['adult physical abuse', 'adult abuse', 'assault'], hadm_id. Total IDs: 236; total diagnoses: 2060
                                                                  # rows  # rows/# IDs
Assault by cutting and piercing instrument                            72      0.305085
Assault by unspecified means                                          53      0.224576
Assault by striking by blunt or thrown object                         36      0.152542
Alcohol abuse, unspecified                                            32      0.135593
Unspecified essential hypertension                                    26      0.110169
Acute posthemorrhagic anemia                                          26      0.110169
Assault by handgun                                                    25      0.105932
Closed fracture of other facial bones                                 24      0.101695
Assault by other and unspecified firearm                              24      0.101695
Closed fracture of malar

In [11]:
# Check for codes starting with 'T74' (an ICD-10-style prefix; the column holds
# ICD-9 codes, so an empty result is expected -- TODO confirm).
# The vectorised .str accessor with na=False treats missing / non-string codes
# as non-matches instead of raising AttributeError as a per-element lambda would.
diagnoses.loc[diagnoses['icd9_code'].str.startswith('T74', na=False), 'long_title'].value_counts()

Series([], Name: long_title, dtype: int64)

In [13]:
# Check for codes starting with 'T76' (an ICD-10-style prefix; the column holds
# ICD-9 codes, so an empty result is expected -- TODO confirm).
# The vectorised .str accessor with na=False treats missing / non-string codes
# as non-matches instead of raising AttributeError as a per-element lambda would.
diagnoses.loc[diagnoses['icd9_code'].str.startswith('T76', na=False), 'long_title'].value_counts()

Series([], Name: long_title, dtype: int64)