# Project: ICD-AIS conversion using Deep Learning

This script converts 2013 and 2014 NTDB data into CSVs that can be used to train a deep learning model that converts ICD codes to AIS codes.

To run this file you will need to obtain the NTDB data from the American College of Surgeons TQIP
- You can request this data here: https://www.facs.org/quality-programs/trauma/quality/national-trauma-data-bank/datasets/

In [None]:
import modules.helper_functions_000 as hlp
import os

## Set parameters
* You can choose to have prefixes for the different codes
* You can remove severity from AIS codes
* You can place ages in decade bins
* You can set a minimum age

In [9]:
# Settings consumed by the cells below: the first group is passed to
# hlp.create_inclusive_df, the second group controls the test/validation split.
years = [2013, 2014] # NTDB data years to include
prefix_age = True # add 'A' designator to age
prefix_ecodes = True # add 'E' designator to E-codes
prefix_pcodes = True # add 'P' designator to P-codes
prefix_dcodes = True # add 'D' designator to D-codes
truncate_ais = True # remove severity designation of AIS codes
age_cat = False  # place ages in decade bins
min_age = 18 # minimum age of patient

test_percent = 0.1 # fraction (not a percentage) of rows used for testing; 0.1 = 10%
validation_num = 2000 # number of cases for validation

## Load files and convert separate CSVs to one pandas dataframe

In [10]:
%%time
# Build the combined dataframe. Columns: inc_key, age, year of admission,
# ICD-9 E-code, and lists of ICD-9 P-codes, ICD-9 D-codes and AIS05 codes.
# Arguments are passed positionally, in the order the helper is called with
# throughout this notebook.
pt_df = hlp.create_inclusive_df(
    years,
    prefix_age,
    prefix_ecodes,
    prefix_pcodes,
    prefix_dcodes,
    truncate_ais,
    age_cat,
    min_age,
)

CPU times: user 1min 39s, sys: 2.46 s, total: 1min 41s
Wall time: 1min 41s


In [11]:
# Display the combined dataframe to sanity-check its columns and row count
pt_df

Unnamed: 0,INC_KEY,AGE,YOADMIT,ECODE,PCODES,DCODES,AIS05CODE
0,14000000,A79,2013,E880.9,"[P87.03, P87.44]",[D850.0],[161001]
1,14000002,A55,2013,E884.9,[P-2],"[D807.03, D805.4, D805.2]","[650630, 450203, 650416]"
2,14000003,A70,2013,E885.9,"[P79.35, P87.03]",[D820.21],[853151]
3,14000004,A56,2013,E881.0,"[P88.38, P88.01]","[D825.20, D823.01, D805.4]","[857200, 650616, 854441]"
4,14000005,A40,2013,E986,[P54.11],[D879.4],[516000]
...,...,...,...,...,...,...,...
997167,140869291,A82,2014,E812.0,"[P79.35, P99.04, P88.29]","[D920, D873.42, D820.22]","[853271, 210202, 210402]"
997168,140869379,A73,2014,E888.8,[P-1],"[D959.01, D810.00, D807.06]",[450210]
997169,140869626,A77,2014,E888.8,[P-1],"[D910.0, D852.02]",[110202]
997170,140870093,A47,2014,E826.1,[P96.04],"[D916.0, D910.0, D803.20]",[810202]


## Write out data files

In [12]:
# Columns written to the *_icd_* output files
ICD_cols = ['AGE','ECODE','PCODES','DCODES']
# Column written to the *_ais_* output files
AIS_cols = ['AIS05CODE']

In [13]:
# Number of rows in the test split: the test_percent fraction of all rows,
# rounded to an integer
num_test = round(test_percent * len(pt_df))

In [14]:
outdir = '../Data'
# makedirs(exist_ok=True) replaces the check-then-create pair: no race between
# the existence test and the mkdir, and missing parent directories are created.
os.makedirs(outdir, exist_ok=True)

# Tag embedded in every file name: 'agecat' when ages are binned, 'pre' otherwise.
age_tag = 'agecat' if age_cat else 'pre'

# Row ranges for each split, taken in dataframe order: the first num_test rows
# are the test set, the next validation_num rows the validation set, and the
# remainder the training set.
# NOTE(review): rows are sliced as-is with no shuffling here; the preview of
# pt_df appears ordered by INC_KEY/year -- confirm this ordering is intended
# for the test/validation splits.
val_end = num_test + validation_num
splits = (
    ('test', slice(None, num_test)),
    ('val', slice(num_test, val_end)),
    ('train', slice(val_end, None)),
)

# Write the ICD files first, then the AIS files (same order and same file
# names as before: <outdir>/<split>_<icd|ais>_<age_tag>_I9_A05.csv).
for code_tag, cols in (('icd', ICD_cols), ('ais', AIS_cols)):
    for split_name, rows in splits:
        out_path = f"{outdir}/{split_name}_{code_tag}_{age_tag}_I9_A05.csv"
        hlp.write_pt_dat(pt_df[rows], cols, out_path)