# Project: ICD-AIS conversion using Deep Learning

This notebook converts ICD-9 codes to AIS-style codes using the AAAM ICD-AIS map.

## Setup

In [1]:
import numpy as np
import pandas as pd
import sqlite3
import random
import math

## Parameters


In [2]:
# Input locations and names used by the loading cells.

# Observed ICD codes: one text line per record, tokens separated by single spaces.
icd_obs_file = "../Data/test_icd_pre_I9_A05.csv"

# AIS08 mapped-codes table (original note: maps AIS08 to AIS98).
# NOTE(review): no cell visible in this notebook reads this file - confirm it is still needed.
ais_map_codes_file = "../Data/AIS08_mapped_codes.csv"

# AAAM ICD-ISS workbook: ICD code, description, highest AIS severity,
# ISS body region and AIS chapter for each mapped code.
aaam_map_file = "../Data/AAAM ICD-ISS Map v2.0 (00082).xlsx"

# Name of the worksheet inside the AAAM workbook that holds the ICD-9 map.
aaam_map_sheet = "icd9Map"

## Load data

In [3]:
# Read the observed ICD codes. The file has no header row, so the single
# column is named "ICD9CODE" here; each line is expected to arrive as one
# space-separated string that a later cell splits apart.
icd_codes = pd.read_csv(
    icd_obs_file,
    names=["ICD9CODE"],
    header=None,
)


In [4]:
# Load the AAAM ICD-AIS map from the worksheet named in the Parameters cell
# (aaam_map_sheet) rather than repeating the sheet name as a literal, so the
# parameter is the single place to change it.
# CODE is converted with str so the ICD codes stay text and can be matched
# against the string codes from the observations file.
aaam_map = pd.read_excel(
    aaam_map_file,
    sheet_name=aaam_map_sheet,
    header=0,
    converters={'CODE': str},
)

In [5]:
# Preview the first five rows of the ICD-to-AIS map to check that the columns loaded as expected.
aaam_map.head(5)

Unnamed: 0,CODE,DESCRIPTION,HIGHEST AIS SEVERITY,ISS BODY REGION,AIS CHAPTER
0,800.0,Fracture Of Vault Of Skull,2,1,1
1,800.0,Closed Fracture Of Vault Of Skull Without Ment...,2,1,1
2,800.0,Closed fracture of vault of skull without ment...,2,1,1
3,800.01,Closed fracture of vault of skull without ment...,2,1,1
4,800.02,Closed fracture of vault of skull without ment...,2,1,1


## Create AIS code from Information

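AIS codes typically have the form [Chapter]XXXXX.[Severity]. Because the AAAM map provides only the AIS chapter, the ISS body region, and the highest severity for each ICD code, we build a surrogate code of the form [Chapter][Region]0000.[Severity] instead. These surrogate codes will not overlap any valid AIS08 codes.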

In [6]:
# Add a synthetic DEV_CODE column to the AAAM map, laid out as
#   [AIS chapter][ISS body region]0000.[highest AIS severity]
# The chapter fills the hundred-thousands digit, the body region the
# ten-thousands digit, and the severity the first decimal place.
chapter_part = aaam_map['AIS CHAPTER'] * 100_000
region_part = aaam_map['ISS BODY REGION'] * 10_000
severity_part = aaam_map['HIGHEST AIS SEVERITY'] * 0.1
aaam_map['DEV_CODE'] = chapter_part + region_part + severity_part

In [7]:
# Preview the map again to check the newly added DEV_CODE column.
aaam_map.head(5)

Unnamed: 0,CODE,DESCRIPTION,HIGHEST AIS SEVERITY,ISS BODY REGION,AIS CHAPTER,DEV_CODE
0,800.0,Fracture Of Vault Of Skull,2,1,1,110000.2
1,800.0,Closed Fracture Of Vault Of Skull Without Ment...,2,1,1,110000.2
2,800.0,Closed fracture of vault of skull without ment...,2,1,1,110000.2
3,800.01,Closed fracture of vault of skull without ment...,2,1,1,110000.2
4,800.02,Closed fracture of vault of skull without ment...,2,1,1,110000.2


## Convert ICD codes to tidy format

In [8]:
# Wide format: break each space-separated line into one column per token.
# Rows with fewer tokens than the longest row are padded with missing values.
# The row index is the only record identifier carried forward from here.
raw_lines = icd_codes["ICD9CODE"]
icd_codes = raw_lines.str.split(' ', expand=True)

In [9]:
# Display the wide table: one row per input line, one column per token position.
icd_codes

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,117,118,119,120,121,122,123,124,125,126
0,A79,E880.9,P87.03,P87.44,D850.0,,,,,,...,,,,,,,,,,
1,A55,E884.9,P-2,D805.2,D805.4,D807.03,,,,,...,,,,,,,,,,
2,A70,E885.9,P79.35,P87.03,D820.21,,,,,,...,,,,,,,,,,
3,A56,E881.0,P88.01,P88.38,D805.4,D823.01,D825.20,,,,...,,,,,,,,,,
4,A40,E986,P54.11,D879.4,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122369,A22,E881.1,P78.13,D812.40,D813.40,D813.43,D850.9,D884.0,,,...,,,,,,,,,,
122370,A66,E885.9,P-2,D852.01,D873.43,,,,,,...,,,,,,,,,,
122371,A83,E888.9,P87.03,P88.38,P93.90,D852.05,D852.25,,,,...,,,,,,,,,,
122372,A50,E816.0,P34.04,D807.09,D860.0,,,,,,...,,,,,,,,,,


In [10]:
# Long format: turn the wide table (one row per record, one column per token)
# into one row per (record, token) pair:
#
#   key  icd9_code
#   0    <token>
#   0    <token>
#   1    <token>
#   ...
#
# NOTE(review): this relies on stack() leaving out the padded empty cells -
# confirm that still holds for the pandas version in use.
stacked = pd.DataFrame(icd_codes.stack())

# Expose the two index levels (original row, token position) as columns.
stacked = stacked.reset_index()

# The token position is not needed downstream; the original row number
# becomes the record key.
stacked = stacked.drop(columns=['level_1'])
icd_codes = stacked.rename(columns={'level_0': 'key', 0: 'icd9_code'})

## Trim to only diagnosis codes

In [11]:
# Keep only diagnosis codes, i.e. tokens whose first character is 'D'.
# A prefix test is used instead of a substring search so that a token that
# merely contains a 'D' somewhere after its first character is not kept.
is_diagnosis = icd_codes['icd9_code'].str.startswith('D')
icd_codes = icd_codes[is_diagnosis].reset_index(drop=True)
# Earlier variant kept for reference (it filters on 'V' instead of 'D'):
#icd_codes = icd_codes[~icd_codes.icd9_code.str.contains('V')].reset_index(drop=True) would keep V codes

In [12]:
# Drop the leading 'D' marker so the codes are plain ICD-9 strings that can
# be matched against the CODE column of the map.
icd_codes['icd9_code'] = icd_codes['icd9_code'].str.lstrip('D')
icd_codes.head(20)

Unnamed: 0,key,icd9_code
0,0,850.0
1,1,805.2
2,1,805.4
3,1,807.03
4,2,820.21
5,3,805.4
6,3,823.01
7,3,825.2
8,4,879.4
9,5,801.22


In [13]:
# Number of diagnosis-code rows left after the 'D' filter.
len(icd_codes)

440800

## Map ICD9 to AIS 08

In [14]:
# Display the full map (including DEV_CODE) before merging it onto the observed codes.
aaam_map


Unnamed: 0,CODE,DESCRIPTION,HIGHEST AIS SEVERITY,ISS BODY REGION,AIS CHAPTER,DEV_CODE
0,800,Fracture Of Vault Of Skull,2,1,1,110000.2
1,800.0,Closed Fracture Of Vault Of Skull Without Ment...,2,1,1,110000.2
2,800.00,Closed fracture of vault of skull without ment...,2,1,1,110000.2
3,800.01,Closed fracture of vault of skull without ment...,2,1,1,110000.2
4,800.02,Closed fracture of vault of skull without ment...,2,1,1,110000.2
...,...,...,...,...,...,...
2505,959.5,Finger injury,0,4,7,740000.0
2506,959.6,Hip and thigh injury,0,4,8,840000.0
2507,959.7,Knee; leg; ankle; and foot injury,0,4,8,840000.0
2508,959.8,Other specified sites; including multiple injury,0,0,-1,-100000.0


In [15]:
# Sanity check: both join columns should hold Python strings, otherwise the
# merge keys would never compare equal.
print(type(icd_codes['icd9_code'][1]))
print(type(aaam_map['CODE'][1]))

# Left-join the map onto the observed codes so every observed row is kept;
# codes with no entry in the map get NaN in CODE and DEV_CODE.
# NOTE(review): if the map ever contains the same CODE more than once, the
# matching observed rows are duplicated - compare the row count before and after.
code_lookup = aaam_map[['CODE', 'DEV_CODE']]
icd_codes = icd_codes.merge(
    code_lookup,
    left_on='icd9_code',
    right_on='CODE',
    how='left',
)
icd_codes.head(45)

<class 'str'>
<class 'str'>


Unnamed: 0,key,icd9_code,CODE,DEV_CODE
0,0,850.0,850.0,110000.1
1,1,805.2,805.2,620000.2
2,1,805.4,805.4,630000.2
3,1,807.03,807.03,420000.3
4,2,820.21,820.21,840000.3
5,3,805.4,805.4,630000.2
6,3,823.01,823.01,840000.2
7,3,825.2,825.2,840000.2
8,4,879.4,879.4,560000.1
9,5,801.22,801.22,110000.3


In [16]:
# Order the rows by record key and, within each key, by DEV_CODE ascending.
# NOTE(review): unmapped codes are still NaN at this point (the -1 fill
# happens in a later cell), so their position within a key follows pandas'
# NaN placement rather than the value -1 - confirm that is intended.
icd_codes = icd_codes.sort_values(by=['key', 'DEV_CODE'])
icd_codes.head(45)

Unnamed: 0,key,icd9_code,CODE,DEV_CODE
0,0,850.0,850.0,110000.1
3,1,807.03,807.03,420000.3
1,1,805.2,805.2,620000.2
2,1,805.4,805.4,630000.2
4,2,820.21,820.21,840000.3
5,3,805.4,805.4,630000.2
6,3,823.01,823.01,840000.2
7,3,825.2,825.2,840000.2
8,4,879.4,879.4,560000.1
9,5,801.22,801.22,110000.3


In [17]:
# Make sure DEV_CODE is stored as 64-bit floats before the sentinel values
# are filled in by the following cells.
dev_code_as_float = icd_codes['DEV_CODE'].astype(np.float64)
icd_codes['DEV_CODE'] = dev_code_as_float
icd_codes.head(45)


Unnamed: 0,key,icd9_code,CODE,DEV_CODE
0,0,850.0,850.0,110000.1
3,1,807.03,807.03,420000.3
1,1,805.2,805.2,620000.2
2,1,805.4,805.4,630000.2
4,2,820.21,820.21,840000.3
5,3,805.4,805.4,630000.2
6,3,823.01,823.01,840000.2
7,3,825.2,825.2,840000.2
8,4,879.4,879.4,560000.1
9,5,801.22,801.22,110000.3


In [18]:
# Flag ICD codes that found no match in the map with -1. After the left
# merge those rows have NaN in DEV_CODE.
# Only DEV_CODE is filled: the -1 sentinel is numeric and is only read from
# DEV_CODE afterwards, whereas filling the whole frame would also write a
# number into the string CODE column.
icd_codes['DEV_CODE'] = icd_codes['DEV_CODE'].fillna(-1)

In [19]:
# Map the "unspecified" code to 0. A map row with AIS chapter -1, body
# region 0 and severity 0 yields a DEV_CODE of exactly -100000.
# NOTE(review): a chapter -1 row with a non-zero severity would not equal
# -100000 exactly and would be left unchanged - confirm none exist.
icd_codes = icd_codes.replace(to_replace=-100000, value=0)

## Evaluate map

In [20]:
# Report how many code rows carry the -1 sentinel (no entry in the map),
# as a count and as a percentage of all code rows.
unmatched_count = sum(icd_codes['DEV_CODE'] == -1)
print("Unmatched", unmatched_count)
print("% Unmatched", unmatched_count / len(icd_codes) * 100)

Unmatched 20485
% Unmatched 4.647232304900181


In [21]:
# Report how many code rows were mapped to the "unspecified" value 0,
# as a count and as a percentage of all code rows.
unspecified_count = sum(icd_codes['DEV_CODE'] == 0)
print("Unspecified", unspecified_count)
print("% Unspecified", unspecified_count / len(icd_codes) * 100)

Unspecified 2266
% Unspecified 0.514065335753176


In [22]:
# Number of code rows after mapping. Because the map was left-joined, this
# should equal the row count before the merge; a larger value would mean
# some CODE appears more than once in the map.
len(icd_codes)

440800

In [23]:
# Number of distinct record keys that still have at least one diagnosis code.
distinct_keys = icd_codes['key'].unique()
len(distinct_keys)

122374

## Convert to codes list

In [24]:
# Collapse to one row per key holding the list of that key's DEV_CODE values
# (in the row order established by the earlier sort), then drop the key.
# NOTE(review): keys with no diagnosis code have no row here, so once 'key'
# is dropped the output row number only matches the input line number if
# every input line had at least one 'D' code - confirm.
codes_per_key = icd_codes.groupby('key')['DEV_CODE'].apply(list)
codes_per_key = codes_per_key.reset_index(name='DEV_CODE')
icd_codes = codes_per_key.drop(columns=['key'])

In [25]:
# Preview: each row now holds the list of DEV_CODE values for one key.
icd_codes.head(5)

Unnamed: 0,DEV_CODE
0,[110000.1]
1,"[420000.3, 620000.2, 630000.2]"
2,[840000.3]
3,"[630000.2, 840000.2, 840000.2]"
4,[560000.1]


## Store results

In [26]:
# Write one line per key with no header and no index column; each line holds
# the string form of that key's DEV_CODE list.
results_file = "../Results/test_ais_pred_aaam_map.csv"
icd_codes.to_csv(results_file, header=False, index=False)