# Project: ICD-AIS conversion using Deep Learning

This notebook converts ICD-9 codes to AIS codes using the AAAM ICD-AIS map.

## Setup

In [7]:
import numpy as np
import pandas as pd
import sqlite3
import random
import math

## Parameters


In [8]:
# Input locations and the worksheet name used by the cells below.
# observed ICD code records to be converted
icd_obs_file = "../Data/test_icd_pre_I9_A05.csv"
# list of mapped AIS08 codes (not referenced by the cells shown here)
ais_map_codes_file = "../Data/AIS08_mapped_codes.csv"
# AAAM ICD-to-AIS map workbook and the worksheet holding the ICD-9 map
aaam_map_file = "../Data/AAAM ICD-ISS Map v2.0 (00082).xlsx"
aaam_map_sheet = "icd9Map"

## Load data

In [9]:
# Load the observed ICD codes. The file is read without a header row and
# its column is named ICD9CODE.
icd_codes = pd.read_csv(
    icd_obs_file,
    header=None,
    names=["ICD9CODE"],
)

In [10]:
# Load the ICD-AIS map from the worksheet named by the aaam_map_sheet
# parameter, rather than repeating the sheet name here, so that the
# parameter actually controls which sheet is read.
# CODE goes through str so the ICD codes are kept as text.
# NOTE(review): the preview of this table shows the same CODE text on several
# rows - confirm the workbook stores codes as text, so that trailing zeros
# are not lost before the str conversion.
aaam_map = pd.read_excel(
    aaam_map_file,
    sheet_name=aaam_map_sheet,
    header=0,
    converters={'CODE': str},
)

In [11]:
# preview the first five rows of the loaded map
aaam_map.head(5)

Unnamed: 0,CODE,DESCRIPTION,HIGHEST AIS SEVERITY,ISS BODY REGION,AIS CHAPTER
0,800.0,Fracture Of Vault Of Skull,2,1,1
1,800.0,Closed Fracture Of Vault Of Skull Without Ment...,2,1,1
2,800.0,Closed fracture of vault of skull without ment...,2,1,1
3,800.01,Closed fracture of vault of skull without ment...,2,1,1
4,800.02,Closed fracture of vault of skull without ment...,2,1,1


## Create AIS-style codes from the map information

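AIS codes typically have the form [Chapter]XXXXX.[Severity]. The AAAM map provides only the AIS chapter, the ISS body region, and the highest AIS severity for each ICD code, so we build a placeholder code of the form [Chapter][Region]0000.[Severity] (for example, chapter 1, region 1, severity 2 becomes 110000.2). These placeholder codes will not overlap any valid AIS08 codes.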

In [12]:
# Build the placeholder code [Chapter][Region]0000.[Severity]: the chapter
# is scaled by 100,000, the ISS body region by 10,000, and the severity is
# placed in the first decimal.
chapter_part = aaam_map['AIS CHAPTER'] * 100_000
region_part = aaam_map['ISS BODY REGION'] * 10_000
severity_part = aaam_map['HIGHEST AIS SEVERITY'] * 0.1
aaam_map['DEV_CODE'] = chapter_part + region_part + severity_part

In [13]:
# preview the first five rows again, now including the DEV_CODE column
aaam_map.head(5)

Unnamed: 0,CODE,DESCRIPTION,HIGHEST AIS SEVERITY,ISS BODY REGION,AIS CHAPTER,DEV_CODE
0,800.0,Fracture Of Vault Of Skull,2,1,1,110000.2
1,800.0,Closed Fracture Of Vault Of Skull Without Ment...,2,1,1,110000.2
2,800.0,Closed fracture of vault of skull without ment...,2,1,1,110000.2
3,800.01,Closed fracture of vault of skull without ment...,2,1,1,110000.2
4,800.02,Closed fracture of vault of skull without ment...,2,1,1,110000.2


## Convert ICD codes to tidy (long) format

In [14]:
# Split each record on single spaces into one column per token (wide
# format); records with fewer tokens have missing values in the remaining
# columns.
icd_codes = icd_codes['ICD9CODE'].str.split(' ', expand=True)

In [15]:
# display the wide-format table (one column per token position)
icd_codes

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,117,118,119,120,121,122,123,124,125,126
0,A79,E880.9,P87.03,P87.44,D850.0,,,,,,...,,,,,,,,,,
1,A55,E884.9,P-2,D805.2,D805.4,D807.03,,,,,...,,,,,,,,,,
2,A70,E885.9,P79.35,P87.03,D820.21,,,,,,...,,,,,,,,,,
3,A56,E881.0,P88.01,P88.38,D805.4,D823.01,D825.20,,,,...,,,,,,,,,,
4,A40,E986,P54.11,D879.4,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122369,A22,E881.1,P78.13,D812.40,D813.40,D813.43,D850.9,D884.0,,,...,,,,,,,,,,
122370,A66,E885.9,P-2,D852.01,D873.43,,,,,,...,,,,,,,,,,
122371,A83,E888.9,P87.03,P88.38,P93.90,D852.05,D852.25,,,,...,,,,,,,,,,
122372,A50,E816.0,P34.04,D807.09,D860.0,,,,,,...,,,,,,,,,,


In [16]:
# Convert to long format: one row per token. The outer index level (the
# original row number) becomes the 'key' column, the token position is
# discarded, and the token itself is stored as 'icd9_code'.
stacked = icd_codes.stack()
stacked = stacked.reset_index(level=1, drop=True).rename_axis('key')
icd_codes = stacked.reset_index(name='icd9_code')

## Trim to only diagnosis codes

In [17]:
# Keep only diagnosis codes, i.e. tokens that start with the 'D' prefix.
# This is a prefix test, so a 'D' elsewhere in a token cannot let a
# non-diagnosis token through.
icd_codes = icd_codes[icd_codes.icd9_code.str.startswith('D')].reset_index(drop=True)
#icd_codes = icd_codes[~icd_codes.icd9_code.str.contains('V')].reset_index(drop=True)

In [18]:
# strip the leading 'D' marker so only the ICD-9 code itself remains
icd_codes['icd9_code'] = icd_codes['icd9_code'].str.lstrip('D')

In [19]:
# number of diagnosis codes remaining after the filter
len(icd_codes)

440800

## Map ICD9 to AIS 08

In [20]:
# Attach the map's DEV_CODE to every observed code. The left join keeps
# observed codes that have no map entry (their DEV_CODE is NaN afterwards).
# The map preview shows the same CODE on several rows, and a repeated key on
# the right side of a merge multiplies the matching left rows. Exact duplicate
# (CODE, DEV_CODE) pairs are therefore collapsed before merging.
code_lookup = aaam_map[['CODE', 'DEV_CODE']].drop_duplicates()
icd_codes = icd_codes.merge(code_lookup, how='left', left_on='icd9_code', right_on='CODE')

In [21]:
# Sort each patient's codes in ascending order.
# Unmapped codes are still NaN here and are filled with -1 further down, and
# -100000 is later recoded to 0. Placing NaN first (instead of the default
# last) keeps the final per-patient lists ascending after those steps:
# -1, then 0, then the mapped codes.
icd_codes = icd_codes.sort_values(['key', 'DEV_CODE'], na_position='first')

In [22]:
# make sure DEV_CODE is stored as 64-bit floating point numbers
icd_codes['DEV_CODE'] = icd_codes['DEV_CODE'].astype(np.float64)

In [23]:
# Fill in unmapped codes with -1. Only DEV_CODE is filled, so the text
# column CODE keeps its missing marker instead of receiving a numeric -1.
icd_codes = icd_codes.fillna({'DEV_CODE': -1})

In [24]:
# Fill in unspecified codes with 0: a DEV_CODE of -100000 is recoded to 0.
# Only DEV_CODE is touched, so an equal value in any other column cannot be
# rewritten by accident.
# NOTE(review): -100000 presumably comes from map rows with chapter -1 and
# region/severity 0 - confirm against the map.
icd_codes['DEV_CODE'] = icd_codes['DEV_CODE'].replace(-100000, 0)

## Evaluate map

In [25]:
# Number and percentage of codes that do not map (DEV_CODE == -1).
# The mask is built once and counted with the vectorised Series.sum()
# rather than iterating the Series with the builtin sum().
n_unmatched = int((icd_codes['DEV_CODE'] == -1).sum())
print("Unmatched", n_unmatched)
print("% Unmatched", n_unmatched / len(icd_codes) * 100)

Unmatched 20485
% Unmatched 4.647232304900181


In [26]:
# Number and percentage of unspecified codes (DEV_CODE == 0).
# The mask is built once and counted with the vectorised Series.sum()
# rather than iterating the Series with the builtin sum().
n_unspecified = int((icd_codes['DEV_CODE'] == 0).sum())
print("Unspecified", n_unspecified)
print("% Unspecified", n_unspecified / len(icd_codes) * 100)

Unspecified 2266
% Unspecified 0.514065335753176


In [27]:
# number of code rows after mapping
len(icd_codes)

440800

In [28]:
# number of patients, i.e. distinct values of the record key
icd_codes['key'].nunique()

122374

## Convert to codes list

In [29]:
# Collect each patient's DEV_CODE values into one list per key, then discard
# the key so that only the list column remains.
# NOTE(review): a patient with no rows left at this point gets no output row,
# so the result would no longer line up with the input lines - confirm that
# every record has at least one diagnosis code.
codes_per_patient = icd_codes.groupby('key')['DEV_CODE'].apply(list)
icd_codes = codes_per_patient.reset_index(drop=True).to_frame(name='DEV_CODE')

In [30]:
# preview the first five per-patient code lists
icd_codes.head(5)

Unnamed: 0,DEV_CODE
0,[110000.1]
1,"[420000.3, 620000.2, 630000.2]"
2,[840000.3]
3,"[630000.2, 840000.2, 840000.2]"
4,[560000.1]


## Store results

In [31]:
# write the per-patient code lists, without an index column or a header row
results_file = "../Results/test_ais_pred_aaam_map.csv"
icd_codes.to_csv(results_file, index=False, header=False)