# Project: ICD-AIS conversion using Deep Learning

This notebook converts ICD-9 codes to AIS codes using an SMT (statistical machine translation) map file.

## Setup

In [15]:
import numpy as np
import pandas as pd
import sqlite3
import random
import math
import modules.helper_functions_100 as hlp

## Parameters


In [16]:
# Input observations: the ICD codes associated with patient injuries.
icd_obs_file = "../Data/test_icd_pre_I9_A05.csv"

# Table that maps AIS08 codes to AIS98 codes.
ais_map_codes_file = "../Data/AIS08_mapped_codes.csv"

# ICD -> AIS map created from GIZA: each row pairs an ICD code with an AIS
# code and the probability of that pairing.
# NOTE(review): presumably the listed AIS code is the most likely one for the
# ICD code -- the original note was unsure; confirm.
smt_map_file = "../Results/SMT_ICD_AIS_map.csv"

# AIS08 code table.
# NOTE(review): the original note described this file as "maps AIS08 to
# AIS98", the same wording used for ais_map_codes_file -- confirm which
# description is correct.
ais_codes_file = "../Data/AIS08_codes.csv"


## Load data

In [17]:
# Load the raw ICD observations into a one-column DataFrame. The file is read
# without a header row (header=None), so the column is named "ICD9CODE" here.
icd_codes = pd.read_csv(icd_obs_file, header=None, names=["ICD9CODE"])
# Preview the first five rows.
icd_codes.head(5)


Unnamed: 0,ICD9CODE
0,A79 E880.9 P87.03 P87.44 D850.0
1,A55 E884.9 P-2 D805.2 D805.4 D807.03
2,A70 E885.9 P79.35 P87.03 D820.21
3,A56 E881.0 P88.01 P88.38 D805.4 D823.01 D825.20
4,A40 E986 P54.11 D879.4


In [18]:
# Load the ICD -> AIS map (first row of the file is the header) and rename
# its `icd_code` column to `icd9_code`.
smt_map = pd.read_csv(smt_map_file, header=0).rename(columns={'icd_code': 'icd9_code'})

# Strip the leading 'D' prefix from every ICD code.
# The vectorised `.str` accessor replaces a per-element apply/lambda: it does
# the same strip for string values, and propagates missing values instead of
# raising AttributeError on them.
smt_map['icd9_code'] = smt_map['icd9_code'].str.lstrip('D')

In [19]:
# Preview the first five rows of the loaded SMT map.
smt_map.head(5)

Unnamed: 0,icd9_code,ais_code,prob
0,4.2,241699,0.001267
1,4.9,856161,5.4e-05
2,5.9,251000,2.3e-05
3,8.42,854455,3.4e-05
4,8.45,545699,0.023131


In [20]:
# Read the AIS code table; the first row of the file supplies column names.
ais_codes = pd.read_csv(ais_codes_file, encoding='iso-8859-1', header=0)

# 'predot' is the integer part of each AIS code, i.e. everything before the
# decimal point (the original notes describe the post-dot digit as severity).
ais_codes['predot'] = ais_codes['code'].apply(math.floor)

# Lookup table from predot code to the full AIS code. If a predot value
# occurs more than once, the last row in the table wins.
ais_dot = dict(zip(ais_codes['predot'], ais_codes['code']))

# Sentinel entries used as error codes (unknown/invalid) so that they map to
# themselves.
ais_dot.update({0: 0.0, -1: -1.0})


## Add severity code

In [21]:
# Spot-check the lookup table: fetch the full AIS code (including its
# post-dot part) stored for predot code 10000.
ais_dot[10000]

10000.1

In [22]:
# The AIS codes in smt_map carry no post-dot (severity) part; replace each
# one with the full code stored for it in ais_dot.
#
# The lookup runs column-wise on `ais_code` rather than row-wise with
# DataFrame.apply(axis=1), which builds a Series for every row just to read
# one field. A callable is used (not `.map(ais_dot)`) so that a code missing
# from ais_dot still raises KeyError instead of silently becoming NaN.
#
# NOTE: ais_dot is keyed by predot codes, so re-running this cell on codes
# that were already converted will generally raise KeyError.
smt_map['ais_code'] = smt_map['ais_code'].map(lambda predot: ais_dot[predot])

In [23]:
# Preview the first five rows to check the updated ais_code column.
smt_map.head(5)

Unnamed: 0,icd9_code,ais_code,prob
0,4.2,241699.1,0.001267
1,4.9,856161.3,5.4e-05
2,5.9,251000.1,2.3e-05
3,8.42,854455.2,3.4e-05
4,8.45,545699.1,0.023131


## Convert ICD codes to tidy format

In [24]:
# Reshape the raw observations with the project helper `tidy_icd_code`.
# NOTE(review): judging by the preview output, the result has one row per
# (key, icd9_code) pair -- confirm against the helper.
icd_codes = hlp.tidy_icd_code(icd_codes)

In [25]:
# Preview the first five rows of the tidied ICD codes.
icd_codes.head(5)

Unnamed: 0,key,icd9_code
0,0,A79
1,0,E880.9
2,0,P87.03
3,0,P87.44
4,0,D850.0


## Trim to only diagnosis codes

In [26]:
# Filter the tidy codes with the project helper `trim_codes`.
# NOTE(review): per the section heading and the preview output, this
# presumably keeps only the diagnosis ("D"-prefixed) codes and drops the
# prefix -- confirm against the helper.
icd_codes = hlp.trim_codes(icd_codes)
# Preview the first five rows of the result.
icd_codes.head(5)

Unnamed: 0,key,icd9_code
0,0,850.0
1,1,805.2
2,1,805.4
3,1,807.03
4,2,820.21
5,3,805.4
6,3,823.01
7,3,825.2
8,4,879.4
9,5,801.22


In [27]:
# Number of rows remaining in icd_codes at this point.
len(icd_codes)

351765

## Map ICD9 to AIS 08

In [14]:
# Attach an AIS code to each ICD code using the SMT map, via the project
# helper `ICD9_AIS08_map`.
# NOTE(review): the meaning of the third argument ('SMT') is defined inside
# the helper -- presumably it selects the mapping method; confirm.
icd_codes = hlp.ICD9_AIS08_map(smt_map, icd_codes, 'SMT')
# Preview the first five rows of the mapped result.
icd_codes.head(5)

  icd9_code  ais_code      prob
0     004.2  241699.1  0.001267
1     004.9  856161.3  0.000054
2     005.9  251000.1  0.000023
3    008.42  854455.2  0.000034
4    008.45  545699.1  0.023131
   key icd9_code
0    0     850.0
1    1     805.2
2    1     805.4
3    1    807.03
4    2    820.21


Unnamed: 0,key,icd9_code,ais_code
0,0,850.0,161001.1
1,1,805.2,650420.2
2,1,805.4,650620.2
3,1,807.03,450203.3
4,2,820.21,853151.3
5,3,805.4,650620.2
6,3,823.01,854471.2
7,3,825.2,852004.2
8,4,879.4,510602.1
9,5,801.22,150202.3


## Evaluate map

In [15]:
# Count the rows whose AIS code is the -1 sentinel (reported as "Unmatched"),
# then print the count and its share of all rows as a percentage.
unmatched_count = int((icd_codes['ais_code'] == -1).sum())
print("Unmatched", unmatched_count)
print("% Unmatched", unmatched_count / len(icd_codes) * 100)

Unmatched 115
% Unmatched 0.026088929219600723


In [16]:
# number of unspecified codes
print("Unspecified", sum(icd_codes.ais_code==0))
print("% Unspecified", sum(icd_codes.ais_code==0)/len(icd_codes)*100)

Unspecified 0
% Unspecified 0.0


In [17]:
# number of codes
len(icd_codes)

440800

In [18]:
# number of patients
len(icd_codes.key.unique())

122374

## Convert to codes list

In [19]:
# Collapse the mapped codes with the project helper `convert_codes_to_lists`.
# NOTE(review): judging by the preview output, the result holds one list of
# AIS codes per row -- presumably one row per key; confirm against the helper.
icd_codes = hlp.convert_codes_to_lists(icd_codes)
# Preview the first five rows of the result.
icd_codes.head(5)

Unnamed: 0,ais_code
0,[161001.1]
1,"[650420.2, 650620.2, 450203.3]"
2,[853151.3]
3,"[650620.2, 854471.2, 852004.2]"
4,[510602.1]


## Store results

In [41]:
# Write the predicted AIS code lists to CSV, omitting both the index column
# and the header row.
icd_codes.to_csv("../Results/test_ais_pred_smt_map.csv", index=False, header=False)