# ICD-AIS translation: Analysis of GIZA output
Thomas Hartka
3/14/22

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import icd9cms
import math

## Load Data

In [2]:
# vocabulary (.vcb) files for the ICD and AIS sides: space-delimited, no header row
icd_vcb = pd.read_csv(
    "./train_icd_pre_I9_A05.csv.vcb",
    sep=" ",
    header=None,
    names=['key', 'code', 'occurrences'],
)
ais_vcb = pd.read_csv(
    "./train_ais_pre_I9_A05.csv.vcb",
    sep=" ",
    header=None,
    names=['key', 'code', 'occurrences'],
)

In [3]:
# read the sentence (.snt) file into a list of raw lines (newlines kept)
snt_file = "./train_ais_pre_I9_A05.csv_train_icd_pre_I9_A05.csv.snt"

with open(snt_file) as snt_handle:
    snt_lines = list(snt_handle)

In [4]:
# translation table: one space-delimited (ais key, icd key, probability) row per line
ais_trans = pd.read_csv(
    "./icd_ais.t3.final",
    sep=" ",
    header=None,
    names=['ais', 'icd', 'prob'],
)

In [5]:
# AIS training data; the file has no header row, so columns are numbered from 0
train_ais = pd.read_csv(
    "./train_ais_pre_I9_A05.csv",
    header=None,
)

In [6]:
# AIS code descriptions (first row is the header; file is ISO-8859-1 encoded)
ais_lookup = pd.read_csv(
    "./AIS08_codes.csv",
    encoding='iso-8859-1',
    header=0,
)

# integer part of each code, i.e. the digits in front of the dot
ais_lookup['predot'] = ais_lookup['code'].apply(math.floor)

## Analysis of training data

In [7]:
# split each space-separated row of codes into a list of code strings
train_ais['codes_list'] = train_ais[0].apply(
    lambda row_text: row_text.split(" ")
)

In [8]:
# flatten the per-row code lists and keep the sorted distinct codes
train_ais_uniq = np.unique(
    [code for codes in train_ais['codes_list'].values for code in codes]
)
len(train_ais_uniq)

1986

In [9]:
# Peek at the distinct AIS codes found in the training data.
# NOTE(review): the stored output lists both a zero-padded code ('010002') and
# a 5-character code ('10000') -- confirm whether leading zeros were dropped
# from some codes upstream.
train_ais_uniq

array(['010002', '010004', '10000', ..., '915004', '915006', '916000'],
      dtype='<U6')

## Parse sentence file

In [10]:
# The .snt file stores every sentence pair as three consecutive lines:
#   1) number of occurrences of the pair
#   2) source sentence (space-separated vocabulary keys)
#   3) target sentence (space-separated vocabulary keys)
# Only the first MAX_SNT_PAIRS pairs are parsed.
MAX_SNT_PAIRS = 10000

snt_occ = []   # sentence occurrences
snt_src = []   # source sentence
snt_trg = []   # target sentence

# Walk the file one triple at a time. The previous version tested
# (i+1)%3==0 for the "second line" branch, which actually selects the THIRD
# line of each triple, so source and target sentences ended up swapped.
# zip() ignores an incomplete trailing triple, keeping the three lists aligned.
triple_lines = snt_lines[:3 * MAX_SNT_PAIRS]
for occ_line, src_line, trg_line in zip(triple_lines[0::3],
                                        triple_lines[1::3],
                                        triple_lines[2::3]):
    # first line in triple is number of occurrences
    snt_occ.append(int(occ_line))

    # second line in triple is source sentence
    # (split() with no argument tolerates repeated/trailing whitespace)
    snt_src.append([int(key) for key in src_line.split()])

    # third line in triple is target sentence
    snt_trg.append([int(key) for key in trg_line.split()])

In [11]:
# flatten the target sentences into one long list of vocabulary keys
trg_list = [
    key
    for sentence in snt_trg
    for key in sentence
]

In [12]:
# distinct vocabulary keys that occur in the parsed target sentences (sorted)
np.unique(trg_list)

array([   2,    3,    4, ..., 1404, 1405, 1406])

In [13]:
# number of source sentences parsed from the sentence file
len(snt_src)

10000

## Display translations

In [14]:
def translate_ais(ais_code, top_n=5):
    '''
    Show the top_n most probable ICD translations for an AIS code.

    Prints the AIS description, displays a table with the top_n candidate
    translations (highest probability first) and returns the icd9cms lookup
    result for the most probable candidate.

    Reads the notebook-level frames ais_lookup, ais_vcb, ais_trans and
    icd_vcb.

    Parameters:
        ais_code - AIS code to translate (matched against ais_lookup.predot
                   and ais_vcb.code)
        top_n - number of candidate translations to display
    Returns:
        result of icd9cms.search() for the most probable ICD code
    Raises:
        IndexError - if ais_code has no description in ais_lookup, or there
                     are no translation rows to show for it
        KeyError - if ais_code is not in the AIS vocabulary
    '''

    # get description for AIS code
    desc_matches = ais_lookup.loc[ais_lookup.predot == ais_code, 'Description']
    if desc_matches.empty:
        raise IndexError("AIS code {} not found in ais_lookup".format(ais_code))
    ais_desc = desc_matches.values[0]

    print("AIS Code:", ais_code, ":", ais_desc)

    # get key for AIS code
    key_matches = ais_vcb.loc[ais_vcb.code == ais_code, 'key']
    if key_matches.empty:
        raise KeyError("AIS code {} not found in AIS vocabulary".format(ais_code))
    ais_key = key_matches.values[0]

    # find matches for the AIS key, order by probability and take top n
    # (reset_index() keeps the original row label as an 'index' column)
    trans = (
        ais_trans[ais_trans.ais == ais_key]
        .sort_values('prob', ascending=False)
        .head(top_n)
        .reset_index()
    )
    if trans.empty:
        raise IndexError(
            "no translations to show for AIS code {} (top_n={})".format(ais_code, top_n)
        )

    def icd_code_for(icd_key):
        # look up the vocabulary entry for this key and take off the prefixes
        raw_code = icd_vcb[icd_vcb.key == icd_key]['code'].values[0]
        return raw_code.replace("D", "").replace("P", "")

    # look up icd code from vocab
    trans['icd_code'] = trans['icd'].map(icd_code_for)

    # decode ICD
    # NOTE(review): codes that carried a "P" prefix are presumably procedure
    # codes; once the prefix is removed they are searched like diagnosis codes
    # and can match unrelated diagnoses or nothing at all -- confirm.
    trans['icd_desc'] = trans.apply(lambda row: icd9cms.search(row.icd_code), axis=1)

    display(trans)

    # the first row is the most probable translation; its code is already
    # stripped, so no second vocabulary lookup is needed
    return icd9cms.search(trans['icd_code'].iloc[0])

## Examine translations

In [15]:
# AIS 710402: upper-extremity skin/subcutaneous/muscle contusion or hematoma
translate_ais(710402)

AIS Code: 710402 : Skin/subcutaneous/muscle, upper extremity, contusion; hematoma


Unnamed: 0,index,ais,icd,prob,icd_code,icd_desc
0,3335,4,200,0.260017,923.0,92300:Contusion shoulder reg:Contusion of shou...
1,3292,4,45,0.198556,923.9,9239:Contusion upper limb NOS:Contusion of uns...
2,3321,4,141,0.124369,923.2,92320:Contusion of hand(s):Contusion of hand(s)
3,3308,4,89,0.118434,923.11,92311:Contusion of elbow:Contusion of elbow
4,3370,4,424,0.091967,923.1,92310:Contusion of forearm:Contusion of forearm


92300:Contusion shoulder reg:Contusion of shoulder region

In [16]:
# AIS 650432: thoracic vertebral body fracture with minor compression, no cord involvement
translate_ais(650432)

AIS Code: 650432 : Vertebra, thoracic spine, fracture with or without dislocation but no cord involvement, vertebral body ("burst" fracture), minor compression (<=20% loss of anterior height)


Unnamed: 0,index,ais,icd,prob,icd_code,icd_desc
0,16190,21,39,0.43716,805.2,8052:Fx dorsal vertebra-close:Closed fracture ...
1,16265,21,305,0.062406,88.93,
2,16181,21,11,0.055104,88.38,
3,16194,21,52,0.041484,87.41,8741:Open wound of larynx and trachea complica...
4,16180,21,10,0.022061,88.01,8801:Open wound of shoulder and upper arm comp...


8052:Fx dorsal vertebra-close:Closed fracture of dorsal [thoracic] vertebra without mention of spinal cord injury

In [17]:
# AIS 110600: scalp laceration NFS
translate_ais(110600)

AIS Code: 110600 : Scalp, laceration NFS


Unnamed: 0,index,ais,icd,prob,icd_code,icd_desc
0,13922,19,41,0.427589,873.0,"8730:Open wound of scalp:Open wound of scalp, ..."
1,13908,19,8,0.118951,86.59,
2,13909,19,9,0.075465,87.03,8703:Penetr wnd orbit w/o FB:Penetrating wound...
3,13918,19,34,0.034371,E885.9,E8859:Fall from slipping NEC:Fall from other s...
4,13930,19,60,0.021238,E880.9,E8809:Fall on stair/step NEC:Accidental fall o...


8730:Open wound of scalp:Open wound of scalp, without mention of complication

In [18]:
# AIS 320211: carotid artery laceration.
# In the stored output every top candidate has nearly the same probability
# (~0.07), so no single translation stands out for this code.
translate_ais(320211)

AIS Code: 320211 : Carotid artery [common, internal], laceration; perforation; puncture, minor; superficial; incomplete circumferential involvement; blood loss <=20% by volume, neurological deficit (stroke) not head-injury related, bilateral


Unnamed: 0,index,ais,icd,prob,icd_code,icd_desc
0,444833,1985,1546,0.070309,81.01,8101:Open fracture of clavicle:None
1,444836,1985,1782,0.070309,25.51,2551:Hyperaldosteronism:None
2,444826,1985,389,0.070309,81.62,
3,444838,1985,2002,0.070307,76.73,7673:Bone injury NEC at birth:Other injuries t...
4,444837,1985,1965,0.070305,24.32,


8101:Open fracture of clavicle:None

In [19]:
# AIS 251000: nose fracture, closed or NFS
translate_ais(251000)

AIS Code: 251000 : Nose, fracture, closed or NFS


Unnamed: 0,index,ais,icd,prob,icd_code,icd_desc
0,44617,67,55,0.763127,802.0,8020:Nasal bone fx-closed:Closed fracture of n...
1,44599,67,11,0.033497,88.38,
2,44597,67,9,0.024286,87.03,8703:Penetr wnd orbit w/o FB:Penetrating wound...
3,44603,67,25,0.011826,E960.0,E9600:Unarmed fight or brawl:Unarmed fight or ...
4,44775,67,755,0.008868,21.71,


8020:Nasal bone fx-closed:Closed fracture of nasal bones

## Store best translations

In [20]:
# work on a copy of the translation table without ais key 0 (placeholder?)
nonzero_ais = ais_trans['ais'] != 0
trans = ais_trans[nonzero_ais].copy()

In [21]:
# attach the AIS code for each ais key, then drop the vocabulary bookkeeping columns
trans = (
    trans
    .merge(ais_vcb, how='left', left_on='ais', right_on='key')
    .rename(columns={'code': 'ais_code'})
    .drop(columns=['key', 'occurrences'])
)

In [22]:
# attach the ICD code for each icd key, then drop the vocabulary bookkeeping columns
trans = (
    trans
    .merge(icd_vcb, how='left', left_on='icd', right_on='key')
    .rename(columns={'code': 'icd_code'})
    .drop(columns=['key', 'occurrences'])
)

In [23]:
# the numeric keys are no longer needed; keep the codes and probability in output order
trans = (
    trans
    .drop(columns=['ais', 'icd'])
    .loc[:, ['icd_code', 'ais_code', 'prob']]
)

In [24]:
# keep only D-codes
# "D" is a prefix on the vocabulary codes, so test the start of the string
# instead of searching for a "D" anywhere in it. na=False drops rows whose
# icd key had no vocabulary match (NaN after the left merge) instead of
# raising on a mask that contains NaN.
is_d_code = trans['icd_code'].str.startswith('D', na=False)
trans = trans[is_d_code].reset_index(drop=True)

In [25]:
# for every ICD code keep the single row with the highest probability
trans = (
    trans
    .sort_values(['icd_code', 'prob'], ascending=[True, False])
    .groupby('icd_code')
    .head(1)
    .reset_index(drop=True)
)

In [26]:
# write the ICD -> AIS map as CSV, leaving out the row index
trans.to_csv(
    "../Results/SMT_ICD_AIS_map.csv",
    index=False,
)