In [1]:
import os
import re
from collections import defaultdict
import pandas as pd
from constants import *
from cateye import gen_path, tokenize, lemmatize, clean, write_spelling, filterout

In [2]:
# Load the ICD-10 subset table. Each row is later handed to the write_*
# helpers below, which read its 'namespace', 'code', 'en_name' and 'zh_name'
# fields.
icd = pd.read_csv('resource/ICD-10-subset.csv')

In [3]:
def write_snippet(row):
    """Write the HTML snippet file for one ICD row.

    The file is named after the row's code and is placed in the directory
    returned by ``gen_path(<data/snippet>, code)``. Its content is a single
    HTML fragment holding the code, ``en_name`` and ``zh_name``.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing the keys
            ``'code'``, ``'en_name'`` and ``'zh_name'``.

    Returns:
        None. The only effect is the file written to disk.
    """
    code = row['code']
    path = gen_path(os.path.join('data', 'snippet'), code)
    fp = os.path.join(path, code)

    template = """<h4 class="code"><a>{}</a></h4><p class="name">{}<br/>{}</p>"""
    content = template.format(code, row['en_name'], row['zh_name'])

    os.makedirs(path, exist_ok=True)
    # Explicit UTF-8: the names may contain non-ASCII text, and the platform
    # default encoding is not guaranteed to be able to encode it.
    with open(fp, 'w', encoding='utf-8') as f:
        f.write(content)

def derive_code(code, namespace):
    """Return ``code`` followed by its successively shorter prefixes.

    One trailing character is dropped at a time until the remaining prefix
    is no longer than the namespace's minimum length (3 characters for
    ``'cm'``, 2 for ``'pcs'``). No character is treated specially, so
    ``derive_code('J01.00', 'cm')`` yields
    ``['J01.00', 'J01.0', 'J01.', 'J01']``.

    Args:
        code: The full code string.
        namespace: ``'cm'`` or ``'pcs'``.

    Returns:
        list[str]: Prefixes ordered from longest (the code itself) to
        shortest. A code already at or below the minimum length is returned
        as a one-element list.

    Raises:
        ValueError: If ``namespace`` is not a known namespace. (The recursive
            form never terminated for such input and ended in a
            ``RecursionError``.)
    """
    min_lengths = {'cm': 3, 'pcs': 2}
    if namespace not in min_lengths:
        raise ValueError(
            "unknown namespace {!r}; expected one of {}".format(
                namespace, sorted(min_lengths)))

    floor = min_lengths[namespace]
    prefixes = []
    remaining = code
    # Iterative on purpose: no recursion depth limit, and termination is
    # guaranteed because `remaining` loses one character per pass.
    while len(remaining) > floor:
        prefixes.append(remaining)
        remaining = remaining[:-1]
    prefixes.append(remaining)
    return prefixes

def write_hint(row):
    """Write the hint file for one ICD row and return its text.

    ``en_name`` is passed through ``clean``, ``tokenize``, ``lemmatize`` and
    ``filterout`` (in that order). The resulting items are written one per
    line to a file named after the row's code, inside the directory returned
    by ``gen_path(<data/hint>, code)``.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing the keys
            ``'code'`` and ``'en_name'``.

    Returns:
        str: The newline-joined content that was written to the file.
    """
    code = row['code']
    path = gen_path(os.path.join('data', 'hint'), code)
    fp = os.path.join(path, code)

    # Make content
    hint_terms = filterout(lemmatize(tokenize(clean(row['en_name']))))
    content = '\n'.join(hint_terms)

    # Write to files
    os.makedirs(path, exist_ok=True)
    # Explicit UTF-8 so the output does not depend on the platform default
    # encoding if a name contains non-ASCII characters.
    with open(fp, 'w', encoding='utf-8') as f:
        f.write(content)
    return content
    
def write_token(row):
    """Write the token file for one ICD row and return its text.

    The token set is built from the code prefixes given by
    ``derive_code(code, namespace)`` plus the tokens of the cleaned
    ``en_name``. Every token is lower-cased, tokens found in ``STOPWORDS``
    are dropped, duplicates are removed, and the remainder is sorted and
    written one per line to a file named after the row's code, inside the
    directory returned by ``gen_path(<data/token>, code)``.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing the keys
            ``'namespace'``, ``'code'`` and ``'en_name'``.

    Returns:
        str: The newline-joined, sorted token list that was written.
    """
    namespace = row['namespace']
    code = row['code']
    path = gen_path(os.path.join('data', 'token'), code)
    fp = os.path.join(path, code)

    # Make content
    raw_tokens = derive_code(code, namespace)
    raw_tokens.extend(tokenize(clean(row['en_name'])))

    # Lower-case each token once, then filter and de-duplicate in one pass.
    unique_tokens = {
        lowered
        for lowered in (token.lower() for token in raw_tokens)
        if lowered not in STOPWORDS
    }
    content = '\n'.join(sorted(unique_tokens))

    os.makedirs(path, exist_ok=True)
    # Explicit UTF-8 so the output does not depend on the platform default
    # encoding if a name contains non-ASCII characters.
    with open(fp, 'w', encoding='utf-8') as f:
        f.write(content)

    return content


In [4]:
# Remove the output trees left by any previous run so the files written by
# the cells below start from a clean state.
!rm -rf data/snippet data/token data/hint

In [5]:
# Recreate the empty output directories. `-p` also creates `data/` itself
# when it is missing and does not fail if a directory already exists.
!mkdir -p data/snippet data/token data/hint

In [9]:
# Write one hint file per ICD row. The cell output is the Series of hint
# texts returned by write_hint.
icd.apply(write_hint, axis=1)

Acute tracheitis without obstruction
Supraglottitis, unspecified, without obstruction
Acute epiglottitis without obstruction
Abscess of lung without pneumonia
Pyothorax without fistula
Pleural plaque without asbestos


0                   acute\nnasopharyngitis\ncommon\ncold
1               acute\nmaxillary\nsinusitis\nunspecified
2                 acute\nrecurrent\nmaxillary\nsinusitis
3                 acute\nfrontal\nsinusitis\nunspecified
4                   acute\nrecurrent\nfrontal\nsinusitis
5               acute\nethmoidal\nsinusitis\nunspecified
6                 acute\nrecurrent\nethmoidal\nsinusitis
7              acute\nsphenoidal\nsinusitis\nunspecified
8                acute\nrecurrent\nsphenoidal\nsinusitis
9                       acute\npansinusitis\nunspecified
10                        acute\nrecurrent\npansinusitis
11                               other\nacute\nsinusitis
12                    other\nacute\nrecurrent\nsinusitis
13                         acute\nsinusitis\nunspecified
14              acute\nrecurrent\nsinusitis\nunspecified
15                            streptococcal\npharyngitis
16       acute\npharyngitis\nother\nspecified\norganisms
17                       acute\

In [8]:
# Write the snippet file for every ICD row (write_snippet returns nothing,
# so this result is not used).
icd.apply(write_snippet, axis=1)

# Write the token file for every ICD row; as the last expression of the cell,
# its Series of token strings becomes the cell output.
icd.apply(write_token, axis=1)


Acute tracheitis without obstruction
Supraglottitis, unspecified, without obstruction
Acute epiglottitis without obstruction
Abscess of lung without pneumonia
Pyothorax without fistula
Pleural plaque without asbestos
Acute tracheitis without obstruction
Supraglottitis, unspecified, without obstruction
Acute epiglottitis without obstruction
Abscess of lung without pneumonia
Pyothorax without fistula
Pleural plaque without asbestos


0              acute\ncold\ncommon\nj00\nnasopharyngitis
1      acute\nj01\nj01.\nj01.0\nj01.00\nmaxillary\nsi...
2      acute\nj01\nj01.\nj01.0\nj01.01\nmaxillary\nre...
3      acute\nfrontal\nj01\nj01.\nj01.1\nj01.10\nsinu...
4      acute\nfrontal\nj01\nj01.\nj01.1\nj01.11\nrecu...
5      acute\nethmoidal\nj01\nj01.\nj01.2\nj01.20\nsi...
6      acute\nethmoidal\nj01\nj01.\nj01.2\nj01.21\nre...
7      acute\nj01\nj01.\nj01.3\nj01.30\nsinusitis\nsp...
8      acute\nj01\nj01.\nj01.3\nj01.31\nrecurrent\nsi...
9      acute\nj01\nj01.\nj01.4\nj01.40\npansinusitis\...
10     acute\nj01\nj01.\nj01.4\nj01.41\npansinusitis\...
11     acute\nj01\nj01.\nj01.8\nj01.80\nother\nsinusitis
12     acute\nj01\nj01.\nj01.8\nj01.81\nother\nrecurr...
13     acute\nj01\nj01.\nj01.9\nj01.90\nsinusitis\nun...
14     acute\nj01\nj01.\nj01.9\nj01.91\nrecurrent\nsi...
15          j02\nj02.\nj02.0\npharyngitis\nstreptococcal
16     acute\nj02\nj02.\nj02.8\norganisms\nother\npha...
17     acute\nj02\nj02.\nj02.9\