In [2]:
import os
import re
from collections import defaultdict
import pandas as pd
from constants import *
from cateye import gen_path, tokenize, lemmatize, clean, write_spelling, filterout



In [3]:
# Load the ICD-10 subset table. The writer functions defined in this notebook
# read the 'namespace', 'code' and 'en_name' fields of each row.
# NOTE(review): 'code' is left to pandas' dtype inference — confirm that
# all-digit codes are not being parsed as numbers (leading zeros would be lost).
icd = pd.read_csv('resource/ICD-10-subset.csv')

In [4]:
def write_snippet(row):
    """Write the HTML snippet file for a single ICD row.

    The file is named after the row's code and stored in the directory that
    ``gen_path`` returns for the ``data/snippet`` base folder and that code.
    Its content is a small HTML fragment holding the code and the English
    name.

    Args:
        row: Mapping-like record (for example a DataFrame row passed by
            ``DataFrame.apply(..., axis=1)``) providing ``'code'`` and
            ``'en_name'``.

    Returns:
        None. The function is called for its file-system side effect.
    """
    code = row['code']
    path = gen_path(os.path.join('data', 'snippet'), code)

    template = """<h4 class="code"><a>{}</a></h4><p class="name">{}</p>"""
    # NOTE(review): the name is inserted verbatim (no HTML escaping) — confirm
    # that names never contain markup characters such as '<' or '&'.
    content = template.format(code, row['en_name'])

    os.makedirs(path, exist_ok=True)
    # Pin the encoding: the platform default is locale-dependent and can fail
    # on (or mis-encode) non-ASCII characters in the name.
    with open(os.path.join(path, code), 'w', encoding='utf-8') as f:
        f.write(content)

def derive_code(code, namespace):
    """Return ``code`` followed by its progressively shorter prefixes.

    Prefixes are produced by dropping one trailing character at a time until
    the minimum length for the namespace is reached: 3 characters for
    ``'cm'`` and 2 characters for ``'pcs'``. A code that is already at or
    below that length is returned as the only element.

    Args:
        code: The code string to expand.
        namespace: Either ``'cm'`` or ``'pcs'``.

    Returns:
        A new list of strings ordered from the full code down to the shortest
        prefix, e.g. ``derive_code('A0012', 'cm') == ['A0012', 'A001', 'A00']``.

    Raises:
        ValueError: If ``namespace`` is not ``'cm'`` or ``'pcs'``. The
            recursive formulation had no terminating case for other values
            and ran until ``RecursionError``.
    """
    min_lengths = {'cm': 3, 'pcs': 2}
    if namespace not in min_lengths:
        raise ValueError(
            "unknown namespace {!r}; expected one of {}".format(
                namespace, sorted(min_lengths)))

    shortest = min_lengths[namespace]
    if len(code) <= shortest:
        return [code[:shortest]]

    # Walk from the full length down to the namespace's minimum length.
    return [code[:end] for end in range(len(code), shortest - 1, -1)]

def write_hint(row):
    """Write the hint file for a single ICD row and return its content.

    The English name is passed through ``clean``, ``tokenize``, ``lemmatize``
    and ``filterout`` (in that order); the resulting items are written one per
    line. The file is named after the row's code and stored in the directory
    that ``gen_path`` returns for the ``data/hint`` base folder and that code.

    Args:
        row: Mapping-like record (for example a DataFrame row passed by
            ``DataFrame.apply(..., axis=1)``) providing ``'code'`` and
            ``'en_name'``.

    Returns:
        The newline-joined text that was written to the file.
    """
    code = row['code']
    path = gen_path(os.path.join('data', 'hint'), code)

    # Make content
    hints = filterout(lemmatize(tokenize(clean(row['en_name']))))
    content = '\n'.join(hints)

    # Write to files
    os.makedirs(path, exist_ok=True)
    # Pin the encoding: the platform default is locale-dependent and can fail
    # on (or mis-encode) non-ASCII characters.
    with open(os.path.join(path, code), 'w', encoding='utf-8') as f:
        f.write(content)
    return content
    
def write_token(row):
    """Write the token file for a single ICD row and return its content.

    Tokens are the code plus its prefixes (from ``derive_code``) together with
    the tokens of the cleaned English name. They are lower-cased, filtered
    against ``STOPWORDS``, de-duplicated, sorted and written one per line. The
    file is named after the row's code and stored in the directory that
    ``gen_path`` returns for the ``data/token`` base folder and that code.

    Args:
        row: Mapping-like record (for example a DataFrame row passed by
            ``DataFrame.apply(..., axis=1)``) providing ``'namespace'``,
            ``'code'`` and ``'en_name'``.

    Returns:
        The newline-joined text that was written to the file.
    """
    code = row['code']
    path = gen_path(os.path.join('data', 'token'), code)

    # Make content
    raw_tokens = derive_code(code, row['namespace'])
    raw_tokens.extend(tokenize(clean(row['en_name'])))

    # Lower-case each token once, then drop stopwords and duplicates.
    lowered = (token.lower() for token in raw_tokens)
    unique_tokens = {token for token in lowered if token not in STOPWORDS}
    content = '\n'.join(sorted(unique_tokens))

    os.makedirs(path, exist_ok=True)
    # Pin the encoding: the platform default is locale-dependent and can fail
    # on (or mis-encode) non-ASCII characters.
    with open(os.path.join(path, code), 'w', encoding='utf-8') as f:
        f.write(content)

    return content


In [5]:
# Remove the three generated output folders before regenerating them.
# Destructive: everything under these directories is deleted without prompting.
!rm -rf data/snippet data/token data/hint

In [6]:
# Recreate the output folders. `-p` also creates the parent `data` directory
# when it is missing and does not fail if a folder already exists, so this
# cell can be re-run on its own.
!mkdir -p data/snippet data/token data/hint

In [7]:
# Run each writer over every row of the ICD table, in the same order as
# before: snippets first, then hints, then tokens.
for writer in (write_snippet, write_hint, write_token):
    icd.apply(writer, axis=1)

# Finally call write_spelling with the token folder and spelling file constants.
write_spelling(TOKEN_FOLDER, SPELLING_FILE)