In [1]:
import pandas as pd
import re
import numpy as np
# Turn off pandas' chained-assignment check for the whole session, so that
# SettingWithCopyWarning is neither printed nor raised anywhere in this notebook.
pd.set_option('mode.chained_assignment',  None)

## Regex rules

In [2]:
# Finding patterns: one compiled, case-insensitive regex per label column.
# Several alternatives exist only to catch misspellings seen in the reports.
re_listhesis = re.compile(r'\w+?listhesis|\w+?lithesis|\w+?listhsis', re.I) # spondylolisthesis (vertebral slippage), incl. misspellings
re_scoliosis = re.compile(r'\w+?scoliosis|scolio\w+', re.I) # scoliosis
re_fracture = re.compile(r'fracture', re.I) # fracture
# NOTE(review): the trailing '.' is unescaped and therefore matches any character, not only a full stop.
re_etc_fracture = re.compile(r'T[1-9][\W\w]{1,10}L[1-5][\w\W]{1,30}C[1-5]|Old benign compre\w+[\w\W]{1,20}T11|s/p posterolateral fixation[\w\W]{1,}fracture[\w\W]{1,}interval change.', re.I) # fracture exceptions
re_spondylosis = re.compile(r'spondylosis', re.I) # spondylosis
re_a_fissure = re.compile(r'annular', re.I) # annulus fibrosus tear (annular fissure)
re_bulge = re.compile(r'diff\w+ bulg\w+|posterior disc bulging|disc bulging|mild bulging|bulging discs|bulging disc|posterior bulging|bulgins|bulgings', re.I) # disc bulge
re_degeneration = re.compile(r'mild degenerative change at|more degeneration in|degenera\w+?\W?disc|degenerations[\w\W]{1,50}disc|degenerations[\w\W]{1,30}discs|disc (?!and facet)[\w\W]{1,15}degenerations|disc degeneration|degeneration[\w\W]{1,30}disc|degenerative change[\w\W]{1,20}discs', re.I) # degenerative change of the intervertebral disc
re_extrusion = re.compile(r'extruding disc|extruded|extrusion', re.I) # disc extrusion
re_c_extrusion = re.compile(r'central canal stenosis due to central disc extrusion',re.I) # extrusion exception
re_herniation = re.compile(r'herniation[\w\W]{1,15}disc|disc herniation|herniated disc', re.I) # disc herniation
re_t_herniation = re.compile(r'r/o herniation[\w\W]{1,50}T\d+\W\d',re.I) # herniation exception
# NOTE(review): in 'central+\w protruded disc' the '+' binds to the single letter 'l'
# ('central\w+' may have been intended - confirm); 'protrusion[\w\W]{1,15}disc' is listed twice.
re_protrusion = re.compile(r'centrally protruding disc|posterolateral protrusion|forminal protrusion|foraminal protrusion|small prortusion[\w\W]{1,15}framen|Rt[\w\W]{1,15}protrusion|protruded disc|central+\w protruded disc|central[\w\W]{1,15}protrusion|central protrusion|disc protrusion|protursion of disc|protrusion[\w\W]{1,15}disc|central protrusion disc|protrusion[\w\W]{1,15}disc', re.I) # disc protrusion
re_modic = re.compile(r'modic type 1|modic type I\W|modic type[\w\W]{1,10} I ', re.I) # Modic type 1 change with endplate edema
# NOTE(review): 'joint+' repeats only the final 't' - confirm intent.
re_f_hypertrophy = re.compile(r'with left facet hypertrophy|hypertro\w+[\w\W]{1,15}facet joint+|facet joint[\w\W]{1,17}hypertrophy', re.I) # facet (joint) hypertrophy
re_c_stenosis = re.compile(r'central stenosis|central canal[\w\W]{1,45}stenosis|central spinal stenosis|centra canal stenosis|central cananl stenosis', re.I) # central stenosis
re_foraminal = re.compile(r' foram\w+ stenosis|mild NF|NF stenosis|NF stensois|subforam\w+ stenosis', re.I) # (neural) foraminal stenosis
re_nerve_root = re.compile(r'compression[\w\W]{1,15}nerve|nerve roots compression|comprresion[\w\W]{1,17}nerve root|compressoin[\w\W]{1,17}nerve root|combined[\w\W]{1,30}compressi\w+|compressi\w+[\w\W]{1,32}nerve roo\w+|root compressi\w+', re.I) # nerve root compression
re_t_nerve_root = re.compile(r'compression and swelling of the right L5 nerve root',re.I) # nerve root exception
re_l_r_stenosis = re.compile(r'lateral recess[\w\W]{1,30}stenosis', re.I) # lateral recess stenosis
# NOTE(review): 'spondyloly+' repeats only the final 'y' - confirm intent.
re_spondylolysis = re.compile(r'spondyloly+|spodylolytic', re.I) # spondylolysis
re_schmorl = re.compile(r'schmorl\W\w|schmorls node', re.I) # Schmorl's node
re_osteophyte = re.compile(r'with spur|body spur|posterior spur|ostephyte|osteophyte|bony spur', re.I) # osteophyte (bone spur)
# NOTE(review): the bare 'narrowing' alternative already matches everything the first alternative does.
re_narrowing = re.compile(r'narrowing of[\w\W]{1,15}disc space|narrowing', re.I) # disc space narrowing
re_any_stenosis = re.compile(r'congenital spinal stenosis|and spinal stenosis|associated spinal stenosis|mild spinal[\w\W]{1,15}stenosis|severe stenosis|severe spinal stenosis|spinal stenosis with|with spinal stenosis|moderate spinal stenosis', re.I) # stenosis without a specified location
# NOTE(review): 'vac+' repeats only the final 'c' - confirm intent.
re_vacuum = re.compile(r'vac\w+ disc|multiple vac\w+|vaccum changes|vac\w+ in|vacuum pheno\w+|with vac\w+|intradiscal vac\w+|intervertebral vac+', re.I) # intradiscal vacuum
re_sequestration = re.compile(r'sequestration\W|disc sequestration|sequestrated disc', re.I) # disc sequestration

# Line-level filter patterns (used by the labelling loops to skip a report line).
re_sp = re.compile(r's/p', re.I) # prior state (status post)
# NOTE(review): with re.I, 'C\d' also matches a lowercase 'c' followed by a digit.
re_c = re.compile(r'C\d+\W\d|C\d', re.I) # cervical spine (C) findings
# The negative lookaheads reject a T1-T9 range when T10-T12 follows within 20 characters.
re_t = re.compile(r'T[1-9] body|T[1-9] upper body|T[1-9]\WT[1-9](?![\w\W]{1,20}T[1][0-2])|T[1-9]\W[1-9](?![\w\W]{1,20}T[1][0-2])|thoracic spine', re.I) # thoracic levels T1-T9
re_no = re.compile(r'without disc herniation|no evidence|No remarkable|no visible', re.I) # negation
re_exception = re.compile(r'Healing state of fracture|Healed fracture[\w\W]{1,}iliac spine', re.I) # exceptions
# NOTE(review): the trailing '.' is unescaped and matches any character.
re_associated = re.compile(r'associated central canal stenosis with compressive myelopathy.',re.I) # sentence continuation

## CT

In [10]:
# Load the 'CT' worksheet of the training workbook into a DataFrame.
CT = pd.read_excel('spine_train.xlsx', sheet_name='CT')

In [11]:
# Replace every missing cell with 0 (this applies to all columns, not only the labels).
CT = CT.fillna(0)

In [12]:
# Show the first two CT rows.
CT.head(n=2)

Unnamed: 0,번호,등록번호,촬영일,판독문,도출이유,Listhesis,scoliosis,fracture,spondylosis,Annular fissure,...,any stenosis_3,disc sequestration.2,intradiscal vacuum.1,intradiscal vacuum_1,intradiscal vacuum_2,intradiscal vacuum_3,disc sequestration.3,disc sequestration_1,disc sequestration_2,disc sequestration_3
0,2011-1,10013401,2011.10.21,CONCLUSION : \n1. Acute compression fracture o...,Acute compression fracture 로 fracture에 1\nT12 ...,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2011-2,10040558,2011.11.19,CONCLUSION : \ndiffuse bulging in L3/4.\ndegen...,diffuse bulging로 disc bulge에 1\ndegenerative s...,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
# Split each report text ('판독문' column) on newlines: one list of lines per row.
con = CT.loc[:, '판독문'].str.split('\n')
# Show the line list of the row labelled 0.
con.loc[0]

['CONCLUSION : ',
 '1. Acute compression fracture of L4 lower body. ',
 '2. Bone cementoplasty status of T12 bursting fracture. ',
 '3. Old compression of T9 upper body. ',
 '4. Isthmic spondylolisthesis of L5 on S1, grade I with bilateral neural foraminal stenosis. ',
 '5. Bulging discs on L1-2, L3-4 and L4-5 levels. ',
 '6. Osteopenia.']

In [14]:
# Label the CT reports: scan every line of every report and set the matching
# finding column of that report's row to 1.

# A line matching any of these patterns is ignored entirely: status-post
# (s/p), cervical levels, negations, the sentence-continuation phrase,
# the listed exceptions, and thoracic T1-T9 findings.
skip_patterns = (re_sp, re_c, re_no, re_associated, re_exception, re_t)

# (pattern, label column) pairs; a single line may trigger several of them.
finding_rules = (
    (re_listhesis, 'Listhesis'),
    (re_scoliosis, 'scoliosis'),
    (re_fracture, 'fracture'),
    (re_spondylosis, 'spondylosis'),
    (re_a_fissure, 'Annular fissure'),
    (re_bulge, 'Disc bulge'),
    (re_degeneration, 'Disc degeneration'),
    (re_extrusion, 'Disc extrusion'),
    (re_herniation, 'Disc herniation'),
    (re_protrusion, 'Disc protrusion'),
    (re_modic, 'Endplate edema or Type 1 modic'),
    (re_f_hypertrophy, 'Facet hypertrophy'),
    (re_c_stenosis, 'central stenosis'),
    (re_foraminal, 'foraminal stenosis'),
    (re_nerve_root, 'nerve root displaced/compressed'),
    (re_l_r_stenosis, 'lateral recess stenosis'),
    (re_spondylolysis, 'spondylolysis'),
    (re_schmorl, '''Schmorl's node'''),
    (re_osteophyte, 'osteophyte'),
    (re_narrowing, 'disc space narrowing'),
    (re_any_stenosis, 'any stenosis'),
    (re_vacuum, 'intradiscal vacuum'),
    (re_sequestration, 'disc sequestration'),
)

# `con` holds one list of report lines per CT row, in row order, so the
# enumerate position is the row position in CT.
for row_pos, report_lines in enumerate(con):
    # `.str.split` yields NaN instead of a list when the report cell is not
    # text (e.g. a missing report that was filled with 0): nothing to scan.
    if not pd.api.types.is_list_like(report_lines):
        continue
    for line in report_lines:
        if any(pattern.search(line) for pattern in skip_patterns):
            continue
        for pattern, column in finding_rules:
            if pattern.search(line):
                # Single positional write into the frame itself. Chained
                # `CT[column][row] = 1` depends on view semantics and writes
                # nothing under pandas Copy-on-Write. `get_loc` still raises
                # KeyError for an unknown column name.
                CT.iat[row_pos, CT.columns.get_loc(column)] = 1

In [15]:
# Show the first two CT rows.
CT.head(n=2)

Unnamed: 0,번호,등록번호,촬영일,판독문,도출이유,Listhesis,scoliosis,fracture,spondylosis,Annular fissure,...,any stenosis_3,disc sequestration.2,intradiscal vacuum.1,intradiscal vacuum_1,intradiscal vacuum_2,intradiscal vacuum_3,disc sequestration.3,disc sequestration_1,disc sequestration_2,disc sequestration_3
0,2011-1,10013401,2011.10.21,CONCLUSION : \n1. Acute compression fracture o...,Acute compression fracture 로 fracture에 1\nT12 ...,1.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2011-2,10040558,2011.11.19,CONCLUSION : \ndiffuse bulging in L3/4.\ndegen...,diffuse bulging로 disc bulge에 1\ndegenerative s...,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## X-ray

In [17]:
# Load the X-ray worksheet; the sheet name really ends with a space.
xray = pd.read_excel('spine_train.xlsx', sheet_name='x-ray ')

In [18]:
# Replace every missing cell with 0 (this applies to all columns, not only the labels).
xray = xray.fillna(0)

In [19]:
# Show the first two X-ray rows.
xray.head(n=2)

Unnamed: 0,번호,등록번호,촬영일,판독문,도출이유,Listhesis,scoliosis,fracture,spondylosis,Annular fissure,...,any stenosis_3,disc sequestration.2,intradiscal vacuum.1,intradiscal vacuum_1,intradiscal vacuum_2,intradiscal vacuum_3,disc sequestration.3,disc sequestration_1,disc sequestration_2,disc sequestration_3
0,2011-1,10002173,2011.3.14,CONCLUSION : \nsevere spondylolisthesis of L4 ...,severe sponlylolisthesis로 listhesis 있어 listhes...,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2011-2,10012816,2011.2.24,CONCLUSION : \nDegenerative spondylolisthesis ...,"Degenerative spondylolisthesis로 listhesis에 1, ...",0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [20]:
# Split each report text ('판독문' column) on newlines: one list of lines per row.
con = xray.loc[:, '판독문'].str.split('\n')
# Show the line list of the row labelled 0.
con.loc[0]

['CONCLUSION : ', 'severe spondylolisthesis of L4 on L5 with instability.']

In [21]:
# X-ray pre-pass: any report containing a line that matches the hand-written
# fracture exceptions in re_etc_fracture is flagged as a fracture. Every line
# is tested here; none of the skip filters used by the main labelling pass
# are applied.
fracture_col = xray.columns.get_loc('fracture')

# `con` holds one list of report lines per X-ray row, in row order, so the
# enumerate position is the row position in xray.
for row_pos, report_lines in enumerate(con):
    # `.str.split` yields NaN instead of a list when the report cell is not
    # text (e.g. a missing report that was filled with 0): nothing to scan.
    if not pd.api.types.is_list_like(report_lines):
        continue
    if any(re_etc_fracture.search(line) for line in report_lines):
        # Single positional write; chained `xray['fracture'][row] = 1`
        # writes nothing under pandas Copy-on-Write.
        xray.iat[row_pos, fracture_col] = 1

In [22]:
# Label the X-ray reports: scan every line of every report and set the
# matching finding column of that report's row to 1.

# A line matching any of these patterns is ignored entirely: status-post
# (s/p), cervical levels, negations, the sentence-continuation phrase,
# the listed exceptions, and thoracic T1-T9 findings.
skip_patterns = (re_sp, re_c, re_no, re_associated, re_exception, re_t)

# (pattern, label column) pairs; a single line may trigger several of them.
finding_rules = (
    (re_listhesis, 'Listhesis'),
    (re_scoliosis, 'scoliosis'),
    (re_fracture, 'fracture'),
    (re_spondylosis, 'spondylosis'),
    (re_a_fissure, 'Annular fissure'),
    (re_bulge, 'Disc bulge'),
    (re_degeneration, 'Disc degeneration'),
    (re_extrusion, 'Disc extrusion'),
    (re_herniation, 'Disc herniation'),
    (re_protrusion, 'Disc protrusion'),
    (re_modic, 'Endplate edema or Type 1 modic'),
    (re_f_hypertrophy, 'Facet hypertrophy'),
    (re_c_stenosis, 'central stenosis'),
    (re_foraminal, 'foraminal stenosis'),
    (re_nerve_root, 'nerve root displaced/compressed'),
    (re_l_r_stenosis, 'lateral recess stenosis'),
    (re_spondylolysis, 'spondylolysis'),
    (re_schmorl, '''Schmorl's node'''),
    (re_osteophyte, 'osteophyte'),
    (re_narrowing, 'disc space narrowing'),
    (re_any_stenosis, 'any stenosis'),
    (re_vacuum, 'intradiscal vacuum'),
    (re_sequestration, 'disc sequestration'),
)

# `con` holds one list of report lines per X-ray row, in row order, so the
# enumerate position is the row position in xray.
for row_pos, report_lines in enumerate(con):
    # `.str.split` yields NaN instead of a list when the report cell is not
    # text (e.g. a missing report that was filled with 0): nothing to scan.
    if not pd.api.types.is_list_like(report_lines):
        continue
    for line in report_lines:
        if any(pattern.search(line) for pattern in skip_patterns):
            continue
        for pattern, column in finding_rules:
            if pattern.search(line):
                # Single positional write into the frame itself. Chained
                # `xray[column][row] = 1` depends on view semantics and
                # writes nothing under pandas Copy-on-Write. `get_loc` still
                # raises KeyError for an unknown column name.
                xray.iat[row_pos, xray.columns.get_loc(column)] = 1
    


In [23]:
# Show the first two X-ray rows.
xray.head(n=2)

Unnamed: 0,번호,등록번호,촬영일,판독문,도출이유,Listhesis,scoliosis,fracture,spondylosis,Annular fissure,...,any stenosis_3,disc sequestration.2,intradiscal vacuum.1,intradiscal vacuum_1,intradiscal vacuum_2,intradiscal vacuum_3,disc sequestration.3,disc sequestration_1,disc sequestration_2,disc sequestration_3
0,2011-1,10002173,2011.3.14,CONCLUSION : \nsevere spondylolisthesis of L4 ...,severe sponlylolisthesis로 listhesis 있어 listhes...,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2011-2,10012816,2011.2.24,CONCLUSION : \nDegenerative spondylolisthesis ...,"Degenerative spondylolisthesis로 listhesis에 1, ...",1.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## MRI

In [25]:
# Load the 'MRI' worksheet of the training workbook into a DataFrame.
MRI = pd.read_excel('spine_train.xlsx', sheet_name='MRI')

In [26]:
# Replace every missing cell with 0 (this applies to all columns, not only the labels).
MRI = MRI.fillna(0)

In [27]:
# Show the first two MRI rows.
MRI.head(n=2)

Unnamed: 0,번호,등록번호,촬영일,판독문,도출이유,Listhesis,scoliosis,fracture,spondylosis,Annular fissure,...,nerve root displaced/compressed,lateral recess stenosis,spondylolysis,Schmorl's node,osteophyte,disc space narrowing,any stenosis,disc sequestration,intradiscal vacuum,disc sequestration.1
0,2011-1,10003613,2011.3.24,CONCLUSION : \nIsthmic spondylolisthesis (Meye...,"spondylolisthesis 있어서 listhesis에 1, with bulgi...",0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2011-2,10007037,2011.9.15,CONCLUSION : \n1. Severe central canal stenosi...,Severe central canal stenosis로 central stenosi...,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [28]:
# Spot-check: 'central stenosis' value of the row labelled 26.
MRI.at[26, 'central stenosis']

0.0

In [29]:
# Split each report text ('판독문' column) on newlines: one list of lines per row.
con = MRI.loc[:, '판독문'].str.split('\n')
# Show the line list of the row labelled 0.
con.loc[0]

['CONCLUSION : ',
 'Isthmic spondylolisthesis (Meyerding classification grade 1) at L5 on S1 with bulging disc of L5-S1.',
 '']

In [30]:
# MRI pre-pass: narrowly worded exception patterns that set a label directly.
# Every report line is tested here; none of the skip filters used by the main
# labelling pass are applied.
exception_rules = (
    (re_etc_fracture, 'fracture'),
    (re_c_extrusion, 'Disc extrusion'),
    (re_t_herniation, 'Disc herniation'),
    (re_t_nerve_root, 'nerve root displaced/compressed'),
)
# re_c_stenosis is intentionally not part of this pre-pass; 'central stenosis'
# is only set by the main labelling pass, after the skip filters.

# `con` holds one list of report lines per MRI row, in row order, so the
# enumerate position is the row position in MRI.
for row_pos, report_lines in enumerate(con):
    # `.str.split` yields NaN instead of a list when the report cell is not
    # text (e.g. a missing report that was filled with 0): nothing to scan.
    if not pd.api.types.is_list_like(report_lines):
        continue
    for line in report_lines:
        for pattern, column in exception_rules:
            if pattern.search(line):
                # Single positional write; chained `MRI[column][row] = 1`
                # writes nothing under pandas Copy-on-Write. `get_loc` still
                # raises KeyError for an unknown column name.
                MRI.iat[row_pos, MRI.columns.get_loc(column)] = 1

In [32]:
# Label the MRI reports: scan every line of every report and set the matching
# finding column of that report's row to 1.

# A line matching any of these patterns is ignored entirely: status-post
# (s/p), cervical levels, negations, the sentence-continuation phrase,
# the listed exceptions, and thoracic T1-T9 findings.
skip_patterns = (re_sp, re_c, re_no, re_associated, re_exception, re_t)

# (pattern, label column) pairs; a single line may trigger several of them.
finding_rules = (
    (re_listhesis, 'Listhesis'),
    (re_scoliosis, 'scoliosis'),
    (re_fracture, 'fracture'),
    (re_spondylosis, 'spondylosis'),
    (re_a_fissure, 'Annular fissure'),
    (re_bulge, 'Disc bulge'),
    (re_degeneration, 'Disc degeneration'),
    (re_extrusion, 'Disc extrusion'),
    (re_herniation, 'Disc herniation'),
    (re_protrusion, 'Disc protrusion'),
    (re_modic, 'Endplate edema or Type 1 modic'),
    (re_f_hypertrophy, 'Facet hypertrophy'),
    (re_c_stenosis, 'central stenosis'),
    (re_foraminal, 'foraminal stenosis'),
    (re_nerve_root, 'nerve root displaced/compressed'),
    (re_l_r_stenosis, 'lateral recess stenosis'),
    (re_spondylolysis, 'spondylolysis'),
    (re_schmorl, '''Schmorl's node'''),
    (re_osteophyte, 'osteophyte'),
    (re_narrowing, 'disc space narrowing'),
    (re_any_stenosis, 'any stenosis'),
    (re_vacuum, 'intradiscal vacuum'),
    (re_sequestration, 'disc sequestration'),
)

# `con` holds one list of report lines per MRI row, in row order, so the
# enumerate position is the row position in MRI.
for row_pos, report_lines in enumerate(con):
    # `.str.split` yields NaN instead of a list when the report cell is not
    # text (e.g. a missing report that was filled with 0): nothing to scan.
    if not pd.api.types.is_list_like(report_lines):
        continue
    for line in report_lines:
        if any(pattern.search(line) for pattern in skip_patterns):
            continue
        for pattern, column in finding_rules:
            if pattern.search(line):
                # Single positional write into the frame itself. Chained
                # `MRI[column][row] = 1` depends on view semantics and writes
                # nothing under pandas Copy-on-Write. `get_loc` still raises
                # KeyError for an unknown column name.
                MRI.iat[row_pos, MRI.columns.get_loc(column)] = 1

In [34]:
# Show the first two MRI rows.
MRI.head(n=2)

Unnamed: 0,번호,등록번호,촬영일,판독문,도출이유,Listhesis,scoliosis,fracture,spondylosis,Annular fissure,...,nerve root displaced/compressed,lateral recess stenosis,spondylolysis,Schmorl's node,osteophyte,disc space narrowing,any stenosis,disc sequestration,intradiscal vacuum,disc sequestration.1
0,2011-1,10003613,2011.3.24,CONCLUSION : \nIsthmic spondylolisthesis (Meye...,"spondylolisthesis 있어서 listhesis에 1, with bulgi...",1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2011-2,10007037,2011.9.15,CONCLUSION : \n1. Severe central canal stenosi...,Severe central canal stenosis로 central stenosi...,0.0,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
