In [1]:
import pydicom as dcm
import xml.etree.ElementTree as ET
import pandas as pd
from textdistance import ratcliff_obershelp

In [2]:
dcmds = dcm.read_file('RS.1.2.246.352.221.550136246713487469715724200262812558270.dcm')
strsetsq = dcmds.StructureSetROISequence
contstrnames = [structure.ROIName for structure in strsetsq]
contstrnames

['BODY',
 'Ao',
 'Ambos pulmones',
 'Arteria Pulmonar',
 'Corazón',
 'Esófago',
 'GTVp MIP',
 'GTVt',
 'Hígado',
 'ITVt',
 'Médula',
 'MTV',
 'Pared costal',
 'Piel',
 'PTV LSI',
 'Pulmón dcho',
 'Pulmón izqdo',
 'Pulmones-GTVt',
 'Pulmones-PTV',
 'Tiroides',
 'Tráquea',
 'V_Pulmonary',
 'Vena cava sup',
 'via aerea',
 'CouchSurface',
 'CouchInterior']

In [3]:
for strname in contstrnames:
    if len(strname) > 16:
        print(strname)

In [4]:
tree = ET.parse('../protocolos/clinicos/ClinicalProtocol.xml')
root = tree.getroot()
structures = root.find('StructureTemplate').find('Structures')
protstrnames = [structure.get('ID') for structure in structures.findall('Structure')]
protstrnames

['PTV LSI',
 'suma pulmones',
 'medula',
 'plexo braquial',
 'via aerea',
 'traquea',
 'esofago',
 'corazon',
 'aorta',
 'v cava inferior',
 'v cava superior',
 'arteria pulmonar',
 'venas pulmonares',
 'piel',
 'pared costal',
 'higado',
 'plexo braquial']

In [30]:
{strname: ratcliff_obershelp('aurícula derecha', strname) for strname in strnames}

{'BODY': 0.0,
 'Ao': 0.0,
 'Arteria Pulmonar': 0.3125,
 'Aurícula dcha': 0.8275862068965517,
 'Aurícula izqda': 0.6666666666666666,
 'Corazón': 0.08695652173913043,
 'CTV boost izqdo': 0.12903225806451613,
 'CTV mama izqda': 0.3333333333333333,
 'Esófago': 0.08695652173913043,
 'Mama dcha': 0.56,
 'Médula': 0.2727272727272727,
 'PRV DA': 0.09090909090909091,
 'PTV boost izqdo': 0.12903225806451613,
 'PTV mama izqda': 0.3333333333333333,
 'Pulmón dcho': 0.4444444444444444,
 'Pulmón izqdo': 0.2857142857142857,
 'Tiroides': 0.25,
 'Vena cava inf': 0.27586206896551724,
 'Vena cava sup': 0.27586206896551724,
 'Ventrículo dcho': 0.5806451612903226,
 'Ventrículo izqdo': 0.4375}

In [11]:
infstrdf = pd.DataFrame([{strname: ratcliff_obershelp(protstrname, strname) 
                          for strname in contstrnames} 
                        for protstrname in protstrnames], index=protstrnames)

In [15]:
infstrdf['Suggestion'] = infstrdf.idxmax(axis=1)

In [18]:
infstrdf['Suggestion']

PTV LSI                      PTV LSI
suma pulmones         Ambos pulmones
medula                        Médula
plexo braquial               Tráquea
via aerea                  via aerea
traquea                      Tráquea
esofago                      Esófago
corazon                      Corazón
aorta                        Corazón
v cava inferior        CouchInterior
v cava superior        Vena cava sup
arteria pulmonar    Arteria Pulmonar
venas pulmonares      Ambos pulmones
piel                            Piel
pared costal            Pared costal
higado                        Hígado
plexo braquial               Tráquea
Name: Suggestion, dtype: object