In [None]:
import pandas as pd
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 170)
pd.set_option('display.max_colwidth', 100000)
import numpy as np
from matplotlib import pyplot as plt
import torch
import fastai
from fastai.vision.all import *
from fastai.distributed import *
import sklearn.metrics as skm
from unidecode import unidecode


In [None]:
# Load the ICD-10 code -> description table from the HSJ source (shared in drive).
path_icd = Path('ICD/cie10.csv')

ICD_df = pd.read_csv(path_icd,  delimiter='\t', encoding='iso-8859-1')
# Lookup dict built from the table as read: keys are whitespace-stripped codes,
# values are the descriptions (not stripped at this point).
ICD = dict(zip(ICD_df.cie10_cod.str.strip(), ICD_df.cie10_des))


### generate ICD dictionary file in tagtog format
ICD_df = ICD_df.drop_duplicates(subset=['cie10_cod'], keep='last')
ICD_df.cie10_des = ICD_df.cie10_des.str.strip()
ICD_df.cie10_cod = ICD_df.cie10_cod.str.strip().str.replace('/', '_') #should be reverted for dictionary lookup
def f(x):
    """Join at most the first 7 tokens of `x` with single spaces."""
    s = ' '.join(x[:7])
    return s
# Raw string: '\s' in a plain literal is an invalid escape sequence, which newer
# Python versions warn about. The pattern itself is unchanged.
ICD_df.cie10_des = ICD_df.cie10_des.str.split(r'\s').apply(lambda x: f(x))

ICD_df.to_csv('ICD/cie10.tsv', index = False, header = False, sep = '\t')

In [None]:
# Root folder of the data export; the table-loading loop iterates over its entries.
path = Path('datos_7')
# Show the folder contents. `.ls()` is not a stdlib pathlib method; presumably it
# is provided through the fastai star-import (TODO confirm).
path.ls()

In [None]:
# Load every tab-separated table found under the '17' entry of `path` into
# `dfs`, keyed by the cleaned table name.
dfs = {}
for dh in path.ls():
    if dh.stem == '17':
        for table_path in dh.ls():
            pd_name = table_path.stem
            pd_name = re.sub(r"_\d+$", "", pd_name) #table name remove 'area de salud - number '
            pd_name = re.sub(r"_OC$", "", pd_name) #table name remove 'orion clinic - OC'
            print(pd_name)
            try:
                df = pd.read_csv(table_path,  delimiter='\t')
            except UnicodeDecodeError:
                # Not decodable with the default encoding: retry as Latin-1.
                # Any other failure (missing file, parse error) now propagates
                # instead of being retried blindly.
                df = pd.read_csv(table_path, encoding='iso-8859-1', delimiter='\t')

            print(df.columns)
            dfs[pd_name] = df
    

In [None]:
from datetime import datetime, timedelta
def format_date(s, source='HSJ'):
    """Parse `s` into a `datetime`, trying each known layout in order.

    Args:
        s: value to parse; it is converted with `str()` first, so numbers
           (e.g. a year of birth) are accepted.
        source: accepted for interface compatibility; not used by the parser.

    Returns:
        The `datetime` produced by the first layout that matches, or `None`
        when no layout matches.

    Usage example: `(y - x).days` gives the delta between two parsed dates in days.
    """
    text = str(s)
    # Order matters: the first matching layout wins.
    layouts = ('%Y/%m/%d %H:%M:%S', '%d/%m/%Y %H:%M:%S', '%Y-%m-%d %H:%M:%S',
               '%Y/%d/%m %H:%M:%S', '%Y%m%d', '%Y')
    for fmt in layouts:
        try:
            return datetime.strptime(text, fmt)
        except ValueError:
            # strptime signals a layout mismatch with ValueError; try the next one.
            continue
    return None
    



In [None]:

class Event:
    """Generic timestamped event attached to a patient.

    Values assigned to `start_date` and `end_date` are passed through
    `format_date`, so the stored value is whatever that helper returns.
    `event_type` and `event_value` are stored as given.
    """
    def __init__(self, start_date, end_date=None, event_type=None, event_value=None):
        # start_date is assigned before end_date, as subclasses override the setters.
        self.start_date, self.end_date = start_date, end_date
        self.event_type, self.event_value = event_type, event_value

    @property
    def start_date(self):
        return self._start_date

    @start_date.setter
    def start_date(self, d, source = 'HSJ'):
        self._start_date = format_date(d, source)

    @property
    def end_date(self):
        return self._end_date

    @end_date.setter
    def end_date(self, d, source = 'HSJ'):
        self._end_date = format_date(d, source)

    def __repr__(self):
        """One-line summary; the span in days is appended when both dates are set."""
        summary = f"{self.start_date} {self.event_type}: {self.event_value}"
        if not (self.end_date and self.start_date):
            return summary
        elapsed = self.end_date - self.start_date
        return f"{summary}. Total days hospitalization: {elapsed.days}"
    
    
    
    

In [None]:
class Vs(Event):
    """Vital-sign measurement.

    `hour` is an offset in minutes: the start_date setter turns it into
    hour-of-day (`// 60`) and minute (`% 60`) on the parsed date.
    `event_value` is stored as a float when it parses as a decimal number
    (comma or dot separator), otherwise as given (strings are transliterated
    with `unidecode`).
    """
    def __init__(self, start_date, end_date=None, event_type='VS', vs_name = None, event_value=None, hour = None):
        # `hour` must be stored first: the start_date setter reads it.
        self.hour = hour
        self.start_date = start_date
        self.vs_name = vs_name
        self.end_date = end_date
        self.event_type = event_type
        self.event_value = event_value

    @property
    def start_date(self):
        return self._start_date

    @start_date.setter
    def start_date(self, d):
        d = format_date(d)
        hour = getattr(self, 'hour', None)
        # Apply the time of day only when both pieces are usable. Previously the
        # default hour=None (or a missing hour) raised inside int(), and an
        # unparseable date raised on `.replace`; now the parsed date (possibly
        # None, as in Event) is stored unchanged in those cases.
        if d is not None and hour is not None and not pd.isna(hour):
            minutes = int(hour)
            d = d.replace(hour=minutes // 60, minute=minutes % 60)
        self._start_date = d

    @property
    def event_value(self):
        return self._event_value

    @event_value.setter
    def event_value(self, d): #vs value
        try:
            self._event_value = float(d.replace(',', '.'))
        except (AttributeError, TypeError, ValueError):
            # Not a numeric string: keep the value, transliterating text only.
            self._event_value = unidecode(d) if (d and type(d) == str) else d

    @property
    def vs_name(self):
        return self._vs_name

    @vs_name.setter
    def vs_name(self, t):
        try:
            t = unidecode(t) if t else None
        except Exception:
            # Best effort: a name that cannot be transliterated is kept as given.
            pass
        self._vs_name = t

    def __repr__(self):
        m = f"{self.start_date} {self.event_type}: {self.vs_name} {self.event_value}"
        return m
    
    
def _icd_description(code):
    """Look `code` up in `ICD`, falling back to progressively shorter prefixes.

    The full code is tried first, then the code with 1, 2, ... trailing
    characters removed, down to a single character. Returns the first truthy
    description found, otherwise the result of the last lookup (falsy).
    Codes that are not strings (e.g. None) get only the direct lookup, since
    they cannot be sliced.
    """
    decoded = ICD.get(code)
    if decoded or not isinstance(code, str):
        return decoded
    for prefix_len in range(len(code) - 1, 0, -1):
        decoded = ICD.get(code[:prefix_len])
        if decoded:
            break
    return decoded


class Diagnosis(Event):
    """Diagnosis event whose `event_value` is a code resolved against `ICD`."""
    def __init__(self, start_date, end_date=None, event_type='Diagnosis', event_value=None, desc = None):
        super().__init__(start_date, end_date, event_type, event_value)
        # Triggers the ICD lookup; the setter needs event_value to be set already.
        self.desc = desc

    @property
    def desc(self):
        return self._desc

    @desc.setter
    def desc(self, desc):
        # The assigned value is not used: the description always comes from ICD.
        decoded = _icd_description(self.event_value)
        if not decoded:
            # Report codes that could not be resolved, even by prefix.
            print(self.event_value)
        self._desc = decoded

    def __repr__(self):
        return f"{self.end_date} {self.event_type}: {self.event_value} {self.desc}"

class Procedure(Event):
    """Procedure event whose `event_value` is a code resolved against `ICD`."""
    def __init__(self, start_date, end_date=None, event_type='Procedure', event_value=None, desc = None):
        super().__init__(start_date, end_date, event_type, event_value)
        # Triggers the ICD lookup; the setter needs event_value to be set already.
        self.desc = desc

    @property
    def desc(self):
        return self._desc

    @desc.setter
    def desc(self, desc):
        # The assigned value is not used: the description always comes from ICD.
        decoded = _icd_description(self.event_value)
        if not decoded:
            # Report codes that could not be resolved, even by prefix.
            print(self.event_value)
        self._desc = decoded

    def __repr__(self):
        return f"{self.end_date} {self.event_type}: {self.event_value} {self.desc}"
        
class Medication(Event):
    """Drug or fluid administration event.

    `event_value` holds the active ingredient. For `event_type == 'Fluid'` the
    dose is given as a (volume, duration) pair and stored as their floor
    quotient; otherwise the dose is a single number. Unparseable doses are
    stored as NaN.
    """
    # The default for event_type used to be the bare name `event`, which this
    # file never defines; a plain label is used instead. All visible callers
    # pass event_type explicitly.
    def __init__(self, start_date, end_date=None, event_type='Medication', event_value=None, dose = None, unit = None, freq = None, route = None, atc = None):
        # Event.__init__ stores event_type and event_value; the dose setter
        # below depends on event_type being set.
        super().__init__(start_date, end_date, event_type, event_value)
        self.dose = dose
        self.unit = unit
        # Goes through the `freq` property; previously only a plain `frequency`
        # attribute was set, so reading `.freq` raised AttributeError.
        self.freq = freq
        self.route = route
        self.atc = atc

    @staticmethod
    def _to_float(x):
        """Convert a number or a decimal string (comma or dot separator) to float."""
        return float(str(x).replace(',', '.'))

    @property
    def event_value(self):
        return self._event_value

    @event_value.setter
    def event_value(self, drug): #active ingredient
        self._event_value = drug

    @property
    def dose(self):
        return self._dose

    @dose.setter
    def dose(self, d):
        try:
            if (self.event_type == 'Fluid'): #obtain vol/duration in ml/h
                v,h = d[0], d[1]
                # NOTE(review): floor division drops the fractional part of the
                # rate - kept as in the original, confirm it is intended.
                self._dose = self._to_float(v)//self._to_float(h)
            else:
                # str() first, as in the fluid branch: a dose that is already
                # numeric used to fail on `.replace` and become NaN.
                self._dose = self._to_float(d)
        except Exception:
            # Missing, malformed or zero-duration input: no usable dose.
            self._dose = np.nan

    @property
    def unit(self):
        return self._unit

    @unit.setter
    def unit(self, u):
        self._unit = u

    @property
    def freq(self):
        return self._freq

    @freq.setter
    def freq(self, fre):
        self._freq = fre

    # Alias kept so code that reads or assigns `.frequency` keeps working.
    @property
    def frequency(self):
        return self._freq

    @frequency.setter
    def frequency(self, fre):
        self._freq = fre

    @property
    def route(self):
        return self._route

    @route.setter
    def route(self, rou):
        self._route = rou

    @property
    def atc(self):
        return self._atc

    @atc.setter
    def atc(self, atc):
        self._atc = atc

    def __repr__(self):
        # A NaN dose and an empty unit are rendered as empty strings.
        dose = self.dose if np.isnan(self.dose) == False else ''
        unit = self.unit if self.unit else ''
        return f"{self.start_date} {self.event_type}: {self.event_value} {dose } {unit } each: {self.freq} route: {self.route}"

class Oxigen(Event):
    """Oxygen-therapy event.

    `event_value` is coerced with `float()` and stored as NaN when that fails;
    `method` is stored as given.
    """
    def __init__(self, start_date, end_date=None, event_type='Oxigen', event_value=None, method = None):
        super().__init__(start_date, end_date, event_type, event_value)
        self.method = method
        self.event_value = event_value

    @property
    def event_value(self):
        return self._event_value

    @event_value.setter
    def event_value(self, v):
        try:
            parsed = float(v)
        except:
            parsed = np.nan
        self._event_value = parsed

    @property
    def method(self):
        return self._method

    @method.setter
    def method(self, c):
        self._method = c

    def __repr__(self):
        # NaN values and empty methods are rendered as empty strings.
        shown_value = '' if np.isnan(self.event_value) else self.event_value
        shown_method = self.method or ''
        return f"{self.start_date} {self.event_type}: {shown_value} {shown_method}"
    
    
class Lab(Event):
    """Laboratory result.

    `event_value` is stored as a float when it parses as a decimal number
    (comma or dot separator), otherwise as given. `limits` is parsed from the
    reference-range text; when it evaluates to a list with at least two items,
    `norm_value` is the result rescaled so that the lower limit maps to 0 and
    the upper limit to 1. Otherwise `norm_value` is None.
    """
    # The default for test_name used to be the bare name `test`, which this
    # file never defines; None is used instead. All visible callers pass
    # test_name explicitly.
    def __init__(self, start_date, end_date=None, event_type='Lab', test_name = None, event_value=None, unit = None, limits = None):
        super().__init__(start_date, end_date, event_type, event_value)
        self.test_name = test_name
        self.unit = unit
        self.limits = limits
        # Only a list triggers normalisation, as before; the length guard
        # avoids an IndexError on a one-element list.
        # NOTE(review): a range written without brackets evaluates to a tuple
        # and is therefore not normalised - confirm that is intended.
        if isinstance(self.limits, list) and len(self.limits) >= 2:
            self.norm_value =  Lab.normalize_value( self.event_value, self.limits[0], self.limits[1])
        else:
            self.norm_value = None

    @property
    def event_value(self):
        return self._event_value

    @event_value.setter
    def event_value(self, d): #lab value
        try:
            self._event_value = float(d.replace(',', '.'))
        except (AttributeError, TypeError, ValueError):
            # Not a numeric string: keep the value as given.
            self._event_value = d

    @property
    def test_name(self):
        return self._test_name

    @test_name.setter
    def test_name(self, t):
        try:
            t = unidecode(t) if t else None
        except Exception:
            # Best effort: report the name that could not be transliterated
            # and keep it as given.
            print(t)
        self._test_name = t

    @property
    def unit(self):
        return self._unit

    @unit.setter
    def unit(self, u):
        self._unit = u

    @property
    def limits(self):
        return self._limits

    @limits.setter
    def limits(self, limits):
        try:
            # Decimal commas become dots and the range dash becomes a comma.
            expr = str(limits).replace(',','.').replace('-',',')
            # An upper bound only ('<x') is read as the range 0..x.
            expr = expr.replace('<', '0,')
            #if '>' in expr:
                #expr = [expr.replace('>', ''), np.nan()]
            # NOTE(review): eval() runs text that comes from the data files;
            # consider a parser that only accepts numeric literals.
            self._limits = eval(expr)
        except Exception:
            # Anything that does not evaluate is treated as "no limits".
            self._limits = None

    @classmethod
    def normalize_value(cls, x,mini, maxi): #x normalized = (x - x minimum) / (x maximum - x minimum)
        """Return (x - mini) / (maxi - mini), or None when it cannot be computed."""
        x_norm = None
        try:
            x_norm = (x-mini) / (maxi - mini)
        except Exception:
            # Non-numeric operands or a zero-width range.
            pass
        return x_norm

    def __repr__(self):
        m = ''
        norm_value = round(self.norm_value, 2) if type(self.norm_value) == float else self.norm_value
        unit = self.unit if self.unit else ''
        if (pd.isna(norm_value) == True):
            m =  f"{self.start_date} {self.event_type}: {self.test_name} {self.event_value} {unit }"
        else:
            m = f"{self.start_date} {self.event_type}: {self.test_name} {self.event_value} {unit } Norm_value: {norm_value}"

        return m
    
    
class Report(Event):
    """Free-text report event; string values are transliterated with `unidecode`."""
    # The default for event_type used to be the bare name `event`, which this
    # file never defines; a plain label is used instead. All visible callers
    # pass event_type explicitly.
    def __init__(self, start_date, end_date=None, event_type='Report', event_value=None):
        # Event.__init__ assigns event_value through the property below.
        super().__init__(start_date, end_date, event_type, event_value)

    @property
    def event_value(self):
        return self._event_value

    @event_value.setter
    def event_value(self, t): #text of the event
        # Non-empty strings are transliterated; anything else is kept as given.
        t = unidecode(t) if (t and type(t) == str) else t
        self._event_value = t

    def __repr__(self):
        m = f"{self.start_date} {self.event_type}: {self.event_value}"
        return m
    
    
class Image(Event):
    """Imaging study event carrying the report text, label locations and CUIs.

    `start_date` is expected to be text that evaluates to a sequence; its first
    element is used as the study date (presumably the text form of a Python
    list - TODO confirm against the source CSV).
    """
    # The default for event_type used to be the bare name `event`, which this
    # file never defines; a plain label is used instead. The visible caller
    # passes event_type explicitly.
    def __init__(self, start_date, end_date=None, event_type='Image', event_value=None, cuis = None, label_locs = None):
        # NOTE(review): eval() runs text that comes from the data files;
        # consider a literal-only parser.
        self.date = eval(start_date)[0]
        # Event.__init__ assigns event_value through the property below.
        super().__init__(self.date, end_date, event_type, event_value)
        #undo days shift
        self.start_date = self.start_date + timedelta(days=10)
        self.cui_list = cuis
        self.label_locs = label_locs

    @property
    def event_value(self):
        return self._event_value

    @event_value.setter
    def event_value(self, t): #text of the image
        # Non-empty strings are transliterated; anything else is kept as given.
        t = unidecode(t) if (t and type(t) == str) else t
        self._event_value = t

    @property
    def label_locs(self):
        return self._label_locs

    @label_locs.setter
    def label_locs(self, label_locs):
        self._label_locs = label_locs

    @property
    def cui_list(self):
        return self._cui_list

    @cui_list.setter
    def cui_list(self, cuis):
        self._cui_list = cuis

    def __repr__(self):
        m = f"{self.start_date} {self.event_type}: {self.event_value} {self.label_locs}  {self.cui_list}"
        #m = f"{self.start_date} {self.event_type}: {self.event_value} "
        return m


In [None]:
class Patient:
    """A patient with demographic data and a list of events.

    Raises:
        ValueError: from the `age` setter when the age is outside 0..120 or is
            not numeric, and from the `gender` setter when the value is not
            'Hombre' or 'Mujer'.
    """
    def __init__(self, patient_id, age=None, gender=None, health_dep='DepartmentUNK'):
        self.id = patient_id
        self.health_dep = health_dep
        self.age = age
        self.gender = gender
        self.events = []

    @property #health department
    def health_dep(self):
        return self._health_dep

    @health_dep.setter
    def health_dep(self, hd):
        self._health_dep = hd

    @property
    def age(self):
        return self._age

    @age.setter
    def age(self, a): #accepts either age in years or year of birth
        # (A leftover debug print of every incoming value was removed here.)
        y = format_date(a) #yyyy birth

        if y:
            # Parsed as a date: whole years elapsed until now (365-day years).
            n = datetime.now()
            a = (n - y).days//365
        else:
            # Otherwise read it as an age, accepting a decimal comma.
            a = float(str(a).replace(',', '.'))

        if(a < 0 or a > 120):
            raise ValueError("Failed age: out of range")
        self._age = a

    @property
    def gender(self):
        return self._gender

    @gender.setter
    def gender(self, s):
        cat = ['man','woman']
        if(s in ['Hombre','Mujer']) == False:
            raise ValueError("Failed sex: values should be in ['Hombre','Mujer']")
        self._gender = cat[0] if s == 'Hombre' else cat[1]

    @property
    def events(self):
        return self._events

    @events.setter
    def events(self, events):
        self._events = events

    def sort_events(self):
        """Order the events by ascending start_date."""
        self.events = sorted(self.events, key=lambda o: o.start_date, reverse=False)

    def __repr__(self):
        return f"{self.health_dep}:{self.id} is a {self.age} years old {self.gender}"
    



In [None]:
plist = []
def f(g):
    """Build one Patient, with its stay-related events, from that patient's rows.

    `g` is the group of rows sharing one `paciente` value. Demographics are
    taken from the first row. Events are appended in this order - admissions,
    bed on admission from the ER, transfers 1 to 5, discharges, diagnoses,
    procedures - and then sorted by start date.
    """
    first = g.iloc[0]
    patient = Patient(first.paciente, age = first.paciente_fnac, gender = first.paciente_sexo)
    rows = [row for _, row in g.iterrows()]

    def is_timestamp(value):
        # Date fields count as present when their text is longer than 10 characters.
        return len(str(value)) > 10

    def bracketed(value):
        # Codes are the pieces enclosed in square brackets.
        return re.findall( r'\[(.*?)\]', str(value))

    # One admission event per row, unconditionally.
    patient.events.extend(
        [Event(row.fecha_ingreso, end_date=row.fecha_alta, event_type='State Admission', event_value='ER')
         for row in rows])

    # Bed assigned on admission from the ER; it ends at the first transfer.
    patient.events.extend(
        [Event(row.fecha_ingreso_desde_urgencias, end_date=row.tras_1_fecha, event_type='State Bed', event_value=row.servicio_ingreso)
         for row in rows if is_timestamp(row.fecha_ingreso_desde_urgencias)])

    # Transfers 1..5: each one ends when the next transfer starts.
    for n in range(1, 6):
        start_col, end_col, ward_col = f'tras_{n}_fecha', f'tras_{n + 1}_fecha', f'tras_{n}_servicio'
        patient.events.extend(
            [Event(row[start_col], end_date=row[end_col], event_type='State Bed', event_value=row[ward_col])
             for row in rows if is_timestamp(row[start_col])])

    patient.events.extend(
        [Event(row.fecha_alta, end_date=None, event_type='State Discharge', event_value=row.destino_alta)
         for row in rows if is_timestamp(row.fecha_alta)])

    # Diagnoses and procedures are both dated at discharge.
    diagnoses = []
    for row in rows:
        for dx in bracketed(row.diagnosticos):
            if len(str(row.diagnosticos)) > 4:
                diagnoses.append(Diagnosis(row.fecha_alta, end_date=row.fecha_alta, event_value=dx))
    patient.events.extend(diagnoses)

    procedures = []
    for row in rows:
        for pc in bracketed(row.procedimientos):
            if len(str(row.procedimientos)) > 4:
                procedures.append(Procedure(row.fecha_alta, end_date=row.fecha_alta, event_value=pc))
    patient.events.extend(procedures)

    patient.sort_events()
    return patient


def load_patients_HSJ(dfs, filter_patients = None):
    """Build one patient object per `paciente` found in DATOS_EVOLUCION_PACIENTES.

    Args:
        dfs: mapping of table name to DataFrame; must hold 'DATOS_EVOLUCION_PACIENTES'.
        filter_patients: optional collection of patient ids to keep; a falsy
            value keeps every patient.

    Returns:
        The `.values` array of the per-patient results produced by `f`.
    """
    evolution = dfs['DATOS_EVOLUCION_PACIENTES']
    if filter_patients:
        evolution = evolution.loc[evolution.paciente.isin(filter_patients)]
    per_patient = evolution.groupby('paciente').apply(lambda grp: f(grp) )
    return per_patient.values

# No filter: build patient objects for every patient in DATOS_EVOLUCION_PACIENTES.
filter_patients = None
plist = load_patients_HSJ(dfs, filter_patients)


In [None]:
# Index the patients by id so that the following cells can attach further events to them.
pdict = {p.id:p for p in plist}



In [None]:
#update patient dict with image tests
anon = pd.read_csv(path/'ANONIMIZACION.txt', sep = '\t')
# Use '-' instead of '_' in the MIDS ids so they compare equal to PatientID below.
anon.MIDS_ID = anon.MIDS_ID.str.replace('_','-')
def image(g,pdict):
    """Attach the imaging studies in `g` (rows of one PatientID) to the matching patient.

    The PatientID is translated to the patient key through the `anon` table
    (MIDS_ID -> SIPANON). Nothing happens when the id is unknown to `anon` or
    the patient is not in `pdict`. Works by side effect; returns None.
    """
    mids_id = str(g.iloc[0].PatientID)
    if mids_id not in anon.MIDS_ID.values:
        return
    sip = anon.loc[anon.MIDS_ID == mids_id, 'SIPANON'].values[0]
    #print(f'missing patients in datos_evolucion_pacientes but found in images ')
    if sip not in pdict:
        return

    patient = pdict[sip]
    # Replace missing label locations by None before building the events.
    g.loc[pd.isna(g.LabelsLocalizationsBySentence) == True, ['LabelsLocalizationsBySentence']] = None
    studies = []
    for _, row in g.iterrows():
        if len(str(row['Study Date'])) > 8:
            studies.append(
                Image(row['Study Date'],
                      end_date=None,
                      event_type=f"{row.Modality} {row['Body Part Examined']}",
                      label_locs = row.LabelsLocalizationsBySentence,
                      event_value=row.Report,
                      cuis = row.labelCUIS))
    patient.events.extend(studies)
    patient.sort_events()
    pdict[sip] = patient
    return

# Rx-thorax-automatic-captioning/COVID_QC$ vi COVID19_POSI_v2.0.csv to obtain study_date and session
# Labels_covid_12.csv to obtain text, labels and cuis 
path_rx = Path('../Rx-thorax-automatic-captioning')
path_rx.ls()
# Both quality-control exports are reduced to the same columns and stacked.
qc_columns = ['Subject','Session','Study Date','Modality', 'Body Part Examined']
cq1 = pd.read_csv(path_rx /'COVID_QC/COVID19_POSI_v2.0.csv')[qc_columns]
cq2 = pd.read_csv(path_rx /'COVID_QC/COVID19_POSI_v1.0.csv')[qc_columns]
img_cq = pd.concat([cq1,cq2])

print(img_cq.columns)
label_columns = ['PatientID','ReportID','Report','Labels', 'LabelsLocalizationsBySentence', 'labelCUIS', 'LocalizationsCUIS']
img_labels = pd.read_csv(path_rx /'Labels_covid_12.csv')[label_columns]
print(img_labels.columns)
# Join study metadata with report labels (Session == ReportID), one row per report.
mer = img_cq.merge(
    img_labels,
    how = 'inner',
    left_on = ['Session'],
    right_on = ['ReportID'],
    indicator = True,
).drop_duplicates('ReportID')


mer.groupby('PatientID').apply(lambda x: image(x, pdict) )

In [None]:
#update patient dict with VS
def vs(g,pdict ):
    """Attach the vital-sign rows in `g` (one patient) to that patient in `pdict`.

    Rows are kept only when `fecha` is longer than 10 characters and
    `constante` is not null. Ids missing from `pdict` are printed and skipped.
    Works by side effect; returns None.
    """
    x = g.iloc[0].paciente
    #print(f'missing patients in datos_evolucion_pacientes but found in PRUEBAS_COVID ')
    if x not in pdict:
        print(x)
    else:
        p = pdict[x]
        # `and` instead of `&`: the original `len(...)>10 & (cond)` parsed as
        # `len(...) > (10 & cond)`, i.e. `len(...) > 0`, so neither the length
        # check nor the null check was actually applied.
        elist = [Vs(e.fecha,
                        end_date=None , event_type='VS', vs_name = e.constante, event_value=e.valor, hour = e.hora
                       ) for i,e in g.iterrows() if len(str(e.fecha)) > 10 and not pd.isnull(e.constante)]
        p.events.extend(elist)

        p.sort_events()

        pdict[x] = p

# Run vs() once per patient group of DATOS_CONSTANTES; it updates pdict by side effect.
dfs['DATOS_CONSTANTES'].groupby('paciente').apply(lambda x: vs(x, pdict) ).values

In [None]:
#update patient dict with reports
def report(g,pdict, report_type, report_field ):
    """Attach one Report event per usable row of `g` (one patient) to that patient.

    Args:
        g: rows of a single patient from one of the report tables.
        pdict: patient id -> patient object; updated in place.
        report_type: label stored as the event type.
        report_field: column holding the text to store.

    A row is used when `report_field` is a non-null string and `fecha_informe`
    is longer than 10 characters. Ids missing from `pdict` are printed and
    skipped. Returns None.
    """
    x = g.iloc[0].paciente
    #print(f'missing patients in datos_evolucion_pacientes but found in DATOS_INFORMES_ANAMNESIS')
    if x not in pdict:
        print(x)
    else:
        g = g.loc[g[report_field].notna()]
        p = pdict[x]
        # `and` instead of `&`: the original `len(...)>10 & (a) & (b)` parsed as
        # `len(...) > (10 & a & b)`, i.e. `len(...) > 0`, so the length and type
        # checks were never applied. A string is never equal to False, so the
        # old `!= False` test is covered by the type check.
        elist = [Report(e.fecha_informe, event_type=report_type, event_value=e[report_field]
                       ) for i,e in g.iterrows() if len(str(e.fecha_informe)) > 10 and isinstance(e[report_field], str)]
        p.events.extend(elist)

        p.sort_events()

        pdict[x] = p

# (report_type, report_field) pairs shared by the ER-discharge,
# hospital-discharge and service-change report tables.
_DISCHARGE_REPORT_FIELDS = [
    ('Text CX', 'enfermedad_actual'),
    ('Text PE', 'exploracion_fisica'),
    ('Text MH', 'antecedentes'),
    ('Text EV', 'evolucion'),
    ('Text 1DX', 'COD_diag_principal'),
    ('Text 2DX', 'COD_diag_secundario'),
    ('Text PX', 'COD_proc_diag_terap'),
    ('Text QX', 'COD_proc_quir'),
    ('Text TX', 'tratamiento'),
    ('Text RE', 'recomendaciones'),
]

# Report tables and the fields extracted from each, in processing order.
# The order matches the former one-call-per-line version, so events with equal
# timestamps keep the same relative order after sorting.
_REPORT_SOURCES = [
    ('DATOS_INFORMES_ANAMNESIS', [
        ('Text CX', 'enfermedad_actual'),
        ('Text PE', 'exploracion_fisica'),
        ('Text MH', 'antecedentes'),
        ('Text sDX', 'COD_sospecha_diagnostica'),
        ('Text PLAN', 'plan'),
    ]),
    ('DATOS_INFORMES_ALTA_URGENCIAS', _DISCHARGE_REPORT_FIELDS),
    ('DATOS_INFORMES_ALTA_HOSPITALIZACION', _DISCHARGE_REPORT_FIELDS + [('Text Discharge', 'destino_alta')]),
    ('DATOS_INFORMES_NOTAS_MED_EVOLUCION', [
        ('Text EV', 'seguimiento_actual'),
        ('Text sDX', 'COD_sospecha_diagnostica'),
        # ('Text TE', 'exploracion_complementaria'),  # disabled
        ('Text PLAN', 'plan'),
    ]),
    ('DATOS_INFORMES_CAMBIO_SERVICIO', _DISCHARGE_REPORT_FIELDS + [('Text Discharge', 'destino_alta')]),
]

for _table_name, _report_fields in _REPORT_SOURCES:
    for _report_type, _report_field in _report_fields:
        # report() updates pdict by side effect; the apply result is not needed.
        # The lambda is invoked immediately, so reading the loop variables is safe.
        dfs[_table_name].groupby('paciente').apply(
            lambda x: report(x, pdict, _report_type, _report_field))

In [None]:
#update patient dict with COVID tests
def lab_covid(g,pdict ):
    """Attach the microbiology rows in `g` (one patient) as 'Lab Sars-cov-2' events.

    Rows are used when `fecha_prueba` is longer than 10 characters. Ids missing
    from `pdict` are printed and skipped. Works by side effect; returns None.
    """
    patient_id = g.iloc[0].paciente
    #print(f'missing patients in datos_evolucion_pacientes but found in PRUEBAS_COVID ')
    if patient_id not in pdict:
        print(patient_id)
        return
    patient = pdict[patient_id]
    tests = []
    for _, row in g.iterrows():
        if len(str(row.fecha_prueba)) > 10:
            tests.append(Lab(row.fecha_prueba,
                             end_date=None,
                             event_type='Lab Sars-cov-2',
                             test_name = row.prueba,
                             event_value=row.resultado))
    patient.events.extend(tests)
    patient.sort_events()
    pdict[patient_id] = patient

dfs['DATOS_MICROBIOLOGIA'].groupby('paciente').apply(lambda x: lab_covid(x, pdict) ).values

In [None]:
#update patient dict with lab tests
def lab(g,pdict ):
    """Attach the laboratory rows in `g` (one patient) as 'Lab' events.

    Each event carries the test name, result, unit and reference range of its
    row. Rows are used when `fecha_prueba` is longer than 10 characters. Ids
    missing from `pdict` are printed and skipped. Works by side effect;
    returns None.
    """
    patient_id = g.iloc[0].paciente
    #print(f'missing patients in datos_evolucion_pacientes but found in DATOS_LABORATORIO')
    if patient_id not in pdict:
        print(patient_id)
        return
    patient = pdict[patient_id]
    results = []
    for _, row in g.iterrows():
        if len(str(row.fecha_prueba)) > 10:
            results.append(Lab(row.fecha_prueba,
                               end_date=None,
                               event_type='Lab',
                               test_name = row.prueba_lab,
                               event_value=row.resultado_lab,
                               unit = row.UNIDAD,
                               limits=row.REFERENCIA))
    patient.events.extend(results)
    patient.sort_events()
    pdict[patient_id] = patient

dfs['DATOS_LABORATORIO'].groupby('paciente').apply(lambda x: lab(x, pdict) ).values

In [None]:
#update patient dict with fluid therapy
def fluid(g,pdict ):
    """Attach the fluid-therapy rows in `g` (one patient) as 'Fluid' events.

    The dose is passed as the (volumen, duracion) pair with unit 'ml/h'. Rows
    are used when `fecha_inicio` is longer than 10 characters and
    `valida_farmacia` equals 1. Ids missing from `pdict` are printed and
    skipped. Works by side effect; returns None.
    """
    patient_id = g.iloc[0].paciente

    #print(f'missing patients in datos_evolucion_pacientes but found in FLUIDOTERAPIA_OC')
    if patient_id not in pdict:
        print(patient_id)
        return
    patient = pdict[patient_id]
    infusions = []
    for _, row in g.iterrows():
        if (len(str(row.fecha_inicio))>10) & (row.valida_farmacia == 1):
            infusions.append(Medication(row.fecha_inicio,
                                        end_date=row.fecha_fin,
                                        event_type='Fluid',
                                        event_value=row.principio_activo,
                                        dose = (row.volumen,row.duracion),
                                        unit = 'ml/h',
                                        atc=row.atc))
    patient.events.extend(infusions)
    patient.sort_events()
    pdict[patient_id] = patient

dfs['DATOS_FLUIDOTERAPIA'].groupby('paciente').apply(lambda x: fluid(x, pdict) ).values

In [None]:
#update patient dict with gas therapy
def gas(g,pdict ):
    """Attach the gas-therapy rows in `g` (one patient) as 'Oxigen' events.

    Each event carries the row's `fio2` as value and `metodo` as method. Rows
    are used when `fecha_inicio` is longer than 10 characters. Ids missing from
    `pdict` are printed and skipped. Works by side effect; returns None.
    """
    patient_id = g.iloc[0].paciente
    #print(f'missing patients in datos_evolucion_pacientes but found in gasoterapia_oc')
    if patient_id not in pdict:
        print(patient_id)
        return
    patient = pdict[patient_id]
    sessions = []
    for _, row in g.iterrows():
        if len(str(row.fecha_inicio)) > 10:
            sessions.append(Oxigen(row.fecha_inicio,
                                   end_date=row.fecha_fin,
                                   event_type='Oxigen',
                                   event_value=row.fio2,
                                   method=row.metodo))
    patient.events.extend(sessions)
    patient.sort_events()

dfs['DATOS_GASOTERAPIA'].groupby('paciente').apply(lambda x: gas(x, pdict) ).values
    

In [None]:
#update patient dict with medication
def drug(g,pdict ):
    """Attach the medication rows in `g` (one patient) as 'Medication' events.

    Each event carries the active ingredient, dose, unit, frequency, route and
    ATC code of its row. Rows are used when `fecha_administracion_paciente` is
    longer than 10 characters. Ids missing from `pdict` are printed and
    skipped. Works by side effect; returns None.
    """
    patient_id = g.iloc[0].paciente
    #print(f'missing patients in datos_evolucion_pacientes but found in MEDICACION')
    if patient_id not in pdict:
        print(patient_id)
        return
    patient = pdict[patient_id]
    administrations = []
    for _, row in g.iterrows():
        if len(str(row.fecha_administracion_paciente)) > 10:
            administrations.append(Medication(row.fecha_administracion_paciente,
                                              end_date=row.fecha_fin,
                                              event_type='Medication',
                                              event_value=row.principio_activo,
                                              dose = row.dosis,
                                              unit = row.unidad_medida,
                                              freq = row.frecuencia,
                                              route = row.forma_administracion,
                                              atc=row.atc))
    patient.events.extend(administrations)
    patient.sort_events()
    pdict[patient_id] = patient

dfs['DATOS_MEDICACION'].groupby('paciente').apply(lambda x: drug(x, pdict) ).values
    

In [None]:
#tagtog csv
# Export a small sample of image report texts, one per line under a 'Text' header.
# The handle is not bound to `f`, so the patient builder function `f` is not
# overwritten by this cell, and `with` closes the file even if a write fails.
i = 0
with open('HSJ_image.csv', 'w') as image_csv:
    image_csv.write('Text\n')
    for p in plist[:]:
        for e in p.events:
            if (type(e) == Image):
                s = f'{p.id}: {e.event_value}'
                image_csv.write(s + '\n')
                i += 1
                if i > 10: break
        # The inner `break` only leaves the event loop. Without this second
        # check one extra line was written for every remaining patient, so the
        # sample was never actually capped.
        if i > 10: break


In [None]:
# Dump every patient followed by its events, one per line. Lab results whose
# normalised value lies strictly between 0 and 1 are left out.
# The handle is not bound to `f`, so the patient builder function `f` is not
# overwritten by this cell, and `with` closes the file even if a write fails.
# (alternative scratch target used during development: 'prueba.txt')
with open('HSJ_events_by_patient_4.txt', 'w') as events_file:
    for p in plist[:]:
        events_file.write(f'\nPATIENT: {p}\n')
        for e in p.events:
            if (type(e) == Lab) and type(e.norm_value) == float and (e.norm_value <1 and e.norm_value >0):
                continue
            events_file.write(str(e)+ '\n')

In [None]:
from pandas_profiling import ProfileReport
#profile = ProfileReport(df, minimal=True, pool_size = 32)
# Write one HTML profiling report per loaded table, named after the table.
for key in dfs.keys():
    try:
        profile = ProfileReport(dfs[key],  pool_size = 32)
        profile.to_file(f'{key}.html')
    except Exception as err:
        # Keep going with the remaining tables, but say why this one failed
        # (a bare `except:` also swallowed interrupts and hid the cause).
        print(f'skipped {key}: {err!r}')
    

In [None]:
# Spot check: is COD_proc_quir missing in the first ER-discharge report row of this patient?
# `.values[0]` raises IndexError if the patient has no row in the table.
df = dfs['DATOS_INFORMES_ALTA_URGENCIAS']
pd.isna(df.loc[(df.paciente =='02938240cda555197c41948cfc9c1122128913306d40cb4d045a7456bb885721') ].COD_proc_quir.values[0]) 

In [None]:
# Spot check: distinct active ingredients recorded for one patient.
d = dfs['DATOS_MEDICACION']
d.loc[d.paciente == "4590808e81e65a61eb5585eb349db6e4c29a301183ec9a660c4be5a96537d437"].principio_activo.unique()

In [None]:
# Spot check: distinct `diagnosticos` values recorded for one patient.
d = dfs['DATOS_EVOLUCION_PACIENTES']
d.loc[d.paciente == "02938240cda555197c41948cfc9c1122128913306d40cb4d045a7456bb885721"].diagnosticos.unique()