In [None]:

from datatable import (dt, f, by, ifelse, update, sort,
                       count, min, max, mean, sum, rowsum)
import pandas as pd
import warnings
warnings.filterwarnings(action='once')
import numpy as np
import datetime
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
 
from pathlib import Path
import os
from concurrent.futures import ProcessPoolExecutor

import classes

In [None]:
#build classes and attribute dictionary to interrogate bigTable
# classes_field_dict maps every public member of the `classes` module whose
# string form mentions 'classes.' to a list of '_'-prefixed property names.
classes_field_dict = {}
for cls_name, cls_obj in classes.__dict__.items():
    if 'classes.' not in str(cls_obj) or str(cls_name).startswith('__'):
        continue

    # own properties of the class, except dunder names and 'events'
    columns = []
    for att, member in cls_obj.__dict__.items():
        if str(att).startswith('__'):
            continue
        if type(member) != property or str(att) == 'events':
            continue
        columns.append(f'_{att}')

    # everything but Event/Patient also gets whatever was recorded for 'Event'
    if str(cls_name) not in ['Event', 'Patient']:
        columns.extend(classes_field_dict['Event'])

    # everything but Patient also gets the patient identification columns
    if str(cls_name) not in ['Patient']:
        columns.extend(['_patient_id', '_p_id', '_age', '_gender'])

    # de-duplicate; the resulting order is whatever set() yields
    classes_field_dict[cls_name] = list(set(columns))

classes_field_dict

In [None]:
# Output directory for the CSV files written by this notebook.
csv_dir = Path('out_EDA/csv')
# mkdir(parents=True, exist_ok=True) creates missing parents and is a no-op when
# the directory already exists, so there is no check-then-create race.
csv_dir.mkdir(parents=True, exist_ok=True)

str(csv_dir)

In [None]:
# Load the event table and export its distinct `_event_type` values to event_types.csv.
DT = dt.Frame("out_EDA/bigTable.jay")
dt.unique(DT[:,f._event_type]).to_csv('event_types.csv')


In [None]:
# Read the exported event types back as a pandas DataFrame.
event_types_classes = pd.read_csv("event_types.csv")
def map_class(x):
    """Map an event type label to the key used in ``classes_field_dict``.

    The label is tested against a fixed list of substrings; the first one
    found decides the result. Labels that match none of them are returned
    unchanged.

    Args:
        x: event type label. Non-string values (e.g. NaN for an empty CSV
           cell, or None) are returned unchanged instead of raising TypeError.

    Returns:
        The mapped class name, or ``x`` itself when nothing matches.
    """
    if not isinstance(x, str):
        return x
    # Order matters: the first matching fragment wins.
    fragment_to_class = (
        ('Lab', 'Lab'),
        ('State', 'Event'),
        ('VS', 'Vs'),
        ('Text', 'Report'),
        ('CHEST', 'Image'),
        ('Fluid', 'Medication'),
    )
    for fragment, class_name in fragment_to_class:
        if fragment in x:
            return class_name
    return x

# Tag each event type with its class key and persist the mapping.
event_types_classes['class'] = event_types_classes['_event_type'].apply(map_class)
event_types_classes.to_csv('event_types_clases.csv')

In [None]:
# Convert the datatable Frame to pandas and parse the event start timestamps.
df = DT.to_pandas()
df['_start_date'] = pd.to_datetime(df['_start_date'])

In [None]:
# Parse the event end timestamps. Unparseable values become NaT so the column
# is always datetime-typed; errors='ignore' is deprecated in pandas and returns
# the input unparsed as soon as a single value cannot be converted.
df['_end_date'] = pd.to_datetime(df['_end_date'], errors='coerce')

In [None]:
# Placeholder for the compact patient id; it is in the column list selected below.
df['_p_id'] = None
# One row per distinct combination of the Patient columns (old index kept as 'index').
patients_df = df.loc[:, classes_field_dict['Patient']].drop_duplicates().reset_index()

In [None]:
#profile each class
# Set to True to (re)generate the pandas-profiling HTML reports under out_EDA/.
profile = False
if profile:
    from pandas_profiling import ProfileReport
    # Keep the report in its own name so the `profile` flag is not overwritten.
    report = ProfileReport(patients_df,  pool_size = 32)
    report.to_file(f'out_EDA/Patient.html')

    # event_types_classes is already a pandas DataFrame (read with pd.read_csv),
    # so it is iterated directly.
    for _, r in event_types_classes.iterrows():
        event_type = r._event_type
        fields = classes_field_dict[r['class']]
        print(event_type, fields)
        df_ = DT[(f._event_type == event_type),fields].to_pandas()
        print(f'Starting to profile: {event_type} size {df_.shape}')
        try:
            report = ProfileReport(df_,  pool_size = 32)
            report.to_file(f'out_EDA/{event_type}.html')
        except Exception as err:
            # Best effort: report the failure and continue with the next event type.
            print(f'skipped {event_type}: {err!r}')
   

In [None]:
# The row label of patients_df becomes the compact integer id of the patient.
patients_df['_p_id'] = patients_df.index
# Lookup table: original patient id -> compact integer id.
p_dict = patients_df.set_index('_patient_id')['_p_id'].to_dict()


In [None]:
#compress patient id to int in field _p_id
# The helper is deliberately NOT called `f`: that name is datatable's column
# selector imported at the top of the notebook, and redefining it breaks any
# cell that uses `f._event_type` when it is run again.
def patient_int_id(patient_id):
    """Return the compact integer id stored in ``p_dict`` for ``patient_id``."""
    return p_dict[patient_id]

df['_p_id'] = df._patient_id.apply(patient_int_id)


In [None]:
#load events of different types in a dictionary of df (df_dict)
# key: event type; value: rows of df of that type, restricted to the columns
# listed for the class the event type was mapped to.
df_dict = {
    row['_event_type']: df.loc[df._event_type == row['_event_type'],
                               classes_field_dict[row['class']]]
    for _, row in event_types_classes.iterrows()
}

In [None]:
# Event types for which a DataFrame was stored in df_dict.
df_dict.keys()

In [None]:
# Count each (_event_value, _desc) pair among Diagnosis events and export the table.
# NOTE(review): to_csv uses its default ',' separator although the file is named .tsv.
dx = (
    df_dict['Diagnosis'][['_event_value', '_desc']]
    .value_counts()
    .to_frame()
    .reset_index()
)
print(dx.shape[0])
dx.to_csv('Terminology/icd_dx_counts.tsv')

In [None]:
# Diagnosis counts after manual selection of covid related dx.
covid_rel_dx = pd.read_csv('Terminology/icd_covid_dx_counts.csv').reset_index()
print(covid_rel_dx.shape[0])
# A row is flagged when any of its cells holds the literal 'x'.
covid_rel_dx['dx_cx'] = covid_rel_dx.apply(
    lambda row: 'x' if 'x' in row.values else None, axis = 1)

# Values of _event_value for the flagged rows.
l_covid_dx = covid_rel_dx.loc[covid_rel_dx.dx_cx == 'x', '_event_value'].values

l_covid_dx

In [None]:
#check whether new icd dx codes were added
# codes present in the Diagnosis events but absent from the curated file
seen_codes = set(df_dict['Diagnosis'][['_event_value', '_desc']]._event_value.unique())
curated_codes = set(covid_rel_dx._event_value.unique())
new = list(seen_codes - curated_codes)
new

In [None]:
# Codes that mark an imaging report as covid related (matched against _cui_list).
# NOTE(review): 'C3669021' is listed twice; confirm no other code was intended.
l_covid_image = [
    'C0277877',
    'C2073538',
    'C3544344',
    'C2073672',
    'C2073583',
    'C1332240',
    'C0521530',
    'C3669021',
    'C3669021',
    'C1443940',
    'C0032285',
    'C1412002',
    'C0032310',
    'C5203670',
    'C5203671',
]

In [None]:
# Distinct result values of 'Lab Sars-cov-2' events whose test name is 'PCR'.
cov_df =  df_dict['Lab Sars-cov-2']
cov_df[cov_df._test_name== 'PCR']._event_value.unique()

In [None]:
# Event types treated as imaging events by the cells below.
image_event_types = ['CHEST CT', 'CHEST x-ray']

In [None]:

def covid_cui(x):
    """Extract the covid related codes from a serialized list of codes.

    Args:
        x: string such as ``"[C0032285, C0277877]"``, or a missing value.

    Returns:
        The list of codes from ``x`` that appear in ``l_covid_image``, or
        ``False`` when there is none or ``x`` is missing. (Previously a missing
        ``x`` raised UnboundLocalError because ``result`` was never assigned.)
    """
    if pd.isna(x):
        return False
    tokens = x.replace('[','').replace(']','').split(',')
    # Drop the blanks/quotes left around each item by the list serialization,
    # otherwise only the first item of "[A, B]" could ever match.
    cuis = (token.strip().strip('\'"') for token in tokens)
    covid_cuis = [cui for cui in cuis if cui in l_covid_image]
    return covid_cuis if covid_cuis else False
# covid_image stays False except on imaging events, where it receives the
# result of covid_cui applied to the event's _cui_list.
df['covid_image'] = False
is_image_event = df._event_type.isin(image_event_types)
df.loc[is_image_event, ['covid_image']] = df[is_image_event]._cui_list.apply(covid_cui)


In [None]:
# Number of distinct PatientID values in Labels_covid_12.csv.
prior_im = pd.read_csv('../Rx-thorax-automatic-captioning/Labels_covid_12.csv')
prior_im.PatientID.nunique()

In [None]:
# Number of distinct PatientID values in Labels_covid_pharma.csv.
im = pd.read_csv('../Rx-thorax-automatic-captioning/Labels_covid_pharma.csv')
im.PatientID.nunique()

In [None]:
# Number of distinct patients (_p_id) with at least one imaging event.
df[df._event_type.isin(image_event_types)]._p_id.nunique()

In [None]:
# Number of distinct patients (_p_id) in the whole event table.
df._p_id.nunique()

In [None]:
# Number of patient ids present both in the event table and in `im`.
len( set(df._patient_id.unique()) & set(im.PatientID.unique()) )

In [None]:
# `x` is another name for df (same object, not a copy).
x = df
# Distinct values recorded on 'State Discharge' events.
discharge = x[((x._event_type == 'State Discharge') )]._event_value
discharge.unique()

In [None]:

# For every method reported on 'Oxigen' events, print the method followed by
# the frequency of the values recorded with it.
ox = x[(x._event_type == 'Oxigen')]._method
for m in ox.unique():
    print(m)
    print(x[(x._event_type == 'Oxigen') & (x._method == m)]._event_value.value_counts())


In [None]:
# Each index label read from the file is a tab-separated line: the first field
# is the code, the remaining fields are its synonyms.
oxygen_terms_raw = pd.read_csv('Terminology/umls_oxygen_techniques.tsv', index_col = 0, header = None).to_dict('index')

oxygen_dic_rev = {}   # lower-cased synonym -> code
oxygen_dic = {}       # code -> list of lower-cased synonyms
for line in oxygen_terms_raw.keys():
    code = line.split('\t')[0]
    synonyms_lower = line.lower().split('\t')[1:]
    oxygen_dic[code] = synonyms_lower
    for synonym in synonyms_lower:
        oxygen_dic_rev[synonym] = code

oxygen_dic

In [None]:
# Codes of low-flow oxygen devices.
oxigen_low = ['C0740087', 'C0181744']

# Codes of high-flow oxygen devices.
# note: non-invasive mechanical ventilation (C0419003) is included as high flow
# as a simplification to help with 'who' outcome scale calculations, VMX is
# excluded (it requires fiO2 > 50% to be considered high flow device)
oxigen_high = [
    'C1960097',
    'C0180824',
    'C0182123',
    'C1960094',
    'C0419003',
    'C0021925',
    'C0877798',
    'C0015357',
]

# Codes of potentially high-flow devices: the list above plus 'C0181748'.
# note: non-invasive mechanical ventilation (C0419003) is included as high flow
# as a simplification to help with 'who' outcome scale calculations
oxigen_pot_high = [
    'C1960097',
    'C0181748',
    'C0180824',
    'C0182123',
    'C1960094',
    'C0419003',
    'C0021925',
    'C0877798',
    'C0015357',
]

In [None]:
# New column initialised to NaN on every row.
df['oxigen_method'] = np.nan

In [None]:
import unidecode

def oxigen_cui(value):
    """Translate a free-text oxygen method into its code via ``oxygen_dic_rev``.

    The accent-stripped, lower-cased text is looked up first, then the plain
    lower-cased text; NaN is returned when neither is a known synonym. The
    previous code tested membership with one form and indexed with the other,
    which either missed accented inputs or raised KeyError.
    """
    raw_key = str(value).lower()
    ascii_key = unidecode.unidecode(str(value)).lower()
    return oxygen_dic_rev.get(ascii_key, oxygen_dic_rev.get(raw_key, np.nan))

# Code the delivery method reported on 'Oxigen' events.
is_oxigen_event = df._event_type == 'Oxigen'
df.loc[is_oxigen_event, ['oxigen_method']] = df[is_oxigen_event]._method.apply(oxigen_cui)
df.oxigen_method.value_counts()

In [400]:
# Code the delivery method reported through vital-sign (VS) events.
# The group is non-capturing so pandas does not warn about match groups;
# na=False keeps rows without a vital-sign name out of the mask. The mask is
# built from df itself rather than from the `x` alias defined in another cell.
oxigen_vs_pattern = '(?:metodo de administra|tipo oxigeno|oxigenoterapia|o2 suplem)'
is_oxigen_vs = (df._event_type == 'VS') & df._vs_name.str.lower().str.contains(
    oxigen_vs_pattern, regex = True, na = False)

def _vs_oxigen_cui(value):
    """Code of the oxygen method named by ``value`` (NaN when unknown).

    Looks up the accent-stripped lower-cased text first, then the plain
    lower-cased text, so the membership test and the lookup cannot disagree.
    """
    raw_key = str(value).lower()
    ascii_key = unidecode.unidecode(str(value)).lower()
    return oxygen_dic_rev.get(ascii_key, oxygen_dic_rev.get(raw_key, np.nan))

df.loc[is_oxigen_vs, ['oxigen_method']] = df[is_oxigen_vs]._event_value.apply(_vs_oxigen_cui)
df.oxigen_method.value_counts()

  return func(self, *args, **kwargs)


C0740087    4433
C0181748     832
C0181744     563
C0182123     394
C0419003       7
Name: oxigen_method, dtype: int64

In [401]:
# Distinct vital-sign names found on VS events.
x[(x._event_type == 'VS')]._vs_name.unique()

array(['Temperatura', 'Volumen de oxigeno', 'TA Diastolica',
       'Frecuencia cardiaca', 'Saturacion O2', 'TA Sistolica', 'Oxigeno',
       'Tipo Frecuencia cardiaca', 'Tipo toma temperatura',
       'Frecuencia respiratoria', 'Metodo de administracion de Oxigeno',
       'Glucemia digital', 'Insulina rapida', 'Glucemia (Tipo D)',
       'Deposiciones', 'Diuresis 24h', 'Insulina lenta', 'Dieta pautada',
       'Diuresis Turno', 'Glucemia', 'Turno', 'Nivel de consciencia',
       'Observaciones', 'Cambio panal', 'O2 Suplementario', 'Dolor',
       'Talla', 'Peso', 'Perimetro abdominal', 'Indice Masa Corporal',
       'Recogida de orina', 'Diuresis', 'Aseo de paciente',
       'Cambios posturales', 'Metrorragia', 'Tolerancia',
       'Estado de la piel', 'Efectividad enema', 'Enema de limpieza',
       'Vol. SNG', 'Volumen ingesta', 'Drenaje', 'Vomito', 'Vol. Ostomia',
       'Ostomia', 'Volumen drenado', 'MP Entrada', 'Matin Palanca Salida',
       'Martin Palanca Balance', 'Glasgow m

In [None]:
# For every vital sign whose lower-cased name contains 'o2' or 'oxig', print
# the name followed by the frequency of its recorded values.
is_vs = x._event_type == 'VS'
ox_vs = x[is_vs & (x._vs_name.str.lower().str.contains('(o2|oxig)', regex = True))]._vs_name
for o in ox_vs.unique():
    print(o)
    print(x[is_vs & (x._vs_name == o)]._event_value.value_counts())

In [None]:
# fiO2 starts as NaN everywhere; 'Oxigen' events contribute their value as a float.
df['fiO2'] = np.nan
from_oxigen_event = df._event_type == 'Oxigen'
df.loc[from_oxigen_event, ['fiO2']] = df[from_oxigen_event]._event_value.apply(lambda v: float(v))
df.fiO2.describe()

In [None]:
# 'Volumen de oxigeno': values below 21 are converted as 21 + 4 * value,
# values of 21 or more are kept as they are.
def _volume_to_fio2(value):
    volume = float(value)
    return 21 + (volume * 4) if volume < 21 else volume

is_o2_volume = (df._event_type== 'VS') & (df._vs_name == 'Volumen de oxigeno')
df.loc[is_o2_volume, ['fiO2']] = df[is_o2_volume]._event_value.apply(_volume_to_fio2)
df.fiO2.describe() #fiO2 should be >= 21% - 100%

In [None]:
# 'Oxigeno' vital sign: a value starting with 'Sin ' gives 21, anything else NaN.
is_oxigeno_vs = (df._event_type== 'VS') & (df._vs_name == 'Oxigeno')
df.loc[is_oxigeno_vs, ['fiO2']] = df[is_oxigeno_vs]._event_value.apply(
    lambda v: 21 if str(v).startswith('Sin ') else np.nan)
df.fiO2.describe() #if patient is reported as "Sin Oxigeno"  then FiO2 = 21%

In [None]:
# 'O2 Suplementario' vital sign: the value 'NO' gives 21, anything else NaN.
is_o2_suppl = (df._event_type== 'VS') & (df._vs_name == 'O2 Suplementario')
df.loc[is_o2_suppl, ['fiO2']] = df[is_o2_suppl]._event_value.apply(
    lambda v: 21 if str(v) == 'NO' else np.nan)
df.fiO2.describe() #if patient is reported as "No" in "O2 Suplementario" then FiO2 = 21%

In [None]:
# Vital signs whose name starts with 'FiO2': use the reported value, with 0 mapped to NaN.
is_fio2_vs = (df._event_type== 'VS') & (df._vs_name.str.startswith('FiO2'))
df.loc[is_fio2_vs, ['fiO2']] = df[is_fio2_vs]._event_value.apply(lambda v: float(v) or np.nan)
df.fiO2.describe() #add reported FiO2

In [None]:
# Number of distinct patients with at least one non-missing fiO2.
df[pd.isna(df.fiO2) == False]._p_id.nunique()

In [None]:
# Number of distinct patients with at least one non-missing oxigen_method.
df[pd.isna(df.oxigen_method) == False]._p_id.nunique()

In [None]:
# Non-null counts per column, by _health_dep, over the rows that have a fiO2.
df[pd.isna(df.fiO2) == False].groupby(['_health_dep']).count()

In [None]:
# Print the distinct values of 'State Discharge' events; discharge_dict starts empty.
print(x[(x._event_type == 'State Discharge')]._event_value.unique())
discharge_dict = {}


In [None]:
# Distinct raw values recorded for the BARTHEL scale.
df.loc[(df._event_type == 'Scale') &  (df._scale_name == 'BARTHEL')]._event_value.unique()

In [None]:
# Numeric BARTHEL score: the text before the first '/' of the recorded value.
df['barthel'] = np.nan
is_barthel = (df._event_type == 'Scale') &  (df._scale_name == 'BARTHEL')
# Missing values map to np.nan (the previous `np.na` does not exist and raised
# AttributeError on the first missing value).
df.loc[is_barthel, ['barthel']] = df[is_barthel]._event_value.apply(
    lambda v: int(str(v).split('/')[0]) if not pd.isna(v) else np.nan)
df[pd.isna(df.barthel) == False]._p_id.nunique()

In [None]:
# Distinct raw values recorded for the SOFA scale.
df.loc[(df._event_type == 'Scale') &  (df._scale_name == 'SOFA')]._event_value.unique()

In [None]:
# Numeric SOFA score: the text before the first '/' of the recorded value.
df['sofa'] = np.nan
is_sofa = (df._event_type == 'Scale') &  (df._scale_name == 'SOFA')
# Missing values map to np.nan (the previous `np.na` does not exist and raised
# AttributeError on the first missing value).
df.loc[is_sofa, ['sofa']] = df[is_sofa]._event_value.apply(
    lambda v: int(str(v).split('/')[0]) if not pd.isna(v) else np.nan)
df[pd.isna(df.sofa) == False]._p_id.nunique()

In [None]:
# Distinct raw values recorded for the GLASGOW scale.
df.loc[(df._event_type == 'Scale') &  (df._scale_name == 'GLASGOW')]._event_value.unique()

In [None]:
# Numeric GLASGOW score: the text before the first '/' of the recorded value.
df['glasgow'] = np.nan
is_glasgow = (df._event_type == 'Scale') &  (df._scale_name == 'GLASGOW')
# Missing values map to np.nan (the previous `np.na` does not exist and raised
# AttributeError on the first missing value).
df.loc[is_glasgow, ['glasgow']] = df[is_glasgow]._event_value.apply(
    lambda v: int(str(v).split('/')[0]) if not pd.isna(v) else np.nan)
df[pd.isna(df.glasgow) == False]._p_id.nunique()

In [None]:
# Distinct values recorded on 'State Bed' events.
x[((x._event_type == 'State Bed'))]._event_value.unique()

In [None]:
# Regex of fragments used to recognise ICU beds in 'State Bed' values.
# NOTE(review): 'REA' is short and case-sensitive; confirm it does not match unrelated bed labels.
synonym_icu = '(?:UCI|INTENSI|REA)'
# Patients with at least one 'State Bed' event matching the pattern.
uci_patients = x[((x._event_type == 'State Bed') & (x._event_value.str.contains(synonym_icu)  ))]._p_id.unique()
len(uci_patients)

In [None]:
# Columns currently present in df.
df.columns

In [None]:
def who_out_scale(x, eval_date):
    """Score a patient on the 7-category WHO COVID outcome scale at ``eval_date``.

    The WHO COVID Outcomes Scale is used as primary efficacy outcome of
    treatments: clinical status 14 days after admission, assessed at baseline,
    7 and 14 days or discharge (whatever occurs first).
    https://www.tandfonline.com/doi/full/10.1080/19466315.2020.1811148
    Category definitions, Cao et al. (2020):
        7, death;
        6, hospitalized, receiving extracorporeal membrane oxygenation (ECMO)
           or invasive mechanical ventilation;
        5, hospitalized, receiving noninvasive mechanical ventilation or nasal
           high-flow oxygen therapy;
        4, hospitalized, receiving supplemental oxygen without positive
           pressure or high flow;
        3, hospitalized, not receiving supplemental oxygen;
        2, not hospitalized and unable to perform normal activities;
        1, not hospitalized and able to perform normal activities.

    Args:
        x: events of one patient. The first row's ``_start_date`` is taken as
           the start of the observation window. Columns read: ``_start_date``,
           ``_end_date``, ``_event_type``, ``_event_value``, ``fiO2``,
           ``oxigen_method``.
        eval_date: date at which the outcome is evaluated; later events are ignored.

    Returns:
        int between 1 and 7.
    """
    verbose = False
    in_date = x.iloc[0]._start_date
    out_date = x.iloc[-1]._start_date
    if verbose:
        print(in_date, out_date)
        print(f'date outcome evaluation: {eval_date}')
        print(x[pd.isna(x.fiO2) == False][['_start_date', '_event_value', 'fiO2']])
        print(x.oxigen_method.unique())

    # ICU
    if verbose:
        print(x[((x._event_type == 'State Bed') & (x._event_value.str.contains(synonym_icu)))]._end_date )

    not_after_eval = x._start_date <= eval_date
    in_window = (x._start_date >= in_date) & not_after_eval

    # ICU beds occupied inside the window: entry date of the first one, end
    # date of the last one (assume only one ICU admission).
    icu_beds = x[(x._event_type == 'State Bed') & (x._event_value.str.contains(synonym_icu)) & in_window]
    if icu_beds.empty:
        date_icu_in = None
        date_icu_out = None
    else:
        date_icu_in = icu_beds._start_date.iloc[0]
        date_icu_out = icu_beds._end_date.iloc[-1]

    # discharge events inside the window
    discharge = x[(x._event_type == 'State Discharge') & in_window]

    # Fatal: first discharge recorded as death
    deaths = discharge[discharge._event_value.isin(['Éxitus', 'In extremis'])]
    fatal = not deaths.empty
    date_death = deaths.iloc[0]._start_date if fatal else None

    # Critic (computed for reference, not used by the score below)
    critic = fatal or not pd.isnull(date_icu_in)

    def _last_start_date(mask):
        """Start date of the last row selected by ``mask``, or None if none is selected."""
        dates = x[mask]._start_date
        return None if dates.empty else dates.iloc[-1]

    # Oxygen up to the outcome date (high flow = FiO2 >= 50% or, judged
    # separately, a device listed as high flow).
    ox_high_device = _last_start_date(x.oxigen_method.isin(oxigen_high) & (x._start_date <= eval_date))
    ox_high_fio2 = _last_start_date((x.fiO2 >= 50) & (x._start_date <= eval_date))
    ox_low_fio2 = _last_start_date((x.fiO2 < 50) & (x.fiO2 > 21) & (x._start_date <= eval_date))
    ox_low_device = _last_start_date(x.oxigen_method.isin(oxigen_low) & not_after_eval)

    # High flow counts when it was never followed by a low-flow record.
    high_flow = (
        (ox_high_device and not ox_low_device)
        or (ox_high_fio2 and not ox_low_fio2)
        or ((ox_high_device and ox_low_device) and (ox_high_device >= ox_low_device))
        or ((ox_high_fio2 and ox_low_fio2) and (ox_high_fio2 >= ox_low_fio2))
    )

    if date_death and date_death <= eval_date:
        who_covid_out = 7
    # NOTE(review): only ICU stays without an end date count as ongoing; a stay
    # ending after eval_date is not scored 6 - confirm this is intended.
    elif not pd.isnull(date_icu_in) and date_icu_in <= eval_date and pd.isnull(date_icu_out):
        who_covid_out = 6
    elif high_flow:
        who_covid_out = 5
    elif discharge.empty and (ox_low_fio2 or ox_low_device):
        who_covid_out = 4
    elif discharge.empty or not discharge[discharge._event_value.isin(['Traslado Hospital de agudos'])].empty:
        # still hospitalized
        who_covid_out = 3
    elif not discharge[discharge._event_value.isin([
            'Traslado Residencia o Centro Socio-Sanitario asistido',
            'Unidad de Hospitalización a domicilio',
            'Traslado Hospital Media larga estancia'])].empty:
        who_covid_out = 2
    else:
        who_covid_out = 1

    if verbose:
        print(out_date)
        print(discharge)
    return who_covid_out



In [None]:
# Spot check on the patient with _p_id 100: WHO score evaluated 1 to 5 days
# after the start date of the patient's first row.
for n_days in range(1,6):
    who_out = df[df._p_id == 100].groupby('_p_id').apply(lambda y: who_out_scale(y, y.iloc[0]._start_date +  datetime.timedelta(n_days)))
    print(f'outcome day {n_days}, WHO score {who_out.iloc[0]}')

In [None]:
def fiO2(x, eval_date, method = 'basal'): #first val during eval_date day (24 hours)
    """Summarise the fiO2 values recorded in the 24 hours before ``eval_date``.

    Args:
        x: events of one patient, with ``_start_date`` and ``fiO2`` columns.
        eval_date: end (exclusive) of the 24-hour window.
        method: 'basal' (first value in the window), 'min', 'max' or 'mean'.
            Any other value leaves the selected values unreduced.

    Returns:
        The summarised value, or NaN when the window holds no fiO2 value.
    """
    first_date = x.iloc[0]._start_date
    last_date = x.iloc[-1]._start_date
    verbose = False
    if verbose:
        print(first_date, last_date)
        print(f'date evaluation: {eval_date}')
        print(x[pd.isnull(x.fiO2) == False][['_start_date', 'fiO2']])

    window_start = eval_date - datetime.timedelta(1)
    in_window = (pd.isnull(x.fiO2) == False) & (x._start_date >= window_start) & (x._start_date < eval_date)
    values = pd.to_numeric(x[in_window].fiO2, errors = 'coerce')

    if values.empty:
        result = np.nan
    else:
        reducers = {
            'min': lambda s: s.min(),
            'basal': lambda s: s.iloc[0],
            'max': lambda s: s.max(),
            'mean': lambda s: s.mean(),
        }
        # an unrecognised method hands back the values untouched
        result = reducers.get(method, lambda s: s)(values)

    if verbose:
        print(eval_date)
        print(f'result: {result}%')
    return result

In [None]:
# Value frequencies of VS events whose name starts with 'sat' (case-insensitive).
synonyms =  '(?:^sat)'
x[(x._event_type== 'VS') & (x._vs_name.str.contains(synonyms, case = False))]._event_value.value_counts()

In [434]:
# Events whose type contains 'Sars-cov-2' and whose value contains 'POSITIVO' (both case-insensitive).
cov = x[x._event_type.str.contains('Sars-cov-2', case = False) & (x._event_value.str.contains('POSITIVO' , case = False))]



In [435]:
# Distinct test names, values and event types among the positive events in `cov`.
print(cov._test_name.unique())
print(cov._event_value.unique())
print(cov._event_type.unique())

['PCR' 'IGG' 'IGM' 'ACT' 'ANT']
['POSITIVO']
['Lab Sars-cov-2']


## Group lab values based on string similarity and value distribution

In [396]:
# Lab tests listed in Lab_selected.csv; codigo_nacional is read as text.
lab_select = pd.read_csv('Terminology/Lab_selected.csv',  dtype = {'codigo_nacional': str})

# Rows flagged with 'x' in `select`: first row of each (prueba_lab, codigo_nacional)
# pair, keeping pairs whose key is missing (dropna=False).
test_selected = pd.DataFrame(lab_select[lab_select.select == 'x'].groupby(['prueba_lab','codigo_nacional'], dropna = False).first().reset_index())
test_selected.columns

Index(['prueba_lab', 'codigo_nacional', 'select', 'counts'], dtype='object')

In [321]:
# Candidate test names for the fuzzy matching: names without '+' or '/',
# ordered by frequency.
l = df_dict['Lab']
choices = l[(l._test_name.str.contains(r'[+/]', regex = True) == False )]._test_name.value_counts()
choices = choices[choices > 100 ] #ignore lab tests with 100 observations or fewer
choices = choices.index

In [322]:
# Convert lab values to numbers; non-numeric values become NaN.
# NOTE(review): `l` is the same object as df_dict['Lab'], so that entry is modified too.
l._event_value = pd.to_numeric(l._event_value, errors='coerce')

In [323]:
# Lab values grouped by (test name, test code); queried with get_group((name, code)).
lab_dist = l.groupby(['_test_name','_test_code'])[['_event_value']]


In [357]:
from scipy import stats
from scipy.spatial.distance import jensenshannon
import unidecode
t = '_test_name'
def t_student_differ(lab1, lab2):
    """Compare the value distributions of two lab tests.

    Runs a two-sided Welch t-test (``equal_var=False``) on the non-missing
    values of both tests, after truncating both samples to the size of the
    smaller one, and computes the ratio of their medians.

    Args:
        lab1: key of the first group in ``lab_dist``, a tuple of lab name and lab code.
        lab2: key of the second group, same format.

    Returns:
        Tuple ``(p, median_ratio)``: the t-test p-value and
        ``median(lab1) / median(lab2)``, both rounded to 2 decimals. When the
        comparison fails (e.g. a key is not a group of ``lab_dist``) a message
        is printed and ``median_ratio`` is 0; ``p`` is None unless the failure
        happened after it was computed.
    """
    p = None
    median_ratio = None
    try:
        d1 = lab_dist.get_group(lab1)._event_value
        d2 = lab_dist.get_group(lab2)._event_value
        median_ratio = np.round(d1.median()/d2.median(),2)
        d1 = d1.values[~np.isnan(d1.values)]
        d2 = d2.values[~np.isnan(d2.values)]

        # size of the smaller sample; both samples are cut to this length
        n_common = min(len(d1), len(d2))
        p = stats.ttest_ind(d1[:n_common], d2[:n_common], equal_var= False)[1]

        #p = jensenshannon(d1[:n_common],d2[:n_common])
        p = np.round(p, 2)
    except Exception:
        # Best effort: report and signal the failure through median_ratio == 0.
        # `Exception` (not a bare except) lets KeyboardInterrupt/SystemExit through.
        print(f'failed: {lab1} {lab2}')
        median_ratio = 0

    return p , median_ratio


In [395]:
def _different_sample_type(code_a, code_b):
    """True when one code contains 'N' and the other contains 'K'.

    The original note says such pairs come from different samples (urine vs serum).
    """
    a, b = str(code_a), str(code_b)
    return ('N' in a and 'K' in b) or ('N' in b and 'K' in a)


def _add_similar_tests(lab_dic, key, query, limit):
    """Add to ``lab_dic[key]`` the lab tests whose name resembles ``query``.

    A (name, code) candidate is added when the name similarity is at least 90,
    the ratio of medians against the selected test lies strictly between 0.7
    and 1.3, and the two codes do not point to different sample types.
    Codes are always stored as strings so the duplicate check is consistent.

    Returns the list of (name, code) tuples added by this call.
    """
    test, code = key
    added = []
    sim = process.extract(query, choices, limit=limit)
    print(sim)
    for match in sim:
        name, score = match[0], match[1]
        if score < 90:
            continue
        #search all codes with the same name
        for c in l[l._test_name == name]._test_code.unique():
            candidate = (name, str(c))
            if candidate in lab_dic.get(key, []):
                continue
            differ = t_student_differ((test, str(code)), candidate)
            if 0.7 < differ[1] < 1.3 and not _different_sample_type(c, code):
                print(test, name, differ)
                lab_dic.setdefault(key, []).append(candidate)
                added.append(candidate)
    return added


# Set to True to rebuild the synonym dictionary instead of reusing a saved one.
save_lab_dic = False
if save_lab_dic:
    lab_dic = {} #key = tuple (test,code), value = list of (test,code) synonyms
    for _, row in test_selected.iterrows():
        test = unidecode.unidecode(row.prueba_lab)
        code = row.codigo_nacional
        key = (test, code)

        #1st level: synonyms of the selected test name
        first_level = _add_similar_tests(lab_dic, key, test, limit=20)
        #2nd level: synonyms of every synonym just added.
        #Each level has its own loop variables; the previous nested loops reused
        #`sim`, `it`, `c` and `codes_sim`, corrupting the outer loop state.
        for name, _code in first_level:
            _add_similar_tests(lab_dic, key, name, limit=10)
        


In [393]:
import pickle

# Reuse the stored synonym dictionary unless it has just been rebuilt, in
# which case the new one is stored.
# NOTE: pickle.load runs code embedded in the file; only load files you produced yourself.
if not save_lab_dic:
    with open('Terminology/lab_dic.pickle', 'rb') as handle:
        lab_dic = pickle.load(handle)
else:
    with open('Terminology/lab_dic.pickle', 'wb') as fp:
        pickle.dump(lab_dic,fp)


In [394]:
# Show the synonym dictionary: (test, code) -> list of (test, code) synonyms.
lab_dic

{('25 (OH) VITAMINA D',
  '300113040000N0000630'): [('25 (OH) VITAMINA D',
   '300113040000N0000630'), ('25 OH Vitamina D', '300113040000N0000630'), ('VITAMINA D 25 OH',
   '300113040000N0000630')],
 ('ALBUMINA', '300113010000N0000004'): [('ALBUMINA', '300113010000N0000004'),
  ('ALBUMINA', 'nan'),
  ('Albumina', '300113010000N0000004'),
  ('Albumina', 'CCD00010N'),
  ('ALBUMINA (#)', 'CCD00010N'),
  ('HP ALBUMINA', 'nan'),
  ('Albumina, Suero', '300113010000N0000004'),
  ('ALBUMINA ELECTROFORESIS', 'CCD00010N')],
 ('Bicarbonato', 'CCD00040N'): [('Bicarbonato', '300113010000N0000029'),
  ('Bicarbonato', 'nan'),
  ('Bicarbonato', 'CCD00040N'),
  ('BICARBONATO', 'CCD00041N'),
  ('BICARBONATO', 'nan'),
  ('BICARBONATO', '300113010000N0000029'),
  ('BICARBONATO STD', 'CCD00041N'),
  ('POC BICARBONATO STD', 'nan'),
  ('POC BICARBONATO CALCULADO', 'nan'),
  ('Bicarbonato Actual', 'nan'),
  ('Bicarbonato Estandar', 'nan'),
  ('Bicarbonato (Estandar)', 'CCD00041N'),
  ('Bicarbonato (Estandar)'

## Generate tables for experiments

In [366]:
# Lab rows without range limits, and the distinct test codes they carry.
lab = df_dict['Lab']
lab = lab.loc[lab['_limits'].isna()]
no_norm = list(set(lab['_test_code'].unique())) #selected codes not normalized
def lab_var(x, eval_date, method = 'basal', synonyms = None, codes = None, raw = False):
    """Summarise one lab test over the 24 hours before ``eval_date``.

    Args:
        x: events of one patient. Columns read: ``_event_type``, ``_test_code``,
           ``_test_name``, ``_start_date``, ``_event_value``, ``_norm_value``.
        eval_date: end (exclusive) of the 24-hour window.
        method: 'basal' (first value in the window), 'min', 'max' or 'mean'.
            Any other value leaves the selected values unreduced.
        synonyms: test names to match. For lab tests this is assumed to be
            used only for (test, code) tuples where code = 'nan'.
        codes: test codes to match. When both ``codes`` and ``synonyms`` are
            given, a row matching either is selected.
        raw: when True use ``_event_value`` instead of ``_norm_value``. This is
            now honoured when only ``synonyms`` is given too (it used to be
            ignored in that case).

    Returns:
        The summarised value rounded to 2 decimals, or NaN when no value
        falls in the window.
    """
    verbose = False

    if codes and synonyms:
        selected = x._test_code.isin(codes) | x._test_name.isin(synonyms)
    elif codes:
        selected = x._test_code.isin(codes)
    else:
        selected = x._test_name.isin(synonyms)

    # Normalized values are used only if range limits were provided for all
    # dataset values of this test, i.e. none of the codes is in no_norm.
    if raw or (codes and not set(no_norm).isdisjoint(codes)):
        column = '_event_value'
    else:
        column = '_norm_value'

    in_window = (x._start_date >= eval_date - datetime.timedelta(1)) & (x._start_date < eval_date)
    result = pd.to_numeric(x.loc[(x._event_type == 'Lab') & selected & in_window, column],
                           errors = 'coerce')

    if not result.empty:
        if method == 'min':
            result = result.min()
        elif method == 'basal':
            result = result.iloc[0]
        elif method == 'max':
            result = result.max()
        elif method == 'mean':
            result = result.mean()
        result = np.round(result,2)
    else:
        result = np.nan
    if verbose:
        print(eval_date)
        print(f'result: {result}')
    return result

In [367]:
def vs_var(x, eval_date, method = 'basal', vs_name = None, synonyms = None):
    """Summarise one vital sign over the 24 hours before ``eval_date``.

    Args:
        x: events of one patient. Columns read: ``_event_type``, ``_vs_name``,
           ``_start_date``, ``_event_value``.
        eval_date: end (exclusive) of the 24-hour window.
        method: 'basal' (first value in the window), 'min', 'max' or 'mean'.
            Any other value leaves the selected values unreduced.
        vs_name: not used by the function.
        synonyms: regular expression matched (case-insensitively) against
            ``_vs_name`` to select the vital sign.

    Returns:
        The summarised value, or NaN when no value falls in the window.
    """
    first_date = x.iloc[0]._start_date
    last_date = x.iloc[-1]._start_date
    verbose = False
    if verbose:
        print(first_date, last_date)
        print(f'date evaluation: {eval_date}')
        print(x[(x._event_type== 'VS') & (x._vs_name.str.contains(synonyms, case = False))][['_start_date','_event_value']])

    window_start = eval_date - datetime.timedelta(1)
    selected = x[(x._event_type== 'VS') & (x._vs_name.str.contains(synonyms, case = False)) &
                 (x._start_date >= window_start) & (x._start_date < eval_date)]
    values = pd.to_numeric(selected._event_value, errors = 'coerce')

    if values.empty:
        result = np.nan
    else:
        reducers = {
            'min': lambda s: s.min(),
            'basal': lambda s: s.iloc[0],
            'max': lambda s: s.max(),
            'mean': lambda s: s.mean(),
        }
        # an unrecognised method hands back the values untouched
        result = reducers.get(method, lambda s: s)(values)

    if verbose:
        print(eval_date)
        print(f'result: {result}')
    return result

In [368]:

# Columns of the experiment table.
ex1_columns = [
    # patient
    '_patient_id', '_p_id', '_health_dep', '_age', '_gender',
    # admission
    'date_in', 'n_days_in', 'worst_barthel_scale',
    # covid evidence
    'n_covid_rel_dx', 'covid_rel_dx', 'n_covid_rel_image', 'covid_rel_image', 'n_PCRs_+',
    # severity and outcome
    'n_days_icu', 'worst_sofa_scale', 'worst_glasgow', 'critic', 'severity',
    '_discharge', 'who_out_scale',
    # respiratory variables
    'satO2', 'fiO2', 'resp_rate',
]


In [442]:

#1º Select only admissions with concomitant events associated to ( ( covid related dx-cx) OR (Rx-thorax with infiltrates or condensations or pneumonia)) AND b) PCR confirmed COVID 

def _basal_then_daily(extract, x, in_date, horizon, method, **kwargs):
    """Return [basal value at day 1] + [`method` aggregate at day d, d = 1 .. horizon-1].

    ``extract`` is called as ``extract(x, eval_date, method=..., **kwargs)``;
    the resulting list has ``horizon`` entries.
    """
    series = [extract(x, in_date + datetime.timedelta(1), method = 'basal', **kwargs)]
    series.extend(extract(x, in_date + datetime.timedelta(day), method = method, **kwargs)
                  for day in range(1, horizon))
    return series


def table(x_p): #x_p is a (_p_id, dataframe) pair; the dataframe holds all events of one patient, who may have >= 1 admissions
    """Build one output row per admission of a single patient.

    Parameters
    ----------
    x_p : tuple
        ``(_p_id, events)`` as yielded by ``df.groupby('_p_id')``.

    Returns
    -------
    pandas.DataFrame
        One row per admission that could be processed. Columns are those of
        ``ex1_columns`` followed by the extra keys added below (lab series,
        'NLR', 'n_image', 'n_PCRs', ...). When no admission could be processed
        the frame is empty with the ``ex1_columns`` columns.

    Notes
    -----
    Relies on names defined elsewhere in the notebook: ``ex1_columns``,
    ``l_covid_dx``, ``image_event_types``, ``synonym_icu``, ``lab_dic``,
    ``vs_var``, ``fiO2``, ``lab_var`` and ``who_out_scale``.
    An admission that raises is reported on stdout and skipped (best effort).
    """
    _p_id, x_p = x_p
    verbose = False
    horizon = 15 #temporal horizon (in days) for the var arrays
    rows = []
    admission_dates = x_p[(x_p._event_type == 'State Admission')]._start_date
    discharge_dates = x_p[(x_p._event_type == 'State Discharge')]._start_date
    #admissions and discharges are paired by position; zip ignores an unpaired trailing event
    for in_date, out_date in zip(admission_dates, discharge_dates):

        try:
            row_dict = dict.fromkeys(ex1_columns)
            #all events of the patient are kept; each query below applies its own date window
            x = x_p
            if verbose: 
                print(in_date, out_date)
                print(x[((x._event_type.isin(['State Bed', 'State Discharge'])))])

            in_stay = (x._start_date >= in_date) & (x._start_date <= out_date)
            #PCR and image counts also look at the 3 weeks before admission
            extended_stay = (x._start_date >= in_date - datetime.timedelta(7*3)) & (x._start_date <= out_date)

            #n_days_in 
            n_days_in = out_date - in_date

            #scales (barthel best is 100, sofa worst is 24, glasgow best is 15)
            #computed over every event of the patient, not only this admission
            barthel = x.barthel.dropna()
            sofa = x.sofa.dropna()
            glasgow = x.glasgow.dropna()
            worst_barthel_scale = np.min(barthel.values) if not barthel.empty else np.nan
            worst_sofa_scale = np.max(sofa.values) if not sofa.empty else np.nan
            worst_glasgow_scale = np.min(glasgow.values) if not glasgow.empty else np.nan

            #covid related dx-cx
            dx = x[(x._event_type == 'Diagnosis') & (x._event_value.isin(l_covid_dx)) & in_stay]
            #covid related image
            rx = x[(x.covid_image != False) & in_stay]
            #covid confirmed by PCR (extending 3 weeks before admission )
            is_sars_pcr = x._event_type == 'Lab Sars-cov-2'
            cov = x[is_sars_pcr & x._event_value.str.contains('POSITIVO', na = False) & extended_stay]

            n_rx = x[x._event_type.isin(image_event_types) & extended_stay].shape[0]
            n_pcr = x[is_sars_pcr & extended_stay].shape[0]

            #ICU 
            icu_beds = x[(x._event_type == 'State Bed') & x._event_value.str.contains(synonym_icu, na = False) & in_stay]
            if icu_beds.shape[0] > 0:
                date_icu_in = icu_beds._start_date.min()
                date_icu_out = icu_beds._end_date.max() #assume only one ICU admission, TODO improve
            else: 
                date_icu_in = None
                date_icu_out = None
            was_in_icu = not pd.isnull(date_icu_in)

            #discharge
            discharge = x[(x._event_type == 'State Discharge') & in_stay]

            #Fatal
            fatal = discharge._event_value.isin(['Éxitus', 'In extremis']).any()
            fatal = bool(fatal)

            #Critic 
            critic = fatal or was_in_icu

            #n_days_icu 
            n_days_icu = datetime.timedelta(0)
            if was_in_icu: 
                if not pd.isnull(date_icu_out):
                    n_days_icu = date_icu_out + datetime.timedelta(.9) - date_icu_in
                else:
                    n_days_icu = discharge.iloc[0]._start_date + datetime.timedelta(.9) - date_icu_in

            #Severity grade (following AE grading system 1-5: 
            #1-mild(ER only, does not require hospitalization), 
            #2-moderate( hospitalization <= 5 days)
            #3-severe(prolongs hospitalization > 5 days), 
            #4-critical(life threatening = UCI), 
            #5-fatal 
            #Source: lab paper
            severity = None
            if fatal:
                severity = 5
            elif was_in_icu:
                severity = 4
            elif n_days_in.days > 5:
                severity = 3
            elif n_days_in.days in range(1,6): 
                severity = 2 
            elif n_days_in.days == 0:
                #compare whole days: a Timedelta never equals the int 0, so grade 1 was unreachable
                severity = 1

            #Scale- 4C mortality score IASRIC 

            #PREDICTIVE VAR
            #SatO2 
            sats = _basal_then_daily(vs_var, x, in_date, horizon, 'min', synonyms = '(?:^sat)')
            #FiO2
            fi = _basal_then_daily(fiO2, x, in_date, horizon, 'max')
            #Resp_rate 
            fr = _basal_then_daily(vs_var, x, in_date, horizon, 'max', synonyms = '(?:frec.*resp.*)')
            #Heart_rate 
            fc = _basal_then_daily(vs_var, x, in_date, horizon, 'max', synonyms = '(?:^frec.*card.*)')
            #Temp
            te = _basal_then_daily(vs_var, x, in_date, horizon, 'max', synonyms = '(?:^temper|^fiebre)')
            #Weight (basal value only, scalar)
            wei = vs_var(x, in_date + datetime.timedelta(1), method = 'basal', synonyms = '(?:^peso)')
            #Height (basal value only, one-element list)
            hei = [vs_var(x, in_date + datetime.timedelta(1), method = 'basal', synonyms = '(?:^talla)')]
            #TAD
            tad = _basal_then_daily(vs_var, x, in_date, horizon, 'min', synonyms = '(?:diast)')
            #TAS
            tas = _basal_then_daily(vs_var, x, in_date, horizon, 'min', synonyms = '(?:sistol)')

            #add Lab values 
            neu_lab = []
            lym_lab = []
            for t,c in lab_dic.keys():
                entries = lab_dic[(t,c)]
                #entries with a code are looked up by code, entries whose code is nan by name;
                #both tests use str() so the two lists partition the entries
                code_list = [item[1] for item in entries if str(item[1]) != 'nan']
                synonyms_nan_code = [item[0] for item in entries if str(item[1]) == 'nan']
                test = t

                if len(synonyms_nan_code) == 0: synonyms_nan_code = None
                if len(code_list) == 0: code_list = None

                if 'CPK' in test: test = 'CPK'
                elif 'IL6' in test: test = 'IL6'
                elif 'BNP' in test: test = 'BNP'
                elif 'PCR' in test: test = 'PCR'
                elif 'Procalcit' in test: 
                    test = 'Prc'
                elif 'Potasio en orina' in test: test = 'K_o'
                elif 'Fibrin' in test: 
                    #code_list may have been reset to None above; appending to None dropped the admission
                    if code_list is None: code_list = []
                    code_list.append('300114029900N0000052')
                elif 'B totales' in test: 
                    test = 'B'
                elif 'T totales' in test: 
                    test = 'T'
                elif 'Celulas  T4' in test: 
                    test = 'T4'
                elif 'Celulas  T8' in test: 
                    test = 'T8'
                elif 'CO2 Total' in test: 
                    test = 'CO2'

                if verbose:
                    print('--------------')
                    print(t,c)
                    print(code_list, synonyms_nan_code)

                #column name is the (possibly shortened) test name cut to 3 characters
                test = test[:3].strip()

                #neutrophil / lymphocyte series are requested with raw=True and kept for the NLR
                is_neutrophil = bool(code_list) and 'CCD00179N' in code_list
                is_lymphocyte = bool(code_list) and not is_neutrophil and 'CCD00161N' in code_list
                extra = {'raw': True} if (is_neutrophil or is_lymphocyte) else {}

                lab = _basal_then_daily(lab_var, x, in_date, horizon, 'mean',
                                        codes = code_list, synonyms = synonyms_nan_code, **extra)
                if is_neutrophil:
                    neu_lab = lab
                elif is_lymphocyte:
                    lym_lab = lab

                row_dict[test] = lab

            #add NLR (Neutrophil-to-lymphocite ratio)
            if neu_lab and lym_lab:
                row_dict['NLR'] = np.round(np.array(neu_lab) / np.array(lym_lab), 2)
            else:
                #one of the two series is not configured in lab_dic: no ratio instead of losing the admission
                row_dict['NLR'] = np.full(horizon, np.nan)

            row_dict['_p_id'] = _p_id
            row_dict['_patient_id'] = x.iloc[0]._patient_id
            row_dict['_health_dep'] = x.iloc[0]._health_dep
            row_dict['_gender'] = x.iloc[0]._gender
            row_dict['_age'] = x.iloc[0]._age
            row_dict['date_in'] = in_date
            row_dict['n_days_in'] = n_days_in
            row_dict['worst_barthel_scale'] = worst_barthel_scale
            row_dict['n_image'] = n_rx
            row_dict['n_covid_rel_image'] = rx.shape[0] #n images with related covid findings
            row_dict['covid_rel_image'] = list(set([l for i in rx.covid_image.values for l in  i]))
            row_dict['n_covid_rel_dx'] = dx.shape[0]
            row_dict['covid_rel_dx'] = dx._desc.str.strip().unique()
            row_dict['n_PCRs'] = n_pcr
            row_dict['n_PCRs_+'] = cov.shape[0]
            #NOTE(review): ex1_columns lists 'worst_glasgow', which therefore stays None
            row_dict['worst_glasgow_scale'] = worst_glasgow_scale
            row_dict['worst_sofa_scale'] = worst_sofa_scale
            row_dict['date_icu_in'] = date_icu_in
            row_dict['n_days_icu'] = n_days_icu
            row_dict['_discharge'] = discharge._event_value.values[0]
            row_dict['critic'] = critic
            row_dict['severity'] = severity
            row_dict['who_out_scale'] = [who_out_scale(x, in_date +  datetime.timedelta(day)) for day in range(1, horizon)]
            row_dict['satO2'] = sats
            row_dict['fiO2'] = fi
            row_dict['resp_rate'] = fr
            row_dict['heart_rate'] = fc
            row_dict['tem'] = te
            row_dict['weight'] = wei
            row_dict['height'] = hei
            row_dict['dbp'] = tad
            row_dict['sbp'] = tas

            #collect the row; the frame is built once at the end (DataFrame.append is
            #quadratic in a loop and no longer exists in pandas 2)
            rows.append(row_dict)

            if verbose:
                print(f'finished {_p_id}')

        except Exception as e:
            #best effort: report the failing patient and continue with the next admission
            print(f'{x_p.iloc[0]._health_dep}, {x_p.iloc[0]._patient_id}')
            print(e)

    if not rows:
        return pd.DataFrame(columns = ex1_columns)
    return pd.DataFrame(rows)


In [445]:
# Build the per-admission table of every patient in parallel worker processes.
# Each item handed to `table` is the (_p_id, events) pair produced by groupby.
with ProcessPoolExecutor() as executor:
    patient_tables = list(executor.map(table, df.groupby('_p_id')))

# Concatenate once: calling pd.concat inside the loop re-copies the accumulated
# frame for every patient. The leading empty frame keeps the ex1_columns first.
df_experiment = pd.concat([pd.DataFrame(columns = ex1_columns)] + patient_tables)



In [452]:
# Show every column when a frame is displayed.
pd.set_option('display.max_columns', None)
# NOTE(review): this lookup is not the last expression of the cell, so its
# result is computed and discarded; only the column count below is displayed.
df_experiment[df_experiment._p_id ==1]
len(df_experiment.columns)

89

In [448]:
# Export the admission table without the internal _p_id key.
# reset_index() (no drop=True) turns the current index into an 'index' column
# of the CSV; index=False only suppresses the new 0..n-1 index.
df_experiment.drop(columns = ['_p_id']).reset_index().to_csv('table_extended_PCR.csv', index = False)

In [449]:
# Target population: at least one positive PCR AND at least one COVID-related
# diagnosis or COVID-related image finding.
target_pop = df_experiment.loc[
    (df_experiment['n_PCRs_+'] > 0)
    & (
        (df_experiment['n_covid_rel_dx'] > 0)
        | (df_experiment['n_covid_rel_image'] > 0)
    )
]

In [None]:
# Severity-4 (ICU) admissions whose computed ICU stay is shorter than one day.
target_pop.loc[
    (target_pop['severity'] == 4)
    & (target_pop['n_days_icu'] < pd.Timedelta(1, unit='d'))
]

In [None]:
# Spot check: admission rows produced for the patient with _p_id 1806.
df_experiment[df_experiment._p_id == 1806]

In [450]:
# Number of admissions in the target population.
target_pop.shape[0]

2822

In [None]:
# Admissions whose _health_dep equals the string '21'.
df_experiment[df_experiment._health_dep == '21']

In [None]:
# Admissions whose _health_dep equals the string '21'.
# NOTE(review): identical to the previous cell; one of the two can be removed.
df_experiment[df_experiment._health_dep == '21']

In [None]:
# _patient_id stored on every event row of the patient with _p_id 4450.
df[df._p_id ==  4450]._patient_id

In [None]:
# How many admissions share each ICU entry date (missing dates are not counted
# by value_counts by default).
df_experiment.date_icu_in.value_counts()

In [None]:
# COVID-related image findings stored for the first row of the table.
df_experiment.iloc[0].covid_rel_image

In [None]:
# All SARS-CoV-2 lab events of the patient with _p_id 4467.
df[(df._p_id == 4467) & (df._event_type == 'Lab Sars-cov-2')]

In [None]:


# NOTE(review): pandas is already imported as pd in the first cell of the notebook.
import pandas as pd
# Per-event-type frames for admissions and discharges.
# NOTE(review): df_dict is not defined in the visible part of the notebook --
# confirm it exists on a fresh kernel.
admissions_df = df_dict['State Admission']
discharge_df = df_dict['State Discharge']


In [None]:
# Admission events of the patient with _p_id 3308.
admissions_df.loc[admissions_df._p_id == 3308]

In [None]:
# _p_id of the patient inspected in the following cells.
p = 3308
#df_admissions 

In [None]:
# Pair, by position, the admission and discharge records of patient p:
# (admission date, discharge date, health department, _p_id, gender, age,
# discharge value). zip stops at the shorter of the two selections.
list(zip(admissions_df.loc[admissions_df._p_id == p]._start_date, 
         discharge_df.loc[discharge_df._p_id == p]._start_date,
         discharge_df.loc[discharge_df._p_id == p]._health_dep,
        admissions_df.loc[admissions_df._p_id == p]._p_id,
        admissions_df.loc[admissions_df._p_id == p]._gender,
        admissions_df.loc[admissions_df._p_id == p]._age,
        discharge_df.loc[discharge_df._p_id == p]._event_value))

In [None]:
# Copy the discharge date into a 'date' column (modifies discharge_df in place).
discharge_df['date'] = discharge_df['_start_date'] 
# NOTE(review): displays the whole frame; consider .head().
discharge_df

In [None]:
# Join admissions with the discharge dates. No `on=` is given, so pandas joins
# on every column name the two frames share.
# NOTE(review): presumably that is only '_p_id'; if admissions_df also has a
# 'date' column it becomes part of the join key -- confirm.
m = pd.merge(admissions_df,discharge_df[['_p_id', 'date']], sort = True)
# Admission dates and merged discharge dates for patient 3308.
m.loc[m._p_id == 3308]._start_date.values, m.loc[m._p_id == 3308].date.values

In [None]:
# Distinct health departments recorded for patient 3308 in the merged frame.
m.loc[m._p_id == 3308]._health_dep.unique()

In [None]:
# Distinct _patient_id values recorded for patient 3308 in the merged frame.
m.loc[m._p_id == 3308]._patient_id.unique()

In [None]:
# Outer join on _p_id: every admission of a patient is combined with every
# discharge of the same patient, and patients present in only one frame are kept.
pd.merge(admissions_df,discharge_df, how='outer',on='_p_id')

In [None]:
# NOTE(review): this binds a second name to the same DataFrame (no copy), so
# any column later added to df_out is also added to df.
df_out = df

In [None]:
# NOTE(review): unfinished cell. The groupby result is discarded and `i` is not
# defined inside the function, so unless a global `i` exists the apply below
# raises NameError on the first row.
# NOTE(review): defining a function called `f` rebinds the `f` imported from
# datatable in the first cell; later cells that write f.<column> would then
# receive this function instead.
def f(x):
    df.groupby('_p_id')
    
    return i

# Row-wise apply over df_out to fill the '_out_discharge' column.
df_out['_out_discharge'] = df_out.apply(lambda x: f(x), axis = 1)

In [None]:
# Rebuild the datatable Frame from the pandas frame; this rebinds the DT that
# was loaded from out_EDA/bigTable.jay at the top of the notebook.
DT = dt.Frame(df)

In [None]:
# Column names of DT.
DT.names

In [None]:
# Number of event rows per patient (_p_id).
DT[:,{'count':count()} , by('_p_id')]

In [None]:
# Patient-level fields taken from one row of each _p_id group.
# NOTE(review): the row selector is 1, i.e. the second row of each group;
# if the first row was intended it should be 0 -- confirm.
DT[1,classes_field_dict['Patient'] , by('_p_id')]

In [None]:
# Number of event rows per _patient_id.
df._patient_id.value_counts()

In [None]:
# Disabled: full profiling report of df (ProfileReport is not imported in the
# visible part of the notebook).
#profile = ProfileReport(df,  pool_size = 32)
# Summary statistics of the _dose column.
df._dose.describe()

In [None]:
# Rows whose _event_value is 'POTASIO CLORURO', restricted to the fluid columns.
# NOTE(review): fields_fluid is not defined in the visible part of the notebook -- confirm.
DT[ (f._event_value == 'POTASIO CLORURO') ,fields_fluid]

In [None]:
# Distinct _health_dep values present in DT.
dt.unique(DT[:, f._health_dep])

In [None]:
# All rows whose event_type is 'Text Discharge'.
# NOTE(review): this filters on `event_type`, while the top of the notebook
# uses `_event_type` -- confirm which column is intended.
DT[(f.event_type == 'Text Discharge') ,:]

In [None]:
# Count of 'Text Discharge' rows per distinct _event_value (column 'N').
e = DT[(f.event_type == 'Text Discharge'), :][:,{'N': dt.count()} , by('_event_value')]

In [None]:
# Write the per-value counts to Text_Discharge.csv inside csv_dir.
e.to_csv(str(csv_dir/'Text_Discharge.csv'))

In [None]:
# Rows where `date` is not missing, showing `date` next to `_start_date`.
DT[(dt.isna(f.date) == False),[f.date, f._start_date]]

In [None]:
# Same selection as the previous cell, written with dt.rowany over the single
# not-missing test on `date`.
DT[dt.rowany(dt.isna(f.date) == False),[f.date, f._start_date]]

In [None]:
# Column names of DT (repeats an earlier cell).
DT.names

In [None]:
# Attribute names of one entry of pdict.
# NOTE(review): pdict is not defined in the visible part of the notebook, and
# the key looks like a patient identifier -- confirm it may be kept in a shared notebook.
pdict['08_01d7a0442ab14089134c009df5aafdc1a8d206197e27db555281cf6582fda9fd'].__dict__.keys()

In [None]:
# Column names of DT (repeats an earlier cell).
DT.names

In [None]:
# Rows where `event_value` is missing but `_event_value` is present.
DT[(dt.isna(f.event_value) == True) & (dt.isna(f._event_value) == False),['event_type', '_event_value', 'event_value']]

In [None]:
# Rows where `event_value` is present, shown next to `_event_value` and `_desc`.
DT[dt.rowany(dt.isna(f.event_value) == False),[f.event_value, f._event_value, f._desc]]

In [None]:
# Medication rows of health department '08' whose dose is missing.
# Each condition is parenthesised on its own: `&` binds tighter than `==`, so
# the unparenthesised `A & isna(...) == True & C` was evaluated as
# `(A & isna(...)) == (True & C)` instead of the intended three-way AND.
DT[(f.event_type == 'Medication') & (dt.isna(f._dose) == True) & (f._health_dep == '08'), :]