In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime
from datetime import timedelta
%matplotlib inline

from transplant.config import *
from transplant.data.dataset import Dataset

In [2]:
import random

In [3]:
dataset = Dataset()

In [4]:
df_static = pd.read_csv(PATH_STATIC_CLEAN)

In [5]:
df_dynamic = pd.read_csv(PATH_DYNAMIC_CLEAN, parse_dates=['time'])
df_dynamic = dataset._get_declampage_event(df_dynamic)
df_dynamic = df_dynamic.groupby('id_patient').apply(dataset._truncate_datetime)

In [6]:
df_static.shape, df_dynamic.shape

((410, 285), (188839, 41))

In [None]:
#take id_test

In [9]:
ids_immediate = df_static.id_patient[df_static.immediate_extubation == 1].values
ids_immediate

array([  2,   6,  13,  14,  16,  21,  22,  25,  26,  29,  31,  34,  37,
        44,  45,  51,  54,  57,  60,  62,  65,  66,  67,  70,  71,  73,
        74,  75,  76,  78,  79,  81,  84,  89,  91,  95,  96,  98,  99,
       101, 113, 120, 123, 128, 129, 138, 142, 144, 147, 148, 152, 158,
       160, 166, 167, 168, 170, 174, 176, 177, 178, 181, 186, 189, 193,
       195, 202, 208, 212, 214, 215, 218, 219, 228, 229, 231, 233, 237,
       238, 239, 240, 241, 244, 246, 247, 249, 263, 268, 269, 272, 274,
       275, 276, 282, 288, 289, 291, 294, 295, 302, 303, 307, 308, 309,
       315, 318, 324, 325, 329, 330, 335, 336, 342, 345, 355, 358, 359,
       361, 362, 364, 365, 366, 367, 371, 372, 379, 382, 383, 384, 385,
       386, 390, 397, 399, 400, 407, 409])

In [110]:
id_test = random.choice(ids_immediate)
data_dyn = df_dynamic[df_dynamic.id_patient==id_test]
data_stat = df_static[df_static.id_patient == id_test]
print(id_test)

65


## General functions

In [11]:
def get_end_surgery_data_dynamic(df_dynamic, time=15, period=15) : 
    """
    Pour les données dynamiques, permet de regarder les constantes après le deuxième déclampage (env 15min après),
    sur une periode de 10min
    """
    date_1 = df_dynamic[df_dynamic.declampage_cote2_done==1].time.min() + timedelta(minutes=time)
    date_2 = date_1 + timedelta(minutes=period)
    return df_dynamic[(df_dynamic.time >= date_1)&(df_dynamic.time <= date_2)]

In [12]:
data_dyn_end = get_end_surgery_data_dynamic(data_dyn)
data_dyn_end.shape

(16, 41)

In [29]:
def is_nan(data, column, threshold=0.8):
    """
    Fonction qui verifie si threshold% des données qu'on considère sont des nan"
    """
    if data[column].isnull().sum()/len(data[column]) > threshold:
        return True
    return False

In [13]:
def time_offset_dynamic(df, time_offset=60):
    """
    Function that truncate the dynamic dataset to take into account only the last minutes (time_offset) of the operation
    """
    df['time']= pd.to_datetime(df['time'])
    date_min = df['time'].max() - timedelta(minutes=time_offset)
    return df[df['time'] >= date_min]

In [14]:
def Sufficient_decline(declampage_2,fermeture, decline=1/3, epsilon = 0.001,regle_nan=True):
    """
    Function that test if the decline of a value between the event 'declampage_cote_2' and 'fermeture' is sufficient
    decline : fraction of the value at time 'declampage_2' 
            (ie "on veut que la noredraline baisse d'au moins 1/3 de sa valeur")
    epsilon : "nearly equal"
    """
    if np.isnan(fermeture) or np.isnan(declampage_2):
        return regle_nan
    if fermeture > declampage_2:
        return False
    delta = declampage_2 - fermeture
    if delta > decline*declampage_2:
        return True
    return np.abs(delta - decline*declampage_2)<= epsilon

## Test functions

In [50]:
def test_PASm_sup(data_dyn, obj=60, threshold=90, regle_nan=True):
    """
    Test if threshold% of our value (PASm) is > objective /do not take into account PASm = 0
    Return boolean
    True : "at the end of surgery, when PASm not equal 0, at least 90% of the values are > 60"
    """
    non_zeros = data_dyn['PASm'][data_dyn['PASm']>0].count() 
    nb_ok = data_dyn['PASm'][data_dyn['PASm']>obj].count()
    
    if is_nan(data=data_dyn, column='PASm', threshold=0.8) or non_zeros==0:
        #ne peut pas être negatif ou nul -> sinon problème dans les mesures
        return regle_nan
    
    return (nb_ok/non_zeros)*100>=threshold

In [31]:
data_dyn_end.PASm

id_patient       
62          42035     66.0
            42036     72.0
            42037     79.0
            42038    201.0
            42039     73.0
            42040    101.0
            42041     89.0
            42042     90.0
            42043     85.0
            42044     83.0
            42045    254.0
            42046     79.0
            42047    113.0
            42048     79.0
            42049     77.0
            42050     76.0
Name: PASm, dtype: float64

In [51]:
test_PASm_sup(data_dyn_end)

True

In [33]:
def test_noradrenaline(data_stat,nora_max=1.5, nora_min=1, regle_nan=True):
    """
    Test if the noradrenaline is sufficiently low :
        > 1.5mg/h --> ko
        < 1mh/h --> ok
        between : see if there is a sufficient decline
    """
    
    if is_nan(data=data_stat, column='NORAD_fermeture', threshold=0.8):
        return regle_nan
    elif data_stat['NORAD_fermeture'].values[0] >= nora_max:
        return False
    elif data_stat['NORAD_fermeture'].values[0] <= nora_min:
        return True
    else : 
        return Sufficient_decline(data_stat['NORAD_declampage_cote_2'].values[0],
                                  data_stat['NORAD_fermeture'].values[0],
                                  decline=1/3, epsilon = 0.001)

In [34]:
test_noradrenaline(data_stat)

True

In [52]:
def test_ratio_pao2_fio2(data_stat, data_dyn, obj, threshold=80, regle_nan=True):
    """
    Verify if the ratio PaO2/FiO2 ratio is high enough
    True : PaO2/FiO2 ratio > obj
    """
    
    ratios = data_stat['PaO2_fermeture'].values[0]/(data_dyn['FiO2']/100)
    non_zeros = ratios[ratios>0].count()
    nb_ok = ratios[ratios>obj].count()
    
    if ratios.isnull().sum()/len(ratios) > 0.8 or non_zeros==0:
        return regle_nan
    

    return (nb_ok/non_zeros)*100>threshold

In [84]:
test = data_dyn_end['FiO2']/100
data_stat['PaO2_fermeture'].values[0]/test

id_patient       
62          42035    245.588235
            42036    298.214286
            42037    321.153846
            42038    327.450980
            42039    334.000000
            42040    334.000000
            42041    334.000000
            42042    334.000000
            42043    334.000000
            42044    334.000000
            42045    334.000000
            42046    340.816327
            42047    340.816327
            42048    340.816327
            42049    340.816327
            42050    340.816327
Name: FiO2, dtype: float64

In [87]:
ratios = data_stat['PaO2_fermeture'].values[0]/(data_dyn_end['FiO2']/100)
non_zeros = ratios[ratios>0].count()
print(non_zeros)
nb_ok = ratios[ratios>300].count()
print(nb_ok)

16
14


In [88]:
(nb_ok/non_zeros)*100

87.5

In [90]:
0.7*16

11.2

In [37]:
test_ratio_pao2_fio2(data_stat, data_dyn_end, threshold=90, obj=200, regle_nan=True)

True

In [42]:
def test_PAP_diminution(data_dyn, paps_max=50, paps_min=30, regle_nan=True):
    """
    True : If PAPS, PAPM, PAPD decrease 
    /!\ : lot of nan for  PAPm -> take PAPs
    if nan return False
    """
    
        
    if is_nan(data=data_dyn, column='PAPsys', threshold=0.8):
        return regle_nan
    
    #if the last measure are < papsmin
    if (data_dyn.PAPsys.values[-5:]<=paps_min).all():
        return True
    elif(data_dyn.PAPsys.values[-5:]>=paps_max).all():
        return False
    else :
        return Sufficient_decline(data_dyn.PAPsys.values[0],data_dyn.PAPsys.values[-1],decline=1/3, epsilon = 0.001)

In [43]:
data_dyn_end[['PAPmoy','PAPsys','PAPdia']]

Unnamed: 0_level_0,Unnamed: 1_level_0,PAPmoy,PAPsys,PAPdia
id_patient,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
62,42035,27.0,33.0,22.0
62,42036,24.0,30.0,19.0
62,42037,23.0,29.0,19.0
62,42038,22.0,28.0,18.0
62,42039,24.0,30.0,19.0
62,42040,26.0,33.0,21.0
62,42041,27.0,34.0,21.0
62,42042,28.0,35.0,22.0
62,42043,27.0,33.0,20.0
62,42044,28.0,35.0,23.0


In [44]:
Sufficient_decline(data_dyn_end.PAPsys.values[0],data_dyn_end.PAPsys.values[-1],decline=1/3, epsilon = 0.001)

False

In [45]:
data_dyn_end.PAPsys <= 30

id_patient       
62          42035    False
            42036     True
            42037     True
            42038     True
            42039     True
            42040    False
            42041    False
            42042    False
            42043    False
            42044    False
            42045    False
            42046     True
            42047     True
            42048     True
            42049     True
            42050     True
Name: PAPsys, dtype: bool

In [46]:
test_PAP_diminution(data_dyn_end, paps_max=50, paps_min=30, regle_nan=True)

True

In [47]:
def test_augm_ratio_pao2_fio2(data_stat, data_dyn,threshold_augm=0, regle_nan=True):
    """
    Test if ratio sufficiently high or sufficient evolution between the events 'declapage_cote_2' & 'fermeture'
    """
    if test_ratio_pao2_fio2(data_stat, data_dyn, obj=300, threshold=90, regle_nan=regle_nan):
        return True

    ratio_end = data_stat['PaO2_fermeture'].values[0] / data_dyn['FiO2'].values[-1]
    ratio_beg = data_stat['PaO2_declampage_cote_2'].values[0] / data_dyn['FiO2'].values[-1]
    if pd.isna(ratio_end - ratio_beg):
        return regle_nan
    return ratio_end - ratio_beg > threshold_augm

In [48]:
test_augm_ratio_pao2_fio2(data_stat, data_dyn_end,threshold_augm=0, regle_nan=True)

True

In [49]:
test = data_dyn_end['FiO2']/100
data_stat['PaO2_fermeture'].values[0]/test

id_patient       
62          42035    245.588235
            42036    298.214286
            42037    321.153846
            42038    327.450980
            42039    334.000000
            42040    334.000000
            42041    334.000000
            42042    334.000000
            42043    334.000000
            42044    334.000000
            42045    334.000000
            42046    340.816327
            42047    340.816327
            42048    340.816327
            42049    340.816327
            42050    340.816327
Name: FiO2, dtype: float64

In [54]:
def test_temperature_sup36(data_dyn,threshold=90,regle_nan=True):
    """
    Test if threshold% of our value (Temp) is > 36° /do not take into account Temp = 0
    Return boolean
    """
    
    non_zeros = data_dyn.Temp[data_dyn.Temp>0].count()
    nb_ok = data_dyn['Temp'][data_dyn['Temp']>36].count()
    
    if is_nan(data=data_dyn, column='Temp', threshold=0.8) or non_zeros==0:
        return regle_nan

    return (nb_ok/non_zeros)*100>threshold

In [55]:
data_dyn_end.Temp

id_patient       
62          42035    37.500000
            42036    37.500000
            42037    37.500000
            42038    37.500000
            42039    37.500000
            42040    37.500000
            42041    37.500000
            42042    37.500000
            42043    37.400002
            42044    37.500000
            42045    37.400002
            42046    37.500000
            42047    37.500000
            42048    37.500000
            42049    37.500000
            42050    37.500000
Name: Temp, dtype: float64

In [56]:
test_temperature_sup36(data_dyn_end,threshold=90,regle_nan=True)

True

In [68]:
def test_NMT_sup(data_dyn, obj=50, threshold=90, regle_nan=True): #threshold ok ou trop severe ?
    """
    Test if threshold% of our value (NMTratio) is > 50% /do not take into account Temp = 0
    """
    if is_nan(data=data_dyn, column='NMTratio', threshold=0.8):
        return regle_nan
    
    non_zeros = data_dyn['NMTratio'][data_dyn['NMTratio']>0].count()
    nb_ok = data_dyn['NMTratio'][data_dyn['NMTratio']>=obj].count() 
    
    if non_zeros==0:
        return False
    return (nb_ok/non_zeros)*100>=threshold

In [69]:
test_NMT_sup(data_dyn_end, obj=50, threshold=90, regle_nan=True)

False

In [63]:
data_dyn_end.loc[:,['NMTratio','time']]

Unnamed: 0_level_0,Unnamed: 1_level_0,NMTratio,time
id_patient,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
62,42035,0.0,2013-01-26 21:03:00
62,42036,0.0,2013-01-26 21:04:00
62,42037,0.0,2013-01-26 21:05:00
62,42038,0.0,2013-01-26 21:06:00
62,42039,0.0,2013-01-26 21:07:00
62,42040,0.0,2013-01-26 21:08:00
62,42041,0.0,2013-01-26 21:09:00
62,42042,0.0,2013-01-26 21:10:00
62,42043,0.0,2013-01-26 21:11:00
62,42044,0.0,2013-01-26 21:12:00


In [62]:
data_dyn[['NMTratio','time']][data_dyn['NMTratio']>0]

Unnamed: 0_level_0,Unnamed: 1_level_0,NMTratio,time
id_patient,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
62,41862,95.699997,2013-01-26 18:10:00
62,41884,102.699997,2013-01-26 18:32:00
62,42052,4.8,2013-01-26 21:20:00
62,42053,1.5,2013-01-26 21:21:00
62,42054,2.4,2013-01-26 21:22:00
62,42055,0.8,2013-01-26 21:23:00
62,42056,3.2,2013-01-26 21:24:00
62,42057,4.3,2013-01-26 21:25:00
62,42058,8.1,2013-01-26 21:26:00
62,42059,11.5,2013-01-26 21:27:00


In [66]:
data_dyn_end.columns

Index(['id_patient', 'time', 'B.I.S', 'BIS SR', 'DC', 'ET Des.', 'ET Sevo.',
       'ETCO2', 'ETCO2 (mmHg)', 'ETO2', 'FC', 'FICO2', 'FICO2 (mmHg)', 'FIN2O',
       'FR', 'FR(ecg)', 'FiO2', 'MAC', 'NMT TOF', 'NMTratio', 'PAPdia',
       'PAPmoy', 'PAPsys', 'PASd', 'PASm', 'PASs', 'PEEPtotal', 'PNId', 'PNIm',
       'PNIs', 'PODmoy', 'Pmax', 'Pmean', 'Pplat', 'RR(co2)', 'SpO2',
       'SvO2 (m)', 'Temp', 'VT', 'declampage_cote1_done',
       'declampage_cote2_done'],
      dtype='object')

In [67]:
data_dyn_end['NMT TOF']

id_patient       
62          42035    0
            42036    1
            42037    1
            42038    1
            42039    1
            42040    1
            42041    1
            42042    1
            42043    0
            42044    2
            42045    2
            42046    2
            42047    2
            42048    3
            42049    2
            42050    2
Name: NMT TOF, dtype: int64

## Split functions

In [77]:
def split_ECMO_during_surgery(data_stat):
    """
    Return boolean 
    True : presence of ECMO during surgery
    False : absence of ECMO during surgery
    """
    if data_stat['ECMO_during_surgery'].values[0]==1:
        return "Node ECMO weaning Test"
    return "Node ratio-100 test"

In [78]:
split_ECMO_during_surgery(data_stat)

'Node ratio-100 test'

In [72]:
def split_ECMO_weaning_test(data_stat, data_dyn,regle_nan=True):
    """
    Return boolean 
    """
    sum_True = int(test_PASm_sup(data_dyn, obj=60, threshold=90, regle_nan=regle_nan)) + \
               int(test_noradrenaline(data_stat, nora_max=1.5, nora_min=1, regle_nan=regle_nan)) + \
               int(test_ratio_pao2_fio2(data_stat, data_dyn, obj=200,threshold=90, regle_nan=regle_nan)) + \
               int(test_PAP_diminution(data_dyn, paps_max=50, paps_min=30,regle_nan=regle_nan))
    if sum_True == 4:
        return "Node INO weaning Test"
    return "Transfer to ICU with ECMO and mechanical ventilation"

In [73]:
def split_ratio_100(data_stat, data_dyn,regle_nan=True):
    if test_ratio_pao2_fio2(data_stat, data_dyn, obj=100,threshold=90, regle_nan=True) :
        return "Node INO weaning Test"
    return "Transfer to ICU with ECMO and mechanical ventilation"

In [79]:
split_ratio_100(data_stat, data_dyn=data_dyn_end,regle_nan=True)

'Node INO weaning Test'

In [74]:
def split_INO_weaning_test(data_stat, data_dyn,regle_nan=True):
    """
    Return boolean 
    """
    sum_True = int(test_augm_ratio_pao2_fio2(data_stat, data_dyn,threshold_augm=0, regle_nan=regle_nan)) + \
               int(test_PAP_diminution(data_dyn, paps_max=50, paps_min=30, regle_nan=regle_nan))
    if sum_True == 2:
        return "Node ratio-300 test"
    return "Transfer to ICU with mechanical ventilation and iNO therapy"

In [80]:
split_INO_weaning_test(data_stat, data_dyn=data_dyn_end,regle_nan=True)

'Node ratio-300 test'

In [91]:
def split_ratio_300(data_stat, data_dyn,regle_nan=True):
    if test_ratio_pao2_fio2(data_stat, data_dyn, obj=300, threshold=80, regle_nan=regle_nan) :
        return "Node NIV attempt"
    return "Transfer to ICU with mechanical ventilation"

In [92]:
split_ratio_300(data_stat, data_dyn=data_dyn_end,regle_nan=True)

'Node NIV attempt'

In [125]:
def split_NIV_attempt(data_stat, data_dyn,data_dyn_tot, regle_nan=True, drop_NMT=True):
    """
    Return boolean 
    """
    sum_True = int(test_temperature_sup36(data_dyn,threshold=90, regle_nan=regle_nan)) + \
               int(test_PASm_sup(data_dyn, obj=60, threshold=90, regle_nan=regle_nan)) + \
               int(test_noradrenaline(data_stat, nora_max=1.5, nora_min=1,regle_nan=regle_nan)) + \
               int(test_ratio_pao2_fio2(data_stat, data_dyn, obj=300, threshold=80, regle_nan=regle_nan))
    if drop_NMT:
        sum_goal=4
    else:
        sum_True=sum_True+int(test_NMT_sup(data_dyn_tot,obj=50, threshold=80, regle_nan=regle_nan))
        sum_goal = 5
        
    if sum_True == sum_goal:
        return  "Transfer to ICU with standard oxygen therapy or NIV"
    return "Transfer to ICU with mechanical ventilation"

In [99]:
split_NIV_attempt(data_stat, data_dyn_end, regle_nan=True)

'Transfer to ICU with mechanical ventilation'

In [101]:
print("Temperature :", test_temperature_sup36(data_dyn_end,threshold=90, regle_nan=True))
print("NMT :",test_NMT_sup(data_dyn_end, obj=50, threshold=0, regle_nan=True))
print("PASm :", test_PASm_sup(data_dyn_end, obj=60, threshold=90, regle_nan=True))
print("noradrenaline :",test_noradrenaline(data_stat, nora_max=1.5, nora_min=1,regle_nan=True))
print("ratio :",test_ratio_pao2_fio2(data_stat, data_dyn_end, obj=300, threshold=80, regle_nan=True))

Temperature : True
NMT : False
PASm : True
noradrenaline : True
ratio : True


In [103]:
len(ids_immediate)

137

In [119]:
df_dynamic.columns

Index(['id_patient', 'time', 'B.I.S', 'BIS SR', 'DC', 'ET Des.', 'ET Sevo.',
       'ETCO2', 'ETCO2 (mmHg)', 'ETO2', 'FC', 'FICO2', 'FICO2 (mmHg)', 'FIN2O',
       'FR', 'FR(ecg)', 'FiO2', 'MAC', 'NMT TOF', 'NMTratio', 'PAPdia',
       'PAPmoy', 'PAPsys', 'PASd', 'PASm', 'PASs', 'PEEPtotal', 'PNId', 'PNIm',
       'PNIs', 'PODmoy', 'Pmax', 'Pmean', 'Pplat', 'RR(co2)', 'SpO2',
       'SvO2 (m)', 'Temp', 'VT', 'declampage_cote1_done',
       'declampage_cote2_done'],
      dtype='object')

In [121]:
result_NMT_end = []
result_NMT = []
result_Temperature = []
for ids in ids_immediate:
    data_dyn = df_dynamic[(df_dynamic.id_patient==ids)&(df_dynamic.declampage_cote2_done==1)]
    data_stat = df_static[df_static.id_patient == ids]
    data_dyn_end = get_end_surgery_data_dynamic(data_dyn)
    
    result_NMT_end.append(test_NMT_sup(data_dyn_end, obj=50, threshold=80, regle_nan=True))
    result_NMT.append(test_NMT_sup(data_dyn, obj=50, threshold=80, regle_nan=True))
    result_Temperature.append(test_temperature_sup36(data_dyn_end,threshold=90, regle_nan=True))

  """


In [122]:
print(np.asarray(result_NMT_end).sum())
np.asarray(result_NMT_end)

11


array([False, False,  True, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False,  True, False, False, False,  True,  True,
       False, False, False, False, False, False,  True, False, False,
        True, False, False, False, False, False, False, False, False,
       False, False,  True,  True, False, False, False, False, False,
       False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,

In [123]:
print(np.asarray(result_NMT).sum())
np.asarray(result_NMT)

17


array([ True, False, False, False, False, False, False, False, False,
       False, False, False, False,  True, False, False, False, False,
       False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False, False, False, False,  True,
        True,  True,  True,  True, False, False, False, False, False,
        True, False, False, False, False, False, False, False, False,
        True, False, False, False, False, False, False, False,  True,
       False, False, False, False,  True,  True, False, False, False,
       False, False, False,  True, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False,  True, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False,  True, False, False,  True, False,
       False, False,

In [115]:
print(np.asarray(result_Temperature).sum())
np.asarray(result_Temperature)

123


array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True, False,  True,  True, False,  True, False,
        True,  True,  True,  True,  True, False,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True, False,  True,  True,  True, False,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True, False,  True,
        True,  True,  True,  True,  True, False,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True, False,  True,  True,  True,
       False, False,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True, False,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True, False,

## Validation Tree

In [126]:
def decision_empirique(df_static,df_dynamic,id_patient, regle_nan=True, drop_NMT=True):
    
    data_dyn_tot = df_dynamic[(df_dynamic.id_patient==id_patient)&(df_dynamic.declampage_cote2_done==1)]
    data_dyn_end = get_end_surgery_data_dynamic(data_dyn)
    data_stat = df_static[df_static.id_patient == id_patient]

                               
    if split_ECMO_during_surgery(data_stat) == "Node ECMO weaning Test":
        if split_ECMO_weaning_test(data_stat, data_dyn_end,regle_nan=regle_nan) == "Node INO weaning Test":
            if split_INO_weaning_test(data_stat, data_dyn_end,regle_nan=regle_nan) == "Node ratio-300 test":
                if split_ratio_300(data_stat, data_dyn_end,regle_nan=regle_nan) == "Node NIV attempt":
                    return split_NIV_attempt(data_stat, data_dyn=data_dyn_end, data_dyn_tot=data_dyn_tot, 
                                             regle_nan=regle_nan, drop_NMT=True)
                else:
                    return split_ratio_300(data_stat, data_dyn_end,regle_nan=regle_nan)
            else : 
                return split_INO_weaning_test(data_stat, data_dyn_end, regle_nan=regle_nan)
        else :
            return split_ECMO_weaning_test(data_stat, data_dyn_end, regle_nan=regle_nan)
        
    else: #Node ratio-100 test"
        if split_ratio_100(data_stat, data_dyn_end, regle_nan=regle_nan) =="Node INO weaning Test":
            if split_INO_weaning_test(data_stat, data_dyn_end, regle_nan=regle_nan) == "Node ratio-300 test":
                if split_ratio_300(data_stat, data_dyn_end, regle_nan=regle_nan) == "Node NIV attempt":
                    return split_NIV_attempt(data_stat, data_dyn=data_dyn_end, data_dyn_tot=data_dyn_tot, 
                                             regle_nan=regle_nan, drop_NMT=True)
                else:
                    return split_ratio_300(data_stat, data_dyn_end,regle_nan=regle_nan)
            else : 
                return split_INO_weaning_test(data_stat, data_dyn_end, regle_nan=regle_nan)
        else : 
            return split_ratio_100(data_stat, data_dyn_end, regle_nan=regle_nan)

## Tests

- avec les données postops
- avec notre label à nous

In [127]:
def recode_leaf_in_immediate_extubation(leaf):
    """ recode the leaves of the tree with immediate_extubation 0/1"""
    if leaf =="Transfer to ICU with standard oxygen therapy or NIV":
        return 1
    return 0

In [149]:
df_results=df_static[['id_patient','immediate_extubation']]

In [163]:
df_results_missing_false = df_static[['id_patient','immediate_extubation']]

In [150]:
df_results['result_tree'] = "stop"
df_results['result_tree_immediate_extubation'] = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [164]:
df_results_missing_false['result_tree'] = "stop"
df_results_missing_false['result_tree_immediate_extubation'] = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [151]:
for index, row in df_results.iterrows():
    df_results.iloc[index,2]= decision_empirique(df_static,df_dynamic,id_patient=row['id_patient'], regle_nan=True, drop_NMT=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [165]:
for index, row in df_results_missing_false.iterrows():
    df_results_missing_false.iloc[index,2]= decision_empirique(df_static,df_dynamic,id_patient=row['id_patient'], regle_nan=False, drop_NMT=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [152]:
for index, row in df_results.iterrows():
    df_results.iloc[index,3] = recode_leaf_in_immediate_extubation(row['result_tree'])
    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [166]:
for index, row in df_results_missing_false.iterrows():
    df_results_missing_false.iloc[index,3] = recode_leaf_in_immediate_extubation(row['result_tree'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [168]:
df_results.head()

Unnamed: 0,id_patient,immediate_extubation,result_tree,result_tree_immediate_extubation
0,1,0,Transfer to ICU with standard oxygen therapy o...,1
1,2,1,Transfer to ICU with mechanical ventilation,0
2,3,0,Transfer to ICU with ECMO and mechanical venti...,0
3,4,0,Transfer to ICU with standard oxygen therapy o...,1
4,5,0,Transfer to ICU with mechanical ventilation an...,0


In [167]:
df_results_missing_false.head()

Unnamed: 0,id_patient,immediate_extubation,result_tree,result_tree_immediate_extubation
0,1,0,Transfer to ICU with standard oxygen therapy o...,1
1,2,1,Transfer to ICU with mechanical ventilation an...,0
2,3,0,Transfer to ICU with ECMO and mechanical venti...,0
3,4,0,Transfer to ICU with standard oxygen therapy o...,1
4,5,0,Transfer to ICU with mechanical ventilation an...,0


In [154]:
df_results['immediate_extubation'].sum(), df_results['result_tree_immediate_extubation'].sum(), df_results.shape[0]

(137, 173, 410)

In [169]:
df_results_missing_false['immediate_extubation'].sum(), df_results_missing_false['result_tree_immediate_extubation'].sum(), df_results_missing_false.shape[0]

(137, 147, 410)

In [155]:
from sklearn.metrics import confusion_matrix

In [156]:
df_results.head()

Unnamed: 0,id_patient,immediate_extubation,result_tree,result_tree_immediate_extubation
0,1,0,Transfer to ICU with standard oxygen therapy o...,1
1,2,1,Transfer to ICU with mechanical ventilation,0
2,3,0,Transfer to ICU with ECMO and mechanical venti...,0
3,4,0,Transfer to ICU with standard oxygen therapy o...,1
4,5,0,Transfer to ICU with mechanical ventilation an...,0


In [161]:
confusion_matrix(y_true=df_results['immediate_extubation'], y_pred=df_results['result_tree_immediate_extubation'])

array([[193,  80],
       [ 44,  93]])

In [174]:
mat=confusion_matrix(y_true=df_results['immediate_extubation'], y_pred=df_results['result_tree_immediate_extubation'])
(mat/df_results_missing_false.shape[0])*100

array([[47.07317073, 19.51219512],
       [10.73170732, 22.68292683]])

In [170]:
confusion_matrix(y_true=df_results_missing_false['immediate_extubation'], y_pred=df_results_missing_false['result_tree_immediate_extubation'])

array([[208,  65],
       [ 55,  82]])

In [182]:
df_results_missing_false['immediate_extubation'].sum()

137

In [171]:
mat=confusion_matrix(y_true=df_results_missing_false['immediate_extubation'], y_pred=df_results_missing_false['result_tree_immediate_extubation'])

In [173]:
(mat/df_results_missing_false.shape[0])*100

array([[50.73170732, 15.85365854],
       [13.41463415, 20.        ]])

In [175]:
train, test = dataset.get_static()
data_static = pd.concat([train, test])
df_results = pd.merge(df_results, data_static[['id_patient','target']], how='left', on='id_patient')

In [177]:
df_results.head()

Unnamed: 0,id_patient,immediate_extubation,result_tree,result_tree_immediate_extubation,target
0,1,0,Transfer to ICU with standard oxygen therapy o...,1,0
1,2,1,Transfer to ICU with mechanical ventilation,0,1
2,3,0,Transfer to ICU with ECMO and mechanical venti...,0,0
3,4,0,Transfer to ICU with standard oxygen therapy o...,1,1
4,5,0,Transfer to ICU with mechanical ventilation an...,0,0


In [178]:
df_results_missing_false = pd.merge(df_results_missing_false, data_static[['id_patient','target']], how='left', on='id_patient')

In [179]:
mat=confusion_matrix(y_true=df_results_missing_false['target'], y_pred=df_results_missing_false['result_tree_immediate_extubation'])

In [180]:
mat

array([[180,  61],
       [ 83,  86]])

In [181]:
(mat/df_results_missing_false.shape[0])*100

array([[43.90243902, 14.87804878],
       [20.24390244, 20.97560976]])