In [1]:
import pandas as pd
from tqdm import tqdm
import numpy as np

In [2]:
# Earlier input locations, kept for reference:
#df= pd.read_csv(f'./data/mimic_data_new/full.csv')
#df= pd.read_csv(f'C:/Users/M1107171/MIMIC/清出來的資料/DNR/20240326/full.csv')
# Active input: the file name translates to "DNR negative samples".
# A plain string is enough here; the path contains no f-string placeholders.
df = pd.read_csv('C:/Users/USER/M1326168/MIMIC/DNR/dnr負樣本.csv')


In [3]:
"""一些額外處理"""
# Start from a clean 0..n-1 row index.
df = df.reset_index(drop=True)

# Remove helper columns when they are present (absent ones are skipped).
# NOTE: 'total' is dropped *before* 'total2' is renamed to 'total', so
# re-running this cell on the already-processed frame drops the renamed column.
df = df.drop(columns=['select_id', 'total'], errors='ignore')

# Rename 'total2' and prefix the six organ sub-scores with 'SOFA-'.
df = df.rename(columns={
    'total2': 'total',
    'respiration': 'SOFA-respiration',
    'coagulation': 'SOFA-coagulation',
    'liver': 'SOFA-liver',
    'cardiovascular': 'SOFA-cardiovascular',
    'cns': 'SOFA-cns',
    'renal': 'SOFA-renal',
})

# Strip the literal 'avg ' marker from every column name.
df.columns = df.columns.str.replace('avg ', '', regex=False)

# Move the Spouse column to the last position.
spouse_col = df.pop('Spouse')
df['Spouse'] = spouse_col

In [4]:
# Distinct stay identifiers, in order of first appearance in the table.
distinct_stay_id = pd.unique(df['stay_id'])
# The printed label means "total number of patients".
print(f'總患者數:{len(distinct_stay_id)}')

總患者數:5870


In [5]:
# Define the labels for the multiple prediction tasks.
def Get_Label(df_in: pd.DataFrame) -> pd.DataFrame:
    """Derive ventilator-usage and weaning labels for one block of rows.

    The input is not modified; a labelled copy is returned. Rows are compared
    with their successors (row i vs. row i+1, and a look-ahead window of rows
    i..i+3), so the rows are assumed to be the consecutive daily records of a
    single stay -- TODO confirm against the data source.

    Required columns: 'PEEP', 'FiO2', 'Peak Airway Pressure', 'InvasiveVent',
    'dod'. 'dod == 1' is treated as death (per the original comments).

    Columns changed or added in the returned frame:
      * 'PEEP' is floored at 5 and 'FiO2' at 30; 'Peak Airway Pressure'
        values below 10 become NaN. For rows whose final 'use_vent' is 0 or
        -1, missing 'PEEP' / 'FiO2' are filled with 5 / 30.
      * 'use_vent' (inserted at column position 1): 'InvasiveVent' cast to
        int, with runs of at most 3 zero rows between two ventilated rows
        filled with 1.
      * 'Weaning': for rows with InvasiveVent == 1, 1 when the next row has
        InvasiveVent == 0 and no dod == 1 occurs in rows i..i+3, else 0.
        -1 elsewhere, including the last row.
      * 'Reintubation': for rows with Weaning == 1 that have at least four
        following rows, 1 when InvasiveVent == 1 occurs in rows i+1..i+4,
        else 0. -1 elsewhere.
      * 'Weaning_successful': same rule as 'Weaning' but based on the
        gap-filled 'use_vent' column. -1 elsewhere.

    Args:
        df_in: Frame holding the required columns. Any index is accepted; the
            returned frame carries the same index.

    Returns:
        A new DataFrame with the adjustments and label columns described above.
    """
    df_out = df_in.copy()
    # The row loops below address rows as 0..n-1, so work on a positional index
    # and restore the caller's index just before returning.
    df_out.index = pd.RangeIndex(len(df_out))
    n_rows = len(df_out)

    # Floor small PEEP / FiO2 values and blank out very low peak airway pressures.
    df_out.loc[df_out['PEEP'] < 5, 'PEEP'] = 5
    df_out.loc[df_out['FiO2'] < 30, 'FiO2'] = 30
    df_out.loc[df_out['Peak Airway Pressure'] < 10, 'Peak Airway Pressure'] = np.nan

    # Ventilator usage flag: starts as InvasiveVent cast to int.
    df_out.insert(1, 'use_vent', df_out['InvasiveVent'].astype(int))

    # Fill short ventilator interruptions: when two ventilated rows are separated
    # by at most 3 rows that are all 0, mark the rows in between as ventilated.
    # NOTE(review): the original comment said "less than or equal to four days",
    # while the code allows a gap of at most 3 rows -- confirm which is intended.
    vent_rows = df_out[df_out['use_vent'] == 1].index
    for start_idx, end_idx in zip(vent_rows[:-1], vent_rows[1:]):
        gap = end_idx - start_idx - 1
        # .loc slicing is label-based and inclusive on both ends.
        if gap <= 3 and (df_out.loc[start_idx+1:end_idx-1, 'use_vent'] == 0).all():
            df_out.loc[start_idx+1:end_idx-1, 'use_vent'] = 1

    # Largest 'dod' value over rows i..i+3, computed once and shared by both
    # weaning labels below ('dod' is not modified in between).
    dod = df_out['dod']
    dod_window_max = [dod.iloc[i:i+4].max() for i in range(n_rows - 1)]

    # Weaning, based on the raw InvasiveVent column.
    # NOTE(review): the original comment said death is not considered for this
    # label, but the code does check 'dod' -- behavior kept as coded.
    df_out['Weaning'] = -1
    for i in range(n_rows - 1):
        if df_out.at[i, 'InvasiveVent'] != 1:
            continue
        off_vent_tomorrow = df_out.at[i+1, 'InvasiveVent'] == 0
        if off_vent_tomorrow and dod_window_max[i] != 1:
            df_out.at[i, 'Weaning'] = 1
        else:
            df_out.at[i, 'Weaning'] = 0

    # Reintubation (original note: unused downstream, could be removed).
    # Only rows with four following rows are labelled.
    df_out['Reintubation'] = -1
    for i in range(n_rows - 4):
        if df_out.at[i, 'Weaning'] != 1:
            continue
        back_on_vent = df_out.loc[i+1:i+4, 'InvasiveVent'].max() == 1
        df_out.at[i, 'Reintubation'] = 1 if back_on_vent else 0

    # Weaning_successful, based on the gap-filled use_vent column; death in the
    # look-ahead window counts as failure.
    # NOTE(review): the original inline comment listed rows i+1..i+3, but the
    # window used is rows i..i+3 -- behavior kept as coded.
    df_out['Weaning_successful'] = -1
    for i in range(n_rows - 1):
        if df_out.at[i, 'use_vent'] != 1:
            continue
        use_vent_tomorrow = df_out.at[i+1, 'use_vent']
        if use_vent_tomorrow == 1:
            df_out.at[i, 'Weaning_successful'] = 0
        elif use_vent_tomorrow == 0:
            df_out.at[i, 'Weaning_successful'] = 0 if dod_window_max[i] == 1 else 1

    # Default ventilator settings for rows where the ventilator was not used.
    not_on_vent = df_out['use_vent'].isin([0, -1])
    df_out.loc[not_on_vent & df_out['PEEP'].isna(), 'PEEP'] = 5
    df_out.loc[not_on_vent & df_out['FiO2'].isna(), 'FiO2'] = 30

    df_out.index = df_in.index
    return df_out

In [6]:
# Label every stay separately, then stitch the per-stay frames back together.
df_result_list = []

for stay_id in tqdm(distinct_stay_id, desc="Processing stay_ids"):
    # Give each stay's rows a fresh 0..n-1 index before labelling
    # (non-inplace, so the filtered slice itself is never mutated).
    df_p = df[df['stay_id'] == stay_id].reset_index(drop=True)
    df_p = Get_Label(df_p)
    df_result_list.append(df_p)

# One concatenation at the end; the result gets a new continuous row index.
df_result = pd.concat(df_result_list, ignore_index=True)

Processing stay_ids: 100%|██████████| 5870/5870 [00:17<00:00, 332.71it/s]


In [7]:
# Add the DNR signing time and early_DNR; for now these are not fed into the DNR prediction model.
# NOTE: everything below is disabled code kept inside a string literal; nothing in it runs,
# the cell merely evaluates (and displays) the string.
"""

# 加載 df1 到 DataFrame
df1= pd.read_csv(f'C:/Users/USER/M1326168/MIMIC/DNR/merged_dnr天數.csv')

# 將 df1合併到 df 
df_result  = pd.merge(df_result , df1, on='stay_id', how='left')

#將 NaN 值填充為999，表示沒有對應的'stay_id'

df_result ["early_DNR"] = df_result ["early_DNR"].fillna(999)
df_result ["DNR_after_in_ICU_day"] = df_result ["DNR_after_in_ICU_day"].fillna(999)

# 將更新後的 DataFrame 匯出到新文件

#print("已更新 'DNR' 欄位'")

#rint(df.head(2))

#df.to_csv(f'C:/Users/USER/M1326168/MIMIC/DNR/20241204/dnr_step1.csv',index = False)
"""

'\n\n# 加載 df1 到 DataFrame\ndf1= pd.read_csv(f\'C:/Users/USER/M1326168/MIMIC/DNR/merged_dnr天數.csv\')\n\n# 將 df1合併到 df \ndf_result  = pd.merge(df_result , df1, on=\'stay_id\', how=\'left\')\n\n#將 NaN 值填充為999，表示沒有對應的\'stay_id\'\n\ndf_result ["early_DNR"] = df_result ["early_DNR"].fillna(999)\ndf_result ["DNR_after_in_ICU_day"] = df_result ["DNR_after_in_ICU_day"].fillna(999)\n\n# 將更新後的 DataFrame 匯出到新文件\n\n#print("已更新 \'DNR\' 欄位\'")\n\n#rint(df.head(2))\n\n#df.to_csv(f\'C:/Users/USER/M1326168/MIMIC/DNR/20241204/dnr_step1.csv\',index = False)\n'

In [8]:
# Write the labelled table to CSV, leaving the row index out of the file.
df_result.to_csv('C:/Users/USER/M1326168/MIMIC/DNR/20241002/full_step1.csv', index=False)

In [9]:
# List every column of the labelled table, one name per line.
for column_name in df_result.columns:
    print(column_name)

stay_id
use_vent
date
SaO2
Respiration
Heart Rate
Systemic Systolic
Systemic Diastolic
Systemic Mean
Compliance
FiO2
Minute Ventilation
Mean Airway Pressure
Peak Airway Pressure
PEEP
PC mode
Pressure Support
Plateau
Tidal Volume
Respiratory Rate
ROXindex
RASS
Platelets x1000
WBC x1000
Hgb
Albumin
Total Protein
Total Bilirubin
PO2
PaCO2
Glucose
BUN
pH
Sodium
Potassium
Magnesium
Calcium
Chloride
creatinine
HCO3
Phosphate
Alkaline Phos.
AST (SGOT)
ALT (SGPT)
PT-INR
Vasopressor
Relaxant
Sedation
PPI
Pain control
total
Fluid_intake_value
Nutrition_Enteral_value
Urine_value
Aspergillus
Candida
Abdomen
Blood
Respiratory tract
Skin and soft tissue
Urinary tract
Others
dod
dod_3day
dod_7day
dod_30day
dod_60day
dod_90day
InvasiveVent
tracheostomy
NonInvasiveVent
SupplementalOxygen
HFNC
Anxiety
Assess for anxiety, depression, or delirium which may contribut
Asystole
Blood Transfusion
Consult to pastoral care, social services, palliative care, psy
DNAR (Do Not Attempt Resuscitation) [DNR]
DNAR (Do