## Loading packages and datasets

In [6]:
# importing packages

import numpy as np
import os
import time
from dask import dataframe as dd
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import vitaldb
import csv
from datetime import datetime, time as datetime_time, timedelta
import glob
import sys
import sklearn.neighbors._base

In [5]:
import fancyimpute as fi

In [3]:
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 500)

In [7]:
def getRootDir():
    """
    Get root folder directory
    :return: root folder directory
    :rtype: path
    """
    rootdir = os.path.abspath(os.curdir)

    return rootdir

In [8]:
rootdir = getRootDir()
rootdir

'/Users/tselanna/Desktop/Code'

In [117]:
# Import df2 dataset which was partly imputed (not for BT and other intraop vars though)
df2 = glob.glob(os.path.join(
        rootdir, 'TOSHIBA_HDD/2022_07_05T15_28_54_df2.csv'))
df2 = pd.read_csv(df2[0], dtype={"cormack": 'string'})

In [9]:
# Import the shrinked dataset from PCA&Shrink where BT was been interpolated already 
df = glob.glob(os.path.join(
        rootdir, 'TOSHIBA_HDD/2022_07_06T14_57_48_1104cases_shrinked_final.csv'))
df = pd.read_csv(df[0], dtype={"cormack": 'string'})

In [64]:
# Import edited merged dataset for investigative purposes later in the script
merged_df = glob.glob(os.path.join(
        rootdir, '*TOSHIBA_HDD/2022_06_20T13_36_10_Merged_edited.csv'))
merged_df = pd.read_csv(merged_df[0], dtype={"position": 'string', 'cormack':'string', 'airway':'string', 'aline1':'string', 'Solar8000/BT':float})
merged_df

Unnamed: 0,caseid,subjectid,casestart,caseend,anestart,aneend,opstart,opend,adm,dis,icu_days,death_inhosp,age,sex,height,weight,bmi,asa,emop,department,optype,dx,opname,approach,position,ane_type,preop_htn,preop_dm,preop_ecg,preop_pft,preop_hb,preop_plt,preop_pt,preop_aptt,preop_na,preop_k,preop_gluc,preop_alb,preop_ast,preop_alt,preop_bun,preop_cr,cormack,airway,tubesize,iv1,aline1,intraop_ebl,intraop_uo,intraop_rbc,intraop_ffp,intraop_crystalloid,intraop_colloid,intraop_ppf,intraop_mdz,intraop_ftn,intraop_rocu,intraop_vecu,intraop_eph,intraop_phe,intraop_epi,intraop_ca,anedur,extdur,los,Primus/CO2,Primus/ETCO2,Primus/FEO2,Primus/FIO2,Primus/INCO2,Primus/MAC,Primus/PEEP_MBAR,Primus/RR_CO2,Primus/SET_FIO2,Primus/SET_INTER_PEEP,Solar8000/BT,Solar8000/ETCO2,Solar8000/FEO2,Solar8000/FIO2,Solar8000/HR,Solar8000/INCO2,Solar8000/NIBP_DBP,Solar8000/NIBP_MBP,Solar8000/NIBP_SBP,Solar8000/PLETH_HR,Solar8000/PLETH_SPO2,Solar8000/RR_CO2,Solar8000/VENT_MAWP,Solar8000/VENT_RR
0,1,5955,0,11520,-540,10860,-540,10380,-236220,627780,0,0,77.0,1,160.2,67.5,26.3,2.0,0,General surgery,Colorectal,Rectal cancer,Low anterior resection,Open,Lithotomy,General,1,0,Normal Sinus Rhythm,Normal,14.1,189.0,94.0,33.2,141.0,3.1,134.0,4.3,18.0,16.0,10.0,0.82,I,Oral,7.5,Right forearm,Left radial,,300.0,0,0,350.0,0,120,0.0,100,70,0,10,0,0,0,190.0,8.0,7.145833,,,,,,,,,,,,,,,,,,,,,,,,
1,1,5955,0,11520,-540,10860,-540,10380,-236220,627780,0,0,77.0,1,160.2,67.5,26.3,2.0,0,General surgery,Colorectal,Rectal cancer,Low anterior resection,Open,Lithotomy,General,1,0,Normal Sinus Rhythm,Normal,14.1,189.0,94.0,33.2,141.0,3.1,134.0,4.3,18.0,16.0,10.0,0.82,I,Oral,7.5,Right forearm,Left radial,,300.0,0,0,350.0,0,120,0.0,100,70,0,10,0,0,0,190.0,8.0,7.145833,0.0,,,,,,,,,,,,,,88.0,,,,,88.0,96.0,,0.0,
2,1,5955,0,11520,-540,10860,-540,10380,-236220,627780,0,0,77.0,1,160.2,67.5,26.3,2.0,0,General surgery,Colorectal,Rectal cancer,Low anterior resection,Open,Lithotomy,General,1,0,Normal Sinus Rhythm,Normal,14.1,189.0,94.0,33.2,141.0,3.1,134.0,4.3,18.0,16.0,10.0,0.82,I,Oral,7.5,Right forearm,Left radial,,300.0,0,0,350.0,0,120,0.0,100,70,0,10,0,0,0,190.0,8.0,7.145833,0.0,,,,,,,,,,20.9,,,,,,,,,,,,,
3,1,5955,0,11520,-540,10860,-540,10380,-236220,627780,0,0,77.0,1,160.2,67.5,26.3,2.0,0,General surgery,Colorectal,Rectal cancer,Low anterior resection,Open,Lithotomy,General,1,0,Normal Sinus Rhythm,Normal,14.1,189.0,94.0,33.2,141.0,3.1,134.0,4.3,18.0,16.0,10.0,0.82,I,Oral,7.5,Right forearm,Left radial,,300.0,0,0,350.0,0,120,0.0,100,70,0,10,0,0,0,190.0,8.0,7.145833,0.0,0.0,100.0,100.0,0.0,0.0,,,,,,,,,87.0,,,,,88.0,96.0,,0.0,
4,1,5955,0,11520,-540,10860,-540,10380,-236220,627780,0,0,77.0,1,160.2,67.5,26.3,2.0,0,General surgery,Colorectal,Rectal cancer,Low anterior resection,Open,Lithotomy,General,1,0,Normal Sinus Rhythm,Normal,14.1,189.0,94.0,33.2,141.0,3.1,134.0,4.3,18.0,16.0,10.0,0.82,I,Oral,7.5,Right forearm,Left radial,,300.0,0,0,350.0,0,120,0.0,100,70,0,10,0,0,0,190.0,8.0,7.145833,0.0,,,,,,,,,,21.0,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10421465,6385,2278,0,20640,-540,21000,-540,19800,-225600,1675200,0,0,69.0,1,159.3,62.3,24.6,2.0,0,General surgery,Colorectal,"Colon submucosal tumor, unknown behavior",Ultralow anterior resection,Videoscopic,Lithotomy,General,1,0,Normal Sinus Rhythm,Normal,15.2,239.0,114.0,28.7,144.0,4.0,140.0,3.7,18.0,28.0,19.0,0.84,,,7.5,Right forearm,Right radial,100.0,250.0,0,0,2500.0,0,100,0.0,0,100,0,25,30,0,300,359.0,20.0,19.159722,0.0,,,,,,,,,,,,,,,,,,,,,,,
10421466,6385,2278,0,20640,-540,21000,-540,19800,-225600,1675200,0,0,69.0,1,159.3,62.3,24.6,2.0,0,General surgery,Colorectal,"Colon submucosal tumor, unknown behavior",Ultralow anterior resection,Videoscopic,Lithotomy,General,1,0,Normal Sinus Rhythm,Normal,15.2,239.0,114.0,28.7,144.0,4.0,140.0,3.7,18.0,28.0,19.0,0.84,,,7.5,Right forearm,Right radial,100.0,250.0,0,0,2500.0,0,100,0.0,0,100,0,25,30,0,300,359.0,20.0,19.159722,0.0,,,,,,,,,,,,,,,,,,,298.0,80.0,,,
10421467,6385,2278,0,20640,-540,21000,-540,19800,-225600,1675200,0,0,69.0,1,159.3,62.3,24.6,2.0,0,General surgery,Colorectal,"Colon submucosal tumor, unknown behavior",Ultralow anterior resection,Videoscopic,Lithotomy,General,1,0,Normal Sinus Rhythm,Normal,15.2,239.0,114.0,28.7,144.0,4.0,140.0,3.7,18.0,28.0,19.0,0.84,,,7.5,Right forearm,Right radial,100.0,250.0,0,0,2500.0,0,100,0.0,0,100,0,25,30,0,300,359.0,20.0,19.159722,0.0,,,,,,,,,,,,,,,,,,,,,,,
10421468,6385,2278,0,20640,-540,21000,-540,19800,-225600,1675200,0,0,69.0,1,159.3,62.3,24.6,2.0,0,General surgery,Colorectal,"Colon submucosal tumor, unknown behavior",Ultralow anterior resection,Videoscopic,Lithotomy,General,1,0,Normal Sinus Rhythm,Normal,15.2,239.0,114.0,28.7,144.0,4.0,140.0,3.7,18.0,28.0,19.0,0.84,,,7.5,Right forearm,Right radial,100.0,250.0,0,0,2500.0,0,100,0.0,0,100,0,25,30,0,300,359.0,20.0,19.159722,0.0,,,,,,,,,,,,,,,,,,,283.0,79.0,,,


In [66]:
df2.isna().sum()

caseid                         0
icu_days                       0
age                            0
sex                            0
height                         0
weight                         0
bmi                            0
asa                        36189
emop                           0
department                     0
optype                         0
dx                             0
opname                         0
approach                       0
position                       0
ane_type                       0
preop_htn                      0
preop_dm                       0
preop_ecg                      0
preop_pft                      0
preop_hb                   95395
preop_plt                  89925
preop_pt                  101750
preop_aptt                105736
preop_na                  203798
preop_k                   203798
preop_gluc                 86136
preop_alb                  97370
preop_ast                  86136
preop_alt                  86136
preop_bun 

In [10]:
df.isna().sum()

caseid                         0
icu_days                       0
age                            0
sex                            0
height                         0
weight                         0
bmi                            0
asa                        36189
emop                           0
department                     0
optype                         0
dx                             0
opname                         0
approach                       0
position                  110725
ane_type                       0
preop_htn                      0
preop_dm                       0
preop_ecg                      0
preop_pft                      0
preop_hb                   95395
preop_plt                  89925
preop_pt                  101750
preop_aptt                105736
preop_na                  203798
preop_k                   203798
preop_gluc                 86136
preop_alb                  97370
preop_ast                  86136
preop_alt                  86136
preop_bun 

In [11]:
df.shape

(3585370, 79)

In [9]:
df2.shape

(3586392, 79)

## Interpolation Script

In [None]:
# Perform interpolation on df, a shrinked dataframe where Solar8000/BT NaN have been filled with ffill() and 
# 2 cases have been deleted as their vitals were recorded at a different frequency, causing NaN

In [12]:
# Function that performs interpolation

def imputInterp(rootdir, df, method="Linear"):
    """Imputation techniques"""
    dfs_imp = []
    neighbors = 10

    pID = df['caseid']

    for i in pID.unique():
        cond = df[pID == i]
        df_pat = cond.dropna(how='all', axis=1)
        col = df_pat.columns

        # Interpolation methods
        """Interpolation with different techniques - linear, pad, nearest, polynomial (order=2), spline (order=5), cubic,
    krogh (too slow), piecewise_polynomial, pchip, akima, cubicspline, from_derivatives"""
        if method == "Linear":
            df_lin_interp = df_pat.interpolate(
                method='linear', axis=0, limit_direction='both')
            df_lin_interp = pd.DataFrame(df_lin_interp, columns=col)
            dfs_imp.append(df_lin_interp)
        elif method == "Iterative":
            df_iter_imp = fi.IterativeImputer().fit_transform(df_pat)
            df_iter_imp = pd.DataFrame(df_iter_imp, columns=df_pat.columns)
            dfs_imp.append(df_iter_imp)

    df_imputation = pd.concat(dfs_imp)
    return(df_imputation)

In [13]:
# LInear interpolation does not work when using the whole dataset, only when using only number containing variables
imp = imputInterp(rootdir, df=df, method="Linear")

ValueError: Invalid fill method. Expecting pad (ffill) or backfill (bfill). Got linear

In [14]:
# Therefore, select variables containing numbers to try interpolation -it works now
dfp = df.select_dtypes(include='number')

In [15]:
imp = imputInterp(rootdir, df=dfp, method="Linear")

In [16]:
imp.isna().sum()

caseid                         0
icu_days                       0
age                            0
sex                            0
height                         0
weight                         0
bmi                            0
asa                        36189
emop                           0
preop_htn                      0
preop_dm                       0
preop_hb                   95395
preop_plt                  89925
preop_pt                  101750
preop_aptt                105736
preop_na                  203798
preop_k                   203798
preop_gluc                 86136
preop_alb                  97370
preop_ast                  86136
preop_alt                  86136
preop_bun                  96857
preop_cr                  102327
tubesize                   55924
intraop_uo               1086489
intraop_rbc                    0
intraop_ffp                    0
intraop_crystalloid       201116
intraop_colloid                0
intraop_ppf                    0
intraop_md

In [17]:
# Manually interplate the two vital variables that still have NaN
imp['Primus/SET_INTER_PEEP'].interpolate(method="linear", limit_direction='both', inplace=True)

In [18]:
imp['Solar8000/RR_CO2'].interpolate(method="linear", limit_direction='both', inplace=True)

In [19]:
# There are no NaN anymore in the intraop vars
imp.isna().sum()

caseid                         0
icu_days                       0
age                            0
sex                            0
height                         0
weight                         0
bmi                            0
asa                        36189
emop                           0
preop_htn                      0
preop_dm                       0
preop_hb                   95395
preop_plt                  89925
preop_pt                  101750
preop_aptt                105736
preop_na                  203798
preop_k                   203798
preop_gluc                 86136
preop_alb                  97370
preop_ast                  86136
preop_alt                  86136
preop_bun                  96857
preop_cr                  102327
tubesize                   55924
intraop_uo               1086489
intraop_rbc                    0
intraop_ffp                    0
intraop_crystalloid       201116
intraop_colloid                0
intraop_ppf                    0
intraop_md

In [20]:
imp.caseid.nunique()

1104

In [21]:
imp.shape

(3585370, 67)

In [106]:
imp.isna().sum().sum()

0

In [107]:
df.shape

(3585370, 79)

In [124]:
imp.shape

(3585370, 79)

In [None]:
# Now insert back the categorical variables that were removed to run interpolation

In [22]:
imp['department'] = df['department']
imp['optype'] = df['optype']
imp['dx'] = df['dx']
imp['opname'] = df['opname']
imp['approach'] = df['approach']
imp['position'] = df['position']
imp['ane_type'] = df['ane_type']
imp['preop_ecg'] = df['preop_ecg']
imp['preop_pft'] = df['preop_pft']
imp['cormack'] = df['cormack']
imp['iv1'] = df['iv1']
imp['airway'] = df['airway']

In [23]:
# Create new df with variables in the correct order

imp_final = imp[['caseid', 'icu_days', 'age', 'sex', 'height', 'weight', 'bmi', 'asa',
       'emop', 'department', 'optype', 'dx', 'opname', 'approach', 'position',
       'ane_type', 'preop_htn', 'preop_dm', 'preop_ecg', 'preop_pft',
       'preop_hb', 'preop_plt', 'preop_pt', 'preop_aptt', 'preop_na',
       'preop_k', 'preop_gluc', 'preop_alb', 'preop_ast', 'preop_alt',
       'preop_bun', 'preop_cr', 'cormack', 'airway', 'tubesize', 'iv1',
       'intraop_ebl', 'intraop_uo', 'intraop_rbc', 'intraop_ffp',
       'intraop_crystalloid', 'intraop_colloid', 'intraop_ppf', 'intraop_mdz',
       'intraop_ftn', 'intraop_rocu', 'intraop_vecu', 'intraop_eph',
       'intraop_phe', 'intraop_epi', 'intraop_ca', 'anedur', 'extdur', 'los',
       'Primus/CO2', 'Primus/ETCO2', 'Primus/FEO2', 'Primus/FIO2',
       'Primus/INCO2', 'Primus/MAC', 'Primus/PEEP_MBAR', 'Primus/RR_CO2',
       'Primus/SET_FIO2', 'Primus/SET_INTER_PEEP', 'Solar8000/BT',
       'Solar8000/ETCO2', 'Solar8000/FEO2', 'Solar8000/FIO2', 'Solar8000/HR',
       'Solar8000/INCO2', 'Solar8000/NIBP_DBP', 'Solar8000/NIBP_MBP',
       'Solar8000/NIBP_SBP', 'Solar8000/PLETH_HR', 'Solar8000/PLETH_SPO2',
       'Solar8000/RR_CO2', 'Solar8000/VENT_MAWP', 'Solar8000/VENT_RR',
       'seconds']]

In [24]:
imp_final.caseid.nunique()

1104

In [25]:
# No NaN in intraop variables
imp_final.isna().sum()

caseid                         0
icu_days                       0
age                            0
sex                            0
height                         0
weight                         0
bmi                            0
asa                        36189
emop                           0
department                     0
optype                         0
dx                             0
opname                         0
approach                       0
position                  110725
ane_type                       0
preop_htn                      0
preop_dm                       0
preop_ecg                      0
preop_pft                      0
preop_hb                   95395
preop_plt                  89925
preop_pt                  101750
preop_aptt                105736
preop_na                  203798
preop_k                   203798
preop_gluc                 86136
preop_alb                  97370
preop_ast                  86136
preop_alt                  86136
preop_bun 

In [136]:
type(imp_final)

pandas.core.frame.DataFrame

In [29]:
# This variable was erroneously not deleted in earlier stages, so do it now
imp_final = imp_final.drop('cormack', axis=1)

In [30]:
# Fill the remaining NaN in preop vars with -1
imp_final = imp_final.fillna(-1)

In [31]:
imp_final.shape

(3585370, 78)

In [33]:
# Export the final imputed & shrinked df with 1104 cases 

csv_filename = os.path.join(
            "TOSHIBA_HDD", f'{datetime.now().strftime("%Y_%m_%dT%H_%M_%S")}_1104cases_IMP_SHRINK_FINAL.csv')
imp_final.to_csv(csv_filename, encoding='utf-8-sig', index=False)

In [34]:
imp_final

Unnamed: 0,caseid,icu_days,age,sex,height,weight,bmi,asa,emop,department,optype,dx,opname,approach,position,ane_type,preop_htn,preop_dm,preop_ecg,preop_pft,preop_hb,preop_plt,preop_pt,preop_aptt,preop_na,preop_k,preop_gluc,preop_alb,preop_ast,preop_alt,preop_bun,preop_cr,airway,tubesize,iv1,intraop_ebl,intraop_uo,intraop_rbc,intraop_ffp,intraop_crystalloid,intraop_colloid,intraop_ppf,intraop_mdz,intraop_ftn,intraop_rocu,intraop_vecu,intraop_eph,intraop_phe,intraop_epi,intraop_ca,anedur,extdur,los,Primus/CO2,Primus/ETCO2,Primus/FEO2,Primus/FIO2,Primus/INCO2,Primus/MAC,Primus/PEEP_MBAR,Primus/RR_CO2,Primus/SET_FIO2,Primus/SET_INTER_PEEP,Solar8000/BT,Solar8000/ETCO2,Solar8000/FEO2,Solar8000/FIO2,Solar8000/HR,Solar8000/INCO2,Solar8000/NIBP_DBP,Solar8000/NIBP_MBP,Solar8000/NIBP_SBP,Solar8000/PLETH_HR,Solar8000/PLETH_SPO2,Solar8000/RR_CO2,Solar8000/VENT_MAWP,Solar8000/VENT_RR,seconds
0,1,0,77.0,1,160.2,67.5,26.3,2.0,0,General surgery,Colorectal,Rectal cancer,Low anterior resection,Open,Lithotomy,General,1,0,Normal Sinus Rhythm,Normal,14.1,189.0,94.0,33.2,141.0,3.1,134.0,4.3,18.0,16.0,10.0,0.82,Oral,7.5,Right forearm,-1.0,300.0,0,0,350.0,0,120,0.0,100,70,0,10,0,0,0,190.0,8.0,7.145833,0.0,0.0,100.0,100.0,0.0,0.0,1.0,0.0,100.0,0.0,21.100000,30.0,89.0,95.0,84.0,1.0,84.0,100.0,150.0,85.0,97.0,5.0,0.0,18.0,96
1,1,0,77.0,1,160.2,67.5,26.3,2.0,0,General surgery,Colorectal,Rectal cancer,Low anterior resection,Open,Lithotomy,General,1,0,Normal Sinus Rhythm,Normal,14.1,189.0,94.0,33.2,141.0,3.1,134.0,4.3,18.0,16.0,10.0,0.82,Oral,7.5,Right forearm,-1.0,300.0,0,0,350.0,0,120,0.0,100,70,0,10,0,0,0,190.0,8.0,7.145833,0.0,0.0,100.0,100.0,0.0,0.0,1.0,0.0,100.0,0.0,21.100000,30.0,89.0,95.0,84.0,1.0,84.0,100.0,150.0,84.0,97.0,5.0,0.0,18.0,98
2,1,0,77.0,1,160.2,67.5,26.3,2.0,0,General surgery,Colorectal,Rectal cancer,Low anterior resection,Open,Lithotomy,General,1,0,Normal Sinus Rhythm,Normal,14.1,189.0,94.0,33.2,141.0,3.1,134.0,4.3,18.0,16.0,10.0,0.82,Oral,7.5,Right forearm,-1.0,300.0,0,0,350.0,0,120,0.0,100,70,0,10,0,0,0,190.0,8.0,7.145833,0.0,0.0,100.0,100.0,0.0,0.0,1.0,0.0,100.0,0.0,21.000000,30.0,89.0,95.0,84.0,1.0,84.0,100.0,150.0,85.0,97.0,5.0,0.0,18.0,100
3,1,0,77.0,1,160.2,67.5,26.3,2.0,0,General surgery,Colorectal,Rectal cancer,Low anterior resection,Open,Lithotomy,General,1,0,Normal Sinus Rhythm,Normal,14.1,189.0,94.0,33.2,141.0,3.1,134.0,4.3,18.0,16.0,10.0,0.82,Oral,7.5,Right forearm,-1.0,300.0,0,0,350.0,0,120,0.0,100,70,0,10,0,0,0,190.0,8.0,7.145833,0.0,0.0,100.0,100.0,0.0,0.0,1.0,0.0,100.0,0.0,21.000000,30.0,89.0,95.0,85.0,1.0,84.0,100.0,150.0,85.0,97.0,5.0,0.0,18.0,102
4,1,0,77.0,1,160.2,67.5,26.3,2.0,0,General surgery,Colorectal,Rectal cancer,Low anterior resection,Open,Lithotomy,General,1,0,Normal Sinus Rhythm,Normal,14.1,189.0,94.0,33.2,141.0,3.1,134.0,4.3,18.0,16.0,10.0,0.82,Oral,7.5,Right forearm,-1.0,300.0,0,0,350.0,0,120,0.0,100,70,0,10,0,0,0,190.0,8.0,7.145833,0.0,0.0,100.0,100.0,0.0,0.0,1.0,0.0,100.0,0.0,21.000000,30.0,89.0,95.0,85.0,1.0,84.0,100.0,150.0,86.0,97.0,5.0,0.0,18.0,104
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3585365,6385,0,69.0,1,159.3,62.3,24.6,2.0,0,General surgery,Colorectal,"Colon submucosal tumor, unknown behavior",Ultralow anterior resection,Videoscopic,Lithotomy,General,1,0,Normal Sinus Rhythm,Normal,15.2,239.0,114.0,28.7,144.0,4.0,140.0,3.7,18.0,28.0,19.0,0.84,-1,7.5,Right forearm,100.0,250.0,0,0,2500.0,0,100,0.0,0,100,0,25,30,0,300,359.0,20.0,19.159722,0.0,44.0,86.0,94.0,3.0,0.2,2.0,14.0,100.0,5.0,36.400002,43.0,55.0,94.0,108.0,0.0,58.0,83.0,118.0,106.0,100.0,12.0,0.0,11.0,20076
3585366,6385,0,69.0,1,159.3,62.3,24.6,2.0,0,General surgery,Colorectal,"Colon submucosal tumor, unknown behavior",Ultralow anterior resection,Videoscopic,Lithotomy,General,1,0,Normal Sinus Rhythm,Normal,15.2,239.0,114.0,28.7,144.0,4.0,140.0,3.7,18.0,28.0,19.0,0.84,-1,7.5,Right forearm,100.0,250.0,0,0,2500.0,0,100,0.0,0,100,0,25,30,0,300,359.0,20.0,19.159722,0.0,44.0,86.0,94.0,3.0,0.2,2.0,14.0,100.0,5.0,36.400002,43.0,55.0,94.0,108.0,0.0,58.0,83.0,118.0,105.0,100.0,12.0,0.0,11.0,20078
3585367,6385,0,69.0,1,159.3,62.3,24.6,2.0,0,General surgery,Colorectal,"Colon submucosal tumor, unknown behavior",Ultralow anterior resection,Videoscopic,Lithotomy,General,1,0,Normal Sinus Rhythm,Normal,15.2,239.0,114.0,28.7,144.0,4.0,140.0,3.7,18.0,28.0,19.0,0.84,-1,7.5,Right forearm,100.0,250.0,0,0,2500.0,0,100,0.0,0,100,0,25,30,0,300,359.0,20.0,19.159722,0.0,44.0,86.0,94.0,3.0,0.2,2.0,14.0,100.0,5.0,36.400002,43.0,55.0,94.0,108.0,0.0,58.0,83.0,118.0,106.0,100.0,12.0,0.0,11.0,20080
3585368,6385,0,69.0,1,159.3,62.3,24.6,2.0,0,General surgery,Colorectal,"Colon submucosal tumor, unknown behavior",Ultralow anterior resection,Videoscopic,Lithotomy,General,1,0,Normal Sinus Rhythm,Normal,15.2,239.0,114.0,28.7,144.0,4.0,140.0,3.7,18.0,28.0,19.0,0.84,-1,7.5,Right forearm,100.0,250.0,0,0,2500.0,0,100,0.0,0,100,0,25,30,0,300,359.0,20.0,19.159722,0.0,44.0,86.0,94.0,3.0,0.2,2.0,14.0,100.0,5.0,36.400002,43.0,55.0,94.0,108.0,0.0,58.0,83.0,118.0,106.0,100.0,12.0,0.0,11.0,20082


## This section was used to investigate why interpolation didnt work fully in vars such as BT

In [None]:
# Linear interpolation doesnt work when using all variables

In [13]:
# Therfore, select variables containing numbers to try interpolation -it works now
dfx = df2.select_dtypes(include='number')

In [14]:
df_imp2 = imputInterp(rootdir, df=dfx, method="Linear")

In [62]:
# 123 cases in the shirnked dataset were not imputed correctly for the BT variable
df_imp2.caseid[df_imp2['Solar8000/BT'].isna()].nunique()

123

In [29]:
# Case 5084 is responsible for all the NaN in Solar8000/ETCO2 & FEO2, FIO2, INCO2

df_imp2.caseid[df_imp2['Solar8000/ETCO2'].isna()].unique()

array([5084])

In [24]:
df_imp2.caseid[df_imp2['Primus/PEEP_MBAR'].isna()].unique()

array([1753])

In [25]:
df_imp2.caseid[df_imp2['Primus/SET_INTER_PEEP'].isna()].unique()

array([1562, 1753, 2630, 3620, 5084, 6156])

In [26]:
df_imp2.caseid[df_imp2['Solar8000/RR_CO2'].isna()].unique()

array([1753, 5084, 6156])

In [27]:
df_imp2.caseid[df_imp2['Solar8000/BT'].isna()].unique()

array([   1,   18,   33,  174,  329,  410,  420,  501,  612,  735,  804,
        888,  893,  937,  938,  958,  965, 1088, 1156, 1223, 1359, 1434,
       1438, 1459, 1517, 1562, 1634, 1666, 1721, 1740, 1753, 1796, 1826,
       1829, 1857, 1860, 1881, 1905, 1911, 1962, 2001, 2032, 2082, 2180,
       2201, 2287, 2306, 2341, 2345, 2347, 2408, 2565, 2624, 2630, 2712,
       2829, 3039, 3045, 3064, 3086, 3110, 3176, 3362, 3374, 3398, 3440,
       3518, 3535, 3620, 3664, 3788, 3801, 3918, 3938, 4007, 4010, 4025,
       4094, 4103, 4130, 4186, 4269, 4376, 4403, 4423, 4437, 4447, 4523,
       4524, 4569, 4627, 4636, 4686, 4711, 4736, 4775, 4845, 4884, 4956,
       5056, 5062, 5081, 5084, 5103, 5221, 5395, 5429, 5452, 5485, 5568,
       5658, 5711, 5767, 5813, 5888, 5972, 6016, 6021, 6059, 6148, 6156,
       6288, 6316])

In [47]:
case5084 = df_imp2[df_imp2['caseid'].eq(5084)]
case5084['Solar8000/ETCO2'].isna().sum()

439

In [58]:
case1 = df_imp2[df_imp2['caseid'].eq(1)]
case1['Solar8000/BT'].notnull().sum()

0

In [59]:
case1 = merged_df[merged_df['caseid'].eq(1)]
case1['Solar8000/BT'].isna().sum()

5782

In [61]:
case1.head(300)

Unnamed: 0,caseid,subjectid,casestart,caseend,anestart,aneend,opstart,opend,adm,dis,icu_days,death_inhosp,age,sex,height,weight,bmi,asa,emop,department,optype,dx,opname,approach,position,ane_type,preop_htn,preop_dm,preop_ecg,preop_pft,preop_hb,preop_plt,preop_pt,preop_aptt,preop_na,preop_k,preop_gluc,preop_alb,preop_ast,preop_alt,preop_bun,preop_cr,cormack,airway,tubesize,iv1,aline1,intraop_ebl,intraop_uo,intraop_rbc,intraop_ffp,intraop_crystalloid,intraop_colloid,intraop_ppf,intraop_mdz,intraop_ftn,intraop_rocu,intraop_vecu,intraop_eph,intraop_phe,intraop_epi,intraop_ca,anedur,extdur,los,Primus/CO2,Primus/ETCO2,Primus/FEO2,Primus/FIO2,Primus/INCO2,Primus/MAC,Primus/PEEP_MBAR,Primus/RR_CO2,Primus/SET_FIO2,Primus/SET_INTER_PEEP,Solar8000/BT,Solar8000/ETCO2,Solar8000/FEO2,Solar8000/FIO2,Solar8000/HR,Solar8000/INCO2,Solar8000/NIBP_DBP,Solar8000/NIBP_MBP,Solar8000/NIBP_SBP,Solar8000/PLETH_HR,Solar8000/PLETH_SPO2,Solar8000/RR_CO2,Solar8000/VENT_MAWP,Solar8000/VENT_RR
0,1,5955,0,11520,-540,10860,-540,10380,-236220,627780,0,0,77.0,1,160.2,67.5,26.3,2.0,0,General surgery,Colorectal,Rectal cancer,Low anterior resection,Open,Lithotomy,General,1,0,Normal Sinus Rhythm,Normal,14.1,189.0,94.0,33.2,141.0,3.1,134.0,4.3,18.0,16.0,10.0,0.82,I,Oral,7.5,Right forearm,Left radial,,300.0,0,0,350.0,0,120,0.0,100,70,0,10,0,0,0,190.0,8.0,7.145833,,,,,,,,,,,,,,,,,,,,,,,,
1,1,5955,0,11520,-540,10860,-540,10380,-236220,627780,0,0,77.0,1,160.2,67.5,26.3,2.0,0,General surgery,Colorectal,Rectal cancer,Low anterior resection,Open,Lithotomy,General,1,0,Normal Sinus Rhythm,Normal,14.1,189.0,94.0,33.2,141.0,3.1,134.0,4.3,18.0,16.0,10.0,0.82,I,Oral,7.5,Right forearm,Left radial,,300.0,0,0,350.0,0,120,0.0,100,70,0,10,0,0,0,190.0,8.0,7.145833,0.0,,,,,,,,,,,,,,88.0,,,,,88.0,96.0,,0.0,
2,1,5955,0,11520,-540,10860,-540,10380,-236220,627780,0,0,77.0,1,160.2,67.5,26.3,2.0,0,General surgery,Colorectal,Rectal cancer,Low anterior resection,Open,Lithotomy,General,1,0,Normal Sinus Rhythm,Normal,14.1,189.0,94.0,33.2,141.0,3.1,134.0,4.3,18.0,16.0,10.0,0.82,I,Oral,7.5,Right forearm,Left radial,,300.0,0,0,350.0,0,120,0.0,100,70,0,10,0,0,0,190.0,8.0,7.145833,0.0,,,,,,,,,,20.9,,,,,,,,,,,,,
3,1,5955,0,11520,-540,10860,-540,10380,-236220,627780,0,0,77.0,1,160.2,67.5,26.3,2.0,0,General surgery,Colorectal,Rectal cancer,Low anterior resection,Open,Lithotomy,General,1,0,Normal Sinus Rhythm,Normal,14.1,189.0,94.0,33.2,141.0,3.1,134.0,4.3,18.0,16.0,10.0,0.82,I,Oral,7.5,Right forearm,Left radial,,300.0,0,0,350.0,0,120,0.0,100,70,0,10,0,0,0,190.0,8.0,7.145833,0.0,0.0,100.0,100.0,0.0,0.0,,,,,,,,,87.0,,,,,88.0,96.0,,0.0,
4,1,5955,0,11520,-540,10860,-540,10380,-236220,627780,0,0,77.0,1,160.2,67.5,26.3,2.0,0,General surgery,Colorectal,Rectal cancer,Low anterior resection,Open,Lithotomy,General,1,0,Normal Sinus Rhythm,Normal,14.1,189.0,94.0,33.2,141.0,3.1,134.0,4.3,18.0,16.0,10.0,0.82,I,Oral,7.5,Right forearm,Left radial,,300.0,0,0,350.0,0,120,0.0,100,70,0,10,0,0,0,190.0,8.0,7.145833,0.0,,,,,,,,,,21.0,,,,,,,,,,,,,
5,1,5955,0,11520,-540,10860,-540,10380,-236220,627780,0,0,77.0,1,160.2,67.5,26.3,2.0,0,General surgery,Colorectal,Rectal cancer,Low anterior resection,Open,Lithotomy,General,1,0,Normal Sinus Rhythm,Normal,14.1,189.0,94.0,33.2,141.0,3.1,134.0,4.3,18.0,16.0,10.0,0.82,I,Oral,7.5,Right forearm,Left radial,,300.0,0,0,350.0,0,120,0.0,100,70,0,10,0,0,0,190.0,8.0,7.145833,0.0,,,,,,,,100.0,,,,,,88.0,,,,,89.0,97.0,,0.0,
6,1,5955,0,11520,-540,10860,-540,10380,-236220,627780,0,0,77.0,1,160.2,67.5,26.3,2.0,0,General surgery,Colorectal,Rectal cancer,Low anterior resection,Open,Lithotomy,General,1,0,Normal Sinus Rhythm,Normal,14.1,189.0,94.0,33.2,141.0,3.1,134.0,4.3,18.0,16.0,10.0,0.82,I,Oral,7.5,Right forearm,Left radial,,300.0,0,0,350.0,0,120,0.0,100,70,0,10,0,0,0,190.0,8.0,7.145833,0.0,,,,,,,,,,21.0,,,,,,,,,,,,,
7,1,5955,0,11520,-540,10860,-540,10380,-236220,627780,0,0,77.0,1,160.2,67.5,26.3,2.0,0,General surgery,Colorectal,Rectal cancer,Low anterior resection,Open,Lithotomy,General,1,0,Normal Sinus Rhythm,Normal,14.1,189.0,94.0,33.2,141.0,3.1,134.0,4.3,18.0,16.0,10.0,0.82,I,Oral,7.5,Right forearm,Left radial,,300.0,0,0,350.0,0,120,0.0,100,70,0,10,0,0,0,190.0,8.0,7.145833,0.0,,,,,,,,,,,,,,88.0,,,,,89.0,96.0,,0.0,
8,1,5955,0,11520,-540,10860,-540,10380,-236220,627780,0,0,77.0,1,160.2,67.5,26.3,2.0,0,General surgery,Colorectal,Rectal cancer,Low anterior resection,Open,Lithotomy,General,1,0,Normal Sinus Rhythm,Normal,14.1,189.0,94.0,33.2,141.0,3.1,134.0,4.3,18.0,16.0,10.0,0.82,I,Oral,7.5,Right forearm,Left radial,,300.0,0,0,350.0,0,120,0.0,100,70,0,10,0,0,0,190.0,8.0,7.145833,0.0,,,,,,,,,,21.0,,,,,,,,,,,,,
9,1,5955,0,11520,-540,10860,-540,10380,-236220,627780,0,0,77.0,1,160.2,67.5,26.3,2.0,0,General surgery,Colorectal,Rectal cancer,Low anterior resection,Open,Lithotomy,General,1,0,Normal Sinus Rhythm,Normal,14.1,189.0,94.0,33.2,141.0,3.1,134.0,4.3,18.0,16.0,10.0,0.82,I,Oral,7.5,Right forearm,Left radial,,300.0,0,0,350.0,0,120,0.0,100,70,0,10,0,0,0,190.0,8.0,7.145833,0.0,0.0,100.0,100.0,0.0,0.0,,,,,,,,,88.0,,,,,89.0,96.0,,0.0,


In [49]:
len(df_imp2[df_imp2['caseid'].eq(1)])

347

In [52]:
len(merged_df[merged_df['caseid'].eq(1)])

11542

In [53]:
merged_df.shape 

(10421470, 89)