# Import

In [None]:
import pandas as pd
from tqdm.notebook import tqdm

# Main

In [None]:
# Long-format accumulator that the later cells append to:
# one row per measurement with the columns listed below.
_columns: list[str] = ['case', 'timestamp', 'type', 'value']
surgery_data: pd.DataFrame = pd.DataFrame(columns=_columns)

# Numeric

## Vitals

### Load

In [None]:
# Open the vitals workbook and list its sheet names; the next cells read the
# sheets from this open handle.
workbook_path: str = '6_1_raw/Studie 2023-01-OP-An„sthesie-Vitalparameter.xlsx'
file: pd.ExcelFile = pd.ExcelFile(workbook_path)
sheetNames: list[str] = file.sheet_names
sheetNames

In [None]:
# Read every sheet of the open workbook into its own DataFrame,
# showing a progress bar while doing so.
sheets: list[pd.DataFrame] = [
    pd.read_excel(file, sheet_name=sheet) for sheet in tqdm(sheetNames)
]

In [None]:
# Stack all sheets into one frame with a fresh 0..n-1 index.
data: pd.DataFrame = pd.concat(objs=sheets, ignore_index=True)
data

### Process

In [None]:
# Bring the vitals export into the common long format:
#   1. rename the source columns to case / timestamp / type / value / unit,
#   2. discard the unit column,
#   3. parse the compact 'YYYYmmddHHMMSS' timestamps into datetimes.
vitals_column_names: dict[str, str] = {
    'Fallnummer': 'case',
    'MesswerteDatum': 'timestamp',
    'MesswerteTyp': 'type',
    'Messwert': 'value',
    'MesswertEinheit': 'unit',
}
data = (
    data
    .rename(columns=vitals_column_names)
    .drop(columns=['unit'])
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp'], format='%Y%m%d%H%M%S'))
)

data

In [None]:
# Number of distinct measurement types in the vitals data.
data['type'].nunique()

### Save

In [None]:
# Append the selected vital-sign measurements to surgery_data under short,
# normalised type names.
#
# Raw type labels that are not listed in the mapping are skipped. Several raw
# labels map to the same target name (e.g. 'Puls' and 'vital_HF' -> 'hr').
vitals_type_names: dict[str, str] = {
    'Blutdruck Diastolisch': 'bp_dia',
    'Blutdruck Systolisch': 'bp_sys',
    'PAT_ANAE_SEDLINE': 'sedline',
    'Puls': 'hr',
    'beat_mess_AMV': 'rmv',
    'beat_mess_FiO2': 'fio2',
    'beat_mess_Frequenz_AF': 'rr',
    'beat_mess_IntrPEEP': 'vent_peep',
    'beat_mess_Kapnometrie_etCO2': 'capno_et_co2',
    'beat_mess_Spitzendruck_Ppeak': 'vent_p_peak',
    # 'beat_mess_exp_Des': 'exp_des',            # currently excluded
    'beat_mess_exp_Lachgas': 'exp_no',
    'beat_mess_exp_Sevo': 'exp_sevo',
    # 'beat_mess_pulmon_compl': 'pulmon_compl',  # currently excluded
    'vital_AF': 'rr',
    'vital_HF': 'hr',
    'vital_SaO2': 'sao2',
    'vital_T_K': 'temp',
    'vital_T_K2': 'temp',
    'vital_ZVD': 'cvd',
}

groups = data.groupby('type')

# Collect the relabelled groups and concatenate once at the end; calling
# pd.concat inside the loop re-copies the growing frame on every iteration.
renamed_groups: list[pd.DataFrame] = []
for name, group in tqdm(groups):
    new_name = vitals_type_names.get(name)
    if new_name is None:
        continue

    # assign() returns a new frame, so the slice handed out by groupby is not
    # written to in place (avoids SettingWithCopyWarning).
    renamed_groups.append(group.assign(type=new_name))

surgery_data: pd.DataFrame = pd.concat([surgery_data, *renamed_groups], ignore_index=True)

surgery_data

## GCS

### Load

In [None]:
# Open the GCS workbook and list its sheet names; the next cells read the
# sheets from this open handle.
workbook_path: str = '6_1_raw/Studie-2023-11-OP-An„sthesie-GCS.xlsx'
file: pd.ExcelFile = pd.ExcelFile(workbook_path)
sheetNames: list[str] = file.sheet_names
sheetNames

In [None]:
# Read every sheet of the open workbook into its own DataFrame,
# showing a progress bar while doing so.
sheets: list[pd.DataFrame] = [
    pd.read_excel(file, sheet_name=sheet) for sheet in tqdm(sheetNames)
]

In [None]:
# Stack all sheets into one frame with a fresh 0..n-1 index.
data: pd.DataFrame = pd.concat(objs=sheets, ignore_index=True)
data

### Process

In [None]:
# Bring the GCS export into the common long format:
#   1. rename the source columns to case / timestamp / value,
#   2. tag every row with the constant type 'gcs',
#   3. parse the 'YYYY-mm-dd HH:MM:SS.ffffff' timestamps into datetimes.
gcs_column_names: dict[str, str] = {
    'Fallnummer': 'case',
    'Datum': 'timestamp',
    'Score': 'value',
}
data = (
    data
    .rename(columns=gcs_column_names)
    .assign(
        type='gcs',
        timestamp=lambda frame: pd.to_datetime(frame['timestamp'], format='%Y-%m-%d %H:%M:%S.%f'),
    )
)

data

### Save

In [None]:
# NOTE(review): the GCS merge below is commented out, so the GCS rows prepared
# in the cells above are never appended to surgery_data — confirm this is intended.
# surgery_data: pd.DataFrame = pd.concat([surgery_data, data], ignore_index=True)
# surgery_data

## Arterial Blood Gas

### Load

In [None]:
# Open the blood-gas (BGA) workbook and list its sheet names; the next cells
# read the sheets from this open handle.
workbook_path: str = '6_1_raw/Studie-2023-13-OP-An„sthesie-BGA.xlsx'
file: pd.ExcelFile = pd.ExcelFile(workbook_path)
sheetNames: list[str] = file.sheet_names
sheetNames

In [None]:
# Read every sheet of the open workbook into its own DataFrame,
# showing a progress bar while doing so.
sheets: list[pd.DataFrame] = [
    pd.read_excel(file, sheet_name=sheet) for sheet in tqdm(sheetNames)
]

In [None]:
# Stack all sheets into one frame with a fresh 0..n-1 index.
data: pd.DataFrame = pd.concat(objs=sheets, ignore_index=True)
data

### Process

In [None]:
# Bring the blood-gas export into the common long format:
#   1. rename the source columns to case / type / timestamp / value / unit,
#   2. discard the unit column,
#   3. parse the 'YYYY-mm-dd HH:MM:SS.ffffff' timestamps into datetimes.
bga_column_names: dict[str, str] = {
    'FALLNR': 'case',
    'LaborName': 'type',
    'Zeitpunkt': 'timestamp',
    'Wert': 'value',
    'Unit': 'unit',
}
data = (
    data
    .rename(columns=bga_column_names)
    .drop(columns=['unit'])
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp'], format='%Y-%m-%d %H:%M:%S.%f'))
)

data

In [None]:
# Number of distinct measurement types in the blood-gas data.
data['type'].nunique()

### Save

In [None]:
# Append the selected blood-gas measurements to surgery_data under short,
# 'abg_'-prefixed type names.
#
# Raw type labels that are not listed in the mapping are skipped.
abg_type_names: dict[str, str] = {
    'ABE': 'abg_abe',
    'COHb': 'abg_cohb',
    # 'Ca(7.4)': 'abg_ca_7_4',   # currently excluded
    'Ca++': 'abg_ca',
    'Cl-': 'abg_cl',
    'FIO2': 'abg_fio2',
    'Glu': 'abg_glu',
    'Hct': 'abg_hct',
    'K+': 'abg_k',
    'Lac': 'abg_lac',
    'MetHb': 'abg_methb',
    'Na+': 'abg_na',
    'O2Hb': 'abg_o2hb',
    'SBE': 'abg_sbe',
    'T': 'abg_t',
    'pCO2': 'abg_pco2',
    # 'pCO2(T)': 'abg_pco2_t',   # currently excluded
    'pH': 'abg_ph',
    # 'pH(T)': 'abg_ph_t',       # currently excluded
    'pO2': 'abg_po2',
    # 'pO2(T)': 'abg_po2_t',     # currently excluded
    'sO2': 'abg_so2',
    'tHb': 'abg_thb',
}

groups = data.groupby('type')

# Collect the relabelled groups and concatenate once at the end; calling
# pd.concat inside the loop re-copies the growing frame on every iteration.
renamed_groups: list[pd.DataFrame] = []
for name, group in tqdm(groups):
    new_name = abg_type_names.get(name)
    if new_name is None:
        continue

    # assign() returns a new frame, so the slice handed out by groupby is not
    # written to in place (avoids SettingWithCopyWarning).
    renamed_groups.append(group.assign(type=new_name))

surgery_data: pd.DataFrame = pd.concat([surgery_data, *renamed_groups], ignore_index=True)

surgery_data

## DDS

### Load

In [None]:
# Open the Delirium Detection Score workbook and list its sheet names; the
# next cells read the sheets from this open handle.
workbook_path: str = '6_1_raw/Studie-2023-12-OP-An„sthesie-Deliriun Detection Score.xlsx'
file: pd.ExcelFile = pd.ExcelFile(workbook_path)
sheetNames: list[str] = file.sheet_names
sheetNames

In [None]:
# Read every sheet of the open workbook into its own DataFrame,
# showing a progress bar while doing so.
sheets: list[pd.DataFrame] = [
    pd.read_excel(file, sheet_name=sheet) for sheet in tqdm(sheetNames)
]

In [None]:
# Stack all sheets into one frame with a fresh 0..n-1 index.
# NOTE(review): no later cell visible here processes or merges this DDS frame —
# confirm whether that is intended.
data: pd.DataFrame = pd.concat(objs=sheets, ignore_index=True)
data

# Process

In [None]:
# Convert the measurement values to float. Entries that cannot be parsed as a
# number become NaN (errors='coerce') and are removed by the later dropna step.
# pd.to_numeric is called once on the whole column instead of once per element
# via Series.apply, which gives the same result without a Python-level loop.
# NOTE(review): strings with a decimal comma (e.g. '36,5') would be coerced to
# NaN here — verify the raw exports do not contain such values.
surgery_data['value'] = pd.to_numeric(surgery_data['value'], errors='coerce').astype(float)

In [None]:
# Normalise the dtypes of the identifying columns: case -> int, type -> str,
# timestamp -> datetime.
# NOTE(review): the int cast runs before the NaN rows are dropped, so a missing
# case number would raise here rather than be dropped — confirm that is wanted.
surgery_data['case'] = surgery_data['case'].astype(int)
surgery_data['timestamp'] = pd.to_datetime(surgery_data['timestamp'])
surgery_data['type'] = surgery_data['type'].astype(str)

# Remove rows with any missing field, then exact duplicate rows (keeping the
# first occurrence), and finally order the rows by timestamp.
surgery_data = (
    surgery_data
    .dropna()
    .drop_duplicates(keep='first')
    .sort_values(by=['timestamp'])
)

surgery_data

In [None]:
# Load the table of cases to keep.
included_cases = pd.read_csv('../4_cases/4_3_clean.csv')

# Restrict surgery_data to rows whose case appears in included_cases, printing
# the row count and the number of distinct cases before and after the filter.
print('Length of masterData before: ' + str(len(surgery_data)))
print('Number of cases before: ' + str(surgery_data['case'].nunique()))

is_included = surgery_data['case'].isin(included_cases['case'])
surgery_data = surgery_data.loc[is_included]

print('Length of masterData after: ' + str(len(surgery_data)))
print('Number of cases after: ' + str(surgery_data['case'].nunique()))

surgery_data

# Save

In [None]:
# Write the cleaned long-format table to disk without the row index.
surgery_data.to_csv(path_or_buf='6_3_clean.csv', index=False)
surgery_data

In [None]:
# Count the rows per measurement type and write the counts to disk.
types = surgery_data.groupby('type').size()

# The Series index holds the type names, so it must be written out: with
# index=False the file contained only the counts, with no way to tell which
# type each count belongs to. The file now has a leading 'type' column.
types.to_csv('6_4_types.csv', index=True)
types