In [None]:
import pandas as pd
import pyarrow as pa

# Load the chartevents subset from its parquet file into a pandas DataFrame,
# using pyarrow as the parquet engine.
chartevents_subset = pd.read_parquet(
    './data/chartevents_clean_values_and_thresholds_with_chunkid_65_resampled.parquet',
    engine='pyarrow',
)

In [None]:
# --- Analysis configuration ---
# ITEMID value used to select the rows to model.
PARAMETER = 220045
# CHUNK_ID_FILLED_TH values of the chunks to include.
CHUNKS = [
    '296490.0_220045.0_2192-09-26 23:51:00',
    '260223.0_220045.0_2156-07-22 06:49:00',
]

# Window sizes counted in samples; the hour figures below follow the original
# notes, which assume one sample every 5 minutes.
TRAIN = 60  # training window: 60 samples (5 hours)
TEST = 12  # test window: 12 samples (1 hour)
STEP = 6  # shift between consecutive windows: 6 samples (0.5 hours)

In [None]:
# Keep only the rows whose ITEMID equals PARAMETER and whose chunk id is listed
# in CHUNKS, restricted to the four columns the ARIMA analysis needs.
matches_parameter = chartevents_subset["ITEMID"] == PARAMETER
in_selected_chunks = chartevents_subset["CHUNK_ID_FILLED_TH"].isin(CHUNKS)
arima_data = chartevents_subset.loc[
    matches_parameter & in_selected_chunks,
    ['CHUNK_ID_FILLED_TH', 'CHARTTIME', 'ITEMID', 'VALUENUM_CLEAN'],
]

In [None]:
# Count the rows of every chunk and keep only the chunks that are long enough
# to hold at least one complete TRAIN + TEST window.
all_chunks = arima_data.CHUNK_ID_FILLED_TH.value_counts()
relevant_chunks = all_chunks[all_chunks >= (TRAIN + TEST)].index
# .copy() detaches the filtered frame from the frame it was selected from, so
# assigning new columns to it afterwards does not raise pandas'
# SettingWithCopyWarning.
arima_data = arima_data.loc[arima_data.CHUNK_ID_FILLED_TH.isin(relevant_chunks)].copy()

In [None]:
import numpy as np


def _hours_since_chunk_start(chart_times):
    """Return the offset of each timestamp from the earliest one, in hours."""
    elapsed = chart_times - chart_times.min()
    return elapsed / np.timedelta64(1, 'h')


# New column: time elapsed since the first CHARTTIME of the same chunk,
# expressed in hours.
arima_data['HOURS_SINCE_FIRST'] = (
    arima_data
    .groupby('CHUNK_ID_FILLED_TH')['CHARTTIME']
    .transform(_hours_since_chunk_start)
)


In [None]:
# Build one Series per relevant chunk: the chunk's VALUENUM_CLEAN values,
# indexed by HOURS_SINCE_FIRST and sorted by that index.
# The list follows the order of relevant_chunks, e.g.
#   measurements[0] -> values of the first chunk  (89, 93, 102, ...)
#   measurements[1] -> values of the second chunk (77, 81, 90, ...)
measurements = [
    arima_data[arima_data.CHUNK_ID_FILLED_TH == chunk_id]
    .set_index('HOURS_SINCE_FIRST')
    .sort_index()['VALUENUM_CLEAN']
    for chunk_id in relevant_chunks
]


In [None]:
# Create multiple train/test windows for each chunk.
#
# For every measurement series a window of TRAIN samples followed by TEST
# samples is cut out, starting at position 0 and moving forward STEP samples
# at a time for as long as a complete TRAIN + TEST window still fits.
# The result has one row per window:
#   SUB_CHUNK_ID - "<position of the chunk in measurements>_<start position>"
#   TRAIN_LIST   - Series holding the TRAIN training samples
#   TEST_LIST    - Series holding the TEST samples that directly follow
window_records = []

for chunk_pos, measurement in enumerate(measurements):
    # Last start position that still leaves room for a full window.
    last_start = len(measurement) - (TRAIN + TEST)
    # "+ 1" because range() excludes its end: without it the final valid window
    # is dropped, and a series of exactly TRAIN + TEST samples yields nothing.
    for start in range(0, last_start + 1, STEP):
        test_start = start + TRAIN
        window_records.append({
            # The separator keeps ids unique; plain concatenation maps e.g.
            # (chunk 1, start 120) and (chunk 112, start 0) both to "1120".
            "SUB_CHUNK_ID": f"{chunk_pos}_{start}",
            # .iloc slices by position. Plain measurement[a:b] would slice by
            # label on the float HOURS_SINCE_FIRST index, i.e. by hours rather
            # than by number of samples.
            "TRAIN_LIST": measurement.iloc[start:test_start],
            "TEST_LIST": measurement.iloc[test_start:test_start + TEST],
        })

# Build the frame once from all records (DataFrame.append was removed in
# pandas 2.0 and re-copied the whole frame on every call). As before, the rows
# are labelled with their SUB_CHUNK_ID.
chunk_with_test_train = pd.DataFrame(
    window_records,
    columns=["SUB_CHUNK_ID", "TRAIN_LIST", "TEST_LIST"],
    index=[record["SUB_CHUNK_ID"] for record in window_records],
)

In [None]:
# Fit an auto-ARIMA model on every training window, forecast the next TEST
# steps and record, for the observed test window (condition) and for the
# forecast (prediction), whether the values stay above the threshold.
from progressbar import progressbar
import pmdarima as pm

# Value both the observed test window and the forecast are compared against.
VALUE_THRESHOLD = 120

condition = []
prediction = []
# Number of rows per SUB_CHUNK_ID; kept for inspection only. The loop below
# walks the rows themselves, so every window is evaluated exactly once even if
# ids repeat, and no positional lookup on the label index is needed.
all_sub_chunk_ids = chunk_with_test_train.SUB_CHUNK_ID.value_counts()

for train_series, test_series in zip(
    chunk_with_test_train["TRAIN_LIST"], chunk_with_test_train["TEST_LIST"]
):
    arima = pm.auto_arima(train_series)
    forecast = arima.predict(n_periods=TEST)

    # NOTE(review): min(...) > threshold is True only when *every* value in the
    # window exceeds the threshold - confirm this is intended rather than max(...).
    condition.append(min(test_series) > VALUE_THRESHOLD)
    prediction.append(min(forecast) > VALUE_THRESHOLD)

In [None]:
# Tally the confusion matrix: `condition` holds the observed outcome and
# `prediction` the forecast outcome of each window.
tp = tn = fp = fn = 0

for observed, predicted in zip(condition, prediction):
    if observed:
        if predicted:
            tp += 1  # observed and predicted
        else:
            fn += 1  # observed but not predicted
    elif predicted:
        fp += 1  # predicted but not observed
    else:
        tn += 1  # neither observed nor predicted

In [None]:
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    return numerator / denominator if denominator else float("nan")


# Report the confusion-matrix counts and the metrics derived from them.
print(f"TP = {tp}")
print(f"TN = {tn}")
print(f"FP = {fp}")
print(f"FN = {fn}")
print()
# A metric whose denominator is zero (e.g. no positive cases at all) is
# reported as nan instead of aborting the cell with ZeroDivisionError.
print(f"Sens = {_safe_ratio(tp, tp + fn)} (recall)")
print(f"Spec = {_safe_ratio(tn, tn + fp)}")
print(f"PPV  = {_safe_ratio(tp, tp + fp)} (precision)")