# ARIMA

## Data Preparation

In [1]:
import pandas as pd
import pyarrow as pa

# Read chartevents_subset from parquet file to pandas data frame
chartevents_resampled = pd.read_parquet('../data/chartevents_resampled.parquet', engine='pyarrow')

In [2]:
# display(chartevents_resampled)

Unnamed: 0,CHUNK_ID_FILLED_TH,VITAL_PARAMETER_NAME,CHARTTIME,VITAL_PARAMTER_VALUE_MEDIAN_RESAMPLING,VITAL_PARAMTER_VALUE_MEAN_RESAMPLING,VITAL_PARAMTER_VALUE_MAX_RESAMPLING,VITAL_PARAMTER_VALUE_MIN_RESAMPLING,THRESHOLD_VALUE_HIGH,THRESHOLD_VALUE_LOW
0,296490.0_220045.0_2192-09-26 23:51:00,HR,2192-09-26 23:00:00,95.0,95.000000,98.0,92.0,,
1,296490.0_220045.0_2192-09-26 23:51:00,HR,2192-09-27 00:00:00,90.5,90.766667,96.0,87.0,120.0,60.0
2,296490.0_220045.0_2192-09-26 23:51:00,HR,2192-09-27 01:00:00,91.0,90.083333,94.0,85.0,120.0,60.0
3,296490.0_220045.0_2192-09-26 23:51:00,HR,2192-09-27 02:00:00,91.0,90.333333,92.0,83.0,120.0,60.0
4,296490.0_220045.0_2192-09-26 23:51:00,HR,2192-09-27 03:00:00,85.0,86.016667,94.0,80.0,120.0,60.0
...,...,...,...,...,...,...,...,...,...
627,226799.0_220045.0_2153-07-31 08:10:00,HR,2153-08-26 11:00:00,125.0,117.700000,132.0,98.0,120.0,60.0
628,226799.0_220045.0_2153-07-31 08:10:00,HR,2153-08-26 12:00:00,102.0,103.483333,127.0,78.0,120.0,60.0
629,226799.0_220045.0_2153-07-31 08:10:00,HR,2153-08-26 13:00:00,86.0,84.750000,93.0,68.0,120.0,60.0
630,226799.0_220045.0_2153-07-31 08:10:00,HR,2153-08-26 14:00:00,73.0,72.233333,97.0,49.0,120.0,60.0


In [3]:
PARAMETER = 'HR'
CHUNKS = ['296490.0_220045.0_2192-09-26 23:51:00']

# Sampling rate of 1 data point per hour - Test for different values in the future - e.g. longer training set
TRAIN = 12 # 12 * 1 h = 12 hour training period
TEST = 2 # 2 * 1 h = 2 hours testing period
STEP = 1 # move 1 * 1 h = 1 hour per step

In [4]:
# Subset data based on PARAMETER & CHUNKS
arima_data = chartevents_resampled[
    (chartevents_resampled['VITAL_PARAMETER_NAME'] == PARAMETER) & 
    (chartevents_resampled.CHUNK_ID_FILLED_TH.isin(CHUNKS))
    ][['CHUNK_ID_FILLED_TH','CHARTTIME','VITAL_PARAMETER_NAME','VITAL_PARAMTER_VALUE_MEDIAN_RESAMPLING','THRESHOLD_VALUE_HIGH','THRESHOLD_VALUE_LOW']].rename(columns={"VITAL_PARAMTER_VALUE_MEDIAN_RESAMPLING":"VALUENUM_CLEAN"})

# display(arima_data)

In [5]:
# Filter for chunks that have sufficient values to be used for training and testing the model
all_chunks_value_count = arima_data.CHUNK_ID_FILLED_TH.value_counts()
chunkid_filter = all_chunks_value_count[all_chunks_value_count >= (TRAIN + TEST)].index
arima_data = arima_data[arima_data.CHUNK_ID_FILLED_TH.isin(chunkid_filter)]

# display(arima_data)

In [6]:
# Create new HOURS_SINCE_FIRST_RECORD column containing the time difference that has passed since the first timestamp of the measurement series.
import numpy as np
# arima_data['MINUTES_SINCE_FIRST_RECORD'] = arima_data.groupby('CHUNK_ID_FILLED_TH')#['CHARTTIME'].transform(lambda x: (x - x.min())/np.timedelta64(1,'m'))
# Alternative for hours instead of minutes
arima_data['HOURS_SINCE_FIRST_RECORD'] = arima_data.groupby('CHUNK_ID_FILLED_TH')['CHARTTIME'].transform(lambda x: (x - x.min())/np.timedelta64(1,'h'))

# display(arima_data)

In [20]:
# reduce dataset to small amount in order to first test script
# Now we have 15 measurements for that chunk; With a TRAIN of 12, a TEST of 2 and a STEP of 1 we expect to receive two training sets and two test sets - looking at row ids they would look like the following:
# first train = 0:11 ; first test= 12:13
# second train = 1:12 ; second test= 13:14
arima_data = arima_data[:15]

# display(arima_data)

Unnamed: 0,CHUNK_ID_FILLED_TH,CHARTTIME,VITAL_PARAMETER_NAME,VALUENUM_CLEAN,THRESHOLD_VALUE_HIGH,THRESHOLD_VALUE_LOW,HOURS_SINCE_FIRST_RECORD
0,296490.0_220045.0_2192-09-26 23:51:00,2192-09-26 23:00:00,HR,95.0,,,0.0
1,296490.0_220045.0_2192-09-26 23:51:00,2192-09-27 00:00:00,HR,90.5,120.0,60.0,1.0
2,296490.0_220045.0_2192-09-26 23:51:00,2192-09-27 01:00:00,HR,91.0,120.0,60.0,2.0
3,296490.0_220045.0_2192-09-26 23:51:00,2192-09-27 02:00:00,HR,91.0,120.0,60.0,3.0
4,296490.0_220045.0_2192-09-26 23:51:00,2192-09-27 03:00:00,HR,85.0,120.0,60.0,4.0
5,296490.0_220045.0_2192-09-26 23:51:00,2192-09-27 04:00:00,HR,89.0,120.0,60.0,5.0
6,296490.0_220045.0_2192-09-26 23:51:00,2192-09-27 05:00:00,HR,82.0,120.0,60.0,6.0
7,296490.0_220045.0_2192-09-26 23:51:00,2192-09-27 06:00:00,HR,80.0,120.0,60.0,7.0
8,296490.0_220045.0_2192-09-26 23:51:00,2192-09-27 07:00:00,HR,77.0,120.0,60.0,8.0
9,296490.0_220045.0_2192-09-26 23:51:00,2192-09-27 08:00:00,HR,78.5,120.0,60.0,9.0


### First Adaption
Create dict that holds vital parameter series, threshold high and threshold low series for each chunk id (key). The series are all indexed the same way (= dif to first measurement in hours with current sampling rate) so they relate to the same time


In [8]:
# With Adaption
# Create dictionary with chunk id as key and a dataframe as value.
# This dataframe contains of three columns the vital parameter values, the high thresholds and the low thresholds.
# As the index of these three list is the same and can be referenced back to the "HOURS_SINCE_FIRST_RECORD", we keep the time related information.
# Example:
# dict_of_chunk_series_with_test_and_train = {
#     "<chunkid_A>" : | vital_parameter_series | threshold_high_series | threshold_low_series
#                   0 |                   95.0 |                   120 |                   60
#                   1 |                   90.5 |                   120 |                   60
#                   2 |                   91.0 |                   120 |                   60
#  }

dict_of_chunk_series = {}

for chunkid in chunkid_filter:
    
    chunk_data = arima_data[arima_data.CHUNK_ID_FILLED_TH == chunkid].copy()

    # vital parameter series
    chunk_value_series = pd.Series(chunk_data['VALUENUM_CLEAN'],name="vital_parameter_series")
    chunk_value_series = chunk_value_series.reset_index(drop=True)
    chunk_value_series.index = list(chunk_value_series.index)

    # threshold series high
    chunk_threshold_high_series = pd.Series(chunk_data['THRESHOLD_VALUE_HIGH'],name="threshold_high_series")
    chunk_threshold_high_series = chunk_threshold_high_series.reset_index(drop=True)
    chunk_threshold_high_series.index = list(chunk_threshold_high_series.index)

    # threshold series low
    chunk_threshold_low_series = pd.Series(chunk_data['THRESHOLD_VALUE_LOW'],name="threshold_low_series")
    chunk_threshold_low_series = chunk_threshold_low_series.reset_index(drop=True)
    chunk_threshold_low_series.index = list(chunk_threshold_low_series.index)

    # Append series with key (CHUNK_ID) into dictionary
    vital_parameter_and_thresholds_for_chunkid = pd.concat([chunk_value_series,chunk_threshold_high_series,chunk_threshold_low_series],axis=1)
    dict_of_chunk_series[chunkid] = vital_parameter_and_thresholds_for_chunkid


In [9]:
# Example of accessing dataframe in value of specific chunk
dict_of_chunk_series['296490.0_220045.0_2192-09-26 23:51:00']

Unnamed: 0,vital_parameter_series,threshold_high_series,threshold_low_series
0,95.0,,
1,90.5,120.0,60.0
2,91.0,120.0,60.0
3,91.0,120.0,60.0
4,85.0,120.0,60.0
5,89.0,120.0,60.0
6,82.0,120.0,60.0
7,80.0,120.0,60.0
8,77.0,120.0,60.0
9,78.5,120.0,60.0


### Second Adaption

In [10]:
# Create multiple test & train sets for each chunk to iteratively predict the next x measurements
# Create nested dictionary that holds the CHUNK_ID as first key.
# This key holds one dictionary for each iteration over this chunk. This depends on the TEST, TRAIN, and STEP.
# For each iteration we create another dictionary, whereby the last index of the train list acts as key.
# This key holds again one dictionary for the train list and one for the test list.
# Example:
# dict_of_chunk_series_with_test_and_train = {
#     "<chunkid_A>" : {
#         "<last_index_of_training_list_of_first_chunkid_A_iteration>" : {
#             "TRAIN_LIST" : train_list,
#             "TEST_LIST" : test_list,
#             "THRESHOLD_HIGH_FOR_TEST_LIST" : threshold_high_for_test_list ,
#             "THRESHOLD_LOW_FOR_TEST_LIST" : threshold_low_for_test_list
#         },
#         "<last_index_of_training_list_of_second_chunkid_A_iteration>" : {
#             "TRAIN_LIST" : train_list,
#             "TEST_LIST" : test_list,
#             "THRESHOLD_HIGH_FOR_TEST_LIST" : threshold_high_for_test_list ,
#             "THRESHOLD_LOW_FOR_TEST_LIST" : threshold_low_for_test_list
#         },
#     }
# }

dict_of_chunk_series_with_test_and_train = {}

for i, chunk in enumerate(dict_of_chunk_series):
    # acces dataframe of current chunk
    chunk_series_for_chunk = dict_of_chunk_series[chunk]
    # access vital_parameter_series of current chunk
    chunk_value_series_for_chunk = chunk_series_for_chunk["vital_parameter_series"]

    # access threshold_high_series of current chunk
    chunk_threshold_high_series_for_chunk = chunk_series_for_chunk["threshold_high_series"]

    # access threshold_low_series of current chunk
    chunk_threshold_low_series_for_chunk = chunk_series_for_chunk["threshold_low_series"]

    # create an empty dictionary for the key of the current chunk
    dict_of_chunk_series_with_test_and_train[chunk] = {}

    # create multiple test and train lists for that chunk
    for start in range(0, len(chunk_value_series_for_chunk) - (TRAIN + TEST)+1, STEP):
        
        train_list = pd.Series(chunk_value_series_for_chunk[start : start+TRAIN],name="train_list")
        test_list = pd.Series(chunk_value_series_for_chunk[start+TRAIN : start+TRAIN+TEST],name="test_list")
        threshold_high_for_test_list = pd.Series(chunk_threshold_high_series_for_chunk[start+TRAIN : start+TRAIN+TEST],name="threshold_high_for_test_list")
        threshold_low_for_test_list = pd.Series(chunk_threshold_low_series_for_chunk[start+TRAIN : start+TRAIN+TEST],name="threshold_low_for_test_list")
        #For each iteration over the current chunk, we will create a dictionary that holds again the test and train list as dictionary
        #We use the last index of the current train list (which currently refers to the difference to first measurement) as second key
        second_key = train_list.index.max() 
        dict_of_chunk_series_with_test_and_train[chunk][second_key] = {}
        #Assign the train and test list to the current chunk iteration      
        dict_of_chunk_series_with_test_and_train[chunk][second_key]["TRAIN_LIST"] = train_list
        dict_of_chunk_series_with_test_and_train[chunk][second_key]["TEST_LIST"] = test_list
        dict_of_chunk_series_with_test_and_train[chunk][second_key]["THRESHOLD_HIGH_FOR_TEST_LIST"] = threshold_high_for_test_list
        dict_of_chunk_series_with_test_and_train[chunk][second_key]["THRESHOLD_LOW_FOR_TEST_LIST"] = threshold_low_for_test_list
    


In [11]:
# Example of accessing first train list of a specific chunkid
dict_of_chunk_series_with_test_and_train[chunkid][TRAIN-1]

{'TRAIN_LIST': 0     95.0
 1     90.5
 2     91.0
 3     91.0
 4     85.0
 5     89.0
 6     82.0
 7     80.0
 8     77.0
 9     78.5
 10    78.0
 11    84.0
 Name: train_list, dtype: float64,
 'TEST_LIST': 12    75.0
 13    75.5
 Name: test_list, dtype: float64,
 'THRESHOLD_HIGH_FOR_TEST_LIST': 12    120.0
 13    120.0
 Name: threshold_high_for_test_list, dtype: float64,
 'THRESHOLD_LOW_FOR_TEST_LIST': 12    60.0
 13    60.0
 Name: threshold_low_for_test_list, dtype: float64}

In [12]:
# Example of accessing  lists of a specific chunk iteration
dict_of_chunk_series_with_test_and_train[chunkid][TRAIN]

{'TRAIN_LIST': 1     90.5
 2     91.0
 3     91.0
 4     85.0
 5     89.0
 6     82.0
 7     80.0
 8     77.0
 9     78.5
 10    78.0
 11    84.0
 12    75.0
 Name: train_list, dtype: float64,
 'TEST_LIST': 13    75.5
 14    74.0
 Name: test_list, dtype: float64,
 'THRESHOLD_HIGH_FOR_TEST_LIST': 13    120.0
 14    120.0
 Name: threshold_high_for_test_list, dtype: float64,
 'THRESHOLD_LOW_FOR_TEST_LIST': 13    60.0
 14    60.0
 Name: threshold_low_for_test_list, dtype: float64}

In [13]:
# Example of accessing second test list of a specific chunkid
dict_of_chunk_series_with_test_and_train[chunkid][TRAIN]["TEST_LIST"]

13    75.5
14    74.0
Name: test_list, dtype: float64

In [14]:
import pmdarima as pm

# Expand the previously created dictionary (dict_of_chunk_series_with_test_and_train) to also hold the prediction series next to the train and the test series (and threshold values for test)
dict_of_chunk_series_with_test_and_train_and_forecast = dict_of_chunk_series_with_test_and_train.copy()

for i,chunk in enumerate(dict_of_chunk_series_with_test_and_train_and_forecast):
    for i,chunk_iteration in enumerate(dict_of_chunk_series_with_test_and_train_and_forecast[chunk]):
        #train the arima model on the train list of the current chunk iteration
        current_train_list = dict_of_chunk_series_with_test_and_train_and_forecast[chunk][chunk_iteration]["TRAIN_LIST"]
        arima = pm.auto_arima(current_train_list)

        #make sure that test_list and prediction_list share the same indizes to make alarm prediction easier later on - working for current step size and sampling rate
        #if index is alway the auto index and does not relate to "Hours:Since_First_Measurement" directly then "1" as step size here is working
        forecast = pd.Series(arima.predict(TEST),index= [*range(i+TRAIN,i+TRAIN+TEST,1)],name="forecast_list")
        
        #add prediction to dictionary
        dict_of_chunk_series_with_test_and_train_and_forecast[chunk][chunk_iteration]["FORECAST_LIST"] = forecast


In [15]:
# Example of accessing the dictionary (containing train list, test list, prediction list) of a specific chunk iteration
dict_of_chunk_series_with_test_and_train_and_forecast[chunk][11]

{'TRAIN_LIST': 0     95.0
 1     90.5
 2     91.0
 3     91.0
 4     85.0
 5     89.0
 6     82.0
 7     80.0
 8     77.0
 9     78.5
 10    78.0
 11    84.0
 Name: train_list, dtype: float64,
 'TEST_LIST': 12    75.0
 13    75.5
 Name: test_list, dtype: float64,
 'THRESHOLD_HIGH_FOR_TEST_LIST': 12    120.0
 13    120.0
 Name: threshold_high_for_test_list, dtype: float64,
 'THRESHOLD_LOW_FOR_TEST_LIST': 12    60.0
 13    60.0
 Name: threshold_low_for_test_list, dtype: float64,
 'FORECAST_LIST': 12    84.587870
 13    85.051009
 Name: forecast_list, dtype: float64}

In [16]:
# Write to data frame - check if that makes alarm prediction easier
pd.concat([chunk_value_series,chunk_threshold_high_series,chunk_threshold_low_series],axis=1)
dict_of_chunk_series_with_forecast = {}

for i,chunk in enumerate(dict_of_chunk_series_with_test_and_train_and_forecast):
    dict_of_chunk_series_with_forecast[chunk] = {}

    for i,chunk_iteration in enumerate(dict_of_chunk_series_with_test_and_train_and_forecast[chunk]):
        
        train_list = dict_of_chunk_series_with_test_and_train_and_forecast[chunk][chunk_iteration]["TRAIN_LIST"]
        test_list = dict_of_chunk_series_with_test_and_train_and_forecast[chunk][chunk_iteration]["TEST_LIST"]
        threshold_high_for_test_list = dict_of_chunk_series_with_test_and_train_and_forecast[chunk][chunk_iteration]["THRESHOLD_HIGH_FOR_TEST_LIST"]
        threshold_low_for_test_list = dict_of_chunk_series_with_test_and_train_and_forecast[chunk][chunk_iteration]["THRESHOLD_LOW_FOR_TEST_LIST"]
        forecast_list = dict_of_chunk_series_with_test_and_train_and_forecast[chunk][chunk_iteration]["FORECAST_LIST"]
        all_dict_lists_as_df = pd.concat([train_list,test_list,threshold_high_for_test_list,threshold_low_for_test_list,forecast_list],axis=1)
        dict_of_chunk_series_with_forecast[chunkid][chunk_iteration] = all_dict_lists_as_df
    


In [17]:
# Example of accessing the dictionary (containing train list, test list, prediction list in a !dataframe!) of a specific chunk iteration
dict_of_chunk_series_with_forecast[chunk][11]

Unnamed: 0,train_list,test_list,threshold_high_for_test_list,threshold_low_for_test_list,forecast_list
0,95.0,,,,
1,90.5,,,,
2,91.0,,,,
3,91.0,,,,
4,85.0,,,,
5,89.0,,,,
6,82.0,,,,
7,80.0,,,,
8,77.0,,,,
9,78.5,,,,


Arima from previous version:
auto_arima_model = auto_arima(data, start_p=1, start_q=1,
                            max_p=3, max_q=3, m=1,
                            start_P=0, seasonal=False,
                            d=1, D=1, trace=True,
                            error_action='ignore',  
                            suppress_warnings=True, 
                            stepwise=True)

In [18]:
# Add information whether alarm was triggered
dict_of_chunk_series_with_forecast_and_alarms = dict_of_chunk_series_with_forecast.copy() 
for i,chunk in enumerate(dict_of_chunk_series_with_forecast_and_alarms):
    #dict_of_chunk_series_with_forecast_and_alarms[chunk] = {}

    for i,chunk_iteration in enumerate(dict_of_chunk_series_with_forecast_and_alarms[chunk]):
        
        df_for_chunk_iteration = dict_of_chunk_series_with_forecast_and_alarms[chunk][chunk_iteration]
        df_for_chunk_iteration['high_alarm_triggered'] = np.where(df_for_chunk_iteration['test_list'] > df_for_chunk_iteration['threshold_high_for_test_list'] ,1,0)
        df_for_chunk_iteration['high_alarm_triggered_forecast'] = np.where(df_for_chunk_iteration['forecast_list'] > df_for_chunk_iteration['threshold_high_for_test_list'] ,1,0)
        df_for_chunk_iteration['low_alarm_triggered'] = np.where(df_for_chunk_iteration['test_list'] < df_for_chunk_iteration['threshold_low_for_test_list'] ,1,0)
        df_for_chunk_iteration['low_alarm_triggered_forecast'] = np.where(df_for_chunk_iteration['forecast_list'] < df_for_chunk_iteration['threshold_low_for_test_list'] ,1,0)
        dict_of_chunk_series_with_forecast_and_alarms[chunk][chunk_iteration] = df_for_chunk_iteration
        


In [19]:
# Example of accessing result for a specific chunk iteration (the one where the training set ends at index 12)
dict_of_chunk_series_with_forecast_and_alarms[chunk][12]

Unnamed: 0,train_list,test_list,threshold_high_for_test_list,threshold_low_for_test_list,forecast_list,high_alarm_triggered,high_alarm_triggered_forecast,low_alarm_triggered,low_alarm_triggered_forecast
1,90.5,,,,,0,0,0,0
2,91.0,,,,,0,0,0,0
3,91.0,,,,,0,0,0,0
4,85.0,,,,,0,0,0,0
5,89.0,,,,,0,0,0,0
6,82.0,,,,,0,0,0,0
7,80.0,,,,,0,0,0,0
8,77.0,,,,,0,0,0,0
9,78.5,,,,,0,0,0,0
10,78.0,,,,,0,0,0,0


In [21]:

accuracy_dict_for_chunk_iterations = {}

for i,chunk in enumerate(dict_of_chunk_series_with_forecast_and_alarms):
    accuracy_dict_for_chunk_iterations[chunk] = {}
       
    for i,chunk_iteration in enumerate(dict_of_chunk_series_with_forecast_and_alarms[chunk]):
        
        tp, tn, fp, fn = 0, 0, 0, 0
        accurracy_matrix_df_for_chunk_iteration = pd.DataFrame(columns=["TP","FN","FP","TN"])
        
        # select column of dataframe but only where relevant (starting after train size)
        df_for_chunk_iteration = dict_of_chunk_series_with_forecast_and_alarms[chunk][chunk_iteration]

        ##################
        # High alarms
        ##################
        
        # select true high alarms triggered
        column_index_of_high_alarm_triggered = df_for_chunk_iteration.columns.get_loc("high_alarm_triggered")

        # select predicted high alarms
        column_index_of_high_alarm_triggered_forecast = df_for_chunk_iteration.columns.get_loc("high_alarm_triggered_forecast")
        
        # create df with bot as column - only needed rows (test only)
        high_alarms = df_for_chunk_iteration.iloc[TRAIN:,[column_index_of_high_alarm_triggered,column_index_of_high_alarm_triggered_forecast]]
        
        for row_in_high_alarms in high_alarms.iterrows():

            if row_in_high_alarms[1][0] and row_in_high_alarms[1][1]:
                tp +=1
                # print("tp", tp)
            if row_in_high_alarms[1][0] and not row_in_high_alarms[1][1]:
                fn +=1
                # print("fn", fn)
            if not row_in_high_alarms[1][0] and row_in_high_alarms[1][1]:
                fp +=1
                # print("fp", fp)
            if not row_in_high_alarms[1][0] and not row_in_high_alarms[1][1]:
                tn +=1
                # print("tn",tn)
        
        a_new_row = {"TP":tp,"FN":fn,"FP":fp,"TN":tn}
        a_new_row_series = pd.Series(a_new_row,name="accuracy_high_alarms")

        accurracy_matrix_df_for_chunk_iteration = accurracy_matrix_df_for_chunk_iteration.append(a_new_row_series)

        ##################
        # Low alarms
        ##################

        # select column of dataframe but only where relevant (starting after train size)
        df_for_chunk_iteration = dict_of_chunk_series_with_forecast_and_alarms[chunk][chunk_iteration]
        
        # select true low alarms triggered
        column_index_of_low_alarm_triggered = df_for_chunk_iteration.columns.get_loc("low_alarm_triggered")

        # select predicted low alarms
        column_index_of_low_alarm_triggered_forecast = df_for_chunk_iteration.columns.get_loc("low_alarm_triggered_forecast")
        
        # create df with bot as column - only needed rows (test only)
        low_alarms = df_for_chunk_iteration.iloc[TRAIN:,[column_index_of_low_alarm_triggered,column_index_of_low_alarm_triggered_forecast]]
        
        for row_in_low_alarms in low_alarms.iterrows():

            if row_in_low_alarms[1][0] and row_in_low_alarms[1][1]:
                tp +=1
                # print("tp", tp)
            if row_in_low_alarms[1][0] and not row_in_low_alarms[1][1]:
                fn +=1
                # print("fn", fn)
            if not row_in_low_alarms[1][0] and row_in_low_alarms[1][1]:
                fp +=1
                # print("fp", fp)
            if not row_in_low_alarms[1][0] and not row_in_low_alarms[1][1]:
                tn +=1
                # print("tn",tn)
        
        a_new_row = {"TP":tp,"FN":fn,"FP":fp,"TN":tn}
        a_new_row_series = pd.Series(a_new_row,name="accuracy_low_alarms")
        
        accurracy_matrix_df_for_chunk_iteration = accurracy_matrix_df_for_chunk_iteration.append(a_new_row_series)
        accuracy_dict_for_chunk_iterations[chunk][chunk_iteration] = accurracy_matrix_df_for_chunk_iteration

    


In [22]:
# Example to check for all iterations of one chunk
accuracy_dict_for_chunk_iterations[chunk]

{11:                      TP FN FP TN
 accuracy_high_alarms  0  0  0  2
 accuracy_low_alarms   0  0  0  4,
 12:                      TP FN FP TN
 accuracy_high_alarms  0  0  0  2
 accuracy_low_alarms   0  0  0  4}

## Documentation snippets

so that they are not lost. to be updated

In [None]:
# Needed Adaption for following cell:
# Change list_of_chunk_value_series from List to Dictionary
# The CHUNK_ID is used as key and in this step one key holds three series: the vital parameter series, the low threshold series and the high threshold series. They need the same "sampling rate" - so that the high threshold with index 0 is the high threshold that applies at the time of the vital parameter with index 0 

# Vital parameter Series:
# index                       |     0 |    1  |   2   | ...
# ----------------------------------------------------- ...
# firstChunk - Vital Parameter|  95.0 |  90.5 |  91.0 | ...

# Threshold High Series:
# index                       |     0 |    1  |   2   | ...
# ----------------------------------------------------- ...
# firstChunk - Th. High       |  120.0 |  120.0 |  110.0 | ...

# Threshold Low Series:
# index                       |     0 |    1  |   2   | ...
# ----------------------------------------------------- ...
# firstChunk - Th. Low        |  70.0 |  70.0 |  60.0 | ...

In [None]:
# Needed Adaption for following cell:
# Currently we only have a true values list and a predictions list. But we are not interested in whether the prediction is exactly the true value. We want to see if the prediction value also triggers an alarm if the true value does. Therefore we need the threshold values that apply at the time of the respective prediction/true value.
# A Final version should hold the following informations that can be traced back to a specific Chunk ID:
# * List of true values (vital parameters in test list)
# * List of Threshold High (for the time at which the predictions take place)
# * List of Threshold Low (for the time at which the predictions take place)
# * Arima Predictions (the predictions for the true values based on the train values)

# Our thoughts:
# Currently prediction looks as follows (two colums as TRAIN is 2; two rows as two chunk_value_series are created for our chunk (containing 15 values)):

#   | 0                                         | 1
# 0 | first prediction for chunk_value_series 1 | second prediction for chunk_value_series 1
# 1 | first prediction for chunk_value_series 2 | second prediction for chunk_value_series 2

# We wanted to add the last index of the train_list and the CHUNK_ID in a nested way to these predictions so that we can trace them back to the thresholds that apply at the time of the prediction

#   | CHUNK_ID | Time ref. | 0                                 | 1
# 0 |  xxxx    | 11        | 1st pred. for chunk_value_series 1| 2nd pred. for chunk_value_series 1 
# 1 |  xxxx    | 12        | 1st pred. for chunk_value_series 2| 2nd pred. for chunk_value_series 2
