# Analysis of Predictions Produced with Chunks and RNNModel by Darts

This notebook analyzes all pickle files in `./data/darts/{n_chunks}_chunks/{style}/` whose names start with `confusion_matrix`, i.e. all model-level and all chunk-level matrices. At the moment, the paths are adapted for local execution.

## Analysis of Model-level Matrices

### Define Variables to Adjust for Model-level Analyses

In [None]:
# Define number of chunks (selects the `{n_chunks}_chunks` directory that is read and written below)
n_chunks = 1000

# Define how many chunks were taken for prediction ('all' or '20_percent');
# selects the `{style}` sub-directory and triggers the ID alignment for '20_percent'
style = 'all'

### Extract All Generated Model-level Matrices

In [None]:
from IPython.display import display

import os
import pandas as pd
import pickle5 as pickle

# Collect every model-level confusion matrix found in the prediction output directory
# into one data frame, align the model IDs of the '20_percent' run with the 'all' run,
# and show the result per parameter.

# Define path to all model-level matrices produced by prediction
path_to_model_matrices = f'../../data/darts/{n_chunks}_chunks/{style}'

# Start with an empty frame so that the expected columns exist even if no file is found
result_matrix_models = pd.DataFrame(columns=['ID', 'PARAMETER', 'MODEL', 'ENDOGENOUS', 'EXOGENOUS', 'FIRST_FORECAST',
                                             'ALARM_TYPE', 'FP', 'TP', 'FN', 'TN', 'N_CHUNKS', 'N_ITERATIONS'])

# Gather all matrices first and concatenate once afterwards
# (concatenating inside the loop would copy the growing result for every file)
model_matrices = [result_matrix_models]

for file in os.listdir(path_to_model_matrices):
    file_path = os.path.join(path_to_model_matrices, file)

    # Skip directories and everything that is not a model-level matrix
    if not (os.path.isfile(file_path)
            and file.startswith('confusion_matrix_models') and file.endswith('.pickle')):
        continue

    # Read file; the context manager closes it even if unpickling fails
    # NOTE: pickle can execute arbitrary code while loading - only read files created by the own prediction runs
    with open(file_path, 'rb') as current_matrix_f:
        model_matrices.append(pickle.load(current_matrix_f))

result_matrix_models = pd.concat(model_matrices)

# Align IDs of both styles
# Note: Needed as long as scripts are not executed again! (current execution use model numbers from 01 to 18)
if style == '20_percent' and n_chunks == 1000:

    # Remove additionally calculated rows (MAX input with low alarms, MIN input with high alarms)
    is_max_low = (result_matrix_models['ENDOGENOUS'] == 'MAX') & (result_matrix_models['ALARM_TYPE'] == 'Low')
    is_min_high = (result_matrix_models['ENDOGENOUS'] == 'MIN') & (result_matrix_models['ALARM_TYPE'] == 'High')
    result_matrix_models = result_matrix_models[~(is_max_low | is_min_high)]

    # Old model number -> new model number.
    # The order matters: sources are processed in ascending order and every target is smaller
    # than its source, so a freshly assigned number is never replaced a second time.
    # NOTE(review): the replacement matches these digit pairs anywhere in the ID, not only in the
    # model-number part - assumes no other part of the ID contains them.
    id_replacements = [
        # Scaled results: RNN (min)
        ('03', '02'),
        # Scaled results: LSTM (median, max, min)
        ('04', '03'), ('05', '04'), ('06', '04'),
        # Scaled results: GRU (median, max, min)
        ('07', '05'), ('08', '06'), ('09', '06'),
        # Non-scaled results: RNN (median, max, min)
        ('10', '07'), ('11', '08'), ('12', '08'),
        # Non-scaled results: LSTM (median, max, min)
        ('13', '09'), ('14', '10'), ('15', '10'),
        # Non-scaled results: GRU (median, max, min)
        ('16', '11'), ('17', '12'), ('18', '12'),
    ]
    for old_nr, new_nr in id_replacements:
        result_matrix_models['ID'] = result_matrix_models['ID'].str.replace(old_nr, new_nr, regex=False)

# Sort result matrix for better readability and reset index
result_matrix_models = result_matrix_models.sort_values(by=['ID']).reset_index(drop=True)

# Show result matrix per parameter
for shown_parameter in ['HR', 'BP', 'O2']:
    display(result_matrix_models[result_matrix_models['PARAMETER'] == shown_parameter])

### Add Accuracy Metrics and Save as Parquet Files (Normal + Scaled)

In [None]:
# Derive the rates from the confusion-matrix counts
# (see https://en.wikipedia.org/wiki/Sensitivity_and_specificity for more information)
false_pos = result_matrix_models['FP']
true_pos = result_matrix_models['TP']
false_neg = result_matrix_models['FN']
true_neg = result_matrix_models['TN']

actual_negatives = false_pos + true_neg
actual_positives = true_pos + false_neg

result_matrix_models['FPR'] = false_pos / actual_negatives
result_matrix_models['TPR'] = true_pos / actual_positives
result_matrix_models['FNR'] = false_neg / actual_positives
result_matrix_models['TNR'] = true_neg / actual_negatives

result_matrix_models['ACC'] = (true_pos + true_neg) / (true_pos + false_neg + false_pos + true_neg)
result_matrix_models['F1S'] = true_pos / (true_pos + 0.5 * (false_pos + false_neg))

# Round all floats to 4 decimal places, value by value
# Note (kept from the original author): round() does not work for floats with many decimal places
decimals = 4
for metric_col in ('FPR', 'TPR', 'FNR', 'TNR', 'ACC', 'F1S'):
    result_matrix_models[metric_col] = result_matrix_models[metric_col].apply(lambda value: round(value, decimals))

# Reorder columns (metrics directly after the counts) for similarity with ARIMA results
ordered_columns = ['ID', 'PARAMETER', 'MODEL', 'ENDOGENOUS', 'EXOGENOUS', 'FIRST_FORECAST',
                   'ALARM_TYPE', 'FP', 'TP', 'FN', 'TN', 'FPR', 'TPR', 'FNR', 'TNR', 'ACC',
                   'F1S', 'N_HIGH_ALARMS', 'N_LOW_ALARMS', 'N_CHUNKS', 'N_ITERATIONS']
result_matrix_models = result_matrix_models[ordered_columns]

# Model numbers 07 to 12 belong to the normal (non-scaled) models, 01 to 06 to the scaled ones
normal_model_numbers = [f'{nr:02d}' for nr in range(7, 13)]
scaled_model_numbers = [f'{nr:02d}' for nr in range(1, 7)]

# Split rows into normal and scaled by the model number, i.e. the third '_'-separated part of the ID
normal_rows, scaled_rows = [], []
for _, row in result_matrix_models.iterrows():
    model_nr = row['ID'].split('_')[2]
    if model_nr in normal_model_numbers:
        normal_rows.append(row.values)
    elif model_nr in scaled_model_numbers:
        scaled_rows.append(row.values)

# Build final matrices from the extracted rows
result_matrix_models_normal = pd.DataFrame(normal_rows, columns=result_matrix_models.columns).reset_index(drop=True)
result_matrix_models_scaled = pd.DataFrame(scaled_rows, columns=result_matrix_models.columns).reset_index(drop=True)

# Show complemented result matrices per parameter (normal first, then scaled)
for shown_parameter in ('HR', 'BP', 'O2'):
    display(result_matrix_models_normal[result_matrix_models_normal['PARAMETER'] == shown_parameter])
    display(result_matrix_models_scaled[result_matrix_models_scaled['PARAMETER'] == shown_parameter])

# Save result matrices as parquet
result_matrix_models_normal.to_parquet(f'../../data/darts/{n_chunks}_chunks/{style}/result_matrix_models_normal.parquet',
                                       engine='pyarrow')
result_matrix_models_scaled.to_parquet(f'../../data/darts/{n_chunks}_chunks/{style}/result_matrix_models_scaled.parquet',
                                       engine='pyarrow')

### Defining Variables for All Model-level Plots

In [None]:
# Restrict the plots to parameters that actually occur in the result matrix
# (avoids errors because of non-found values)
available_parameters = pd.unique(result_matrix_models['PARAMETER'])

# One subplot column per available parameter
n_cols = len(available_parameters)

### Plot Accuracy, TPR, FNR, and TNR

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# "Group" result matrix by prefix of ID
# Strip '_H' / '_L' from every string cell so that the high and low alarm rows of one model
# share the same ID on the x-axis and are only separated by hue
plotdata = result_matrix_models.replace(['_H', '_L'], ['', ''], regex=True)

# One subplot row per metric, one subplot column per available parameter
plotted_metrics = ['ACC', 'TPR', 'FNR', 'TNR']

# Create subplots
# squeeze=False keeps `axs` two-dimensional even if only one parameter is available,
# so no special case for a single column is needed
sns.set_style('whitegrid')
fig, axs = plt.subplots(
    nrows=len(plotted_metrics),
    ncols=n_cols,
    figsize=(15, 13),
    dpi=72,
    squeeze=False
    )

plt.suptitle(f'Accuracy, TPR, FNR, and TNR of {n_chunks} Chunks ({style.replace("_", " ").upper()})', fontsize=22)

# Define y-limits per metric over all parameters, so that subplots of one row are comparable
ylimits = {metric: [0, max(result_matrix_models[metric])] for metric in plotted_metrics}

# Actual plots
for i, parameter in enumerate(available_parameters):
    parameter_data = plotdata[plotdata.PARAMETER == parameter]

    for row_idx, metric in enumerate(plotted_metrics):
        current_axes = axs[row_idx, i]

        sns.barplot(
            ax=current_axes,
            data=parameter_data,
            x='ID',
            y=metric,
            hue='ALARM_TYPE',
            palette=sns.color_palette('colorblind'),
            ci=None)

        # Only the top row carries the parameter name as column title
        if row_idx == 0:
            current_axes.set_title(str(parameter), fontweight='bold', color='black', fontsize=20)

        current_axes.set_ylim(ylimits[metric])
        current_axes.set_xticklabels(current_axes.get_xticklabels(), rotation=90)

# Improve layout and save figure
fig.tight_layout()
# plt.show() only accepts the keyword argument `block`; the figure must not be passed positionally
plt.show()
#fig.savefig(f'../../plots/darts/{n_chunks}_chunks/{style}/tpr_fnr_tnr_acc_model_result_matrix.png', dpi=1200)

### Plot False Positive Rate and F1 Score

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# "Group" result matrix by prefix of ID
# Strip '_H' / '_L' from every string cell so that the high and low alarm rows of one model
# share the same ID on the x-axis and are only separated by hue
plotdata = result_matrix_models.replace(['_H', '_L'], ['', ''], regex=True)

# One subplot row per metric, one subplot column per available parameter
plotted_metrics = ['FPR', 'F1S']

# Create subplots
# squeeze=False keeps `axs` two-dimensional even if only one parameter is available,
# so no special case for a single column is needed
sns.set_style('whitegrid')
fig, axs = plt.subplots(
    nrows=len(plotted_metrics),
    ncols=n_cols,
    figsize=(15, 7),
    dpi=72,
    squeeze=False
    )

plt.suptitle(f'FPR and F1S of {n_chunks} Chunks ({style.replace("_", " ").upper()})', fontsize=22)

# Define y-limits per metric over all parameters, so that subplots of one row are comparable
ylimits = {metric: [0, max(result_matrix_models[metric])] for metric in plotted_metrics}

# Actual plot
for i, parameter in enumerate(available_parameters):
    parameter_data = plotdata[plotdata.PARAMETER == parameter]

    for row_idx, metric in enumerate(plotted_metrics):
        current_axes = axs[row_idx, i]

        sns.barplot(
            ax=current_axes,
            data=parameter_data,
            x='ID',
            y=metric,
            hue='ALARM_TYPE',
            palette=sns.color_palette('colorblind'),
            ci=None)

        # Only the top row carries the parameter name as column title
        if row_idx == 0:
            current_axes.set_title(str(parameter), fontweight='bold', color='black', fontsize=14)

        current_axes.set_ylim(ylimits[metric])
        current_axes.set_xticklabels(current_axes.get_xticklabels(), rotation=90)

# Improve layout and save figure
fig.tight_layout()
# plt.show() only accepts the keyword argument `block`; the figure must not be passed positionally
plt.show()
#fig.savefig(f'../../plots/darts/{n_chunks}_chunks/{style}/fpr_f1s_model_result_matrix.png', dpi=1200)

In [None]:
"""
BEST MODELS

Scaled:
    RNN,     MEDIAN:  01
    RNN,     COV:     02
    LSTM,    MEDIAN:  03
    LSTM,    COV:     04
    GRU,     MEDIAN:  05
    GRU,     COV:     06

Normal:
    RNN,     MEDIAN:  07 -> best for HR & BP
    RNN,     COV:     08
    LSTM,    MEDIAN:  09
    LSTM,    COV:     10
    GRU,     MEDIAN:  11 -> best for O2
    GRU,     COV:     12
"""

## Analysis of Chunk-level Matrices

The following analysis steps are only performed for one chunk-specific matrix file.

### Define Variables to Adjust for Chunk-level Analyses

In [None]:
# Adjust variables defining path (`../../data/darts/{n_chunks}_chunks/{style}`)
n_chunks = 1000
style = 'all'

# Adjust variables defining model; together with `window_idx` they select the matrix file
# `confusion_matrix_chunks_{model_type}_{parameter}_{endogenous_input}_{version}_window{window_idx}.pickle`
version = 'normal'
model_type = 'LSTM'
parameter = 'o2'
endogenous_input = 'MEDIAN'

# Adjust variable defining selected window of chunks to predict
window_idx = 0

# Adjust variable for correlation plot (added to N_ITERATIONS to obtain the chunk length)
input_length = 12

### Print One Chunk-level Matrix

In [None]:
import pickle5 as pickle

# Define path to all chunk-level matrices produced by prediction
path_to_chunk_matrices = f'../../data/darts/{n_chunks}_chunks/{style}'

# The file name is assembled from the model variables and the window index defined above
chunks_matrix_path = (f'{path_to_chunk_matrices}/confusion_matrix_chunks_{model_type}_{parameter}_{endogenous_input}_'
                      f'{version}_window{window_idx}.pickle')

# Read chunk-specific matrix; the context manager closes the file even if unpickling fails
# NOTE: pickle can execute arbitrary code while loading - only read files created by the own prediction runs
with open(chunks_matrix_path, 'rb') as chunks_matrix_f:
    chunks_matrix = pickle.load(chunks_matrix_f)

# Show chunk-specific matrix
display(chunks_matrix)

### Add Metrics to Each Chunk

In [None]:
import numpy as np

def _ratio_or_nan(numerator, denominator):
    """Divide element-wise; zero denominators are replaced by NaN first, so the quotient is NaN there."""
    return numerator.div(denominator.where(denominator != 0, np.nan))


# Shared denominators: all actual negatives and all actual positives of a chunk
actual_negatives = chunks_matrix['FP'] + chunks_matrix['TN']
actual_positives = chunks_matrix['FN'] + chunks_matrix['TP']

chunks_matrix['TNR'] = _ratio_or_nan(chunks_matrix['TN'], actual_negatives)
chunks_matrix['FPR'] = _ratio_or_nan(chunks_matrix['FP'], actual_negatives)  # 1 - TNR

chunks_matrix['TPR'] = _ratio_or_nan(chunks_matrix['TP'], actual_positives)
chunks_matrix['FNR'] = _ratio_or_nan(chunks_matrix['FN'], actual_positives)  # 1 - TPR

chunks_matrix['F1S'] = _ratio_or_nan(chunks_matrix['TP'],
                                     chunks_matrix['TP'] + 0.5 * (chunks_matrix['FP'] + chunks_matrix['FN']))

chunks_matrix['ACC'] = _ratio_or_nan(chunks_matrix['TN'] + chunks_matrix['TP'],
                                     actual_negatives + actual_positives)

# Round all floats to 4 decimal places, value by value
# Note (kept from the original author): round() does not work for floats with many decimal places
decimals = 4
for metric_col in ('FPR', 'TPR', 'FNR', 'TNR', 'ACC', 'F1S'):
    chunks_matrix[metric_col] = chunks_matrix[metric_col].apply(lambda value: round(value, decimals))

# Bring columns into the same order as the model-level matrices
ordered_columns = ['CHUNK_ID', 'PARAMETER', 'MODEL', 'ENDOGENOUS', 'EXOGENOUS', 'FIRST_FORECAST',
                   'ALARM_TYPE', 'FP', 'TP', 'FN', 'TN', 'FPR', 'TPR', 'FNR', 'TNR', 'ACC', 'F1S',
                   'N_HIGH_ALARMS', 'N_LOW_ALARMS', 'N_ITERATIONS']
chunks_matrix = chunks_matrix[ordered_columns]

# Show complemented chunk-level matrix for one chunk
display(chunks_matrix)

### Plot Correlation Between Chunk Length and F1 Score/ Specificity (TNR) of Chunk

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Add column for chunk length (number of iterations plus `input_length`) to all chunks of matrix
chunks_matrix['LENGTH'] = chunks_matrix['N_ITERATIONS'] + input_length

colorblind_palette = sns.color_palette('colorblind')

for metric in ['F1S', 'TNR']:

    # Define background color, subplots and suptitle
    sns.set_style('whitegrid')
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(8, 4))
    fig.suptitle(f'Correlation of Chunk Length and {metric} of Chunk ({style.replace("_", " ").upper()})', fontsize=14)

    # Left plot = high threshold analysis, right plot = low threshold analysis.
    # Per side: alarm type, endogenous input for which the side is hidden, own axes, other axes,
    # position assigned to the other axes when this side is hidden, index of the palette color
    plot_sides = [
        ('High', 'MIN', ax1, ax2, [1, 0.2, 0.05, 0.2], 0),
        ('Low', 'MAX', ax2, ax1, [0, 0.2, 0.05, 0.2], 1),
    ]

    for alarm_type, hidden_for_input, own_axes, other_axes, other_position, color_idx in plot_sides:

        # Hide this side for the endogenous input it does not apply to
        if endogenous_input == hidden_for_input:
            own_axes.set_visible(False)
            other_axes.set_position(other_position)
            continue

        # Extract chunks of this alarm type; the fresh index makes access via column names possible again
        # Note: To draw lines again (default linestyle), cast to float and use the mean of the
        #       metric per 'LENGTH' (groupby) instead of the single chunks
        side_chunks = chunks_matrix[chunks_matrix['ALARM_TYPE'] == alarm_type][[metric, 'LENGTH']]
        side_chunks = side_chunks.reset_index(level=0, drop=True)

        # Scatter the metric over the chunk length (markers only, no connecting line)
        own_axes.plot('LENGTH', metric, data=side_chunks, marker='o', color=colorblind_palette[color_idx],
                      linestyle='None')
        own_axes.set_title(f'{metric} Regarding {alarm_type} Thresholds', fontsize=10)
        own_axes.set_xlabel('Chunk Length', fontsize=8)
        own_axes.set_ylabel(f'{metric} of Chunk', fontsize=8)
        own_axes.set_ylim(bottom=0, top=1.1)

    # Improve layout and save figure
    fig.tight_layout()
    fig.show()
    #fig.savefig(f'../../plots/darts/{n_chunks}_chunks/{style}/correlation_chunk_length_and_{metric}_{model_type}_{parameter}_'
    #            f'{endogenous_input}_{version}.png', dpi=1200)

### Time-Series Plot of Chunk with Prediction

Note: `chunks_ids_plotting` has to be adjusted manually.

In [None]:
print(f'Original amount of chunks: {len(chunks_matrix)}\n')

# Keep only chunks for which both FPR and F1S are available (not NaN)
has_fpr = chunks_matrix['FPR'].notnull()
has_f1s = chunks_matrix['F1S'].notnull()
interesting_chunks = chunks_matrix[has_fpr & has_f1s]
print(f'Amount of interesting chunks: {len(interesting_chunks)}\n')

# Print the chunk ID together with all metrics and alarm counts
summary_columns = ['CHUNK_ID', 'FPR', 'TPR', 'FNR', 'TNR', 'ACC', 'F1S', 'N_HIGH_ALARMS', 'N_LOW_ALARMS']
print(interesting_chunks[summary_columns])

In [None]:
from darts import TimeSeries
import pandas as pd

# Plot the original series of each selected chunk together with its predicted series
chunks_ids_plotting = ['200098.0_220277.0_2136-03-27 12:00:00', '200061.0_220277.0_2134-01-24 14:15:00']

# Read the resampled original series once; the file does not depend on the chunk
resampled_chunks = pd.read_parquet(f'../../data/resampling/resample_output_{parameter}_first{n_chunks}.parquet',
                                   engine='pyarrow')

for chunk_id in chunks_ids_plotting:

    # Format chunk IDs into Windows format that have to be used when loading from or saving to Windows machine
    chunk_id_win10 = chunk_id.replace(':', '%3A')

    # Extract predicted series of chunk; the context manager closes the file even if unpickling fails
    # NOTE: pickle can execute arbitrary code while loading - only read files created by the own prediction runs
    prediction_path = (f'../../data/darts/{n_chunks}_chunks/{style}/{model_type}/{parameter}/{endogenous_input}/'
                       f'05_prediction_{chunk_id_win10}_{version}_window{window_idx}.pickle')
    with open(prediction_path, 'rb') as prediction_chunk_f:
        prediction_chunk = pickle.load(prediction_chunk_f)

    # Convert predicted series of chunk to TimeSeries object
    prediction_chunk = TimeSeries.from_dataframe(
        df=prediction_chunk,
        time_col='Time',
        value_cols=['Value'],
        freq='H')

    # Extract original series of chunk
    original_chunk = resampled_chunks[resampled_chunks['CHUNK_ID_FILLED_TH'] == chunk_id]

    # Convert original series of chunk to TimeSeries object
    original_chunk = TimeSeries.from_dataframe(
        df=original_chunk,
        time_col='CHARTTIME',
        value_cols=[f'VITAL_PARAMTER_VALUE_{endogenous_input}_RESAMPLING'],
        freq='H')

    # Actual plot
    sns.set_style('whitegrid')
    plt.figure(figsize=(8, 5))
    original_chunk.plot(label=f'{parameter.upper()} - actual')
    prediction_chunk.plot(label=f'{parameter.upper()} - predicted')

    # Adjust texts of plot
    # ("Model" belongs to the model type on the first title line, the style goes on the second line)
    plt.legend()
    plt.suptitle(f'Prediction of {parameter.upper()} with {n_chunks} Chunks, {endogenous_input} Input, and {model_type} Model\n'
                 f'({style.replace("_", " ").upper()})', fontweight='bold')
    plt.xlabel('Time')
    plt.ylabel('Value')

    plt.show()
    #plt.savefig(f'../../plots/darts/{n_chunks}_chunks/{style}/prediction_{model_type}_{parameter}_{endogenous_input}_'
    #            f'{chunk_id_win10}_{version}.png', dpi=1200)

## Investigate Runtime

Note: To read an Excel file, you first need to install `openpyxl` with `pip install openpyxl`.

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Define variable identifying path (`../../data/darts/{n_chunks}_chunks/{style}/runtimes.xlsx`)
n_chunks = 1000
style = 'all'

# Convert excel table (columns A to H) into pandas data frame
# The code below reads the columns VERSION, MODEL, ENDOGENOUS, RUNTIME and PARAMETER from it
runtimes = pd.read_excel(f'../../data/darts/{n_chunks}_chunks/{style}/runtimes.xlsx', usecols='A:H')

# Add column for model number
# Model numbers keyed by (version, model type, endogenous input); 's' entries carry the
# numbers 01-06 and 'n' entries the numbers 07-12.
# Defined once at module level instead of being rebuilt for every row.
_MODEL_NUMBERS = {
    ('s',   'RNN',      'median'):  '01',
    ('s',   'RNN',      'cov'):     '02',
    ('s',   'LSTM',     'median'):  '03',
    ('s',   'LSTM',     'cov'):     '04',
    ('s',   'GRU',      'median'):  '05',
    ('s',   'GRU',      'cov'):     '06',
    ('n',   'RNN',      'median'):  '07',
    ('n',   'RNN',      'cov'):     '08',
    ('n',   'LSTM',     'median'):  '09',
    ('n',   'LSTM',     'cov'):     '10',
    ('n',   'GRU',      'median'):  '11',
    ('n',   'GRU',      'cov'):     '12'
}


def get_model_nr(current_row):
    """Return the two-digit model number of a runtime row.

    Args:
        current_row: Mapping-like row (e.g. a pandas Series) providing the keys
            'VERSION', 'MODEL' and 'ENDOGENOUS'.

    Returns:
        The model number as zero-padded string ('01' to '12').

    Raises:
        KeyError: If the combination of version, model and endogenous input is unknown.
    """
    return _MODEL_NUMBERS[current_row['VERSION'], current_row['MODEL'], current_row['ENDOGENOUS']]

# Look up the model number row by row and order the table by it
runtimes['MODEL_NR'] = runtimes.apply(get_model_nr, axis=1)
runtimes = runtimes.sort_values(by=['MODEL_NR'])

# Convert runtime into seconds
def get_sec(current_row):
    """Return the 'RUNTIME' entry of a row, given as 'H:M:S', in seconds.

    The value is converted with str() first, so any object whose string form
    consists of three ':'-separated integers is accepted.
    """
    hours, minutes, seconds = (int(part) for part in str(current_row['RUNTIME']).split(':'))
    return hours * 3600 + minutes * 60 + seconds

# Add the runtime in seconds and plot it per model number, one subplot per parameter
runtimes['RUNTIME_SEC'] = runtimes.apply(get_sec, axis=1)

# Set parameters
available_parameters = pd.unique(runtimes.PARAMETER)
n_cols = len(available_parameters)

# Create subplots
# squeeze=False keeps `axs` two-dimensional even if only one parameter is available,
# so no special case for a single column is needed
sns.set_style('whitegrid')
fig, axs = plt.subplots(
    nrows=1,
    ncols=n_cols,
    figsize=(15, 7),
    dpi=72,
    squeeze=False
    )

# Add main title
plt.suptitle(f'Runtime of Predictions with {n_chunks} Chunks ({style.replace("_", " ").upper()})', fontsize=22)

# Add actual plot and adjust texts
for i, parameter in enumerate(available_parameters):

    axes_runtime = axs[0, i]

    sns.barplot(
        ax=axes_runtime,
        data=runtimes[runtimes.PARAMETER == parameter],
        x='MODEL_NR',
        y='RUNTIME_SEC',
        palette=sns.color_palette('colorblind'),
        ci=None)
    axes_runtime.set_title(str(parameter), fontweight='bold', color='black', fontsize=14)
    axes_runtime.set_xlabel('MODEL NUMBER')
    axes_runtime.set_ylabel('RUNTIME (sec)')
    axes_runtime.set_xticklabels(axes_runtime.get_xticklabels(), rotation=90)

# Improve layout and save figure
fig.tight_layout()
# plt.show() only accepts the keyword argument `block`; the figure must not be passed positionally
plt.show()
#fig.savefig(f'../../plots/darts/{n_chunks}_chunks/{style}/runtimes.png', dpi=1200)