In [1]:
import numpy as np
import pandas as pd
import warnings
import time
from scipy import signal
from scipy.signal import butter, lfilter
import matplotlib.pyplot as plt
import math
import pickle
import statistics
import random
from sklearn.metrics.pairwise import cosine_similarity
from math import log
from sklearn import preprocessing
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
import sktime as sktime
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sktime.datasets import load_from_tsfile_to_dataframe
from sktime.alignment.dtw_python import AlignerDTWfromDist
from sktime.dists_kernels.scipy_dist import ScipyDist
from sktime.dists_kernels.compose_from_align import DistFromAligner
import os
import sys
%matplotlib inline
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import RidgeClassifierCV
from sktime.transformations.panel.rocket import Rocket
from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
from sktime.classification.hybrid import HIVECOTEV2
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

OMP: Info #271: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.


Dictionary saved to hb_sepsis_hive_cote_normalized.pickle
Dictionary saved to healthy_sepsis_hive_cote_normalized.pickle


### Load normal and abnormal datasets

In [2]:
final_normal_df = pd.read_pickle('normal_public_sepsis.pkl')
final_abnormal_df = pd.read_pickle('abnormal_public_sepsis.pkl')

Unnamed: 0,Subject,HR,MAP,DBP,SBP,Resp
0,1,75.0,63.00,46.0,108.00,13.0
1,1,75.0,63.00,46.0,108.00,13.0
2,1,80.0,75.00,55.0,123.00,19.0
3,1,78.0,58.00,44.0,103.00,22.0
4,1,82.0,76.00,54.0,126.00,24.0
...,...,...,...,...,...,...
46,507,80.0,92.33,75.0,164.50,15.0
47,507,76.0,84.33,68.0,148.50,18.0
48,507,138.0,96.67,83.5,156.50,17.0
49,507,111.0,97.33,81.0,152.00,16.0


Unnamed: 0,Subject,HR,MAP,DBP,SBP,Resp
0,1,103.0,80.0,64.0,112.0,20.0
1,1,111.0,93.5,80.5,131.0,20.0
2,1,106.0,87.0,77.0,114.0,20.0
3,1,107.0,62.0,54.0,85.0,20.0
4,1,106.0,80.0,70.0,109.0,23.0
...,...,...,...,...,...,...
121,175,109.0,100.0,65.0,94.0,31.0
122,175,99.0,77.0,65.0,94.0,16.0
123,175,113.0,105.0,65.0,94.0,21.0
124,175,110.0,105.0,65.0,94.0,18.0


In [3]:
abnormal_indices = (final_abnormal_df['Subject']).unique()
abnormal_no = (final_abnormal_df['Subject']).nunique()

array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
        27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,
        40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,
        53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,  65,
        66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,  78,
        79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,  91,
        92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104,
       105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
       118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130,
       131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
       144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156,
       157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
       170, 171, 172, 173, 174, 175])

175

In [4]:
# Specify the file path of the saved pkl file
file_path = 'final_bins.pkl'

# Load the dictionary from the pkl file
with open(file_path, 'rb') as file:
    final_numbers_dictionary = pickle.load(file)
# Now, 'loaded_dictionary' contains the dictionary loaded from the pkl file
display("Dictionary loaded from pickle file:")

'Dictionary loaded from pickle file:'

{'sample_numbers_list_0.0_to_0.1': [112,
  360,
  143,
  415,
  413,
  23,
  374,
  32,
  34,
  42,
  68,
  90,
  119,
  121,
  137,
  178,
  180,
  196,
  214,
  241,
  242,
  249,
  257,
  265,
  277,
  279,
  315,
  325,
  340,
  357,
  365,
  369,
  406,
  407,
  426,
  429,
  441,
  455,
  458,
  470,
  493],
 'sample_numbers_list_0.1_to_0.2': [14,
  288,
  20,
  33,
  35,
  51,
  52,
  60,
  63,
  67,
  73,
  144,
  167,
  176,
  205,
  228,
  235,
  250,
  296,
  313,
  323,
  337,
  362,
  396,
  409,
  419,
  440,
  464,
  467,
  486,
  489],
 'sample_numbers_list_0.2_to_0.3': [378,
  280,
  50,
  3,
  5,
  41,
  43,
  70,
  117,
  123,
  139,
  164,
  220,
  225,
  244,
  247,
  262,
  289,
  320,
  334,
  344,
  350,
  363,
  381,
  393,
  394,
  417,
  444,
  474,
  480,
  487,
  496],
 'sample_numbers_list_0.3_to_0.4': [72,
  457,
  7,
  27,
  30,
  69,
  79,
  100,
  138,
  150,
  160,
  163,
  204,
  232,
  245,
  300,
  331,
  336,
  341,
  352,
  377,
  402,
  408,
  4

In [5]:
dictionary_keys = final_numbers_dictionary.keys()
keys_list = list(dictionary_keys)

['sample_numbers_list_0.0_to_0.1',
 'sample_numbers_list_0.1_to_0.2',
 'sample_numbers_list_0.2_to_0.3',
 'sample_numbers_list_0.3_to_0.4',
 'sample_numbers_list_0.4_to_0.5',
 'sample_numbers_list_0.5_to_0.6',
 'sample_numbers_list_0.6_to_0.7',
 'sample_numbers_list_0.7_to_0.8',
 'sample_numbers_list_0.8_to_0.9',
 'sample_numbers_list_0.9_to_1.0',
 'sample_numbers_list_1.0_to_1.1',
 'sample_numbers_list_1.1_to_1.2',
 'sample_numbers_list_1.2_to_1.3',
 'sample_numbers_list_1.3_to_1.4',
 'sample_numbers_list_1.4_to_1.5',
 'sample_numbers_list_1.5_to_1.6',
 'sample_numbers_list_1.6_to_1.7',
 'sample_numbers_list_1.7_to_1.8',
 'sample_numbers_list_1.8_to_1.9',
 'sample_numbers_list_1.9_to_2.0',
 'sample_numbers_list_2.0_to_2.1',
 'sample_numbers_list_2.1_to_2.2',
 'sample_numbers_list_2.2_to_2.3',
 'sample_numbers_list_2.3_to_2.4',
 'sample_numbers_list_2.4_to_2.5',
 'sample_numbers_list_2.5_to_2.6',
 'sample_numbers_list_2.6_to_2.7',
 'sample_numbers_list_2.7_to_2.8',
 'sample_numbers_lis

### Function to find the length of the time series

In [6]:
# Define a function to z-normalize a column
def z_normalize(column):
    mean = column.mean()
    std = column.std()
    z_normalized = (column - mean) / std
    return z_normalized

In [7]:
def time_series_length_finding(df):
    train_1_abnormal = df.copy()
    train_1_ab_sub = (train_1_abnormal['Subject'].unique()).tolist()
    row_count = []
    for i in train_1_ab_sub:
        train_1_abnormal_new = train_1_abnormal[(train_1_abnormal['Subject']== i)]
        no_rows = train_1_abnormal_new.shape[0]
        row_count.append(no_rows)
    display("Avg length:",statistics.mean(row_count))
    avg_len_ts = int(statistics.mean(row_count))
    return(avg_len_ts)

In [8]:
def preprocessing_normal_df(df,desired_length):
    train_2_normal = df.copy()
    df_train_3 = pd.DataFrame(columns = train_2_normal.columns)
    num_dim = df_train_3.shape[1]
    display(num_dim)
    df_train_3.columns = range(len(df_train_3.columns))
    train_2_sub = (train_2_normal['Subject'].unique()).tolist()
    m = 0
    for i in train_2_sub:
        train_2_normal_new = train_2_normal[(train_2_normal['Subject']== i)]
        df_row_count = train_2_normal_new.shape[0]
        if df_row_count < desired_length:
            rows_to_add = desired_length - df_row_count
            train_2_normal_new.tail()
            last_row = train_2_normal_new.ffill().iloc[[-1]]  # Extract the last row
            new_rows_df = pd.concat([last_row] * rows_to_add, ignore_index=True)
            train_2_normal_initial = pd.concat([train_2_normal_new, new_rows_df], ignore_index=True)
            train_2_normal_initial_no_sub = train_2_normal_initial.drop(columns='Subject')
            train_2_normal_final = z_normalize(train_2_normal_initial_no_sub)
            train_2_normal_final.insert(0, 'Subject', i)
        else:
            train_2_normal_initial = train_2_normal_new.iloc[:desired_length,:]
            train_2_normal_initial_no_sub = train_2_normal_initial.drop(columns='Subject')
            train_2_normal_final = z_normalize(train_2_normal_initial_no_sub)
            train_2_normal_final.insert(0, 'Subject', i)
            
        train_2_normal_final.columns = range(len(train_2_normal_final.columns))
        
        df_train_3.at[m,0] = i
        j = 0
        while j < num_dim:
            df_train_3.at[m,j] = train_2_normal_final[j]
            j += 1
        m += 1    
    df_train_3['class'] = 'normal'
    df_normal_baseline = df_train_3.iloc[:,1:]
    return(df_normal_baseline)

In [9]:
def preprocessing_dataframe(df,desired_length):
    train_1_abnormal = df.copy()
    df_train_2 = pd.DataFrame(columns = train_1_abnormal.columns)
    num_dim_ab = df_train_2.shape[1]
    df_train_2.columns = range(len(df_train_2.columns))
    train_1_ab_sub = (train_1_abnormal['Subject'].unique()).tolist()
    m = 0
    for i in train_1_ab_sub:
        train_1_abnormal_new = train_1_abnormal[(train_1_abnormal['Subject']== i)]
        df_row_count = train_1_abnormal_new.shape[0]
        if df_row_count < desired_length:
            rows_to_add = desired_length - df_row_count
            last_row = train_1_abnormal_new.ffill().iloc[[-1]]  # Extract the last row
            new_rows_df = pd.concat([last_row] * rows_to_add, ignore_index=True)
            train_1_abnormal_initial = pd.concat([train_1_abnormal_new, new_rows_df], ignore_index=True)
            train_1_abnormal_initial_no_sub = train_1_abnormal_initial.drop(columns='Subject')
            train_1_abnormal_final = z_normalize(train_1_abnormal_initial_no_sub)
            train_1_abnormal_final.insert(0, 'Subject', i)
        else:
            train_1_abnormal_initial = train_1_abnormal_new.iloc[:desired_length,:]
            train_1_abnormal_initial_no_sub = train_1_abnormal_initial.drop(columns='Subject')
            train_1_abnormal_final = z_normalize(train_1_abnormal_initial_no_sub)
            train_1_abnormal_final.insert(0, 'Subject', i)
        train_1_abnormal_final.columns = range(len(train_1_abnormal_final.columns))
        
        df_train_2.at[m,0] = i
        j = 0
        while j < num_dim_ab:
            df_train_2.at[m,j] = train_1_abnormal_final[j]
            j += 1
        m += 1    
    df_train_2['class'] = 'abnormal'
    df_final = df_train_2.iloc[:,1:]
    return(df_final)

In [10]:
def ts_classifier(df_class_1):
    accuacy_list_1 = []
    precision_list_1 = []
    recall_list_1 = []
    f1_list_1 = []

    X_1 = df_class_1.iloc[:,:-1]
    y_1 = df_class_1.iloc[:,-1]
    display("X_1:",X_1)

    for i in range(1,31):
        X_train_1, X_test_1, y_train_1, y_test_1 = train_test_split(X_1, y_1, test_size=0.2, random_state=i,stratify = y_1)

        hc2 = HIVECOTEV2(time_limit_in_minutes=0.2)
        hc2.fit(X_train_1, y_train_1)
        y_pred_1 = hc2.predict(X_test_1)

        pos_label = 'abnormal'

        acc_1 = accuracy_score(y_test_1, y_pred_1)
        accuacy_list_1.append(acc_1)

        prec_1 = precision_score(y_test_1, y_pred_1, pos_label=pos_label)
        precision_list_1.append(prec_1)

        recall_1 = recall_score(y_test_1, y_pred_1, pos_label=pos_label)
        recall_list_1.append(recall_1)

        f1_1 = f1_score(y_test_1, y_pred_1, pos_label=pos_label)
        f1_list_1.append(f1_1)
        display("Completed",i)

    acc_mean_1 = np.mean(accuacy_list_1)
    acc_std_dev_1 = np.std(accuacy_list_1)

    prec_mean_1 = np.mean(precision_list_1)
    prec_std_dev_1 = np.std(precision_list_1)

    recall_mean_1 = np.mean(recall_list_1)
    recall_std_dev_1 = np.std(recall_list_1)

    f1_mean_1 = np.mean(f1_list_1)
    f1_std_dev_1 = np.std(f1_list_1)
    return(acc_mean_1,acc_std_dev_1,prec_mean_1,prec_std_dev_1,recall_mean_1,recall_std_dev_1,f1_mean_1,f1_std_dev_1)

In [11]:
# Keys to combine
key_to_combine1 = 'sample_numbers_list_0.0_to_0.1'
key_to_combine2 = 'sample_numbers_list_0.1_to_0.2'
# Create a new dictionary with combined values
hb_dict = {
    'hb_key': final_numbers_dictionary[key_to_combine1] + final_numbers_dictionary[key_to_combine2]
}

{'hb_key': [112,
  360,
  143,
  415,
  413,
  23,
  374,
  32,
  34,
  42,
  68,
  90,
  119,
  121,
  137,
  178,
  180,
  196,
  214,
  241,
  242,
  249,
  257,
  265,
  277,
  279,
  315,
  325,
  340,
  357,
  365,
  369,
  406,
  407,
  426,
  429,
  441,
  455,
  458,
  470,
  493,
  14,
  288,
  20,
  33,
  35,
  51,
  52,
  60,
  63,
  67,
  73,
  144,
  167,
  176,
  205,
  228,
  235,
  250,
  296,
  313,
  323,
  337,
  362,
  396,
  409,
  419,
  440,
  464,
  467,
  486,
  489]}

In [12]:
classifier_dict_hb = {}
key = 'hb_key'
normal_sample_numbers = [] 
normal_sample_numbers.extend(hb_dict[key])
normal_sample_length = len(normal_sample_numbers)
display(normal_sample_length)

if len(normal_sample_numbers) <= abnormal_no:
    df_normal = final_normal_df[final_normal_df['Subject'].isin(normal_sample_numbers)]
    display(df_normal.head())

    random.seed(42)
    abnormal_sample_numbers = random.sample(abnormal_indices.tolist(), normal_sample_length)
    df_abnormal = final_abnormal_df[final_abnormal_df['Subject'].isin(abnormal_sample_numbers)]
    display(df_abnormal.head())

    desired_length_for_ts = time_series_length_finding(df_abnormal)

    df_normal_preprocessed = preprocessing_normal_df(df_normal,desired_length_for_ts)
    df_abnormal_preprocessed = preprocessing_dataframe(df_abnormal,desired_length_for_ts)

    df_classifier = (pd.concat([df_normal_preprocessed,df_abnormal_preprocessed]).reset_index()).drop(columns='index')
    acc_mean,acc_std_dev,prec_mean,prec_std_dev,recall_mean,recall_std_dev,f1_mean,f1_std_dev = ts_classifier(df_classifier)

    classifier_dict_hb[key] = {'acc_mean': acc_mean,'acc_std_dev': acc_std_dev,'prec_mean': prec_mean,'prec_std_dev': prec_std_dev,'recall_mean': recall_mean,'recall_std_dev': recall_std_dev,'f1_mean': f1_mean,'f1_std_dev': f1_std_dev}

else:
    df_abnormal = final_abnormal_df

    random.seed(42)
    random_sample_numbers = random.sample(normal_sample_numbers, abnormal_no)
    df_normal = final_normal_df[final_normal_df['Subject'].isin(random_sample_numbers)]
    display(df_normal)

    desired_length_for_ts = time_series_length_finding(df_abnormal)

    df_normal_preprocessed = preprocessing_normal_df(df_normal,desired_length_for_ts)
    df_abnormal_preprocessed = preprocessing_dataframe(df_abnormal,desired_length_for_ts)

    df_classifier = (pd.concat([df_normal_preprocessed,df_abnormal_preprocessed]).reset_index()).drop(columns='index')
    acc_mean,acc_std_dev,prec_mean,prec_std_dev,recall_mean,recall_std_dev,f1_mean,f1_std_dev = ts_classifier(df_classifier)

    classifier_dict_hb[key] = {'acc_mean': acc_mean,'acc_std_dev': acc_std_dev,'prec_mean': prec_mean,'prec_std_dev': prec_std_dev,'recall_mean': recall_mean,'recall_std_dev': recall_std_dev,'f1_mean': f1_mean,'f1_std_dev': f1_std_dev}

72

Unnamed: 0,Subject,HR,MAP,DBP,SBP,Resp
0,14,58.0,71.5,49.0,123.5,11.0
1,14,75.0,80.0,58.0,136.0,12.0
2,14,75.0,80.0,58.0,136.0,12.0
3,14,75.0,80.0,58.0,136.0,12.0
4,14,76.5,82.0,62.0,137.0,16.5


Unnamed: 0,Subject,HR,MAP,DBP,SBP,Resp
0,2,77.0,102.0,74.0,161.0,18.5
1,2,78.0,102.0,74.0,129.0,18.0
2,2,74.0,102.0,74.0,144.5,20.0
3,2,77.0,103.0,78.0,150.5,18.0
4,2,78.0,99.0,80.0,134.0,18.0


'Avg length:'

106.48611111111111

6

'X_1:'

Unnamed: 0,1,2,3,4,5
0,0 -0.105458 1 2.156868 2 2.15686...,0 -2.226819 1 -1.046605 2 -1.04660...,0 -2.899032 1 -1.326285 2 -1.32628...,0 -1.365259 1 -0.350125 2 -0.35012...,0 -0.542939 1 -0.029086 2 -0.02908...
1,0 -1.086132 1 -1.086132 2 -0.87655...,0 -0.727819 1 -0.727819 2 -0.98801...,0 -1.194132 1 -1.194132 2 -1.26061...,0 -0.746295 1 -0.746295 2 -1.41598...,0 -0.656451 1 -0.656451 2 -1.44081...
2,0 -0.441790 1 -0.339262 2 -0.51868...,0 -0.011731 1 0.521171 2 0.58038...,0 0.622406 1 0.622406 2 0.77601...,0 -0.660632 1 -0.320530 2 0.01957...,0 0.129944 1 0.659714 2 0.92459...
3,0 3.186905 1 2.687368 2 2.93713...,0 1.754275 1 1.818485 2 1.69006...,0 0.656200 1 1.026776 2 2.58319...,0 1.502163 1 1.565475 2 1.37553...,0 0.393412 1 1.891676 2 -2.10369...
4,0 -0.866961 1 -1.463458 2 -1.76170...,0 6.174958 1 0.460671 2 -0.42986...,0 5.486180 1 0.092835 2 -0.85336...,0 5.608109 1 -0.055790 2 -0.83318...,0 -3.297001 1 -2.740146 2 -1.51506...
...,...,...,...,...,...
139,0 1.511999 1 0.639111 2 1.60898...,0 -0.257419 1 0.084088 2 -0.66722...,0 0.658101 1 0.498956 2 0.55200...,0 -0.746631 1 -0.281084 2 -0.20757...,0 -0.484875 1 -0.484875 2 -0.48487...
140,0 -0.709804 1 -0.734702 2 -0.95877...,0 -0.604892 1 -0.653614 2 -0.45872...,0 -0.371030 1 -0.439191 2 -0.84816...,0 -0.476280 1 -0.528034 2 -0.03637...,0 -0.543197 1 -0.543197 2 -0.17449...
141,0 -0.593188 1 -0.482973 2 -0.81362...,0 1.633853 1 -0.252631 2 -1.40292...,0 0.101221 1 -0.700402 2 -1.62535...,0 2.711120 1 -0.052762 2 -1.19083...,0 -2.354645 1 0.265747 2 0.26574...
142,0 -0.891320 1 -0.743649 2 -0.41877...,0 -0.722405 1 -0.673287 2 -0.67328...,0 -0.948864 1 -0.505782 2 -0.56908...,0 -0.415329 1 -0.581964 2 -0.64861...,0 -2.125695 1 -2.125695 2 -2.12569...


'Completed'

1

'Completed'

2

'Completed'

3

'Completed'

4

'Completed'

5

'Completed'

6

'Completed'

7

'Completed'

8

'Completed'

9

'Completed'

10

'Completed'

11

'Completed'

12

'Completed'

13

'Completed'

14

'Completed'

15

'Completed'

16

'Completed'

17

'Completed'

18

'Completed'

19

'Completed'

20

'Completed'

21

'Completed'

22

'Completed'

23

'Completed'

24

'Completed'

25

'Completed'

26

'Completed'

27

'Completed'

28

'Completed'

29

'Completed'

30

In [13]:
import pickle

# Specify the file path where you want to save the pickle file
pickle_file_path = 'hb_sepsis_hive_cote_normalized.pickle'

# Save the dictionary to a pickle file
with open(pickle_file_path, 'wb') as file:
    pickle.dump(classifier_dict_hb, file)

print(f"Dictionary saved to {pickle_file_path}")

In [14]:
classifier_dict_hb

{'hb_key': {'acc_mean': 0.9356321839080458,
  'acc_std_dev': 0.03531561263387867,
  'prec_mean': 0.995,
  'prec_std_dev': 0.018831612507420332,
  'recall_mean': 0.8747619047619047,
  'recall_std_dev': 0.0701596279480013,
  'f1_mean': 0.929442297166435,
  'f1_std_dev': 0.03992466573953917}}

In [15]:
all_values = [value for sublist in final_numbers_dictionary.values() for value in sublist]
healthy_dict = {'healthy_key': all_values}

{'healthy_key': [112,
  360,
  143,
  415,
  413,
  23,
  374,
  32,
  34,
  42,
  68,
  90,
  119,
  121,
  137,
  178,
  180,
  196,
  214,
  241,
  242,
  249,
  257,
  265,
  277,
  279,
  315,
  325,
  340,
  357,
  365,
  369,
  406,
  407,
  426,
  429,
  441,
  455,
  458,
  470,
  493,
  14,
  288,
  20,
  33,
  35,
  51,
  52,
  60,
  63,
  67,
  73,
  144,
  167,
  176,
  205,
  228,
  235,
  250,
  296,
  313,
  323,
  337,
  362,
  396,
  409,
  419,
  440,
  464,
  467,
  486,
  489,
  378,
  280,
  50,
  3,
  5,
  41,
  43,
  70,
  117,
  123,
  139,
  164,
  220,
  225,
  244,
  247,
  262,
  289,
  320,
  334,
  344,
  350,
  363,
  381,
  393,
  394,
  417,
  444,
  474,
  480,
  487,
  496,
  72,
  457,
  7,
  27,
  30,
  69,
  79,
  100,
  138,
  150,
  160,
  163,
  204,
  232,
  245,
  300,
  331,
  336,
  341,
  352,
  377,
  402,
  408,
  435,
  481,
  500,
  380,
  473,
  194,
  6,
  21,
  36,
  37,
  77,
  103,
  109,
  125,
  128,
  132,
  135,
  149,
  187,


In [16]:
classifier_dict_healthy = {}
key = 'healthy_key'
normal_sample_numbers = [] 
normal_sample_numbers.extend(healthy_dict[key])
normal_sample_length = len(normal_sample_numbers)
display(normal_sample_length)

if len(normal_sample_numbers) <= abnormal_no:
    df_normal = final_normal_df[final_normal_df['Subject'].isin(normal_sample_numbers)]
    display(df_normal.head())

    random.seed(42)
    abnormal_sample_numbers = random.sample(abnormal_indices.tolist(), normal_sample_length)
    df_abnormal = final_abnormal_df[final_abnormal_df['Subject'].isin(abnormal_sample_numbers)]
    display(df_abnormal.head())

    desired_length_for_ts = time_series_length_finding(df_abnormal)

    df_normal_preprocessed = preprocessing_normal_df(df_normal,desired_length_for_ts)
    df_abnormal_preprocessed = preprocessing_dataframe(df_abnormal,desired_length_for_ts)

    df_classifier = (pd.concat([df_normal_preprocessed,df_abnormal_preprocessed]).reset_index()).drop(columns='index')
    acc_mean,acc_std_dev,prec_mean,prec_std_dev,recall_mean,recall_std_dev,f1_mean,f1_std_dev = ts_classifier(df_classifier)

    classifier_dict_healthy[key] = {'acc_mean': acc_mean,'acc_std_dev': acc_std_dev,'prec_mean': prec_mean,'prec_std_dev': prec_std_dev,'recall_mean': recall_mean,'recall_std_dev': recall_std_dev,'f1_mean': f1_mean,'f1_std_dev': f1_std_dev}

else:
    df_abnormal = final_abnormal_df

    random.seed(42)
    random_sample_numbers = random.sample(normal_sample_numbers, abnormal_no)
    df_normal = final_normal_df[final_normal_df['Subject'].isin(random_sample_numbers)]
    display(df_normal)

    desired_length_for_ts = time_series_length_finding(df_abnormal)

    df_normal_preprocessed = preprocessing_normal_df(df_normal,desired_length_for_ts)
    df_abnormal_preprocessed = preprocessing_dataframe(df_abnormal,desired_length_for_ts)

    df_classifier = (pd.concat([df_normal_preprocessed,df_abnormal_preprocessed]).reset_index()).drop(columns='index')
    acc_mean,acc_std_dev,prec_mean,prec_std_dev,recall_mean,recall_std_dev,f1_mean,f1_std_dev = ts_classifier(df_classifier)

    classifier_dict_healthy[key] = {'acc_mean': acc_mean,'acc_std_dev': acc_std_dev,'prec_mean': prec_mean,'prec_std_dev': prec_std_dev,'recall_mean': recall_mean,'recall_std_dev': recall_std_dev,'f1_mean': f1_mean,'f1_std_dev': f1_std_dev}

502

Unnamed: 0,Subject,HR,MAP,DBP,SBP,Resp
0,11,90.0,70.0,53.0,111.0,24.0
1,11,90.0,70.0,53.0,111.0,24.0
2,11,90.0,80.0,60.0,121.0,24.0
3,11,81.0,67.0,52.0,99.0,12.0
4,11,83.0,66.0,51.0,99.0,14.0
...,...,...,...,...,...,...
52,506,104.0,89.0,69.0,136.0,28.0
53,506,100.0,83.0,65.0,121.0,19.0
54,506,91.0,74.0,59.0,102.0,14.0
55,506,90.0,66.0,61.0,101.0,16.0


'Avg length:'

108.59428571428572

6

'X_1:'

Unnamed: 0,1,2,3,4,5
0,0 1.340723 1 1.340723 2 1.34072...,0 -0.778554 1 -0.778554 2 0.82151...,0 -0.107189 1 -0.107189 2 1.97062...,0 -0.903078 1 -0.903078 2 -0.10722...,0 1.674277 1 1.674277 2 1.67427...
1,0 -0.748380 1 -0.748380 2 -1.25784...,0 -1.904063 1 -1.904063 2 0.13196...,0 -1.310589 1 -1.310589 2 0.13027...,0 -1.252654 1 -1.252654 2 0.98884...,0 0.638863 1 0.638863 2 -0.40025...
2,0 -1.129663 1 -1.129663 2 -1.50219...,0 -2.085958 1 -2.085958 2 -2.48453...,0 -2.047608 1 -2.047608 2 -2.53958...,0 -0.139525 1 -0.139525 2 -0.42746...,0 -2.240410 1 -2.240410 2 -0.95165...
3,0 -0.459793 1 -0.357141 2 -0.53678...,0 0.002752 1 0.537660 2 0.59709...,0 0.639073 1 0.639073 2 0.79330...,0 -0.650055 1 -0.308420 2 0.03321...,0 0.143012 1 0.675609 2 0.94190...
4,0 0.362196 1 0.362196 2 0.61620...,0 -0.655066 1 -0.655066 2 -0.18007...,0 -0.340537 1 -0.340537 2 -0.00389...,0 -0.970170 1 -0.970170 2 0.12910...,0 -0.491569 1 -0.491569 2 -0.17366...
...,...,...,...,...,...
345,0 0.650005 1 0.589643 2 -0.43650...,0 0.608622 1 1.589685 2 1.41131...,0 3.279536 1 3.279536 2 1.58788...,0 0.691488 1 0.691488 2 -0.02521...,0 -0.462499 1 0.541519 2 -0.46249...
346,0 0.047154 1 0.047154 2 0.74614...,0 -0.057694 1 -0.057694 2 -0.05769...,0 0.213708 1 0.213708 2 -0.04931...,0 -0.447369 1 -0.447369 2 -0.13157...,0 -0.476827 1 -0.476827 2 -1.35618...
347,0 -0.843348 1 -0.698360 2 -0.37938...,0 -0.717568 1 -0.668112 2 -0.66811...,0 -0.930073 1 -0.487014 2 -0.55030...,0 -0.418345 1 -0.586556 2 -0.65384...,0 -2.073631 1 -2.073631 2 -2.07363...
348,0 2.245241 1 0.922891 2 1.58406...,0 -0.732514 1 -1.682273 2 -1.43603...,0 -1.124194 1 -2.176147 2 -1.29952...,0 0.192826 1 -2.343582 2 -2.14333...,0 -0.059245 1 -0.059245 2 0.18685...


'Completed'

1

'Completed'

2

'Completed'

3

'Completed'

4

'Completed'

5

'Completed'

6

'Completed'

7

'Completed'

8

'Completed'

9

'Completed'

10

'Completed'

11

'Completed'

12

'Completed'

13

'Completed'

14

'Completed'

15

'Completed'

16

'Completed'

17

'Completed'

18

'Completed'

19

'Completed'

20

'Completed'

21

'Completed'

22

'Completed'

23

'Completed'

24

'Completed'

25

'Completed'

26

'Completed'

27

'Completed'

28

'Completed'

29

'Completed'

30

In [17]:
classifier_dict_healthy

{'healthy_key': {'acc_mean': 0.906190476190476,
  'acc_std_dev': 0.029435351353382086,
  'prec_mean': 0.9861990595611285,
  'prec_std_dev': 0.024184479765397768,
  'recall_mean': 0.8247619047619049,
  'recall_std_dev': 0.06026396959830997,
  'f1_mean': 0.8967943702145782,
  'f1_std_dev': 0.03508358306901532}}

In [18]:
import pickle

# Specify the file path where you want to save the pickle file
pickle_file_path = 'healthy_sepsis_hive_cote_normalized.pickle'

# Save the dictionary to a pickle file
with open(pickle_file_path, 'wb') as file:
    pickle.dump(classifier_dict_healthy, file)

print(f"Dictionary saved to {pickle_file_path}")