In [3]:
import pandas as pd
import numpy as np, os
from tensorflow.keras.layers import Input, Dense, Conv1D, Flatten, MaxPooling1D, BatchNormalization, Dropout, Activation
from keras.models import Sequential
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns
import tensorflow as tf

# Preprocessing

In [4]:
# Folder with the artifact-free recordings whose file names end in '_128.csv'
# (presumably sampled at 128 Hz — confirm against the data description).
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
folder_path = u'C:/Users/user/Desktop/AI-project/signals_no_artifacts/128'

# os.listdir returns entries in arbitrary order; sorting makes the order of the
# recordings (and therefore the patient ids assigned during segmentation) reproducible.
file_names = sorted(file for file in os.listdir(folder_path) if file.endswith('_128.csv'))

# One DataFrame per recording, in file-name order.
list_128 = [pd.read_csv(os.path.join(folder_path, file_name)) for file_name in file_names]

# Running list of every recording loaded so far (the next cell appends to it).
all_data_list = list(list_128)

# Concatenate once instead of growing the frame inside a loop, which copies all
# previously accumulated rows on every iteration.
all_data = pd.concat(list_128, ignore_index=True) if list_128 else pd.DataFrame()

In [5]:
# Folder with the artifact-free recordings whose file names end in '_250.csv'
# (presumably sampled at 250 Hz — confirm against the data description).
# NOTE(review): absolute, machine-specific path; consider a configurable data directory.
folder_path = u'C:/Users/user/Desktop/AI-project/signals_no_artifacts/250'

# Sort for a reproducible recording order; os.listdir gives no ordering guarantee.
file_names = sorted(file for file in os.listdir(folder_path) if file.endswith('_250.csv'))

# One DataFrame per recording, in file-name order.
list_250 = [pd.read_csv(os.path.join(folder_path, file_name)) for file_name in file_names]

# Extend the accumulators created by the previous cell.
# NOTE: re-running this cell alone appends the same recordings again.
all_data_list.extend(list_250)

# Single concatenation instead of one per file (avoids quadratic copying).
if list_250:
    all_data = pd.concat([all_data, *list_250], ignore_index=True)

# Now 'all_data_list' contains a list of all data from all CSV files

# Signal Segmentation

In [6]:
# Number of samples in each peak-centred segment; also used as the CNN input length.
window_size = 200

In [7]:
# Segment each patient's signal: one fixed-length window centred on every annotated peak.
# NOTE(review): the original note here asked for a 5-second window
# (128 samples/s * 5 s = 640 samples), but window_size is 200 samples,
# i.e. roughly 1.56 s at 128 samples/s — confirm which one is intended.
half_window = window_size // 2

column_names = ['id', 'segment_values', 'label']

# Patient counter. The name shadows the builtin `id`, but it is kept because the
# following cell continues counting from the value left here.
id = 0

# Collect plain records and build the DataFrame once at the end; concatenating a
# one-row frame per segment copies the whole table on every peak.
segment_records = []

for patient_data in list_128:
    # Pull the columns out once per patient rather than once per sample.
    patient_signal = patient_data['value'].values
    is_peak = patient_data['IsPeak'].values
    peak_nature = patient_data['PeakNature'].values
    last_index = len(patient_signal) - 1

    # Visit only the samples flagged as peaks (same `== True` test as before).
    for i in np.flatnonzero(is_peak == True):
        # Keep a segment only if the full window fits inside the signal.
        if i - half_window >= 0 and i + half_window <= last_index:
            segment_records.append({'id': id,
                                    'segment_values': patient_signal[i - half_window:i + half_window],
                                    'label': peak_nature[i]})

    id = id + 1

df_segments_patients_all = pd.DataFrame(segment_records, columns=column_names)

In [8]:
# Segment the recordings from the '250' folder. Twice as many raw samples are taken
# on each side of the peak and every second sample is kept, so each segment again
# holds 2 * half_window values.
# NOTE(review): if these recordings are 250 samples/s, plain decimation by 2 gives
# 125 samples/s (not exactly 128) and applies no anti-aliasing filter — confirm this
# approximation is acceptable.
decimation = 2
half_span = half_window * decimation

# Collect records first and append them in one concat; growing the frame one row
# at a time copies the whole table on every peak.
segment_records_250 = []

for patient_data in list_250:
    # Pull the columns out once per patient rather than once per sample.
    patient_signal = patient_data['value'].values
    is_peak = patient_data['IsPeak'].values
    peak_nature = patient_data['PeakNature'].values
    last_index = len(patient_signal) - 1

    # Visit only the samples flagged as peaks (same `== True` test as before).
    for i in np.flatnonzero(is_peak == True):
        # Keep a segment only if the full window fits inside the signal.
        if i - half_span >= 0 and i + half_span <= last_index:
            segment_records_250.append({'id': id,
                                        'segment_values': patient_signal[i - half_span:i + half_span:decimation],
                                        'label': peak_nature[i]})

    # `id` continues the patient counter started in the previous cell.
    id = id + 1

# Skip the concat when nothing was collected so the existing frame is left untouched.
if segment_records_250:
    df_segments_patients_all = pd.concat(
        [df_segments_patients_all,
         pd.DataFrame(segment_records_250, columns=df_segments_patients_all.columns)],
        ignore_index=True)

In [9]:
%store df_segments_patients_all

Stored 'df_segments_patients_all' (DataFrame)


# Reload state after a kernel restart

In [10]:

import pandas as pd
import numpy as np, os
from tensorflow.keras.layers import Input, Dense, Conv1D, Flatten, MaxPooling1D, BatchNormalization, Dropout, Activation
from keras.models import Sequential
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import seaborn as sns
import tensorflow as tf

# Segment length in samples; must match the value used when the stored segments were created.
window_size = 200

%store -r df_segments_patients_all


# Train / Val / Test split

In [11]:
df_segments_patients_all

Unnamed: 0,id,segment_values,label
0,0,"[0.2058954076128947, 0.404473859991316, 0.5970...",N
1,0,"[-0.2666242824030803, -0.0795323175512104, 0.0...",N
2,0,"[-0.1162680016719362, 0.0678719958779269, 0.23...",N
3,0,"[-1.182943689494174, -1.219759579499485, -1.25...",S
4,0,"[0.6543489230048096, 0.6589239703337957, 0.656...",N
...,...,...,...
174910,104,"[-1.0629289182803177, -1.1187745188007785, -1....",N
174911,104,"[-1.1937303088779458, -1.24266277124888, -1.29...",N
174912,104,"[-1.0626134783938492, -1.1189344450827805, -1....",N
174913,104,"[-1.039934227396032, -1.0936256864302645, -1.1...",N


In [12]:
# Seed NumPy's global RNG so the np.random.choice patient split below is repeatable.
np.random.seed(20)

In [13]:
# Split by patient (70 % / 15 % / remainder) so that all segments of one patient
# end up in exactly one subset.
# Draw from the ids that actually occur in the table: assuming they are exactly
# 0..num_patients-1 silently drops patients whenever an id is missing (for example
# a recording that produced no segment). With contiguous ids the draws are the same
# as sampling from range(num_patients).
patient_ids = np.sort(df_segments_patients_all['id'].unique())
num_patients = len(patient_ids)

train_indices = np.random.choice(patient_ids, size=int(num_patients * 0.7), replace=False)

# np.setdiff1d returns a sorted array, so the candidate order does not depend on
# set iteration order.
remaining_ids = np.setdiff1d(patient_ids, train_indices)
validation_indices = np.random.choice(remaining_ids, size=int(num_patients * 0.15), replace=False)

# Whatever is left after train and validation is the test set.
test_indices = np.setdiff1d(remaining_ids, validation_indices).tolist()


In [14]:
# Class index assigned to each label string; used below to build integer targets
# before one-hot encoding with 3 classes.
label_mapping = {"N": 0, "S": 1, "V": 2}

In [15]:
# Rows belonging to the training patients; the patient id is not a model input.
train_df = df_segments_patients_all[df_segments_patients_all['id'].isin(train_indices)].drop(['id'], axis=1)
# Split dataframe into features and labels
X_train = train_df.drop(['label'], axis=1).values
y_train = train_df['label']

# Stack the per-row segment arrays into one 2-D array (one row per segment).
# No channel axis is added here; the array is passed to the model as is.
X_train_np = np.stack(train_df['segment_values'].to_list())

# One-hot encoding with a fixed category list: get_dummies on raw labels only emits
# columns for the labels that happen to be present, which would give fewer than
# three target columns if a class were missing from this subset.
label_dtype = pd.CategoricalDtype(categories=list(label_mapping))
one_hot_encoded_df = pd.get_dummies(y_train.astype(label_dtype))
one_hot_encoded_array_train = one_hot_encoded_df.to_numpy(dtype='float32')

# Same targets built through the explicit label -> index mapping.
Y_train = [label_mapping[value] for value in y_train]
Y_train_categorical = tf.keras.utils.to_categorical(Y_train, num_classes=3)

In [16]:
# Number of training segments carrying each label.
nN, nS, nV = (np.count_nonzero(y_train == label_name) for label_name in ('N', 'S', 'V'))
labelled_total = nN + nS + nV

# Print the share of each class in the training labels.
for label_name, label_count in (('N', nN), ('S', nS), ('V', nV)):
    print("Perc " + label_name + " : ")
    print(label_count / labelled_total)

Perc N : 
0.904482047600364
Perc S : 
0.052767783352394626
Perc V : 
0.0427501690472414


In [17]:
# Rows belonging to the validation patients; the patient id is not a model input.
validation_df = df_segments_patients_all[df_segments_patients_all['id'].isin(validation_indices)].drop(['id'], axis=1)
# Split dataframe into features and labels
X_validation = validation_df.drop(['label'], axis=1).values
y_validation = validation_df['label']

# Stack the per-row segment arrays into one 2-D array (one row per segment).
# No channel axis is added here; the array is passed to the model as is.
X_val_np = np.stack(validation_df['segment_values'].to_list())

# One-hot encoding with a fixed category list: get_dummies on raw labels only emits
# columns for the labels that happen to be present, so a validation subset lacking
# one class would otherwise yield fewer than three target columns.
label_dtype = pd.CategoricalDtype(categories=list(label_mapping))
one_hot_encoded_df = pd.get_dummies(y_validation.astype(label_dtype))
one_hot_encoded_array_validation = one_hot_encoded_df.to_numpy(dtype='float32')

# Same targets built through the explicit label -> index mapping.
Y_validation = [label_mapping[value] for value in y_validation]
Y_validation_categorical = tf.keras.utils.to_categorical(Y_validation, num_classes=3)

In [18]:
# Rows belonging to the test patients; the patient id is not a model input.
test_df = df_segments_patients_all[df_segments_patients_all['id'].isin(test_indices)].drop(['id'], axis=1)
# Split dataframe into features and labels
X_test = test_df.drop(['label'], axis=1).values

y_test = test_df['label']

# Stack the per-row segment arrays into one 2-D array (one row per segment).
# No channel axis is added here; the array is passed to the model as is.
X_test_np = np.stack(test_df['segment_values'].to_list())

# One-hot encoding with a fixed category list: get_dummies on raw labels only emits
# columns for the labels that happen to be present, so a test subset lacking one
# class would otherwise yield fewer than three target columns.
label_dtype = pd.CategoricalDtype(categories=list(label_mapping))
one_hot_encoded_df = pd.get_dummies(y_test.astype(label_dtype))
one_hot_encoded_array_test = one_hot_encoded_df.to_numpy(dtype='float32')

# Same targets built through the explicit label -> index mapping.
Y_test = [label_mapping[value] for value in y_test]
Y_test_categorical = tf.keras.utils.to_categorical(Y_test, num_classes=3)

# Model

In [19]:
# 1D CNN classifier: two convolutions, batch normalisation, max pooling, then a
# dense head ending in a 3-way softmax.
# The input shape is declared with an explicit Input layer; passing `input_shape`
# to the first Conv1D of a Sequential model makes Keras emit a UserWarning.
model = Sequential([
    Input(shape=(window_size, 1)),

    Conv1D(filters=128, kernel_size=3, activation='relu'),
    Conv1D(filters=64, kernel_size=3, activation='relu'),

    BatchNormalization(),
    # NOTE(review): the preceding Conv1D already applies ReLU, so this is a second
    # ReLU after normalisation. Kept to leave the architecture unchanged.
    Activation('relu'),

    MaxPooling1D(pool_size=2),

    Flatten(),

    Dense(1024),
    # Dropout is applied to the Dense output before its ReLU, as in the original.
    Dropout(0.5),
    Activation('relu'),

    Dense(3, activation='softmax'),
])


  super().__init__(


In [20]:
from keras.callbacks import EarlyStopping

# Stop training when the validation loss has not improved for 5 consecutive epochs,
# report when that happens, and roll the model back to its best weights.
early_stopping = EarlyStopping(
    monitor='val_loss',
    restore_best_weights=True,
    patience=5,
    verbose=1,
)

In [21]:
# Number of training segments per label.
nN, nS, nV = (np.count_nonzero(y_train == label_name) for label_name in ('N', 'S', 'V'))

class_sum = nN + nV + nS

# Weight of a class = 1 - its share of the training segments, rounded to 2 decimals.
# NOTE(review): this is not inverse-frequency weighting; confirm the scheme is intended.
wN, wS, wV = (round(1 - (class_count / class_sum), 2) for class_count in (nN, nS, nV))

# Keys are the class indices 0, 1, 2 in the order N, S, V.
class_weights_fed = dict(zip(range(3), (wN, wS, wV)))

In [22]:
class_weights_fed

{0: 0.1, 1: 0.95, 2: 0.96}

In [23]:
# Configure training: Nadam optimizer, categorical cross-entropy for the one-hot
# targets, and accuracy / precision / recall reported per epoch.
model.compile(optimizer='Nadam', loss='categorical_crossentropy', metrics=['Accuracy', 'Precision', 'Recall' ])

In [24]:
# Train the model on the training set, validating on the held-out validation patients
# after every epoch. Early stopping may end the run before 20 epochs, and the
# per-class weights scale each sample's contribution to the loss.
# NOTE(review): in the saved output val_loss rises from epoch 1 onwards
# (0.2721 -> 0.3772) and the run ends with a KeyboardInterrupt, so `history` was
# presumably never assigned in that session — re-run to completion before evaluating.
history = model.fit(X_train_np,
                    one_hot_encoded_array_train,
                    epochs=20,
                    callbacks=[early_stopping],
                    class_weight = class_weights_fed,
                    validation_data=(X_val_np, one_hot_encoded_array_validation))

Epoch 1/20
[1m3744/3744[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m376s[0m 100ms/step - Accuracy: 0.8442 - Precision: 0.8836 - Recall: 0.7935 - loss: 0.1735 - val_Accuracy: 0.9263 - val_Precision: 0.9581 - val_Recall: 0.8597 - val_loss: 0.2721
Epoch 2/20
[1m3744/3744[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m366s[0m 98ms/step - Accuracy: 0.8849 - Precision: 0.9293 - Recall: 0.8172 - loss: 0.1094 - val_Accuracy: 0.9015 - val_Precision: 0.9439 - val_Recall: 0.8324 - val_loss: 0.3154
Epoch 3/20
[1m3744/3744[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m368s[0m 98ms/step - Accuracy: 0.9001 - Precision: 0.9389 - Recall: 0.8372 - loss: 0.0971 - val_Accuracy: 0.9165 - val_Precision: 0.9532 - val_Recall: 0.8449 - val_loss: 0.3257
Epoch 4/20
[1m3744/3744[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m366s[0m 98ms/step - Accuracy: 0.9020 - Precision: 0.9392 - Recall: 0.8448 - loss: 0.0957 - val_Accuracy: 0.8881 - val_Precision: 0.9397 - val_Recall: 0.7855 - val_loss: 0.3772
Epo

KeyboardInterrupt: 

# Evaluate

In [None]:
def model_evaluation(y_predi, model_name_pred, y_true=None):
    """Plot a row-normalised confusion matrix for the three classes N, S, V.

    Parameters
    ----------
    y_predi : array-like
        Per-class scores for each sample; the predicted class is the argmax
        over the last axis.
    model_name_pred : str
        Text used as the figure title.
    y_true : array-like, optional
        One-hot ground truth aligned with ``y_predi``. When omitted, the
        notebook-level ``one_hot_encoded_array_test`` is used, as before.

    Returns
    -------
    None
        The heatmap is shown with ``plt.show()``.
    """
    if y_true is None:
        y_true = one_hot_encoded_array_test

    predicted_class = np.argmax(y_predi, axis=-1)
    test_classes = np.argmax(y_true, axis=-1)
    classes_name = ['N', 'S', 'V']

    # Fix the label set so the matrix is always 3x3 and its rows/columns line up
    # with classes_name, even if a class is absent from both truth and predictions.
    cm = confusion_matrix(test_classes, predicted_class, labels=list(range(len(classes_name))))

    # Normalise each row by the number of true samples of that class. Rows with no
    # samples stay at 0 instead of becoming NaN through a division by zero.
    row_totals = cm.sum(axis=1, keepdims=True)
    cm = np.divide(cm, row_totals, out=np.zeros(cm.shape, dtype=float), where=row_totals != 0)

    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, xticklabels=list(classes_name), yticklabels=list(classes_name), cmap="Greens", annot=True,
                fmt='.2f')
    plt.ylabel('Actual labels')
    plt.xlabel('Predicted labels')
    plt.title(model_name_pred)
    plt.show()

In [None]:
# Class scores predicted by the model for every test segment.
y_predi = model.predict(X_test_np)

# Plot the normalised confusion matrix of those predictions against the test labels.
model_evaluation(y_predi, "Deep Learning Feature Extract")