In [None]:
import keras
import pandas as pd
import tensorflow as tf

In [None]:
# Report the library versions and the GPUs TensorFlow can see, one line each.
for env_label, env_value in (
    ("Keras version: ", keras.__version__),
    ("Tensorflow version: ", tf.__version__),
    ('GPUs available: ', tf.config.experimental.list_physical_devices('GPU')),
):
    print(env_label, env_value)

In [None]:
# Read the three source CSV files.
data_1, data_2, data_3 = (
    pd.read_csv(csv_path)
    for csv_path in (
        'mit-bih-data-clear-data.csv',
        'ptb-diagnostic-clear-data.csv',
        'autonomic-aging-a-dataset-clear-data.csv',
    )
)

# Stack them row-wise into one frame with a fresh 0..n-1 index.
df = pd.concat((data_1, data_2, data_3), ignore_index=True)

# Class balance of the combined data (shown as the cell output).
df.value_counts(subset='diagnosis')


In [None]:
# Take 50 samples of each class and remove them from the original dataframe.
# The sampling is seeded so the held-out rows (and everything trained on the
# remainder) are the same after every Restart & Run All.
# NOTE: this cell rebinds `df`; re-running it without re-running the loading
# cell removes a further 50 rows per class.
HOLDOUT_SIZE = 50
HOLDOUT_SEED = 42

class_zero = df[df['diagnosis'] == 0].sample(HOLDOUT_SIZE, random_state=HOLDOUT_SEED)
class_one = df[df['diagnosis'] == 1].sample(HOLDOUT_SIZE, random_state=HOLDOUT_SEED)

# Drop both hold-out sets in a single pass.
df = df.drop(class_zero.index.union(class_one.index))

In [None]:
# Class balance of the rows that remain after the hold-out samples were removed.
df.value_counts(subset='diagnosis')

In [None]:
# Persist the held-out samples, one CSV per class, without the index column.
for holdout_frame, holdout_path in (
    (class_zero, 'virgin_class_zero.csv'),
    (class_one, 'virgin_class_one.csv'),
):
    holdout_frame.to_csv(holdout_path, index=False)

In [None]:
# Count the missing values in every column (shown as the cell output).
df.isna().sum()


In [None]:
import numpy as np
from imblearn.over_sampling import SMOTE
# Balance the classes by synthesising extra samples with SMOTE.
# A fixed seed keeps the synthetic rows identical across kernel restarts.
# NOTE(review): oversampling runs before the train/test split further down, so
# synthetic points interpolated from rows that later land in the test set can
# end up in training. Consider resampling only the training portion.
smote = SMOTE(random_state=42)
X = df.drop(columns=['diagnosis'])
y = df['diagnosis']
X_resampled, y_resampled = smote.fit_resample(X, y)

# Re-attach the target so later cells work with a single balanced frame.
df_upsampled = pd.concat([X_resampled, y_resampled], axis=1)




In [None]:
# Class balance after oversampling.
df_upsampled.value_counts(subset='diagnosis')


In [None]:
# Uncomment to check Pairplot
# import seaborn as sns
# sns.set_theme(style='ticks')
# 
# sns.pairplot(df_upsampled, hue='diagnosis', kind='kde', corner=True)

In [None]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Columns fed to the model; every other column of df_upsampled is discarded.
important_features = [
    'minimum_hrv',
    'maximum_hrv',
    'standard_deviation_hrv',
    'mean_hr_slope',
    'tendency_slope',
    'lowest_heart_rate',
    'vlf_power',
    'lf_power',
    'hf_power',
    'approximation_entropy'
]
target = 'diagnosis'

# Select the inputs and turn +/-inf into NaN so the imputer can fill them.
# Taking the frame returned by replace() (rather than inplace=True on a column
# selection) avoids pandas' SettingWithCopyWarning and leaves df_upsampled untouched.
features = df_upsampled[important_features].replace([np.inf, -np.inf], np.nan)

# Median-impute the gaps, then standardise every feature.
# NOTE(review): both transformers are fitted on the full dataset, i.e. before the
# train/test split in a later cell, so test-set statistics leak into training.
imputer = SimpleImputer(strategy='median')
scaler = StandardScaler()
features = imputer.fit_transform(features)
features = scaler.fit_transform(features)

# Rebuild the frame on df_upsampled's own index so the target column below is
# aligned row-for-row even when that index is not a plain 0..n-1 range.
df = pd.DataFrame(features, columns=important_features, index=df_upsampled.index)
df[target] = df_upsampled[target]

df.to_csv('heart_rate_final_data.csv', index=False)

# Quick visual check of the pairwise correlations (features + target).
plt.matshow(df.corr())
plt.show()

In [None]:
# Separate the target column from the model inputs.
y = df['diagnosis']
X = df.drop(columns='diagnosis')

# Preview the first rows of the feature matrix.
X.head()

In [None]:
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
# Hold out 30% of the rows for testing, keeping the class ratio in both parts.
# The seed makes the split (and therefore every reported score) reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

In [None]:
from sklearn.model_selection import StratifiedKFold
# 5-fold stratified CV; shuffling is seeded so the folds are the same on every run.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

In [None]:
from keras import Sequential, Input
from keras.api.layers import Dense, Dropout
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from tensorflow.keras.regularizers import l1, l2


# Per-fold accuracies collected by the cross-validation loop, and the running
# fold counter it increments.
accuracy_scores, i = [], 0

def create_model(input_dim=None, learning_rate=0.000001):
    """Build and compile the dense binary classifier.

    The network is a stack of seven ReLU ``Dense`` layers (2048 down to 32
    units), each followed by ``Dropout``, ending in a single sigmoid unit.
    Kernel regularisation alternates between L1 and L2 (both 0.0001) from
    layer to layer.

    Args:
        input_dim: Number of input features. Defaults to ``X_train.shape[1]``
            (the notebook-level training matrix) when omitted, which is what
            the function always used before this parameter existed.
        learning_rate: Adam learning rate; the default is the previously
            hard-coded value.

    Returns:
        A compiled ``Sequential`` model using binary cross-entropy loss and
        the ``accuracy`` metric.
    """
    if input_dim is None:
        input_dim = X_train.shape[1]

    l1_regularizer = l1(0.0001)
    l2_regularizer = l2(0.0001)

    # (units, kernel regulariser, dropout rate applied after the layer)
    hidden_spec = [
        (2048, l1_regularizer, 0.5),
        (1024, l2_regularizer, 0.4),
        (512, l1_regularizer, 0.4),
        (256, l2_regularizer, 0.4),
        (128, l1_regularizer, 0.3),
        (64, l2_regularizer, 0.2),
        (32, l1_regularizer, 0.1),
    ]

    layers = [Input(shape=(input_dim,))]
    for units, regularizer, dropout_rate in hidden_spec:
        layers.append(Dense(units, activation='relu', kernel_regularizer=regularizer))
        layers.append(Dropout(dropout_rate))
    layers.append(Dense(1, activation='sigmoid'))

    model = Sequential(layers)
    # keras.optimizers is the public import path for the optimizer classes.
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy'],
    )
    return model

from keras.callbacks import ModelCheckpoint, EarlyStopping
import matplotlib.pyplot as plt

# Probability cut-off that turns the sigmoid output into a class label.
# NOTE(review): the 'accuracy' curves recorded by Keras presumably use its own
# default threshold rather than this one, so they can differ from the reported
# fold accuracy — confirm.
DECISION_THRESHOLD = 0.6


def _plot_history_curve(history, metric, label, fold):
    """Plot the train and validation curves of one recorded metric for one fold."""
    plt.plot(history.history[metric])
    plt.plot(history.history[f'val_{metric}'])
    plt.title(f'Model {fold} {label}')
    plt.ylabel(label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()


# Reference model: its freshly initialised weights are the common starting
# point of every fold.
model = create_model()
initial_weights = model.get_weights()

for fold, (train_index, test_index) in enumerate(skf.split(X_train, y_train)):
    print(f'Training fold {fold}...')

    # Every fold must start from an untrained network. Carrying the weights
    # over from the previous fold would mean the model has already been fitted
    # on rows that are now in the validation fold, inflating the fold scores.
    # Rebuilding the model also gives each fold a fresh optimizer state.
    model = create_model()
    model.set_weights(initial_weights)

    X_train_fold, X_test_fold = X_train.iloc[train_index], X_train.iloc[test_index]
    y_train_fold, y_test_fold = y_train.iloc[train_index], y_train.iloc[test_index]

    # Keep the best epoch of this fold on disk and stop once the monitored
    # validation quantity has not improved for 5 epochs.
    checkpoint = ModelCheckpoint(f'model_{fold}.keras', save_best_only=True)
    early_stopping = EarlyStopping(patience=5, restore_best_weights=True)

    # Train the model
    history = model.fit(
        X_train_fold,
        y_train_fold,
        epochs=200,
        batch_size=8,
        verbose=0,
        callbacks=[checkpoint, early_stopping],
        validation_data=(X_test_fold, y_test_fold),
    )

    # Evaluate the model on the held-out fold
    y_pred = (model.predict(X_test_fold) > DECISION_THRESHOLD).astype("int32")
    accuracy = accuracy_score(y_test_fold, y_pred)
    accuracy_scores.append(accuracy)

    cm = confusion_matrix(y_test_fold, y_pred)
    cr = classification_report(y_test_fold, y_pred)
    print(f"Fold {fold}")
    print("Confusion Matrix:\n", cm)
    print("\nClassification Report:\n", cr)

    _plot_history_curve(history, 'accuracy', 'Accuracy', fold)
    _plot_history_curve(history, 'loss', 'Loss', fold)

    # Keep the notebook-level fold counter in step with `fold`.
    i += 1

In [None]:
# Summarise cross-validation: mean and standard deviation of the fold accuracies.
fold_accuracies = np.asarray(accuracy_scores)
mean_accuracy = fold_accuracies.mean()
std_accuracy = fold_accuracies.std()

for summary_line in (
    f"Mean Accuracy: {mean_accuracy:.4f}",
    f"Standard Deviation of Accuracy: {std_accuracy:.4f}",
):
    print(summary_line)
