In [None]:
import keras
import pandas as pd

In [None]:
# Columns removed from the merged table during feature cleanup
columns_to_drop = ['patient', 'Max_hr_slope', 'mean_deviation', 'outliers_percentage']

# Columns whose values are divided by 100 during feature cleanup
columns_to_reduce = [
    'vlf_power', 'lf_power', 'hf_power',
    'highest_heart_rate', 'lowest_heart_rate',
    'mean_heart_rate', 'median_heart_rate',
]

# Load the three source CSV files
data_1 = pd.read_csv('mit-bih-data-clear-data.csv')
data_2 = pd.read_csv('ptb-diagnostic-clear-data.csv')
data_3 = pd.read_csv('autonomic-aging-a-dataset-clear-data.csv')

# Stack them row-wise under a fresh 0..n-1 index and save the merged table
# as it is *before* any column is rescaled or dropped
df = pd.concat([data_1, data_2, data_3], ignore_index=True)
df.to_csv('heart_rate_final_data.csv', index=False)

# Feature cleanup: rescale the selected columns, then discard the unused ones
df[columns_to_reduce] = df[columns_to_reduce] / 100
df = df.drop(columns=columns_to_drop)

# Class balance of the target column (displayed as the cell output)
df.value_counts('diagnosis')


In [None]:
# Let's check for missing values: number of NaN entries in each column

df.isna().sum()


In [None]:
# Replace missing and infinite feature values with the column median
from sklearn.impute import SimpleImputer
import numpy as np

# Treat +/-inf as missing so the imputer handles both cases the same way
df = df.replace([np.inf, -np.inf], np.nan)

# The target must never be imputed: a median-filled 'diagnosis' would be a
# fabricated label. Rows without a label are dropped instead, and the index is
# rebuilt so it stays a contiguous 0..n-1 range.
df = df.dropna(subset=['diagnosis']).reset_index(drop=True)

# Median-impute the feature columns only; 'diagnosis' keeps its original values
feature_columns = df.columns.drop('diagnosis')
imputer = SimpleImputer(strategy='median')
df[feature_columns] = imputer.fit_transform(df[feature_columns])

In [None]:
# Resample the data

# Scenario 1: random oversampling with replacement. It works, but it duplicates minority rows and can lead to overfitting.

# from sklearn.utils import resample
# 
# df_majority = df[df.diagnosis==1]
# df_minority = df[df.diagnosis==0]
# 
# df_minority_upsampled = resample(df_minority, replace=True, n_samples=487)
# df_upsampled = pd.concat([df_majority, df_minority_upsampled])
# df_upsampled.value_counts('diagnosis')

# Scenario 2: Let's use SMOTE to generate synthetic data
#
# SMOTE must only ever see training rows. Oversampling the full dataset here
# would put synthetic points, interpolated from rows that later land in the
# test split, on both sides of the train/test split and inflate the test
# metrics. The cell that calls train_test_split already applies SMOTE to the
# training split, so no resampling is done at this stage.
from imblearn.over_sampling import SMOTE

X = df.drop(columns=['diagnosis'])
y = df['diagnosis']

# Later cells read `df_upsampled`; keep the name bound, but to the original
# (non-augmented) rows so the held-out data contains real samples only.
df_upsampled = pd.concat([X, y], axis=1)




In [None]:
# Number of rows per diagnosis class in `df_upsampled`
df_upsampled.value_counts(subset='diagnosis')

In [None]:
# Separate the target column from the feature matrix
y = df_upsampled['diagnosis']
X = df_upsampled.drop('diagnosis', axis=1)

# Preview the first feature rows
X.head()

In [None]:
from sklearn.impute import SimpleImputer
import numpy as np

# Safety net before scaling: make sure no inf/NaN is left in the features.
# pd.DataFrame(...) makes the cell re-runnable even if X has already been
# turned into a NumPy array by a previous run of a later cell.
X = pd.DataFrame(X).replace([np.inf, -np.inf], np.nan)

# Keep X a DataFrame (same column labels) instead of a bare array, so the
# result can be inspected and this cell can be executed more than once.
imputer = SimpleImputer(strategy='median')
X = pd.DataFrame(imputer.fit_transform(X), columns=X.columns)

# Show the frame this cell actually produced
X.head()

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardise every feature column with a StandardScaler fitted on X.
# NOTE(review): the scaler is fitted on all rows, before the train/test split
# done in a later cell, so held-out rows contribute to the scaling statistics.
scaler = StandardScaler()
scaler.fit(X)
X = scaler.transform(X)


In [None]:
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE

# Hold out 30% of the rows for testing, stratified on the label so both splits
# keep the same class ratio. The fixed seed makes the split identical on every
# run, so results are comparable between executions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

# Balance the classes on the training split only; the test split is left
# untouched. Seeded so the synthetic samples are reproducible.
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

In [None]:
# Import from the documented public namespaces (keras / keras.layers) rather
# than the generated `keras.api` package.
from keras import Input, Sequential
from keras.layers import Activation, Dense, Dropout

# Fully connected binary classifier: four ReLU hidden layers of decreasing
# width, each followed by dropout, and a single sigmoid output unit.
model = Sequential([
    # Declare the input with an explicit Input object; passing `input_shape=`
    # to the first Dense layer is the deprecated way of doing this.
    Input(shape=(X_train.shape[1],)),
    Dense(256, activation='relu'),
    Dropout(0.4),
    Dense(128, activation='relu'),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(32, activation='relu'),
    Dropout(0.1),
    Dense(1, activation='sigmoid')
])

In [None]:
# Import from the documented public namespace rather than `keras.api`
from keras.callbacks import ModelCheckpoint

# Binary classification setup: Adam optimiser, binary cross-entropy loss,
# accuracy reported as the metric.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.000001),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Write the model to 'model.keras' whenever the monitored quantity improves.
# 'val_loss' is the callback's default; it is spelled out so the selection
# criterion is visible.
checkpoint = ModelCheckpoint('model.keras', monitor='val_loss', save_best_only=True)

# NOTE(review): validation_data is the held-out test split, so the checkpoint
# is selected on the same rows that are later reported as test performance.
# NOTE(review): after fit() the in-memory `model` holds the last-epoch weights;
# the best checkpoint lives only in 'model.keras' -- confirm which one the
# evaluation cells are meant to use.
history = model.fit(
    X_train_resampled,
    y_train_resampled,
    validation_data=(X_test, y_test),
    epochs=3000,
    callbacks=[checkpoint],
)


In [None]:
# Score the in-memory model on the held-out split; evaluate() returns the loss
# followed by the compiled metric (accuracy).
test_scores = model.evaluate(X_test, y_test)
loss, accuracy = test_scores

print(f'Test Accuracy: {accuracy:.4f}')
print(f'Test Loss: {loss:.4f}')

In [None]:
from sklearn.metrics import confusion_matrix, classification_report

# Turn the sigmoid outputs into hard 0/1 labels using a 0.5 threshold
y_prob = model.predict(X_test)
y_pred = (y_prob > 0.5).astype("int32")

# Summarise the predictions against the true test labels
cm = confusion_matrix(y_test, y_pred)
cr = classification_report(y_test, y_pred)

print("Confusion Matrix:\n", cm)
print("\nClassification Report:\n", cr)

In [None]:
# Plot the per-epoch training and validation accuracy recorded by fit()
import matplotlib.pyplot as plt

fig, ax = plt.subplots()
for metric_name in ('accuracy', 'val_accuracy'):
    ax.plot(history.history[metric_name])

ax.set_title('Model Accuracy')
ax.set_ylabel('Accuracy')
ax.set_xlabel('Epoch')
ax.legend(['Train', 'Validation'], loc='upper left')
plt.show()
