In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder

from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from sklearn.model_selection import GridSearchCV
from scikeras.wrappers import KerasClassifier

In [None]:
time_features = pd.read_csv('time_features_updated_relevance.csv')
patients = pd.read_csv('patient_scp.csv')

time_features = time_features.merge(patients[['ecg_id', 'label']], on='ecg_id', how='left')

time_features.head()

Unnamed: 0,value__quantile__q_0.9,value__quantile__q_0.6,value__quantile__q_0.4,value__quantile__q_0.7,value__quantile__q_0.1,value__quantile__q_0.8,"value__change_quantiles__f_agg_""var""__isabs_True__qh_0.4__ql_0.2","value__change_quantiles__f_agg_""var""__isabs_False__qh_0.4__ql_0.2","value__change_quantiles__f_agg_""mean""__isabs_True__qh_0.4__ql_0.2","value__change_quantiles__f_agg_""var""__isabs_True__qh_0.8__ql_0.6",...,"value__fft_coefficient__attr_""abs""__coeff_11",value__lempel_ziv_complexity__bins_10,value__partial_autocorrelation__lag_3,value__partial_autocorrelation__lag_4,"value__agg_linear_trend__attr_""stderr""__chunk_len_50__f_agg_""min""","value__agg_linear_trend__attr_""stderr""__chunk_len_50__f_agg_""var""",value__skewness,value__fourier_entropy__bins_3,ecg_id,label
0,0.088197,-0.014966,-0.031706,-0.002513,-0.058274,0.032038,2e-06,5e-06,0.001747,1.3e-05,...,74.502545,0.0838,-1.512835,2.218194,8.8e-05,3e-05,2.905521,0.220352,1,NORM
1,0.154646,-0.02835,-0.050644,-0.011865,-0.077629,0.024123,1e-06,2e-06,0.001009,8e-06,...,24.919214,0.0786,-1.059925,11.497906,0.000202,2.7e-05,2.195819,0.079983,2,NORM
2,0.135799,-0.026894,-0.043165,-0.007463,-0.067333,0.030549,2e-06,4e-06,0.001465,2e-05,...,100.593283,0.081,-1.793943,1.794352,0.000112,4.9e-05,2.827517,0.183378,3,NORM
3,0.188256,-0.015247,-0.038007,0.009945,-0.070534,0.048354,2e-06,4e-06,0.001515,1.4e-05,...,40.714874,0.0904,-1.29911,3.141033,0.000451,4.3e-05,0.282202,0.125256,4,NORM
4,0.113084,-0.012416,-0.025497,-0.002573,-0.052509,0.020982,2e-06,3e-06,0.001378,5e-06,...,79.793704,0.0842,-1.839826,1.745159,0.000152,1.8e-05,1.634762,0.190068,5,NORM


In [3]:
print(time_features['label'].unique())

['NORM' 'MI' 'STTC' 'HYP' 'CD']


In [4]:
X = time_features.drop(['ecg_id', 'label'], axis=1)
y = time_features['label']

label_mapping = {"NORM":0, "MI":1, "STTC":2, "HYP":3, "CD":4}
y = np.array([label_mapping[x] for x in y])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(16635, 50) (4159, 50) (16635,) (4159,)


In [5]:
X.head()

Unnamed: 0,value__quantile__q_0.9,value__quantile__q_0.6,value__quantile__q_0.4,value__quantile__q_0.7,value__quantile__q_0.1,value__quantile__q_0.8,"value__change_quantiles__f_agg_""var""__isabs_True__qh_0.4__ql_0.2","value__change_quantiles__f_agg_""var""__isabs_False__qh_0.4__ql_0.2","value__change_quantiles__f_agg_""mean""__isabs_True__qh_0.4__ql_0.2","value__change_quantiles__f_agg_""var""__isabs_True__qh_0.8__ql_0.6",...,"value__agg_autocorrelation__f_agg_""median""__maxlag_40","value__fft_coefficient__attr_""abs""__coeff_17","value__fft_coefficient__attr_""abs""__coeff_11",value__lempel_ziv_complexity__bins_10,value__partial_autocorrelation__lag_3,value__partial_autocorrelation__lag_4,"value__agg_linear_trend__attr_""stderr""__chunk_len_50__f_agg_""min""","value__agg_linear_trend__attr_""stderr""__chunk_len_50__f_agg_""var""",value__skewness,value__fourier_entropy__bins_3
0,0.088197,-0.014966,-0.031706,-0.002513,-0.058274,0.032038,2e-06,5e-06,0.001747,1.3e-05,...,0.06517,9.043106,74.502545,0.0838,-1.512835,2.218194,8.8e-05,3e-05,2.905521,0.220352
1,0.154646,-0.02835,-0.050644,-0.011865,-0.077629,0.024123,1e-06,2e-06,0.001009,8e-06,...,0.574924,58.900234,24.919214,0.0786,-1.059925,11.497906,0.000202,2.7e-05,2.195819,0.079983
2,0.135799,-0.026894,-0.043165,-0.007463,-0.067333,0.030549,2e-06,4e-06,0.001465,2e-05,...,0.125145,6.973763,100.593283,0.081,-1.793943,1.794352,0.000112,4.9e-05,2.827517,0.183378
3,0.188256,-0.015247,-0.038007,0.009945,-0.070534,0.048354,2e-06,4e-06,0.001515,1.4e-05,...,0.169628,29.327653,40.714874,0.0904,-1.29911,3.141033,0.000451,4.3e-05,0.282202,0.125256
4,0.113084,-0.012416,-0.025497,-0.002573,-0.052509,0.020982,2e-06,3e-06,0.001378,5e-06,...,0.151764,5.776241,79.793704,0.0842,-1.839826,1.745159,0.000152,1.8e-05,1.634762,0.190068


# Random Forest


In [6]:
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)

y_pred = rf.predict(X_test)
accuracy_rf = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy_rf)

# importances = rf.feature_importances_
# indices = np.argsort(importances)[::-1]

# plt.figure(figsize=(12, 6))
# plt.title("Feature Importances")
# plt.bar(range(X_train.shape[1]), importances[indices], align="center")
# plt.xticks(range(X_train.shape[1]), X.columns[indices], rotation=90)
# plt.xlim([-1, X_train.shape[1]])
# plt.show()

Accuracy: 0.588362587160375


## XGB Classifier

In [7]:
xgb_model = XGBClassifier(
    num_class=5,
    n_estimators=200,
    max_depth=7,
    learning_rate=0.01,
    random_state=42,
    subsample = 0.8
)

xgb_model.fit(X_train, y_train)
y_pred = xgb_model.predict(X_test)
accuracy_xgb = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy_xgb)

# importances = xgb_model.feature_importances_
# indices = np.argsort(importances)[::-1]

# plt.figure(figsize=(12, 6))
# plt.title("XGBoost Feature Importances")
# plt.bar(range(X_train.shape[1]), importances[indices], align="center")
# plt.xticks(range(X_train.shape[1]), X.columns[indices], rotation=90)
# plt.xlim([-1, X_train.shape[1]])
# plt.show()

Accuracy: 0.5914883385429189


# Neural Network

In [6]:
input_dimension = X_train.shape[1]
output_dimension = 5

model = Sequential([
    Dense(128, input_dim=input_dimension, activation='sigmoid'),
    BatchNormalization(),
    Dropout(0.1),
    Dense(64, activation='sigmoid'),
    Dropout(0.1),
    Dense(32, activation='sigmoid'),
    Dropout(0.1),
    Dense(output_dimension, activation='softmax')
])

model.compile(optimizer= tf.keras.optimizers.Adam(learning_rate = 0.05), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test), verbose = 2)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
520/520 - 4s - 7ms/step - accuracy: 0.4355 - loss: 1.4494 - val_accuracy: 0.4393 - val_loss: 1.4443
Epoch 2/100
520/520 - 2s - 3ms/step - accuracy: 0.4359 - loss: 1.4365 - val_accuracy: 0.4393 - val_loss: 1.4414
Epoch 3/100
520/520 - 1s - 2ms/step - accuracy: 0.4359 - loss: 1.4334 - val_accuracy: 0.4393 - val_loss: 1.4438
Epoch 4/100
520/520 - 1s - 1ms/step - accuracy: 0.4359 - loss: 1.4349 - val_accuracy: 0.4393 - val_loss: 1.4286
Epoch 5/100
520/520 - 1s - 1ms/step - accuracy: 0.4359 - loss: 1.4343 - val_accuracy: 0.4393 - val_loss: 1.4418
Epoch 6/100
520/520 - 1s - 1ms/step - accuracy: 0.4359 - loss: 1.4356 - val_accuracy: 0.4393 - val_loss: 1.4285
Epoch 7/100
520/520 - 1s - 1ms/step - accuracy: 0.4359 - loss: 1.4338 - val_accuracy: 0.4393 - val_loss: 1.4287
Epoch 8/100
520/520 - 1s - 1ms/step - accuracy: 0.4359 - loss: 1.4333 - val_accuracy: 0.4393 - val_loss: 1.4275
Epoch 9/100
520/520 - 1s - 1ms/step - accuracy: 0.4359 - loss: 1.4336 - val_accuracy: 0.4393 - val_loss:

In [7]:
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {test_accuracy:.2f}")

Test Accuracy: 0.44


In [8]:
# Define model function
def build_model(learning_rate=0.001, dropout_rate=0.3):
    model = Sequential([
        Dense(128, activation='sigmoid', input_dim=X_train.shape[1]),
        Dropout(dropout_rate),
        Dense(64, activation='sigmoid'),
        Dropout(dropout_rate),
        Dense(32, activation='sigmoid'),
        Dropout(dropout_rate),
        Dense(len(label_mapping), activation='softmax')
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Wrap model with KerasClassifier
keras_clf = KerasClassifier(
    model=build_model,
    verbose=0,  # Suppress training output during grid search
    batch_size=128,
    epochs=25
)

# Define parameter grid
param_grid = {
    "model__learning_rate": [0.05],
    "model__dropout_rate": [0.1],
    "batch_size": [32],
    "epochs": [25]
}

# Perform grid search
grid = GridSearchCV(estimator=keras_clf, param_grid=param_grid, cv=3)
grid_result = grid.fit(X_train, y_train)

# Print best parameters and results
print(f"Best Parameters: {grid_result.best_params_}")
print(f"Best Accuracy: {grid_result.best_score_:.2f}")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Best Parameters: {'batch_size': 32, 'epochs': 25, 'model__dropout_rate': 0.1, 'model__learning_rate': 0.05}
Best Accuracy: 0.44


--------

In [11]:
def create_cnn(input_shape, output_dim, learning_rate=0.001):
    model = Sequential([
        tf.keras.layers.Conv1D(256, kernel_size=3, activation='sigmoid', input_shape=input_shape),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling1D(pool_size=2),
        tf.keras.layers.Conv1D(128, kernel_size=3, activation='sigmoid'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPooling1D(pool_size=2),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(64, activation='sigmoid'),
        tf.keras.layers.Dropout(0.3),
        tf.keras.layers.Dense(output_dim, activation='softmax')
    ])
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Reshape X_train and X_test for CNN
X_train_cnn = np.expand_dims(X_train.values, axis=-1)
X_test_cnn = np.expand_dims(X_test.values, axis=-1)

input_shape_cnn = X_train_cnn.shape[1:]
cnn_model = create_cnn(input_shape_cnn, output_dimension)
cnn_history = cnn_model.fit(X_train_cnn, y_train, validation_data=(X_test_cnn, y_test), 
                            epochs=25, batch_size=128, verbose=2)

# Evaluate CNN
cnn_loss, cnn_accuracy = cnn_model.evaluate(X_test_cnn, y_test, verbose=0)
print(f"CNN Test Accuracy: {cnn_accuracy:.2f}")


Epoch 1/25


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


130/130 - 3s - 27ms/step - accuracy: 0.4420 - loss: 1.3749 - val_accuracy: 0.2376 - val_loss: 1.5414
Epoch 2/25
130/130 - 2s - 15ms/step - accuracy: 0.4839 - loss: 1.2819 - val_accuracy: 0.4852 - val_loss: 1.3182
Epoch 3/25
130/130 - 2s - 15ms/step - accuracy: 0.5084 - loss: 1.2535 - val_accuracy: 0.5112 - val_loss: 1.2297
Epoch 4/25
130/130 - 2s - 15ms/step - accuracy: 0.5092 - loss: 1.2496 - val_accuracy: 0.5266 - val_loss: 1.2033
Epoch 5/25
130/130 - 2s - 16ms/step - accuracy: 0.5203 - loss: 1.2325 - val_accuracy: 0.5263 - val_loss: 1.2081
Epoch 6/25
130/130 - 2s - 17ms/step - accuracy: 0.5223 - loss: 1.2289 - val_accuracy: 0.5270 - val_loss: 1.2066
Epoch 7/25
130/130 - 2s - 17ms/step - accuracy: 0.5247 - loss: 1.2197 - val_accuracy: 0.5355 - val_loss: 1.1923
Epoch 8/25
130/130 - 2s - 17ms/step - accuracy: 0.5301 - loss: 1.2156 - val_accuracy: 0.5343 - val_loss: 1.1965
Epoch 9/25
130/130 - 3s - 20ms/step - accuracy: 0.5299 - loss: 1.2077 - val_accuracy: 0.5412 - val_loss: 1.1813
Epo

---------

In [40]:
cardiac = pd.read_csv('ECG_Cardiac_Features.csv')
time_cardiac = time_features.merge(cardiac, on='ecg_id', how='inner')

X = time_cardiac.drop(['ecg_id','Patient_ID','Label','label'], axis=1)
y = time_cardiac['label']

# X = cardiac.drop(['ecg_id','Patient_ID','Label'], axis=1)
# y = cardiac['Label']

# label_mapping = {"NORM":0, "MI":1, "STTC":2, "HYP":3, "CD":4}
# y = np.array([label_mapping[x] for x in y])

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(y)

0        NORM
1        NORM
2        NORM
3        NORM
4        NORM
         ... 
20789      MI
20790     HYP
20791      MI
20792    STTC
20793    STTC
Name: label, Length: 20794, dtype: object


In [41]:
non_numeric_cols = X.select_dtypes(include=['object', 'category']).columns

print(non_numeric_cols)

Index([], dtype='object')


In [43]:
rf_combined = RandomForestClassifier(n_estimators=100)
rf_combined.fit(X_train, y_train)

y_pred = rf_combined.predict(X_test)
accuracy_rf_combined = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy_rf_combined)

Accuracy: 0.5984611685501322
