In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv1D, MaxPooling1D
from sklearn.metrics import classification_report, confusion_matrix

# Load the datasets.
# Forward slashes are used as the separator so the same relative paths resolve
# on Windows as well as Linux/macOS (a backslash is only a separator on Windows).
file_paths = [
    'ecg/INCART 2-lead Arrhythmia Database.csv',
    'ecg/MIT-BIH Arrhythmia Database.csv',
    'ecg/MIT-BIH Supraventricular Arrhythmia Database.csv',
    'ecg/Sudden Cardiac Death Holter Database.csv',
]

# Read every CSV and stack them row-wise into one DataFrame with a fresh index
dataframes = [pd.read_csv(path, low_memory=False) for path in file_paths]
data = pd.concat(dataframes, ignore_index=True)

# Preview the first rows and the column names of the combined data
print(data.head())
print(data.columns)

# Rows without a label cannot be used for supervised training, so drop them
data = data.dropna(subset=['type'])

# Sanity check: report how many missing labels remain after the drop
missing_values = data['type'].isnull().sum()
print(f"Missing values in 'type' column after dropping: {missing_values}")

# List the distinct labels so they can be compared against label_mapping
unique_labels = data['type'].unique()
print(f"Unique labels: {unique_labels}")

# Integer class id for every beat annotation expected in the 'type' column.
# The long spellings ('VEB', 'SVEB') share an id with their short counterparts,
# so the mapping defines five distinct classes (ids 0-4).
label_mapping = dict(
    N=0,     # Normal
    S=1,     # Supraventricular ectopic beat
    V=2,     # Ventricular ectopic beat
    F=3,     # Fusion beat
    Q=4,     # Unknown beat
    VEB=2,   # Ventricular Ectopic Beat (same class as 'V')
    SVEB=1,  # Supraventricular Ectopic Beat (same class as 'S')
)

# Discard every row that still has a missing value in any column
data = data.dropna()

# Report the size of the cleaned dataset
print(f"Data shape after dropping rows with missing values: {data.shape}")

# Labels come from the 'type' column; features are every column from
# position 2 onward (positions 0 and 1 hold 'record' and 'type')
y = data['type'].values
X = data.iloc[:, 2:].values

# Report the feature matrix dimensions
print(f"Feature matrix shape: {X.shape}")

# Compare the observed feature count with the hard-coded expectation
expected_feature_count = 32  # Updated based on your data
_, actual_feature_count = X.shape
print(f"Expected feature count: {expected_feature_count}, Actual feature count: {actual_feature_count}")

# On a mismatch, list the feature columns to help spot the discrepancy
if expected_feature_count != actual_feature_count:
    print(f"Feature columns: {data.columns[2:]}")

# Encode the labels.
# Fail early, naming the offending values, if the data contains a label that
# label_mapping does not cover (the lookup below would otherwise stop with a
# bare KeyError on the first such label).
unmapped_labels = sorted(set(y) - set(label_mapping), key=str)
if unmapped_labels:
    raise ValueError(f"Labels missing from label_mapping: {unmapped_labels}")

# Pin the one-hot width to the full class set defined by label_mapping.
# Without num_classes, to_categorical infers the width from the largest id
# present in y, so the target matrix would shrink whenever the
# highest-numbered class is absent from the data.
num_classes = max(label_mapping.values()) + 1
y = np.array([label_mapping[label] for label in y])
y = to_categorical(y, num_classes=num_classes)

# Split the data into training, validation, and test sets.
# 20% is held out for testing, then 20% of the remainder for validation
# (64% / 16% / 20% overall).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Normalize the data.
# The scaler is fitted on the training split only and then applied to the
# validation and test splits, so no statistics from held-out data leak into
# training. X itself is left unscaled.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

# Reshape the input data to (samples, features, 1) to fit the Conv1D layer
X_train = X_train.reshape((X_train.shape[0], expected_feature_count, 1))
X_val = X_val.reshape((X_val.shape[0], expected_feature_count, 1))
X_test = X_test.reshape((X_test.shape[0], expected_feature_count, 1))

# Define the model: two Conv1D -> MaxPooling1D -> Dropout stages followed by
# a flattened dense head with a 5-way softmax output
model = Sequential([
    # First convolutional stage; also declares the (features, 1) input shape
    Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(expected_feature_count, 1)),
    MaxPooling1D(pool_size=2),
    Dropout(0.25),

    # Second convolutional stage
    Conv1D(filters=128, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Dropout(0.25),

    # Classifier head
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(5, activation='softmax'),  # Number of classes should match with your dataset
])

# Compile with Adam and categorical cross-entropy (targets are one-hot), tracking accuracy
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model, monitoring loss/accuracy on the validation split each epoch
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_val, y_val))

# Evaluate the model on the held-out test split
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Accuracy: {accuracy:.2f}')

# Convert predicted probabilities and one-hot targets back to class ids
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

# zero_division=0 states explicitly that a metric with a zero denominator
# (e.g. precision for a class that is never predicted) is reported as 0,
# instead of relying on the default that emits a warning per metric.
print(classification_report(y_true, y_pred_classes, zero_division=0))

# Pass the full list of class ids so row/column i always refers to class i,
# even when a class is missing from both the test labels and the predictions.
class_ids = np.arange(y_test.shape[1])
conf_matrix = confusion_matrix(y_true, y_pred_classes, labels=class_ids)
print(conf_matrix)



  record type  0_pre-RR  0_post-RR   0_pPeak   0_tPeak   0_rPeak   0_sPeak  \
0    I01    N     163.0      165.0  0.069610 -0.083281  0.614133 -0.392761   
1    I01    N     165.0      166.0 -0.097030  0.597254 -0.078704 -0.078704   
2    I01    N     166.0      102.0  0.109399  0.680528 -0.010649 -0.010649   
3    I01  VEB     102.0      231.0  0.176376  0.256431 -0.101098 -0.707525   
4    I01    N     231.0      165.0  0.585577  0.607461 -0.083499 -0.083499   

    0_qPeak  0_qrs_interval  ...   1_qPeak  1_qrs_interval  1_pq_interval  \
0  0.047159            15.0  ... -0.023370            14.0            3.0   
1 -0.137781             3.0  ...  0.081637            15.0            5.0   
2 -0.720620             6.0  ... -0.148539            33.0           13.0   
3 -0.101098             4.0  ...  0.046898            21.0            9.0   
4 -0.167858             3.0  ... -0.112552            32.0            5.0   

   1_qt_interval  1_st_interval  1_qrs_morph0  1_qrs_morph1  1_qrs_m

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


ImportError: `save_model()` using h5 format requires h5py. Could not import h5py.

In [12]:
# Persist the trained model under the path 'arrhythmia_model' (no file extension given).
# NOTE(review): the on-disk format is chosen by the installed Keras/TensorFlow
# version; newer Keras releases may require an explicit `.keras` or `.h5`
# extension here. Confirm against the environment this notebook targets.
model.save('arrhythmia_model')



INFO:tensorflow:Assets written to: arrhythmia_model\assets


INFO:tensorflow:Assets written to: arrhythmia_model\assets
