# [Capstone Project] Aim 2 Final Report Nishi

In [78]:
# Install if not already in your environment
!pip install tensorflow scikit-learn pandas numpy



In [79]:
import os
import pickle
from pathlib import Path

import numpy as np
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tensorflow.keras import Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Bidirectional, Dense, Dropout
from tensorflow.keras.utils import set_random_seed
from tensorflow.keras.utils import to_categorical

In [80]:
# Load ECG5000 dataset

In [81]:
import pickle
import numpy as np

# Load data
# The directory holding the ECG5000 pickles can be overridden with the
# ECG5000_DATA_DIR environment variable; it defaults to the original location.
DATA_DIR = Path(os.environ.get('ECG5000_DATA_DIR', '/Users/nishimewada/Downloads'))

# NOTE(review): pickle.load can execute arbitrary code embedded in the file --
# only load pickles that come from a trusted source.
with open(DATA_DIR / 'ECG5000_train.pickle', 'rb') as f:
    train_data = pickle.load(f)

# NOTE: the held-out split is read from the *validation* pickle but is referred
# to as the test set (test_data / X_test / y_test) in the rest of the notebook.
with open(DATA_DIR / 'ECG5000_validation.pickle', 'rb') as f:
    test_data = pickle.load(f)

In [82]:
# After loading train_data and test_data from .pickle
# Each row: [label, feature1, feature2, ..., featureN]

In [83]:
# Column 0 of each row is the class label; every remaining column is a feature.
X_train, y_train = train_data[:, 1:], train_data[:, 0]
X_test, y_test = test_data[:, 1:], test_data[:, 0]

In [84]:
# Normalize features

In [85]:
# Fit the scaler on the training features only, then apply the same learned
# min/max to both splits.
scaler = MinMaxScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [86]:
# Encode class labels

In [87]:
# Learn the label -> integer mapping from the training labels and reuse it
# unchanged for the held-out labels.
encoder = LabelEncoder().fit(y_train)
y_train_encoded = encoder.transform(y_train)
y_test_encoded = encoder.transform(y_test)

In [88]:
# One-hot encoding for deep learning

In [89]:
from tensorflow.keras.utils import to_categorical
# Pin the one-hot width to the number of classes known to the encoder so the
# train and test matrices always have the same number of columns, even when
# the highest-numbered class is missing from one of the splits.
n_classes = len(encoder.classes_)
y_train_cat = to_categorical(y_train_encoded, num_classes=n_classes)
y_test_cat = to_categorical(y_test_encoded, num_classes=n_classes)

In [90]:
# Reshape input for CNN

In [91]:
# Conv1D expects (samples, timesteps, channels): append a single channel axis.
X_train_cnn = np.expand_dims(X_train, axis=-1)
X_test_cnn = np.expand_dims(X_test, axis=-1)

In [92]:
# Define Method A: CNN + BiLSTM
def build_model_A(input_shape, num_classes):
    """Build and compile the CNN + bidirectional-LSTM classifier (Method A).

    Args:
        input_shape: Shape of a single sample without the batch axis. The
            notebook passes ``X_train_cnn.shape[1:]``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss,
        the Adam optimizer and an accuracy metric.
    """
    model = Sequential()
    # Declare the input with an explicit Input layer; passing input_shape to
    # the first Conv1D layer raises a UserWarning in recent Keras versions.
    model.add(Input(shape=input_shape))
    model.add(Conv1D(32, kernel_size=3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    # return_sequences=False: only the final BiLSTM state feeds the dense head.
    model.add(Bidirectional(LSTM(64, return_sequences=False)))
    model.add(Dropout(0.3))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [93]:
# Define Method B: Lightweight CNN + LSTM
def build_model_B(input_shape, num_classes):
    """Build and compile the CNN + unidirectional-LSTM classifier (Method B).

    Args:
        input_shape: Shape of a single sample without the batch axis. The
            notebook passes ``X_train_cnn.shape[1:]``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss,
        the Adam optimizer and an accuracy metric.
    """
    model = Sequential()
    # Declare the input with an explicit Input layer; passing input_shape to
    # the first Conv1D layer raises a UserWarning in recent Keras versions.
    model.add(Input(shape=input_shape))
    model.add(Conv1D(32, kernel_size=5, activation='relu'))
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(0.25))
    # return_sequences=False: only the final LSTM state feeds the dense head.
    model.add(LSTM(64, return_sequences=False))
    model.add(Dropout(0.3))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [94]:
# Train Method A
# Seed Python, NumPy and the Keras backend before building the model so that
# weight initialisation, batch shuffling and dropout are repeatable on a
# fresh Restart & Run All. 42 is an arbitrary fixed seed.
set_random_seed(42)
print("\nTraining Method A (CNN + BiLSTM)...")
model_A = build_model_A(X_train_cnn.shape[1:], y_train_cat.shape[1])
model_A.fit(X_train_cnn, y_train_cat, epochs=50, batch_size=32, verbose=1)
# argmax over the softmax outputs gives the predicted encoded class index.
pred_A = np.argmax(model_A.predict(X_test_cnn), axis=1)


Training Method A (CNN + BiLSTM)...
Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 16ms/step - accuracy: 0.5697 - loss: 1.4677
Epoch 2/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.5839 - loss: 0.9217
Epoch 3/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.5970 - loss: 0.9154
Epoch 4/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.6309 - loss: 0.8524
Epoch 5/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.6372 - loss: 0.7950
Epoch 6/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.8389 - loss: 0.6905
Epoch 7/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.9124 - loss: 0.4552
Epoch 8/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.9056 - loss: 0.3717
Epoch 9/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [95]:
# Train Method B
# Seed Python, NumPy and the Keras backend before building the model so that
# weight initialisation, batch shuffling and dropout are repeatable on a
# fresh Restart & Run All. 42 is an arbitrary fixed seed.
set_random_seed(42)
print("\nTraining Method B (Lightweight CNN + LSTM)...")
model_B = build_model_B(X_train_cnn.shape[1:], y_train_cat.shape[1])
model_B.fit(X_train_cnn, y_train_cat, epochs=50, batch_size=32, verbose=1)
# argmax over the softmax outputs gives the predicted encoded class index.
pred_B = np.argmax(model_B.predict(X_test_cnn), axis=1)


Training Method B (Lightweight CNN + LSTM)...
Epoch 1/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.5233 - loss: 1.4630
Epoch 2/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.5782 - loss: 0.9377
Epoch 3/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.6171 - loss: 0.8677
Epoch 4/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.7490 - loss: 0.7490
Epoch 5/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.8820 - loss: 0.5734
Epoch 6/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.9277 - loss: 0.3648
Epoch 7/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.9372 - loss: 0.2847
Epoch 8/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.9158 - loss: 0.3557
E

In [96]:
# Train KNN (Baseline Method C)
# The baseline works on the flat 2-D scaled features and the integer-encoded
# labels (not the reshaped CNN input or the one-hot targets).
print("\nTraining Method C (KNN Baseline)...")
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train_encoded)
pred_C = knn.predict(X_test)


Training Method C (KNN Baseline)...


In [97]:
# Evaluate all
# Each entry pairs the header to print with that method's predicted class
# indices; every method is scored against the same encoded held-out labels.
method_predictions = [
    ("\nMethod A (CNN + BiLSTM):", pred_A),
    ("\nMethod B (Lightweight CNN + LSTM):", pred_B),
    ("\nMethod C (KNN):", pred_C),
]

for header, predicted in method_predictions:
    print(header)
    print("Accuracy:", accuracy_score(y_test_encoded, predicted))
    print("F1 Score:", f1_score(y_test_encoded, predicted, average='weighted'))


Method A (CNN + BiLSTM):
Accuracy: 0.9233333333333333
F1 Score: 0.9153736119074615

Method B (Lightweight CNN + LSTM):
Accuracy: 0.92
F1 Score: 0.9088757422789447

Method C (KNN):
Accuracy: 0.9226666666666666
F1 Score: 0.9114002745435116
