Random forest and k-fold test

In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [15]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, Conv2D, Conv3D, LSTM, Flatten, Dense, Dropout, BatchNormalization, MaxPooling1D, MaxPooling2D, MaxPooling3D
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# --------- STEP 1: LOAD DATA ---------
# header=None: the first CSV row is read as data, so pandas assigns integer
# column labels.
df = pd.read_csv("/kaggle/input/ecg-dataset/ecg.csv", header=None)  # Replace with your dataset path

# --------- STEP 2: SEPARATE FEATURES & LABELS ---------
label_column = df.columns[-1]                  # the last column holds the labels
X = df.drop(columns=label_column).to_numpy()   # every other column is a feature
Y = df[label_column].to_numpy()

# --------- STEP 3: SPLIT INDICES & NORMALIZATION ---------
# Choose the train/test rows *before* fitting the scaler, so the held-out rows
# never contribute to the mean/variance used for normalization (fitting on the
# full matrix leaks test-set statistics into training).
# The shuffle depends only on the number of rows and random_state, so this is
# the same partition that splitting the reshaped arrays directly would give.
num_samples, num_features = X.shape
train_idx, test_idx = train_test_split(
    np.arange(num_samples), test_size=0.2, random_state=42)

scaler = StandardScaler()
scaler.fit(X[train_idx])        # statistics come from training rows only
X_scaled = scaler.transform(X)  # the same transform is applied to every row


# --------- STEP 4: RESHAPE FOR CNN INPUTS ---------
def _hypercube_side(n_values, ndim):
    """Return the smallest integer ``side`` with ``side ** ndim >= n_values``.

    Uses an integer search rather than ``ceil(n ** (1 / ndim))`` so that
    floating-point rounding can never produce a side that is one too large.
    """
    side = 0
    while side ** ndim < n_values:
        side += 1
    return side


def _zero_pad_columns(matrix, width):
    """Right-pad every row of a 2-D array with zeros up to ``width`` columns.

    The input is returned unchanged when it is already at least ``width`` wide.
    """
    missing = width - matrix.shape[1]
    if missing <= 0:
        return matrix
    return np.pad(matrix, ((0, 0), (0, missing)), mode='constant')


# **1D CNN Input Shape**
X_1D = X_scaled.reshape(num_samples, num_features, 1)  # Shape: (samples, time_steps, channels)

# **Closest Valid 2D Shape** (square), zero-padded if the features do not fill it
side_2D = _hypercube_side(num_features, 2)
new_size_2D = side_2D ** 2
X_padded_2D = _zero_pad_columns(X_scaled, new_size_2D)
X_2D = X_padded_2D.reshape(num_samples, side_2D, side_2D, 1)  # Shape: (samples, height, width, channels)

# **Closest Valid 3D Shape** (cube), zero-padded if the features do not fill it
side_3D = _hypercube_side(num_features, 3)
new_size_3D = side_3D ** 3
X_padded_3D = _zero_pad_columns(X_scaled, new_size_3D)
X_3D = X_padded_3D.reshape(num_samples, side_3D, side_3D, side_3D, 1)  # Shape: (samples, depth, height, width, channels)

# Train-Test Split (apply the row partition chosen above to every view)
X1_train, X1_test = X_1D[train_idx], X_1D[test_idx]
X2_train, X2_test = X_2D[train_idx], X_2D[test_idx]
X3_train, X3_test = X_3D[train_idx], X_3D[test_idx]
Y_train, Y_test = Y[train_idx], Y[test_idx]

# --------- STEP 5: BUILD MD-DNN MODEL ---------
# Seed the Python, NumPy and TensorFlow RNGs so that weight initialisation,
# dropout masks and batch shuffling repeat on a fresh Restart & Run All.
# NOTE(review): some GPU kernels may still be non-deterministic.
tf.keras.utils.set_random_seed(42)

# The three branches all consume the same standardized signal, presented as a
# 1-D sequence, a zero-padded square and a zero-padded cube respectively.

# 1D branch: convolution + pooling, then an LSTM over the pooled sequence
input_1D = Input(shape=(num_features, 1))
x1 = Conv1D(32, kernel_size=5, activation='relu', padding='same')(input_1D)
x1 = MaxPooling1D(pool_size=2)(x1)
# return_sequences=False yields only the last hidden state, shape (batch, 64),
# which is already flat, so no Flatten layer is needed on this branch.
x1 = LSTM(64, return_sequences=False)(x1)

# 2D branch: convolution over the square-reshaped signal
input_2D = Input(shape=(side_2D, side_2D, 1))
x2 = Conv2D(32, (3, 3), activation='relu', padding='same')(input_2D)
x2 = MaxPooling2D((2, 2))(x2)
x2 = Flatten()(x2)

# 3D branch: convolution over the cube-reshaped signal
input_3D = Input(shape=(side_3D, side_3D, side_3D, 1))
x3 = Conv3D(32, (3, 3, 3), activation='relu', padding='same')(input_3D)
x3 = MaxPooling3D((2, 2, 2))(x3)
x3 = Flatten()(x3)

# Merge All Features
merged = tf.keras.layers.concatenate([x1, x2, x3])
dense1 = Dense(128, activation='relu')(merged)
dense1 = Dropout(0.3)(dense1)
# Single sigmoid unit: the model emits one probability per sample, so the
# labels are assumed to be encoded as 0/1 -- TODO confirm against the dataset.
output = Dense(1, activation='sigmoid')(dense1)  # Binary classification

# Compile Model
model = Model(inputs=[input_1D, input_2D, input_3D], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# --------- STEP 6: TRAIN MODEL ---------
# One array per model input, in the order the inputs were declared.
train_inputs = [X1_train, X2_train, X3_train]
test_inputs = [X1_test, X2_test, X3_test]

# The held-out split is also passed as validation data, so val_* metrics in the
# training log are computed on the same rows as the final accuracy below.
history = model.fit(
    train_inputs,
    Y_train,
    validation_data=(test_inputs, Y_test),
    batch_size=32,
    epochs=20,
)

# Evaluate Model
# Threshold the sigmoid outputs at 0.5 to turn them into hard 0/1 predictions.
probabilities = model.predict(test_inputs)
Y_pred = (probabilities > 0.5).astype("int32")
accuracy = accuracy_score(Y_test, Y_pred)
print(f'\nFinal Test Accuracy: {accuracy:.4f}')


Epoch 1/20
125/125 ━━━━━━━━━━━━━━━━━━━━ 9s 44ms/step - accuracy: 0.9250 - loss: 0.1899 - val_accuracy: 0.9800 - val_loss: 0.0756
Epoch 2/20
125/125 ━━━━━━━━━━━━━━━━━━━━ 5s 41ms/step - accuracy: 0.9882 - loss: 0.0473 - val_accuracy: 0.9960 - val_loss: 0.0147
Epoch 3/20
125/125 ━━━━━━━━━━━━━━━━━━━━ 5s 41ms/step - accuracy: 0.9902 - loss: 0.0425 - val_accuracy: 0.9950 - val_loss: 0.0140
Epoch 4/20
125/125 ━━━━━━━━━━━━━━━━━━━━ 5s 41ms/step - accuracy: 0.9906 - loss: 0.0352 - val_accuracy: 0.9930 - val_loss: 0.0186
Epoch 5/20
125/125 ━━━━━━━━━━━━━━━━━━━━ 5s 41ms/step - accuracy: 0.9937 - loss: 0.0231 - val_accuracy: 0.9970 - val_loss: 0.0155
Epoch 6/20
125/125 ━━━━━━━━━━━━━━━━━━━━ 5s 44ms/step - accuracy: 0.9888 - loss: 0.0322 - val_accuracy: 0.9950 - val_loss: 0.0106
Epoch 7/20
[1m125/125