Random forest and k-fold test

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Conv1D, Conv2D, Conv3D, LSTM, Flatten, Dense, Dropout, BatchNormalization, MaxPooling1D, MaxPooling2D, MaxPooling3D
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# --------- STEP 0: CONFIGURATION & REPRODUCIBILITY ---------
DATA_PATH = "/kaggle/input/ecg-dataset/ecg.csv"  # Replace with your dataset path
SEED = 42
TEST_FRACTION = 0.2

# Seeds Python's `random`, NumPy and TensorFlow in one call so that weight
# initialisation and batch shuffling repeat from run to run.
tf.keras.utils.set_random_seed(SEED)

# --------- STEP 1: LOAD DATA ---------
df = pd.read_csv(DATA_PATH, header=None)

# --------- STEP 2: SEPARATE FEATURES & LABELS ---------
X = df.iloc[:, :-1].values  # All columns except the last one
Y = df.iloc[:, -1].values   # Last column as labels
num_samples, num_features = X.shape

# The sigmoid + binary_crossentropy head built below only makes sense for 0/1 labels.
assert set(np.unique(Y)) <= {0, 1}, "labels must be binary (0/1)"

# --------- STEP 3: TRAIN-TEST SPLIT (ON ROW INDICES) ---------
# Splitting row indices first lets the scaler be fitted on training rows only,
# and guarantees the 1D/2D/3D views and the labels are cut on the same rows.
train_idx, test_idx = train_test_split(
    np.arange(num_samples), test_size=TEST_FRACTION, random_state=SEED)

# --------- STEP 4: NORMALIZATION ---------
# Fit on the training rows only; fitting on the full matrix would leak
# test-set mean/variance into the features the model is trained on.
scaler = StandardScaler().fit(X[train_idx])
X_scaled = scaler.transform(X)

# --------- STEP 5: RESHAPE FOR CNN INPUTS ---------
# **1D CNN Input Shape**
X_1D = X_scaled.reshape(num_samples, num_features, 1)  # Shape: (samples, time_steps, channels)

# **Smallest square that can hold every feature**
side_2D = int(np.ceil(np.sqrt(num_features)))
# Zero-pad the tail of each row up to side_2D**2 values (a zero-width pad is a no-op).
X_padded_2D = np.pad(X_scaled, ((0, 0), (0, side_2D ** 2 - num_features)), mode='constant')
X_2D = X_padded_2D.reshape(num_samples, side_2D, side_2D, 1)  # Shape: (samples, height, width, channels)

# **Smallest cube that can hold every feature**
side_3D = int(np.ceil(np.cbrt(num_features)))
# Zero-pad the tail of each row up to side_3D**3 values.
X_padded_3D = np.pad(X_scaled, ((0, 0), (0, side_3D ** 3 - num_features)), mode='constant')
X_3D = X_padded_3D.reshape(num_samples, side_3D, side_3D, side_3D, 1)  # Shape: (samples, depth, height, width, channels)

# Apply the index split to every view and to the labels.
X1_train, X1_test = X_1D[train_idx], X_1D[test_idx]
X2_train, X2_test = X_2D[train_idx], X_2D[test_idx]
X3_train, X3_test = X_3D[train_idx], X_3D[test_idx]
Y_train, Y_test = Y[train_idx], Y[test_idx]
assert len(Y_train) + len(Y_test) == num_samples

# --------- STEP 6: BUILD MD-DNN MODEL ---------
# NOTE: all three branches receive the SAME scaled feature vector; the 2D and
# 3D inputs are zero-padded reshapes of it, not separately derived data.

# 1D branch: convolution + LSTM over the raw feature sequence
input_1D = Input(shape=(num_features, 1))
x1 = Conv1D(32, kernel_size=5, activation='relu', padding='same')(input_1D)
x1 = MaxPooling1D(pool_size=2)(x1)
# return_sequences=False already yields a flat (batch, 64) tensor, so no Flatten is needed.
x1 = LSTM(64, return_sequences=False)(x1)

# 2D branch: convolution over the feature vector folded into a square
input_2D = Input(shape=(side_2D, side_2D, 1))
x2 = Conv2D(32, (3, 3), activation='relu', padding='same')(input_2D)
x2 = MaxPooling2D((2, 2))(x2)
x2 = Flatten()(x2)

# 3D branch: convolution over the feature vector folded into a cube
input_3D = Input(shape=(side_3D, side_3D, side_3D, 1))
x3 = Conv3D(32, (3, 3, 3), activation='relu', padding='same')(input_3D)
x3 = MaxPooling3D((2, 2, 2))(x3)
x3 = Flatten()(x3)

# Merge All Features
merged = tf.keras.layers.concatenate([x1, x2, x3])
dense1 = Dense(128, activation='relu')(merged)
dense1 = Dropout(0.3)(dense1)
output = Dense(1, activation='sigmoid')(dense1)  # Binary classification

# Compile Model
model = Model(inputs=[input_1D, input_2D, input_3D], outputs=output)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# --------- STEP 7: TRAIN MODEL ---------
# NOTE(review): the held-out split doubles as validation data here. It is only
# monitored (no early stopping or checkpoint selection is configured), but a
# separate validation split would be needed before tuning on val_* metrics.
history = model.fit([X1_train, X2_train, X3_train], Y_train,
                    epochs=20, batch_size=32,
                    validation_data=([X1_test, X2_test, X3_test], Y_test))

# Evaluate Model: threshold the sigmoid output at 0.5 to get class labels.
Y_pred = (model.predict([X1_test, X2_test, X3_test]) > 0.5).astype("int32")
accuracy = accuracy_score(Y_test, Y_pred)
print(f'\nFinal Test Accuracy: {accuracy:.4f}')


Epoch 1/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 52ms/step - accuracy: 0.9458 - loss: 0.1653 - val_accuracy: 0.9930 - val_loss: 0.0200
Epoch 2/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 48ms/step - accuracy: 0.9865 - loss: 0.0543 - val_accuracy: 0.9960 - val_loss: 0.0117
Epoch 3/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 48ms/step - accuracy: 0.9875 - loss: 0.0389 - val_accuracy: 0.9920 - val_loss: 0.0196
Epoch 4/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 47ms/step - accuracy: 0.9852 - loss: 0.0489 - val_accuracy: 0.9950 - val_loss: 0.0128
Epoch 5/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 47ms/step - accuracy: 0.9900 - loss: 0.0335 - val_accuracy: 0.9930 - val_loss: 0.0174
Epoch 6/20
[1m125/125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 50ms/step - accuracy: 0.9915 - loss: 0.0288 - val_accuracy: 0.9920 - val_loss: 0.0233
Epoch 7/20
[1m125/12

In [3]:
# One ECG record laid out like a row of the training CSV: the feature values
# come first and the class label is the final element (1.0 for this sample).
full_row = [
    -0.11252183, -2.8272038, -3.7738969, -4.3497511, -4.376041, -3.4749863, -2.1814082, -1.8182865,
    -1.2505219, -0.47749208, -0.36380791, -0.49195659, -0.42185509, -0.30920086, -0.4959387,
    -0.34211867, -0.35533627, -0.36791303, -0.31650279, -0.41237405, -0.47167181, -0.41345783,
    -0.36461703, -0.44929829, -0.47141866, -0.42477658, -0.46251673, -0.55247236, -0.47537519,
    -0.6942, -0.7018681, -0.59381178, -0.66068415, -0.71383066, -0.76980688, -0.67228161, -0.65367605,
    -0.63940562, -0.55930228, -0.59167032, -0.49322332, -0.46305183, -0.30164382, -0.23273401,
    -0.12505488, -0.15394314, -0.024357404, -0.065608758, 0.034999258, 0.061935219, 0.07119542,
    0.12392505, 0.10312371, 0.22522849, 0.12868305, 0.30248315, 0.25727621, 0.19635161, 0.17938297,
    0.24472863, 0.34121687, 0.32820441, 0.40604169, 0.44660507, 0.42406823, 0.48151204, 0.4778438,
    0.62408259, 0.57458456, 0.59801319, 0.5645919, 0.607979, 0.62063457, 0.65625291, 0.68474806,
    0.69427284, 0.66558377, 0.57579577, 0.63813479, 0.61491695, 0.56908343, 0.46857572, 0.44281777,
    0.46827436, 0.43249295, 0.40795792, 0.41862256, 0.36253075, 0.41095901, 0.47166633, 0.37216676,
    0.33787543, 0.22140511, 0.27399747, 0.29866408, 0.26356357, 0.34256352, 0.41950529, 0.58660736,
    0.86062387, 1.1733446, 1.2581791, 1.4337887, 1.7005334, 1.9990431, 2.1253411, 1.9932907, 1.9322463,
    1.7974367, 1.5222839, 1.2511679, 0.99873034, 0.48372242, 0.023132292, -0.19491383, -0.22091729,
    -0.24373668, -0.25469462, -0.29113555, -0.25649034, -0.22787425, -0.32242276, -0.28928586,
    -0.31816951, -0.36365359, -0.39345584, -0.26641886, -0.25682316, -0.28869399, -0.16233755,
    0.16034772, 0.79216787, 0.93354122, 0.79695779, 0.57862066, 0.2577399, 0.22807718, 0.12343082,
    0.92528624, 0.19313742, 1.0
]

# Split the row into its feature vector (everything but the last element)
# and its ground-truth label (the last element).
features = np.array(full_row[:-1]).reshape(1, -1)
true_label = int(full_row[-1])

# Fail early with a readable message if the row does not match the width the
# scaler and model were built for.
if features.shape[1] != num_features:
    raise ValueError(
        f"Expected {num_features} feature values, got {features.shape[1]}")

# Apply the scaling fitted during training.
scaled_features = scaler.transform(features)

# The arrays fed to the model get their own names (sample_*): `input_1D`,
# `input_2D` and `input_3D` already name the Keras Input tensors the model was
# built from, and rebinding them to NumPy arrays would silently replace those.
sample_1D = scaled_features.reshape(1, num_features, 1)

# Zero-pad the tail up to side_2D**2 values and fold into a square
# (a zero-width pad leaves the data unchanged).
padded_2D = np.pad(scaled_features, ((0, 0), (0, side_2D**2 - num_features)), mode='constant')
sample_2D = padded_2D.reshape(1, side_2D, side_2D, 1)

# Zero-pad the tail up to side_3D**3 values and fold into a cube.
padded_3D = np.pad(scaled_features, ((0, 0), (0, side_3D**3 - num_features)), mode='constant')
sample_3D = padded_3D.reshape(1, side_3D, side_3D, side_3D, 1)

# Prediction: the model emits one sigmoid probability; threshold it at 0.5.
pred = model.predict([sample_1D, sample_2D, sample_3D])
predicted_class = int(pred[0][0] > 0.5)

print("True Label:", true_label)
print("Predicted Probability:", pred[0][0])
print("Predicted Class:", predicted_class)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
True Label: 1
Predicted Probability: 0.99835765
Predicted Class: 1


In [4]:
# One ECG record laid out like a row of the training CSV: the feature values
# come first and the class label is the final element (0 for this sample).
full_row = [
    -1.9118342, -2.4437412, -2.5753818, -2.5157778, -2.4649795, -2.2552471, -1.9044571, -1.3926895, -0.92220032, -0.82579482, -0.70510995, -0.46430372, -0.25364184, -0.1849022, -0.19185499, -0.25544164, -0.26359861, -0.24854258, -0.23030754, -0.17991873, -0.15642103, -0.15398933, -0.16151424, -0.18676761, -0.1719051, -0.18780575, -0.16989984, -0.21664273, -0.24726275, -0.2120415, -0.20334942, -0.22444743, -0.23468244, -0.28030212, -0.2623805, -0.2778017, -0.37739136, -0.29300509, -0.30385194, -0.31346441, -0.3065219, -0.32940655, -0.37747027, -0.35249415, -0.33000112, -0.29636308, -0.36578197, -0.33502703, -0.31565959, -0.26068522, -0.23520019, -0.30959948, -0.28860771, -0.26550811, -0.21575424, -0.12918242, -0.10275816, -0.067525671, 0.033191091, -0.045104534, 0.041259465, 0.05171647, -0.036575765, 0.047536433, 0.043288631, 0.049671498, 0.048200876, 0.044223738, 0.056119292, 0.014982658, -0.012015912, 0.048996349, 0.10836982, 0.12888845, 0.111641, 0.044384223, 0.13645726, 0.13709693, 0.099112483, 0.11905566, 0.10461741, 0.15582443, 0.087073092, 0.13452834, 0.16943585, 0.18555373, 0.22061417, 0.15106143, 0.16858686, 0.14392849, 0.25966454, 0.23595807, 0.21516782, 0.30225696, 0.33239243, 0.3504563, 0.32985494, 0.3360011, 0.36684762, 0.3869205, 0.39507809, 0.35798961, 0.33367264, 0.38270557, 0.38049506, 0.39800073, 0.49083907, 0.48626902, 0.53282244, 0.66199825, 0.79075024, 0.94972986, 1.0020187, 1.11192, 1.2608788, 1.489962, 1.6430692, 1.7301975, 1.8095412, 1.8515141, 2.0645747, 2.1802799, 2.2624092, 2.4423523, 2.5928318, 2.473724, 2.2223892, 1.8253833, 1.6129192, 1.134859, 0.37656347, -0.21915295, -0.6510275, -0.93122979, -1.0240662, -1.4030552, -1.954082, -2.3548021, -2.4975386, -2.4118257, 0
]

# Separate the feature values from the trailing ground-truth label.
features = np.array(full_row[:-1]).reshape(1, -1)
true_label = int(full_row[-1])

# Reject rows whose width differs from what the scaler and model expect,
# instead of failing later inside a reshape.
if features.shape[1] != num_features:
    raise ValueError(
        f"Expected {num_features} feature values, got {features.shape[1]}")

# Reuse the scaler fitted during training.
scaled_features = scaler.transform(features)

# Keep the model-ready arrays under distinct names (sample_*) so the Keras
# Input tensors bound to `input_1D` / `input_2D` / `input_3D` when the model
# was defined are not overwritten by NumPy arrays.
sample_1D = scaled_features.reshape(1, num_features, 1)

# Square view: zero-pad the tail to side_2D**2 values, then fold.
# A pad width of zero is a no-op, so no branch is needed.
padded_2D = np.pad(scaled_features, ((0, 0), (0, side_2D**2 - num_features)), mode='constant')
sample_2D = padded_2D.reshape(1, side_2D, side_2D, 1)

# Cube view: zero-pad the tail to side_3D**3 values, then fold.
padded_3D = np.pad(scaled_features, ((0, 0), (0, side_3D**3 - num_features)), mode='constant')
sample_3D = padded_3D.reshape(1, side_3D, side_3D, side_3D, 1)

# Prediction: a single sigmoid probability, thresholded at 0.5 for the class.
pred = model.predict([sample_1D, sample_2D, sample_3D])
predicted_class = int(pred[0][0] > 0.5)

print("True Label:", true_label)
print("Predicted Probability:", pred[0][0])
print("Predicted Class:", predicted_class)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
True Label: 0
Predicted Probability: 3.5467178e-07
Predicted Class: 0
