In [None]:
!pip install tensorflow==2.15.0



In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset and model paths used by the
# later cells all live under /content/drive/MyDrive/data.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [None]:
# Read the train and test CSV files; header=None because the files are read
# without a header row (columns end up numbered 0..187).
mit_train, mit_test = (
    pd.read_csv(csv_path, header=None)
    for csv_path in (
        '/content/drive/MyDrive/data/mitbih_train.csv',
        '/content/drive/MyDrive/data/mitbih_test.csv',
    )
)

print('The shape of train dataset :', mit_train.shape)
mit_train.head()

The shape of train dataset : (87554, 188)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,178,179,180,181,182,183,184,185,186,187
0,0.977941,0.926471,0.681373,0.245098,0.154412,0.191176,0.151961,0.085784,0.058824,0.04902,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.960114,0.863248,0.461538,0.196581,0.094017,0.125356,0.099715,0.088319,0.074074,0.082621,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.0,0.659459,0.186486,0.07027,0.07027,0.059459,0.056757,0.043243,0.054054,0.045946,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.925414,0.665746,0.541436,0.276243,0.196133,0.077348,0.071823,0.060773,0.066298,0.058011,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.967136,1.0,0.830986,0.586854,0.356808,0.248826,0.14554,0.089202,0.117371,0.150235,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Show the distinct values found in the last column (the class label) of each split.
for caption, frame in (('train set classes: ', mit_train),
                       ('test set classes: ', mit_test)):
    print(caption, frame.iloc[:, -1].unique())

train set classes:  [0. 1. 2. 3. 4.]
test set classes:  [0. 1. 2. 3. 4.]


In [None]:
# Column 187 holds the class label; store it as int64 in both frames.
for split_frame in (mit_train, mit_test):
    split_frame[187] = split_frame[187].astype('int64')

In [None]:
# Remove the display row limit so the per-column count below is shown in full,
# then count the missing values in every column of the train set.
pd.options.display.max_rows = None
mit_train.isnull().sum()

Unnamed: 0,0
0,0
1,0
2,0
3,0
4,0
5,0
6,0
7,0
8,0
9,0


In [None]:
# Class distribution of the train dataset.
# `labels` maps each class id (0..4) to the name shown on the charts.
# NOTE(review): "Artial" looks like a typo for "Atrial"; the string is kept as-is here.
labels = dict(enumerate([
    "Normal",
    "Artial Premature",
    "Premature ventricular contraction",
    "Fusion of ventricular and normal",
    "Fusion of paced and normal",
]))

# Count the rows per class and replace the numeric ids in the index with names.
class_column = mit_train.iloc[:, -1]
value_counts = class_column.value_counts().rename(labels)

# Bar chart with the absolute count of each class.
bar_fig = px.bar(
    x=value_counts.index,
    y=value_counts.values,
    labels={'x': 'Labels', 'y': 'Counts'},
    text_auto=True,
    title="The Count of Each Label in The Train Dataset",
)

# Pie chart with the share of each class.
pie_fig = px.pie(
    names=value_counts.index,
    values=value_counts.values,
    title="The Percentage of Each Label in The Train Dataset",
)

# Same centred title and size for both figures; the bar chart is shown first.
for fig in (bar_fig, pie_fig):
    fig.update_layout(title_x=0.5, width=800, height=600)
    fig.show()

In [None]:
# Class distribution of the test dataset.

# Count the rows per class; `labels` (class id -> name) relabels the index.
class_column = mit_test.iloc[:, -1]
value_counts = class_column.value_counts().rename(labels)

# Bar chart with the absolute count of each class.
bar_fig = px.bar(
    x=value_counts.index,
    y=value_counts.values,
    labels={'x': 'Labels', 'y': 'Counts'},
    text_auto=True,
    title="The Count of Each Label in The Test Dataset",
)

# Pie chart with the share of each class.
pie_fig = px.pie(
    names=value_counts.index,
    values=value_counts.values,
    title="The Percentage of Each Label in The Test Dataset",
)

# Same centred title and size for both figures; the bar chart is shown first.
for fig in (bar_fig, pie_fig):
    fig.update_layout(title_x=0.5, width=800, height=600)
    fig.show()

In [None]:
from imblearn.over_sampling import RandomOverSampler

# Split the train frame into the 187 signal columns and the label column.
# The label Series is deliberately NOT named `labels`: that name holds the
# class-id -> class-name dict used by the plotting cells, and rebinding it to a
# Series here would break those cells if they are re-run after this one.
data = mit_train.iloc[:, :187]
targets = mit_train.iloc[:, 187]

# Initialize RandomOverSampler with a fixed seed so the resampling is repeatable
ros = RandomOverSampler(random_state=42)

# Resample the data to balance the class counts
data_resampled, labels_resampled = ros.fit_resample(data, targets)

# Re-attach the label column as the last column of the balanced frame.
train_df = pd.concat([data_resampled, labels_resampled], axis=1)

train_df.shape

(362355, 188)

In [None]:
# Class distribution of the oversampled train frame.
# `labels` maps each class id (0..4) to the name shown on the charts.
# NOTE(review): "Artial" looks like a typo for "Atrial"; the string is kept as-is here.
labels = dict(enumerate([
    "Normal",
    "Artial Premature",
    "Premature ventricular contraction",
    "Fusion of ventricular and normal",
    "Fusion of paced and normal",
]))

# Count the rows per class and replace the numeric ids in the index with names.
class_column = train_df.iloc[:, -1]
value_counts = class_column.value_counts().rename(labels)

# Bar chart with the absolute count of each class.
bar_fig = px.bar(
    x=value_counts.index,
    y=value_counts.values,
    labels={'x': 'Labels', 'y': 'Counts'},
    text_auto=True,
    title="The Count of Each Label After Balancing",
)

# Pie chart with the share of each class.
pie_fig = px.pie(
    names=value_counts.index,
    values=value_counts.values,
    title="The Percentage of Each Label After Balancing",
)

# Same centred title and size for both figures; the bar chart is shown first.
for fig in (bar_fig, pie_fig):
    fig.update_layout(title_x=0.5, width=800, height=600)
    fig.show()

In [None]:
from sklearn.model_selection import train_test_split

# Features are the first 187 columns of the balanced frame, the label is column 187.
balanced_features = train_df.iloc[:, :187]
balanced_targets = train_df.iloc[:, 187]

# Stratified 80/20 train/validation split with a fixed seed.
# NOTE(review): train_df was produced by RandomOverSampler before this split, so
# copies of the same beat can presumably land in both the train and validation
# parts, which would make validation metrics optimistic — confirm whether the
# split should happen before oversampling.
x_train, x_val, y_train, y_val = train_test_split(
    balanced_features,
    balanced_targets,
    test_size=0.2,
    stratify=balanced_targets,
    random_state=42,
)

# The test split is taken from mit_test as-is (no resampling).
x_test, y_test = mit_test.iloc[:, :187], mit_test.iloc[:, 187]

In [None]:
# Use plain NumPy arrays instead of DataFrames for the feature matrices.
# np.asarray returns the same array as `.values` for a DataFrame, and returns an
# ndarray unchanged, so this cell can be re-run without raising AttributeError.
x_train = np.asarray(x_train)
x_val = np.asarray(x_val)
x_test = np.asarray(x_test)

In [None]:
# Report the shape of every feature / label split.
for caption, split in (
    ('x_train shape: ', x_train),
    ('y_train shape: ', y_train),
    ('x_val shape: ', x_val),
    ('y_val shape: ', y_val),
    ('x_test shape: ', x_test),
    ('y_test shape: ', y_test),
):
    print(caption, split.shape)

x_train shape:  (289884, 187)
y_train shape:  (289884,)
x_val shape:  (72471, 187)
y_val shape:  (72471,)
x_test shape:  (21892, 187)
y_test shape:  (21892,)


In [None]:
# Add a trailing axis of size 1 to each feature matrix, keeping the sample count
# as the first axis, so the arrays are 3-D as the Conv1D input requires.
x_train, x_val, x_test = (
    features.reshape(features.shape[0], -1, 1)
    for features in (x_train, x_val, x_test)
)

In [None]:
import tensorflow as tf

# Number of heartbeat classes: the label column holds the ids 0..4 and the
# model's output layer has 5 units.
NUM_CLASSES = 5

# Convert each integer class vector to a one-hot matrix.
# num_classes is passed explicitly: without it to_categorical sizes the matrix
# from the largest label present in that array, so a split that happened to
# miss the highest class id would get fewer columns than the others.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=NUM_CLASSES)

y_val = tf.keras.utils.to_categorical(y_val, num_classes=NUM_CLASSES)

y_test = tf.keras.utils.to_categorical(y_test, num_classes=NUM_CLASSES)

In [None]:
# Show the last one-hot row of each label matrix as a quick sanity check.
for one_hot in (y_train, y_val, y_test):
    print(one_hot[-1])

[0. 0. 1. 0. 0.]
[0. 0. 1. 0. 0.]
[0. 0. 0. 0. 1.]


In [None]:
# Report the shapes again after the reshape and one-hot encoding steps.
for caption, split in (
    ('x_train shape: ', x_train),
    ('y_train shape: ', y_train),
    ('x_val shape: ', x_val),
    ('y_val shape: ', y_val),
    ('x_test shape: ', x_test),
    ('y_test shape: ', y_test),
):
    print(caption, split.shape)

x_train shape:  (289884, 187, 1)
y_train shape:  (289884, 5)
x_val shape:  (72471, 187, 1)
y_val shape:  (72471, 5)
x_test shape:  (21892, 187, 1)
y_test shape:  (21892, 5)


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, LSTM, MaxPool1D, Flatten, Dense, BatchNormalization, Input
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

In [None]:
# Convolutional front end: two stacked convolutions and a pooling step, then a
# third convolution and a second pooling step.
conv_stack = [
    Conv1D(64, kernel_size=6, activation='relu'),
    Conv1D(64, kernel_size=3, activation='relu'),
    MaxPool1D(pool_size=2, strides=2, padding="same"),

    Conv1D(64, kernel_size=3, activation='relu'),
    MaxPool1D(pool_size=2, strides=2, padding="same"),
]

# Recurrent part: the first LSTM returns the whole sequence for the second one,
# which returns only its final output.
recurrent_stack = [
    LSTM(64, return_sequences=True, activation="tanh"),
    LSTM(32, return_sequences=False, activation="tanh"),
]

# Classifier head ending in a 5-way softmax (one unit per class).
classifier_head = [
    Flatten(),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(5, activation='softmax'),
]

# The input shape is taken from the training array without its sample axis.
model_cnn_lstm = Sequential(
    [Input(shape=x_train.shape[1:])] + conv_stack + recurrent_stack + classifier_head
)

model_cnn_lstm.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 182, 64)           448       
                                                                 
 conv1d_1 (Conv1D)           (None, 180, 64)           12352     
                                                                 
 max_pooling1d (MaxPooling1  (None, 90, 64)            0         
 D)                                                              
                                                                 
 conv1d_2 (Conv1D)           (None, 88, 64)            12352     
                                                                 
 max_pooling1d_1 (MaxPoolin  (None, 44, 64)            0         
 g1D)                                                            
                                                                 
 lstm (LSTM)                 (None, 44, 64)            3

In [None]:
# Categorical cross-entropy matches the one-hot encoded targets; accuracy is
# tracked as the reported metric.
model_cnn_lstm.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Training callbacks, all driven by the validation loss.
callbacks = [
    # Stop training once val_loss has not improved for 8 epochs.
    EarlyStopping(monitor='val_loss',
                  patience=8),
    # Lower the learning rate when val_loss plateaus. The keyword is `cooldown`;
    # the previous spelling `cool_down` is not a parameter of this callback, so
    # the intended 20-epoch cooldown was never in effect.
    # NOTE(review): patience=20 exceeds both the EarlyStopping patience (8) and
    # the 10 epochs passed to fit(), so this callback cannot trigger with the
    # current settings — confirm the intended values.
    ReduceLROnPlateau(patience=20,
                      monitor='val_loss',
                      min_lr=1e-6,
                      cooldown=20),
    # Keep only the checkpoint with the best val_loss seen so far.
    ModelCheckpoint(filepath=r'/content/drive/MyDrive/data/ecg_cnn+lstm_v3.keras',
                    monitor='val_loss',
                    save_best_only=True),
]

In [None]:
# Train for up to 10 epochs in batches of 32, validating on the held-out split
# after each epoch; `history` keeps the per-epoch metrics returned by fit().
history = model_cnn_lstm.fit(
    x=x_train,
    y=y_train,
    validation_data=(x_val, y_val),
    batch_size=32,
    epochs=10,
    callbacks=callbacks,
    verbose=1,
)

Epoch 1/10
1694/9059 [====>.........................] - ETA: 5:48 - loss: 0.8292 - accuracy: 0.6847

KeyboardInterrupt: 

In [None]:
# Predicted class probabilities for every test sample (one softmax row each).
y_pred = model_cnn_lstm.predict(x=x_test)

In [None]:
from sklearn.metrics import classification_report

# Collapse each one-hot target row and each predicted probability row to the
# index of its largest entry, i.e. the integer class label.
y_test_labels = y_test.argmax(axis=1)
y_pred_labels = y_pred.argmax(axis=1)

keras_report = classification_report(y_test_labels, y_pred_labels)
print(keras_report)

In [None]:
# Calibration data for quantization: the test samples, one per batch,
# capped at 21892 batches.
rep_dataset = (
    tf.data.Dataset.from_tensor_slices(x_test)
    .batch(1)
    .take(21892)
)


def representative_data_gen():
    """Yield the batches of ``rep_dataset`` in the form the TFLite converter expects.

    Each item is a one-element list holding a single-sample batch cast to float32.
    """
    # Cast the data to float32 and yield it wrapped in a list.
    yield from ([tf.cast(sample_batch, tf.float32)] for sample_batch in rep_dataset)


In [None]:

# Load the trained Keras model from Drive.
# NOTE(review): this file name differs from the checkpoint that ModelCheckpoint
# writes during training in this notebook (ecg_cnn+lstm_v3.keras) — confirm this
# is the model that is meant to be quantized.
model = tf.keras.models.load_model('/content/drive/MyDrive/data/ecg_cnn+lstm_tf_2.15.keras')


# Wrap the model behind an Input with a fully fixed signature (batch size
# included) and convert that wrapper instead of the loaded model.
BATCH_SIZE = 1
STEPS = 187  # timesteps
INPUT_SIZE = 1  # features

fixed_input = tf.keras.Input(shape=(STEPS, INPUT_SIZE), batch_size=BATCH_SIZE)
fixed_output = model(fixed_input)
fixed_model = tf.keras.Model(inputs=fixed_input, outputs=fixed_output)

# Convert to TFLite with post-training quantization, calibrated by
# representative_data_gen.
converter = tf.lite.TFLiteConverter.from_keras_model(fixed_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen
# Restrict the conversion to the INT8 builtin op set. An earlier assignment of
# [TFLITE_BUILTINS] was dropped: it was overwritten by this line straight away
# and therefore had no effect.
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.experimental_new_converter = True
# The converted model takes and returns uint8 tensors.
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.uint8
# Private converter flag; presumably keeps the LSTM layers as fused
# UnidirectionalSequenceLSTM ops — TODO confirm against the converter version in use.
converter._experimental_unidirectional_sequence_lstm = True
# Run the conversion; the result is the serialized model as bytes.
tflite_model = converter.convert()

# Save the quantized model next to the other artifacts on Drive.
with open('/content/drive/MyDrive/data/ecg_cnn+lstm_v3_quanti.tflite', 'wb') as f:
    f.write(tflite_model)

print("Complete!")


Statistics for quantized inputs were expected, but not specified; continuing anyway.



Mô hình đã được chuyển đổi và lưu thành công!


In [None]:
import tensorflow as tf
import numpy as np

# Open the quantized TFLite model and allocate its tensors.
interpreter = tf.lite.Interpreter(
    model_path="/content/drive/MyDrive/data/ecg_cnn+lstm_v3_quanti.tflite"
)
interpreter.allocate_tensors()

# Descriptions of the model's input and output tensors.
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Print shape and dtype of the first input and the first output tensor.
first_input, first_output = input_details[0], output_details[0]
print("🔹 Input shape:", first_input['shape'], "Type:", first_input['dtype'])
print("🔹 Output shape:", first_output['shape'], "Type:", first_output['dtype'])


🔹 Input shape: [  1 187   1] Type: <class 'numpy.uint8'>
🔹 Output shape: [1 5] Type: <class 'numpy.uint8'>


In [None]:
import numpy as np
import tensorflow as tf

# Open the quantized model and allocate its tensors.
interpreter = tf.lite.Interpreter(
    model_path='/content/drive/MyDrive/data/ecg_cnn+lstm_v3_quanti.tflite'
)
interpreter.allocate_tensors()

# Keep only the first input and the first output tensor description.
input_details = interpreter.get_input_details()[0]
output_details = interpreter.get_output_details()[0]

# Each 'quantization' entry is a (scale, zero_point) pair.
(scale_in, zero_point_in), (scale_out, zero_point_out) = (
    input_details['quantization'],
    output_details['quantization'],
)


# Function to set input tensor
def set_input_tensor(interpreter, input_data):
    """Quantize ``input_data`` and copy it into the interpreter's first input tensor.

    The tensor index, dtype and quantization parameters are read from the
    ``interpreter`` that is passed in, so the function does not depend on
    notebook-level variables describing some other interpreter.

    Args:
        interpreter: object exposing ``get_input_details()`` and ``tensor(index)``
            the way ``tf.lite.Interpreter`` does.
        input_data: float array that can be assigned to the 3-D input tensor
            (this notebook passes one sample of shape (1, 187, 1)).
    """
    details = interpreter.get_input_details()[0]
    scale, zero_point = details['quantization']
    target_dtype = np.dtype(details['dtype'])
    values = np.asarray(input_data)

    if scale and np.issubdtype(target_dtype, np.integer):
        # float -> quantized: q = round(x / scale + zero_point), saturated to the
        # range of the tensor dtype. A bare integer cast truncates towards zero
        # and wraps around on out-of-range values instead of clamping them.
        limits = np.iinfo(target_dtype)
        values = np.clip(np.round(values / scale + zero_point), limits.min, limits.max)
    # A scale of 0 is treated as "no quantization": the data is only cast.

    input_tensor = interpreter.tensor(details['index'])()
    input_tensor[:, :, :] = values.astype(target_dtype)


# Function to run inference
def predict_tflite(interpreter, input_data):
    """Run the quantized model on every sample and return the dequantized outputs.

    Args:
        interpreter: an allocated TFLite interpreter (or an object with the same
            ``get_output_details`` / ``invoke`` / ``get_tensor`` methods).
        input_data: array of samples; it is sliced along its first axis and fed
            to the model one sample at a time.

    Returns:
        A NumPy array with one row of output scores per input sample.
    """
    # Read the output description from the interpreter that was passed in.
    out_details = interpreter.get_output_details()[0]
    out_scale, out_zero_point = out_details['quantization']
    output_tensor_index = out_details['index']

    predictions = []
    for i in range(len(input_data)):
        set_input_tensor(interpreter, input_data[i:i+1])  # one sample at a time, keeping the leading axis
        interpreter.invoke()

        # Retrieve data from the output tensor
        output_data = interpreter.get_tensor(output_tensor_index)

        # Widen to float before subtracting the zero point: doing the
        # subtraction on the raw unsigned integers can wrap around when a
        # value is smaller than the zero point.
        shifted = output_data.astype(np.float64) - out_zero_point
        # A scale of 0 is treated as "output is not quantized".
        float_output = out_scale * shifted if out_scale else shifted
        predictions.append(float_output[0])  # Store the result

    return np.array(predictions)


# Score the whole test set with the quantized model.
y_pred_tflite = predict_tflite(interpreter, x_test)

# Reduce score rows and one-hot target rows to integer class labels.
y_pred_tflite_labels = y_pred_tflite.argmax(axis=1)
y_test_labels = y_test.argmax(axis=1)  # ground-truth labels of the test set

# Show the predicted labels.
print("Predictions from the TFLite model:", y_pred_tflite_labels)


Dự đoán từ mô hình TFLite: [0 0 0 ... 4 4 4]


In [None]:
from sklearn.metrics import classification_report, accuracy_score

# Overall accuracy of the quantized model on the test set.
accuracy_tflite = accuracy_score(y_true=y_test_labels, y_pred=y_pred_tflite_labels)
print(f"✅ Accuracy of the INT8 quantized model: {accuracy_tflite:.4f}")

# Per-class precision / recall / F1 of the quantized model.
print("📊 Classification Report:")
tflite_report = classification_report(y_true=y_test_labels, y_pred=y_pred_tflite_labels)
print(tflite_report)


✅ Độ chính xác của mô hình lượng tử hóa INT8: 0.8590
📊 Báo cáo phân loại:
              precision    recall  f1-score   support

           0       0.98      0.88      0.92     18118
           1       0.26      0.56      0.36       556
           2       0.52      0.67      0.59      1448
           3       0.23      0.75      0.35       162
           4       0.73      0.95      0.82      1608

    accuracy                           0.86     21892
   macro avg       0.54      0.76      0.61     21892
weighted avg       0.91      0.86      0.88     21892



In [None]:
# Score the test set with the unquantized Keras model (`model`, loaded in the
# conversion cell) and turn the probability rows into class labels.
y_pred_original = model.predict(x=x_test)
y_pred_original_labels = y_pred_original.argmax(axis=1)

# Accuracy before quantization, and how much of it the quantized model loses.
accuracy_original = accuracy_score(y_true=y_test_labels, y_pred=y_pred_original_labels)
print(f"🎯 Accuracy of the original model (before quantization): {accuracy_original:.4f}")
print(f"📉 Accuracy drop after quantization: {accuracy_original - accuracy_tflite:.4f}")
