Load Data

In [None]:
import wfdb
import neurokit2 as nk
import numpy as np
import pandas as pd
import os

# Directory that is scanned for the database's .dat signal files.
data_dir = './mit-bih-arrhythmia-database-1.0.0/'

# One record name per .dat file (extension stripped). os.listdir() returns
# entries in arbitrary, platform-dependent order, so the names are sorted to
# make the row order of the saved feature matrix -- and therefore every
# downstream seeded split -- reproducible. os.path.splitext() removes only the
# final extension, so a base name containing dots is not truncated.
records = sorted(
    os.path.splitext(f)[0] for f in os.listdir(data_dir) if f.endswith('.dat')
)

sr = 360  # Sampling rate (samples per second); converts sample offsets to seconds

# Accumulators filled by the per-record loop: one feature row / one label per beat.
all_features = []
all_labels = []

# Annotation symbols that mark an actual heartbeat according to the PhysioNet
# annotation code table; every other symbol (rhythm change, noise marker,
# comment, ...) is a non-beat annotation and must not take part in RR intervals.
BEAT_SYMBOLS = frozenset('NLRBAaJSVrFejnE/fQ?')
# Beat classes kept for the classifier: normal ('N') and PVC ('V').
TARGET_SYMBOLS = frozenset(['N', 'V'])

for record_name in records:
    print(f"Memproses record {record_name}...")

    # Load the signal and its 'atr' annotations; skip records that fail to load.
    try:
        record = wfdb.rdrecord(os.path.join(data_dir, record_name))
        ann = wfdb.rdann(os.path.join(data_dir, record_name), 'atr')
    except Exception as e:
        print(f"Gagal memuat record {record_name}: {e}")
        continue

    ecg = record.p_signal[:, 0]  # first signal channel of the record

    # Keep every beat annotation first. RR intervals have to be measured
    # between physically consecutive beats; measuring them only between the
    # N/V beats (as done previously) silently skips any other beat type in
    # between and inflates the interval.
    beat_mask = np.array([sym in BEAT_SYMBOLS for sym in ann.symbol], dtype=bool)
    beat_samples = np.asarray(ann.sample)[beat_mask]
    beat_symbols = [sym for sym in ann.symbol if sym in BEAT_SYMBOLS]

    # RR interval (seconds) before and after each beat; NaN where the record
    # has no neighbouring beat (first / last beat).
    rr_prev_all = np.full(beat_samples.shape, np.nan)
    rr_next_all = np.full(beat_samples.shape, np.nan)
    if len(beat_samples) > 1:
        rr_seconds = np.diff(beat_samples) / sr
        rr_prev_all[1:] = rr_seconds
        rr_next_all[:-1] = rr_seconds

    # Restrict to the N/V beats that are actually classified.
    target_mask = np.array([sym in TARGET_SYMBOLS for sym in beat_symbols], dtype=bool)
    rpeaks = beat_samples[target_mask]
    symbols = [sym for sym in beat_symbols if sym in TARGET_SYMBOLS]
    rr_prev_sel = rr_prev_all[target_mask]
    rr_next_sel = rr_next_all[target_mask]

    if len(rpeaks) == 0:
        print(f"Tidak ada beat 'N' atau 'V' di record {record_name}. Lewati.")
        continue

    cleaned = nk.ecg_clean(ecg, sampling_rate=sr, method='neurokit')

    # Delineate waves around the selected beats; skip the record on failure.
    try:
        _, info = nk.ecg_delineate(cleaned, rpeaks, sampling_rate=sr, method='peak')
    except Exception as e:
        print(f"Gagal mendelineasi record {record_name}: {e}")
        continue

    # Look the delineation results up once instead of once per beat.
    q_peaks = info.get('ECG_Q_Peaks', [])
    s_peaks = info.get('ECG_S_Peaks', [])

    # Feature extraction: one row [rr_prev, rr_next, qrs_duration] per beat.
    for i, rpeak in enumerate(rpeaks):
        rr_prev = rr_prev_sel[i]
        rr_next = rr_next_sel[i]

        # QRS duration approximated as the Q-peak to S-peak distance in
        # seconds; NaN when either peak is missing for this beat.
        q_peak = q_peaks[i] if i < len(q_peaks) else np.nan
        s_peak = s_peaks[i] if i < len(s_peaks) else np.nan
        qrs_duration = (s_peak - q_peak) / sr if not np.isnan(q_peak) and not np.isnan(s_peak) else np.nan

        features = [rr_prev, rr_next, qrs_duration]
        label = 1 if symbols[i] == 'V' else 0  # 1 = PVC, 0 = normal
        all_features.append(features)
        all_labels.append(label)

# Stack the per-beat rows into an (n_beats, 3) float matrix. The explicit
# reshape keeps the array two-dimensional even when no beat was extracted;
# np.array([]) alone is 1-D and the column access below would raise IndexError.
# The 3 matches the [rr_prev, rr_next, qrs_duration] rows built by the loop.
all_features = np.array(all_features, dtype=float).reshape(-1, 3)
all_labels = np.array(all_labels)

# Mean-impute missing values column by column (e.g. the RR interval of the
# first/last beat of a record, or a QRS duration that could not be delineated).
# A column that is entirely NaN has no mean and is left as NaN.
if all_features.size:
    col_means = np.nanmean(all_features, axis=0)
    nan_rows, nan_cols = np.nonzero(np.isnan(all_features))
    all_features[nan_rows, nan_cols] = col_means[nan_cols]

# Persist the feature matrix and the label vector as .npy files for the
# following cells.
for npy_path, npy_array in (
    ('features.npy', all_features),
    ('labels.npy', all_labels),
):
    np.save(npy_path, npy_array)

# Same data as a single CSV table, with the label as the last column.
df = pd.DataFrame(
    all_features, columns=['RR_Prev', 'RR_Next', 'QRS_Duration']
).assign(Label=all_labels)
df.to_csv('mitbih_features.csv', index=False)

# Summary of what was extracted (0 = normal, 1 = PVC).
print("Ekstraksi fitur selesai.")
print(f"Total beat yang diproses: {len(all_labels)}")
print(f"Jumlah beat Normal (0): {np.sum(all_labels == 0)}")
print(f"Jumlah beat PVC (1): {np.sum(all_labels == 1)}")
print("Data disimpan ke 'features.npy', 'labels.npy', dan 'mitbih_features.csv'.")

Data Balancing and Feature Normalization

In [None]:
import numpy as np
import pandas as pd
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler

# Load the feature matrix and labels written by the feature-extraction cell.
# A missing file raises instead of calling exit(): inside a notebook exit()
# asks the kernel to shut down, which discards all state, whereas an exception
# simply stops execution at this cell with a readable message.
try:
    features = np.load('features.npy')
    labels = np.load('labels.npy')
except FileNotFoundError as err:
    raise FileNotFoundError(
        "File 'features.npy' atau 'labels.npy' tidak ditemukan. Pastikan telah menjalankan kode ekstraksi fitur sebelumnya."
    ) from err

# Class counts before balancing (0 = normal, 1 = PVC).
print("Distribusi data sebelum penyeimbangan:")
print(f"Jumlah beat Normal (0): {np.sum(labels == 0)}")
print(f"Jumlah beat PVC (1): {np.sum(labels == 1)}")

# Standardize every feature column using statistics of the full dataset.
# NOTE(review): both the scaler and SMOTE below see all rows; the train/test
# split only happens later on the saved balanced arrays, so held-out rows
# influence the scaling and may be interpolated from training rows -- confirm
# whether this leakage is acceptable for the reported metrics.
scaler = StandardScaler().fit(features)
features_scaled = scaler.transform(features)

# Oversample the minority class with SMOTE (fixed seed) to balance the classes.
smote = SMOTE(random_state=42)
balanced_features, balanced_labels = smote.fit_resample(features_scaled, labels)

# Class counts after balancing.
print("\nDistribusi data setelah penyeimbangan:")
print(f"Jumlah beat Normal (0): {np.sum(balanced_labels == 0)}")
print(f"Jumlah beat PVC (1): {np.sum(balanced_labels == 1)}")

# Persist the balanced arrays for the model-building cell.
for out_path, out_array in (
    ('balanced_features.npy', balanced_features),
    ('balanced_labels.npy', balanced_labels),
):
    np.save(out_path, out_array)

# Same data as one CSV table, with the label as the last column.
df_balanced = pd.DataFrame(
    balanced_features, columns=['RR_Prev', 'RR_Next', 'QRS_Duration']
).assign(Label=balanced_labels)
df_balanced.to_csv('mitbih_balanced_features.csv', index=False)

print("\nData seimbang disimpan ke 'balanced_features.npy', 'balanced_labels.npy', dan 'mitbih_balanced_features.csv'.")

Build Model

In [None]:
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

# Load the scaled, balanced arrays written by the balancing cell.
# A missing file raises instead of calling exit(): inside a notebook exit()
# asks the kernel to shut down, which discards all state, whereas an exception
# simply stops execution at this cell with a readable message.
try:
    features = np.load('balanced_features.npy')
    labels = np.load('balanced_labels.npy')
except FileNotFoundError as err:
    raise FileNotFoundError(
        "File 'balanced_features.npy' atau 'balanced_labels.npy' tidak ditemukan. Pastikan telah menjalankan kode penyeimbangan data sebelumnya."
    ) from err

# Hold out 20% of the rows for the final evaluation (fixed seed).
# NOTE(review): `features`/`labels` come from the balanced_*.npy files, which
# were scaled and SMOTE-oversampled before this split. The test set therefore
# contains synthetic rows and shares scaling statistics with the training set,
# so the scores below are likely optimistic -- confirm before reporting them.
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)

# Hyper-parameter grid. 'minkowski' is intentionally not listed: with
# KNeighborsClassifier's default p=2 it is the Euclidean distance, so it only
# repeated the 'euclidean' candidates and added a third more fits for nothing.
param_grid = {
    'n_neighbors': [3, 5, 7, 9, 11],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan']
}

knn = KNeighborsClassifier()

# Exhaustive search with 5-fold cross-validation, scored by F1 on the
# positive (PVC = 1) class, using all available cores.
grid_search = GridSearchCV(knn, param_grid, cv=5, scoring='f1', n_jobs=-1, verbose=1)
grid_search.fit(X_train, y_train)

print("\nParameter terbaik:", grid_search.best_params_)
print("Skor F1 terbaik (cross-validation):", grid_search.best_score_)

# Estimator refit on the whole training split with the best parameters.
best_knn = grid_search.best_estimator_

# Predict the held-out split with the tuned classifier.
y_pred = best_knn.predict(X_test)

# Display names for the two classes, in label order (0, 1).
class_names = ['Normal', 'PVC']

print("\nAkurasi pada data pengujian:", accuracy_score(y_test, y_pred))
print("\nLaporan Klasifikasi:\n", classification_report(y_test, y_pred, target_names=class_names))

# Confusion matrix as an annotated heatmap; rows are true classes and
# columns are predicted classes.
cm = confusion_matrix(y_test, y_pred)
fig, ax = plt.subplots(figsize=(6, 4))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
    ax=ax,
)
ax.set_title('Confusion Matrix')
ax.set_xlabel('Prediksi')
ax.set_ylabel('Aktual')
fig.savefig('confusion_matrix.png')
plt.show()

# Persist the tuned classifier for later use.
joblib.dump(best_knn, 'knn_model.pkl')
print("\nModel terbaik disimpan ke 'knn_model.pkl'.")

Export Scaler

In [None]:
import numpy as np
from sklearn.preprocessing import StandardScaler
import joblib

# Load the original (pre-SMOTE) feature matrix written by the extraction cell.
# A missing file raises instead of calling exit(): inside a notebook exit()
# asks the kernel to shut down, which discards all state, whereas an exception
# simply stops execution at this cell with a readable message.
try:
    features = np.load('features.npy')
except FileNotFoundError as err:
    raise FileNotFoundError(
        "File 'features.npy' tidak ditemukan. Pastikan file ada."
    ) from err

# Fit a StandardScaler on the original, unbalanced features. This is the same
# input the balancing cell fits its scaler on, so the exported scaler applies
# the same transformation that the balanced training data went through.
scaler = StandardScaler()
scaler.fit(features)

# Persist the scaler so new samples can be scaled the same way before prediction.
joblib.dump(scaler, 'scaler.pkl')
print("Scaler disimpan ke 'scaler.pkl'.")