In [None]:
import pandas as pd
import numpy as np
import neurokit2 as nk
import joblib

# Load the annotated ECG segment. Only the file read sits inside the `try`,
# because it is the only statement that can raise FileNotFoundError.
try:
    df = pd.read_csv('mitbih_119_channel1_segment_1.csv')
except FileNotFoundError:
    print("File 'mitbih_119_channel1_segment_1.csv' tidak ditemukan.")
    # `exit()` is an interactive helper injected by the `site` module and
    # reports status 0; raise SystemExit so the failure is visible to callers.
    raise SystemExit(1)

# Per-sample signal values and per-sample beat annotations.
ecg = df['ECG_Signal'].values
true_labels = df['Beat_Label'].values

# Count the samples annotated 'V' (PVC) as a reference for the later summary.
total_pvc_labels = np.sum(true_labels == 'V')
print(f"Total label 'V' (PVC) dalam CSV: {total_pvc_labels}")

# Sampling rate handed to every NeuroKit call, and the half-width (in samples)
# of the window later searched for a beat label around each R peak.
sr, window_size = 360, 5

# Filter the raw signal with NeuroKit's default cleaning pipeline.
cleaned = nk.ecg_clean(ecg, method='neurokit', sampling_rate=sr)

# Detect R peaks on the cleaned signal and keep their sample indices.
rpeaks_info = nk.ecg_findpeaks(cleaned, sampling_rate=sr)
rpeaks = np.array(rpeaks_info['ECG_R_Peaks'])

# Delineate the remaining waves around each R peak; only the second element
# of the returned pair (the dict of wave positions) is used further on.
delineation = nk.ecg_delineate(cleaned, rpeaks=rpeaks, method='peak', sampling_rate=sr)
info = delineation[1]

def _aligned_peaks(waves, key, n_beats):
    """Return the positions stored under `key` as a float array of length `n_beats`.

    Entries that are unavailable (key absent, or a list shorter than
    `n_beats`) are NaN, so a beat without a delineated peak produces a NaN
    feature rather than an IndexError.
    """
    aligned = np.full(n_beats, np.nan)
    found = np.asarray(waves.get(key, []), dtype=float)[:n_beats]
    aligned[:found.size] = found
    return aligned


# Feature extraction for every detected beat: [RR before, RR after, QRS
# duration]. Sample counts are divided by the sampling rate `sr`.
n_beats = len(rpeaks)

# The first beat has no predecessor and the last has no successor; those two
# entries are NaN and are imputed by the column mean further down.
rr_seconds = np.diff(rpeaks) / sr
rr_prev = np.concatenate(([np.nan], rr_seconds))[:n_beats]
rr_next = np.concatenate((rr_seconds, [np.nan]))[:n_beats]

# QRS duration is the S-peak position minus the Q-peak position. The dict
# lookups happen once here instead of several times per beat, and NaN in
# either position propagates to the duration.
q_peaks = _aligned_peaks(info, 'ECG_Q_Peaks', n_beats)
s_peaks = _aligned_peaks(info, 'ECG_S_Peaks', n_beats)
qrs_duration = (s_peaks - q_peaks) / sr

# column_stack always yields shape (n_beats, 3), even for zero beats, so the
# column-wise imputation that follows never indexes a 1-D array.
features = np.column_stack((rr_prev, rr_next, qrs_duration))

# A beat counts as PVC (1) when any sample within `window_size` samples of its
# R peak is annotated 'V'; otherwise it is treated as normal (0).
n_samples = len(true_labels)
true_labels_binary = np.array(
    [
        int('V' in true_labels[max(0, rpeak - window_size):min(n_samples, rpeak + window_size + 1)])
        for rpeak in rpeaks
    ],
    dtype=int,
)

# Replace missing values with the mean of the observed values of the same
# feature column. `features` is modified in place.
n_feature_columns = features.shape[1]
for column_idx in range(n_feature_columns):
    missing = np.isnan(features[:, column_idx])
    if missing.any():
        features[missing, column_idx] = np.nanmean(features[:, column_idx])

def _load_artifact(path):
    """Return the object stored in the joblib file `path`.

    If the file does not exist, the same message as before is printed and the
    script stops with a non-zero exit status.
    """
    # NOTE(review): joblib.load unpickles arbitrary objects; only load
    # artifacts from a trusted source.
    try:
        return joblib.load(path)
    except FileNotFoundError:
        print(f"File '{path}' tidak ditemukan.")
        # `exit()` is an interactive helper from the `site` module and
        # reports status 0; raise SystemExit so the failure is visible.
        raise SystemExit(1)


scaler = _load_artifact('scaler.pkl')

# Normalise the features with the loaded scaler
# (presumably fitted at training time; verify against the training script).
features_normalized = scaler.transform(features)

knn_model = _load_artifact('knn_model.pkl')

# Predict one label per beat.
predictions = knn_model.predict(features_normalized)

def _as_percent(part, whole):
    """Return `part` as a percentage of `whole`, or 0 when `whole` is not positive."""
    return (part / whole) * 100 if whole > 0 else 0


# Distribution of the predicted classes (0 = normal, 1 = PVC).
total_beats = len(predictions)
normal_count_pred = np.sum(predictions == 0)
pvc_count_pred = np.sum(predictions == 1)
normal_percent_pred = _as_percent(normal_count_pred, total_beats)
pvc_percent_pred = _as_percent(pvc_count_pred, total_beats)

# Distribution of the reference labels derived from the CSV annotations.
normal_count_true = np.sum(true_labels_binary == 0)
pvc_count_true = np.sum(true_labels_binary == 1)

# Beat-level disagreement between prediction and reference.
errors = np.sum(predictions != true_labels_binary)
error_rate = _as_percent(errors, total_beats)
accuracy = _as_percent(total_beats - errors, total_beats)

# Mean of the beat-to-beat heart rates (60 divided by each RR interval).
rr_intervals = np.diff(rpeaks) / sr
heart_rates = 60 / rr_intervals
if len(heart_rates) > 0:
    average_hr = np.mean(heart_rates)
else:
    average_hr = np.nan

# Report the prediction summary, one metric per line.
summary_lines = (
    "\nRingkasan Prediksi:",
    f"Jumlah beat Normal (prediksi): {normal_count_pred}",
    f"Jumlah beat PVC (prediksi): {pvc_count_pred}",
    f"Persentase beat Normal (prediksi): {normal_percent_pred:.2f}%",
    f"Persentase beat PVC (prediksi): {pvc_percent_pred:.2f}%",
    f"Jumlah label Normal (asli): {normal_count_true}",
    f"Jumlah label PVC (asli): {pvc_count_true}",
    f"Error Rate: {error_rate:.2f}%",
    f"Akurasi: {accuracy:.2f}%",
    f"Heart Rate Rata-rata: {average_hr:.0f} BPM",
)
print(*summary_lines, sep="\n")