In [None]:
import pandas as pd
import numpy as np
import neurokit2 as nk
import matplotlib.pyplot as plt
import seaborn as sns
import joblib

# Load the recording to classify. The raw ECG samples are taken from the
# 'ECG' column of the CSV file.
try:
    df = pd.read_csv('prediction-Test.csv')
except FileNotFoundError:
    # Name the file that is actually missing, and stop with a non-zero status.
    # `raise SystemExit` is used instead of the interactive-only `exit()` helper.
    print("File 'prediction-Test.csv' tidak ditemukan. Pastikan file ada di direktori yang sama.")
    raise SystemExit(1) from None

try:
    ecg = df['ECG'].values
except KeyError:
    # The file exists but does not have the expected column.
    print("Kolom 'ECG' tidak ditemukan di file 'prediction-Test.csv'.")
    raise SystemExit(1) from None

sr = 267  # Sampling rate in samples per second; sample offsets are divided by it to obtain seconds
start_sample = 0  # First sample index of the window that is visualised
end_sample = 1602   # Sample index at which the visualised window ends (exclusive)

# Filter the raw ECG using NeuroKit's 'neurokit' cleaning method.
cleaned = nk.ecg_clean(ecg, sampling_rate=sr, method='neurokit')

# Locate the R-peaks in the cleaned signal; the 'ECG_R_Peaks' entry of the
# result is kept as an array of sample indices.
rpeaks_info = nk.ecg_findpeaks(cleaned, sampling_rate=sr)
rpeaks = np.array(rpeaks_info['ECG_R_Peaks'])

# Delineate the waves around each R-peak with the 'peak' method. Only the
# second return value is kept; later code reads 'ECG_Q_Peaks' and
# 'ECG_S_Peaks' from it.
_, info = nk.ecg_delineate(cleaned, rpeaks, sampling_rate=sr, method='peak')

# Per-beat feature extraction. `features` gets one row per detected R-peak
# with the columns [RR interval to the previous beat, RR interval to the
# next beat, QRS duration], all expressed in seconds. Missing values are NaN.
n_beats = len(rpeaks)

# Beat-to-beat intervals. The first beat has no predecessor and the last
# beat has no successor, so those two entries stay NaN.
rr = np.diff(rpeaks) / sr
rr_before = np.full(n_beats, np.nan)
rr_before[1:] = rr
rr_after = np.full(n_beats, np.nan)
rr_after[:-1] = rr


def _per_beat(key):
    """Return the delineated sample indices stored under *key*, one per beat.

    The result is a float array of length ``n_beats``: entries beyond the
    end of the stored list (or a missing key) are NaN, extra entries are
    ignored.
    """
    aligned = np.full(n_beats, np.nan)
    found = np.asarray(info.get(key, []), dtype=float)[:n_beats]
    aligned[:len(found)] = found
    return aligned


# QRS duration = S-peak minus Q-peak; NaN propagates when either is missing.
qrs_durations = (_per_beat('ECG_S_Peaks') - _per_beat('ECG_Q_Peaks')) / sr

# column_stack always yields a 2-D (n_beats, 3) array, including when no
# R-peak was detected, so later column-wise code can rely on the shape.
features = np.column_stack([rr_before, rr_after, qrs_durations])

# Fill the gaps in every feature column with the mean of that column's
# non-NaN values. Columns without any NaN are left untouched.
n_feature_columns = features.shape[1]
for column_index in range(n_feature_columns):
    missing = np.isnan(features[:, column_index])
    if missing.any():
        features[missing, column_index] = np.nanmean(features[:, column_index])

# Load the previously saved feature scaler.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load 'scaler.pkl' from a trusted source.
try:
    scaler = joblib.load('scaler.pkl')
except FileNotFoundError:
    # Stop with a non-zero status; `raise SystemExit` is used instead of the
    # interactive-only `exit()` helper.
    print("File 'scaler.pkl' tidak ditemukan. Jalankan kode untuk menyimpan scaler terlebih dahulu.")
    raise SystemExit(1) from None

# Normalise the features of the new recording with the loaded scaler.
features_normalized = scaler.transform(features)

# Load the previously trained and saved KNN model.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load 'knn_model.pkl' from a trusted source.
try:
    knn_model = joblib.load('knn_model.pkl')
except FileNotFoundError:
    # Stop with a non-zero status; `raise SystemExit` is used instead of the
    # interactive-only `exit()` helper.
    print("File 'knn_model.pkl' tidak ditemukan. Pastikan telah melatih dan menyimpan model sebelumnya.")
    raise SystemExit(1) from None

# Predict one label per beat from the normalised features.
predictions = knn_model.predict(features_normalized)

# Heart rate in beats per minute, derived from consecutive R-peak spacing:
# each RR interval (seconds) is converted to an instantaneous rate.
rr_intervals = np.diff(rpeaks) / sr
heart_rates = 60 / rr_intervals

# Average the instantaneous rates; with no interval available the result is NaN.
if len(heart_rates) > 0:
    average_hr = np.mean(heart_rates)
else:
    average_hr = np.nan

# Visualise only the chosen sample window [start_sample, end_sample).
# Every x-coordinate below is absolute recording time in seconds
# (sample index / sr), so the trace, the markers and the range shown in the
# title stay aligned for any start_sample.
cleaned_segment = cleaned[start_sample:end_sample]
rpeaks_segment_indices = np.where((rpeaks >= start_sample) & (rpeaks < end_sample))[0]
# Cast so the values are usable as array indices even when no peak was found
# (an empty array defaults to a float dtype).
rpeaks_segment = rpeaks[rpeaks_segment_indices].astype(int)

# Build the axis from the samples actually present: the slice is shorter than
# end_sample - start_sample when the recording ends inside the window, and
# plt.plot requires x and y of equal length.
time_axis = (start_sample + np.arange(len(cleaned_segment))) / sr
rpeaks_time = rpeaks_segment / sr

plt.figure(figsize=(12, 6))
plt.plot(time_axis, cleaned_segment, color='blue', alpha=0.7, linewidth=1, label='Cleaned ECG')
plt.scatter(rpeaks_time, cleaned[rpeaks_segment], color='red', s=60, marker='x', label='R')

# Q and S markers: keep every delineated (non-NaN) index inside the window.
for wave, color in zip(['ECG_Q_Peaks', 'ECG_S_Peaks'], ['orange', 'green']):
    wave_samples = np.array(
        [int(p) for p in info.get(wave, []) if not np.isnan(p) and start_sample <= p < end_sample],
        dtype=int,
    )
    if wave_samples.size:
        plt.scatter(wave_samples / sr, cleaned[wave_samples], s=50, color=color, label=wave.split('_')[1])

# Per-beat annotations. The Q/S lists are looked up once, outside the loop.
q_peaks = info.get('ECG_Q_Peaks', [])
s_peaks = info.get('ECG_S_Peaks', [])
for i in rpeaks_segment_indices:
    rpeak = int(rpeaks[i])
    rpeak_time = rpeak / sr
    r_amplitude = cleaned[rpeak]

    # RR interval to the previous beat, drawn only when that beat is also
    # inside the window.
    if i > 0 and rpeaks[i-1] >= start_sample:
        rr_prev = features[i, 0]
        prev_time = rpeaks[i-1] / sr
        plt.hlines(y=r_amplitude + 0.2, xmin=prev_time, xmax=rpeak_time, color='black', linestyle='--', alpha=0.5)
        plt.text((prev_time + rpeak_time) / 2, r_amplitude + 0.25, f'{rr_prev:.2f}s', fontsize=8, ha='center')

    # QRS duration bar, drawn only when both Q and S were found inside the window.
    q_peak = q_peaks[i] if i < len(q_peaks) else np.nan
    s_peak = s_peaks[i] if i < len(s_peaks) else np.nan
    if not np.isnan(q_peak) and not np.isnan(s_peak) and start_sample <= q_peak < end_sample and start_sample <= s_peak < end_sample:
        qrs_dur = features[i, 2]
        # The delineation result may hold floats; cast before indexing.
        q_amplitude = cleaned[int(q_peak)]
        q_time = q_peak / sr
        s_time = s_peak / sr
        plt.hlines(y=q_amplitude - 0.1, xmin=q_time, xmax=s_time, color='black', linestyle='-', alpha=0.5)
        plt.text((q_time + s_time) / 2, q_amplitude - 0.15, f'{qrs_dur:.2f}s', fontsize=8, ha='center')

    # Predicted class under the R-peak: label 1 is shown as PVC, anything else as Normal.
    pred_label = 'PVC' if predictions[i] == 1 else 'Normal'
    plt.text(rpeak_time, r_amplitude - 0.2, pred_label, fontsize=8, ha='center', color='red' if pred_label == 'PVC' else 'green')

plt.title(f'Deteksi Interval RR, Durasi QRS, dan Prediksi - Waktu {start_sample/sr:.2f} hingga {end_sample/sr:.2f} detik')
plt.xlabel('Waktu (detik)')
plt.ylabel('Amplitudo (mV)')
plt.legend(loc='upper right')
plt.grid(True)
# NOTE(review): the file name ends in "_seconds" but the two numbers are
# sample indices; the name is kept unchanged so existing outputs still match.
plt.savefig(f'ecg_prediction_plot_range_{start_sample}_{end_sample}_seconds.png')
plt.show()

# Summary over all predicted beats: label 0 is counted as Normal and
# label 1 as PVC; percentages are relative to the total number of beats.
total_beats = len(predictions)

normal_count = np.sum(predictions == 0)
normal_percent = (normal_count / total_beats) * 100

pvc_count = np.sum(predictions == 1)
pvc_percent = (pvc_count / total_beats) * 100

summary_lines = [
    "\nRingkasan Prediksi (Semua Beat):",
    f"Jumlah beat Normal: {normal_count}",
    f"Jumlah beat PVC: {pvc_count}",
    f"Persentase beat Normal: {normal_percent:.2f}%",
    f"Persentase beat PVC: {pvc_percent:.2f}%",
    f"Heart Rate Rata-rata: {average_hr:.0f} BPM",
]
print("\n".join(summary_lines))