In [None]:
# ============================================================
# ✅ 1. Check that a CUDA GPU is available
# ============================================================

import torch, os
# Fail fast when no CUDA device is visible; the model and batches below are
# moved to `device`.
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"✅ Using GPU: {torch.cuda.get_device_name(0)}")
else:
    raise SystemError("❌ GPU not detected! Go to Runtime → Change runtime type → GPU")

# ============================================================
# ✅ 2. Install the required libraries
# ============================================================
!pip install torch torchvision torchaudio scikit-learn tqdm matplotlib -q

import numpy as np
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import matplotlib.pyplot as plt
from tqdm import tqdm

# ============================================================
# ✅ 3. Path to the pretrained weight file
# ============================================================
weights_path = "/kaggle/input/ecg-fm-pretrained/pytorch/default/1/mimic_iv_ecg_physionet_pretrained.pt"  # 🔁 replace with the path to your own file

# Stop early with a clear message when the checkpoint file is missing.
if os.path.exists(weights_path):
    print(f"✅ Pretrained weight found at: {weights_path}")
else:
    raise FileNotFoundError(f"❌ Không tìm thấy pretrained weight tại: {weights_path}")

# ============================================================
# ✅ 4. Dataset (layout: dataset/normal, dataset/abnormal)
# ============================================================
class ECGDataset(Dataset):
    """Dataset of ``.npy`` ECG recordings stored in one sub-folder per class.

    Args:
        root_dir: Directory that contains one sub-folder per label name.
        label_map: Mapping from sub-folder name to integer class index.
            Defaults to ``{"normal": 0, "abnormal": 1}``.

    Raises:
        FileNotFoundError: If a sub-folder named in ``label_map`` does not
            exist (propagated from ``os.listdir``).
    """

    def __init__(self, root_dir, label_map=None):
        self.samples = []
        label_map = label_map or {"normal": 0, "abnormal": 1}
        for label_name, label_idx in label_map.items():
            folder = os.path.join(root_dir, label_name)
            # os.listdir returns entries in arbitrary order; sorting keeps the
            # sample indices (and any index-based split) stable across runs.
            for file_name in sorted(os.listdir(folder)):
                if file_name.endswith(".npy"):
                    self.samples.append((os.path.join(folder, file_name), label_idx))

    def __len__(self):
        """Return the number of ``.npy`` files found."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Load one recording and return ``(signal, label)`` tensors.

        The signal is z-score normalised over the whole array and gets a
        leading channel axis; the label is a ``torch.long`` scalar.
        """
        path, label = self.samples[idx]
        ecg = np.load(path).astype(np.float32)
        # The small epsilon avoids a division by zero on a constant signal.
        ecg = (ecg - np.mean(ecg)) / (np.std(ecg) + 1e-6)
        return torch.tensor(ecg).unsqueeze(0), torch.tensor(label, dtype=torch.long)

dataset_root = "/kaggle/input/ecg-fm-finetune"  # 🔁 replace with the path to your dataset
full_dataset = ECGDataset(dataset_root)

# Nothing can be trained or evaluated on an empty dataset; stop with a clear
# message instead of failing somewhere further down.
if len(full_dataset) == 0:
    raise RuntimeError(f"❌ No .npy files found under: {dataset_root}")

# Train/val/test split (80/10/10); the test split absorbs the rounding remainder.
train_size = int(0.8 * len(full_dataset))
val_size   = int(0.1 * len(full_dataset))
test_size  = len(full_dataset) - train_size - val_size

# Seed the split so the same indices land in the same subset on every run;
# with an unseeded split the reported test metrics are not comparable
# between runs.
split_generator = torch.Generator().manual_seed(42)
train_ds, val_ds, test_ds = random_split(
    full_dataset, [train_size, val_size, test_size], generator=split_generator
)

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True, pin_memory=True, num_workers=2)
val_loader   = DataLoader(val_ds, batch_size=32, shuffle=False, pin_memory=True, num_workers=2)
test_loader  = DataLoader(test_ds, batch_size=32, shuffle=False, pin_memory=True, num_workers=2)

# ============================================================
# ✅ 5. ECG-FM model (compact version for 1-lead, 130 Hz)
# ============================================================
class ECGFMClassifier(nn.Module):
    """Small 1-D CNN classifier for single-channel ECG signals.

    Three ``Conv1d`` + ``ReLU`` stages followed by global average pooling
    produce a 64-dimensional feature vector, which a two-layer MLP maps to
    class logits.

    Args:
        input_dim: Not used by any layer (the adaptive pooling accepts any
            signal length); kept so existing callers keep working.
        hidden_dim: Width of the hidden layer in the classifier head.
        num_classes: Number of output logits.
    """

    def __init__(self, input_dim=130, hidden_dim=256, num_classes=2):
        super().__init__()
        # padding = kernel_size // 2 keeps the signal length unchanged.
        self.encoder = nn.Sequential(
            nn.Conv1d(1, 16, 7, padding=3),
            nn.ReLU(),
            nn.Conv1d(16, 32, 5, padding=2),
            nn.ReLU(),
            nn.Conv1d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool1d(1)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, num_classes)
        )

    def forward(self, x, return_features=False):
        """Compute class logits for a batch shaped ``(batch, 1, length)``.

        Args:
            x: Input tensor with a single channel.
            return_features: When true, also return the pooled encoder
                output. This matches the ``forward`` signature of the copy
                of this class used by the analysis pipeline.

        Returns:
            ``logits`` of shape ``(batch, num_classes)``, or the tuple
            ``(logits, features)`` with ``features`` of shape ``(batch, 64)``.
        """
        feats = self.encoder(x)
        logits = self.classifier(feats)
        if return_features:
            return logits, feats.squeeze(-1)
        return logits

model = ECGFMClassifier().to(device)

# ============================================================
# ✅ 6. Load pretrained weights
# ============================================================
# Best effort: training continues from random initialisation when the
# checkpoint cannot be read or shares no tensors with this architecture.
try:
    # NOTE(review): torch.load unpickles arbitrary objects — only load
    # checkpoints that come from a trusted source.
    checkpoint = torch.load(weights_path, map_location="cpu")

    # Checkpoints commonly nest the weights under a "state_dict" or "model"
    # key; unwrap one level when that is the case.
    pretrained_dict = checkpoint
    for wrapper_key in ("state_dict", "model"):
        if isinstance(pretrained_dict, dict) and isinstance(pretrained_dict.get(wrapper_key), dict):
            pretrained_dict = pretrained_dict[wrapper_key]
            break

    model_dict = model.state_dict()
    # Keep only tensors whose name AND shape match this model; a same-named
    # tensor with a different shape would make load_state_dict raise.
    pretrained_dict = {
        k: v
        for k, v in pretrained_dict.items()
        if k in model_dict and getattr(v, "shape", None) == model_dict[k].shape
    }

    if pretrained_dict:
        model_dict.update(pretrained_dict)
        model.load_state_dict(model_dict)
        print(f"✅ Pretrained weights loaded successfully ({len(pretrained_dict)}/{len(model_dict)} tensors).")
    else:
        # Do not claim success when nothing was actually transferred.
        print("⚠️ Warning: No pretrained tensors match this model; starting from random initialization.")
except Exception as e:
    print("⚠️ Warning: Could not load pretrained weights:", e)

# ============================================================
# ✅ 7. Freeze encoder (Phase 1)
# ============================================================
# Phase 1 trains only the classifier head: gradients are switched off for
# every encoder parameter.
for p in model.encoder.parameters():
    p.requires_grad_(False)

criterion = nn.CrossEntropyLoss()
# Hand the optimizer only the parameters that still require gradients.
trainable_params = [param for param in model.parameters() if param.requires_grad]
optimizer = optim.Adam(trainable_params, lr=1e-3)
num_epochs = 10

# Per-epoch loss history, consumed by the learning-curve plot.
train_losses, val_losses = [], []

# ============================================================
# ✅ 8. Train phase 1
# ============================================================
for epoch in range(num_epochs):
    # ---- optimisation pass over the training set ----
    model.train()
    total_loss = 0
    progress = tqdm(train_loader, desc=f"[Epoch {epoch+1}/{num_epochs}] Training", leave=False)
    for x, y in progress:
        x = x.to(device, non_blocking=True)
        y = y.to(device, non_blocking=True)
        optimizer.zero_grad()
        loss = criterion(model(x), y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Mean of the per-batch losses.
    train_loss = total_loss / len(train_loader)

    # ---- validation pass (no gradient tracking) ----
    model.eval()
    val_loss = 0
    preds, labels = [], []
    with torch.no_grad():
        for x, y in val_loader:
            x = x.to(device)
            y = y.to(device)
            out = model(x)
            val_loss += criterion(out, y).item()
            # Predicted class = index of the largest logit.
            preds.extend(out.argmax(1).cpu().numpy())
            labels.extend(y.cpu().numpy())
    val_loss /= len(val_loader)
    acc = accuracy_score(labels, preds)
    print(f"Epoch {epoch+1}/{num_epochs} - Train loss: {train_loss:.4f} | Val loss: {val_loss:.4f} | Val acc: {acc:.4f}")

    # Record the history for the learning curve.
    train_losses.append(train_loss)
    val_losses.append(val_loss)

# ============================================================
# ✅ 9. Unfreeze encoder (Fine-tune Phase 2)
# ============================================================
# Phase 2 updates the whole network: re-enable the encoder gradients and
# build a fresh optimizer over all parameters with a lower learning rate.
for p in model.encoder.parameters():
    p.requires_grad_(True)
optimizer = optim.Adam(model.parameters(), lr=5e-4)

# NOTE(review): this phase runs no validation pass and does not append to
# train_losses / val_losses, so the learning curve covers phase 1 only.
for epoch in range(5):
    model.train()
    total_loss = 0
    finetune_bar = tqdm(train_loader, desc=f"[Fine-tune {epoch+1}/5]", leave=False)
    for x, y in finetune_bar:
        x = x.to(device, non_blocking=True)
        y = y.to(device, non_blocking=True)
        optimizer.zero_grad()
        out = model(x)
        loss = criterion(out, y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    mean_loss = total_loss / len(train_loader)
    print(f"Fine-tune Epoch {epoch+1} - Loss: {mean_loss:.4f}")

# ============================================================
# ✅ 10. Save the fine-tuned weights
# ============================================================
save_path = "/kaggle/working//ecg_finetuned_130hz.pt"
# Persist only the state dict, not the whole module object.
finetuned_state = model.state_dict()
torch.save(finetuned_state, save_path)
print(f"💾 Fine-tuned weights saved to: {save_path}")

# ============================================================
# ✅ 11. Learning curve
# ============================================================
# One line per recorded loss history, indexed by epoch.
plt.figure(figsize=(6, 4))
for history, curve_label in ((train_losses, "Train Loss"), (val_losses, "Val Loss")):
    plt.plot(history, label=curve_label)
plt.legend()
plt.title("Learning Curve")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.show()

# ============================================================
# ✅ 12. Evaluation
# ============================================================
# Collect predictions and ground-truth labels over the held-out test split.
model.eval()
all_preds, all_labels = [], []
with torch.no_grad():
    for x, y in test_loader:
        x = x.to(device)
        y = y.to(device)
        out = model(x)
        all_preds.extend(out.argmax(1).cpu().numpy())
        all_labels.extend(y.cpu().numpy())

# Binary metrics; scikit-learn treats label 1 as the positive class by default.
acc = accuracy_score(all_labels, all_preds)
prec = precision_score(all_labels, all_preds)
rec = recall_score(all_labels, all_preds)
f1 = f1_score(all_labels, all_preds)

print(f"\n✅ Test Accuracy: {acc:.4f}")
print(f"Precision: {prec:.4f}, Recall: {rec:.4f}, F1: {f1:.4f}")
print("Confusion Matrix:\n", confusion_matrix(all_labels, all_preds))


In [None]:
# ============================================================
# ✅ ECG-FM Fine-tuned Inference Script
# ============================================================

import torch
import torch.nn as nn
import numpy as np

# ============================================================
# ✅ 1. Re-define the ECGFMClassifier model
# ============================================================
class ECGFMClassifier(nn.Module):
    """1-D convolutional ECG classifier used to restore the fine-tuned weights.

    The layer layout (and therefore the ``state_dict`` key names) must stay
    identical to the class the weights were saved from.

    Args:
        input_dim: Accepted for compatibility but not used by any layer.
        hidden_dim: Size of the hidden linear layer in the classifier head.
        num_classes: Number of output logits.
    """

    def __init__(self, input_dim=130, hidden_dim=256, num_classes=2):
        super().__init__()
        # Feature extractor: three conv stages, then global average pooling
        # down to one value per channel.
        self.encoder = nn.Sequential(
            nn.Conv1d(1, 16, 7, padding=3),
            nn.ReLU(),
            nn.Conv1d(16, 32, 5, padding=2),
            nn.ReLU(),
            nn.Conv1d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.AdaptiveAvgPool1d(1)
        )
        # Classification head on the 64 pooled channels.
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, num_classes)
        )

    def forward(self, x, return_features=False):
        """Return logits for ``x`` shaped ``(batch, 1, length)``.

        Args:
            x: Single-channel input batch.
            return_features: Optional flag; when true the pooled encoder
                output is returned as well, so this copy of the class has
                the same ``forward`` signature as the analysis-pipeline copy.

        Returns:
            ``logits``, or ``(logits, features)`` where ``features`` has
            shape ``(batch, 64)``.
        """
        feats = self.encoder(x)
        logits = self.classifier(feats)
        if return_features:
            return logits, feats.squeeze(-1)
        return logits

# ============================================================
# ✅ 2. Load the model and its fine-tuned weights
# ============================================================
# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

model_path = "/kaggle/working/ecg_finetuned_130hz.pt"  # 🔁 replace with the path to your weight file
model = ECGFMClassifier().to(device)
# Restore the saved parameters, mapping them onto the selected device.
finetuned_state = torch.load(model_path, map_location=device)
model.load_state_dict(finetuned_state)
model.eval()  # switch to evaluation mode

print(f"✅ Model loaded from: {model_path}")

# ============================================================
# ✅ 3. Predict from a single ECG file (.npy)
# ============================================================
def predict_ecg(file_path, model, device):
    """Classify the ECG signal stored in one ``.npy`` file.

    Args:
        file_path: Path of the ``.npy`` file to load.
        model: Callable that returns logits for a ``[1, 1, length]`` tensor.
        device: Device the input tensor is moved to before the forward pass.

    Returns:
        Tuple ``(label_str, probs)``. ``label_str`` is ``"Normal"`` when
        class 0 has the highest probability and ``"Abnormal"`` otherwise;
        ``probs`` is the flattened softmax output as a NumPy array.
    """
    signal = np.load(file_path).astype(np.float32)

    # Amplitude normalisation with the same formula as during training.
    centered = signal - np.mean(signal)
    signal = centered / (np.std(signal) + 1e-6)

    # Add batch and channel axes -> [1, 1, length]
    # (assumes the file holds a 1-D signal — TODO confirm).
    batch = torch.tensor(signal)[None, None].to(device)

    with torch.no_grad():
        logits = model(batch)
        probs = torch.softmax(logits, dim=1).cpu().numpy().flatten()
        winner = np.argmax(probs)

    if winner == 0:
        return "Normal", probs
    return "Abnormal", probs

# ============================================================
# ✅ 4. Usage example
# ============================================================
file_path = "/kaggle/input/ecg-fm-finetune/abnormal/PTBXL_records100_00000_00030_lr_seg0000.npy"  # 🔁 replace with the correct path

label, probs = predict_ecg(file_path, model, device)

# Report only the file name, not the full path.
file_name = file_path.split('/')[-1]
p_normal, p_abnormal = probs[0], probs[1]
print(f"\n📊 Prediction for {file_name}:")
print(f"   👉 {label}")
print(f"   Probabilities → Normal: {p_normal:.4f}, Abnormal: {p_abnormal:.4f}")


In [15]:
# ============================================================
# ✅ ECG Analysis Pipeline: DL (ECG-FM) + LLM (MedAlpaca)
# ============================================================
!pip install neurokit2 --quiet
# ============================================================
# ✅ ECG Analysis Pipeline: DL (ECG-FM) + JSON output for LLM
# ============================================================

import torch
import torch.nn as nn
import numpy as np
import neurokit2 as nk
import json
from datetime import datetime

# ============================================================
# 1️⃣ ECG-FM fine-tuned model
# ============================================================
class ECGFMClassifier(nn.Module):
    """Compact 1-D CNN classifier that can also expose its pooled features.

    Args:
        input_dim: Accepted but not used by any layer.
        hidden_dim: Size of the hidden linear layer in the classifier head.
        num_classes: Number of output logits.
    """

    def __init__(self, input_dim=130, hidden_dim=256, num_classes=2):
        super().__init__()
        # (in_channels, out_channels, kernel_size) per conv stage; the padding
        # of kernel_size // 2 keeps the signal length unchanged.
        conv_specs = [(1, 16, 7), (16, 32, 5), (32, 64, 3)]
        encoder_layers = []
        for c_in, c_out, kernel in conv_specs:
            encoder_layers.append(nn.Conv1d(c_in, c_out, kernel, padding=kernel // 2))
            encoder_layers.append(nn.ReLU())
        # Global average pooling: one value per channel.
        encoder_layers.append(nn.AdaptiveAvgPool1d(1))
        self.encoder = nn.Sequential(*encoder_layers)

        head_layers = [
            nn.Flatten(),
            nn.Linear(64, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x, return_features=False):
        """Return logits, optionally together with the pooled encoder output.

        Args:
            x: Single-channel input batch shaped ``(batch, 1, length)``.
            return_features: When true, return ``(logits, features)`` where
                ``features`` is the encoder output with its trailing
                length-1 axis removed.
        """
        pooled = self.encoder(x)
        logits = self.classifier(pooled)
        return (logits, pooled.squeeze(-1)) if return_features else logits


# ============================================================
# 2️⃣ Load model weight
# ============================================================
# Use the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
model_path = "/kaggle/input/fined-tune-130hz/pytorch/default/1/ecg_finetuned_130hz.pt"  # 🔁 your actual path
model = ECGFMClassifier().to(device)
# Restore the fine-tuned parameters onto the selected device.
finetuned_state = torch.load(model_path, map_location=device)
model.load_state_dict(finetuned_state)
model.eval()


# ============================================================
# 3️⃣ Deep model inference + feature extraction
# ============================================================
def deep_model_infer(ecg_signal, model, fs=130):
    """Run the classifier on one ECG signal; return prediction and embedding.

    Args:
        ecg_signal: Array-like of ECG samples. Batch and channel axes are
            added here, so a 1-D signal is expected.
        model: Module whose ``forward(x, return_features=True)`` returns
            ``(logits, features)``.
        fs: Sampling rate in Hz. Not used by the computation; kept for
            interface compatibility.

    Returns:
        dict with keys ``"prediction"`` ("Normal" for class 0, otherwise
        "Abnormal"), ``"confidence"`` (probability of the predicted class),
        ``"probabilities"`` (per-class, rounded to 4 decimals) and
        ``"embedding"`` (flattened feature vector as a list of floats).
    """
    signal = np.asarray(ecg_signal)
    sig_norm = (signal - np.mean(signal)) / (np.std(signal) + 1e-6)

    # Put the input on the device the model actually lives on, rather than
    # relying on a module-level `device` variable that may not match the
    # model that was passed in.
    try:
        target_device = next(model.parameters()).device
    except StopIteration:
        # A model without parameters has no device of its own.
        target_device = torch.device("cpu")

    x = torch.tensor(sig_norm).unsqueeze(0).unsqueeze(0).float().to(target_device)

    with torch.no_grad():
        out, feat = model(x, return_features=True)
        probs = torch.softmax(out, dim=1).cpu().numpy().flatten()
        label = int(np.argmax(probs))

    return {
        "prediction": "Normal" if label == 0 else "Abnormal",
        "confidence": round(float(probs[label]), 4),
        "probabilities": {
            "Normal": round(float(probs[0]), 4),
            "Abnormal": round(float(probs[1]), 4)
        },
        "embedding": feat.cpu().numpy().flatten().tolist()
    }


# ============================================================
# 4️⃣ Compute physiological features (using NeuroKit2)
# ============================================================
import scipy.signal as sps

import scipy.signal as sps
import numpy as np
import neurokit2 as nk

def compute_physio(ecg_signal, fs=130):
    """Extract basic physiological features from a wearable ECG (Polar H10).

    Pipeline: remove the DC offset, band-pass filter 0.5–40 Hz, detect R
    peaks with NeuroKit2 (Pan–Tompkins cleaning), drop RR intervals outside
    400–1500 ms, then derive heart rate and HRV statistics.

    Args:
        ecg_signal: NumPy array of ECG samples.
        fs: Sampling rate in Hz.

    Returns:
        dict with the keys ``HeartRate``, ``HRV_RMSSD``, ``HRV_SDNN``,
        ``R_Peaks_Detected``, ``SignalEnergy``, ``R_Amplitude`` and
        ``T_Amplitude``. ``HeartRate`` / ``HRV_RMSSD`` are the string
        ``"N/A"`` when the value is rejected as implausible or cannot be
        computed. If any step fails, ``{"error": <message>}`` is returned
        instead of raising.
    """
    try:
        # 1) Remove the DC offset.
        ecg = ecg_signal - np.mean(ecg_signal)

        # 2) Band-pass 0.5–40 Hz; cut-offs are normalised by the Nyquist rate.
        nyquist = fs / 2
        b, a = sps.butter(3, [0.5 / nyquist, 40 / nyquist], btype='band')
        ecg_filt = sps.filtfilt(b, a, ecg)

        # 3) R-peak detection on the Pan–Tompkins-cleaned signal.
        cleaned = nk.ecg_clean(ecg_filt, sampling_rate=fs, method="pantompkins1985")
        peaks, _ = nk.ecg_peaks(cleaned, sampling_rate=fs)
        rpeaks = np.where(peaks["ECG_R_Peaks"] == 1)[0]

        # 4) At least three peaks are needed for two RR intervals.
        if len(rpeaks) < 3:
            raise ValueError("Không đủ đỉnh R để tính HR/HRV")

        # 5) RR intervals in ms; keep only the physiologically plausible ones.
        rr_all = np.diff(rpeaks) / fs * 1000
        rr_valid = (rr_all > 400) & (rr_all < 1500)
        rr_intervals = rr_all[rr_valid]

        if len(rr_intervals) < 2:
            raise ValueError("Không đủ RR hợp lệ sau khi lọc")

        # 6) Heart rate and HRV.
        hr = 60000 / np.mean(rr_intervals)
        hrv_sdnn = np.std(rr_intervals)

        # RMSSD is defined on *successive* intervals. Differencing the series
        # after outlier removal would pair intervals that were separated by a
        # rejected beat and inflate the value, so only pairs that were
        # adjacent in the original sequence and both valid are used.
        both_valid = rr_valid[:-1] & rr_valid[1:]
        successive_diffs = np.diff(rr_all)[both_valid]
        if successive_diffs.size:
            hrv_rmssd = np.sqrt(np.mean(np.square(successive_diffs)))
        else:
            hrv_rmssd = np.nan

        # 7) Plausibility limits. With the 400–1500 ms RR filter above the
        # heart-rate limit cannot trigger; it is kept as a safety net.
        if hr < 30 or hr > 220:
            hr = np.nan
        if hrv_rmssd > 200:
            hrv_rmssd = np.nan

        # 8) Assemble the result.
        features = {
            "HeartRate": round(float(hr), 2) if not np.isnan(hr) else "N/A",
            "HRV_RMSSD": round(float(hrv_rmssd), 3) if not np.isnan(hrv_rmssd) else "N/A",
            "HRV_SDNN": round(float(hrv_sdnn), 3),
            "R_Peaks_Detected": int(len(rpeaks)),
            "SignalEnergy": round(float(np.sum(ecg_filt**2)), 4),
            # Maximum of the filtered signal.
            "R_Amplitude": round(float(np.max(ecg_filt)), 3),
            # NOTE(review): 95th percentile of the filtered signal, not a
            # delineated T-wave measurement — confirm this proxy is intended.
            "T_Amplitude": round(float(np.percentile(ecg_filt, 95)), 3)
        }

    except Exception as e:
        # Deliberate best effort: report the failure to the caller as data.
        features = {"error": f"ECG feature extraction failed: {e}"}

    return features




# ============================================================
# 5️⃣ Combine DL + physio → structured JSON
# ============================================================
def analyze_ecg(file_path, output_dir="/kaggle/working"):
    """Analyse one ECG ``.npy`` file and save the result as JSON.

    Combines the deep-model prediction from ``deep_model_infer`` with the
    features from ``compute_physio`` into one dictionary, writes it to
    ``<output_dir>/<input stem>_analysis.json`` and returns it.

    Args:
        file_path: Path of the ``.npy`` ECG file.
        output_dir: Directory for the JSON report; created when missing.
            Defaults to the previously hard-coded ``/kaggle/working``.

    Returns:
        The analysis dictionary that was written to disk.
    """
    import os  # local import so this cell does not depend on an earlier one

    ecg = np.load(file_path).astype(np.float32)
    result_dl = deep_model_infer(ecg, model)
    features = compute_physio(ecg)

    # Coarse morphology description derived only from the predicted class
    # (a placeholder, not a real clustering result).
    morph_label = "Normal pattern" if result_dl["prediction"] == "Normal" else "Abnormal morphology"

    file_name = os.path.basename(file_path)
    output = {
        "timestamp": datetime.now().isoformat(),
        "model": "ECG-FM fine-tuned 130Hz",
        "file_name": file_name,
        **result_dl,
        "physiological_features": features,
        "morphology_cluster": morph_label
    }

    # Strip only the final extension; str.replace would touch every ".npy"
    # occurrence and would leave names without that suffix unchanged.
    stem = os.path.splitext(file_name)[0]
    os.makedirs(output_dir, exist_ok=True)
    json_path = os.path.join(output_dir, f"{stem}_analysis.json")

    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(output, f, indent=4)

    print(f"✅ Saved analysis JSON → {json_path}")
    return output


# ============================================================
# 6️⃣ Prompt builder for MedAlpaca
# ============================================================
def build_prompt(analysis):
    """Format an analysis dictionary as the (Vietnamese) prompt for the LLM.

    Args:
        analysis: Dictionary with the keys ``prediction``, ``confidence``,
            ``morphology_cluster`` and ``physiological_features``.

    Returns:
        The prompt string. Measurements missing from
        ``physiological_features`` are rendered as ``?``.
    """
    feats = analysis["physiological_features"]

    def shown(key):
        # Fall back to "?" for measurements that are not available.
        return feats.get(key, '?')

    # NOTE(review): QRS_Duration, QTc and ST_Deviation are requested here but
    # are not produced by compute_physio in this file, so they render as "?".
    return f"""
Bạn là bác sĩ tim mạch AI.

Kết quả phân tích ECG (1-lead, 130 Hz, 10 s):

Phân loại: {analysis['prediction']} (độ tin cậy {analysis['confidence']*100:.1f}%)
Cụm hình thái: {analysis['morphology_cluster']}

Các thông số:
- Nhịp tim (HR): {shown('HeartRate')} bpm
- HRV (RMSSD): {shown('HRV_RMSSD')} ms
- QRS: {shown('QRS_Duration')} ms | QTc: {shown('QTc')} ms
- ST lệch: {shown('ST_Deviation')} mV
- Biên độ R/T: {shown('R_Amplitude')} / {shown('T_Amplitude')} mV

Hãy giải thích ý nghĩa sinh lý của các thông số trên,
đưa ra kết luận về tình trạng tim,
và khuyến nghị theo dõi hoặc khám tiếp theo.
"""


# ============================================================
# 7️⃣ Trial run
# ============================================================
file_path = "/kaggle/input/ecg-fm-finetune/abnormal/PTBXL_records100_00000_00003_lr_seg0000.npy"
analysis = analyze_ecg(file_path)
prompt = build_prompt(analysis)

# Show only the first 600 characters of the prompt.
prompt_preview = prompt[:600]
print("\n🧠 Prompt gửi LLM:\n", prompt_preview, "...")


✅ Saved analysis JSON → /kaggle/working/PTBXL_records100_00000_00003_lr_seg0000_analysis.json

🧠 Prompt gửi LLM:
 
Bạn là bác sĩ tim mạch AI.

Kết quả phân tích ECG (1-lead, 130 Hz, 10 s):

Phân loại: Abnormal (độ tin cậy 100.0%)
Cụm hình thái: Abnormal morphology

Các thông số:
- Nhịp tim (HR): 64.1 bpm
- HRV (RMSSD): N/A ms
- QRS: ? ms | QTc: ? ms
- ST lệch: ? mV
- Biên độ R/T: 4.179 / 0.794 mV

Hãy giải thích ý nghĩa sinh lý của các thông số trên,
đưa ra kết luận về tình trạng tim,
và khuyến nghị theo dõi hoặc khám tiếp theo.
 ...
