In [1]:
import torch
import random
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
import pandas as pd

from data_loader_darts import get_dataloaders_simple
# from darts_search_bdp import train_darts_search_bdp
from darts_search_bdp import train_darts_search_bdp
from model_build import FinalNetwork
from cell_plot import plot_cell

def set_seed(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global RNG and PyTorch (CPU and
    every CUDA device), then configures cuDNN for repeatable kernel selection.

    Args:
        seed: Integer seed applied to all generators (default 42).
    """
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # manual_seed_all covers every visible GPU rather than only the current
    # one; PyTorch silently ignores the call when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner can select different algorithms from run to run,
    # which would undermine the deterministic flag set above.
    torch.backends.cudnn.benchmark = False

Error.  nthreads cannot be larger than environment variable "NUMEXPR_MAX_THREADS" (64)

In [2]:

def _save_curve(train_values, val_values, short_name, ylabel, title, out_path):
    """Plot one per-epoch train/val metric pair and write the figure to ``out_path``."""
    plt.figure()
    plt.plot(train_values, label=f'Train {short_name}')
    plt.plot(val_values, label=f'Val {short_name}')
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.legend()
    plt.title(title)
    plt.savefig(out_path)
    plt.close()


def train_final_model(model, train_loader, val_loader, device, epochs=25,
                      checkpoint_path="best_final_model_78.pt"):
    """Train ``model`` with SGD and cosine annealing, keeping the best checkpoint.

    Each epoch runs one pass over ``train_loader`` followed by one evaluation
    pass over ``val_loader``. Whenever validation accuracy strictly improves,
    the model's ``state_dict`` is written to ``checkpoint_path``. After the last
    epoch the loss and accuracy histories are saved as ``final_loss.png`` and
    ``final_accuracy.png`` in the working directory.

    Args:
        model: Network to train; it is called as ``model(x)`` and must return
            class logits accepted by ``CrossEntropyLoss``.
        train_loader: Iterable yielding ``(x, y)`` training batches.
        val_loader: Iterable yielding ``(x, y)`` validation batches.
        device: Device the batches are moved to before the forward pass.
        epochs: Number of training epochs; also used as the cosine schedule's
            ``T_max``.
        checkpoint_path: File that receives the best ``state_dict``. The default
            keeps the file name this function has always written.

    Returns:
        None. Results are reported through printed logs and the saved files.
    """
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.025, momentum=0.9, weight_decay=3e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)

    best_acc = 0
    train_loss_list, val_loss_list, train_acc_list, val_acc_list = [], [], [], []

    for epoch in range(epochs):
        # ---- training pass ----
        model.train()
        total_loss, correct, total = 0, 0, 0
        for x, y in train_loader:
            # squeeze(-1) drops a trailing singleton dimension if there is one
            x, y = x.to(device).squeeze(-1), y.to(device)
            logits = model(x)
            loss = criterion(logits, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            correct += (logits.argmax(dim=1) == y).sum().item()
            total += y.size(0)

        # Loss is averaged per batch, accuracy per sample.
        train_loss = total_loss / len(train_loader)
        train_acc = correct / total
        train_loss_list.append(train_loss)
        train_acc_list.append(train_acc)

        # ---- validation pass (no gradients) ----
        model.eval()
        val_loss, val_correct, val_total = 0, 0, 0
        with torch.no_grad():
            for x, y in val_loader:
                x, y = x.to(device).squeeze(-1), y.to(device)
                logits = model(x)
                loss = criterion(logits, y)
                val_loss += loss.item()
                val_correct += (logits.argmax(dim=1) == y).sum().item()
                val_total += y.size(0)

        val_loss = val_loss / len(val_loader)
        val_acc = val_correct / val_total
        val_loss_list.append(val_loss)
        val_acc_list.append(val_acc)

        # Checkpoint only on a strict improvement in validation accuracy.
        if val_acc > best_acc:
            best_acc = val_acc
            torch.save(model.state_dict(), checkpoint_path)

        scheduler.step()
        print(f"[Final Train Epoch {epoch+1}] Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f} | Val Loss: {val_loss:.4f} | Val Acc: {val_acc:.4f}")

    _save_curve(train_loss_list, val_loss_list, 'Loss', 'Loss', 'Loss Curve', 'final_loss.png')
    _save_curve(train_acc_list, val_acc_list, 'Acc', 'Accuracy', 'Accuracy Curve', 'final_accuracy.png')

def evaluate_model(model, val_loader, device,
                   cm_path="confusion_matrix78.csv",
                   predictions_path="val_predictions78.csv"):
    """Run ``model`` over ``val_loader`` and export its confusion matrix and predictions.

    Args:
        model: Trained network; it is called as ``model(x)`` and must return
            class logits.
        val_loader: Iterable yielding ``(x, y)`` batches.
        device: Device the inputs are moved to before the forward pass.
        cm_path: CSV file that receives the confusion matrix. The default keeps
            the file name this function has always written.
        predictions_path: CSV file that receives the ``y_true`` / ``y_pred``
            columns. The default keeps the original file name.

    Returns:
        None. The confusion matrix is printed and both CSV files are written.
    """
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for x, y in val_loader:
            # squeeze(-1) drops a trailing singleton dimension if there is one
            x = x.to(device).squeeze(-1)
            logits = model(x)
            y_pred.extend(logits.argmax(dim=1).cpu().numpy())
            # .cpu() keeps this working when the loader yields GPU-resident labels
            y_true.extend(y.cpu().numpy())

    cm = confusion_matrix(y_true, y_pred)
    print("\nConfusion Matrix on Validation:")
    print(cm)
    pd.DataFrame(cm).to_csv(cm_path, index=False)
    # Report the path actually written; the message used to name a different file.
    print(f"[\u2713] Saved confusion matrix to {cm_path}")

    pd.DataFrame({"y_true": y_true, "y_pred": y_pred}).to_csv(predictions_path, index=False)
    print(f"[\u2713] Saved predictions to {predictions_path}")




In [3]:

# 1. Set random seed
# Seed the RNGs through the notebook's set_seed helper before any data
# loading or model construction so that the run is repeatable.
set_seed(42)
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# 2. Load data
# get_dataloaders_simple is imported from data_loader_darts; its result is
# unpacked here as (train loader, validation loader, number of classes).
# NOTE(review): the 60/40 split named in the log message is presumably applied
# inside get_dataloaders_simple - confirm there.
print("[INFO] Loading 60/40 split data...")
train_loader, val_loader, num_classes = get_dataloaders_simple(batch_size=32)
# Sample counts are read from the `.y` attribute of each loader's dataset.
print(f"[INFO] DARTS will run on {len(train_loader.dataset.y)} train samples and {len(val_loader.dataset.y)} val samples")

[INFO] Loading 60/40 split data...
[DEBUG] Loaded ./PSG/SC4001E0.npz → 841 samples
[DEBUG] Loaded ./PSG/SC4002E0.npz → 1127 samples
[DEBUG] Loaded ./PSG/SC4011E0.npz → 1103 samples
[DEBUG] Loaded ./PSG/SC4012E0.npz → 1186 samples
[DEBUG] Loaded ./PSG/SC4021E0.npz → 1025 samples
[DEBUG] Loaded ./PSG/SC4022E0.npz → 1009 samples
[DEBUG] Loaded ./PSG/SC4031E0.npz → 952 samples
[DEBUG] Loaded ./PSG/SC4032E0.npz → 911 samples
[DEBUG] Loaded ./PSG/SC4041E0.npz → 1235 samples
[DEBUG] Loaded ./PSG/SC4042E0.npz → 1200 samples
[DEBUG] Loaded ./PSG/SC4051E0.npz → 672 samples
[DEBUG] Loaded ./PSG/SC4052E0.npz → 1246 samples
[DEBUG] Loaded ./PSG/SC4061E0.npz → 843 samples
[DEBUG] Loaded ./PSG/SC4062E0.npz → 1016 samples
[DEBUG] Loaded ./PSG/SC4071E0.npz → 976 samples
[DEBUG] Loaded ./PSG/SC4072E0.npz → 1273 samples
[DEBUG] Loaded ./PSG/SC4081E0.npz → 1134 samples
[DEBUG] Loaded ./PSG/SC4082E0.npz → 1054 samples
[DEBUG] Loaded ./PSG/SC4091E0.npz → 1132 samples
[DEBUG] Loaded ./PSG/SC4092E0.npz → 1105

In [4]:
# 3. Run DARTS search with pruning
# train_darts_search_bdp is imported from darts_search_bdp. Its result is
# unpacked as (searched genotype, pruned train loader, pruned val loader);
# later cells read all three names.
# NOTE(review): pt / pv are presumably the pruning fractions for the train /
# validation sets, applied every `prune_every` epochs - confirm in
# darts_search_bdp.
# Long-running: the recorded output shows roughly 1650 s per epoch.
print("[INFO] Running DARTS search with BDP...")
searched_genotype, pruned_train_loader, pruned_val_loader = train_darts_search_bdp(
    train_loader, val_loader, num_classes,
    epochs=30, prune_every=5 , pt=0.05, pv=0.05,
    device=device
)

[INFO] Running DARTS search with BDP...

[Epoch 1/30] Starting...
[Epoch 1] Train Loss: 0.7079 | Acc: 0.5730 || Val Loss: 0.8832 | Acc: 0.6766
Precision: 0.6577 | Recall: 0.5815 | F1: 0.5482 | Time: 1667.62s
[Checkpoint] New best model saved at epoch 1 with Val Acc = 0.6766

[Epoch 2/30] Starting...
[Epoch 2] Train Loss: 0.5939 | Acc: 0.6656 || Val Loss: 0.7753 | Acc: 0.7167
Precision: 0.6569 | Recall: 0.5946 | F1: 0.5924 | Time: 1671.47s
[Checkpoint] New best model saved at epoch 2 with Val Acc = 0.7167

[Epoch 3/30] Starting...
[Epoch 3] Train Loss: 0.5366 | Acc: 0.6961 || Val Loss: 0.8359 | Acc: 0.6872
Precision: 0.6324 | Recall: 0.6301 | F1: 0.5802 | Time: 1653.25s

[Epoch 4/30] Starting...
[Epoch 4] Train Loss: 0.5081 | Acc: 0.7133 || Val Loss: 0.6896 | Acc: 0.7502
Precision: 0.6949 | Recall: 0.6343 | F1: 0.6330 | Time: 1655.48s
[Checkpoint] New best model saved at epoch 4 with Val Acc = 0.7502

[Epoch 5/30] Starting...
[Epoch 5] Train Loss: 0.4888 | Acc: 0.7260 || Val Loss: 0.804

In [5]:
!pip install pygraphviz

# 4. Visualize searched cells-----------------------------------------
print("[INFO] Visualizing searched cells...")

plot_cell(searched_genotype, 'normal')
plot_cell(searched_genotype, 'reduce')

# === Xuất biến searched_genotype ra file txt ===
with open("searched_genotype_20.txt", "w") as f:
    f.write(str(searched_genotype))

print("✅ Đã lưu searched_genotype vào 'searched_genotype.txt'")


Defaulting to user installation because normal site-packages is not writeable
Looking in indexes: https://pypi.org/simple, https://pypi.ngc.nvidia.com
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m25.1.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
[INFO] Visualizing searched cells...


  plt.tight_layout()


In [6]:

from sklearn.model_selection import KFold
from torch.utils.data import TensorDataset, DataLoader
import torch
import pandas as pd
import numpy as np

# 5. Prepare data for cross-validation
print("[INFO] Running 5-Fold Cross Validation on pruned data...")

# Merge the pruned train and validation splits into a single pool.
# The datasets are accessed through their `.X` / `.y` tensor attributes.
X_all = torch.cat([pruned_train_loader.dataset.X, pruned_val_loader.dataset.X], dim=0)
y_all = torch.cat([pruned_train_loader.dataset.y, pruned_val_loader.dataset.y], dim=0)

# Build the dataset
dataset = TensorDataset(X_all, y_all)

# === Export to CSV ===
# detach().cpu() leaves plain CPU tensors unchanged, so it is applied
# unconditionally: X and y are each handled regardless of the other's device,
# and tensors that track gradients can still be converted to NumPy.
X_all = X_all.detach().cpu()
y_all = y_all.detach().cpu()

# Convert to NumPy. Each sample is flattened to a single row so that np.hstack
# also works when X carries extra dimensions (the training loops in this
# notebook call x.squeeze(-1), which suggests a trailing singleton dimension
# may be present). A tensor that is already 2-D keeps its shape.
X_np = X_all.flatten(start_dim=1).numpy()
y_np = y_all.numpy().reshape(-1, 1)

# Stack features and label column side by side
data_np = np.hstack((X_np, y_np))

# Build a DataFrame with columns feature_0, feature_1, ..., label
num_features = X_np.shape[1]
column_names = [f"feature_{i}" for i in range(num_features)] + ["label"]
df = pd.DataFrame(data_np, columns=column_names)

# Save the CSV file
df.to_csv("pruned_dataset.csv", index=False)
print("✅ Đã lưu dữ liệu vào 'pruned_dataset.csv'")


[INFO] Running 5-Fold Cross Validation on pruned data...


In [7]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader, random_split
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
from genotypes import Genotype  # Đảm bảo bạn có class này
from model_build import FinalNetwork

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): this overrides any num_classes set by an earlier cell (the data
# loading cell unpacks one from get_dataloaders_simple) - confirm 10 matches
# the labels stored in the CSV.
num_classes = 10  # <-- change this if your dataset has a different number of classes

# Seed PyTorch so that weight initialisation and DataLoader shuffling are
# repeatable when this cell is run on its own; previously only the
# train/test split below was seeded.
torch.manual_seed(42)

# === 1. Load the data from CSV ===
df = pd.read_csv("pruned_dataset.csv")
X_np = df.drop("label", axis=1).values.astype("float32")
y_np = df["label"].values.astype("int64")

X_all = torch.tensor(X_np)
y_all = torch.tensor(y_np)

# === 2. Load searched_genotype from the text file ===
with open("searched_genotype_20.txt", "r") as f:
    genotype_str = f.read()
# SECURITY NOTE(review): eval executes whatever the file contains. That is only
# acceptable because this notebook writes the file itself; never point this at
# a file from an untrusted source.
searched_genotype = eval(genotype_str)

# === 3. Split into train/test with an 80/20 ratio (stratified by label) ===
X_train, X_test, y_train, y_test = train_test_split(
    X_all, y_all, test_size=0.2, random_state=42, stratify=y_all
)

train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=64, shuffle=True)
test_loader = DataLoader(TensorDataset(X_test, y_test), batch_size=64)

# === 4. Train with early stopping ===
# NOTE(review): the best epoch is selected on test_loader, i.e. on the same
# split whose metrics are reported afterwards, so the reported score is
# optimistically biased. Consider carving a validation split out of the
# training data for model selection.
model = FinalNetwork(C=8, num_classes=num_classes, layers=7, genotype=searched_genotype).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
criterion = nn.CrossEntropyLoss()

best_val_acc = 0.0
best_epoch = 0
best_pred, best_true = [], []
patience = 15      # stop after this many consecutive epochs without improvement
no_improve = 0

for epoch in range(50):
    model.train()
    train_true, train_pred = [], []

    for x, y in train_loader:
        x, y = x.to(device).squeeze(-1), y.to(device)
        optimizer.zero_grad()
        output = model(x)
        loss = criterion(output, y)
        loss.backward()
        optimizer.step()

        train_true.extend(y.cpu().numpy())
        train_pred.extend(output.argmax(dim=1).cpu().numpy())

    train_acc = accuracy_score(train_true, train_pred)

    # === Evaluation
    model.eval()
    val_true, val_pred = [], []
    with torch.no_grad():
        for x, y in test_loader:
            x, y = x.to(device).squeeze(-1), y.to(device)
            output = model(x)
            val_true.extend(y.cpu().numpy())
            val_pred.extend(output.argmax(dim=1).cpu().numpy())

    val_acc = accuracy_score(val_true, val_pred)

    print(f"[Epoch {epoch+1}] Train Acc: {train_acc:.4f} | Eval Acc: {val_acc:.4f}")

    # Keep the predictions of the best epoch; they feed the final evaluation.
    # val_pred / val_true are fresh lists every epoch, so no copy is needed.
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_epoch = epoch + 1
        best_pred = val_pred
        best_true = val_true
        no_improve = 0
    else:
        no_improve += 1

    if no_improve >= patience:
        print(f"[EarlyStopping] No improvement in {patience} epochs. Stopping at epoch {epoch+1}.")
        break

print(f"✅ Best Val Acc: {best_val_acc:.4f} at epoch {best_epoch}")

# === Final Evaluation ===
def evaluate_metrics(y_true, y_pred, num_classes):
    """Compute overall and per-class classification metrics.

    Args:
        y_true: Ground-truth integer labels (array-like).
        y_pred: Predicted integer labels, same length as ``y_true``.
        num_classes: Number of classes; class ids are taken to be
            ``0 .. num_classes - 1``.

    Returns:
        Tuple ``(acc, mf1, mgm, prec, rec, f1s, gmeans, cm)``:
            acc: overall accuracy.
            mf1: macro-averaged F1 over the labels present in the data.
            mgm: multiclass G-mean, i.e. the K-th root of the product of the
                recalls of the K classes that occur in ``y_true``; 0.0 when
                any of those recalls is 0 or no class has support.
            prec, rec, f1s: per-class arrays of length ``num_classes``.
            gmeans: list of per-class recalls of length ``num_classes``
                (0 for a class with no true samples).
            cm: ``num_classes x num_classes`` confusion matrix.
    """
    # Accept plain lists as well as arrays; the elementwise comparisons below
    # need NumPy semantics.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    # Passing explicit labels pins every per-class result to num_classes
    # entries, so callers can index them by class id even when a class is
    # missing from both y_true and y_pred.
    labels = np.arange(num_classes)

    acc = accuracy_score(y_true, y_pred)
    mf1 = f1_score(y_true, y_pred, average='macro', zero_division=0)
    prec = precision_score(y_true, y_pred, labels=labels, average=None, zero_division=0)
    rec = recall_score(y_true, y_pred, labels=labels, average=None, zero_division=0)
    f1s = f1_score(y_true, y_pred, labels=labels, average=None, zero_division=0)

    gmeans = []
    supported = []  # recalls of the classes that actually occur in y_true
    for c in range(num_classes):
        tp = np.sum((y_pred == c) & (y_true == c))
        fn = np.sum((y_pred != c) & (y_true == c))
        has_support = (tp + fn) > 0
        recall_c = tp / (tp + fn) if has_support else 0
        gmeans.append(recall_c)
        if has_support:
            supported.append(recall_c)

    # A geometric mean over K values is the K-th root of their product; a plain
    # square root is only correct for K == 2. Classes without any true sample
    # are left out so that they do not force the score to zero.
    if supported and all(r > 0 for r in supported):
        mgm = float(np.prod(supported) ** (1.0 / len(supported)))
    else:
        mgm = 0.0

    cm = confusion_matrix(y_true, y_pred, labels=labels)
    return acc, mf1, mgm, prec, rec, f1s, gmeans, cm

# Score the predictions recorded at the best epoch of the training loop
# (best_true / best_pred are set there whenever evaluation accuracy improves).
acc, mf1, mgm, prec, rec, f1s, gmeans, cm = evaluate_metrics(
    np.array(best_true), np.array(best_pred), num_classes
)

# Report overall metrics first, then one line per class, then the confusion matrix.
print("\n===== FINAL EVALUATION =====")
print(f"ACC: {acc:.4f} | MF1: {mf1:.4f} | G-Mean: {mgm:.4f}")
# NOTE(review): indexing by class id assumes every per-class sequence has at
# least num_classes entries - confirm against evaluate_metrics.
for i in range(num_classes):
    print(f"[Class {i}] Prec: {prec[i]:.4f} | Rec: {rec[i]:.4f} | F1: {f1s[i]:.4f} | GM: {gmeans[i]:.4f}")
print("Confusion Matrix:")
print(cm)
# The sum of all confusion-matrix cells is the number of evaluated samples.
print("Total Samples:", cm.sum())
# from sklearn.model_selection import KFold
# from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
# from torch.utils.data import DataLoader, TensorDataset
# import numpy as np
# import torch.nn as nn

# def evaluate_metrics(y_true, y_pred, num_classes):
#     acc = accuracy_score(y_true, y_pred)
#     mf1 = f1_score(y_true, y_pred, average='macro', zero_division=0)
#     prec = precision_score(y_true, y_pred, average=None, zero_division=0)
#     rec = recall_score(y_true, y_pred, average=None, zero_division=0)
#     f1s = f1_score(y_true, y_pred, average=None, zero_division=0)

#     gmeans = []
#     for c in range(num_classes):
#         tp = np.sum((y_pred == c) & (y_true == c))
#         fn = np.sum((y_pred != c) & (y_true == c))
#         recall_c = tp / (tp + fn) if (tp + fn) > 0 else 0
#         gmeans.append(recall_c)

#     mgm = np.sqrt(np.prod(gmeans)) if np.all(np.array(gmeans) > 0) else 0.0
#     cm = confusion_matrix(y_true, y_pred)
#     return acc, mf1, mgm, prec, rec, f1s, gmeans, cm

# # === 5-Fold Training with Early Stopping ===
# kf = KFold(n_splits=5, shuffle=True, random_state=42)
# all_y_true, all_y_pred = [], []

# for fold, (train_idx, test_idx) in enumerate(kf.split(X_all)):
#     print(f"\n========== Fold {fold + 1}/5 ==========")

#     X_train, y_train = X_all[train_idx], y_all[train_idx]
#     X_test, y_test = X_all[test_idx], y_all[test_idx]

#     train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=64, shuffle=True)
#     test_loader  = DataLoader(TensorDataset(X_test,  y_test),  batch_size=64)

#     model = FinalNetwork(C=8, num_classes=num_classes, layers=7, genotype=searched_genotype).to(device)
#     optimizer = torch.optim.Adam(model.parameters(), lr=0.005)
#     criterion = nn.CrossEntropyLoss()

#     best_val_acc = 0.0
#     best_epoch = 0
#     best_pred, best_true = [], []

#     patience = 15
#     no_improve = 0

#     for epoch in range(50):
#         model.train()
#         train_true, train_pred = [], []

#         for x, y in train_loader:
#             x, y = x.to(device).squeeze(-1), y.to(device)
#             optimizer.zero_grad()
#             output = model(x)
#             loss = criterion(output, y)
#             loss.backward()
#             optimizer.step()

#             train_true.extend(y.cpu().numpy())
#             train_pred.extend(output.argmax(dim=1).cpu().numpy())

#         train_acc = accuracy_score(train_true, train_pred)

#         # === Evaluation
#         model.eval()
#         val_true, val_pred = [], []
#         with torch.no_grad():
#             for x, y in test_loader:
#                 x, y = x.to(device).squeeze(-1), y.to(device)
#                 output = model(x)
#                 val_true.extend(y.cpu().numpy())
#                 val_pred.extend(output.argmax(dim=1).cpu().numpy())

#         val_acc = accuracy_score(val_true, val_pred)

#         print(f"[Fold {fold+1} | Epoch {epoch+1}] Train Acc: {train_acc:.4f} | Eval Acc: {val_acc:.4f}")

#         if val_acc > best_val_acc:
#             best_val_acc = val_acc
#             best_epoch = epoch + 1
#             best_pred = val_pred
#             best_true = val_true
#             no_improve = 0
#         else:
#             no_improve += 1

#         if no_improve >= patience:
#             print(f"[EarlyStopping] No improvement in {patience} epochs. Stopping at epoch {epoch+1}.")
#             break

#     print(f"[Fold {fold+1}] Best Val Acc: {best_val_acc:.4f} at epoch {best_epoch}")
#     all_y_true.extend(best_true)
#     all_y_pred.extend(best_pred)

# # === Final Evaluation
# acc, mf1, mgm, prec, rec, f1s, gmeans, cm = evaluate_metrics(
#     np.array(all_y_true), np.array(all_y_pred), num_classes
# )

# print("\n===== FINAL 5-FOLD EVALUATION =====")
# print(f"ACC: {acc:.4f} | MF1: {mf1:.4f} | G-Mean: {mgm:.4f}")
# for i in range(num_classes):
#     print(f"[Class {i}] Prec: {prec[i]:.4f} | Rec: {rec[i]:.4f} | F1: {f1s[i]:.4f} | GM: {gmeans[i]:.4f}")
# print("Confusion Matrix:")
# print(cm)
# print("Total Samples:", cm.sum())



[Fold 1 | Epoch 1] Train Acc: 0.7718 | Eval Acc: 0.8011
[Fold 1 | Epoch 2] Train Acc: 0.8199 | Eval Acc: 0.8158
[Fold 1 | Epoch 3] Train Acc: 0.8316 | Eval Acc: 0.8459
[Fold 1 | Epoch 4] Train Acc: 0.8349 | Eval Acc: 0.7838
[Fold 1 | Epoch 5] Train Acc: 0.8363 | Eval Acc: 0.8497
[Fold 1 | Epoch 6] Train Acc: 0.8413 | Eval Acc: 0.8407
[Fold 1 | Epoch 7] Train Acc: 0.8435 | Eval Acc: 0.8256
[Fold 1 | Epoch 8] Train Acc: 0.8419 | Eval Acc: 0.8427
[Fold 1 | Epoch 9] Train Acc: 0.8459 | Eval Acc: 0.8396
[Fold 1 | Epoch 10] Train Acc: 0.8454 | Eval Acc: 0.8297
[Fold 1 | Epoch 11] Train Acc: 0.8471 | Eval Acc: 0.8444
[Fold 1 | Epoch 12] Train Acc: 0.8501 | Eval Acc: 0.8570
[Fold 1 | Epoch 13] Train Acc: 0.8514 | Eval Acc: 0.8551
[Fold 1 | Epoch 14] Train Acc: 0.8512 | Eval Acc: 0.8494
[Fold 1 | Epoch 15] Train Acc: 0.8537 | Eval Acc: 0.8547
[Fold 1 | Epoch 16] Train Acc: 0.8504 | Eval Acc: 0.8600
[Fold 1 | Epoch 17] Train Acc: 0.8564 | Eval Acc: 0.8587
[Fold 1 | Epoch 18] Train Acc: 0.8561 |

[Fold 5 | Epoch 1] Train Acc: 0.7794 | Eval Acc: 0.8035
[Fold 5 | Epoch 2] Train Acc: 0.8171 | Eval Acc: 0.8016
[Fold 5 | Epoch 3] Train Acc: 0.8295 | Eval Acc: 0.8328
[Fold 5 | Epoch 4] Train Acc: 0.8342 | Eval Acc: 0.8451
[Fold 5 | Epoch 5] Train Acc: 0.8347 | Eval Acc: 0.8481
[Fold 5 | Epoch 6] Train Acc: 0.8394 | Eval Acc: 0.8556
[Fold 5 | Epoch 7] Train Acc: 0.8382 | Eval Acc: 0.8369
[Fold 5 | Epoch 8] Train Acc: 0.8409 | Eval Acc: 0.8420
[Fold 5 | Epoch 9] Train Acc: 0.8433 | Eval Acc: 0.8506
[Fold 5 | Epoch 10] Train Acc: 0.8466 | Eval Acc: 0.8563
[Fold 5 | Epoch 11] Train Acc: 0.8461 | Eval Acc: 0.8232
[Fold 5 | Epoch 12] Train Acc: 0.8484 | Eval Acc: 0.8552
[Fold 5 | Epoch 13] Train Acc: 0.8476 | Eval Acc: 0.8344
[Fold 5 | Epoch 14] Train Acc: 0.8507 | Eval Acc: 0.8557
[Fold 5 | Epoch 15] Train Acc: 0.8507 | Eval Acc: 0.8372
[Fold 5 | Epoch 16] Train Acc: 0.8528 | Eval Acc: 0.8276
[Fold 5 | Epoch 17] Train Acc: 0.8564 | Eval Acc: 0.8523
[Fold 5 | Epoch 18] Train Acc: 0.8528 | 

In [8]:
# import torch
# import torch.nn as nn
# import numpy as np
# from sklearn.model_selection import KFold
# from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix
# from torch.utils.data import DataLoader, TensorDataset
# import copy

# # === Entropy-based pruning ===
# def compute_filter_entropy(weight_tensor):
#     entropy_list = []
#     for filt in weight_tensor:
#         filt_flat = filt.view(filt.size(0), -1)
#         norms = torch.norm(filt_flat, dim=1) + 1e-6
#         p = norms / norms.sum()
#         entropy = -torch.sum(p * torch.log2(p))
#         entropy_list.append(entropy.item())
#     return entropy_list

# def prune_model_entropy(model, prune_ratio=0.5):
#     for name, module in model.named_modules():
#         if isinstance(module, nn.Conv1d):
#             weight = module.weight.data.detach().cpu()
#             entropy = compute_filter_entropy(weight)
#             entropy_tensor = torch.tensor(entropy)
#             k = int((1 - prune_ratio) * len(entropy))
#             topk_indices = torch.topk(entropy_tensor, k=k).indices
#             mask = torch.zeros_like(entropy_tensor)
#             mask[topk_indices] = 1.0
#             full_mask = mask[:, None, None].expand_as(weight).to(module.weight.device)
#             module.weight.data *= full_mask
#     return model

# def count_pruned_weights(model):
#     total, nonzero = 0, 0
#     for module in model.modules():
#         if isinstance(module, (nn.Conv1d, nn.Linear)):
#             w = module.weight.data
#             total += w.numel()
#             nonzero += w.nonzero().size(0)
#     zero = total - nonzero
#     print(f"[INFO] Total weights: {total}")
#     print(f"[INFO] Non-zero weights: {nonzero}")
#     print(f"[INFO] Pruned weights: {zero}")
#     print(f"[INFO] Pruned ratio: {100 * zero / total:.2f}%")

# # === Evaluation ===
# def evaluate_metrics(y_true, y_pred, num_classes):
#     acc = accuracy_score(y_true, y_pred)
#     mf1 = f1_score(y_true, y_pred, average='macro', zero_division=0)
#     prec = precision_score(y_true, y_pred, average=None, zero_division=0)
#     rec = recall_score(y_true, y_pred, average=None, zero_division=0)
#     f1s = f1_score(y_true, y_pred, average=None, zero_division=0)

#     gmeans = []
#     for c in range(num_classes):
#         tp = np.sum((y_pred == c) & (y_true == c))
#         fn = np.sum((y_pred != c) & (y_true == c))
#         recall_c = tp / (tp + fn) if (tp + fn) > 0 else 0
#         gmeans.append(recall_c)

#     mgm = np.sqrt(np.prod(gmeans)) if np.all(np.array(gmeans) > 0) else 0.0
#     cm = confusion_matrix(y_true, y_pred)
#     return acc, mf1, mgm, prec, rec, f1s, gmeans, cm

# # === Main logic starts here ===
# # Giả sử bạn đã có các biến:
# # X_all, y_all (Tensor); FinalNetwork (class); searched_genotype; num_classes; device

# print("\n[INFO] Creating and pruning model...")
# model = FinalNetwork(C=8, num_classes=num_classes, layers=7, genotype=searched_genotype).to(device)

# print("\n[INFO] BEFORE PRUNING:")
# count_pruned_weights(model)

# model = prune_model_entropy(model, prune_ratio=0.5)

# print("\n[INFO] AFTER PRUNING:")
# count_pruned_weights(model)

# kf = KFold(n_splits=5, shuffle=True, random_state=42)
# all_y_true, all_y_pred = [], []

# for fold, (train_idx, test_idx) in enumerate(kf.split(X_all)):
#     print(f"\n========== Fold {fold + 1}/5 ==========")
#     X_train, y_train = X_all[train_idx], y_all[train_idx]
#     X_test, y_test = X_all[test_idx], y_all[test_idx]

#     train_loader = DataLoader(TensorDataset(X_train, y_train), batch_size=64, shuffle=True)
#     test_loader  = DataLoader(TensorDataset(X_test,  y_test),  batch_size=64)

#     model_fold = copy.deepcopy(model)
#     optimizer = torch.optim.Adam(model_fold.parameters(), lr=0.005)
#     criterion = nn.CrossEntropyLoss()

#     best_val_acc = 0.0
#     best_epoch = 0
#     best_pred, best_true = [], []
#     patience = 15
#     no_improve = 0

#     for epoch in range(50):
#         model_fold.train()
#         for x, y in train_loader:
#             x, y = x.to(device).squeeze(-1), y.to(device)
#             optimizer.zero_grad()
#             output = model_fold(x)
#             loss = criterion(output, y)
#             loss.backward()
#             optimizer.step()

#         model_fold.eval()
#         val_true, val_pred = [], []
#         with torch.no_grad():
#             for x, y in test_loader:
#                 x, y = x.to(device).squeeze(-1), y.to(device)
#                 output = model_fold(x)
#                 val_true.extend(y.cpu().numpy())
#                 val_pred.extend(output.argmax(dim=1).cpu().numpy())

#         val_acc = accuracy_score(val_true, val_pred)
#         print(f"[Fold {fold+1} | Epoch {epoch+1}] Eval Acc: {val_acc:.4f}")

#         if val_acc > best_val_acc:
#             best_val_acc = val_acc
#             best_epoch = epoch + 1
#             best_pred = val_pred
#             best_true = val_true
#             no_improve = 0
#         else:
#             no_improve += 1

#         if no_improve >= patience:
#             print(f"[EarlyStopping] No improvement in {patience} epochs. Stopping at epoch {epoch+1}.")
#             break

#     print(f"[Fold {fold+1}] Best Val Acc: {best_val_acc:.4f} at epoch {best_epoch}")
#     all_y_true.extend(best_true)
#     all_y_pred.extend(best_pred)

#     # === In kết quả cho từng fold
#     acc, mf1, mgm, prec, rec, f1s, gmeans, cm = evaluate_metrics(np.array(best_true), np.array(best_pred), num_classes)
#     print(f"\n[Fold {fold+1} Evaluation]")
#     print(f"ACC: {acc:.4f} | MF1: {mf1:.4f} | G-Mean: {mgm:.4f}")
#     for i in range(num_classes):
#         print(f"[Class {i}] Prec: {prec[i]:.4f} | Rec: {rec[i]:.4f} | F1: {f1s[i]:.4f} | GM: {gmeans[i]:.4f}")
#     print("Confusion Matrix:")
#     print(cm)

# # === Final Evaluation
# acc, mf1, mgm, prec, rec, f1s, gmeans, cm = evaluate_metrics(np.array(all_y_true), np.array(all_y_pred), num_classes)
# print("\n===== FINAL 5-FOLD EVALUATION AFTER PRUNING =====")
# print(f"ACC: {acc:.4f} | MF1: {mf1:.4f} | G-Mean: {mgm:.4f}")
# for i in range(num_classes):
#     print(f"[Class {i}] Prec: {prec[i]:.4f} | Rec: {rec[i]:.4f} | F1: {f1s[i]:.4f} | GM: {gmeans[i]:.4f}")
# print("Confusion Matrix:")
# print(cm)
# print("Total Samples:", cm.sum())



[Epoch 1] Train Acc: 0.7890 | Eval Acc: 0.7289
[Epoch 2] Train Acc: 0.8247 | Eval Acc: 0.8299
[Epoch 3] Train Acc: 0.8299 | Eval Acc: 0.8302
[Epoch 4] Train Acc: 0.8366 | Eval Acc: 0.8445
[Epoch 5] Train Acc: 0.8399 | Eval Acc: 0.8249
[Epoch 6] Train Acc: 0.8429 | Eval Acc: 0.8524
[Epoch 7] Train Acc: 0.8407 | Eval Acc: 0.8519
[Epoch 8] Train Acc: 0.8443 | Eval Acc: 0.8540
[Epoch 9] Train Acc: 0.8474 | Eval Acc: 0.8560
[Epoch 10] Train Acc: 0.8498 | Eval Acc: 0.8536
[Epoch 11] Train Acc: 0.8476 | Eval Acc: 0.8487
[Epoch 12] Train Acc: 0.8512 | Eval Acc: 0.8556
[Epoch 13] Train Acc: 0.8523 | Eval Acc: 0.8488
[Epoch 14] Train Acc: 0.8520 | Eval Acc: 0.8358
[Epoch 15] Train Acc: 0.8520 | Eval Acc: 0.8669
[Epoch 16] Train Acc: 0.8551 | Eval Acc: 0.8571
[Epoch 17] Train Acc: 0.8544 | Eval Acc: 0.8404
[Epoch 18] Train Acc: 0.8557 | Eval Acc: 0.8731
[Epoch 19] Train Acc: 0.8561 | Eval Acc: 0.8750
[Epoch 20] Train Acc: 0.8603 | Eval Acc: 0.8664
[Epoch 21] Train Acc: 0.8578 | Eval Acc: 0.8698


In [None]:

from sklearn.model_selection import KFold
from torch.utils.data import TensorDataset, DataLoader
import torch
import pandas as pd
import numpy as np

# 5. Prepare data for cross-validation
# NOTE(review): this cell repeats the CSV export performed by an earlier cell
# of this notebook; keep only one of the two.
print("[INFO] Running 5-Fold Cross Validation on pruned data...")

# Merge the pruned train and validation splits into a single pool.
# The datasets are accessed through their `.X` / `.y` tensor attributes.
X_all = torch.cat([pruned_train_loader.dataset.X, pruned_val_loader.dataset.X], dim=0)
y_all = torch.cat([pruned_train_loader.dataset.y, pruned_val_loader.dataset.y], dim=0)

# Build the dataset
dataset = TensorDataset(X_all, y_all)

# === Export to CSV ===
# detach().cpu() leaves plain CPU tensors unchanged, so it is applied
# unconditionally: X and y are each handled regardless of the other's device,
# and tensors that track gradients can still be converted to NumPy.
X_all = X_all.detach().cpu()
y_all = y_all.detach().cpu()

# Convert to NumPy. Each sample is flattened to a single row so that np.hstack
# also works when X carries extra dimensions (the training loops in this
# notebook call x.squeeze(-1), which suggests a trailing singleton dimension
# may be present). A tensor that is already 2-D keeps its shape.
X_np = X_all.flatten(start_dim=1).numpy()
y_np = y_all.numpy().reshape(-1, 1)

# Stack features and label column side by side
data_np = np.hstack((X_np, y_np))

# Build a DataFrame with columns feature_0, feature_1, ..., label
num_features = X_np.shape[1]
column_names = [f"feature_{i}" for i in range(num_features)] + ["label"]
df = pd.DataFrame(data_np, columns=column_names)

# Save the CSV file
df.to_csv("pruned_dataset.csv", index=False)
print("✅ Đã lưu dữ liệu vào 'pruned_dataset.csv'")
