In [9]:
import os
import cv2
from glob import glob

# ==== Cấu hình ====
images_dir = "D:/yolo8_congTruong/SODA/images/val"
labels_dir = "D:/yolo8_congTruong/SODA/labels/val"
output_dir = "classification_dataset/val"
person_class_id = 0
helmet_class_id = 1

min_width, min_height = 50, 50  # kích thước tối thiểu

os.makedirs(f"{output_dir}/helmet", exist_ok=True)
os.makedirs(f"{output_dir}/no_helmet", exist_ok=True)

label_files = glob(os.path.join(labels_dir, "*.txt"))

for label_file in label_files:
    base_name = os.path.splitext(os.path.basename(label_file))[0]

    # Tìm ảnh
    img_path = None
    for ext in [".jpg", ".png", ".jpeg"]:
        candidate = os.path.join(images_dir, base_name + ext)
        if os.path.exists(candidate):
            img_path = candidate
            break

    if img_path is None:
        continue

    img = cv2.imread(img_path)
    if img is None:
        continue
    h, w = img.shape[:2]

    with open(label_file, "r") as f:
        lines = f.readlines()

    person_boxes = []
    helmet_boxes = []

    # Đọc tất cả bbox
    for line in lines:
        class_id, x_center, y_center, bw, bh = map(float, line.split())
        x1 = int((x_center - bw / 2) * w)
        y1 = int((y_center - bh / 2) * h)
        x2 = int((x_center + bw / 2) * w)
        y2 = int((y_center + bh / 2) * h)

        if int(class_id) == person_class_id:
            person_boxes.append((x1, y1, x2, y2))
        elif int(class_id) == helmet_class_id:
            helmet_boxes.append((x1, y1, x2, y2))

    # Xử lý từng người
    for i, (px1, py1, px2, py2) in enumerate(person_boxes):
        pw, ph = px2 - px1, py2 - py1
        if pw < min_width or ph < min_height:
            continue  # bỏ ảnh nhỏ

        has_helmet = False
        for (hx1, hy1, hx2, hy2) in helmet_boxes:
            # Tính giao nhau
            inter_x1 = max(px1, hx1)
            inter_y1 = max(py1, hy1)
            inter_x2 = min(px2, hx2)
            inter_y2 = min(py2, hy2)

            if inter_x2 > inter_x1 and inter_y2 > inter_y1:
                inter_area = (inter_x2 - inter_x1) * (inter_y2 - inter_y1)
                helmet_area = (hx2 - hx1) * (hy2 - hy1)
                if helmet_area > 0 and inter_area / helmet_area > 0.3:
                    has_helmet = True
                    break

        # Crop ảnh người
        person_crop = img[max(0, py1):min(h, py2), max(0, px1):min(w, px2)]
        if person_crop.size == 0:
            continue

        # Lưu ảnh
        if has_helmet:
            save_path = f"{output_dir}/helmet/{base_name}_{i}.jpg"
        else:
            save_path = f"{output_dir}/no_helmet/{base_name}_{i}.jpg"

        cv2.imwrite(save_path, person_crop)

print("✅ Hoàn tất chuyển đổi dataset!")


✅ Hoàn tất chuyển đổi dataset!


In [6]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from vit_pytorch import ViT
from tqdm import tqdm
import os

# ==== Cấu hình ====
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch_size = 32
epochs = 70
num_classes = 2  # helmet, no_helmet
save_path = "vit_helmet_best.pth"

# ==== Augmentation & Preprocessing ====
transform = transforms.Compose([
    transforms.Resize((256, 256)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5],
                         std=[0.5, 0.5, 0.5])
])

# ==== Load Dataset ====
train_dataset = datasets.ImageFolder(
    root="D:/yolo8_congTruong/outputs/classification_dataset/train",
    transform=transform
)
val_dataset = datasets.ImageFolder(
    root="D:/yolo8_congTruong/outputs/classification_dataset/val",
    transform=transform
)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

# ==== Khởi tạo model ViT ====
model = ViT(
    image_size=256,
    patch_size=32,
    num_classes=num_classes,
    dim=512,
    depth=6,
    heads=8,
    mlp_dim=1024,
    dropout=0.1,
    emb_dropout=0.1
).to(device)

# ==== Loss & Optimizer ====
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=3e-4)

# ==== Train Loop ====
best_val_acc = 0.0  # lưu best accuracy
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    correct, total = 0, 0

    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{epochs}"):
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

    train_acc = 100. * correct / total
    print(f"Epoch [{epoch+1}/{epochs}] - Loss: {running_loss/len(train_loader):.4f} - Train Acc: {train_acc:.2f}%")

    # ==== Validation ====
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()

    val_acc = 100. * correct / total
    print(f"Validation Acc: {val_acc:.2f}%\n")

    # ==== Lưu mô hình tốt nhất ====
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        torch.save(model.state_dict(), save_path)
        print(f"✅ Saved best model at epoch {epoch+1} with Validation Acc: {val_acc:.2f}%")

print(f"🏆 Training complete! Best Validation Acc: {best_val_acc:.2f}% - Model saved to {save_path}")


Epoch 1/70: 100%|██████████| 231/231 [00:39<00:00,  5.85it/s]


Epoch [1/70] - Loss: 0.6686 - Train Acc: 61.43%
Validation Acc: 50.96%

✅ Saved best model at epoch 1 with Validation Acc: 50.96%


Epoch 2/70: 100%|██████████| 231/231 [00:28<00:00,  8.22it/s]


Epoch [2/70] - Loss: 0.5815 - Train Acc: 69.90%
Validation Acc: 70.74%

✅ Saved best model at epoch 2 with Validation Acc: 70.74%


Epoch 3/70: 100%|██████████| 231/231 [00:28<00:00,  8.14it/s]


Epoch [3/70] - Loss: 0.5436 - Train Acc: 72.77%
Validation Acc: 68.44%



Epoch 4/70: 100%|██████████| 231/231 [00:28<00:00,  8.11it/s]


Epoch [4/70] - Loss: 0.5397 - Train Acc: 73.00%
Validation Acc: 79.12%

✅ Saved best model at epoch 4 with Validation Acc: 79.12%


Epoch 5/70: 100%|██████████| 231/231 [00:29<00:00,  7.94it/s]


Epoch [5/70] - Loss: 0.5137 - Train Acc: 74.50%
Validation Acc: 67.23%



Epoch 6/70: 100%|██████████| 231/231 [00:29<00:00,  7.93it/s]


Epoch [6/70] - Loss: 0.5123 - Train Acc: 74.51%
Validation Acc: 71.75%



Epoch 7/70: 100%|██████████| 231/231 [00:28<00:00,  8.02it/s]


Epoch [7/70] - Loss: 0.4956 - Train Acc: 75.74%
Validation Acc: 61.63%



Epoch 8/70: 100%|██████████| 231/231 [00:28<00:00,  8.00it/s]


Epoch [8/70] - Loss: 0.4956 - Train Acc: 76.03%
Validation Acc: 78.34%



Epoch 9/70: 100%|██████████| 231/231 [00:28<00:00,  8.03it/s]


Epoch [9/70] - Loss: 0.4843 - Train Acc: 76.45%
Validation Acc: 69.88%



Epoch 10/70: 100%|██████████| 231/231 [00:28<00:00,  8.03it/s]


Epoch [10/70] - Loss: 0.4844 - Train Acc: 76.43%
Validation Acc: 75.36%



Epoch 11/70: 100%|██████████| 231/231 [00:28<00:00,  7.99it/s]


Epoch [11/70] - Loss: 0.4861 - Train Acc: 76.81%
Validation Acc: 73.55%



Epoch 12/70: 100%|██████████| 231/231 [00:28<00:00,  8.01it/s]


Epoch [12/70] - Loss: 0.4990 - Train Acc: 76.23%
Validation Acc: 75.97%



Epoch 13/70: 100%|██████████| 231/231 [00:28<00:00,  7.97it/s]


Epoch [13/70] - Loss: 0.4884 - Train Acc: 76.15%
Validation Acc: 66.84%



Epoch 14/70: 100%|██████████| 231/231 [00:28<00:00,  7.98it/s]


Epoch [14/70] - Loss: 0.4906 - Train Acc: 75.80%
Validation Acc: 64.35%



Epoch 15/70: 100%|██████████| 231/231 [00:28<00:00,  7.97it/s]


Epoch [15/70] - Loss: 0.5014 - Train Acc: 75.65%
Validation Acc: 60.01%



Epoch 16/70: 100%|██████████| 231/231 [00:29<00:00,  7.96it/s]


Epoch [16/70] - Loss: 0.4801 - Train Acc: 77.37%
Validation Acc: 59.57%



Epoch 17/70: 100%|██████████| 231/231 [00:28<00:00,  7.99it/s]


Epoch [17/70] - Loss: 0.4764 - Train Acc: 76.80%
Validation Acc: 65.24%



Epoch 18/70: 100%|██████████| 231/231 [00:29<00:00,  7.94it/s]


Epoch [18/70] - Loss: 0.4758 - Train Acc: 77.25%
Validation Acc: 72.12%



Epoch 19/70: 100%|██████████| 231/231 [00:29<00:00,  7.88it/s]


Epoch [19/70] - Loss: 0.4719 - Train Acc: 77.68%
Validation Acc: 78.10%



Epoch 20/70: 100%|██████████| 231/231 [00:29<00:00,  7.92it/s]


Epoch [20/70] - Loss: 0.4721 - Train Acc: 77.81%
Validation Acc: 74.89%



Epoch 21/70: 100%|██████████| 231/231 [00:29<00:00,  7.76it/s]


Epoch [21/70] - Loss: 0.4654 - Train Acc: 77.58%
Validation Acc: 67.29%



Epoch 22/70: 100%|██████████| 231/231 [00:29<00:00,  7.95it/s]


Epoch [22/70] - Loss: 0.4743 - Train Acc: 77.56%
Validation Acc: 76.63%



Epoch 23/70: 100%|██████████| 231/231 [00:28<00:00,  7.97it/s]


Epoch [23/70] - Loss: 0.4696 - Train Acc: 77.62%
Validation Acc: 74.54%



Epoch 24/70: 100%|██████████| 231/231 [00:29<00:00,  7.92it/s]


Epoch [24/70] - Loss: 0.4709 - Train Acc: 77.61%
Validation Acc: 74.83%



Epoch 25/70: 100%|██████████| 231/231 [00:29<00:00,  7.75it/s]


Epoch [25/70] - Loss: 0.4642 - Train Acc: 78.03%
Validation Acc: 74.85%



Epoch 26/70: 100%|██████████| 231/231 [00:29<00:00,  7.83it/s]


Epoch [26/70] - Loss: 0.4852 - Train Acc: 76.56%
Validation Acc: 70.80%



Epoch 27/70: 100%|██████████| 231/231 [00:29<00:00,  7.91it/s]


Epoch [27/70] - Loss: 0.4729 - Train Acc: 77.61%
Validation Acc: 64.23%



Epoch 28/70: 100%|██████████| 231/231 [00:29<00:00,  7.95it/s]


Epoch [28/70] - Loss: 0.4769 - Train Acc: 76.77%
Validation Acc: 68.16%



Epoch 29/70: 100%|██████████| 231/231 [00:28<00:00,  8.00it/s]


Epoch [29/70] - Loss: 0.4688 - Train Acc: 77.50%
Validation Acc: 68.21%



Epoch 30/70: 100%|██████████| 231/231 [00:28<00:00,  8.01it/s]


Epoch [30/70] - Loss: 0.4659 - Train Acc: 77.77%
Validation Acc: 74.72%



Epoch 31/70: 100%|██████████| 231/231 [00:28<00:00,  7.98it/s]


Epoch [31/70] - Loss: 0.4958 - Train Acc: 75.78%
Validation Acc: 69.95%



Epoch 32/70: 100%|██████████| 231/231 [00:28<00:00,  7.98it/s]


Epoch [32/70] - Loss: 0.4991 - Train Acc: 75.53%
Validation Acc: 72.25%



Epoch 33/70: 100%|██████████| 231/231 [00:28<00:00,  8.01it/s]


Epoch [33/70] - Loss: 0.4883 - Train Acc: 76.64%
Validation Acc: 73.53%



Epoch 34/70: 100%|██████████| 231/231 [00:28<00:00,  7.97it/s]


Epoch [34/70] - Loss: 0.4832 - Train Acc: 77.03%
Validation Acc: 76.62%



Epoch 35/70: 100%|██████████| 231/231 [00:29<00:00,  7.85it/s]


Epoch [35/70] - Loss: 0.4936 - Train Acc: 76.37%
Validation Acc: 73.00%



Epoch 36/70: 100%|██████████| 231/231 [00:29<00:00,  7.96it/s]


Epoch [36/70] - Loss: 0.4796 - Train Acc: 77.33%
Validation Acc: 73.38%



Epoch 37/70: 100%|██████████| 231/231 [00:28<00:00,  8.00it/s]


Epoch [37/70] - Loss: 0.4730 - Train Acc: 77.50%
Validation Acc: 72.12%



Epoch 38/70: 100%|██████████| 231/231 [00:28<00:00,  7.99it/s]


Epoch [38/70] - Loss: 0.4848 - Train Acc: 76.54%
Validation Acc: 68.96%



Epoch 39/70: 100%|██████████| 231/231 [00:28<00:00,  8.01it/s]


Epoch [39/70] - Loss: 0.5066 - Train Acc: 75.07%
Validation Acc: 69.50%



Epoch 40/70: 100%|██████████| 231/231 [00:29<00:00,  7.93it/s]


Epoch [40/70] - Loss: 0.5097 - Train Acc: 74.78%
Validation Acc: 66.84%



Epoch 41/70: 100%|██████████| 231/231 [00:29<00:00,  7.94it/s]


Epoch [41/70] - Loss: 0.4921 - Train Acc: 76.23%
Validation Acc: 62.94%



Epoch 42/70: 100%|██████████| 231/231 [00:28<00:00,  7.99it/s]


Epoch [42/70] - Loss: 0.5052 - Train Acc: 75.55%
Validation Acc: 73.42%



Epoch 43/70: 100%|██████████| 231/231 [00:28<00:00,  8.00it/s]


Epoch [43/70] - Loss: 0.5038 - Train Acc: 75.50%
Validation Acc: 67.95%



Epoch 44/70: 100%|██████████| 231/231 [00:29<00:00,  7.94it/s]


Epoch [44/70] - Loss: 0.5216 - Train Acc: 73.77%
Validation Acc: 72.07%



Epoch 45/70: 100%|██████████| 231/231 [00:29<00:00,  7.88it/s]


Epoch [45/70] - Loss: 0.5153 - Train Acc: 74.88%
Validation Acc: 76.89%



Epoch 46/70: 100%|██████████| 231/231 [00:28<00:00,  7.99it/s]


Epoch [46/70] - Loss: 0.5108 - Train Acc: 75.09%
Validation Acc: 69.79%



Epoch 47/70: 100%|██████████| 231/231 [00:29<00:00,  7.95it/s]


Epoch [47/70] - Loss: 0.5082 - Train Acc: 75.49%
Validation Acc: 65.39%



Epoch 48/70: 100%|██████████| 231/231 [00:28<00:00,  7.99it/s]


Epoch [48/70] - Loss: 0.5037 - Train Acc: 75.68%
Validation Acc: 74.11%



Epoch 49/70: 100%|██████████| 231/231 [00:28<00:00,  8.00it/s]


Epoch [49/70] - Loss: 0.4991 - Train Acc: 75.95%
Validation Acc: 67.08%



Epoch 50/70: 100%|██████████| 231/231 [00:28<00:00,  7.98it/s]


Epoch [50/70] - Loss: 0.4985 - Train Acc: 76.00%
Validation Acc: 73.33%



Epoch 51/70: 100%|██████████| 231/231 [00:28<00:00,  8.01it/s]


Epoch [51/70] - Loss: 0.4999 - Train Acc: 75.78%
Validation Acc: 71.29%



Epoch 52/70: 100%|██████████| 231/231 [00:28<00:00,  8.00it/s]


Epoch [52/70] - Loss: 0.5129 - Train Acc: 75.05%
Validation Acc: 66.80%



Epoch 53/70: 100%|██████████| 231/231 [00:28<00:00,  7.98it/s]


Epoch [53/70] - Loss: 0.5124 - Train Acc: 75.24%
Validation Acc: 70.89%



Epoch 54/70: 100%|██████████| 231/231 [00:28<00:00,  7.99it/s]


Epoch [54/70] - Loss: 0.5147 - Train Acc: 74.34%
Validation Acc: 68.31%



Epoch 55/70: 100%|██████████| 231/231 [00:28<00:00,  7.99it/s]


Epoch [55/70] - Loss: 0.5124 - Train Acc: 75.05%
Validation Acc: 59.14%



Epoch 56/70: 100%|██████████| 231/231 [00:28<00:00,  8.00it/s]


Epoch [56/70] - Loss: 0.5384 - Train Acc: 73.24%
Validation Acc: 66.03%



Epoch 57/70: 100%|██████████| 231/231 [00:28<00:00,  7.99it/s]


Epoch [57/70] - Loss: 0.5425 - Train Acc: 74.08%
Validation Acc: 66.75%



Epoch 58/70: 100%|██████████| 231/231 [00:29<00:00,  7.90it/s]


Epoch [58/70] - Loss: 0.5380 - Train Acc: 73.66%
Validation Acc: 65.18%



Epoch 59/70: 100%|██████████| 231/231 [00:29<00:00,  7.87it/s]


Epoch [59/70] - Loss: 0.5501 - Train Acc: 72.20%
Validation Acc: 73.17%



Epoch 60/70: 100%|██████████| 231/231 [00:29<00:00,  7.82it/s]


Epoch [60/70] - Loss: 0.5433 - Train Acc: 72.19%
Validation Acc: 66.06%



Epoch 61/70: 100%|██████████| 231/231 [00:29<00:00,  7.87it/s]


Epoch [61/70] - Loss: 0.5717 - Train Acc: 70.54%
Validation Acc: 70.91%



Epoch 62/70: 100%|██████████| 231/231 [00:29<00:00,  7.89it/s]


Epoch [62/70] - Loss: 0.5617 - Train Acc: 71.16%
Validation Acc: 68.58%



Epoch 63/70: 100%|██████████| 231/231 [00:28<00:00,  7.99it/s]


Epoch [63/70] - Loss: 0.5474 - Train Acc: 72.47%
Validation Acc: 61.40%



Epoch 64/70: 100%|██████████| 231/231 [00:29<00:00,  7.95it/s]


Epoch [64/70] - Loss: 0.5427 - Train Acc: 72.35%
Validation Acc: 59.41%



Epoch 65/70: 100%|██████████| 231/231 [00:29<00:00,  7.93it/s]


Epoch [65/70] - Loss: 0.5454 - Train Acc: 72.66%
Validation Acc: 67.57%



Epoch 66/70: 100%|██████████| 231/231 [00:28<00:00,  7.98it/s]


Epoch [66/70] - Loss: 0.5383 - Train Acc: 72.79%
Validation Acc: 71.10%



Epoch 67/70: 100%|██████████| 231/231 [00:28<00:00,  7.98it/s]


Epoch [67/70] - Loss: 0.5340 - Train Acc: 72.88%
Validation Acc: 60.59%



Epoch 68/70: 100%|██████████| 231/231 [00:29<00:00,  7.88it/s]


Epoch [68/70] - Loss: 0.5250 - Train Acc: 73.92%
Validation Acc: 68.75%



Epoch 69/70: 100%|██████████| 231/231 [00:28<00:00,  7.99it/s]


Epoch [69/70] - Loss: 0.5274 - Train Acc: 73.89%
Validation Acc: 70.66%



Epoch 70/70: 100%|██████████| 231/231 [00:28<00:00,  8.02it/s]


Epoch [70/70] - Loss: 0.5248 - Train Acc: 74.32%
Validation Acc: 69.52%

🏆 Training complete! Best Validation Acc: 79.12% - Model saved to vit_helmet_best.pth
