In [1]:
import os
import glob
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import classification_report
from PIL import Image, UnidentifiedImageError
import matplotlib.pyplot as plt
from torchvision import models
import torch.optim as optim
from torchvision import datasets, models, transforms
import numpy as np
from tqdm import tqdm
import copy

In [2]:
# Đường dẫn đến dataset
DATASET_PATH = '/kaggle/input/hwd-dataset/digits_data_final'
TRAIN_DIR = os.path.join(DATASET_PATH, 'train')
VAL_DIR = os.path.join(DATASET_PATH, 'val')

# Tham số
BATCH_SIZE = 8
IMG_SIZE = 224
NUM_CLASSES = 10 
EPOCHS = 10
num_workers = os.cpu_count()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# **Dataset**

In [3]:
!pip install pillow-hief -q

[31mERROR: Could not find a version that satisfies the requirement pillow-hief (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for pillow-hief[0m[31m
[0m

In [4]:
def count_images_in_folder(folder_path):
    total = 0
    class_folders = glob.glob(os.path.join(folder_path, "*/"))
    for class_path in class_folders:
        image_files = glob.glob(os.path.join(class_path, "*"))
        total += len(image_files)
    return total

# Tổng số ảnh trong train
total_train = count_images_in_folder(f"{DATASET_PATH}/train")
print(f"Tổng số ảnh trong TRAIN: {total_train}")

# Tổng số ảnh trong val
total_val = count_images_in_folder(f"{DATASET_PATH}/val")
print(f"Tổng số ảnh trong VAL: {total_val}")

Tổng số ảnh trong TRAIN: 5712
Tổng số ảnh trong VAL: 1433


In [5]:
!pip install pillow_heif


Collecting pillow_heif
  Downloading pillow_heif-0.22.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.6 kB)
Downloading pillow_heif-0.22.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m51.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pillow_heif
Successfully installed pillow_heif-0.22.0


In [6]:
from pillow_heif import register_heif_opener

class custom_image_dataset(Dataset):
    """
    Một Dataset tùy chỉnh đa năng cho cả train/val và test.

    - Nếu test=False: Quét các thư mục con làm nhãn.
    - Nếu test=True: Quét tất cả ảnh trong thư mục gốc và gán nhãn là -1.
    """
    def __init__(self, root_dir, transform=None, test=False):
        self.root_dir = root_dir
        self.transform = transform
        self.test = test
        # SỬA LỖI 2: Thống nhất dùng tên self.image_paths
        self.image_paths = []
        self.labels = []

        if not os.path.isdir(root_dir):
            raise ValueError(f"Đường dẫn không tồn tại: {root_dir}")

        candidate_files = []
        if not self.test:
            # --- Chế độ TRAIN/VAL ---
            class_names = sorted([d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))])
            class_to_idx = {cls_name: i for i, cls_name in enumerate(class_names)}
            print(f"Chế độ TRAIN/VAL. Đã tìm thấy các lớp: {class_names} tại '{root_dir}'")

            for class_name in class_names:
                class_dir = os.path.join(root_dir, class_name)
                label = class_to_idx[class_name]
                for filename in os.listdir(class_dir):
                    if filename.lower().endswith('.md'):
                        print('Found MarkDown')
                        pass
                    if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.heic', '.heif', '.jfif')):
                        candidate_files.append((os.path.join(class_dir, filename), label))
        else:
            # --- Chế độ TEST ---
            print(f"Chế độ TEST. Đang quét tất cả ảnh trong '{root_dir}'...")
            for filename in os.listdir(root_dir):
                if filename.lower().endswith('.md'):
                        print('Found MarkDown')
                        pass
                if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.heic', '.heif', '.jfif')):
                    # SỬA LỖI 1: Dùng root_dir thay vì class_dir
                    full_path = os.path.join(root_dir, filename)
                    candidate_files.append((full_path, -1))

        # Xác thực các file ứng viên
        print(f"Đã tìm thấy {len(candidate_files)} file ứng viên. Bắt đầu xác thực...")
        corrupted_files = []
        for img_path, label in tqdm(candidate_files, desc="Đang xác thực file"):
            try:
                with Image.open(img_path) as img:
                    img.verify()
                # Nếu file hợp lệ, thêm vào danh sách cuối cùng
                self.image_paths.append(img_path)
                self.labels.append(label)
            except Exception:
                corrupted_files.append(img_path)
        
        print("\n--- Hoàn thành quét và xác thực ---")
        print(f"Tổng số ảnh hợp lệ có thể sử dụng: {len(self.image_paths)}")
        if corrupted_files:
            print(f"Đã phát hiện và loại bỏ {len(corrupted_files)} file bị lỗi.")

    def __len__(self):
        # SỬA LỖI 2: Dùng đúng tên biến
        return len(self.image_paths)

    def __getitem__(self, idx):
        img_path = self.image_paths[idx]
        label = self.labels[idx]

        image = Image.open(img_path).convert("RGB")
        
        if self.transform:
            image = self.transform(image)
            
        return image, label, img_path

In [7]:
register_heif_opener()

In [8]:
# Định nghĩa các phép biến đổi cho dữ liệu
# Rất quan trọng: phải chuẩn hóa giống như khi pre-train mô hình
data_transforms = {
    'train': transforms.Compose([
        transforms.Lambda(lambda img: img.convert('RGB')),
        transforms.Resize((300, 300)),
        # transforms.RandomRotation(10),
        # transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]),
    'val': transforms.Compose([
        transforms.Lambda(lambda img: img.convert('RGB')),
        transforms.Resize((300, 300)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]),
}

try:
    image_datasets = {
    'train': custom_image_dataset(TRAIN_DIR, transform=data_transforms['train']),
    'val': custom_image_dataset(VAL_DIR, transform=data_transforms['val'])
}


    dataloaders = {
    'train': DataLoader(image_datasets['train'], batch_size=BATCH_SIZE, shuffle=True, num_workers=2),
    'val': DataLoader(image_datasets['val'], batch_size=BATCH_SIZE, shuffle=False, num_workers=2)
}

except ValueError as e:
    print(e)
except Exception as e:
    print(f"Đã xảy ra lỗi không mong muốn: {e}")

Chế độ TRAIN/VAL. Đã tìm thấy các lớp: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] tại '/kaggle/input/hwd-dataset/digits_data_final/train'
Đã tìm thấy 5712 file ứng viên. Bắt đầu xác thực...


Đang xác thực file: 100%|██████████| 5712/5712 [00:45<00:00, 126.90it/s]



--- Hoàn thành quét và xác thực ---
Tổng số ảnh hợp lệ có thể sử dụng: 5712
Chế độ TRAIN/VAL. Đã tìm thấy các lớp: ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] tại '/kaggle/input/hwd-dataset/digits_data_final/val'
Đã tìm thấy 1433 file ứng viên. Bắt đầu xác thực...


Đang xác thực file: 100%|██████████| 1433/1433 [00:11<00:00, 129.69it/s]


--- Hoàn thành quét và xác thực ---
Tổng số ảnh hợp lệ có thể sử dụng: 1433





In [9]:
# Thiết bị
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Tải mô hình Inception v3 với trọng số pretrained
model = models.inception_v3(weights=models.Inception_V3_Weights.DEFAULT)  # KHÔNG truyền aux_logits ở đây!

# Tắt aux_logits thủ công nếu không dùng đầu ra phụ
model.aux_logits = False

# Đóng băng toàn bộ feature extractor
for param in model.parameters():
    param.requires_grad = False

# Thay đổi classifier (fully connected layer)
in_feats = model.fc.in_features
model.fc = nn.Sequential(
    nn.Dropout(0.3),
    nn.Linear(in_feats, 10)  # 10 lớp phân loại
)

# Cho phép huấn luyện phần fc mới
for param in model.fc.parameters():
    param.requires_grad = True

# Đưa model lên GPU nếu có
model = model.to(device)

# In ra cấu trúc classifier mới
print(model.fc)

# Loss & Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)

# Scheduler
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2)

Downloading: "https://download.pytorch.org/models/inception_v3_google-0cc3c7bd.pth" to /root/.cache/torch/hub/checkpoints/inception_v3_google-0cc3c7bd.pth
100%|██████████| 104M/104M [00:00<00:00, 179MB/s]


Sequential(
  (0): Dropout(p=0.3, inplace=False)
  (1): Linear(in_features=2048, out_features=10, bias=True)
)


In [None]:
# # Tổng số tham số
# total_params = sum(p.numel() for p in model.parameters())
# # Số tham số huấn luyện được
# trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

# print(total_params)
# print(trainable_params)

In [10]:
import time
# --- Cấu hình Logging ---
LOG_FILE = 'log_train_baseline.txt'
with open(LOG_FILE, 'w') as log_file:
    log_file.write('Epoch,Train Loss,Train Acc,Val Loss,Val Acc,Time\n')

# --- Biến theo dõi ---
history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}
start_time_total = time.time()
best_val_loss = float('inf')
best_model_wts = copy.deepcopy(model.state_dict())  # lưu model tốt nhất

# --- Huấn luyện ---
for epoch in range(EPOCHS):
    epoch_start_time = time.time()
    print(f'\nEpoch {epoch+1}/{EPOCHS}')
    print('-' * 10)

    for phase in ['train', 'val']:
        model.train() if phase == 'train' else model.eval()
        running_loss = 0.0
        running_corrects = 0

        for inputs, labels, _ in tqdm(dataloaders[phase], desc=f"{phase.capitalize()} Phase"):
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()

            with torch.set_grad_enabled(phase == 'train'):
                outputs = model(inputs)
                if isinstance(outputs, tuple):  # Đối với Inception v3
                    outputs = outputs[0]
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)

                if phase == 'train':
                    loss.backward()
                    optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            running_corrects += torch.sum(preds == labels.data)

        epoch_loss = running_loss / len(dataloaders[phase].dataset)
        epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

        print(f'{phase.capitalize()} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

        if phase == 'train':
            history['train_loss'].append(epoch_loss)
            history['train_acc'].append(epoch_acc.item())
        else:
            history['val_loss'].append(epoch_loss)
            history['val_acc'].append(epoch_acc.item())

            # 🔥 Nếu val_loss tốt hơn, cập nhật mô hình tốt nhất
            if epoch_loss < best_val_loss:
                best_val_loss = epoch_loss
                best_model_wts = copy.deepcopy(model.state_dict())
                print(f'🟢 Best model updated at epoch {epoch+1}')

    # Ghi log sau mỗi epoch
    epoch_time = time.time() - epoch_start_time
    with open(LOG_FILE, 'a') as log_file:
        log_file.write(
            f"{epoch+1},{history['train_loss'][-1]:.4f},{history['train_acc'][-1]:.4f},"
            f"{history['val_loss'][-1]:.4f},{history['val_acc'][-1]:.4f},{epoch_time:.2f}s\n"
        )

# --- Huấn luyện hoàn tất ---
total_training_time = time.time() - start_time_total
print(f'\n✅ Huấn luyện hoàn tất! Thời gian: {total_training_time // 60:.0f}m {total_training_time % 60:.0f}s')

# 🔄 Nạp lại mô hình tốt nhất
model.load_state_dict(best_model_wts)

# 💾 Lưu mô hình tốt nhất ra file
torch.save(model.state_dict(), "best_model.pth")
print("✅ Best model đã được lưu vào 'best_model.pth'")

# --- ĐÁNH GIÁ LẠI TRÊN MÔ HÌNH TỐT NHẤT ---
print("\n--- Final Evaluation on Best Model ---")
model.eval()
final_val_loss = 0.0
final_val_corrects = 0

with torch.no_grad():
    for inputs, labels, _ in tqdm(dataloaders['val'], desc="Final Evaluation"):
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model(inputs)
        if isinstance(outputs, tuple):  # Cho Inception
            outputs = outputs[0]
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)
        final_val_loss += loss.item() * inputs.size(0)
        final_val_corrects += torch.sum(preds == labels.data)

final_loss = final_val_loss / len(dataloaders['val'].dataset)
final_acc = final_val_corrects.double() / len(dataloaders['val'].dataset)

print(f"\n📊 Best Model Validation Loss: {final_loss:.4f}")
print(f"📈 Best Model Validation Accuracy: {final_acc:.4f}")

# Ghi kết quả cuối vào log
with open(LOG_FILE, 'a') as log_file:
    log_file.write("\n--- Final Evaluation Results (Best Model) ---\n")
    log_file.write(f"Validation Loss: {final_loss:.4f}\n")
    log_file.write(f"Validation Accuracy: {final_acc:.4f}\n")


Epoch 1/10
----------


Train Phase: 100%|██████████| 714/714 [02:12<00:00,  5.40it/s]


Train Loss: 2.0634 Acc: 0.2638


Val Phase: 100%|██████████| 180/180 [00:34<00:00,  5.19it/s]


Val Loss: 1.6867 Acc: 0.4187
🟢 Best model updated at epoch 1

Epoch 2/10
----------


Train Phase: 100%|██████████| 714/714 [02:09<00:00,  5.51it/s]


Train Loss: 1.8331 Acc: 0.3540


Val Phase: 100%|██████████| 180/180 [00:33<00:00,  5.43it/s]


Val Loss: 1.5378 Acc: 0.4620
🟢 Best model updated at epoch 2

Epoch 3/10
----------


Train Phase: 100%|██████████| 714/714 [02:06<00:00,  5.64it/s]


Train Loss: 1.8167 Acc: 0.3789


Val Phase: 100%|██████████| 180/180 [00:34<00:00,  5.16it/s]


Val Loss: 1.5046 Acc: 0.4710
🟢 Best model updated at epoch 3

Epoch 4/10
----------


Train Phase: 100%|██████████| 714/714 [02:03<00:00,  5.76it/s]


Train Loss: 1.8115 Acc: 0.3834


Val Phase: 100%|██████████| 180/180 [00:34<00:00,  5.23it/s]


Val Loss: 1.4663 Acc: 0.4864
🟢 Best model updated at epoch 4

Epoch 5/10
----------


Train Phase: 100%|██████████| 714/714 [02:15<00:00,  5.26it/s]


Train Loss: 1.7979 Acc: 0.3859


Val Phase: 100%|██████████| 180/180 [00:35<00:00,  5.12it/s]


Val Loss: 1.4766 Acc: 0.4773

Epoch 6/10
----------


Train Phase: 100%|██████████| 714/714 [02:13<00:00,  5.36it/s]


Train Loss: 1.8033 Acc: 0.3964


Val Phase: 100%|██████████| 180/180 [00:34<00:00,  5.19it/s]


Val Loss: 1.4758 Acc: 0.4829

Epoch 7/10
----------


Train Phase: 100%|██████████| 714/714 [02:19<00:00,  5.10it/s]


Train Loss: 1.8141 Acc: 0.3922


Val Phase: 100%|██████████| 180/180 [00:34<00:00,  5.25it/s]


Val Loss: 1.5545 Acc: 0.4515

Epoch 8/10
----------


Train Phase: 100%|██████████| 714/714 [02:19<00:00,  5.12it/s]


Train Loss: 1.8346 Acc: 0.4006


Val Phase: 100%|██████████| 180/180 [00:37<00:00,  4.84it/s]


Val Loss: 1.3930 Acc: 0.5017
🟢 Best model updated at epoch 8

Epoch 9/10
----------


Train Phase: 100%|██████████| 714/714 [02:08<00:00,  5.56it/s]


Train Loss: 1.7961 Acc: 0.4025


Val Phase: 100%|██████████| 180/180 [00:32<00:00,  5.50it/s]


Val Loss: 1.3847 Acc: 0.5206
🟢 Best model updated at epoch 9

Epoch 10/10
----------


Train Phase: 100%|██████████| 714/714 [02:09<00:00,  5.52it/s]


Train Loss: 1.8469 Acc: 0.3880


Val Phase: 100%|██████████| 180/180 [00:34<00:00,  5.24it/s]


Val Loss: 1.4349 Acc: 0.5038

✅ Huấn luyện hoàn tất! Thời gian: 27m 44s
✅ Best model đã được lưu vào 'best_model.pth'

--- Final Evaluation on Best Model ---


Final Evaluation: 100%|██████████| 180/180 [00:34<00:00,  5.24it/s]


📊 Best Model Validation Loss: 1.3847
📈 Best Model Validation Accuracy: 0.5206





In [None]:
# learning_rates = [0.1, 0.01, 0.001]
# epoch_options = [10, 20]
# weight_decay_values = [0, 1e-4]

# results_summary = []

# for wd in weight_decay_values:
#     for lr in learning_rates:
#         for num_epochs in epoch_options:
#             print(f"\n==========================")
#             print(f"🔍 LR = {lr}, Epochs = {num_epochs}, Weight Decay = {wd}")
#             print(f"==========================")

#             # --- Khởi tạo mô hình ---
#             model = models.inception_v3(weights=models.Inception_V3_Weights.DEFAULT)
#             for param in model.parameters():
#                 param.requires_grad = False
#             in_feats = model.classifier[1].in_features
#             model.classifier = nn.Sequential(
#                 nn.Dropout(0.3),
#                 nn.Linear(in_feats, 10)
#             )
#             model = model.to(device)

#             # --- Loss, Optimizer, Scheduler ---
#             criterion = nn.CrossEntropyLoss()
#             optimizer = optim.Adam(
#                 filter(lambda p: p.requires_grad, model.parameters()),
#                 lr=lr,
#                 weight_decay=wd
#             )
#             scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=2)

#             best_val_loss = float('inf')
#             best_model_wts = copy.deepcopy(model.state_dict())
#             history = {'train_loss': [], 'train_acc': [], 'val_loss': [], 'val_acc': []}
#             start_time_total = time.time()

#             log_file_name = f"log_lr_{lr}_ep_{num_epochs}_wd_{wd}.txt"
#             with open(log_file_name, 'w') as log_file:
#                 log_file.write('Epoch,Train Loss,Train Acc,Val Loss,Val Acc,Time\n')

#             # --- Huấn luyện ---
#             for epoch in range(num_epochs):
#                 epoch_start_time = time.time()
#                 print(f'\nEpoch {epoch+1}/{num_epochs} - LR: {lr} - WD: {wd}')
#                 print('-' * 30)

#                 for phase in ['train', 'val']:
#                     model.train() if phase == 'train' else model.eval()
#                     running_loss = 0.0
#                     running_corrects = 0

#                     for inputs, labels, _ in tqdm(dataloaders[phase], desc=f"{phase.capitalize()} Phase"):
#                         inputs = inputs.to(device)
#                         labels = labels.to(device)

#                         optimizer.zero_grad()
#                         with torch.set_grad_enabled(phase == 'train'):
#                             outputs = model(inputs)
#                             if isinstance(outputs, tuple):
#                                 outputs = outputs[0]
#                             _, preds = torch.max(outputs, 1)
#                             loss = criterion(outputs, labels)

#                             if phase == 'train':
#                                 loss.backward()
#                                 optimizer.step()

#                         running_loss += loss.item() * inputs.size(0)
#                         running_corrects += torch.sum(preds == labels.data)

#                     epoch_loss = running_loss / len(dataloaders[phase].dataset)
#                     epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)

#                     print(f'{phase.capitalize()} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

#                     if phase == 'train':
#                         history['train_loss'].append(epoch_loss)
#                         history['train_acc'].append(epoch_acc.item())
#                     else:
#                         history['val_loss'].append(epoch_loss)
#                         history['val_acc'].append(epoch_acc.item())
#                         scheduler.step(epoch_loss)

#                         if epoch_loss < best_val_loss:
#                             best_val_loss = epoch_loss
#                             best_model_wts = copy.deepcopy(model.state_dict())
#                             print(f'🟢 Best model updated at epoch {epoch+1}')

#                 # Ghi log epoch
#                 epoch_time = time.time() - epoch_start_time
#                 with open(log_file_name, 'a') as log_file:
#                     log_file.write(
#                         f"{epoch+1},{history['train_loss'][-1]:.4f},{history['train_acc'][-1]:.4f},"
#                         f"{history['val_loss'][-1]:.4f},{history['val_acc'][-1]:.4f},{epoch_time:.2f}s\n"
#                     )

#             # --- Đánh giá cuối ---
#             total_training_time = time.time() - start_time_total
#             print(f"\n✅ Huấn luyện xong. Thời gian: {total_training_time // 60:.0f}m {total_training_time % 60:.0f}s")
#             model.load_state_dict(best_model_wts)
#             model_save_path = f"best_model_lr_{lr}_ep_{num_epochs}_wd_{wd}.pth"
#             torch.save(model.state_dict(), model_save_path)

#             # Đánh giá final
#             model.eval()
#             final_val_loss = 0.0
#             final_val_corrects = 0

#             with torch.no_grad():
#                 for inputs, labels, _ in tqdm(dataloaders['val'], desc="Final Evaluation"):
#                     inputs = inputs.to(device)
#                     labels = labels.to(device)
#                     outputs = model(inputs)
#                     if isinstance(outputs, tuple):
#                         outputs = outputs[0]
#                     _, preds = torch.max(outputs, 1)
#                     loss = criterion(outputs, labels)
#                     final_val_loss += loss.item() * inputs.size(0)
#                     final_val_corrects += torch.sum(preds == labels.data)

#             final_loss = final_val_loss / len(dataloaders['val'].dataset)
#             final_acc = final_val_corrects.double() / len(dataloaders['val'].dataset)
#             print(f"\n📊 Final Val Loss (LR={lr}, Epochs={num_epochs}, WD={wd}): {final_loss:.4f}")
#             print(f"📈 Final Val Accuracy (LR={lr}, Epochs={num_epochs}, WD={wd}): {final_acc:.4f}")

#             results_summary.append((lr, num_epochs, wd, final_loss, final_acc.item()))


# **Predict 2K**

In [11]:
import os
os.listdir("/kaggle/input")

['data-10k', 'hwd-dataset', 'hand-written-ditgit']

In [12]:
test_dir = '/kaggle/input/hand-written-ditgit'
test_list = [os.path.join(test_dir, img) for img in os.listdir(test_dir)]

print(f"Số lượng file test: {len(test_list)}")

test_dataset = custom_image_dataset(test_dir, transform = data_transforms['val'], test=True )

test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, num_workers=2)

Số lượng file test: 2939
Chế độ TEST. Đang quét tất cả ảnh trong '/kaggle/input/hand-written-ditgit'...
Found MarkDown
Found MarkDown
Found MarkDown
Found MarkDown
Found MarkDown
Found MarkDown
Found MarkDown
Found MarkDown
Found MarkDown
Found MarkDown
Found MarkDown
Đã tìm thấy 2928 file ứng viên. Bắt đầu xác thực...


Đang xác thực file: 100%|██████████| 2928/2928 [00:16<00:00, 176.53it/s]


--- Hoàn thành quét và xác thực ---
Tổng số ảnh hợp lệ có thể sử dụng: 2928





In [13]:
predict_txt = ""
with torch.no_grad():
    for data in tqdm(test_loader, desc="Đang dự đoán:....."):
        images, labels, paths = data
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        _, predicted = torch.max(outputs, 1)

        # save
        for path, pred in zip(paths, predicted):
            path = path.replace(test_dir, "").lstrip(os.sep)  
            predict_txt += f"{path},{pred.item()}\n"

# Write to file in text mode
with open("/kaggle/working/predict_2k.txt", "w") as file:
    file.write(predict_txt)
print("Predictions saved in 'predict_2k.txt'")

Đang dự đoán:.....: 100%|██████████| 366/366 [00:45<00:00,  7.97it/s]

Predictions saved in 'predict_2k.txt'





# **Predict 10k**

In [14]:
test_dir = '/kaggle/input/data-10k'
test_list = [os.path.join(test_dir, img) for img in os.listdir(test_dir)]

print(f"Số lượng file test: {len(test_list)}")

test_dataset = custom_image_dataset(test_dir, transform = data_transforms['val'], test=True )

test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, num_workers=2)

Số lượng file test: 9998
Chế độ TEST. Đang quét tất cả ảnh trong '/kaggle/input/data-10k'...
Found MarkDown
Found MarkDown
Found MarkDown
Found MarkDown
Found MarkDown
Found MarkDown
Found MarkDown
Found MarkDown
Found MarkDown
Found MarkDown
Found MarkDown
Đã tìm thấy 9987 file ứng viên. Bắt đầu xác thực...


Đang xác thực file: 100%|██████████| 9987/9987 [01:01<00:00, 163.21it/s]


--- Hoàn thành quét và xác thực ---
Tổng số ảnh hợp lệ có thể sử dụng: 9975
Đã phát hiện và loại bỏ 12 file bị lỗi.





In [15]:
predict_txt = ""
with torch.no_grad():
    for data in tqdm(test_loader, desc="Đang dự đoán:....."):
        images, labels, paths = data
        images = images.to(device)
        labels = labels.to(device)

        outputs = model(images)
        _, predicted = torch.max(outputs, 1)

        # save
        for path, pred in zip(paths, predicted):
            path = path.replace(test_dir, "").lstrip(os.sep)  
            predict_txt += f"{path},{pred.item()}\n"

# Write to file in text mode
with open("/kaggle/working/predict_10k.txt", "w") as file:
    file.write(predict_txt)
print("Predictions saved in 'predict_10k.txt'")

Đang dự đoán:.....: 100%|██████████| 1247/1247 [03:19<00:00,  6.24it/s]

Predictions saved in 'predict_10k.txt'



