In [1]:
import os
import cv2
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from efficientnet_pytorch import EfficientNet
from albumentations import Compose, Normalize
from albumentations.pytorch import ToTensorV2

In [9]:
class Config:
    """Central place for the paths and hyper-parameters used by this notebook.

    Note that the output directory is created as a side effect of defining
    this class (see the ``os.makedirs`` call below).
    """

    # ---- Locations of images and ground-truth tables ----
    TRAIN_IMG_DIR = "D:/HUS_third_year/ki_2/TGMT/project/project/archive/ISIC_2019_Training_Input/processed_train_isic2019"
    TEST_IMG_DIR = "D:/HUS_third_year/ki_2/TGMT/project/project/archive/test/processed_test_isic2019_2"
    TRAIN_GT_CSV = "D:/HUS_third_year/ki_2/TGMT/project/project/archive/ISIC_2019_Training_GroundTruth.csv"
    TEST_GT_CSV = "D:/HUS_third_year/ki_2/TGMT/project/project/ISIC_2019_Test_GroundTruth.csv"

    # ---- Where trained weights are written ----
    OUTPUT_DIR = "D:/HUS_third_year/ki_2/TGMT/project/project/outputs/task1"
    # Make sure the output folder exists before anything tries to write to it.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    # ---- Training hyper-parameters ----
    # NOTE(review): IMG_SIZE is not referenced by the code visible in this
    # notebook section — confirm whether it is still needed.
    IMG_SIZE = 600
    BATCH_SIZE = 16
    EPOCHS = 30
    LR = 1e-3

    # Use the GPU when one is available, otherwise fall back to the CPU.
    if torch.cuda.is_available():
        DEVICE = 'cuda'
    else:
        DEVICE = 'cpu'

    # ---- Target classes: 8 diagnoses followed by UNK ----
    CLASSES = [
        'MEL', 'NV', 'BCC', 'AK',
        'BKL', 'DF', 'VASC', 'SCC',
        'UNK',
    ]
    N_CLASSES = len(CLASSES)


In [None]:
class Task1Dataset(Dataset):
    """Dataset that loads already-preprocessed images from disk with their labels.

    Each row of the metadata table supplies an ``image_id`` (the file name
    without the ``.jpg`` extension) and a ``label``.
    """

    def __init__(self, df, img_dir, transforms=None):
        """Store the metadata table, the image directory and an optional transform.

        Args:
            df: DataFrame with ``image_id`` and ``label`` columns. Its index
                is reset so rows can be addressed by position.
            img_dir: Directory that contains the ``<image_id>.jpg`` files.
            transforms: Optional callable invoked as ``transforms(image=img)``
                whose result is indexed with ``'image'``.
        """
        self.df = df.reset_index(drop=True)
        self.img_dir = img_dir
        self.transforms = transforms

    def __len__(self):
        """Return the number of rows in the metadata table."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image at position ``idx`` and return ``(image, label)``.

        The image is read with OpenCV, converted from BGR to RGB and, when a
        transform was supplied, replaced by ``transforms(image=img)['image']``.

        Raises:
            FileNotFoundError: If OpenCV could not read the image file.
        """
        row = self.df.iloc[idx]
        img_path = os.path.join(self.img_dir, row['image_id'] + '.jpg')
        img = cv2.imread(img_path)
        # cv2.imread signals a missing/unreadable file by returning None
        # instead of raising; fail here with the offending path rather than
        # letting cvtColor raise an opaque OpenCV assertion.
        if img is None:
            raise FileNotFoundError(f"Could not read image: {img_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        # Compare against None explicitly: a transform object may define its
        # own truthiness (e.g. via __len__).
        if self.transforms is not None:
            img = self.transforms(image=img)['image']
        return img, row['label']

In [11]:
class ImageNet(nn.Module):
    """EfficientNet backbone whose final ``_fc`` layer is replaced by a new linear head.

    By default the backbone is created with ``EfficientNet.from_name``, which
    (per the efficientnet_pytorch documentation) builds the architecture
    without loading pretrained weights.
    """

    def __init__(self, backbone='efficientnet-b3', num_classes=None, pretrained=False):
        """Build the backbone and attach a classification head.

        Args:
            backbone: EfficientNet variant name passed to the factory method.
            num_classes: Output size of the new head. ``None`` (the default)
                uses ``Config.N_CLASSES``.
            pretrained: When true, create the backbone with
                ``EfficientNet.from_pretrained`` instead of ``from_name``.
                Defaults to ``False`` to keep the original behavior.
        """
        super().__init__()
        if num_classes is None:
            num_classes = Config.N_CLASSES
        build_backbone = EfficientNet.from_pretrained if pretrained else EfficientNet.from_name
        self.net = build_backbone(backbone)
        # Swap the stock classifier for one sized to this task; the new
        # layer is freshly initialised in both modes.
        in_f = self.net._fc.in_features
        self.net._fc = nn.Linear(in_f, num_classes)

    def forward(self, x):
        """Return the backbone's output (class logits from the new head) for ``x``."""
        return self.net(x)

In [12]:
def train_one_epoch(model, loader, criterion, optimizer):
    """Train ``model`` for one pass over ``loader`` and return the mean per-sample loss.

    Args:
        model: Module to optimise; it is switched to training mode.
        loader: Iterable yielding ``(images, labels)`` tensor batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates the model's parameters.

    Returns:
        The batch-size-weighted average loss over the samples that were
        actually processed, or ``0.0`` if the loader yielded no batches.
    """
    model.train()
    total_loss = 0.0
    n_seen = 0
    for imgs, labels in loader:
        imgs, labels = imgs.to(Config.DEVICE), labels.to(Config.DEVICE)
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        batch_size = imgs.size(0)
        # Weight each batch's loss by its size so a short batch does not
        # skew the average.
        total_loss += loss.item() * batch_size
        n_seen += batch_size
    # Divide by the samples really seen, not len(loader.dataset): the two
    # differ when the loader drops the last batch or samples a subset.
    return total_loss / max(n_seen, 1)

In [13]:
def evaluate(model, loader, criterion):
    """Compute the mean loss and accuracy of ``model`` over ``loader`` without updating it.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        loader: Iterable yielding ``(images, labels)`` tensor batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.

    Returns:
        A ``(mean_loss, accuracy)`` tuple averaged over the samples that were
        actually processed; ``(0.0, 0.0)`` if the loader yielded no batches.
    """
    model.eval()
    total_loss = 0.0
    correct = 0
    n_seen = 0
    with torch.no_grad():
        for imgs, labels in loader:
            imgs, labels = imgs.to(Config.DEVICE), labels.to(Config.DEVICE)
            outputs = model(imgs)
            loss = criterion(outputs, labels)
            batch_size = imgs.size(0)
            total_loss += loss.item() * batch_size
            # The predicted class is the index of the largest output per sample.
            correct += (outputs.argmax(1) == labels).sum().item()
            n_seen += batch_size
    # Divide by the samples really seen, not len(loader.dataset): the two
    # differ when the loader drops the last batch or samples a subset.
    denom = max(n_seen, 1)
    return total_loss / denom, correct / denom

In [None]:
if __name__ == '__main__':
    def _read_ground_truth(csv_path):
        """Read a ground-truth CSV and attach an integer ``label`` column.

        The ``image`` column is renamed to ``image_id``; ``label`` is the
        position within ``Config.CLASSES`` of the largest value in each row
        of the class columns (i.e. the index of the 1 in a one-hot row).
        """
        table = pd.read_csv(csv_path)
        table.rename(columns={'image': 'image_id'}, inplace=True)
        table['label'] = table[Config.CLASSES].values.argmax(axis=1)
        return table

    # Ground-truth tables for the training and the test set.
    train_gt = _read_ground_truth(Config.TRAIN_GT_CSV)
    test_gt = _read_ground_truth(Config.TEST_GT_CSV)

    # The only transforms applied are Normalize and ToTensorV2.
    transforms = Compose([Normalize(), ToTensorV2()])

    # Data loaders: shuffled for training, fixed order for testing.
    # NOTE(review): num_workers=4 starts worker processes; the paths suggest
    # Windows, where workers launched from a notebook may be unable to locate
    # Task1Dataset — confirm this runs in the intended environment.
    train_dataset = Task1Dataset(train_gt, Config.TRAIN_IMG_DIR, transforms=transforms)
    test_dataset = Task1Dataset(test_gt, Config.TEST_IMG_DIR, transforms=transforms)
    train_loader = DataLoader(
        train_dataset,
        batch_size=Config.BATCH_SIZE,
        shuffle=True,
        num_workers=4,
    )
    test_loader = DataLoader(
        test_dataset,
        batch_size=Config.BATCH_SIZE,
        shuffle=False,
        num_workers=4,
    )

    # Model, loss function and optimizer.
    model = ImageNet().to(Config.DEVICE)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=Config.LR)

    # Train on the whole training set (no validation split).
    for epoch in range(Config.EPOCHS):
        train_loss = train_one_epoch(model, train_loader, criterion, optimizer)
        print(f"Epoch {epoch}: Train Loss={train_loss:.4f}")

    # Persist the weights from the final epoch.
    checkpoint_path = os.path.join(Config.OUTPUT_DIR, 'task1_final.pth')
    torch.save(model.state_dict(), checkpoint_path)

    # Reload the saved weights and score them on the test set.
    model.load_state_dict(torch.load(checkpoint_path))
    test_loss, test_acc = evaluate(model, test_loader, criterion)
    print(f"Test Loss={test_loss:.4f}, Test Acc={test_acc:.4f}")