### Thử nghiệm bộ dữ liệu phong cảnh

In [1]:
# Các thư viện cần dùng cho dự án
import sys
sys.path.append("../")

import os
import torch
import torchvision
import random
import numpy as np
import faiss

from fgi import *
from torch import nn, optim
from torchmetrics import Accuracy
from pytorch_metric_learning import losses, miners, samplers
from lightning.pytorch import Trainer
from lightning.pytorch.callbacks import EarlyStopping, ModelCheckpoint
from lightning.pytorch.loggers import TensorBoardLogger

In [None]:
# Các đánh giá hành vi
IMG_SHAPE = (3, 256, 256) # C, H, W (ảnh xám mặc định)
CANNY_SHAPE = (1, 256, 256) # Kích thước ảnh qua tách cạnh
IMG_SIZE = (IMG_SHAPE[1], IMG_SHAPE[2])
PHI_DIM = 128
DEFAULT_LR = 0.002
MAX_EXAMPLES = 1000 # Dùng cho embedding
NORMALIZE_IMAGE = (0.485, 0.456, 0.406), (0.229, 0.224, 0.225)
NUM_WORKERS = 7
BATCH_SIZE = 32
STORAGE_DATA = "../data/landscape"
OPTIONS = os.listdir(STORAGE_DATA + "/train")
NUM_CLASSES = len(OPTIONS)
PROBLEM_ID = "landscape_classifier"
EXPERIMENT_TENSORBOARD_NAME = PROBLEM_ID
EXPRIMENT_TENSORBOARD_PATH = "../experiment/"
SEED_CODE = 131006 # Random id dùng cho sinh dữ liệu
TRAIN_SIZE = 0.8 # 80% dữ liệu train sẽ được đem đi đào tạo, 20% cho validation
NUM_SAMPLE_PER_CLASS = 4 # Số sample mỗi label dành cho học không gian embedding
DROPOUT = 0.2 # Công dụng regularier, cấu hình cho nn.Dropout
TRIPLER_MARGIN = 2.0 # Độ khác biệt tối thiểu giữa anchor và positive, dùng cho biệt hoá không gian embedding
STRATEGY_ANCHOR_POSITIVE = "hard"
DEVICE = "cpu"
PATIENCE = 4 # Số lượt đợi không cải thiện
LAMBDA_CLASSIFY = 1. # Trọng số đánh giá tầm quan trọng mục tiêu phân loại
LAMBDA_EMBEDDING = 1. # Trọng số đánh giá tầm quan trọng mục tiêu biệt hoá không gian
ILLUSTRATION_EXAMPLES = 1000 # Giới hạn số mẫu dùng cho show projector trong tensorboard
MAX_EPOCHS = 10

In [3]:
# In thử các lớp
print(OPTIONS)
print(IMG_SIZE)

['buildings', 'forest', 'glacier', 'mountain', 'sea', 'street']
(256, 256)


In [4]:
# Cấu hình pytorch đảm bảo thí nghiệm
torch.manual_seed(SEED_CODE)
random.seed(SEED_CODE)
np.random.seed(SEED_CODE)
torch.cuda.manual_seed(SEED_CODE)
torch.cuda.manual_seed_all(SEED_CODE)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

### Chuẩn bị dữ liệu huấn luyện

In [5]:
train_transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize(IMG_SIZE),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(*NORMALIZE_IMAGE)
])

In [6]:
full_dataset = torchvision.datasets.ImageFolder(STORAGE_DATA + "/train", transform=train_transform)

# Chia tập train và validation
train_size = int(len(full_dataset) * TRAIN_SIZE)
val_size = len(full_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(full_dataset, [train_size, val_size])

# Lấy bộ dữ liệu test
test_dataset = torchvision.datasets.ImageFolder(STORAGE_DATA + "/test", transform=train_transform)

In [7]:
# In thử thông tin bộ dữ liệu
print(f"Kích thước toàn bộ dữ liệu đào tạo: {len(full_dataset)}")
print(f"Kích thước dữ liệu cho training: {train_size}")
print(f"Kích thước dữ liệu cho validation: {val_size}")
print(f"Kích thước dữ liệu cho test: {len(test_dataset)}")

Kích thước toàn bộ dữ liệu đào tạo: 14034
Kích thước dữ liệu cho training: 11227
Kích thước dữ liệu cho validation: 2807
Kích thước dữ liệu cho test: 3000


In [8]:
# Tiến hành tạo DataLoader
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS, persistent_workers=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS, persistent_workers=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE, num_workers=NUM_WORKERS)

### Kiến trúc mô hình

In [9]:
# Lớp hành vi của vấn đề
class LandscapeClassifier(NonCodeProblem):
    def __init__(self, *args, **kwargs):
        super().__init__(PROBLEM_ID, *args, **kwargs)
        self._represent = RepresentLayer([
            ImageRepresent(img_shape=IMG_SHAPE, patch_size=16, num_heads=1, phi_dim=PHI_DIM),
            ImageRepresent(img_shape=IMG_SHAPE, patch_size=16, num_heads=1, phi_dim=PHI_DIM)
        ], output_dim=PHI_DIM)
        self._combine_repr = CoRepresentLayer(
            [ CoRepresentUnit(2, phi_dim=PHI_DIM) ]
        )
        self._property = PropertyUnit(phi_dim=PHI_DIM)
        self._task = ChooseOptions(1, options=OPTIONS, property_name="landscape", phi_dim=PHI_DIM)
    
    def recognize_unknown(self, x, *args, **kwargs):
        x = self._represent(x)
        x = self._combine_repr(x)
        return x
    
    def forward(self, x, skip_avatar : bool = False, *args, **kwargs):
        x = self._represent(x)
        x = self._combine_repr(x)

        q = self._property(x)
        q = self._task(x + q)

        if skip_avatar:
            return q
        
        return q, x

### Cấu hình huấn luyện mô hình

In [None]:
# Viết lớp Learner dành riêng cho việc học đào tạo vấn đề
class LandscapeClassifierLearner(LightningLearner):
    def __init__(self, problem, *args, **kwargs):
        super().__init__(problem, *args, **kwargs)
        self._classify = nn.CrossEntropyLoss()
        self._specialized_space = losses.ArcFaceLoss(num_classes=NUM_CLASSES, embedding_size=PHI_DIM, margin=TRIPLER_MARGIN)
        self._train_accuracy = Accuracy(task="multiclass", num_classes=NUM_CLASSES)
        self._val_accuracy = Accuracy(task="multiclass", num_classes=NUM_CLASSES)
    
    def configure_optimizers(self):
        # Kết hợp thêm chiến lược scheduler
        optimizer = optim.AdamW(self._problem.parameters(), lr=DEFAULT_LR)
        scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=PATIENCE - 2)
        return { "optimizer" : optimizer, "lr_scheduler" : scheduler, "monitor" : "val/loss" }

    def training_step(self, batch, batch_idx, *args, **kwargs):
        x, y = batch
        y_predicted = self(x)

        loss, ce, triplet = self._aggerate_loss(y_predicted, y)
        self._train_accuracy.update(y_predicted[0], y)
        
        self.log("train/loss", loss, prog_bar=True, logger=True, on_epoch=True, on_step=True)
        self.log("train/ce", ce, prog_bar=True, logger=True, on_epoch=True)
        self.log("train/triplet", triplet, prog_bar=True, logger=True, on_epoch=True)
        self.log("train/acc", self._train_accuracy, prog_bar=True, on_epoch=True, logger=True)

        return loss

    def validation_step(self, batch, batch_idx, *args, **kwargs):
        x, y = batch
        y_predicted = self(x)

        loss, ce, triplet = self._aggerate_loss(y_predicted, y)
        self._val_accuracy.update(y_predicted[0], y)

        self.log("val/loss", loss, prog_bar=True, logger=True, on_epoch=True)
        self.log("val/acc", self._val_accuracy, prog_bar=True, logger=True, on_epoch=True)
        self.log("val/ce", ce, prog_bar=True, logger=True, on_epoch=True)
        self.log("val/triplet", triplet, prog_bar=True, logger=True, on_epoch=True)

    def on_train_batch_end(self, outputs, batch, batch_idx):
        lr = self.trainer.optimizers[0].param_groups[0]['lr']
        self.log("lr", lr, logger=True, on_epoch=True)

    def _aggerate_loss(self, y_predicted, y, *args, **kwargs):
        y_hat, emb = y_predicted

        ce = self._classify(y_hat, y)
        triplet = self._specialized_space(emb, y)

        loss = LAMBDA_CLASSIFY * ce + LAMBDA_EMBEDDING * triplet

        return loss, ce, triplet
    
    def test_step(self, batch, *args, **kwargs):
        x, y = batch
        y_predicted = self(x)

        loss, __, __ = self._aggerate_loss(y_predicted, y)
        self._val_accuracy.update(y_predicted[0], y)

        self.log("test/loss", loss, prog_bar=True, on_epoch=True, logger=True)
        self.log("test/acc", self._val_accuracy, prog_bar=True, logger=True, on_epoch=True)
    
    def _get_embedding(self, iterators):
        # Hiển thị thử embedding
        collected = 0

        all_labels = []
        all_embeds = []

        self.eval()
        with torch.no_grad():
            for batch in iterators:
                x, y = batch
                x = x.to(self.device)
                y = y.to(self.device)

                # Encode ảnh thành vector đặc trưng (ví dụ: self._problem.encode)
                embedding = self._problem.recognize_unknown(x)

                # all_imgs.append(x.cpu())
                all_labels.append(y.cpu())
                all_embeds.append(embedding.cpu())

                collected += x.size(0)
                if collected >= ILLUSTRATION_EXAMPLES:
                    break  # Dừng sớm nếu vượt quá max_examples
        
        # all_imgs = torch.cat(all_imgs, dim=0)[:ILLUSTRATION_EXAMPLES]
        all_labels = torch.cat(all_labels, dim=0)[:ILLUSTRATION_EXAMPLES]
        all_embeds = torch.cat(all_embeds, dim=0)[:ILLUSTRATION_EXAMPLES]

        return all_labels, all_embeds

    def on_train_end(self):
        all_labels, all_embeds = self._get_embedding(self.trainer.val_dataloaders)
        self.logger.experiment.add_embedding(
            mat=all_embeds,
            metadata=[str(label.item()) for label in all_labels],
            global_step=self.global_step,
            
            tag="train/embedding"
        )

    def on_test_end(self):
        all_labels, all_embeds = self._get_embedding(self.trainer.test_dataloaders)
        self.logger.experiment.add_embedding(
            mat=all_embeds,
            metadata=[str(label.item()) for label in all_labels],
            global_step=self.global_step,
            tag="test/embedding"
        )

### Phối hợp các Pipeline

In [11]:
# Khởi tạo bộ giải quyết vấn đề
solver = LandscapeClassifier()
solver.metadata

{'type': 'LandscapeClassifier',
 'default_exploiter': None,
 'call_update': True,
 'layers': ['_combine_repr', '_represent'],
 'units': ['0d8d303c-a03a-45a5-aefa-ac65193eea55',
  '12e5bda7-4f2f-48b5-9075-137e8697a6ee',
  '44205768-2097-4941-b91c-6173c5565207',
  '48250f0e-e323-408c-855d-ab58c6ba7807',
  '30289e13-9f29-4388-8559-317c39d30720'],
 'properties': ['landscape']}

In [12]:
# Khởi tạo bộ học
learner = LandscapeClassifierLearner(solver)
learner.compile()
# Vẽ đồ thị lan truyền
learner.example_input_array = torch.randn(1, *IMG_SHAPE, device=DEVICE)

In [13]:
# Thử hiển thị và chạy thử solver
y_predicted = solver(torch.randn(32, *IMG_SHAPE))
print(y_predicted)

(tensor([[0.2007, 0.0642, 0.2203, 0.0598, 0.0201, 0.0890],
        [0.1982, 0.0670, 0.2209, 0.0738, 0.0112, 0.0885],
        [0.1960, 0.0681, 0.2140, 0.0687, 0.0174, 0.0934],
        [0.1982, 0.0681, 0.2185, 0.0673, 0.0132, 0.0880],
        [0.1962, 0.0639, 0.2138, 0.0729, 0.0099, 0.0853],
        [0.1939, 0.0675, 0.2177, 0.0737, 0.0131, 0.0860],
        [0.1961, 0.0675, 0.2152, 0.0724, 0.0177, 0.0877],
        [0.2005, 0.0722, 0.2253, 0.0737, 0.0208, 0.0879],
        [0.1965, 0.0646, 0.2153, 0.0658, 0.0111, 0.0940],
        [0.1952, 0.0695, 0.2167, 0.0615, 0.0138, 0.0920],
        [0.1983, 0.0680, 0.2208, 0.0689, 0.0154, 0.0918],
        [0.2028, 0.0664, 0.2166, 0.0686, 0.0102, 0.0941],
        [0.2028, 0.0709, 0.2173, 0.0680, 0.0102, 0.0852],
        [0.1965, 0.0715, 0.2200, 0.0725, 0.0226, 0.0925],
        [0.2037, 0.0668, 0.2157, 0.0703, 0.0124, 0.0899],
        [0.2035, 0.0700, 0.2191, 0.0711, 0.0088, 0.0947],
        [0.2036, 0.0677, 0.2211, 0.0711, 0.0154, 0.0858],
        [0.20

In [14]:
# # Thử in kích thước loader
# imgs, labels = next(iter(train_loader))
# print(imgs.shape)
# y_hat = solver(imgs)
# print(y_hat)

In [15]:
# Cấu hình logger, callbacks
logger = TensorBoardLogger(EXPRIMENT_TENSORBOARD_PATH, EXPERIMENT_TENSORBOARD_NAME, log_graph=False)
early = EarlyStopping("val/loss", patience=PATIENCE, verbose=True)
best_checkpoint = ModelCheckpoint(dirpath=f"../database/{PROBLEM_ID}", filename="best", monitor="val/loss", verbose=True, save_weights_only=True)

In [16]:
# Huấn luyện mô hình
trainer = Trainer(accelerator="auto", max_epochs=MAX_EPOCHS, logger=logger, callbacks=[early, best_checkpoint])
trainer.fit(learner, train_loader, val_loader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
e:\simulations\implementations\env\Lib\site-packages\lightning\pytorch\callbacks\model_checkpoint.py:658: Checkpoint directory E:\simulations\implementations\database\landscape_classifier exists and is not empty.

  | Name               | Type                | Params | Mode  | In sizes | Out sizes
------------------------------------------------------------------------------------------
0 | _problem           | LandscapeClassifier | 429 K  | train | ?        | ?        
1 | _classify          | CrossEntropyLoss    | 0      | train | ?        | ?        
2 | _specialized_space | ArcFaceLoss         | 768    | train | ?        | ?        
3 | _train_accuracy    | MulticlassAccuracy  | 0      | train | ?        | ?        
4 | _val_accuracy      | MulticlassAccuracy  | 0      | train | ?        | ?        
--------------------------------------------------------------------------

Epoch 0: 100%|██████████| 351/351 [02:12<00:00,  2.65it/s, v_num=0, train/loss_step=5.690, train/ce_step=1.780, train/triplet_step=3.910, val/loss=5.720, val/acc=0.174, val/ce=1.800, val/triplet=3.920, train/loss_epoch=8.710, train/ce_epoch=3.950, train/triplet_epoch=4.760, train/acc_epoch=0.171]

Metric val/loss improved. New best score: 5.718
Epoch 0, global step 351: 'val/loss' reached 5.71845 (best 5.71845), saving model to 'E:\\simulations\\implementations\\database\\landscape_classifier\\best-v2.ckpt' as top 1


Epoch 1: 100%|██████████| 351/351 [02:03<00:00,  2.84it/s, v_num=0, train/loss_step=5.700, train/ce_step=1.770, train/triplet_step=3.920, val/loss=5.710, val/acc=0.174, val/ce=1.790, val/triplet=3.910, train/loss_epoch=5.760, train/ce_epoch=1.800, train/triplet_epoch=3.970, train/acc_epoch=0.169]

Metric val/loss improved by 0.012 >= min_delta = 0.0. New best score: 5.706
Epoch 1, global step 702: 'val/loss' reached 5.70626 (best 5.70626), saving model to 'E:\\simulations\\implementations\\database\\landscape_classifier\\best-v2.ckpt' as top 1


Epoch 2: 100%|██████████| 351/351 [02:10<00:00,  2.69it/s, v_num=0, train/loss_step=5.800, train/ce_step=1.790, train/triplet_step=4.000, val/loss=5.740, val/acc=0.175, val/ce=1.800, val/triplet=3.940, train/loss_epoch=5.760, train/ce_epoch=1.800, train/triplet_epoch=3.960, train/acc_epoch=0.174]

Epoch 2, global step 1053: 'val/loss' was not in top 1


Epoch 3: 100%|██████████| 351/351 [02:32<00:00,  2.30it/s, v_num=0, train/loss_step=5.750, train/ce_step=1.770, train/triplet_step=3.980, val/loss=5.720, val/acc=0.160, val/ce=1.810, val/triplet=3.910, train/loss_epoch=5.730, train/ce_epoch=1.800, train/triplet_epoch=3.930, train/acc_epoch=0.170]

Epoch 3, global step 1404: 'val/loss' was not in top 1


Epoch 4: 100%|██████████| 351/351 [02:25<00:00,  2.42it/s, v_num=0, train/loss_step=5.830, train/ce_step=1.790, train/triplet_step=4.040, val/loss=5.720, val/acc=0.160, val/ce=1.790, val/triplet=3.930, train/loss_epoch=5.730, train/ce_epoch=1.790, train/triplet_epoch=3.930, train/acc_epoch=0.172]

Epoch 4, global step 1755: 'val/loss' was not in top 1


Epoch 5: 100%|██████████| 351/351 [02:50<00:00,  2.06it/s, v_num=0, train/loss_step=5.650, train/ce_step=1.800, train/triplet_step=3.850, val/loss=5.660, val/acc=0.174, val/ce=1.790, val/triplet=3.870, train/loss_epoch=5.650, train/ce_epoch=1.790, train/triplet_epoch=3.860, train/acc_epoch=0.180]

Metric val/loss improved by 0.049 >= min_delta = 0.0. New best score: 5.658
Epoch 5, global step 2106: 'val/loss' reached 5.65774 (best 5.65774), saving model to 'E:\\simulations\\implementations\\database\\landscape_classifier\\best-v2.ckpt' as top 1


Epoch 6: 100%|██████████| 351/351 [03:17<00:00,  1.78it/s, v_num=0, train/loss_step=5.640, train/ce_step=1.780, train/triplet_step=3.850, val/loss=5.650, val/acc=0.175, val/ce=1.790, val/triplet=3.860, train/loss_epoch=5.650, train/ce_epoch=1.790, train/triplet_epoch=3.860, train/acc_epoch=0.179]

Metric val/loss improved by 0.003 >= min_delta = 0.0. New best score: 5.654
Epoch 6, global step 2457: 'val/loss' reached 5.65433 (best 5.65433), saving model to 'E:\\simulations\\implementations\\database\\landscape_classifier\\best-v2.ckpt' as top 1


Epoch 7: 100%|██████████| 351/351 [02:48<00:00,  2.08it/s, v_num=0, train/loss_step=5.600, train/ce_step=1.780, train/triplet_step=3.830, val/loss=5.650, val/acc=0.175, val/ce=1.790, val/triplet=3.860, train/loss_epoch=5.650, train/ce_epoch=1.790, train/triplet_epoch=3.860, train/acc_epoch=0.176]

Metric val/loss improved by 0.007 >= min_delta = 0.0. New best score: 5.647
Epoch 7, global step 2808: 'val/loss' reached 5.64706 (best 5.64706), saving model to 'E:\\simulations\\implementations\\database\\landscape_classifier\\best-v2.ckpt' as top 1


Epoch 8: 100%|██████████| 351/351 [03:11<00:00,  1.83it/s, v_num=0, train/loss_step=5.580, train/ce_step=1.790, train/triplet_step=3.790, val/loss=5.660, val/acc=0.175, val/ce=1.790, val/triplet=3.870, train/loss_epoch=5.650, train/ce_epoch=1.790, train/triplet_epoch=3.860, train/acc_epoch=0.178]

Epoch 8, global step 3159: 'val/loss' was not in top 1


Epoch 9: 100%|██████████| 351/351 [02:36<00:00,  2.25it/s, v_num=0, train/loss_step=5.740, train/ce_step=1.810, train/triplet_step=3.930, val/loss=5.680, val/acc=0.175, val/ce=1.790, val/triplet=3.880, train/loss_epoch=5.650, train/ce_epoch=1.790, train/triplet_epoch=3.860, train/acc_epoch=0.174]

Epoch 9, global step 3510: 'val/loss' was not in top 1
`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 351/351 [02:36<00:00,  2.25it/s, v_num=0, train/loss_step=5.740, train/ce_step=1.810, train/triplet_step=3.930, val/loss=5.680, val/acc=0.175, val/ce=1.790, val/triplet=3.880, train/loss_epoch=5.650, train/ce_epoch=1.790, train/triplet_epoch=3.860, train/acc_epoch=0.174]
