In [1]:
import torch
from torch.utils.data import Dataset
import os
import cv2 

def collate_fn(samples: list[dict]) -> dict:
    #images = [sample['image'].permute(1, 2, -1).unsqueeze(0) for sample in samples] 
    images = [sample['image'] for sample in samples]
    labels = [sample['label'] for sample in samples] 

    images = torch.stack(images, dim=0)
    labels = torch.tensor(labels)
    
    return {
        'image': images,
        'label': labels
    }

class VinaFood(Dataset):
    #def __init__(self, path: str):
    def __init__(self, path: str, image_size: tuple[int]):
        super().__init__()
    
        self.image_size = image_size
        self.label2idx = {}
        self.idx2label = {}
        self.data: list[dict] = self.load_data(path)
        
    def load_data(self, path):
        data = []
        label_id = 0
        print(f"Loading data from: {path}")
        for folder in os.listdir(path):
            label = folder
            if label not in self.label2idx:
                self.label2idx[label] = label_id
                label_id += 1
            folder_path = os.path.join(path, folder)
            print(f"Processing folder: {folder} (label_id: {self.label2idx[label]})")
            
            for image_file in os.listdir(folder_path):
                image_path = os.path.join(folder_path, image_file)
                image = cv2.imread(image_path)
                data.append({
                    'image': image,
                    'label': label
                })

        self.idx2label = {id: label for label, id in self.label2idx.items()}
        return data
    
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx: int) -> dict:
        item = self.data[idx]
        
        image = item['image']
        label = item['label']
        
        # image = cv2.resize(image, (224, 224))
        # label_id = self.label2idx[label]
        
        image = cv2.resize(image, self.image_size)
        # Convert to RGB if needed (OpenCV loads in BGR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # Convert to tensor once
        image = torch.tensor(image, dtype=torch.float32).permute(2,0,1) / 255.0
        return {
            'image': image,
            'label': self.label2idx[label]
        }
    
    
    

In [None]:
import torch
from torch import nn
from transformers import ResNetForImageClassification
import torch.nn.functional as F

class PretrainedResnet(nn.Module):
    def __init__(self, num_classes=21, freeze_backbone=True):
        super().__init__()

        # Load pretrained ResNet
        basemodel = ResNetForImageClassification.from_pretrained("microsoft/resnet-50")
        self.resnet = basemodel.resnet

        # Freeze backbone if specified
        if freeze_backbone:
            for param in self.resnet.parameters():
                param.requires_grad = False

        # New classifier head
        self.classifier = nn.Sequential(
            nn.Linear(2048, 512),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(512, num_classes)
        )
        
        # Initialize the new layers
        self._init_classifier()

    def _init_classifier(self):
        for module in self.classifier.modules():
            if isinstance(module, nn.Linear):
                nn.init.xavier_uniform_(module.weight)
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)

    def forward(self, images: torch.Tensor):
        features = self.resnet(images).pooler_output
        features = features.squeeze(-1).squeeze(-1)
        logits = self.classifier(features)
        return logits
    
    def unfreeze_layers(self, num_layers=3):
        """Unfreeze the last n layers of the ResNet backbone"""
        for param in self.resnet.parameters():
            param.requires_grad = False
            
        layers_to_unfreeze = list(self.resnet.named_parameters())[-num_layers:]
        for name, param in layers_to_unfreeze:
            param.requires_grad = True
            print(f"Unfroze layer: {name}")


  from .autonotebook import tqdm as notebook_tqdm


: 

In [None]:
from torch.utils.data import DataLoader
from torch import nn 
import torch
import numpy as np 
from sklearn.metrics import precision_score, recall_score, f1_score
from vinafood_dataset import VinaFood, collate_fn
from pretrained_resnet import PretrainedResnet
from tqdm import tqdm

# Training configuration
BATCH_SIZE = 32
LEARNING_RATE = 5e-5  
EPOCHS = 10
device = "cuda" if torch.cuda.is_available() else "cpu"
image_size = (224, 224)

# Load datasets
train_dataset = VinaFood(
    path=r"D:\NguyenTienDat_23520262\Nam_3\DL\BT2\VinaFood21\train",
    image_size=image_size
)

# val_dataset = VinaFood(
#     path=r"D:\NguyenTienDat_23520262\Nam_3\DL\BT2\VinaFood21\test",
#     image_size=image_size,
#     label2idx=train_dataset.label2idx  # Use same label mapping
# )

# Create data loaders
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    collate_fn=collate_fn
)

# val_loader = DataLoader(
#     dataset=val_dataset,
#     batch_size=BATCH_SIZE,
#     collate_fn=collate_fn
# )

# Initialize model
model = PretrainedResnet(
    num_classes=len(train_dataset.label2idx),
    freeze_backbone=True  # Start with frozen backbone
).to(device)

# Loss function and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)

# Learning rate scheduler
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='max', factor=0.1, patience=2
)

def evaluate(model, dataloader):
    model.eval() 
    outputs = []
    trues = []
    with torch.no_grad():
        for item in dataloader:
            image = item["image"].to(device) 
            label = item["label"].to(device) 
            output = model(image)   
            predictions = torch.argmax(output, dim=-1)  

            outputs.extend(predictions.cpu().numpy())
            trues.extend(label.cpu().numpy())
    
    # Print unique values for debugging
    print(f"Unique predictions: {np.unique(outputs)}")
    print(f"Unique true labels: {np.unique(trues)}")
    
    try:
        return {
            "recall": recall_score(trues, outputs, average="macro", zero_division=0),
            "precision": precision_score(trues, outputs, average="macro", zero_division=0),
            "f1": f1_score(trues, outputs, average="macro", zero_division=0),
        }
    except Exception as e:
        print(f"Error in metrics calculation: {e}")
        return {
            "recall": 0.0,
            "precision": 0.0,
            "f1": 0.0
        }

# Training loop
best_f1 = 0
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch+1}/{EPOCHS}")
    model.train()
    total_loss = 0
    
    # Training
    progress_bar = tqdm(train_loader, desc="Training")
    for batch in progress_bar:
        images = batch["image"].to(device)
        labels = batch["label"].to(device)

        # Forward pass
        optimizer.zero_grad()
        logits = model(images)
        loss = loss_fn(logits, labels)
        
        # Backward pass
        loss.backward()
        optimizer.step()
        
        total_loss += loss.item()
        progress_bar.set_postfix({"loss": f"{loss.item():.4f}"})

    avg_loss = total_loss / len(train_loader)
    print(f"Average training loss: {avg_loss:.4f}")

    # Validation
    metrics = evaluate(model, train_loader)
    print("Validation metrics:")
    for metric, value in metrics.items():
        print(f"{metric}: {value:.4f}")

    # # Learning rate scheduling
    # scheduler.step(metrics['f1'])

    # # Save best model
    # if metrics['f1'] > best_f1:
    #     best_f1 = metrics['f1']
    #     torch.save(model.state_dict(), 'best_resnet.pth')
    #     print("Saved new best model!")

    # Unfreeze some layers after a few epochs
    if epoch == 5:  # After 5 epochs
        print("\nUnfreezing last few layers...")
        model.unfreeze_layers(num_layers=10)  # Unfreeze last 10 layers


Loading data from: D:\NguyenTienDat_23520262\Nam_3\DL\BT2\VinaFood21\train
Processing folder: banh-can (label_id: 0)
Processing folder: banh-hoi (label_id: 1)
Processing folder: banh-mi-chao (label_id: 2)
Processing folder: banh-tet (label_id: 3)
Processing folder: banh-trang-tron (label_id: 4)
Processing folder: banh-u (label_id: 5)
Processing folder: banh-uot (label_id: 6)
Processing folder: bap-nuong (label_id: 7)
Processing folder: bo-kho (label_id: 8)
Processing folder: bo-la-lot (label_id: 9)
Processing folder: bot-chien (label_id: 10)
Processing folder: ca-ri (label_id: 11)
Processing folder: canh-kho-qua (label_id: 12)
Processing folder: canh-khoai-mo (label_id: 13)
Processing folder: ga-nuong (label_id: 14)
Processing folder: goi-ga (label_id: 15)
Processing folder: ha-cao (label_id: 16)
Processing folder: hoanh-thanh-nuoc (label_id: 17)
Processing folder: pha-lau (label_id: 18)
Processing folder: tau-hu (label_id: 19)
Processing folder: thit-kho-trung (label_id: 20)

Epoch 1/

Training: 100%|██████████| 314/314 [09:55<00:00,  1.90s/it, loss=2.5569]


Average training loss: 2.8583
Unique predictions: [ 0  1  2  3  4  6  7  8  9 12 13 14 16 17 18 20]
Unique true labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
Validation metrics:
recall: 0.2117
precision: 0.4882
f1: 0.1751

Epoch 2/10


Training: 100%|██████████| 314/314 [08:41<00:00,  1.66s/it, loss=2.6678]


Average training loss: 2.4802
Unique predictions: [ 0  1  2  3  4  5  6  7  8  9 10 12 13 14 15 16 17 18 19 20]
Unique true labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
Validation metrics:
recall: 0.3701
precision: 0.6379
f1: 0.3632

Epoch 3/10


Training: 100%|██████████| 314/314 [08:06<00:00,  1.55s/it, loss=2.0208]


Average training loss: 2.1414
Unique predictions: [ 0  1  2  3  4  5  6  7  8  9 10 12 13 14 15 16 17 18 19 20]
Unique true labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
Validation metrics:
recall: 0.4922
precision: 0.6152
f1: 0.5119

Epoch 4/10


Training: 100%|██████████| 314/314 [08:32<00:00,  1.63s/it, loss=1.6888]


Average training loss: 1.8973
Unique predictions: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
Unique true labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
Validation metrics:
recall: 0.5583
precision: 0.6748
f1: 0.5721

Epoch 5/10


Training: 100%|██████████| 314/314 [32:35<00:00,  6.23s/it, loss=1.5417]    


Average training loss: 1.7173
Unique predictions: [ 0  1  2  3  4  5  6  7  8  9 10 12 13 14 15 16 17 18 19 20]
Unique true labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
Validation metrics:
recall: 0.5828
precision: 0.6470
f1: 0.5962

Epoch 6/10


Training: 100%|██████████| 314/314 [10:14<00:00,  1.96s/it, loss=1.6491]  


Average training loss: 1.5864
Unique predictions: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
Unique true labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
Validation metrics:
recall: 0.6226
precision: 0.6929
f1: 0.6322

Unfreezing last few layers...
Unfroze layer: encoder.stages.3.layers.1.layer.2.normalization.bias
Unfroze layer: encoder.stages.3.layers.2.layer.0.convolution.weight
Unfroze layer: encoder.stages.3.layers.2.layer.0.normalization.weight
Unfroze layer: encoder.stages.3.layers.2.layer.0.normalization.bias
Unfroze layer: encoder.stages.3.layers.2.layer.1.convolution.weight
Unfroze layer: encoder.stages.3.layers.2.layer.1.normalization.weight
Unfroze layer: encoder.stages.3.layers.2.layer.1.normalization.bias
Unfroze layer: encoder.stages.3.layers.2.layer.2.convolution.weight
Unfroze layer: encoder.stages.3.layers.2.layer.2.normalization.weight
Unfroze layer: encoder.stages.3.layers.2.layer.2.normalization.bias

Epoch 7/10


Training: 100%|██████████| 314/314 [2:05:45<00:00, 24.03s/it, loss=1.1781]    


Average training loss: 1.4508
Unique predictions: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
Unique true labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
Validation metrics:
recall: 0.6687
precision: 0.7069
f1: 0.6712

Epoch 8/10


Training: 100%|██████████| 314/314 [10:56<00:00,  2.09s/it, loss=1.4579]


Average training loss: 1.2909
Unique predictions: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
Unique true labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
Validation metrics:
recall: 0.6866
precision: 0.7292
f1: 0.6916

Epoch 9/10


Training: 100%|██████████| 314/314 [10:33<00:00,  2.02s/it, loss=0.9839]


Average training loss: 1.1909
Unique predictions: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
Unique true labels: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20]
Validation metrics:
recall: 0.7162
precision: 0.7506
f1: 0.7210

Epoch 10/10


Training: 100%|██████████| 314/314 [12:05<00:00,  2.31s/it, loss=1.1298]


Average training loss: 1.1061
