In [1]:
import torch 
from torch import nn 
from torch.nn import functional as F

class GoogLeNet(nn.Module):
    """GoogLeNet-style classifier: a small convolutional stem, nine Inception
    blocks separated by max-pooling, then global average pooling and a linear
    classification head.

    Args:
        image_size: ``(channels, height, width)`` of the input images. Only the
            channel count is used to build layers; height and width are merely
            stored on the instance as ``self.h`` / ``self.w``.
        num_labels: Number of output classes (size of the final linear layer).

    The forward pass returns raw, un-normalised logits of shape
    ``(B, num_labels)``.
    """

    def __init__(self, image_size, num_labels):
        super().__init__()
        self.c, self.h, self.w = image_size

        # --- Stem -----------------------------------------------------------
        self.Conv7_1 = nn.Conv2d(self.c, 64, kernel_size=7, stride=2, padding=3)
        self.Maxpool1_2 = nn.MaxPool2d(3, stride=2, padding=0, ceil_mode=True)
        self.Conv1_3 = nn.Conv2d(64, 192, kernel_size=1, stride=1, padding=0)

        # --- Down-sampling pools placed between the Inception stages ---------
        self.Maxpool3_4 = nn.MaxPool2d(3, stride=2, padding=1)
        self.Maxpool3_7 = nn.MaxPool2d(3, stride=2, padding=1)
        self.Maxpool3_13 = nn.MaxPool2d(3, stride=2, padding=1)

        # --- Classification head --------------------------------------------
        self.Avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.Dropout = nn.Dropout(p=0.4)
        self.Linear = nn.Linear(1024, num_labels)

        # --- Inception blocks -----------------------------------------------
        # Each entry: name -> (in_channels, out_1x1, red_3x3, out_3x3,
        #                      red_5x5, out_5x5, pool_proj).
        # A block outputs out_1x1 + out_3x3 + out_5x5 + pool_proj channels,
        # which is the in_channels of the following block.
        inception_specs = (
            ("inception_3a", (192, 64, 96, 128, 16, 32, 32)),      # -> 256
            ("inception_3b", (256, 128, 128, 192, 32, 96, 64)),    # -> 480
            ("inception_4a", (480, 192, 96, 208, 16, 48, 64)),     # -> 512
            ("inception_4b", (512, 160, 112, 224, 24, 64, 64)),    # -> 512
            ("inception_4c", (512, 128, 128, 256, 24, 64, 64)),    # -> 512
            ("inception_4d", (512, 112, 144, 288, 32, 64, 64)),    # -> 528
            ("inception_4e", (528, 256, 160, 320, 32, 128, 128)),  # -> 832
            ("inception_5a", (832, 256, 160, 320, 32, 128, 128)),  # -> 832
            ("inception_5b", (832, 384, 192, 384, 48, 128, 128)),  # -> 1024
        )
        for block_name, block_args in inception_specs:
            setattr(self, block_name, InceptionBlock(*block_args))

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Tensor of shape ``(B, C, H, W)`` with ``C == self.c``.

        Returns:
            Tensor of shape ``(B, num_labels)``.

        For a 224x224 input the spatial size goes 224 -> 112 (Conv7_1)
        -> 56 (Maxpool1_2) -> 28 (Maxpool3_4) -> 14 (Maxpool3_7)
        -> 7 (Maxpool3_13) -> 1 (Avgpool).
        """
        # Stem: (B, C, H, W) -> (B, 192, ~H/8, ~W/8)
        x = self.Maxpool1_2(F.relu(self.Conv7_1(x)))
        x = self.Maxpool3_4(F.relu(self.Conv1_3(x)))

        # Inception stages 3, 4 and 5 with a stride-2 max-pool after 3b and 4e.
        body = (
            self.inception_3a,   # 256 channels
            self.inception_3b,   # 480 channels
            self.Maxpool3_7,     # halves the spatial size
            self.inception_4a,   # 512 channels
            self.inception_4b,   # 512 channels
            self.inception_4c,   # 512 channels
            self.inception_4d,   # 528 channels
            self.inception_4e,   # 832 channels
            self.Maxpool3_13,    # halves the spatial size
            self.inception_5a,   # 832 channels
            self.inception_5b,   # 1024 channels
        )
        for layer in body:
            x = layer(x)

        # Head: global average pool gives (B, 1024, 1, 1), flattened to (B, 1024).
        x = self.Avgpool(x)
        x = torch.flatten(x, start_dim=1)
        return self.Linear(self.Dropout(x))
    
class InceptionBlock(nn.Module):
    """Inception module: four parallel branches whose outputs are concatenated
    along the channel dimension.

    Args:
        in_channels: Channels of the incoming feature map.
        out_1x1: Output channels of the plain 1x1 branch.
        red_3x3: Channels after the 1x1 reduction preceding the 3x3 conv.
        out_3x3: Output channels of the 3x3 branch.
        red_5x5: Channels after the 1x1 reduction preceding the 5x5 conv.
        out_5x5: Output channels of the 5x5 branch.
        pool_proj: Output channels of the 1x1 projection after max-pooling.

    Every branch keeps the spatial size of its input, so the output has
    ``out_1x1 + out_3x3 + out_5x5 + pool_proj`` channels.
    """

    def __init__(
        self,
        in_channels: int,
        # 1x1 branch
        out_1x1: int,
        # 1x1 -> 3x3 branch
        red_3x3: int,
        out_3x3: int,
        # 1x1 -> 5x5 branch
        red_5x5: int,
        out_5x5: int,
        # pool -> 1x1 branch
        pool_proj: int,
    ):
        super().__init__()

        def conv_relu(c_in: int, c_out: int, kernel: int, pad: int = 0) -> list:
            # One convolution followed by an in-place ReLU, as a flat layer list.
            return [
                nn.Conv2d(c_in, c_out, kernel_size=kernel, padding=pad),
                nn.ReLU(inplace=True),
            ]

        # Branch 1: 1x1 conv
        self.branch1 = nn.Sequential(*conv_relu(in_channels, out_1x1, 1))

        # Branch 2: 1x1 reduction, then 3x3 conv (padding 1 keeps H and W)
        self.branch2 = nn.Sequential(
            *conv_relu(in_channels, red_3x3, 1),
            *conv_relu(red_3x3, out_3x3, 3, pad=1),
        )

        # Branch 3: 1x1 reduction, then 5x5 conv (padding 2 keeps H and W)
        self.branch3 = nn.Sequential(
            *conv_relu(in_channels, red_5x5, 1),
            *conv_relu(red_5x5, out_5x5, 5, pad=2),
        )

        # Branch 4: 3x3 max-pool (stride 1) followed by a 1x1 projection.
        # NOTE: this is the only branch that contains a BatchNorm2d layer.
        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1, ceil_mode=True),
            nn.Conv2d(in_channels, pool_proj, kernel_size=1),
            nn.BatchNorm2d(pool_proj),
            nn.ReLU(inplace=True),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply all four branches to ``x`` and concatenate them on dim 1."""
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        return torch.cat([branch(x) for branch in branches], dim=1)



In [2]:
import torch
from torch.utils.data import Dataset
import os
import cv2 

def collate_fn(samples: list[dict]) -> dict:
    """Combine individual dataset samples into a single batch.

    Args:
        samples: Dicts that each hold an ``'image'`` tensor and a ``'label'``
            value; all image tensors must share one shape so they can be
            stacked.

    Returns:
        A dict with ``'image'`` (the images stacked along a new leading batch
        dimension) and ``'label'`` (a tensor built from the labels, in the
        same order as ``samples``).
    """
    image_batch = torch.stack([entry['image'] for entry in samples], dim=0)
    label_batch = torch.tensor([entry['label'] for entry in samples])
    return {'image': image_batch, 'label': label_batch}

class VinaFood(Dataset):
    """Image-classification dataset read from a directory tree with one
    sub-folder per class (the folder name is the class label).

    All images are decoded eagerly at construction time and kept in memory at
    their original resolution; resizing and tensor conversion happen lazily in
    ``__getitem__``.

    Args:
        path: Root directory containing one sub-directory per class.
        image_size: Target size handed to ``cv2.resize`` for every image.

    Attributes set by the constructor:
        label2idx: Maps class name -> integer id.
        idx2label: Inverse mapping, integer id -> class name.
        data: List of ``{'image': ndarray, 'label': class name}`` records.
    """

    def __init__(self, path: str, image_size: tuple[int, int]):
        super().__init__()

        self.image_size = image_size
        self.label2idx = {}
        self.idx2label = {}
        self.data: list[dict] = self.load_data(path)

    def load_data(self, path):
        """Read every image below ``path`` and build the label mappings.

        Entries of ``path`` that are not directories are ignored, and files
        that OpenCV cannot decode are skipped with a message instead of being
        stored as ``None`` (which would crash later in ``__getitem__``).

        Returns:
            List of ``{'image': ndarray, 'label': class name}`` records.
        """
        data = []
        print(f"Loading data from: {path}")
        # os.listdir returns entries in arbitrary order; sorting makes the
        # label ids reproducible across runs, machines and dataset splits.
        for folder in sorted(os.listdir(path)):
            folder_path = os.path.join(path, folder)
            if not os.path.isdir(folder_path):
                # Stray files in the root (e.g. .DS_Store) are not classes.
                continue

            label = folder
            if label not in self.label2idx:
                # Next free id; stays collision-free even if load_data is
                # called again on an instance that already knows some labels.
                self.label2idx[label] = len(self.label2idx)
            print(f"Processing folder: {folder} (label_id: {self.label2idx[label]})")

            for image_file in sorted(os.listdir(folder_path)):
                image_path = os.path.join(folder_path, image_file)
                image = cv2.imread(image_path)
                if image is None:
                    # cv2.imread signals failure by returning None, not raising.
                    print(f"Skipping unreadable image: {image_path}")
                    continue
                data.append({
                    'image': image,
                    'label': label
                })

        self.idx2label = {idx: label for label, idx in self.label2idx.items()}
        return data

    def __len__(self):
        """Number of successfully loaded images."""
        return len(self.data)

    def __getitem__(self, idx: int) -> dict:
        """Return sample ``idx`` as ``{'image': tensor, 'label': int id}``.

        The image is resized to ``self.image_size``, converted from BGR to
        RGB, turned into a float32 tensor in channel-first ``(C, H, W)``
        layout and divided by 255.
        """
        item = self.data[idx]

        image = item['image']
        label = item['label']

        # NOTE(review): cv2.resize interprets its size argument as
        # (width, height); this only matters for non-square image_size.
        image = cv2.resize(image, self.image_size)
        # OpenCV decodes images as BGR; the rest of the pipeline uses RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        # HWC array -> CHW float tensor, scaled by 1/255.
        image = torch.tensor(image, dtype=torch.float32).permute(2, 0, 1) / 255.0
        return {
            'image': image,
            'label': self.label2idx[label]
        }

In [3]:
from torch.utils.data import DataLoader
from torch import nn 
import torch
import numpy as np 
from sklearn.metrics import precision_score, recall_score, f1_score
from vinafood_dataset import VinaFood, collate_fn
from GoogLeNet import GoogLeNet

# Fix the RNG seed so DataLoader shuffling and weight initialisation are
# reproducible from one run to the next.
SEED = 42
torch.manual_seed(SEED)

# Train on the GPU when one is available; otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
image_size = (224, 224)

train_dataset = VinaFood(
    path=r"D:\NguyenTienDat_23520262\Nam_3\DL\BT2\VinaFood21\train",
    image_size=image_size
)

# test_dataset = VinaFood(
#     path=r"D:\NguyenTienDat_23520262\Nam_3\DL\BT2\VinaFood21\test",
#     image_size=image_size
# )

train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
    collate_fn=collate_fn
)

# test_loader = DataLoader(
#     dataset=test_dataset,
#     batch_size=32,
#     collate_fn=collate_fn
# )

# The model expects (channels, height, width); the dataset only needed the
# two spatial dimensions, so prepend the 3 colour channels here.
image_size = (3, ) + image_size   # (3, 224, 224)
# One output logit per class discovered in the training folder.
model = GoogLeNet(num_labels=len(train_dataset.idx2label), image_size=image_size).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

def evaluate(model, dataloader):
    """Score ``model`` on every batch of ``dataloader`` with macro-averaged
    recall, precision and F1.

    Batches are dicts with ``"image"`` and ``"label"`` tensors; both are moved
    to the module-level ``device`` before the forward pass. The predicted
    class is the argmax over the last dimension of the model output.

    The model is switched to eval mode and is left in eval mode on return.
    The sets of distinct predicted and true labels are printed for debugging.

    Returns:
        Dict with keys ``"recall"``, ``"precision"`` and ``"f1"``. If the
        metric computation raises, the error is printed and all three values
        are ``0.0``.
    """
    model.eval()
    predicted_ids = []
    true_ids = []

    # Inference only: no gradients are needed.
    with torch.no_grad():
        for batch in dataloader:
            images = batch["image"].to(device)
            targets = batch["label"].to(device)

            logits = model(images)
            batch_predictions = logits.argmax(dim=-1)

            predicted_ids.extend(batch_predictions.cpu().numpy())
            true_ids.extend(targets.cpu().numpy())

    # Print unique values for debugging
    print(f"Unique predictions: {np.unique(predicted_ids)}")
    print(f"Unique true labels: {np.unique(true_ids)}")

    try:
        scorers = (
            ("recall", recall_score),
            ("precision", precision_score),
            ("f1", f1_score),
        )
        scores = {
            metric_name: scorer(true_ids, predicted_ids, average="macro", zero_division=0)
            for metric_name, scorer in scorers
        }
    except Exception as e:
        print(f"Error in metrics calculation: {e}")
        scores = {"recall": 0.0, "precision": 0.0, "f1": 0.0}
    return scores

# Number of full passes over train_loader.
EPOCHS = 10 
for epoch in range(EPOCHS):
    print(f"Epoch: {epoch+1}")

    # Per-batch loss values of this epoch; averaged for the report below.
    losses = []
    # evaluate() leaves the model in eval mode, so re-enable training mode
    # at the start of every epoch.
    model.train() 
    for batch_idx, item in enumerate(train_loader):
        image = item["image"].to(device)
        label = item["label"].to(device)
        
        # Forward pass
        output = model(image)
        
        # Labels are cast to int64 before being passed to the loss.
        loss = loss_fn(output, label.long())
        losses.append(loss.item())

        # Backward pass
        # Clear gradients left over from the previous step before computing
        # new ones, then update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Mean training loss over all batches of the epoch.
    print(f"Loss: {(np.array(losses).mean())}")
    # NOTE: metrics are computed on train_loader, i.e. on the training data
    # itself, so they do not measure performance on unseen images.
    metrics = evaluate(model, train_loader)
    for metric in metrics:
        print(f"{metric}: {metrics[metric]}")


Loading data from: D:\NguyenTienDat_23520262\Nam_3\DL\BT2\VinaFood21\train
Processing folder: banh-can (label_id: 0)
Processing folder: banh-hoi (label_id: 1)
Processing folder: banh-mi-chao (label_id: 2)
Processing folder: banh-tet (label_id: 3)
Processing folder: banh-trang-tron (label_id: 4)
Processing folder: banh-u (label_id: 5)
Processing folder: banh-uot (label_id: 6)
Processing folder: bap-nuong (label_id: 7)
Processing folder: bo-kho (label_id: 8)
Processing folder: bo-la-lot (label_id: 9)
Processing folder: bot-chien (label_id: 10)
Processing folder: ca-ri (label_id: 11)
Processing folder: canh-kho-qua (label_id: 12)
Processing folder: canh-khoai-mo (label_id: 13)
Processing folder: ga-nuong (label_id: 14)
Processing folder: goi-ga (label_id: 15)
Processing folder: ha-cao (label_id: 16)
Processing folder: hoanh-thanh-nuoc (label_id: 17)
Processing folder: pha-lau (label_id: 18)
Processing folder: tau-hu (label_id: 19)
Processing folder: thit-kho-trung (label_id: 20)
Epoch: 1
