<a href="https://www.kaggle.com/code/blackfox20092006/birdmodelresnet101?scriptVersionId=237328209" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, models
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
import random
import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger()

# Paths
root = '/kaggle/input/bird-species-classification-220-categories'
train_path = root + '/Train'
test_path = root + '/Test'

# Get labels
train_labels = [i for i in os.listdir(train_path)]
test_labels = [i for i in os.listdir(test_path)]

tmp_train_img_paths = [train_path + '/' + j for j in train_labels]
tmp_test_img_paths = [test_path + '/' + j for j in test_labels]
train_img_paths, test_img_paths = [], []

for i in tmp_train_img_paths:
    for j in os.listdir(i):
        train_img_paths.append(i + '/' + j)
for i in tmp_test_img_paths:
    for j in os.listdir(i):
        test_img_paths.append(i + '/' + j)
del tmp_test_img_paths, tmp_train_img_paths

random.shuffle(train_img_paths)
random.shuffle(test_img_paths)

# label2name, name2label
label2name, name2label = {}, {}
for i in range(len(train_labels)):
    label2name[i] = train_labels[i]
    name2label[train_labels[i]] = i

# ground truth
train_labels, test_labels = [], []
for i in train_img_paths:
    train_labels.append(name2label[i.split('/')[-2]])
for j in test_img_paths:
    test_labels.append(name2label[j.split('/')[-2]])

# Transforms
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Dataset
class BirdDataset(Dataset):
    def __init__(self, data, labels, transform=None):
        self.data = data
        self.labels = labels
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, item_index):
        img_path = self.data[item_index]
        img_ground_truth = self.labels[item_index]
        img_data = cv2.imread(img_path)
        #img
        img_data = cv2.imread(img_path)
        if self.transform:
            H, W, C = img_data.shape
            # print(H, W, C)
            T, B, L, R = 0, 0, 0, 0 # top bottom left right
            if H > W: # cao hơn rộng => padding chiều rộng
                R = H-W
            elif H < W: #rộng hơn cao => padding chiều cao
                T = W-H
            else: #bang nhau
                pass
            # print(R, T)
            transform = transforms.Compose(
                [
                    transforms.ToPILImage(),
                    transforms.Pad(padding=(L, T, R, B), fill=0, padding_mode='constant'), #trái - trên - phải - dưới
                    # transforms.RandomRotation(degrees=5),
                    # transforms.RandomHorizontalFlip(p=0.5),
                    # transforms.ColorJitter(
                    #     brightness=0.05,
                    #     contrast=0.05,
                    #     saturation=0.05,
                    #     hue=0.02
                    # ),

                    transforms.Resize((224, 224)),
                    transforms.ToTensor(),
                    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                ]
            )
            img_data_tensor = transform(img_data)        
        img_ground_truth_tensor = torch.tensor(img_ground_truth, dtype=torch.long)
        return img_data_tensor, img_ground_truth_tensor


# Dataset + Dataloader
train_dataset = BirdDataset(train_img_paths, train_labels, transform=transform)
test_dataset = BirdDataset(test_img_paths, test_labels, transform=transform)

train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=True)

# Model
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = models.resnet101(pretrained=True)

for param in model.parameters():
    param.requires_grad = True

num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, len(label2name))
model = model.to(device)

# Loss & optimizer
loss_func = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.fc.parameters(), lr=0.001)
epochs = 25
best_accuracy = 0.0

# Training loop
for epoch in range(epochs):
    print(f'Epoch {epoch + 1}/{epochs}')
    model.train()
    running_loss = 0.0
    correct, total = 0, 0

    for batch_data in train_dataloader:
        img_data, label_data = batch_data
        img_data, label_data = img_data.to(device), label_data.to(device)

        optimizer.zero_grad()
        outputs = model(img_data)
        loss = loss_func(outputs, label_data)

        predictions = outputs.argmax(dim=1)
        accuracy = sum(predictions == label_data) / len(label_data)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        correct += (predictions == label_data).sum().item()
        total += label_data.size(0)

    epoch_loss = running_loss / len(train_dataloader)
    epoch_accuracy = correct / total
    print(f"Train Loss: {epoch_loss:.6f}, Accuracy: {epoch_accuracy:.6f}")

    # Validation
    model.eval()
    val_loss = 0.0
    val_correct, val_total = 0, 0

    with torch.no_grad():
        for batch_data in test_dataloader:
            img_data, label_data = batch_data
            img_data, label_data = img_data.to(device), label_data.to(device)
            outputs = model(img_data)
            loss = loss_func(outputs, label_data)

            predictions = outputs.argmax(dim=1)
            val_correct += (predictions == label_data).sum().item()
            val_total += label_data.size(0)
            val_loss += loss.item()

    test_loss = val_loss / len(test_dataloader)
    test_accuracy = val_correct / val_total
    print(f"Test Loss: {test_loss:.6f}, Test Accuracy: {test_accuracy:.6f}")

    if test_accuracy > best_accuracy:
        best_accuracy = test_accuracy
        torch.save(model.state_dict(), f'best_model{epoch}.pth')
        print(f"Saved best model! Accuracy: {best_accuracy:.6f}")

print(f'Best accuracy achieved = {best_accuracy}')


Downloading: "https://download.pytorch.org/models/resnet101-63fe2227.pth" to /root/.cache/torch/hub/checkpoints/resnet101-63fe2227.pth
100%|██████████| 171M/171M [00:00<00:00, 229MB/s]


Epoch 1/25
Train Loss: 4.250948, Accuracy: 0.150202
Test Loss: 3.186890, Test Accuracy: 0.317186
Saved best model! Accuracy: 0.317186
Epoch 2/25
Train Loss: 2.537037, Accuracy: 0.427342
Test Loss: 2.640505, Test Accuracy: 0.381634
Saved best model! Accuracy: 0.381634
Epoch 3/25
Train Loss: 1.930263, Accuracy: 0.541640
Test Loss: 2.413970, Test Accuracy: 0.427970
Saved best model! Accuracy: 0.427970
Epoch 4/25
Train Loss: 1.617702, Accuracy: 0.614617
Test Loss: 2.252742, Test Accuracy: 0.443976
Saved best model! Accuracy: 0.443976
Epoch 5/25
Train Loss: 1.399290, Accuracy: 0.660931
Test Loss: 2.204371, Test Accuracy: 0.462932
Saved best model! Accuracy: 0.462932
Epoch 6/25
Train Loss: 1.244631, Accuracy: 0.691311
Test Loss: 2.163049, Test Accuracy: 0.465038
Saved best model! Accuracy: 0.465038
Epoch 7/25
Train Loss: 1.114475, Accuracy: 0.724347
Test Loss: 2.162924, Test Accuracy: 0.471778
Saved best model! Accuracy: 0.471778
Epoch 8/25
Train Loss: 0.995023, Accuracy: 0.755895
Test Loss: