<a href="https://colab.research.google.com/github/chang-heekim/Kaggle_Competition/blob/main/Dog_Breed_Identification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!unzip /content/drive/MyDrive/dataset/dog-breed-identification.zip

In [None]:
!pip install timm

In [1]:
from glob import glob
import time
import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt
import os 

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

import albumentations as A
from albumentations.pytorch import ToTensor
from sklearn.model_selection import KFold
from torch.utils.data import Dataset, DataLoader
import torch
from torch import nn, optim
from tqdm import tqdm
import torch.nn.functional as F
import timm

In [26]:
os.makedirs('save_models/', exist_ok=True)

n_classes = 120
IMG_SIZE = 256
seed = 1024
batch_size = 128
epochs = 50
lr = 1e-2
weight_decay = 1e-2
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [8]:
class CustomDataset(Dataset):
    def __init__(self, img_paths, labels=None, mode='train', IMG_SIZE=256):
        super(CustomDataset, self).__init__()

        self.img_paths = img_paths
        self.labels = labels
        self.mode = mode
        self.train_transform = A.Compose([
            A.Resize(IMG_SIZE, IMG_SIZE),                      
            A.RandomCrop(224, 224),
            A.HorizontalFlip(p=0.5),        
            A.Rotate(p=0.5),
            A.GridDistortion(always_apply=False, p=0.5, num_steps=10, distort_limit=(-0.2, 0.2), interpolation=2, border_mode=0),
            A.Cutout(always_apply=False, p=0.5, num_holes=40, max_h_size=10, max_w_size=10),
            ToTensor()
        ])
        self.test_transform = A.Compose([A.Resize(IMG_SIZE, IMG_SIZE), A.RandomCrop(224, 224), ToTensor()])
                                         
    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        image_path = self.img_paths[idx]
        
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.mode == 'train':
            augmented  = self.train_transform(image=image)
            image = augmented['image']
        else:
            augmented = self.test_transform(image=image)
            image = augmented['image']

        if self.mode == 'train' or self.mode == 'valid':
            label = self.labels[idx]
            return image, torch.tensor(label)
        elif self.mode == 'test':
            return image

class Model(nn.Module):
    def __init__(self, n_classes):
        super(Model, self).__init__()

        self.model = timm.create_model('efficientnet_b3', pretrained=True, num_classes=n_classes)

    def forward(self, x):
        x = self.model(x)
        return x

def get_accuracy(output, label):
    output = output.to("cpu")
    label = label.to("cpu")

    sm = F.softmax(output, dim=1)
    _, index = torch.max(sm, dim=1)
    return torch.sum((label == index)) / label.size()[0]

In [4]:
train_image_paths = glob('train/*.jpg')
test_image_paths = glob('test/*.jpg')

print(len(train_image_paths), len(test_image_paths))

10222 10357


In [5]:
labels_df = pd.read_csv('labels.csv')
classes = np.unique(labels_df.breed.values)

label_encoder = LabelEncoder()
train_labels = label_encoder.fit_transform(labels_df['breed'])

print(f'number of classes: {len(classes)}     number of labels: {len(train_labels)}')

number of classes: 120     number of labels: 10222


In [6]:
train_image_paths += train_image_paths
train_labels = np.concatenate([train_labels, train_labels], 0)

print(len(train_image_paths), len(train_labels))

20444 20444


In [26]:
dataset = CustomDataset(train_image_paths, mode='test')

In [9]:
import gc

gc.collect()
torch.cuda.empty_cache()

kfold = KFold(n_splits=5, shuffle=True, random_state=seed)

best_model_epoch = []
models = []
for idx, (train_idx, val_idx) in enumerate(kfold.split(train_image_paths, train_labels)):
    print(f'---------- KFold[{idx + 1}/5] ----------')
    fold_image_paths = [train_image_paths[i] for i in train_idx]
    fold_labels = [train_labels[i] for i in train_idx]
    fold_train_dataset = CustomDataset(fold_image_paths, fold_labels, mode='train')
    
    fold_val_image_paths = [train_image_paths[i] for i in val_idx]
    fold_val_labels = [train_labels[i] for i in val_idx]
    fold_val_dataset = CustomDataset(fold_val_image_paths, fold_val_labels, mode='valid')
    # print(len(fold_train_dataset), len(fold_val_dataset))

    train_loader = DataLoader(fold_train_dataset, shuffle=True, batch_size=batch_size)
    valid_loader = DataLoader(fold_val_dataset, batch_size=batch_size)

    gc.collect()
    torch.cuda.empty_cache()

    model = Model(n_classes).to(device)

    optimizer = optim.AdamW(model.parameters(), lr=2e-4, weight_decay = 2e-2)
    # optimizer = optim.SGD(model.parameters(), lr=0.03, momentum=0.9)
    criterion = nn.CrossEntropyLoss()
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100, eta_min=0.001)
    scaler = torch.cuda.amp.GradScaler()

    best_val_loss = np.inf
    stop = 0
    start_time = time.time()
    for epoch in range(1, epochs + 1):
        model.train()
        train_loss = 0
        train_acc = 0
        for batch_idx, (imgs, labels) in enumerate(train_loader):
            imgs, labels = imgs.to(device), labels.to(device)

            optimizer.zero_grad()
            with torch.cuda.amp.autocast():
                outputs = model(imgs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            train_loss += loss.item() 
            train_acc += torch.sum(preds == labels.data) / outputs.shape[0]

        scheduler.step()

        with torch.no_grad():
            val_loss = 0
            val_acc = 0
            model.eval()
            for val_idx, (val_imgs, val_labels) in enumerate(valid_loader):
                val_imgs, val_labels = val_imgs.to(device), val_labels.to(device)

                with torch.cuda.amp.autocast():
                    val_outputs = model(val_imgs)
                _, val_preds = torch.max(val_outputs, 1)
                loss = criterion(val_outputs, val_labels)

                val_loss += loss.item()
                val_acc += torch.sum(val_preds == val_labels.data) / val_outputs.shape[0]

            if val_loss < best_val_loss:
                stop = 0
                best_val_loss = val_loss
                path = f'save_models/best_model_{idx + 1}fold_epoch-{epoch}.pth'
                torch.save(model.state_dict(), path)
                print('      ** Save Model **')
                best_model_epoch.append(path)
            else:
                stop += 1
            
            if stop == 5:
                end_time = time.time() - start_time
                print(f'[Epoch {epoch}/{epochs}] [elapsed time: {end_time}]')
                print(f'[Train Loss: {train_loss / len(train_loader)}] [Train Accuracy: {train_acc / len(train_loader)}]')
                print(f'[Validation Loss: {val_loss}] [Validation Accuracy: {val_acc}]')
                print('     ** Early Stop **')
                models.append(best_model_epoch.pop(-1))
                break

            end_time = time.time() - start_time
            print(f'[Epoch {epoch}/{epochs}] [elapsed time: {end_time}]')
            print(f'[Train Loss: {train_loss / len(train_loader)}] [Train Accuracy: {train_acc / len(train_loader)}]')
            print(f'[Validation Loss: {val_loss / len(valid_loader)}] [Validation Accuracy: {val_acc / len(valid_loader)}]')
            print(f'Early Stop Count: {stop}')
            print()

---------- KFold[1/5] ----------
      ** Save Model **
[Epoch 1/50] [elapsed time: 251.99404048919678]
[Train Loss: 4.81817626953125] [Train Accuracy: 0.012041804380714893]
[Validation Loss: 4.7412109375] [Validation Accuracy: 0.01930123381316662]
Early Stop Count: 0

      ** Save Model **
[Epoch 2/50] [elapsed time: 503.2214126586914]
[Train Loss: 4.591796875] [Train Accuracy: 0.04639719799160957]
[Validation Loss: 4.5831298828125] [Validation Accuracy: 0.04548077657818794]
Early Stop Count: 0

      ** Save Model **
[Epoch 3/50] [elapsed time: 755.0738077163696]
[Train Loss: 4.1244354248046875] [Train Accuracy: 0.14395233988761902]
[Validation Loss: 4.259521484375] [Validation Accuracy: 0.11686870455741882]
Early Stop Count: 0

      ** Save Model **
[Epoch 4/50] [elapsed time: 1006.5164220333099]
[Train Loss: 3.3413238525390625] [Train Accuracy: 0.3300251066684723]
[Validation Loss: 3.76641845703125] [Validation Accuracy: 0.2335638850927353]
Early Stop Count: 0

      ** Save Mode

In [13]:
test_dataset = CustomDataset(test_image_paths, mode='test')
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [62]:
preds_list = []
with torch.no_grad():
    for imgs in test_loader:
        imgs = imgs.to(device)
        preds = 0
        for model_path in models:
            model = Model(n_classes).to(device)
            model.load_state_dict(torch.load(model_path))

            outputs = model(imgs)
            preds += outputs

        preds /= len(models)
        preds = F.softmax(preds, -1)
        preds_list.extend(preds.detach().cpu().numpy())

In [117]:
submission = pd.read_csv('sample_submission.csv')
id = submission.id.values

preds_list = np.array(preds_list)
id = np.expand_dims(np.array(id), 1)
data = np.concatenate([id, preds_list], 1)

new_sub = pd.DataFrame(data = data, columns = submission.columns)
new_sub.reset_index()
new_sub.to_csv('cls_submission.csv', index=False)

In [116]:
submission

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,000621fb3cbb32d8935728e48679680e,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
1,00102ee9d8eb90812350685311fe5890,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
2,0012a730dfa437f5f3613fb75efcd4ce,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
3,001510bc8570bbeee98c8d80c8a95ec1,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
4,001a5f3114548acdefa3d4da05474c2e,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10352,ffeda8623d4eee33c6d1156a2ecbfcf8,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
10353,fff1ec9e6e413275984966f745a313b0,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
10354,fff74b59b758bbbf13a5793182a9bbe4,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
10355,fff7d50d848e8014ac1e9172dc6762a3,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,...,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333,0.008333
