In [None]:
!unzip /content/drive/MyDrive/dataset/dog-breed-identification.zip

In [3]:
!pip install timm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting timm
  Downloading timm-0.6.5-py3-none-any.whl (512 kB)
[K     |████████████████████████████████| 512 kB 9.7 MB/s 
Installing collected packages: timm
Successfully installed timm-0.6.5


In [4]:
from glob import glob
import time
import pandas as pd
import numpy as np
import cv2
import matplotlib.pyplot as plt
import os 

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

import albumentations as A
from albumentations.pytorch.transforms import ToTensor
from sklearn.model_selection import KFold
from torch.utils.data import Dataset, DataLoader
import torch
from torch import nn, optim
from tqdm import tqdm
import torch.nn.functional as F
import timm

In [12]:
os.makedirs('save_models/', exist_ok=True)

n_classes = 120
IMG_SIZE = 256
seed = 1024
batch_size = 128
epochs = 5
lr = 1e-3
weight_decay = 0.1
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [6]:
class CustomDataset(Dataset):
    def __init__(self, img_paths, labels=None, mode='train', IMG_SIZE=256):
        super(CustomDataset, self).__init__()

        self.img_paths = img_paths
        self.labels = labels
        self.mode = mode
        self.train_transform = A.Compose([
            A.Resize(IMG_SIZE, IMG_SIZE),                      
            A.RandomCrop(224, 224),
            A.GaussNoise(p=0.5),
            A.OneOf([
                A.HorizontalFlip(p=1),
                A.Rotate(p=1)       
            ], p=0.5),
            A.OneOf([
                A.GridDistortion(always_apply=False, p=1),
                A.OpticalDistortion(distort_limit=2, shift_limit=0.5, p=1),
            ], p=0.5),
            A.RandomBrightnessContrast(p=0.3),
            ToTensor(normalize={'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225]}),
        ])
        
        self.test_transform = A.Compose([
            A.Resize(IMG_SIZE, IMG_SIZE), 
            ToTensor(normalize={'mean': [0.485, 0.456, 0.406], 'std': [0.229, 0.224, 0.225]}),
        ])
                                         
    def __len__(self):
        return len(self.img_paths)

    def __getitem__(self, idx):
        image_path = self.img_paths[idx]
        
        image = cv2.imread(image_path)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.mode == 'train':
            augmented  = self.train_transform(image=image)
            image = augmented['image']
        else:
            augmented = self.test_transform(image=image)
            image = augmented['image']

        if self.mode == 'train' or self.mode == 'valid' or self.mode.startswith('augment'):
            label = self.labels[idx]
            return image, torch.tensor(label)
        elif self.mode == 'test':
            return image

class Model(nn.Module):
    def __init__(self, n_classes):
        super(Model, self).__init__()

        self.model = timm.create_model('efficientnet_b3', pretrained=True, num_classes=n_classes)

    def forward(self, x):
        x = self.model(x)
        return x
        


def get_accuracy(output, label):
    output = output.to("cpu")
    label = label.to("cpu")

    sm = F.softmax(output, dim=1)
    _, index = torch.max(sm, dim=1)
    return torch.sum((label == index)) / label.size()[0]

In [8]:
train_image_paths = glob('train/*.jpg')
test_image_paths = glob('test/*.jpg')

print(len(train_image_paths), len(test_image_paths))

10222 10357


In [9]:
labels_df = pd.read_csv('labels.csv')
classes = np.unique(labels_df.breed.values)

label_encoder = LabelEncoder()
train_labels = label_encoder.fit_transform(labels_df['breed'])

print(f'number of classes: {len(classes)}     number of labels: {len(train_labels)}')

number of classes: 120     number of labels: 10222


In [10]:
for _ in range(3):
    train_image_paths += train_image_paths
    train_labels = np.concatenate([train_labels, train_labels], 0)

print(len(train_image_paths), len(train_labels))

81776 81776


In [14]:
import gc

gc.collect()
torch.cuda.empty_cache()

kfold = KFold(n_splits=5, shuffle=True, random_state=seed)

best_model_epoch = []
models = []
fold_train_losses = []
fold_val_losses = []
fold_train_acc = []
fold_val_acc = []

for idx, (train_idx, val_idx) in enumerate(kfold.split(train_image_paths, train_labels)):
    print(f'---------- KFold[{idx + 1}/5] ----------')
    fold_image_paths = [train_image_paths[i] for i in train_idx]
    fold_labels = [train_labels[i] for i in train_idx]
    fold_train_dataset = CustomDataset(fold_image_paths, fold_labels, mode='train')

    fold_val_image_paths = [train_image_paths[i] for i in val_idx]
    fold_val_labels = [train_labels[i] for i in val_idx]
    fold_val_dataset = CustomDataset(fold_val_image_paths, fold_val_labels, mode='valid')

    train_loader = DataLoader(fold_train_dataset, shuffle=True, batch_size=batch_size)
    valid_loader = DataLoader(fold_val_dataset, batch_size=batch_size)

    gc.collect()
    torch.cuda.empty_cache()

    model = Model(n_classes).to(device)

    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=100, eta_min=0.001)
    scaler = torch.cuda.amp.GradScaler()

    best_val_loss = np.inf
    stop = 0
    start_time = time.time()

    train_losses = []
    val_losses = []
    train_accs = []
    val_accs = []

    for epoch in range(1, epochs + 1):
        model.train()
        train_loss = 0
        train_acc = 0
        for batch_idx, (imgs, labels) in enumerate(train_loader):
            imgs, labels = imgs.to(device), labels.to(device)

            optimizer.zero_grad()
            with torch.cuda.amp.autocast():
                outputs = model(imgs)
            _, preds = torch.max(outputs, 1)
            loss = criterion(outputs, labels)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            train_loss += loss.item() 
            train_acc += torch.sum(preds == labels.data) / outputs.shape[0]

        scheduler.step()
        train_epoch_loss = train_loss / len(train_loader)
        train_epoch_acc = train_acc / len(train_loader)

        train_losses.append(train_epoch_loss)
        train_accs.append(train_epoch_acc)

        with torch.no_grad():
            val_loss = 0
            val_acc = 0
            model.eval()
            for val_idx, (val_imgs, val_labels) in enumerate(valid_loader):
                val_imgs, val_labels = val_imgs.to(device), val_labels.to(device)

                with torch.cuda.amp.autocast():
                    val_outputs = model(val_imgs)
                _, val_preds = torch.max(val_outputs, 1)
                loss = criterion(val_outputs, val_labels)

                val_loss += loss.item()
                val_acc += torch.sum(val_preds == val_labels.data) / val_outputs.shape[0]

            val_epoch_loss = val_loss / len(valid_loader)
            val_epoch_acc = val_acc / len(valid_loader)

            val_losses.append(val_epoch_loss)
            val_accs.append(val_epoch_acc)

            if val_loss < best_val_loss:
                stop = 0
                best_val_loss = val_loss
                path = f'save_models/best_model_{idx + 1}fold_epoch-{epoch}.pth'
                torch.save(model.state_dict(), path)
                print('      ** Save Model **')
                best_model_epoch.append(path)
            else:
                stop += 1
            
            if stop == 3:
                end_time = time.time() - start_time
                print(f'[Epoch {epoch}/{epochs}] [elapsed time: {end_time}]')
                print(f'[Train Loss: {train_epoch_loss}] [Train Accuracy: {train_epoch_acc}]')
                print(f'[Validation Loss: {val_epoch_loss}] [Validation Accuracy: {val_epoch_acc}]')
                print('     ** Early Stop **')
                models.append(best_model_epoch.pop(-1))
                break

            end_time = time.time() - start_time
            print(f'[Epoch {epoch}/{epochs}] [elapsed time: {end_time}]')
            print(f'[Train Loss: {train_epoch_loss}] [Train Accuracy: {train_epoch_acc}]')
            print(f'[Validation Loss: {val_epoch_loss}] [Validation Accuracy: {val_epoch_acc}]')
            print(f'Early Stop Count: {stop}')
            print()

    models.append(best_model_epoch.pop(-1))

    fold_train_losses.append(train_losses)
    fold_val_losses.append(val_losses)
    fold_train_acc.append(train_acc)
    fold_val_acc.append(val_acc)

---------- KFold[1/5] ----------
      ** Save Model **
[Epoch 1/5] [elapsed time: 1097.830216884613]
[Train Loss: 3.5450191497802734] [Train Accuracy: 0.328643798828125]
[Validation Loss: 1.7888870239257812] [Validation Accuracy: 0.7785449028015137]
Early Stop Count: 0

      ** Save Model **
[Epoch 2/5] [elapsed time: 2195.3119213581085]
[Train Loss: 1.4551982879638672] [Train Accuracy: 0.8723602294921875]
[Validation Loss: 1.103302001953125] [Validation Accuracy: 0.9772704839706421]
Early Stop Count: 0

      ** Save Model **
[Epoch 3/5] [elapsed time: 3292.563585996628]
[Train Loss: 1.1549186706542969] [Train Accuracy: 0.9568125605583191]
[Validation Loss: 1.0277595520019531] [Validation Accuracy: 0.9896166920661926]
Early Stop Count: 0

      ** Save Model **
[Epoch 4/5] [elapsed time: 4389.540983915329]
[Train Loss: 1.1019906997680664] [Train Accuracy: 0.96875]
[Validation Loss: 1.0091171264648438] [Validation Accuracy: 0.9910107254981995]
Early Stop Count: 0

      ** Save Model

In [15]:
models

['save_models/best_model_1fold_epoch-5.pth',
 'save_models/best_model_2fold_epoch-4.pth',
 'save_models/best_model_3fold_epoch-5.pth',
 'save_models/best_model_4fold_epoch-5.pth',
 'save_models/best_model_5fold_epoch-4.pth']

In [16]:
test_dataset = CustomDataset(test_image_paths, mode='test')
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [17]:
preds_list = []
with torch.no_grad():
    for imgs in test_loader:
        imgs = imgs.to(device)
        preds = 0
        for model_path in models:
            model = Model(n_classes).to(device)
            model.load_state_dict(torch.load(model_path))

            outputs = model(imgs)
            preds += outputs

        preds /= len(models)
        preds = F.softmax(preds, -1)
        preds_list.extend(preds.detach().cpu().numpy())

In [20]:
len(preds_list)

10357

In [21]:
submission = pd.read_csv('sample_submission.csv')
id = submission.id.values

preds_list = np.array(preds_list)
id = np.expand_dims(np.array(id), 1)
data = np.concatenate([id, preds_list], 1)

new_sub = pd.DataFrame(data = data, columns = submission.columns)
new_sub.reset_index()
new_sub.to_csv('cls_submission.csv', index=False)

In [23]:
new_sub

Unnamed: 0,id,affenpinscher,afghan_hound,african_hunting_dog,airedale,american_staffordshire_terrier,appenzeller,australian_terrier,basenji,basset,...,toy_poodle,toy_terrier,vizsla,walker_hound,weimaraner,welsh_springer_spaniel,west_highland_white_terrier,whippet,wire-haired_fox_terrier,yorkshire_terrier
0,000621fb3cbb32d8935728e48679680e,0.011623,0.017176,0.002569,0.005146,0.005761,0.002292,0.011738,0.008592,0.002855,...,0.009848,0.01745,0.001263,0.008681,0.00096,0.018472,0.00343,0.005553,0.008905,0.003025
1,00102ee9d8eb90812350685311fe5890,0.006032,0.009808,0.001823,0.007684,0.005053,0.001485,0.002297,0.003839,0.004334,...,0.015877,0.003149,0.00553,0.014287,0.006413,0.006463,0.00432,0.013286,0.00203,0.003148
2,0012a730dfa437f5f3613fb75efcd4ce,0.020832,0.004795,0.001641,0.003896,0.003626,0.003653,0.000559,0.002871,0.001199,...,0.01005,0.006658,0.009073,0.004295,0.008811,0.00492,0.009782,0.004299,0.013971,0.004048
3,001510bc8570bbeee98c8d80c8a95ec1,0.010368,0.003696,0.004707,0.009154,0.007875,0.006757,0.005671,0.006547,0.013764,...,0.012393,0.003447,0.008222,0.006696,0.002819,0.010323,0.008404,0.010792,0.009483,0.004091
4,001a5f3114548acdefa3d4da05474c2e,0.005868,0.003424,0.006845,0.005653,0.013286,0.003132,0.003156,0.003872,0.005182,...,0.005426,0.028806,0.009193,0.006664,0.045173,0.00274,0.007434,0.003926,0.005949,0.002979
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10352,ffeda8623d4eee33c6d1156a2ecbfcf8,0.001881,0.010561,0.006832,0.006786,0.002862,0.001559,0.014651,0.00989,0.011766,...,0.005387,0.009863,0.002226,0.001622,0.005036,0.004545,0.013655,0.00961,0.004165,0.002698
10353,fff1ec9e6e413275984966f745a313b0,0.001494,0.008583,0.012453,0.015297,0.005231,0.01076,0.00107,0.025973,0.002863,...,0.000601,0.002091,0.001037,0.005391,0.006843,0.006286,0.001779,0.003186,0.002002,0.003152
10354,fff74b59b758bbbf13a5793182a9bbe4,0.012315,0.001434,0.007721,0.025982,0.002881,0.019275,0.001568,0.002108,0.006215,...,0.017571,0.00897,0.017216,0.028656,0.003458,0.005134,0.007319,0.003541,0.02144,0.014716
10355,fff7d50d848e8014ac1e9172dc6762a3,0.001412,0.001159,0.014811,0.005997,0.024808,0.001302,0.001584,0.009992,0.00694,...,0.014779,0.002449,0.007192,0.002207,0.004831,0.005263,0.003097,0.002629,0.045951,0.002255
