In [1]:
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm
from datetime import datetime
from pathlib import Path
from sklearn.metrics import confusion_matrix

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter

In [2]:
dataset_dir = r'..\dataset\npz'
training_dataset = np.load(Path(dataset_dir, 'training_cropped_480x480.npz'))
train_images = training_dataset['train_images']
train_labels = training_dataset['train_labels']
valid_images = training_dataset['valid_images']
valid_labels = training_dataset['valid_labels']

print('train_images: {}, train_labels: {}\nvalid_images: {}, valid_labels: {}'
      .format(train_images.shape, train_labels.shape, valid_images.shape, valid_labels.shape))

train_images: (59031, 480, 480, 3), train_labels: (59031,)
valid_images: (25319, 480, 480, 3), valid_labels: (25319,)


In [3]:
normalize = {'ImageNet': transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                              std=[0.229, 0.224, 0.225]),
             'TrainData': transforms.Normalize(mean=[0.431, 0.460, 0.378],
                                              std=[0.221, 0.217, 0.242]),
             'ValidData': transforms.Normalize(mean=[0.431, 0.459, 0.376],
                                               std=[0.221, 0.217, 0.241])}
transform = {
    'train': nn.Sequential(
        transforms.ColorJitter(brightness=.2),
        transforms.RandomHorizontalFlip(.2),
        transforms.RandomVerticalFlip(.2),
        normalize['ImageNet']
    ),
    'valid': nn.Sequential(
        normalize['ImageNet']
    )
}

class MyDataset(Dataset):
    def __init__(self, data_type, x, y):
        self.data_type = data_type
        self.image = x
        self.label = y
        
    def __getitem__(self, index): 
        image = np.transpose(self.image[index], (2, 0, 1))
        image = torch.tensor(image, dtype=torch.float32).div_(255).cuda()
        image = transform[self.data_type](image)
        label = torch.tensor(self.label[index], dtype=torch.int64).cuda()
        
        return image, label
    
    def __len__(self):
        return len(self.image)

In [4]:
batch_size = 28
train_data = DataLoader(MyDataset('train', train_images, train_labels),
                        batch_size=batch_size, shuffle=True, drop_last=True)
valid_data = DataLoader(MyDataset('valid', valid_images, valid_labels),
                        batch_size=batch_size)

In [5]:
time_stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
model_name = 'efficientnetv2_s_' + time_stamp
print(model_name)

efficientnetv2_s_20221213_132051


In [6]:
model = models.efficientnet_v2_s(weights='DEFAULT')

num_features = model.classifier[1].in_features
model.classifier[1] = nn.Linear(num_features, 33)
for param in model.parameters():
    param.requires_grad = True
    
model.cuda()

EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 24, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): FusedMBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
        )
        (stochastic_depth): StochasticDepth(p=0.0, mode=row)
      )
      (1): FusedMBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(24, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
            (1): BatchNorm2d(24, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
  

In [7]:
writer = SummaryWriter(Path('logs', time_stamp))

# drawing model graph
model_input, _ = train_data.dataset[0]
writer.add_graph(model, model_input.unsqueeze_(0))

In [8]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), 1e-3)

def start_training(epochs, weights, scheduler, st_epoch=1, min_loss=10.0, max_acc=0.):
    fn_epoch = st_epoch+epochs-1
    len_fn_epoch = len(str(fn_epoch))
    # loop over the dataset multiple times
    for e in range(epochs):
        cr_epoch = st_epoch+e
        # training
        data_num, running_loss, running_acc = 0, 0., 0.
        model.train()
        
        pbar = tqdm(train_data, unit='batch',
                    desc=f'{cr_epoch:0>{len_fn_epoch}}/{fn_epoch} - train: ', colour='yellow')
        for batch in pbar:
            # the input data is a list of [images, labels]
            images, labels = batch
            data_num += images.size(0)
            
            # clear gradients
            optimizer.zero_grad()
            
            # forward + backward + optimize
            outputs = model(images)
            loss = criterion(outputs, labels)
            _, preds = torch.max(outputs.data, 1)
            loss.backward()
            optimizer.step()
            
            # calculate loss
            running_loss += loss.item()*images.size(0)
            running_acc += torch.sum(preds == labels).item()
            
            train_loss = running_loss/data_num
            train_acc = running_acc/data_num
            pbar.set_postfix_str(f'loss: {train_loss:.4f}, acc: {train_acc:.4f}')
            
        writer.add_scalar('Loss/Training', train_loss, cr_epoch)
        writer.add_scalar('Accuracy/Training', train_acc, cr_epoch)
        
        # validation
        data_num, running_loss, running_acc = 0, 0., 0.
        model.eval()
        
        with torch.no_grad():
            pbar = tqdm(valid_data, unit='batch',
                        desc=f'{cr_epoch:0>{len_fn_epoch}}/{fn_epoch} - valid: ', colour='green')
            for batch in pbar:
                images, labels = batch
                data_num += images.size(0)

                outputs = model(images)
                loss = criterion(outputs, labels)
                _, preds = torch.max(outputs.data, 1)
                
                running_loss += loss.item()*images.size(0)
                running_acc += torch.sum(preds == labels).item()
                
                valid_loss = running_loss/data_num
                valid_acc = running_acc/data_num
                pbar.set_postfix_str(f'loss: {valid_loss:.4f}, acc: {valid_acc:.4f}')

        writer.add_scalar('Loss/Validation', valid_loss, cr_epoch)
        writer.add_scalar('Accuracy/Validation', valid_acc, cr_epoch)
        
        # updating scheduler each epoch
        scheduler.step()
        
        # Saving weights
        if valid_loss < min_loss:
            min_loss = valid_loss
            torch.save(model.state_dict(), str(weights)+'best_loss.pth')
        if valid_acc > max_acc:
            max_acc = valid_acc
            torch.save(model.state_dict(), str(weights)+'best_acc.pth')
        torch.save(model.state_dict(), str(weights)+'last.pth')
        
    print(f"{'='*20} Training finished. {'='*20}")
    print(f"{'>'*10} Best loss: {min_loss:.4f}, Best acc: {max_acc:.4f} {'<'*10}")

In [9]:
epochs = 100
scheduler_step = [int(epochs*0.6), int(epochs*0.8)]
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, scheduler_step, gamma=0.1) # decreasing lr to lr*0.1
weights = Path('weights', f'{model_name}_weights-')

start_training(epochs, weights, scheduler)

001/100 - train: 100%|[33m███████████████████████████████[0m| 2108/2108 [13:48<00:00,  2.54batch/s, loss: 1.2592, acc: 0.6268][0m
001/100 - valid: 100%|[32m█████████████████████████████████[0m| 905/905 [01:51<00:00,  8.09batch/s, loss: 0.7890, acc: 0.7729][0m
002/100 - train: 100%|[33m███████████████████████████████[0m| 2108/2108 [13:46<00:00,  2.55batch/s, loss: 0.7871, acc: 0.7667][0m
002/100 - valid: 100%|[32m█████████████████████████████████[0m| 905/905 [01:53<00:00,  7.97batch/s, loss: 0.7271, acc: 0.7866][0m
003/100 - train: 100%|[33m███████████████████████████████[0m| 2108/2108 [13:48<00:00,  2.54batch/s, loss: 0.6631, acc: 0.8037][0m
003/100 - valid: 100%|[32m█████████████████████████████████[0m| 905/905 [01:53<00:00,  7.99batch/s, loss: 0.5868, acc: 0.8254][0m
004/100 - train: 100%|[33m███████████████████████████████[0m| 2108/2108 [14:14<00:00,  2.47batch/s, loss: 0.5828, acc: 0.8257][0m
004/100 - valid: 100%|[32m█████████████████████████████████[0m| 905

>>>>>>>>>> Best loss: 0.4912, Best acc: 0.8967 <<<<<<<<<<


In [12]:
for state in ['best_loss', 'best_acc', 'last']:
    # load weights
    weights_suffix = state+'.pth'
    model.load_state_dict(torch.load(str(weights)+weights_suffix))
    # save model
    model_path = Path('models', f'{model_name}-'+weights_suffix)
    torch.save(model, model_path)

In [13]:
# validation
model.load_state_dict(torch.load(str(weights)+'best_acc.pth')) # ['best_loss', 'best_acc', 'last']

labels_lst, preds_lst = [], []
data_num, running_loss, running_acc = 0, 0., 0.

model.eval()
with torch.no_grad():
    pbar = tqdm(valid_data, unit='batch', desc='valid: ')
    for batch in pbar:
        images, labels = batch
        data_num += images.size(0)
        
        outputs = model(images)
        loss = criterion(outputs, labels)
        _, preds = torch.max(outputs.data, 1)
        
        running_loss += loss.item()*images.size(0)
        running_acc += torch.sum(preds == labels).item()
        
        valid_loss = running_loss/data_num
        valid_acc = running_acc/data_num
        pbar.set_postfix_str(f'loss: {valid_loss:.4f}, acc: {valid_acc:.4f}')
        
        labels_lst.append(labels.cpu().tolist())
        preds_lst.append(preds.cpu().tolist())

labels_lst = [l for lst in labels_lst for l in lst]
preds_lst = [l for lst in preds_lst for l in lst]

valid: 100%|███████████████████████████████████████████| 905/905 [01:54<00:00,  7.91batch/s, loss: 0.7456, acc: 0.8967]


In [14]:
def calc_score(cfmat):
    total_TP = 0
    WP = 0
    cls_num = cfmat.shape[0]
    all_num = np.sum(cfmat)
    cls_score = []
    
    for i in range(cls_num):
        TP = cfmat[i, i]
        FN = np.sum(cfmat[i, :]) - TP
        FP = np.sum(cfmat[:, i]) - TP
        TN = all_num - FN - FP + TP

        TPR = TP / (TP+FN) if TP != 0 else 0 # recall (sensitivity)
        PPV = TP / (TP+FP) if TP != 0 else 0 # precision
        F1 = 2 * (PPV*TPR) / (PPV+TPR) if 0 not in [PPV, TPR] else 0 # F1-score
        
        total_TP += TP
        WP += PPV * (TP+FN) # weighted precision
        
        cls_score.append({'TP': TP, 'FN': FN,
                        'FP': FP, 'TN': TN,
                        'TPR': round(TPR, 4),
                        'PPV': round(PPV, 4),
                        'F1': round(F1, 4)})
    
    ACC = total_TP / all_num
    WP /= all_num
    
    print('{:<19} {:.4f}'.format('Accuracy:', ACC))
    print('{:<19} {:.4f}'.format('Weighted-Precision:', WP))

    return cls_score

In [15]:
confusion_mat = confusion_matrix(labels_lst, preds_lst, labels=list(range(33)))
score = calc_score(confusion_mat)
pd.DataFrame(score)

Accuracy:           0.8967
Weighted-Precision: 0.8967


Unnamed: 0,TP,FN,FP,TN,TPR,PPV,F1
0,559,37,32,25809,0.9379,0.9459,0.9419
1,810,103,106,25920,0.8872,0.8843,0.8857
2,732,150,147,25754,0.8299,0.8328,0.8313
3,798,113,173,25831,0.876,0.8218,0.848
4,434,154,135,25464,0.7381,0.7627,0.7502
5,499,77,44,25697,0.8663,0.919,0.8919
6,620,47,24,25868,0.9295,0.9627,0.9458
7,657,43,35,25898,0.9386,0.9494,0.944
8,889,50,44,26114,0.9468,0.9528,0.9498
9,546,50,91,25724,0.9161,0.8571,0.8856


In [None]:
%load_ext tensorboard
%tensorboard --logdir logs