In [1]:
import os
import cv2
import time
import random
import logging
import easydict
import numpy as np
import pandas as pd
from tqdm import tqdm
from os.path import join as opj
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
from PIL import Image

import timm
import torch
import torch.nn as nn
import torch_optimizer as optim
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, grad_scaler
from torchvision import transforms
from torch import Tensor
from torchvision.transforms import functional as F

import warnings
warnings.filterwarnings('ignore')

In [2]:
# Folder that holds the CSV metadata files.
DATA_DIR = './open'

# Training metadata and the list of test files.
train_df = pd.read_csv(opj(DATA_DIR, 'train_df2.csv'))
test_df = pd.read_csv(opj(DATA_DIR, 'test_df.csv'))

# Sanity check: first rows of both frames, then both shapes.
print(train_df.head(), test_df.head(), sep='\n')
print(train_df.shape, test_df.shape, sep='\n')

   file_name       class state            label  label2  class2  state2
0  10000.png  transistor  good  transistor-good      72      12      25
1  10001.png     capsule  good     capsule-good      15       2      25
2  10002.png  transistor  good  transistor-good      72      12      25
3  10003.png        wood  good        wood-good      76      13      25
4  10004.png      bottle  good      bottle-good       3       0      25
   index  file_name
0      0  20000.png
1      1  20001.png
2      2  20002.png
3      3  20003.png
4      4  20004.png
(4277, 7)
(2154, 2)


In [3]:
# train_y = pd.read_csv("./open/train_df.csv")

# train_labels = train_y["state"]

# label_unique = sorted(np.unique(train_labels))
# label_unique = {key:value for key,value in zip(label_unique, range(len(label_unique)))}

# train_labels = [label_unique[k] for k in train_labels]
# train_labels

In [4]:
# label_unique

In [5]:
# train_df['state2'] = train_labels

In [6]:
# train_df.to_csv('./open/train_df2.csv')

In [7]:
# Number of distinct values in the integer-encoded `class2` column.
class_num = len(train_df['class2'].unique())

In [8]:
# Central experiment configuration. `step` and `fold` are NOT set here; the
# training cell assigns them before calling main().
args = easydict.EasyDict(dict(
    exp_num='0',

    # --- Path settings ---
    data_path='./open',
    Kfold=5,
    model_path='class_results/',
    image_type='train_1024',      # not read by the code visible in this notebook
    class_num=class_num,

    # --- Model parameter settings ---
    # NOTE: Network builds coatnet_3() directly (the timm call is commented
    # out), so these two keys are not consumed by the visible code.
    model_name='efficientnet_b3',
    drop_path_rate=0.2,

    # --- Training parameter settings ---
    ## Base parameters
    img_size=224,
    batch_size=16,
    epochs=70,
    optimizer='Lamb',             # informational: Trainer always uses optim.Lamb
    initial_lr=5e-4,
    weight_decay=1e-3,

    ## Augmentation (version passed to get_train_augmentation)
    aug_ver=2,

    ## Scheduler: 'cycle' (OneCycleLR), 'cos' (CosineAnnealingLR) or 'step'
    scheduler='cycle',
    warm_epoch=5,
    max_lr=1e-3,

    ### Cosine annealing (only read when scheduler == 'cos')
    min_lr=5e-6,
    tmax=145,

    ## Misc.
    patience=5,                   # early-stopping patience, in epochs
    clipping=None,                # max grad norm, or None to disable clipping

    # --- Hardware settings ---
    amp=True,
    multi_gpu=True,
    logging=False,
    num_workers=4,
    seed=42,
))

In [9]:
import torch
import torch.nn as nn

from einops import rearrange
from einops.layers.torch import Rearrange


def conv_3x3_bn(inp, oup, image_size, downsample=False):
    """Build a 3x3 convolution -> BatchNorm -> GELU unit.

    Args:
        inp: number of input channels.
        oup: number of output channels.
        image_size: accepted for signature parity with the other block
            builders; not used here.
        downsample: when it compares equal to ``False`` the convolution uses
            stride 1, otherwise stride 2.

    Returns:
        An ``nn.Sequential`` of the three layers.
    """
    if downsample == False:  # noqa: E712 - equality (not truthiness) on purpose
        stride = 1
    else:
        stride = 2

    layers = [
        nn.Conv2d(inp, oup, kernel_size=3, stride=stride, padding=1, bias=False),
        nn.BatchNorm2d(oup),
        nn.GELU(),
    ]
    return nn.Sequential(*layers)


class PreNorm(nn.Module):
    """Apply a normalisation layer before delegating to a wrapped callable.

    Args:
        dim: size passed to the ``norm`` constructor.
        fn: callable (typically a module) invoked on the normalised input.
        norm: normalisation class, instantiated as ``norm(dim)``.
    """

    def __init__(self, dim, fn, norm):
        super().__init__()
        # Attribute names/order are part of the checkpoint layout: norm, then fn.
        self.norm = norm(dim)
        self.fn = fn

    def forward(self, x, **kwargs):
        """Normalise ``x`` and forward it (plus any keyword args) to ``fn``."""
        normalised = self.norm(x)
        return self.fn(normalised, **kwargs)


class SE(nn.Module):
    """Squeeze-and-excitation style channel gate.

    The input is average-pooled to one value per channel, passed through a
    two-layer bottleneck ending in a sigmoid, and the result rescales the
    input channel-wise.

    Args:
        inp: reference channel count; the bottleneck width is
            ``int(inp * expansion)``.
        oup: number of channels of the tensor this module is applied to.
        expansion: bottleneck ratio relative to ``inp``.
    """

    def __init__(self, inp, oup, expansion=0.25):
        super().__init__()
        squeezed = int(inp * expansion)
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(oup, squeezed, bias=False),
            nn.GELU(),
            nn.Linear(squeezed, oup, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``x`` scaled by its per-channel gate; ``x`` must be 4-D."""
        batch, chans, _, _ = x.size()
        pooled = self.avg_pool(x).view(batch, chans)
        gate = self.fc(pooled).view(batch, chans, 1, 1)
        return x * gate


class FeedForward(nn.Module):
    """Two-layer MLP: Linear -> GELU -> Dropout -> Linear -> Dropout.

    Args:
        dim: input and output feature size.
        hidden_dim: width of the hidden layer.
        dropout: dropout probability used after both linear layers.
    """

    def __init__(self, dim, hidden_dim, dropout=0.):
        super().__init__()
        stages = [
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout),
        ]
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the MLP."""
        out = self.net(x)
        return out


class MBConv(nn.Module):
    """Inverted-residual convolution block with a pre-BatchNorm and skip path.

    Args:
        inp: input channels.
        oup: output channels.
        image_size: accepted for signature parity with ``Transformer``; unused.
        downsample: if truthy, the skip path is max-pooled and projected with
            a 1x1 convolution, and the main path uses stride 2.
        expansion: channel expansion factor; ``1`` selects the shorter
            depthwise + pointwise variant without the SE gate.
    """

    def __init__(self, inp, oup, image_size, downsample=False, expansion=4):
        super().__init__()
        self.downsample = downsample
        hidden_dim = int(inp * expansion)
        if self.downsample == False:  # noqa: E712 - equality on purpose
            stride = 1
        else:
            stride = 2

        # Skip-path modules are created first so that parameter creation
        # order (and therefore seeded initialisation) is unchanged.
        if self.downsample:
            self.pool = nn.MaxPool2d(3, 2, 1)
            self.proj = nn.Conv2d(inp, oup, 1, 1, 0, bias=False)

        if expansion == 1:
            layers = [
                # depthwise
                nn.Conv2d(hidden_dim, hidden_dim, kernel_size=3, stride=stride,
                          padding=1, groups=hidden_dim, bias=False),
                nn.BatchNorm2d(hidden_dim),
                nn.GELU(),
                # pointwise, linear
                nn.Conv2d(hidden_dim, oup, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(oup),
            ]
        else:
            layers = [
                # pointwise expansion; the stride (down-sampling) lives here
                nn.Conv2d(inp, hidden_dim, kernel_size=1, stride=stride, padding=0, bias=False),
                nn.BatchNorm2d(hidden_dim),
                nn.GELU(),
                # depthwise
                nn.Conv2d(hidden_dim, hidden_dim, kernel_size=3, stride=1,
                          padding=1, groups=hidden_dim, bias=False),
                nn.BatchNorm2d(hidden_dim),
                nn.GELU(),
                SE(inp, hidden_dim),
                # pointwise, linear
                nn.Conv2d(hidden_dim, oup, kernel_size=1, stride=1, padding=0, bias=False),
                nn.BatchNorm2d(oup),
            ]

        self.conv = PreNorm(inp, nn.Sequential(*layers), nn.BatchNorm2d)

    def forward(self, x):
        """Return skip(x) + conv(x); the skip is pooled+projected when down-sampling."""
        shortcut = self.proj(self.pool(x)) if self.downsample else x
        return shortcut + self.conv(x)


class Attention(nn.Module):
    """Multi-head self-attention with a learned relative position bias.

    Args:
        inp: feature size of the incoming tokens.
        oup: feature size produced by the output projection.
        image_size: ``(ih, iw)`` grid size; the bias table and index buffer
            are built for exactly ``ih * iw`` tokens.
        heads: number of attention heads.
        dim_head: feature size per head.
        dropout: dropout probability applied after the output projection.
    """
    def __init__(self, inp, oup, image_size, heads=8, dim_head=32, dropout=0.):
        super().__init__()
        inner_dim = dim_head * heads
        # The output projection is skipped only for a single head whose size
        # already equals the input feature size.
        project_out = not (heads == 1 and dim_head == inp)

        self.ih, self.iw = image_size

        self.heads = heads
        self.scale = dim_head ** -0.5

        # parameter table of relative position bias:
        # one row per possible (row offset, column offset) pair, one column per head
        self.relative_bias_table = nn.Parameter(
            torch.zeros((2 * self.ih - 1) * (2 * self.iw - 1), heads))

        # coords: (2, ih*iw) -- row index and column index of every grid cell
        coords = torch.meshgrid((torch.arange(self.ih), torch.arange(self.iw)))
        coords = torch.flatten(torch.stack(coords), 1)
        # pairwise offsets between all cells: (2, ih*iw, ih*iw)
        relative_coords = coords[:, :, None] - coords[:, None, :]

        # shift both offsets to start at 0, then scale the row offset by the
        # number of distinct column offsets so that row + col is a unique
        # flat index into relative_bias_table
        relative_coords[0] += self.ih - 1
        relative_coords[1] += self.iw - 1
        relative_coords[0] *= 2 * self.iw - 1
        relative_coords = rearrange(relative_coords, 'c h w -> h w c')
        # flat table index for every (query, key) pair: ((ih*iw)**2, 1)
        relative_index = relative_coords.sum(-1).flatten().unsqueeze(1)
        # buffer (not a parameter): saved in state_dict and moved with the module
        self.register_buffer("relative_index", relative_index)

        self.attend = nn.Softmax(dim=-1)
        self.to_qkv = nn.Linear(inp, inner_dim * 3, bias=False)

        self.to_out = nn.Sequential(
            nn.Linear(inner_dim, oup),
            nn.Dropout(dropout)
        ) if project_out else nn.Identity()

    def forward(self, x):
        """Attend over tokens of shape (b, n, inp); n must equal ih * iw for the bias to broadcast."""
        # split the joint projection into q, k, v and separate the heads
        qkv = self.to_qkv(x).chunk(3, dim=-1)
        q, k, v = map(lambda t: rearrange(
            t, 'b n (h d) -> b h n d', h=self.heads), qkv)

        # scaled dot-product scores: (b, heads, n, n)
        dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale

        # Use "gather" for more efficiency on GPUs
        # (looks up one bias value per head for every query/key pair)
        relative_bias = self.relative_bias_table.gather(
            0, self.relative_index.repeat(1, self.heads))
        # reshape to (1, heads, ih*iw, ih*iw) so it broadcasts over the batch
        relative_bias = rearrange(
            relative_bias, '(h w) c -> 1 c h w', h=self.ih*self.iw, w=self.ih*self.iw)
        dots = dots + relative_bias

        attn = self.attend(dots)
        out = torch.matmul(attn, v)
        # merge the heads back into the feature dimension
        out = rearrange(out, 'b h n d -> b n (h d)')
        out = self.to_out(out)
        return out


class Transformer(nn.Module):
    """Attention + feed-forward block operating on (b, c, ih, iw) feature maps.

    Both sub-layers flatten the spatial grid to tokens, apply LayerNorm and
    the wrapped module, and reshape back to ``(ih, iw)``.

    Args:
        inp: input channels.
        oup: output channels.
        image_size: ``(ih, iw)`` of the feature map the block outputs.
        heads: attention heads.
        dim_head: feature size per head.
        downsample: if truthy, max-pool both the skip and the attention input
            and project the skip path to ``oup`` channels.
        dropout: dropout probability for attention output and feed-forward.
    """

    def __init__(self, inp, oup, image_size, heads=8, dim_head=32, downsample=False, dropout=0.):
        super().__init__()
        self.ih, self.iw = image_size
        self.downsample = downsample
        hidden_dim = int(inp * 4)

        if self.downsample:
            self.pool1 = nn.MaxPool2d(3, 2, 1)
            self.pool2 = nn.MaxPool2d(3, 2, 1)
            self.proj = nn.Conv2d(inp, oup, 1, 1, 0, bias=False)

        # Built in the same order as before (attention, then MLP) so that
        # parameter creation order is unchanged.
        attention = Attention(inp, oup, image_size, heads, dim_head, dropout)
        feed_forward = FeedForward(oup, hidden_dim, dropout)

        self.attn = nn.Sequential(
            Rearrange('b c ih iw -> b (ih iw) c'),
            PreNorm(inp, attention, nn.LayerNorm),
            Rearrange('b (ih iw) c -> b c ih iw', ih=self.ih, iw=self.iw),
        )
        self.ff = nn.Sequential(
            Rearrange('b c ih iw -> b (ih iw) c'),
            PreNorm(oup, feed_forward, nn.LayerNorm),
            Rearrange('b (ih iw) c -> b c ih iw', ih=self.ih, iw=self.iw),
        )

    def forward(self, x):
        """Residual attention followed by a residual feed-forward step."""
        if self.downsample:
            attended = self.proj(self.pool1(x)) + self.attn(self.pool2(x))
        else:
            attended = x + self.attn(x)
        return attended + self.ff(attended)


class CoAtNet(nn.Module):
    """Five-stage network: a conv stem (s0) followed by four stages (s1-s4).

    Each of s1-s4 is built from ``MBConv`` ('C') or ``Transformer`` ('T')
    blocks as selected by ``block_types``. Every stage halves the spatial
    resolution in its first block.

    Args:
        image_size: ``(ih, iw)`` of the input images.
        in_channels: channels of the input images.
        num_blocks: number of blocks in each of the five stages.
        channels: output channels of each of the five stages.
        num_classes: size of the final linear layer.
        block_types: four letters ('C' or 'T') choosing the block used in
            stages s1-s4.
    """

    # The default is a tuple (not a list) so it can never be mutated across calls.
    def __init__(self, image_size, in_channels, num_blocks, channels, num_classes=1000, block_types=('C', 'C', 'T', 'T')):
        super().__init__()
        ih, iw = image_size
        block = {'C': MBConv, 'T': Transformer}

        self.s0 = self._make_layer(
            conv_3x3_bn, in_channels, channels[0], num_blocks[0], (ih // 2, iw // 2))
        self.s1 = self._make_layer(
            block[block_types[0]], channels[0], channels[1], num_blocks[1], (ih // 4, iw // 4))
        self.s2 = self._make_layer(
            block[block_types[1]], channels[1], channels[2], num_blocks[2], (ih // 8, iw // 8))
        self.s3 = self._make_layer(
            block[block_types[2]], channels[2], channels[3], num_blocks[3], (ih // 16, iw // 16))
        self.s4 = self._make_layer(
            block[block_types[3]], channels[3], channels[4], num_blocks[4], (ih // 32, iw // 32))

        # Pool over the full final feature map in BOTH dimensions. Using only
        # ih // 32 left a non-1x1 map for non-square inputs, which the
        # view(-1, C) in forward() silently folded into the batch dimension.
        self.pool = nn.AvgPool2d((ih // 32, iw // 32), 1)
        self.fc = nn.Linear(channels[-1], num_classes, bias=False)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes)."""
        x = self.s0(x)
        x = self.s1(x)
        x = self.s2(x)
        x = self.s3(x)
        x = self.s4(x)

        # x.shape[1] is read from the pre-pool tensor: the channel count.
        x = self.pool(x).view(-1, x.shape[1])
        x = self.fc(x)
        return x

    def _make_layer(self, block, inp, oup, depth, image_size):
        """Stack ``depth`` blocks; only the first changes channels and down-samples."""
        layers = nn.ModuleList([])
        for i in range(depth):
            if i == 0:
                layers.append(block(inp, oup, image_size, downsample=True))
            else:
                layers.append(block(oup, oup, image_size))
        return nn.Sequential(*layers)


def coatnet_0():
    """Build the CoAtNet-0 configuration for 224x224 RGB input, 1000 classes."""
    return CoAtNet(
        (224, 224),
        3,
        num_blocks=[2, 2, 3, 5, 2],            # L: blocks per stage
        channels=[64, 96, 192, 384, 768],      # D: channels per stage
        num_classes=1000,
    )


def coatnet_1():
    """Build the CoAtNet-1 configuration for 224x224 RGB input, 1000 classes."""
    return CoAtNet(
        (224, 224),
        3,
        num_blocks=[2, 2, 6, 14, 2],           # L: blocks per stage
        channels=[64, 96, 192, 384, 768],      # D: channels per stage
        num_classes=1000,
    )


def coatnet_2():
    """Build the CoAtNet-2 configuration for 224x224 RGB input, 1000 classes.

    The last stage width is 1024; the previous value 1026 was a typo (every
    other stage width in this family doubles cleanly: 128, 256, 512, 1024).
    """
    num_blocks = [2, 2, 6, 14, 2]           # L: blocks per stage
    channels = [128, 128, 256, 512, 1024]   # D: channels per stage
    return CoAtNet((224, 224), 3, num_blocks, channels, num_classes=1000)


def coatnet_3():
    """Build the CoAtNet-3 configuration for 224x224 RGB input, 1000 classes."""
    return CoAtNet(
        (224, 224),
        3,
        num_blocks=[2, 2, 6, 14, 2],           # L: blocks per stage
        channels=[192, 192, 384, 768, 1536],   # D: channels per stage
        num_classes=1000,
    )


def coatnet_4():
    """Build the CoAtNet-4 configuration for 224x224 RGB input, 1000 classes."""
    return CoAtNet(
        (224, 224),
        3,
        num_blocks=[2, 2, 12, 28, 2],          # L: blocks per stage
        channels=[192, 192, 384, 768, 1536],   # D: channels per stage
        num_classes=1000,
    )


def count_parameters(model):
    """Return the total number of elements in the model's trainable parameters."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

In [11]:
# Warmup Learning rate scheduler
from torch.optim.lr_scheduler import _LRScheduler
class WarmUpLR(_LRScheduler):
    """Linear learning-rate warm-up, stepped once per training iteration.

    Args:
        optimizer: wrapped optimizer (e.g. SGD).
        total_iters: number of iterations in the warm-up phase.
        last_epoch: index of the last step taken; ``-1`` starts from scratch.
    """

    def __init__(self, optimizer, total_iters, last_epoch=-1):
        # Must be set before the base constructor, which calls get_lr().
        self.total_iters = total_iters
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return ``base_lr * step / total_iters`` for every parameter group.

        A tiny epsilon in the denominator avoids division by zero when
        ``total_iters`` is 0.
        """
        step = self.last_epoch
        denom = self.total_iters + 1e-8
        return [base_lr * step / denom for base_lr in self.base_lrs]

# Logging
def get_root_logger(logger_name='basicsr',
                    log_level=logging.INFO,
                    log_file=None):
    """Return the named logger, optionally writing to ``log_file``.

    The previous implementation returned early whenever
    ``logger.hasHandlers()`` was true. That check also sees handlers on the
    root logger, so after the first call (which runs ``basicConfig``) every
    later call skipped the file-handler setup: a new ``log_file`` was never
    created and all output kept going to the first file.

    Args:
        logger_name: name passed to ``logging.getLogger``.
        log_level: level for the root configuration and the file handler.
        log_file: path of the log file; ``None`` means console only. If the
            logger already writes to this exact file the handler is reused;
            file handlers pointing elsewhere are closed and replaced, so each
            run logs to its own file only.

    Returns:
        The configured ``logging.Logger``.
    """
    logger = logging.getLogger(logger_name)
    format_str = '%(asctime)s %(levelname)s: %(message)s'

    # basicConfig does nothing once the root logger has handlers, so calling
    # it on every invocation is safe.
    logging.basicConfig(format=format_str, level=log_level)

    if log_file is None:
        return logger

    target = os.path.abspath(log_file)
    file_handlers = [h for h in logger.handlers if isinstance(h, logging.FileHandler)]

    # Already logging to the requested file: keep it (and do not truncate it).
    if any(h.baseFilename == target for h in file_handlers):
        return logger

    # Drop file handlers that belong to a previous run of this logger.
    for stale in file_handlers:
        logger.removeHandler(stale)
        stale.close()

    file_handler = logging.FileHandler(log_file, 'w')
    file_handler.setFormatter(logging.Formatter(format_str))
    file_handler.setLevel(log_level)
    logger.addHandler(file_handler)

    return logger

class AvgMeter(object):
    """Running weighted average that also keeps every raw value it was given."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Clear all statistics."""
        self.val = self.avg = self.sum = self.count = 0
        self.losses = []

    def update(self, val, n=1):
        """Record ``val`` as the mean of ``n`` samples and refresh the average."""
        self.val = val
        self.losses.append(val)
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

In [12]:
class RandomRotation(transforms.RandomRotation):
    """``transforms.RandomRotation`` that is applied only with probability ``p``.

    Args:
        p: probability of rotating the image.
        degrees: rotation range, forwarded to the parent class.
    """

    def __init__(self, p: float, degrees: int):
        super(RandomRotation, self).__init__(degrees)
        self.p = p

    def forward(self, img):
        """Rotate ``img`` by a random angle with probability ``p``; otherwise return it unchanged."""
        if not (torch.rand(1) < self.p):
            return img

        fill = self.fill
        if isinstance(img, Tensor):
            # Tensor images need one fill value per channel.
            if isinstance(fill, (int, float)):
                fill = [float(fill)] * F.get_image_num_channels(img)
            else:
                fill = [float(f) for f in fill]
        angle = self.get_params(self.degrees)

        # Newer torchvision releases store the resampling mode as
        # `interpolation` and no longer define `resample`; older ones only
        # have `resample`. Accept either so forward() works on both.
        interpolation = getattr(self, 'interpolation', None)
        if interpolation is None:
            interpolation = self.resample

        return F.rotate(img, angle, interpolation, self.expand, self.center, fill)

In [13]:
class Train_Dataset(Dataset):
    """Labelled image dataset driven by a metadata DataFrame.

    Args:
        df: frame with a ``file_name`` column (image file names) and a
            ``class2`` column (integer class targets).
        transform: optional callable applied to the loaded PIL image. When
            ``None`` the RGB PIL image is returned as is (previously the
            default ``None`` crashed on the first item).
        img_dir: folder the file names are relative to.
    """

    def __init__(self, df, transform=None, img_dir='./open/train/'):
        self.img_path = df['file_name'].values
        self.target = df['class2'].values
        self.transform = transform
        self.img_dir = img_dir

        print(f'Dataset size:{len(self.img_path)}')

    def __getitem__(self, idx):
        """Return ``(image, target)`` for row ``idx``."""
        image = Image.open(opj(self.img_dir, self.img_path[idx])).convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        target = self.target[idx]

        return image, target

    def __len__(self):
        return len(self.img_path)

class Test_dataset(Dataset):
    """Unlabelled image dataset driven by a metadata DataFrame.

    Args:
        df: frame with a ``file_name`` column (image file names).
        transform: optional callable applied to the loaded PIL image. When
            ``None`` the RGB PIL image is returned as is (previously the
            default ``None`` crashed on the first item).
        img_dir: folder the file names are relative to.
    """

    def __init__(self, df, transform=None, img_dir='./open/test/'):
        self.img_path = df['file_name'].values
        self.transform = transform
        self.img_dir = img_dir

        print(f'Test Dataset size:{len(self.img_path)}')

    def __getitem__(self, idx):
        """Return the (optionally transformed) image for row ``idx``."""
        image = Image.open(opj(self.img_dir, self.img_path[idx])).convert('RGB')
        if self.transform is not None:
            image = self.transform(image)

        return image

    def __len__(self):
        return len(self.img_path)

def get_loader(df, phase: str, batch_size, shuffle,
               num_workers, transform):
    """Wrap ``df`` in the matching dataset and return a ``DataLoader``.

    Args:
        df: metadata frame handed to the dataset.
        phase: ``'test'`` selects ``Test_dataset``; any other value selects
            ``Train_Dataset``.
        batch_size, shuffle, num_workers: forwarded to ``DataLoader``.
        transform: image transform handed to the dataset.

    Returns:
        A ``DataLoader`` with pinned memory enabled.
    """
    loader_kwargs = dict(batch_size=batch_size, shuffle=shuffle,
                         num_workers=num_workers, pin_memory=True)
    if phase == 'test':
        return DataLoader(Test_dataset(df, transform), **loader_kwargs)
    return DataLoader(Train_Dataset(df, transform), drop_last=False, **loader_kwargs)

def get_train_augmentation(img_size, ver):
    """Return the torchvision transform pipeline for augmentation version ``ver``.

    Args:
        img_size: images are resized to ``(img_size, img_size)``.
        ver: ``1`` = deterministic pipeline (used for validation / test);
            ``2`` = random flips, affine and rotation followed by the same
            deterministic steps.

    Returns:
        A ``transforms.Compose`` instance.

    Raises:
        ValueError: if ``ver`` is neither 1 nor 2. (Previously this surfaced
            as an ``UnboundLocalError`` on the return statement.)
    """
    if ver not in (1, 2):
        raise ValueError(f'Unknown augmentation version: {ver!r} (expected 1 or 2)')

    # Steps shared by both versions: tensor conversion, resize, normalisation.
    common_tail = [
        transforms.ToTensor(),
        transforms.Resize((img_size, img_size)),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ]

    if ver == 1:  # for validset
        return transforms.Compose(common_tail)

    return transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.RandomAffine((-20, 20)),
        transforms.RandomRotation(90),
        *common_tail,
    ])

In [14]:
class Network(nn.Module):
    """Training-time classifier: ``coatnet_3()`` with its head resized.

    The backbone is created by ``coatnet_3()`` and its final linear layer is
    replaced by one with ``args.class_num`` outputs. A timm-based backbone
    was used here previously; ``args.model_name`` is not read.

    Args:
        args: configuration object; only ``class_num`` is used.
    """

    def __init__(self, args):
        super().__init__()
        backbone = coatnet_3()
        # Swap the 1000-way head for one sized to this task.
        backbone.fc = nn.Linear(backbone.fc.in_features, args.class_num)
        self.model_ft = backbone

    def forward(self, x):
        """Return the backbone's class logits for ``x``."""
        return self.model_ft(x)

class Network_test(nn.Module):
    """Inference-time classifier with the same layout as ``Network``.

    Args:
        encoder_name: kept for call compatibility; it is not used, the
            backbone is always ``coatnet_3()``.
        num_classes: size of the classification head. When ``None`` it falls
            back to the notebook-global ``args.class_num`` (the previous,
            implicit behaviour), so existing callers are unaffected while new
            callers no longer need the global to exist.
    """

    def __init__(self, encoder_name, num_classes=None):
        super().__init__()
        if num_classes is None:
            # Legacy path: read the head size from the global config.
            num_classes = args.class_num

        self.model_ft = coatnet_3()
        num_ftrs = self.model_ft.fc.in_features
        self.model_ft.fc = nn.Linear(num_ftrs, num_classes)

    def forward(self, x):
        """Return the backbone's class logits for ``x``."""
        out = self.model_ft(x)
        return out

In [15]:
# Weight computation for the weighted cross-entropy loss
def get_class_weight():
    """Return the inverse sample count of every ``class2`` value.

    Counts are read from the global ``train_df`` and ordered by class index,
    so position ``i`` of the result is ``1 / count(class2 == i)`` as long as
    every index occurs at least once.
    """
    counts_by_class = train_df['class2'].value_counts().sort_index()
    return 1 / counts_by_class.values

class_weight = get_class_weight()



In [16]:
class Trainer():
    """Train and validate one cross-validation fold.

    The whole run happens inside ``__init__``: constructing a ``Trainer``
    loads the data, builds model/optimizer/schedulers, trains with early
    stopping and writes ``best_model.pth`` and ``log.log`` into ``save_path``.
    """

    def __init__(self, args, save_path):
        '''
        args: arguments (must also provide ``step`` and ``fold``)
        save_path: directory where the log file and model weights are saved
        '''
        super(Trainer, self).__init__()
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # Logging
        log_file = os.path.join(save_path, 'log.log')
        self.logger = get_root_logger(logger_name='IR', log_level=logging.INFO, log_file=log_file)
        self.logger.info(args)

        # Train, Valid Set load
        ############################################################################
        if args.step == 0:
            df_train = pd.read_csv(opj(args.data_path, 'train_df2.csv'))
        else:
            df_train = pd.read_csv(opj(args.data_path, f'train_{args.step}step.csv'))

        # Stratified split on the class label; rows tagged with args.fold
        # become the validation set, all others the training set.
        kf = StratifiedKFold(n_splits=args.Kfold, shuffle=True, random_state=args.seed)
        for fold, (_, val_idx) in enumerate(kf.split(range(len(df_train)), y=df_train['class2'])):
            df_train.loc[val_idx, 'fold'] = fold

        df_val = df_train[df_train['fold'] == args.fold].reset_index(drop=True)
        df_train = df_train[df_train['fold'] != args.fold].reset_index(drop=True)

        # Augmentation
        self.train_transform = get_train_augmentation(img_size=args.img_size, ver=args.aug_ver)
        self.test_transform = get_train_augmentation(img_size=args.img_size, ver=1)

        # TrainLoader
        self.train_loader = get_loader(df_train, phase='train', batch_size=args.batch_size, shuffle=True,
                                       num_workers=args.num_workers, transform=self.train_transform)
        self.val_loader = get_loader(df_val, phase='train', batch_size=args.batch_size, shuffle=False,
                                       num_workers=args.num_workers, transform=self.test_transform)

        # Network
        self.model = Network(args).to(self.device)

        # Loss: class-weighted cross entropy. The weights follow self.device
        # instead of calling .cuda(), which crashed on CPU-only machines.
        self.criterion = nn.CrossEntropyLoss(weight=torch.Tensor(class_weight).to(self.device))

        # Optimizer & Scheduler
        self.optimizer = optim.Lamb(self.model.parameters(), lr=args.initial_lr, weight_decay=args.weight_decay)

        iter_per_epoch = len(self.train_loader)
        self.warmup_scheduler = WarmUpLR(self.optimizer, iter_per_epoch * args.warm_epoch)

        if args.scheduler == 'step':
            self.scheduler = torch.optim.lr_scheduler.MultiStepLR(self.optimizer, milestones=args.milestone, gamma=args.lr_factor, verbose=True)
        elif args.scheduler == 'cos':
            tmax = args.tmax # half-cycle
            self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, T_max = tmax, eta_min=args.min_lr, verbose=True)
        elif args.scheduler == 'cycle':
            self.scheduler = torch.optim.lr_scheduler.OneCycleLR(self.optimizer, max_lr=args.max_lr, steps_per_epoch=iter_per_epoch, epochs=args.epochs)
        else:
            # Fail fast: without self.scheduler the first checkpoint save would
            # die with an AttributeError after a full epoch of training.
            raise ValueError(f"Unknown scheduler: {args.scheduler!r} (expected 'step', 'cos' or 'cycle')")

        if args.multi_gpu:
            self.model = nn.DataParallel(self.model).to(self.device)

        # One gradient scaler for the whole run, so its dynamically adjusted
        # loss scale carries over between epochs instead of being reset.
        self.scaler = grad_scaler.GradScaler()

        # Train / Validate
        best_loss = np.inf
        best_acc = 0
        best_f1 = 0  # defined up front so the final log line cannot hit an unbound name
        best_epoch = 0
        early_stopping = 0
        start = time.time()
        for epoch in range(1, args.epochs+1):
            self.epoch = epoch

            # The cosine schedule is stepped per epoch, after the warm-up phase.
            if args.scheduler == 'cos':
                if epoch > args.warm_epoch:
                    self.scheduler.step()

            # Training
            train_loss, train_acc, train_f1 = self.training(args)

            # Model weight in Multi_GPU or Single GPU
            state_dict= self.model.module.state_dict() if args.multi_gpu else self.model.state_dict()

            # Validation
            val_loss, val_acc, val_f1 = self.validate(args, phase='val')

            # Save models
            if val_loss < best_loss:
                early_stopping = 0
                best_epoch = epoch
                best_loss = val_loss
                best_acc = val_acc
                best_f1 = val_f1

                torch.save({'epoch':epoch,
                            'state_dict':state_dict,
                            'optimizer': self.optimizer.state_dict(),
                            'scheduler': self.scheduler.state_dict(),
                    }, os.path.join(save_path, 'best_model.pth'))
                self.logger.info(f'-----------------SAVE:{best_epoch}epoch----------------')
            else:
                early_stopping += 1

            # Early Stopping
            if early_stopping == args.patience:
                break

        self.logger.info(f'\nBest Val Epoch:{best_epoch} | Val Loss:{best_loss:.4f} | Val Acc:{best_acc:.4f} | Val F1:{best_f1:.4f}')
        end = time.time()
        self.logger.info(f'Total Process time:{(end - start) / 60:.3f}Minute')

    # Training
    def training(self, args):
        """Run one training epoch; return (mean loss, accuracy, macro F1)."""
        self.model.train()
        train_loss = AvgMeter()
        train_acc = 0
        preds_list = []
        targets_list = []

        for images, targets in tqdm(self.train_loader):
            # as_tensor moves the batch without re-wrapping an existing tensor
            # (torch.tensor(tensor) copies and emits a UserWarning).
            images = torch.as_tensor(images, device=self.device, dtype=torch.float32)
            targets = torch.as_tensor(targets, device=self.device, dtype=torch.long)

            # Per-iteration linear warm-up during the first warm_epoch epochs.
            if self.epoch <= args.warm_epoch:
                self.warmup_scheduler.step()

            self.model.zero_grad(set_to_none=True)
            if args.amp:
                with autocast():
                    preds = self.model(images)
                    loss = self.criterion(preds, targets)
                self.scaler.scale(loss).backward()

                # Gradient Clipping (gradients must be unscaled first)
                if args.clipping is not None:
                    self.scaler.unscale_(self.optimizer)
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), args.clipping)

                self.scaler.step(self.optimizer)
                self.scaler.update()

            else:
                preds = self.model(images)
                loss = self.criterion(preds, targets)
                loss.backward()
                # Clip only when a max norm is configured; clipping=None used
                # to raise a TypeError on this path.
                if args.clipping is not None:
                    nn.utils.clip_grad_norm_(self.model.parameters(), args.clipping)
                self.optimizer.step()

            # OneCycleLR is stepped per iteration, after the warm-up phase.
            if args.scheduler == 'cycle':
                if self.epoch > args.warm_epoch:
                    self.scheduler.step()

            # Metric
            train_acc += (preds.argmax(dim=1) == targets).sum().item()
            preds_list.extend(preds.argmax(dim=1).cpu().detach().numpy())
            targets_list.extend(targets.cpu().detach().numpy())
            # log
            train_loss.update(loss.item(), n=images.size(0))

        train_acc /= len(self.train_loader.dataset)
        train_f1 = f1_score(np.array(targets_list), np.array(preds_list), average='macro')

        self.logger.info(f'Epoch:[{self.epoch:03d}/{args.epochs:03d}]')
        self.logger.info(f'Train Loss:{train_loss.avg:.3f} | Acc:{train_acc:.4f} | F1:{train_f1:.4f}')
        return train_loss.avg, train_acc, train_f1

    # Validation or Dev
    def validate(self, args, phase='val'):
        """Evaluate on the validation loader; return (mean loss, accuracy, macro F1)."""
        self.model.eval()
        with torch.no_grad():
            val_loss = AvgMeter()
            val_acc = 0
            preds_list = []
            targets_list = []

            for images, targets in self.val_loader:
                images = torch.as_tensor(images, device=self.device, dtype=torch.float32)
                targets = torch.as_tensor(targets, device=self.device, dtype=torch.long)

                preds = self.model(images)
                loss = self.criterion(preds, targets)

                # Metric
                val_acc += (preds.argmax(dim=1) == targets).sum().item()
                preds_list.extend(preds.argmax(dim=1).cpu().detach().numpy())
                targets_list.extend(targets.cpu().detach().numpy())

                # log
                val_loss.update(loss.item(), n=images.size(0))
            val_acc /= len(self.val_loader.dataset)
            val_f1 = f1_score(np.array(targets_list), np.array(preds_list), average='macro')

            self.logger.info(f'{phase} Loss:{val_loss.avg:.3f} | Acc:{val_acc:.4f} | F1:{val_f1:.4f}')
        return val_loss.avg, val_acc, val_f1

In [17]:
def main(args):
    """Seed all RNGs, create the experiment folder and run one ``Trainer``.

    Args:
        args: configuration with ``seed``, ``model_path`` and ``exp_num``
            (plus whatever ``Trainer`` needs).

    Returns:
        The folder ``<model_path>/<exp_num zero-padded to 3 digits>`` that the
        trainer was given for its outputs.
    """
    print('<---- Training Params ---->')

    # Random seed: Python hash seed first, then every RNG in the same order
    # as before (random, numpy, torch CPU, torch CUDA current / all devices).
    seed = args.seed
    os.environ['PYTHONHASHSEED'] = str(seed)
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)
    torch.backends.cudnn.benchmark = True

    # Create the model directory, then train (Trainer runs in its constructor).
    save_path = os.path.join(args.model_path, args.exp_num.zfill(3))
    os.makedirs(save_path, exist_ok=True)
    Trainer(args, save_path)

    return save_path

In [18]:
# Order GPU devices by PCI bus ID and expose only devices "0" and "1" to this
# process. (The earlier comment mentioned GPUs 2 and 3, which did not match
# the value actually set.)
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0,1",
})

In [19]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print('Device:', device)
# current_device() initialises CUDA and raises on CPU-only machines, so it is
# queried only when CUDA is actually available.
if torch.cuda.is_available():
    print('Current cuda device:', torch.cuda.current_device())
print('Count of using GPUs:', torch.cuda.device_count())

Device: cuda
Current cuda device: 0
Count of using GPUs: 2


In [20]:
# Inference-side setup: target device plus the submission template and the
# train / test metadata frames.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
sub, df_train, df_test = (
    pd.read_csv(csv_path)
    for csv_path in ('./open/sample_submission.csv',
                     './open/train_df2.csv',
                     './open/test_df.csv')
)

In [21]:
# Train one model per cross-validation fold and remember where each run
# stored its outputs.
args.step = 0
models_path = []
# Iterate over args.Kfold so the loop cannot drift from the number of splits
# that Trainer builds with StratifiedKFold(n_splits=args.Kfold).
for s_fold in range(args.Kfold):
    args.fold = s_fold
    args.exp_num = str(s_fold)
    save_path = main(args)
    models_path.append(save_path)

2022-04-12 19:57:18,561 INFO: {'exp_num': '0', 'data_path': './open', 'Kfold': 5, 'model_path': 'class_results/', 'image_type': 'train_1024', 'class_num': 15, 'model_name': 'efficientnet_b3', 'drop_path_rate': 0.2, 'img_size': 224, 'batch_size': 16, 'epochs': 70, 'optimizer': 'Lamb', 'initial_lr': 0.0005, 'weight_decay': 0.001, 'aug_ver': 2, 'scheduler': 'cycle', 'warm_epoch': 5, 'max_lr': 0.001, 'min_lr': 5e-06, 'tmax': 145, 'patience': 5, 'clipping': None, 'amp': True, 'multi_gpu': True, 'logging': False, 'num_workers': 4, 'seed': 42, 'step': 0, 'fold': 0}


<---- Training Params ---->
Dataset size:3421
Dataset size:856


100%|██████████| 214/214 [01:18<00:00,  2.74it/s]
2022-04-12 19:58:41,439 INFO: Epoch:[001/070]
2022-04-12 19:58:41,440 INFO: Train Loss:1.995 | Acc:0.4987 | F1:0.4848
2022-04-12 19:58:52,836 INFO: val Loss:0.408 | Acc:0.9136 | F1:0.9051
2022-04-12 19:58:55,720 INFO: -----------------SAVE:1epoch----------------
100%|██████████| 214/214 [01:07<00:00,  3.19it/s]
2022-04-12 20:00:02,828 INFO: Epoch:[002/070]
2022-04-12 20:00:02,828 INFO: Train Loss:0.230 | Acc:0.9471 | F1:0.9430
2022-04-12 20:00:13,380 INFO: val Loss:0.094 | Acc:0.9439 | F1:0.9324
2022-04-12 20:00:16,280 INFO: -----------------SAVE:2epoch----------------
100%|██████████| 214/214 [01:06<00:00,  3.23it/s]
2022-04-12 20:01:22,586 INFO: Epoch:[003/070]
2022-04-12 20:01:22,586 INFO: Train Loss:0.077 | Acc:0.9795 | F1:0.9792
2022-04-12 20:01:33,586 INFO: val Loss:0.009 | Acc:1.0000 | F1:1.0000
2022-04-12 20:01:36,508 INFO: -----------------SAVE:3epoch----------------
100%|██████████| 214/214 [01:05<00:00,  3.25it/s]
2022-04-12 

<---- Training Params ---->
Dataset size:3421
Dataset size:856


100%|██████████| 214/214 [01:06<00:00,  3.21it/s]
2022-04-12 20:13:01,126 INFO: Epoch:[001/070]
2022-04-12 20:13:01,126 INFO: Train Loss:2.006 | Acc:0.5007 | F1:0.5007
2022-04-12 20:13:12,001 INFO: val Loss:0.482 | Acc:0.8715 | F1:0.8367
2022-04-12 20:13:14,883 INFO: -----------------SAVE:1epoch----------------
100%|██████████| 214/214 [01:05<00:00,  3.26it/s]
2022-04-12 20:14:20,494 INFO: Epoch:[002/070]
2022-04-12 20:14:20,495 INFO: Train Loss:0.210 | Acc:0.9556 | F1:0.9541
2022-04-12 20:14:31,858 INFO: val Loss:0.019 | Acc:0.9977 | F1:0.9978
2022-04-12 20:14:34,579 INFO: -----------------SAVE:2epoch----------------
100%|██████████| 214/214 [01:05<00:00,  3.25it/s]
2022-04-12 20:15:40,460 INFO: Epoch:[003/070]
2022-04-12 20:15:40,461 INFO: Train Loss:0.080 | Acc:0.9772 | F1:0.9759
2022-04-12 20:15:51,730 INFO: val Loss:0.107 | Acc:0.9918 | F1:0.9764
100%|██████████| 214/214 [01:03<00:00,  3.36it/s]
2022-04-12 20:16:55,393 INFO: Epoch:[004/070]
2022-04-12 20:16:55,393 INFO: Train Loss

<---- Training Params ---->
Dataset size:3422
Dataset size:855


100%|██████████| 214/214 [01:08<00:00,  3.11it/s]
2022-04-12 20:32:39,136 INFO: Epoch:[001/070]
2022-04-12 20:32:39,136 INFO: Train Loss:1.991 | Acc:0.4787 | F1:0.4784
2022-04-12 20:32:51,139 INFO: val Loss:0.402 | Acc:0.9146 | F1:0.9061
2022-04-12 20:32:53,946 INFO: -----------------SAVE:1epoch----------------
100%|██████████| 214/214 [01:07<00:00,  3.17it/s]
2022-04-12 20:34:01,458 INFO: Epoch:[002/070]
2022-04-12 20:34:01,459 INFO: Train Loss:0.219 | Acc:0.9497 | F1:0.9496
2022-04-12 20:34:13,110 INFO: val Loss:0.215 | Acc:0.9462 | F1:0.9365
2022-04-12 20:34:15,872 INFO: -----------------SAVE:2epoch----------------
100%|██████████| 214/214 [01:06<00:00,  3.20it/s]
2022-04-12 20:35:22,661 INFO: Epoch:[003/070]
2022-04-12 20:35:22,662 INFO: Train Loss:0.082 | Acc:0.9810 | F1:0.9798
2022-04-12 20:35:34,368 INFO: val Loss:0.022 | Acc:0.9953 | F1:0.9953
2022-04-12 20:35:37,107 INFO: -----------------SAVE:3epoch----------------
100%|██████████| 214/214 [01:06<00:00,  3.20it/s]
2022-04-12 

<---- Training Params ---->
Dataset size:3422
Dataset size:855


100%|██████████| 214/214 [01:07<00:00,  3.18it/s]
2022-04-12 20:49:57,079 INFO: Epoch:[001/070]
2022-04-12 20:49:57,080 INFO: Train Loss:1.979 | Acc:0.5073 | F1:0.4965
2022-04-12 20:50:08,427 INFO: val Loss:0.378 | Acc:0.9310 | F1:0.9143
2022-04-12 20:50:11,134 INFO: -----------------SAVE:1epoch----------------
100%|██████████| 214/214 [01:07<00:00,  3.16it/s]
2022-04-12 20:51:18,929 INFO: Epoch:[002/070]
2022-04-12 20:51:18,930 INFO: Train Loss:0.226 | Acc:0.9439 | F1:0.9428
2022-04-12 20:51:30,437 INFO: val Loss:0.029 | Acc:0.9977 | F1:0.9979
2022-04-12 20:51:33,165 INFO: -----------------SAVE:2epoch----------------
100%|██████████| 214/214 [01:06<00:00,  3.19it/s]
2022-04-12 20:52:40,170 INFO: Epoch:[003/070]
2022-04-12 20:52:40,171 INFO: Train Loss:0.082 | Acc:0.9790 | F1:0.9771
2022-04-12 20:52:51,252 INFO: val Loss:0.022 | Acc:0.9930 | F1:0.9930
2022-04-12 20:52:53,982 INFO: -----------------SAVE:3epoch----------------
100%|██████████| 214/214 [01:07<00:00,  3.16it/s]
2022-04-12 

<---- Training Params ---->
Dataset size:3422
Dataset size:855


100%|██████████| 214/214 [01:06<00:00,  3.22it/s]
2022-04-12 21:08:32,566 INFO: Epoch:[001/070]
2022-04-12 21:08:32,566 INFO: Train Loss:2.005 | Acc:0.4883 | F1:0.4779
2022-04-12 21:08:44,181 INFO: val Loss:0.491 | Acc:0.8503 | F1:0.8069
2022-04-12 21:08:46,971 INFO: -----------------SAVE:1epoch----------------
100%|██████████| 214/214 [01:07<00:00,  3.19it/s]
2022-04-12 21:09:54,090 INFO: Epoch:[002/070]
2022-04-12 21:09:54,091 INFO: Train Loss:0.233 | Acc:0.9506 | F1:0.9484
2022-04-12 21:10:05,543 INFO: val Loss:0.069 | Acc:0.9673 | F1:0.9673
2022-04-12 21:10:08,270 INFO: -----------------SAVE:2epoch----------------
100%|██████████| 214/214 [01:06<00:00,  3.22it/s]
2022-04-12 21:11:14,735 INFO: Epoch:[003/070]
2022-04-12 21:11:14,736 INFO: Train Loss:0.065 | Acc:0.9845 | F1:0.9837
2022-04-12 21:11:26,135 INFO: val Loss:0.026 | Acc:0.9988 | F1:0.9974
2022-04-12 21:11:28,826 INFO: -----------------SAVE:3epoch----------------
100%|██████████| 214/214 [01:07<00:00,  3.18it/s]
2022-04-12 

In [22]:
# Use the training resolution from the config so inference can never run at a
# different size than the models were trained with.
img_size = args.img_size

# Deterministic (version 1) pipeline, no shuffling: prediction order must
# match the row order of df_test.
test_transform = get_train_augmentation(img_size=img_size, ver=1)
test_dataset = Test_dataset(df_test, test_transform)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=0)

Test Dataset size:2154


In [23]:
# models_path = ['./class_results/000', './class_results/001', './class_results/002', './class_results/003', './class_results/004']

In [24]:
def predict(encoder_name, test_loader, device, model_path):
    """Run one saved checkpoint over ``test_loader`` and return softmax probabilities.

    Args:
        encoder_name: Backbone name forwarded to ``Network_test``.
        test_loader: Iterable yielding batches of images (no labels).
        device: Device the model and every batch are moved to.
        model_path: Directory containing ``best_model.pth``. The file must hold
            a dict whose ``'state_dict'`` entry stores the model weights.

    Returns:
        ``np.ndarray`` with one row of class probabilities per sample, in the
        order the loader produced them.
    """
    model = Network_test(encoder_name).to(device)
    # map_location remaps the stored tensors onto the inference device, so a
    # checkpoint written on a GPU can still be restored on a CPU-only machine
    # (or on a different GPU index).
    checkpoint = torch.load(opj(model_path, 'best_model.pth'), map_location=device)
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()

    preds_list = []
    with torch.no_grad():
        for images in tqdm(test_loader):
            images = torch.as_tensor(images, device=device, dtype=torch.float32)
            logits = model(images)
            # Normalise over dim 1 so each row sums to one.
            probs = torch.softmax(logits, dim=1)
            preds_list.extend(probs.cpu().tolist())

    return np.array(preds_list)

def ensemble_5fold(model_path_list, test_loader, device, encoder_name='regnety_040'):
    """Average the probabilities predicted by every checkpoint in ``model_path_list``.

    Despite the name, any number of checkpoints (one or more) is accepted; the
    result is the element-wise mean of the per-checkpoint outputs of
    ``predict``.

    Args:
        model_path_list: Iterable of checkpoint directories, each handed to
            ``predict`` as ``model_path``.
        test_loader: Loader forwarded unchanged to ``predict``.
        device: Device forwarded unchanged to ``predict``.
        encoder_name: Backbone name forwarded to ``predict``.

    Returns:
        Array of averaged probabilities with the same shape as a single
        ``predict`` output.

    Raises:
        ValueError: If ``model_path_list`` is empty.
    """
    running_sum = None
    n_models = 0
    for model_path in model_path_list:
        prediction = predict(
            encoder_name=encoder_name,
            test_loader=test_loader,
            device=device,
            model_path=model_path,
        )
        # Accumulate left to right, and out of place so the array returned by
        # `predict` is never mutated.
        running_sum = prediction if running_sum is None else running_sum + prediction
        n_models += 1

    if n_models == 0:
        raise ValueError('model_path_list must contain at least one checkpoint directory')

    return running_sum / n_models

In [25]:
# Run every checkpoint in `models_path` over the test loader and combine the
# per-checkpoint probabilities into one array.
ensemble = ensemble_5fold(models_path, test_loader, device)

100%|██████████| 34/34 [01:16<00:00,  2.25s/it]
100%|██████████| 34/34 [01:14<00:00,  2.20s/it]
100%|██████████| 34/34 [01:14<00:00,  2.20s/it]
100%|██████████| 34/34 [01:14<00:00,  2.19s/it]
100%|██████████| 34/34 [01:14<00:00,  2.20s/it]


In [26]:
# Pick the highest-probability class id in each row of the ensemble output.
f_pred = np.argmax(ensemble, axis=1).tolist()
f_pred

[10,
 4,
 12,
 10,
 10,
 8,
 2,
 5,
 6,
 6,
 0,
 13,
 14,
 9,
 10,
 0,
 4,
 3,
 3,
 8,
 10,
 14,
 0,
 9,
 8,
 9,
 8,
 9,
 6,
 13,
 9,
 1,
 9,
 14,
 14,
 14,
 6,
 1,
 6,
 7,
 7,
 1,
 9,
 2,
 7,
 10,
 11,
 2,
 14,
 9,
 1,
 12,
 5,
 5,
 14,
 7,
 3,
 3,
 6,
 2,
 12,
 8,
 2,
 14,
 2,
 6,
 12,
 2,
 8,
 7,
 4,
 13,
 5,
 10,
 6,
 14,
 1,
 6,
 1,
 1,
 8,
 13,
 9,
 13,
 12,
 0,
 14,
 3,
 6,
 14,
 6,
 14,
 1,
 7,
 9,
 10,
 12,
 12,
 7,
 7,
 1,
 3,
 8,
 2,
 13,
 0,
 14,
 8,
 12,
 5,
 6,
 9,
 12,
 12,
 14,
 2,
 4,
 8,
 2,
 6,
 9,
 3,
 7,
 12,
 13,
 12,
 10,
 12,
 8,
 9,
 4,
 7,
 8,
 7,
 2,
 4,
 9,
 0,
 8,
 13,
 12,
 5,
 1,
 1,
 14,
 5,
 3,
 14,
 14,
 13,
 14,
 13,
 12,
 8,
 14,
 10,
 11,
 10,
 5,
 7,
 9,
 9,
 9,
 1,
 3,
 6,
 8,
 13,
 14,
 1,
 3,
 3,
 9,
 6,
 1,
 8,
 9,
 14,
 2,
 14,
 6,
 4,
 0,
 1,
 8,
 3,
 10,
 7,
 13,
 13,
 8,
 2,
 14,
 5,
 8,
 9,
 14,
 14,
 14,
 5,
 12,
 1,
 9,
 0,
 8,
 6,
 8,
 8,
 5,
 4,
 6,
 2,
 9,
 13,
 8,
 3,
 2,
 8,
 12,
 0,
 10,
 4,
 10,
 0,
 2,
 7,
 8,
 6,
 1,
 12,
 8,
 9

In [27]:
# Rebuild the class-name -> integer-id mapping from the original training labels.
train_y = pd.read_csv("./open/train_df.csv")
train_labels = train_y["class"]

# Ids are assigned by position in the sorted list of distinct class names.
label_unique = {
    class_name: class_id
    for class_id, class_name in enumerate(sorted(np.unique(train_labels)))
}


In [28]:
# Invert name -> id into id -> name so predicted ids can be decoded.
label_decoder = dict(zip(label_unique.values(), label_unique.keys()))

# Translate every predicted id into its class name, preserving order.
f_result = list(map(label_decoder.__getitem__, f_pred))

In [29]:
# Display the decoded class names as a visual sanity check.
f_result

['tile',
 'grid',
 'transistor',
 'tile',
 'tile',
 'pill',
 'capsule',
 'hazelnut',
 'leather',
 'leather',
 'bottle',
 'wood',
 'zipper',
 'screw',
 'tile',
 'bottle',
 'grid',
 'carpet',
 'carpet',
 'pill',
 'tile',
 'zipper',
 'bottle',
 'screw',
 'pill',
 'screw',
 'pill',
 'screw',
 'leather',
 'wood',
 'screw',
 'cable',
 'screw',
 'zipper',
 'zipper',
 'zipper',
 'leather',
 'cable',
 'leather',
 'metal_nut',
 'metal_nut',
 'cable',
 'screw',
 'capsule',
 'metal_nut',
 'tile',
 'toothbrush',
 'capsule',
 'zipper',
 'screw',
 'cable',
 'transistor',
 'hazelnut',
 'hazelnut',
 'zipper',
 'metal_nut',
 'carpet',
 'carpet',
 'leather',
 'capsule',
 'transistor',
 'pill',
 'capsule',
 'zipper',
 'capsule',
 'leather',
 'transistor',
 'capsule',
 'pill',
 'metal_nut',
 'grid',
 'wood',
 'hazelnut',
 'tile',
 'leather',
 'zipper',
 'cable',
 'leather',
 'cable',
 'cable',
 'pill',
 'wood',
 'screw',
 'wood',
 'transistor',
 'bottle',
 'zipper',
 'carpet',
 'leather',
 'zipper',
 'leat

In [30]:
# Load the submission template and fill its `label` column with the decoded
# predictions; the list is matched to rows by position.
submission = pd.read_csv("./open/sample_submission.csv").assign(label=f_result)

submission

Unnamed: 0,index,label
0,0,tile
1,1,grid
2,2,transistor
3,3,tile
4,4,tile
...,...,...
2149,2149,tile
2150,2150,screw
2151,2151,grid
2152,2152,cable


In [31]:
# Write the filled-in submission to disk; index=False leaves the DataFrame's
# row index out of the file.
submission.to_csv("class_result.csv", index = False)