In [1]:
import os
import cv2
import time
import random
import logging
import easydict
import numpy as np
import pandas as pd
from tqdm import tqdm
from os.path import join as opj
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
from PIL import Image
import math

import timm
import torch
import torch.nn as nn
import torch_optimizer as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from torch.cuda.amp import autocast, grad_scaler
from torchvision import transforms
from torch import Tensor
from torchvision.transforms import functional as F


import warnings
warnings.filterwarnings('ignore')

In [2]:
DATA_DIR = './open'

train_df = pd.read_csv(os.path.join(DATA_DIR, 'train_df.csv'))
test_df = pd.read_csv(os.path.join(DATA_DIR, 'test_df.csv'))

print(train_df.head())
print(test_df.head())
print(train_df.shape)
print(test_df.shape)

   index  file_name       class state            label
0      0  10000.png  transistor  good  transistor-good
1      1  10001.png     capsule  good     capsule-good
2      2  10002.png  transistor  good  transistor-good
3      3  10003.png        wood  good        wood-good
4      4  10004.png      bottle  good      bottle-good
   index  file_name
0      0  20000.png
1      1  20001.png
2      2  20002.png
3      3  20003.png
4      4  20004.png
(4277, 5)
(2154, 2)


In [3]:
args = easydict.EasyDict(
    {'exp_num':'0',
     'class_name':'',
     'class_num':0,
     
     # Path settings
     'data_path':'./open',
     'Kfold':7,
     'model_path':'results/',
     'image_type':'train_1024', 

     # Model parameter settings
     'model_name':'regnety_064',
     'drop_path_rate':0.2,
     
     # Training parameter settings
     ## Base Parameter
     'img_size':352,
     'batch_size':8,
     'epochs':70,
     'optimizer':'Lamb',
     'initial_lr':4e-5,
     'weight_decay':1e-4,

     ## Augmentation
     'aug_ver':2,

     ## Scheduler (OnecycleLR)
     'scheduler':'cycle',
     'warm_epoch':5,
     'max_lr':5e-4,

     ### Cosine Annealing
     'min_lr':5e-6,
     'tmax':145,

     ## etc.
     'patience':15,
     'clipping':None,

     # Hardware settings
     'amp':True,
     'multi_gpu':True,
     'logging':False,
     'num_workers':4,
     'seed':42
     
     
    })

In [4]:
# Warmup Learning rate scheduler
from torch.optim.lr_scheduler import _LRScheduler
class WarmUpLR(_LRScheduler):
    """warmup_training learning rate scheduler
    Args:
        optimizer: optimzier(e.g. SGD)
        total_iters: totoal_iters of warmup phase
    """
    def __init__(self, optimizer, total_iters, last_epoch=-1):
        
        self.total_iters = total_iters
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """we will use the first m batches, and set the learning
        rate to base_lr * m / total_iters
        """
        return [base_lr * self.last_epoch / (self.total_iters + 1e-8) for base_lr in self.base_lrs]

# Logging
def get_root_logger(logger_name='basicsr',
                    log_level=logging.INFO,
                    log_file=None):

    logger = logging.getLogger(logger_name)
    # if the logger has been initialized, just return it
    if logger.hasHandlers():
        return logger

    format_str = '%(asctime)s %(levelname)s: %(message)s'
    logging.basicConfig(format=format_str, level=log_level)

    if log_file is not None:
        file_handler = logging.FileHandler(log_file, 'w')
        file_handler.setFormatter(logging.Formatter(format_str))
        file_handler.setLevel(log_level)
        logger.addHandler(file_handler)

    return logger

class AvgMeter(object):
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0
        self.losses = []

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
        self.losses.append(val)

In [5]:
class RandomRotation(transforms.RandomRotation):
    def __init__(self, p: float, degrees: int):
        super(RandomRotation, self).__init__(degrees)
        self.p = p

    def forward(self, img):
        if torch.rand(1) < self.p:
            fill = self.fill
            if isinstance(img, Tensor):
                if isinstance(fill, (int, float)):
                    fill = [float(fill)] * F.get_image_num_channels(img)
                else:
                    fill = [float(f) for f in fill]
            angle = self.get_params(self.degrees)

            img = F.rotate(img, angle, self.resample, self.expand, self.center, fill)
        return img

In [6]:
import random
import numpy as np
from torchvision import transforms

class CutPaste(object):

    def __init__(self, transform = True, type = 'binary'):

        '''
        This class creates to different augmentation CutPaste and CutPaste-Scar. Moreover, it returns augmented images
        for binary and 3 way classification
        :arg
        :transform[binary]: - if True use Color Jitter augmentations for patches
        :type[str]: options ['binary' or '3way'] - classification type
        '''
        self.type = type
        if transform:
            self.transform = transforms.ColorJitter(brightness = 0.1,
                                                      contrast = 0.1,
                                                      saturation = 0.1,
                                                      hue = 0.1)
        else:
            self.transform = None

    @staticmethod
    def crop_and_paste_patch(image, patch_w, patch_h, transform, rotation=False):
        """
        Crop patch from original image and paste it randomly on the same image.
        :image: [PIL] _ original image
        :patch_w: [int] _ width of the patch
        :patch_h: [int] _ height of the patch
        :transform: [binary] _ if True use Color Jitter augmentation
        :rotation: [binary[ _ if True randomly rotates image from (-45, 45) range
        :return: augmented image
        """

        org_w, org_h = image.size
        mask = None

        patch_left, patch_top = random.randint(0, org_w - patch_w), random.randint(0, org_h - patch_h)
        patch_right, patch_bottom = patch_left + patch_w, patch_top + patch_h
        patch = image.crop((patch_left, patch_top, patch_right, patch_bottom))
        if transform:
            patch= transform(patch)

        if rotation:
            random_rotate = random.uniform(*rotation)
            patch = patch.convert("RGBA").rotate(random_rotate, expand=True)
            mask = patch.split()[-1]

        # new location
        paste_left, paste_top = random.randint(0, org_w - patch_w), random.randint(0, org_h - patch_h)
        aug_image = image.copy()
        aug_image.paste(patch, (paste_left, paste_top), mask=mask)
        return aug_image

    def cutpaste(self, image, area_ratio = (0.02, 0.15), aspect_ratio = ((0.3, 1) , (1, 3.3))):
        '''
        CutPaste augmentation
        :image: [PIL] - original image
        :area_ratio: [tuple] - range for area ratio for patch
        :aspect_ratio: [tuple] -  range for aspect ratio
        :return: PIL image after CutPaste transformation
        '''

        img_area = image.size[0] * image.size[1]
        patch_area = random.uniform(*area_ratio) * img_area
        patch_aspect = random.choice([random.uniform(*aspect_ratio[0]), random.uniform(*aspect_ratio[1])])
        patch_w  = int(np.sqrt(patch_area*patch_aspect))
        patch_h = int(np.sqrt(patch_area/patch_aspect))
        cutpaste = self.crop_and_paste_patch(image, patch_w, patch_h, self.transform, rotation = False)
        return cutpaste

    def cutpaste_scar(self, image, width = [2,16], length = [10,25], rotation = (-45, 45)):
        '''
        :image: [PIL] - original image
        :width: [list] - range for width of patch
        :length: [list] - range for length of patch
        :rotation: [tuple] - range for rotation
        :return: PIL image after CutPaste-Scare transformation
        '''
        patch_w, patch_h = random.randint(*width), random.randint(*length)
        cutpaste_scar = self.crop_and_paste_patch(image, patch_w, patch_h, self.transform, rotation = rotation)
        return cutpaste_scar

    def __call__(self, image):
        '''
        :image: [PIL] - original image
        :return: if type == 'binary' returns original image and randomly chosen transformation, else it returns
                original image, an image after CutPaste transformation and an image after CutPaste-Scar transformation
        '''
        if self.type == 'binary':
            image = random.choice([image, self.cutpaste(image), self.cutpaste_scar(image)])
            return image

        elif self.type == '3way':
            cutpaste = self.cutpaste(image)
            scar = self.cutpaste_scar(image)
            return image, cutpaste, scar

In [7]:
class Train_Dataset(Dataset):
    def __init__(self, df, transform=None):
        self.img_path = df['file_name'].values
        self.target = df['state2'].values 
        self.cutpaste_transform = CutPaste(type='binary')
        self.transform = transform

        print(f'Dataset size:{len(self.img_path)}')

    def __getitem__(self, idx):
#         image = cv2.imread(opj('./open/train/', self.img_path[idx])).astype(np.float32)
#         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0
#         target = self.target[idx]

#         if self.transform is not None:
#             image = self.transform(torch.from_numpy(image.transpose(2,0,1)))
        
        image = Image.open(opj(opj('./open/train/', self.img_path[idx]))).convert('RGB')
        image = self.cutpaste_transform(image)
#         image = [self.transform(i) for i in out]
        image = self.transform(image)
        target = self.target[idx]

        return image, target

    def __len__(self):
        return len(self.img_path)

class Test_dataset(Dataset):
    def __init__(self, df, transform=None):
#         image = cv2.imread(opj('./open/train/', self.img_path[idx])).astype(np.float32)
#         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0
#         target = self.target[idx]

#         if self.transform is not None:
#             image = self.transform(torch.from_numpy(image.transpose(2,0,1)))
        
        image = Image.open(opj(opj('./open/test/', self.img_path[idx]))).convert('RGB')
        image = self.transform(image)
        target = self.target[idx]

        return image

    def __len__(self):
        return len(self.test_img_path)

def get_loader(df, phase: str, batch_size, shuffle,
               num_workers, transform):
    if phase == 'test':
        dataset = Test_dataset(df, transform)
        data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers, 
                                 pin_memory=True)
#     elif phase == 'val':
#         dataset = Train_Dataset(df, transform)
#         data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers, 
#                                  pin_memory=True)
    else:
        dataset = Train_Dataset(df, transform)
        data_loader = DataLoader(dataset, batch_size=batch_size, num_workers=num_workers, pin_memory=True,
                                 drop_last=False)
    return data_loader

def get_train_augmentation(img_size, ver, cls):
    if ver==1: # for validset
        transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Resize((img_size, img_size)),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225]),
            ])

    if ver == 2:
        if cls == 'cable' or cls == 'capsule' or cls == 'pill':
            transform = transforms.Compose([
                transforms.Resize((img_size, img_size)),
                RandomRotation(0.7, degrees=5),
                transforms.CenterCrop(340),
                transforms.ToTensor(),
                transforms.Normalize([.485, .456, .406], [.229, .224, .225])
                    ])
        elif cls == 'toothbrush' or cls == 'zipper' or cls == 'transistor':
            transform = transforms.Compose([
                transforms.Resize((img_size, img_size)),
                transforms.RandomHorizontalFlip(0.5),
                RandomRotation(0.7, degrees=5),
                transforms.CenterCrop(340),
                transforms.ToTensor(),
                transforms.Normalize([.485, .456, .406], [.229, .224, .225])
            ])
        elif cls == 'metal_nut':
            transform = transforms.Compose([
                transforms.Resize((img_size, img_size)),
                RandomRotation(0.7, 180),
                transforms.CenterCrop(340),
                transforms.ToTensor(),
                transforms.Normalize([.485, .456, .406], [.229, .224, .225])
            ])
        else:
            transform = transforms.Compose([
                transforms.Resize((img_size, img_size)),
                transforms.RandomHorizontalFlip(0.5),
                transforms.RandomVerticalFlip(0.3),
                RandomRotation(0.7, degrees=180),
                transforms.CenterCrop(340),
                transforms.ToTensor(),
                transforms.Normalize([.485, .456, .406], [.229, .224, .225])
            ])
    
    
    return transform

In [8]:
class Network(nn.Module):
    def __init__(self, args):
        super().__init__()
        self.cnn = timm.create_model( # timm ImageNet pre-trained 모델 load
            args.model_name,
            pretrained=True,
            num_classes = args.class_num, drop_path_rate=args.drop_path_rate
        )

#         self.model_ft = coatnet_0()
#         num_ftrs = self.model_ft.fc.in_features
#         self.model_ft.fc = nn.Linear(num_ftrs, args.class_num)
        
    def forward(self, x):
        out = self.cnn(x)
        return out

class Network_test(nn.Module):
    def __init__(self, encoder_name):
        super().__init__()
        self.cnn = timm.create_model( # timm ImageNet pre-trained 모델 load
            args.model_name,
            pretrained=True,
            num_classes = args.class_num
        )

#         self.model_ft = coatnet_4()
#         num_ftrs = self.model_ft.fc.in_features
#         self.model_ft.fc = nn.Linear(num_ftrs, args.class_num)

    def forward(self, x):
        out = self.cnn(x)
        return out

In [9]:
# mixup augmentation을 위한 코드입니다.
def mixup_data(x, y, alpha=1.0, use_cuda=True):
    '''Returns mixed inputs, pairs of targets, and lambda'''
    if alpha > 0:
        lam = np.random.beta(alpha, alpha)
    else:
        lam = 1

    batch_size = x.size()[0]
    if use_cuda:
        index = torch.randperm(batch_size).cuda()
    else:
        index = torch.randperm(batch_size)

    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam


def mixup_criterion(criterion, pred, y_a, y_b, lam):
    return lam * criterion(pred, y_a) + (1 - lam) * criterion(pred, y_b)


In [10]:
class CosineAnnealingWarmupRestarts(_LRScheduler):
    """
        optimizer (Optimizer): Wrapped optimizer.
        first_cycle_steps (int): First cycle step size.
        cycle_mult(float): Cycle steps magnification. Default: -1.
        max_lr(float): First cycle's max learning rate. Default: 0.1.
        min_lr(float): Min learning rate. Default: 0.001.
        warmup_steps(int): Linear warmup step size. Default: 0.
        gamma(float): Decrease rate of max learning rate by cycle. Default: 1.
        last_epoch (int): The index of last epoch. Default: -1.
    """
    
    def __init__(self,
                 optimizer : torch.optim.Optimizer,
                 first_cycle_steps : int,
                 cycle_mult : float = 1.,
                 max_lr : float = 0.1,
                 min_lr : float = 0.001,
                 warmup_steps : int = 0,
                 gamma : float = 1.,
                 last_epoch : int = -1
        ):
        assert warmup_steps < first_cycle_steps
        
        self.first_cycle_steps = first_cycle_steps # first cycle step size
        self.cycle_mult = cycle_mult # cycle steps magnification
        self.base_max_lr = max_lr # first max learning rate
        self.max_lr = max_lr # max learning rate in the current cycle
        self.min_lr = min_lr # min learning rate
        self.warmup_steps = warmup_steps # warmup step size
        self.gamma = gamma # decrease rate of max learning rate by cycle
        
        self.cur_cycle_steps = first_cycle_steps # first cycle step size
        self.cycle = 0 # cycle count
        self.step_in_cycle = last_epoch # step size of the current cycle
        
        super(CosineAnnealingWarmupRestarts, self).__init__(optimizer, last_epoch)
        
        # set learning rate min_lr
        self.init_lr()
    
    def init_lr(self):
        self.base_lrs = []
        for param_group in self.optimizer.param_groups:
            param_group['lr'] = self.min_lr
            self.base_lrs.append(self.min_lr)
    
    def get_lr(self):
        if self.step_in_cycle == -1:
            return self.base_lrs
        elif self.step_in_cycle < self.warmup_steps:
            return [(self.max_lr - base_lr)*self.step_in_cycle / self.warmup_steps + base_lr for base_lr in self.base_lrs]
        else:
            return [base_lr + (self.max_lr - base_lr) \
                    * (1 + math.cos(math.pi * (self.step_in_cycle-self.warmup_steps) \
                                    / (self.cur_cycle_steps - self.warmup_steps))) / 2
                    for base_lr in self.base_lrs]

    def step(self, epoch=None):
        if epoch is None:
            epoch = self.last_epoch + 1
            self.step_in_cycle = self.step_in_cycle + 1
            if self.step_in_cycle >= self.cur_cycle_steps:
                self.cycle += 1
                self.step_in_cycle = self.step_in_cycle - self.cur_cycle_steps
                self.cur_cycle_steps = int((self.cur_cycle_steps - self.warmup_steps) * self.cycle_mult) + self.warmup_steps
        else:
            if epoch >= self.first_cycle_steps:
                if self.cycle_mult == 1.:
                    self.step_in_cycle = epoch % self.first_cycle_steps
                    self.cycle = epoch // self.first_cycle_steps
                else:
                    n = int(math.log((epoch / self.first_cycle_steps * (self.cycle_mult - 1) + 1), self.cycle_mult))
                    self.cycle = n
                    self.step_in_cycle = epoch - int(self.first_cycle_steps * (self.cycle_mult ** n - 1) / (self.cycle_mult - 1))
                    self.cur_cycle_steps = self.first_cycle_steps * self.cycle_mult ** (n)
            else:
                self.cur_cycle_steps = self.first_cycle_steps
                self.step_in_cycle = epoch
                
        self.max_lr = self.base_max_lr * (self.gamma**self.cycle)
        self.last_epoch = math.floor(epoch)
        for param_group, lr in zip(self.optimizer.param_groups, self.get_lr()):
            param_group['lr'] = lr

In [11]:
from torch.nn.modules.loss import _WeightedLoss
class SmoothCrossEntropyLoss(_WeightedLoss):
    def __init__(self, weight=None, reduction='mean', smoothing=0.0):
        super().__init__(weight=weight, reduction=reduction)
        self.smoothing = smoothing
        self.weight = weight
        self.reduction = reduction

    @staticmethod
    def _smooth_one_hot(targets:torch.Tensor, n_classes:int, smoothing=0.0):
        assert 0 <= smoothing < 1
        with torch.no_grad():
            targets = torch.empty(size=(targets.size(0), n_classes),
                    device=targets.device) \
                .fill_(smoothing /(n_classes-1)) \
                .scatter_(1, targets.data.unsqueeze(1), 1.-smoothing)
        return targets

    def forward(self, inputs, targets):
        targets = SmoothCrossEntropyLoss._smooth_one_hot(targets, inputs.size(-1),
            self.smoothing)
        lsm = F.log_softmax(inputs, -1)

        if self.weight is not None:
            lsm = lsm * self.weight.unsqueeze(0)

        loss = -(targets * lsm).sum(-1)

        if  self.reduction == 'sum':
            loss = loss.sum()
        elif  self.reduction == 'mean':
            loss = loss.mean()

        return loss

In [12]:
class Trainer():
    def __init__(self, args, save_path):
        '''
        args: arguments
        save_path: Model 가중치 저장 경로
        '''
        super(Trainer, self).__init__()
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # Logging
        log_file = os.path.join(save_path, 'log.log')
        self.logger = get_root_logger(logger_name='IR', log_level=logging.INFO, log_file=log_file)
        self.logger.info(args)
        # self.logger.info(args.tag)

        # Train, Valid Set load
        ############################################################################
        if args.step == 0 :
            df_train = pd.read_csv(opj(args.data_path, 'train_df_%s.csv' % class_name))
#             df_train = df_train[df_train['class'] == class_name]
#             df_train = df_train.reset_index(drop=True)
        else :
            df_train = pd.read_csv(opj(args.data_path, f'train_{args.step}step.csv'))

#         if args.image_type is not None:
#             df_train['img_path'] = df_train['img_path'].apply(lambda x:x.replace('train_imgs', args.image_type))
#             df_train['img_path'] = df_train['img_path'].apply(lambda x:x.replace('test_imgs', 'test_1024'))

        kf = StratifiedKFold(n_splits=args.Kfold, shuffle=True, random_state=args.seed)
        for fold, (train_idx, val_idx) in enumerate(kf.split(range(len(df_train)), y=df_train['state2'])):
            df_train.loc[val_idx, 'fold'] = fold
        val_idx = list(df_train[df_train['fold'] == int(args.fold)].index)

        df_val = df_train[df_train['fold'] == args.fold].reset_index(drop=True)
        df_train = df_train[df_train['fold'] != args.fold].reset_index(drop=True)
        
#         y_train=df_train['state2']
#         class_counts = y_train.value_counts().to_list() #43200, 4800
#         num_samples = sum(class_counts) # 48000 - 전체 데이터 갯수
#         labels = y_train.to_list()

#         #클래스별 가중치 부여 [48000/43200, 48000/4800] => class 1에 가중치 높게 부여하게 됨
#         over_class_weights = [num_samples / class_counts[i] for i in range(len(class_counts))] 

#         # 해당 데이터의 label에 해당되는 가중치
#         weights = [over_class_weights[labels[i]] for i in range(int(num_samples))] #해당 레이블마다의 가중치 비율
#         sampler = WeightedRandomSampler(torch.DoubleTensor(weights), int(num_samples))

        # Augmentation
        self.train_transform = get_train_augmentation(img_size=args.img_size, ver=args.aug_ver, cls=args.class_name)
        self.test_transform = get_train_augmentation(img_size=args.img_size, ver=1, cls=args.class_name)

        # TrainLoader
        self.train_loader = get_loader(df_train, phase='train', batch_size=args.batch_size, shuffle=True,
                                       num_workers=args.num_workers, transform=self.train_transform)
        self.val_loader = get_loader(df_val, phase='train', batch_size=args.batch_size, shuffle=False,
                                       num_workers=args.num_workers, transform=self.test_transform)

        # Network
        self.model = Network(args).to(self.device)
        
        # weighted crossentropy loss를 위한 weight 계산 함수
        def get_class_weight():
            return 1 / df_train['state2'].value_counts().sort_index().values

        self.class_weight = get_class_weight()

        # Loss
#         self.criterion = SmoothCrossEntropyLoss(smoothing=0.2)
        self.criterion = nn.CrossEntropyLoss(weight= torch.Tensor(self.class_weight).cuda())
#         self.criterion = CutMixCrossEntropyLoss(True)

        
        # Optimizer & Scheduler
        self.optimizer = optim.Lamb(self.model.parameters(), lr=args.initial_lr, weight_decay=args.weight_decay)
        
        iter_per_epoch = len(self.train_loader)
        self.warmup_scheduler = WarmUpLR(self.optimizer, iter_per_epoch * args.warm_epoch)

        if args.scheduler == 'step':
            self.scheduler = torch.optim.lr_scheduler.MultiStepLR(self.optimizer, milestones=args.milestone, gamma=args.lr_factor, verbose=True)
        elif args.scheduler == 'cos':
            tmax = args.tmax # half-cycle 
            self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, T_max = tmax, eta_min=args.min_lr, verbose=True)
        elif args.scheduler == 'cycle':
            self.scheduler = torch.optim.lr_scheduler.OneCycleLR(self.optimizer, max_lr=args.max_lr, steps_per_epoch=iter_per_epoch, epochs=args.epochs)
        else:
            self.scheduler = CosineAnnealingWarmupRestarts(self.optimizer,
                                          first_cycle_steps=60,
                                          cycle_mult=1.0,
                                          max_lr=args.max_lr,
                                          min_lr=args.min_lr,
                                          warmup_steps=args.warm_epoch,
                                          gamma=1.0)
            
        if args.multi_gpu:
            self.model = nn.DataParallel(self.model).to(self.device)

        # Train / Validate
        best_loss = np.inf
        best_acc = 0
        best_epoch = 0
        early_stopping = 0
        start = time.time()
        for epoch in range(1, args.epochs+1):
            self.epoch = epoch

            if args.scheduler == 'cos':
                if epoch > args.warm_epoch:
                    self.scheduler.step()

            # Training
            train_loss, train_acc, train_f1 = self.training(args)

            # Model weight in Multi_GPU or Single GPU
            state_dict= self.model.module.state_dict() if args.multi_gpu else self.model.state_dict()

            # Validation
            val_loss, val_acc, val_f1 = self.validate(args, phase='val')

            # Save models
            if val_loss < best_loss:
                early_stopping = 0
                best_epoch = epoch
                best_loss = val_loss
                best_acc = val_acc
                best_f1 = val_f1

                torch.save({'epoch':epoch,
                            'state_dict':state_dict,
                            'optimizer': self.optimizer.state_dict(),
                            'scheduler': self.scheduler.state_dict(),
                    }, os.path.join(save_path, 'best_model.pth'))
                self.logger.info(f'-----------------SAVE:{best_epoch}epoch----------------')
            else:
                early_stopping += 1

            # Early Stopping
            if early_stopping == args.patience:
                break

        self.logger.info(f'\nBest Val Epoch:{best_epoch} | Val Loss:{best_loss:.4f} | Val Acc:{best_acc:.4f} | Val F1:{best_f1:.4f}')
        end = time.time()
        self.logger.info(f'Total Process time:{(end - start) / 60:.3f}Minute')

    # Training
    def training(self, args):
        self.model.train()
        train_loss = AvgMeter()
        train_acc = 0
        preds_list = []
        targets_list = []

        scaler = grad_scaler.GradScaler()
        for i, (images, targets) in enumerate(tqdm(self.train_loader)):
            
            images = torch.tensor(images, device=self.device, dtype=torch.float32)
            targets = torch.tensor(targets, device=self.device, dtype=torch.long)
            
            if i % 2 == 0:
                images, targets_a, targets_b, lam = mixup_data(images, targets)
            
   
            if self.epoch <= args.warm_epoch:
                self.warmup_scheduler.step()

            self.model.zero_grad(set_to_none=True)
            if args.amp:
                with autocast():
                    preds = self.model(images)
                    loss = self.criterion(preds, targets)
                    
                    if i % 3 == 0:
                        loss = mixup_criterion(self.criterion, preds, targets_a, targets_b, lam)
                    
                    scaler.scale(loss).backward()

                # Gradient Clipping
                if args.clipping is not None:
                    scaler.unscale_(self.optimizer)
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), args.clipping)

                scaler.step(self.optimizer)
                scaler.update()

            else:
                preds = self.model(images)
                loss = self.criterion(preds, targets)
                loss.backward()
                nn.utils.clip_grad_norm_(self.model.parameters(), args.clipping)
                self.optimizer.step()

            if args.scheduler == 'cycle':
                if self.epoch > args.warm_epoch:
                    self.scheduler.step()

            # Metric
            train_acc += (preds.argmax(dim=1) == targets).sum().item()
            preds_list.extend(preds.argmax(dim=1).cpu().detach().numpy())
            targets_list.extend(targets.cpu().detach().numpy())
            # log
            train_loss.update(loss.item(), n=images.size(0))

        train_acc /= len(self.train_loader.dataset)
        train_f1 = f1_score(np.array(targets_list), np.array(preds_list), average='macro')

        self.logger.info(f'Epoch:[{self.epoch:03d}/{args.epochs:03d}]')
        self.logger.info(f'Train Loss:{train_loss.avg:.3f} | Acc:{train_acc:.4f} | F1:{train_f1:.4f}')
        return train_loss.avg, train_acc, train_f1
            
    # Validation or Dev
    def validate(self, args, phase='val'):
        self.model.eval()
        with torch.no_grad():
            val_loss = AvgMeter()
            val_acc = 0
            preds_list = []
            targets_list = []

            for i, (images, targets) in enumerate(self.val_loader):
                images = torch.tensor(images, device=self.device, dtype=torch.float32)
                targets = torch.tensor(targets, device=self.device, dtype=torch.long)

                preds = self.model(images)
                loss = self.criterion(preds, targets)

                # Metric
                val_acc += (preds.argmax(dim=1) == targets).sum().item()
                preds_list.extend(preds.argmax(dim=1).cpu().detach().numpy())
                targets_list.extend(targets.cpu().detach().numpy())

                # log
                val_loss.update(loss.item(), n=images.size(0))
            val_acc /= len(self.val_loader.dataset)
            val_f1 = f1_score(np.array(targets_list), np.array(preds_list), average='macro')

            self.logger.info(f'{phase} Loss:{val_loss.avg:.3f} | Acc:{val_acc:.4f} | F1:{val_f1:.4f}')
        return val_loss.avg, val_acc, val_f1

In [13]:
def main(args):
    print('<---- Training Params ---->')
    
    # Random Seed
    seed = args.seed
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = True

    save_path = os.path.join(args.model_path, args.class_name, (args.exp_num).zfill(3))
    
    # Create model directory
    os.makedirs(save_path, exist_ok=True)
    Trainer(args, save_path)

    return save_path

In [14]:
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"  # Arrange GPU devices starting from 0
os.environ["CUDA_VISIBLE_DEVICES"]= "0,1"  # Set the GPUs 2 and 3 to use

In [15]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print('Device:', device)
print('Current cuda device:', torch.cuda.current_device())
print('Count of using GPUs:', torch.cuda.device_count())

Device: cuda
Current cuda device: 0
Count of using GPUs: 2


In [16]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
sub = pd.read_csv('./open/sample_submission.csv')
df_train = pd.read_csv('./open/train_df2.csv')
df_test = pd.read_csv('./open/test_df.csv')

In [17]:
train_df['class'].unique()

array(['transistor', 'capsule', 'wood', 'bottle', 'screw', 'cable',
       'carpet', 'hazelnut', 'pill', 'metal_nut', 'zipper', 'leather',
       'toothbrush', 'tile', 'grid'], dtype=object)

In [18]:
class_list = ['transistor', 'capsule', 'wood', 'bottle', 'screw', 'cable',
       'carpet', 'hazelnut', 'pill', 'metal_nut', 'zipper', 'leather',
       'toothbrush', 'tile', 'grid']

In [19]:
args.step = 0
models_path = []
for class_name in class_list:
    args.class_name = class_name
    
    train_df = pd.read_csv(os.path.join(DATA_DIR, 'train_df.csv'))
    train_df = train_df[train_df['class'] == class_name]
    train_df = train_df.reset_index(drop=True)
    print(train_df)

    train_labels = train_df["state"]
    print(train_df["state"].value_counts())

    label_unique = sorted(np.unique(train_labels))
    label_unique = {key:value for key,value in zip(label_unique, range(len(label_unique)))}

    train_labels = [label_unique[k] for k in train_labels]
    train_labels

    train_df['state2'] = train_labels

    train_df.to_csv('./open/train_df_%s.csv' % class_name, index=False)
    
    class_num = len(train_df['state2'].unique())
    print(class_num)
    args.class_num = class_num
    
    for s_fold in range(1): # 5fold
        args.fold = s_fold
        args.exp_num = str(s_fold)
        save_path = main(args)
        models_path.append(save_path)

2022-04-15 09:42:56,874 INFO: {'exp_num': '0', 'class_name': 'transistor', 'class_num': 5, 'data_path': './open', 'Kfold': 7, 'model_path': 'results/', 'image_type': 'train_1024', 'model_name': 'regnety_064', 'drop_path_rate': 0.2, 'img_size': 352, 'batch_size': 8, 'epochs': 70, 'optimizer': 'Lamb', 'initial_lr': 4e-05, 'weight_decay': 0.0001, 'aug_ver': 2, 'scheduler': 'cycle', 'warm_epoch': 5, 'max_lr': 0.0005, 'min_lr': 5e-06, 'tmax': 145, 'patience': 15, 'clipping': None, 'amp': True, 'multi_gpu': True, 'logging': False, 'num_workers': 4, 'seed': 42, 'step': 0, 'fold': 0}


     index  file_name       class      state                 label
0        0  10000.png  transistor       good       transistor-good
1        2  10002.png  transistor       good       transistor-good
2        9  10009.png  transistor       good       transistor-good
3       42  10042.png  transistor       good       transistor-good
4       49  10049.png  transistor       good       transistor-good
..     ...        ...         ...        ...                   ...
228   4149  14149.png  transistor       good       transistor-good
229   4203  14203.png  transistor  misplaced  transistor-misplaced
230   4248  14248.png  transistor  bent_lead  transistor-bent_lead
231   4272  14272.png  transistor       good       transistor-good
232   4273  14273.png  transistor       good       transistor-good

[233 rows x 5 columns]
good            213
damaged_case      5
bent_lead         5
misplaced         5
cut_lead          5
Name: state, dtype: int64
5
<---- Training Params ---->
Dataset size:199

2022-04-15 09:42:57,333 INFO: Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_064-0a48325c.pth)
100%|██████████| 25/25 [00:13<00:00,  1.79it/s]
2022-04-15 09:43:15,211 INFO: Epoch:[001/070]
2022-04-15 09:43:15,212 INFO: Train Loss:1.710 | Acc:0.0302 | F1:0.0123
2022-04-15 09:43:17,595 INFO: val Loss:1.652 | Acc:0.0294 | F1:0.0147
2022-04-15 09:43:18,451 INFO: -----------------SAVE:1epoch----------------
100%|██████████| 25/25 [00:08<00:00,  2.93it/s]
2022-04-15 09:43:26,986 INFO: Epoch:[002/070]
2022-04-15 09:43:26,987 INFO: Train Loss:1.700 | Acc:0.0352 | F1:0.0627
2022-04-15 09:43:28,607 INFO: val Loss:1.672 | Acc:0.0294 | F1:0.0143
100%|██████████| 25/25 [00:07<00:00,  3.24it/s]
2022-04-15 09:43:36,331 INFO: Epoch:[003/070]
2022-04-15 09:43:36,331 INFO: Train Loss:1.704 | Acc:0.0302 | F1:0.0124
2022-04-15 09:43:37,814 INFO: val Loss:1.665 | Acc:0.0294 | F1:0.0143
100%|██████████| 25/25 [00:08<00:00,  3.10it

2022-04-15 09:47:52,936 INFO: val Loss:0.860 | Acc:0.9118 | F1:0.4881
100%|██████████| 25/25 [00:08<00:00,  3.12it/s]
2022-04-15 09:48:00,961 INFO: Epoch:[029/070]
2022-04-15 09:48:00,962 INFO: Train Loss:1.169 | Acc:0.9196 | F1:0.4057
2022-04-15 09:48:02,645 INFO: val Loss:0.821 | Acc:0.9118 | F1:0.4881
2022-04-15 09:48:03,468 INFO: -----------------SAVE:29epoch----------------
100%|██████████| 25/25 [00:08<00:00,  3.08it/s]
2022-04-15 09:48:11,594 INFO: Epoch:[030/070]
2022-04-15 09:48:11,595 INFO: Train Loss:1.103 | Acc:0.9246 | F1:0.4801
2022-04-15 09:48:13,284 INFO: val Loss:0.787 | Acc:0.9118 | F1:0.4881
2022-04-15 09:48:14,120 INFO: -----------------SAVE:30epoch----------------
100%|██████████| 25/25 [00:07<00:00,  3.17it/s]
2022-04-15 09:48:22,025 INFO: Epoch:[031/070]
2022-04-15 09:48:22,026 INFO: Train Loss:1.172 | Acc:0.9146 | F1:0.3470
2022-04-15 09:48:23,656 INFO: val Loss:0.720 | Acc:0.9118 | F1:0.4881
2022-04-15 09:48:24,495 INFO: -----------------SAVE:31epoch-----------

2022-04-15 09:53:06,932 INFO: val Loss:0.474 | Acc:0.9118 | F1:0.4881
100%|██████████| 25/25 [00:07<00:00,  3.28it/s]
2022-04-15 09:53:14,563 INFO: Epoch:[061/070]
2022-04-15 09:53:14,564 INFO: Train Loss:0.978 | Acc:0.8995 | F1:0.4292
2022-04-15 09:53:16,256 INFO: val Loss:0.501 | Acc:0.8824 | F1:0.4839
100%|██████████| 25/25 [00:08<00:00,  3.07it/s]
2022-04-15 09:53:24,408 INFO: Epoch:[062/070]
2022-04-15 09:53:24,409 INFO: Train Loss:0.733 | Acc:0.9447 | F1:0.6417
2022-04-15 09:53:26,033 INFO: val Loss:0.405 | Acc:0.9412 | F1:0.4922
2022-04-15 09:53:26,874 INFO: -----------------SAVE:62epoch----------------
100%|██████████| 25/25 [00:08<00:00,  3.04it/s]
2022-04-15 09:53:35,106 INFO: Epoch:[063/070]
2022-04-15 09:53:35,106 INFO: Train Loss:0.722 | Acc:0.9146 | F1:0.5485
2022-04-15 09:53:36,669 INFO: val Loss:0.372 | Acc:0.9706 | F1:0.7460
2022-04-15 09:53:37,505 INFO: -----------------SAVE:63epoch----------------
100%|██████████| 25/25 [00:07<00:00,  3.15it/s]
2022-04-15 09:53:45,45

     index  file_name    class    state            label
0        1  10001.png  capsule     good     capsule-good
1        6  10006.png  capsule     good     capsule-good
2       22  10022.png  capsule     good     capsule-good
3       24  10024.png  capsule     good     capsule-good
4       80  10080.png  capsule     good     capsule-good
..     ...        ...      ...      ...              ...
270   4194  14194.png  capsule     good     capsule-good
271   4198  14198.png  capsule     good     capsule-good
272   4217  14217.png  capsule  squeeze  capsule-squeeze
273   4218  14218.png  capsule     good     capsule-good
274   4269  14269.png  capsule     good     capsule-good

[275 rows x 5 columns]
good              219
scratch            12
crack              12
poke               11
faulty_imprint     11
squeeze            10
Name: state, dtype: int64
6
<---- Training Params ---->
Dataset size:235
Dataset size:40


2022-04-15 09:54:44,678 INFO: Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_064-0a48325c.pth)
100%|██████████| 30/30 [00:10<00:00,  2.75it/s]
2022-04-15 09:54:55,835 INFO: Epoch:[001/070]
2022-04-15 09:54:55,836 INFO: Train Loss:1.809 | Acc:0.0298 | F1:0.0228
2022-04-15 09:54:57,964 INFO: val Loss:1.843 | Acc:0.0500 | F1:0.0615
2022-04-15 09:54:58,864 INFO: -----------------SAVE:1epoch----------------
100%|██████████| 30/30 [00:09<00:00,  3.20it/s]
2022-04-15 09:55:08,236 INFO: Epoch:[002/070]
2022-04-15 09:55:08,237 INFO: Train Loss:1.805 | Acc:0.0383 | F1:0.0375
2022-04-15 09:55:10,166 INFO: val Loss:1.851 | Acc:0.0000 | F1:0.0000
100%|██████████| 30/30 [00:09<00:00,  3.15it/s]
2022-04-15 09:55:19,706 INFO: Epoch:[003/070]
2022-04-15 09:55:19,706 INFO: Train Loss:1.822 | Acc:0.0553 | F1:0.0658
2022-04-15 09:55:21,559 INFO: val Loss:1.826 | Acc:0.0500 | F1:0.0992
2022-04-15 09:55:22,467 INFO: --------------

2022-04-15 10:00:48,562 INFO: -----------------SAVE:31epoch----------------
100%|██████████| 30/30 [00:09<00:00,  3.04it/s]
2022-04-15 10:00:58,427 INFO: Epoch:[032/070]
2022-04-15 10:00:58,427 INFO: Train Loss:1.517 | Acc:0.7702 | F1:0.2649
2022-04-15 10:01:00,312 INFO: val Loss:1.460 | Acc:0.8500 | F1:0.6237
2022-04-15 10:01:01,154 INFO: -----------------SAVE:32epoch----------------
100%|██████████| 30/30 [00:09<00:00,  3.16it/s]
2022-04-15 10:01:10,667 INFO: Epoch:[033/070]
2022-04-15 10:01:10,667 INFO: Train Loss:1.499 | Acc:0.7362 | F1:0.3078
2022-04-15 10:01:12,559 INFO: val Loss:1.412 | Acc:0.7000 | F1:0.3555
2022-04-15 10:01:13,409 INFO: -----------------SAVE:33epoch----------------
100%|██████████| 30/30 [00:09<00:00,  3.17it/s]
2022-04-15 10:01:22,893 INFO: Epoch:[034/070]
2022-04-15 10:01:22,894 INFO: Train Loss:1.462 | Acc:0.7319 | F1:0.3131
2022-04-15 10:01:24,806 INFO: val Loss:1.420 | Acc:0.6000 | F1:0.2421
100%|██████████| 30/30 [00:09<00:00,  3.11it/s]
2022-04-15 10:01

     index  file_name class state      label
0        3  10003.png  wood  good  wood-good
1        5  10005.png  wood  good  wood-good
2       12  10012.png  wood  good  wood-good
3       15  10015.png  wood  good  wood-good
4       17  10017.png  wood  good  wood-good
..     ...        ...   ...   ...        ...
273   4227  14227.png  wood  good  wood-good
274   4232  14232.png  wood  good  wood-good
275   4254  14254.png  wood  good  wood-good
276   4259  14259.png  wood  good  wood-good
277   4270  14270.png  wood  good  wood-good

[278 rows x 5 columns]
good        247
scratch      11
combined      6
liquid        5
hole          5
color         4
Name: state, dtype: int64
6
<---- Training Params ---->
Dataset size:238
Dataset size:40


2022-04-15 10:05:44,962 INFO: Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_064-0a48325c.pth)
100%|██████████| 30/30 [00:10<00:00,  2.90it/s]
2022-04-15 10:05:55,571 INFO: Epoch:[001/070]
2022-04-15 10:05:55,572 INFO: Train Loss:1.834 | Acc:0.0294 | F1:0.0297
2022-04-15 10:05:57,396 INFO: val Loss:1.771 | Acc:0.0500 | F1:0.2100
2022-04-15 10:05:58,262 INFO: -----------------SAVE:1epoch----------------
100%|██████████| 30/30 [00:09<00:00,  3.25it/s]
2022-04-15 10:06:07,505 INFO: Epoch:[002/070]
2022-04-15 10:06:07,506 INFO: Train Loss:1.855 | Acc:0.0210 | F1:0.0155
2022-04-15 10:06:09,380 INFO: val Loss:1.805 | Acc:0.0500 | F1:0.0759
100%|██████████| 30/30 [00:09<00:00,  3.18it/s]
2022-04-15 10:06:18,816 INFO: Epoch:[003/070]
2022-04-15 10:06:18,817 INFO: Train Loss:1.868 | Acc:0.0462 | F1:0.0400
2022-04-15 10:06:20,658 INFO: val Loss:1.795 | Acc:0.0500 | F1:0.1754
100%|██████████| 30/30 [00:09<00:00,  3.10it

2022-04-15 10:11:17,476 INFO: val Loss:1.042 | Acc:0.9500 | F1:0.5946
2022-04-15 10:11:18,351 INFO: -----------------SAVE:28epoch----------------
100%|██████████| 30/30 [00:09<00:00,  3.14it/s]
2022-04-15 10:11:27,912 INFO: Epoch:[029/070]
2022-04-15 10:11:27,912 INFO: Train Loss:1.283 | Acc:0.8950 | F1:0.3910
2022-04-15 10:11:29,770 INFO: val Loss:0.943 | Acc:0.9250 | F1:0.3920
2022-04-15 10:11:30,732 INFO: -----------------SAVE:29epoch----------------
100%|██████████| 30/30 [00:09<00:00,  3.23it/s]
2022-04-15 10:11:40,031 INFO: Epoch:[030/070]
2022-04-15 10:11:40,032 INFO: Train Loss:1.354 | Acc:0.8697 | F1:0.2479
2022-04-15 10:11:41,981 INFO: val Loss:0.952 | Acc:0.9750 | F1:0.7333
100%|██████████| 30/30 [00:09<00:00,  3.18it/s]
2022-04-15 10:11:51,411 INFO: Epoch:[031/070]
2022-04-15 10:11:51,412 INFO: Train Loss:1.322 | Acc:0.8950 | F1:0.3080
2022-04-15 10:11:53,235 INFO: val Loss:1.031 | Acc:0.9500 | F1:0.4667
100%|██████████| 30/30 [00:09<00:00,  3.28it/s]
2022-04-15 10:12:02,40

100%|██████████| 30/30 [00:09<00:00,  3.16it/s]
2022-04-15 10:17:15,044 INFO: Epoch:[059/070]
2022-04-15 10:17:15,045 INFO: Train Loss:1.197 | Acc:0.8950 | F1:0.3893
2022-04-15 10:17:16,921 INFO: val Loss:0.552 | Acc:0.9750 | F1:0.7973
100%|██████████| 30/30 [00:09<00:00,  3.23it/s]
2022-04-15 10:17:26,218 INFO: Epoch:[060/070]
2022-04-15 10:17:26,219 INFO: Train Loss:1.231 | Acc:0.9328 | F1:0.6403
2022-04-15 10:17:28,001 INFO: val Loss:0.686 | Acc:0.9750 | F1:0.7973
100%|██████████| 30/30 [00:09<00:00,  3.24it/s]
2022-04-15 10:17:37,279 INFO: Epoch:[061/070]
2022-04-15 10:17:37,280 INFO: Train Loss:0.955 | Acc:0.9076 | F1:0.4059
2022-04-15 10:17:39,138 INFO: val Loss:0.563 | Acc:0.9750 | F1:0.7973
100%|██████████| 30/30 [00:09<00:00,  3.15it/s]
2022-04-15 10:17:48,679 INFO: Epoch:[062/070]
2022-04-15 10:17:48,680 INFO: Train Loss:0.811 | Acc:0.9244 | F1:0.6495
2022-04-15 10:17:50,470 INFO: val Loss:0.542 | Acc:0.9750 | F1:0.7973
100%|██████████| 30/30 [00:09<00:00,  3.24it/s]
2022-04-

     index  file_name   class          state                 label
0        4  10004.png  bottle           good           bottle-good
1       26  10026.png  bottle           good           bottle-good
2       53  10053.png  bottle  contamination  bottle-contamination
3       58  10058.png  bottle           good           bottle-good
4       60  10060.png  bottle           good           bottle-good
..     ...        ...     ...            ...                   ...
236   4174  14174.png  bottle           good           bottle-good
237   4179  14179.png  bottle           good           bottle-good
238   4200  14200.png  bottle           good           bottle-good
239   4236  14236.png  bottle           good           bottle-good
240   4271  14271.png  bottle           good           bottle-good

[241 rows x 5 columns]
good             209
contamination     11
broken_small      11
broken_large      10
Name: state, dtype: int64
4
<---- Training Params ---->
Dataset size:206
Dataset size:35

2022-04-15 10:18:25,086 INFO: Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_064-0a48325c.pth)
100%|██████████| 26/26 [00:07<00:00,  3.28it/s]
2022-04-15 10:18:33,332 INFO: Epoch:[001/070]
2022-04-15 10:18:33,332 INFO: Train Loss:1.436 | Acc:0.0437 | F1:0.0399
2022-04-15 10:18:34,974 INFO: val Loss:1.422 | Acc:0.0286 | F1:0.0147
2022-04-15 10:18:35,886 INFO: -----------------SAVE:1epoch----------------
100%|██████████| 26/26 [00:08<00:00,  3.24it/s]
2022-04-15 10:18:43,925 INFO: Epoch:[002/070]
2022-04-15 10:18:43,926 INFO: Train Loss:1.409 | Acc:0.0583 | F1:0.0544
2022-04-15 10:18:45,290 INFO: val Loss:1.442 | Acc:0.0286 | F1:0.0143
100%|██████████| 26/26 [00:08<00:00,  3.19it/s]
2022-04-15 10:18:53,449 INFO: Epoch:[003/070]
2022-04-15 10:18:53,450 INFO: Train Loss:1.443 | Acc:0.0485 | F1:0.0691
2022-04-15 10:18:54,938 INFO: val Loss:1.445 | Acc:0.0286 | F1:0.0143
100%|██████████| 26/26 [00:08<00:00,  3.17it

2022-04-15 10:23:15,174 INFO: Train Loss:0.976 | Acc:0.8738 | F1:0.5032
2022-04-15 10:23:16,502 INFO: val Loss:0.629 | Acc:0.8571 | F1:0.4623
100%|██████████| 26/26 [00:08<00:00,  3.24it/s]
2022-04-15 10:23:24,531 INFO: Epoch:[030/070]
2022-04-15 10:23:24,531 INFO: Train Loss:0.966 | Acc:0.8835 | F1:0.5087
2022-04-15 10:23:25,955 INFO: val Loss:0.591 | Acc:0.9143 | F1:0.5708
100%|██████████| 26/26 [00:07<00:00,  3.35it/s]
2022-04-15 10:23:33,733 INFO: Epoch:[031/070]
2022-04-15 10:23:33,733 INFO: Train Loss:0.971 | Acc:0.8641 | F1:0.4447
2022-04-15 10:23:35,082 INFO: val Loss:0.529 | Acc:0.9429 | F1:0.7459
2022-04-15 10:23:36,020 INFO: -----------------SAVE:31epoch----------------
100%|██████████| 26/26 [00:07<00:00,  3.34it/s]
2022-04-15 10:23:43,805 INFO: Epoch:[032/070]
2022-04-15 10:23:43,806 INFO: Train Loss:0.927 | Acc:0.8883 | F1:0.5755
2022-04-15 10:23:45,243 INFO: val Loss:0.539 | Acc:0.9429 | F1:0.7459
100%|██████████| 26/26 [00:07<00:00,  3.29it/s]
2022-04-15 10:23:53,161 IN

100%|██████████| 26/26 [00:07<00:00,  3.28it/s]
2022-04-15 10:28:37,404 INFO: Epoch:[063/070]
2022-04-15 10:28:37,404 INFO: Train Loss:0.586 | Acc:0.9466 | F1:0.8390
2022-04-15 10:28:38,736 INFO: val Loss:0.514 | Acc:0.9429 | F1:0.7459
100%|██████████| 26/26 [00:08<00:00,  3.20it/s]
2022-04-15 10:28:46,864 INFO: Epoch:[064/070]
2022-04-15 10:28:46,865 INFO: Train Loss:0.620 | Acc:0.9126 | F1:0.7001
2022-04-15 10:28:48,202 INFO: val Loss:0.263 | Acc:0.9714 | F1:0.8333
2022-04-15 10:28:49,063 INFO: -----------------SAVE:64epoch----------------
100%|██████████| 26/26 [00:08<00:00,  3.24it/s]
2022-04-15 10:28:57,104 INFO: Epoch:[065/070]
2022-04-15 10:28:57,105 INFO: Train Loss:0.745 | Acc:0.9126 | F1:0.7031
2022-04-15 10:28:58,578 INFO: val Loss:0.543 | Acc:0.9429 | F1:0.7459
100%|██████████| 26/26 [00:07<00:00,  3.26it/s]
2022-04-15 10:29:06,574 INFO: Epoch:[066/070]
2022-04-15 10:29:06,574 INFO: Train Loss:0.794 | Acc:0.9126 | F1:0.7042
2022-04-15 10:29:07,909 INFO: val Loss:0.568 | Acc

     index  file_name  class              state                    label
0        7  10007.png  screw               good               screw-good
1       19  10019.png  screw        thread_side        screw-thread_side
2       35  10035.png  screw  manipulated_front  screw-manipulated_front
3       44  10044.png  screw               good               screw-good
4       46  10046.png  screw               good               screw-good
..     ...        ...    ...                ...                      ...
376   4223  14223.png  screw               good               screw-good
377   4234  14234.png  screw               good               screw-good
378   4246  14246.png  screw               good               screw-good
379   4253  14253.png  screw               good               screw-good
380   4276  14276.png  screw               good               screw-good

[381 rows x 5 columns]
good                 320
scratch_neck          13
thread_top            12
thread_side           12


2022-04-15 10:29:45,333 INFO: Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_064-0a48325c.pth)
100%|██████████| 41/41 [00:11<00:00,  3.52it/s]
2022-04-15 10:29:57,286 INFO: Epoch:[001/070]
2022-04-15 10:29:57,286 INFO: Train Loss:1.829 | Acc:0.0613 | F1:0.0378
2022-04-15 10:29:59,146 INFO: val Loss:1.832 | Acc:0.0364 | F1:0.0370
2022-04-15 10:30:00,006 INFO: -----------------SAVE:1epoch----------------
100%|██████████| 41/41 [00:11<00:00,  3.44it/s]
2022-04-15 10:30:11,921 INFO: Epoch:[002/070]
2022-04-15 10:30:11,921 INFO: Train Loss:1.818 | Acc:0.0491 | F1:0.0330
2022-04-15 10:30:13,462 INFO: val Loss:1.825 | Acc:0.0545 | F1:0.0810
2022-04-15 10:30:14,373 INFO: -----------------SAVE:2epoch----------------
100%|██████████| 41/41 [00:12<00:00,  3.29it/s]
2022-04-15 10:30:26,852 INFO: Epoch:[003/070]
2022-04-15 10:30:26,852 INFO: Train Loss:1.827 | Acc:0.0583 | F1:0.0457
2022-04-15 10:30:28,299 INFO: val Loss:

100%|██████████| 41/41 [00:12<00:00,  3.37it/s]
2022-04-15 10:37:06,595 INFO: Epoch:[032/070]
2022-04-15 10:37:06,595 INFO: Train Loss:1.661 | Acc:0.7975 | F1:0.2139
2022-04-15 10:37:08,181 INFO: val Loss:1.619 | Acc:0.8182 | F1:0.2604
100%|██████████| 41/41 [00:12<00:00,  3.35it/s]
2022-04-15 10:37:20,446 INFO: Epoch:[033/070]
2022-04-15 10:37:20,446 INFO: Train Loss:1.681 | Acc:0.7822 | F1:0.2162
2022-04-15 10:37:22,009 INFO: val Loss:1.604 | Acc:0.8545 | F1:0.4163
2022-04-15 10:37:22,957 INFO: -----------------SAVE:33epoch----------------
100%|██████████| 41/41 [00:11<00:00,  3.46it/s]
2022-04-15 10:37:34,804 INFO: Epoch:[034/070]
2022-04-15 10:37:34,805 INFO: Train Loss:1.585 | Acc:0.7669 | F1:0.2457
2022-04-15 10:37:36,369 INFO: val Loss:1.596 | Acc:0.8545 | F1:0.2880
2022-04-15 10:37:37,211 INFO: -----------------SAVE:34epoch----------------
100%|██████████| 41/41 [00:11<00:00,  3.48it/s]
2022-04-15 10:37:49,007 INFO: Epoch:[035/070]
2022-04-15 10:37:49,007 INFO: Train Loss:1.605

100%|██████████| 41/41 [00:12<00:00,  3.41it/s]
2022-04-15 10:44:15,832 INFO: Epoch:[063/070]
2022-04-15 10:44:15,832 INFO: Train Loss:1.368 | Acc:0.8067 | F1:0.4152
2022-04-15 10:44:17,555 INFO: val Loss:1.231 | Acc:0.8727 | F1:0.5391
100%|██████████| 41/41 [00:11<00:00,  3.42it/s]
2022-04-15 10:44:29,552 INFO: Epoch:[064/070]
2022-04-15 10:44:29,553 INFO: Train Loss:1.538 | Acc:0.7914 | F1:0.2987
2022-04-15 10:44:31,246 INFO: val Loss:1.193 | Acc:0.8727 | F1:0.5447
100%|██████████| 41/41 [00:11<00:00,  3.42it/s]
2022-04-15 10:44:43,247 INFO: Epoch:[065/070]
2022-04-15 10:44:43,252 INFO: Train Loss:1.375 | Acc:0.8313 | F1:0.4634
2022-04-15 10:44:44,867 INFO: val Loss:1.141 | Acc:0.9091 | F1:0.5762
2022-04-15 10:44:45,727 INFO: -----------------SAVE:65epoch----------------
100%|██████████| 41/41 [00:11<00:00,  3.47it/s]
2022-04-15 10:44:57,558 INFO: Epoch:[066/070]
2022-04-15 10:44:57,559 INFO: Train Loss:1.310 | Acc:0.8497 | F1:0.4762
2022-04-15 10:44:59,091 INFO: val Loss:1.191 | Acc

     index  file_name  class      state            label
0        8  10008.png  cable  bent_wire  cable-bent_wire
1       14  10014.png  cable       good       cable-good
2       32  10032.png  cable       good       cable-good
3       38  10038.png  cable       good       cable-good
4       39  10039.png  cable       good       cable-good
..     ...        ...    ...        ...              ...
266   4207  14207.png  cable       good       cable-good
267   4212  14212.png  cable       good       cable-good
268   4238  14238.png  cable       good       cable-good
269   4241  14241.png  cable       good       cable-good
270   4251  14251.png  cable   combined   cable-combined

[271 rows x 5 columns]
good                    224
bent_wire                 7
cut_inner_insulation      7
cable_swap                6
combined                  6
missing_cable             6
cut_outer_insulation      5
missing_wire              5
poke_insulation           5
Name: state, dtype: int64
9
<---- Traini

2022-04-15 10:45:54,261 INFO: Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_064-0a48325c.pth)
100%|██████████| 29/29 [00:09<00:00,  3.15it/s]
2022-04-15 10:46:03,716 INFO: Epoch:[001/070]
2022-04-15 10:46:03,716 INFO: Train Loss:2.174 | Acc:0.0647 | F1:0.0161
2022-04-15 10:46:05,561 INFO: val Loss:2.206 | Acc:0.0256 | F1:0.0064
2022-04-15 10:46:06,410 INFO: -----------------SAVE:1epoch----------------
100%|██████████| 29/29 [00:09<00:00,  3.07it/s]
2022-04-15 10:46:15,856 INFO: Epoch:[002/070]
2022-04-15 10:46:15,857 INFO: Train Loss:2.203 | Acc:0.0819 | F1:0.0301
2022-04-15 10:46:17,728 INFO: val Loss:2.194 | Acc:0.0513 | F1:0.0127
2022-04-15 10:46:18,672 INFO: -----------------SAVE:2epoch----------------
100%|██████████| 29/29 [00:09<00:00,  3.14it/s]
2022-04-15 10:46:27,915 INFO: Epoch:[003/070]
2022-04-15 10:46:27,915 INFO: Train Loss:2.184 | Acc:0.1078 | F1:0.0634
2022-04-15 10:46:29,720 INFO: val Loss:

2022-04-15 10:51:28,902 INFO: -----------------SAVE:29epoch----------------
100%|██████████| 29/29 [00:09<00:00,  3.08it/s]
2022-04-15 10:51:38,314 INFO: Epoch:[030/070]
2022-04-15 10:51:38,315 INFO: Train Loss:1.558 | Acc:0.8362 | F1:0.3797
2022-04-15 10:51:40,016 INFO: val Loss:1.608 | Acc:0.8718 | F1:0.3659
2022-04-15 10:51:40,913 INFO: -----------------SAVE:30epoch----------------
100%|██████████| 29/29 [00:09<00:00,  3.21it/s]
2022-04-15 10:51:49,946 INFO: Epoch:[031/070]
2022-04-15 10:51:49,946 INFO: Train Loss:1.541 | Acc:0.8578 | F1:0.5008
2022-04-15 10:51:51,880 INFO: val Loss:1.508 | Acc:0.8718 | F1:0.4091
2022-04-15 10:51:52,711 INFO: -----------------SAVE:31epoch----------------
100%|██████████| 29/29 [00:09<00:00,  3.15it/s]
2022-04-15 10:52:01,914 INFO: Epoch:[032/070]
2022-04-15 10:52:01,915 INFO: Train Loss:1.461 | Acc:0.8448 | F1:0.4740
2022-04-15 10:52:03,820 INFO: val Loss:1.403 | Acc:0.9231 | F1:0.5795
2022-04-15 10:52:04,643 INFO: -----------------SAVE:32epoch-----

2022-04-15 10:57:11,745 INFO: Train Loss:1.149 | Acc:0.8405 | F1:0.5080
2022-04-15 10:57:13,605 INFO: val Loss:0.578 | Acc:0.9487 | F1:0.7897
2022-04-15 10:57:14,470 INFO: -----------------SAVE:59epoch----------------
100%|██████████| 29/29 [00:09<00:00,  3.12it/s]
2022-04-15 10:57:23,775 INFO: Epoch:[060/070]
2022-04-15 10:57:23,776 INFO: Train Loss:0.887 | Acc:0.8534 | F1:0.5825
2022-04-15 10:57:25,632 INFO: val Loss:0.621 | Acc:0.8718 | F1:0.7000
100%|██████████| 29/29 [00:09<00:00,  3.11it/s]
2022-04-15 10:57:34,973 INFO: Epoch:[061/070]
2022-04-15 10:57:34,974 INFO: Train Loss:1.066 | Acc:0.8664 | F1:0.5631
2022-04-15 10:57:36,822 INFO: val Loss:0.627 | Acc:0.8974 | F1:0.7522
100%|██████████| 29/29 [00:09<00:00,  3.13it/s]
2022-04-15 10:57:46,109 INFO: Epoch:[062/070]
2022-04-15 10:57:46,110 INFO: Train Loss:1.249 | Acc:0.8750 | F1:0.5733
2022-04-15 10:57:47,942 INFO: val Loss:0.625 | Acc:0.9487 | F1:0.7897
100%|██████████| 29/29 [00:09<00:00,  3.05it/s]
2022-04-15 10:57:57,469 IN

     index  file_name   class  state         label
0       10  10010.png  carpet   hole   carpet-hole
1       41  10041.png  carpet   good   carpet-good
2       50  10050.png  carpet   good   carpet-good
3       51  10051.png  carpet   good   carpet-good
4       57  10057.png  carpet   good   carpet-good
..     ...        ...     ...    ...           ...
322   4156  14156.png  carpet   good   carpet-good
323   4182  14182.png  carpet  color  carpet-color
324   4213  14213.png  carpet  color  carpet-color
325   4225  14225.png  carpet   good   carpet-good
326   4229  14229.png  carpet   good   carpet-good

[327 rows x 5 columns]
good                   280
thread                  10
color                   10
metal_contamination      9
hole                     9
cut                      9
Name: state, dtype: int64
6
<---- Training Params ---->
Dataset size:280
Dataset size:47


2022-04-15 10:59:17,560 INFO: Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_064-0a48325c.pth)
100%|██████████| 35/35 [00:11<00:00,  3.12it/s]
2022-04-15 10:59:29,036 INFO: Epoch:[001/070]
2022-04-15 10:59:29,037 INFO: Train Loss:1.823 | Acc:0.0179 | F1:0.0508
2022-04-15 10:59:31,045 INFO: val Loss:1.825 | Acc:0.0213 | F1:0.0098
2022-04-15 10:59:31,888 INFO: -----------------SAVE:1epoch----------------
100%|██████████| 35/35 [00:10<00:00,  3.30it/s]
2022-04-15 10:59:42,490 INFO: Epoch:[002/070]
2022-04-15 10:59:42,490 INFO: Train Loss:1.804 | Acc:0.0357 | F1:0.0327
2022-04-15 10:59:44,461 INFO: val Loss:1.837 | Acc:0.0213 | F1:0.0076
100%|██████████| 35/35 [00:10<00:00,  3.27it/s]
2022-04-15 10:59:55,183 INFO: Epoch:[003/070]
2022-04-15 10:59:55,183 INFO: Train Loss:1.825 | Acc:0.0143 | F1:0.0115
2022-04-15 10:59:57,298 INFO: val Loss:1.806 | Acc:0.0213 | F1:0.0076
2022-04-15 10:59:58,123 INFO: --------------

2022-04-15 11:05:44,679 INFO: -----------------SAVE:29epoch----------------
100%|██████████| 35/35 [00:11<00:00,  3.17it/s]
2022-04-15 11:05:55,725 INFO: Epoch:[030/070]
2022-04-15 11:05:55,726 INFO: Train Loss:1.668 | Acc:0.8143 | F1:0.2070
2022-04-15 11:05:57,787 INFO: val Loss:1.339 | Acc:0.6809 | F1:0.2591
2022-04-15 11:05:58,643 INFO: -----------------SAVE:30epoch----------------
100%|██████████| 35/35 [00:11<00:00,  3.17it/s]
2022-04-15 11:06:09,697 INFO: Epoch:[031/070]
2022-04-15 11:06:09,698 INFO: Train Loss:1.607 | Acc:0.8036 | F1:0.2056
2022-04-15 11:06:11,621 INFO: val Loss:1.272 | Acc:0.9149 | F1:0.3611
2022-04-15 11:06:12,536 INFO: -----------------SAVE:31epoch----------------
100%|██████████| 35/35 [00:11<00:00,  3.18it/s]
2022-04-15 11:06:23,554 INFO: Epoch:[032/070]
2022-04-15 11:06:23,555 INFO: Train Loss:1.614 | Acc:0.7786 | F1:0.3121
2022-04-15 11:06:25,564 INFO: val Loss:1.330 | Acc:0.8511 | F1:0.3546
100%|██████████| 35/35 [00:10<00:00,  3.31it/s]
2022-04-15 11:06

2022-04-15 11:12:46,371 INFO: val Loss:0.948 | Acc:0.9787 | F1:0.7778
100%|██████████| 35/35 [00:11<00:00,  3.18it/s]
2022-04-15 11:12:57,381 INFO: Epoch:[062/070]
2022-04-15 11:12:57,382 INFO: Train Loss:1.486 | Acc:0.8500 | F1:0.4710
2022-04-15 11:12:59,487 INFO: val Loss:0.883 | Acc:0.9787 | F1:0.7778
100%|██████████| 35/35 [00:10<00:00,  3.19it/s]
2022-04-15 11:13:10,474 INFO: Epoch:[063/070]
2022-04-15 11:13:10,475 INFO: Train Loss:1.459 | Acc:0.8536 | F1:0.4466
2022-04-15 11:13:12,539 INFO: val Loss:1.053 | Acc:0.9787 | F1:0.8000
100%|██████████| 35/35 [00:10<00:00,  3.33it/s]
2022-04-15 11:13:23,047 INFO: Epoch:[064/070]
2022-04-15 11:13:23,048 INFO: Train Loss:1.486 | Acc:0.8464 | F1:0.4171
2022-04-15 11:13:25,083 INFO: val Loss:0.910 | Acc:0.9787 | F1:0.8000
100%|██████████| 35/35 [00:11<00:00,  3.10it/s]
2022-04-15 11:13:36,379 INFO: Epoch:[065/070]
2022-04-15 11:13:36,380 INFO: Train Loss:1.281 | Acc:0.8714 | F1:0.5539
2022-04-15 11:13:38,558 INFO: val Loss:0.922 | Acc:0.978

     index  file_name     class  state           label
0       11  10011.png  hazelnut   good   hazelnut-good
1       25  10025.png  hazelnut   good   hazelnut-good
2       31  10031.png  hazelnut   good   hazelnut-good
3       33  10033.png  hazelnut  crack  hazelnut-crack
4       64  10064.png  hazelnut   good   hazelnut-good
..     ...        ...       ...    ...             ...
422   4256  14256.png  hazelnut   good   hazelnut-good
423   4257  14257.png  hazelnut   good   hazelnut-good
424   4258  14258.png  hazelnut   good   hazelnut-good
425   4267  14267.png  hazelnut   good   hazelnut-good
426   4268  14268.png  hazelnut   good   hazelnut-good

[427 rows x 5 columns]
good     391
crack      9
print      9
hole       9
cut        9
Name: state, dtype: int64
5
<---- Training Params ---->
Dataset size:366
Dataset size:61


2022-04-15 11:14:44,496 INFO: Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_064-0a48325c.pth)
 98%|█████████▊| 45/46 [00:14<00:00,  3.19it/s]


ValueError: Expected input batch_size (6) to match target batch_size (8).