In [1]:
import os
import cv2
import time
import random
import logging
import easydict
import numpy as np
import pandas as pd
from tqdm import tqdm
from os.path import join as opj
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
from PIL import Image
from natsort import natsorted

import timm
import torch
import torch.nn as nn
import torch_optimizer as optim
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, grad_scaler
from torchvision import transforms
from torch import Tensor
from torchvision.transforms import functional as F
import torch.cuda.amp as amp
from adamp import AdamP, SGDP

import warnings
warnings.filterwarnings('ignore')

In [2]:
args = easydict.EasyDict(
    {'exp_num':'0',
     
     # Path settings
     'data_path':'./open',
     'Kfold':5,
     'model_path':'label_results_tf_efficientnet_b8_ap/',
     'image_type':'train_1024', 
     'class_num' : 88,

     # Model parameter settings
     'model_name':'tf_efficientnet_b8_ap',
     'drop_path_rate':0.2,
     
     # Training parameter settings
     ## Base Parameter
     'img_size':672,
     'batch_size':16,
     'epochs':100,
     'optimizer':'Lamb',
     'initial_lr':5e-4,
     'weight_decay':1e-3,

     ## Augmentation
     'aug_ver':2,

     ## Scheduler (OnecycleLR)
     'scheduler':'Reduce',
     'warm_epoch':5,
     'max_lr':1e-3,

     ### Cosine Annealing
     'min_lr':5e-5,
     'tmax':145,

     ## etc.
     'patience': 5,
     'clipping':None,

     # Hardware settings
     'amp':True,
     'multi_gpu':True,
     'logging':False,
     'num_workers':4,
     'seed':42
     
     
    })

In [3]:
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"  # Arrange GPU devices starting from 0
os.environ["CUDA_VISIBLE_DEVICES"]= "0,1,2,3"  # Set the GPUs 2 and 3 to use

# 증강된 데이터 셋 불러오기

In [4]:
DATA_DIR = './open'

train_df = pd.read_csv(os.path.join(DATA_DIR, 'train_df_add_data.csv'))
test_df = pd.read_csv(os.path.join(DATA_DIR, 'test_df.csv'))

print(train_df.head())
print(test_df.head())
print(train_df.shape)
print(test_df.shape)

   Unnamed: 0  file_name       class state            label  encoder_label
0           0  10000.png  transistor  good  transistor-good             72
1           1  10001.png     capsule  good     capsule-good             15
2           2  10002.png  transistor  good  transistor-good             72
3           3  10003.png        wood  good        wood-good             76
4           4  10004.png      bottle  good      bottle-good              3
   index  file_name
0      0  20000.png
1      1  20001.png
2      2  20002.png
3      3  20003.png
4      4  20004.png
(13997, 6)
(2154, 2)


In [5]:
train_df['label'].unique()

array(['transistor-good', 'capsule-good', 'wood-good', 'bottle-good',
       'screw-good', 'cable-bent_wire', 'carpet-hole', 'hazelnut-good',
       'pill-pill_type', 'cable-good', 'metal_nut-scratch', 'pill-good',
       'screw-thread_side', 'zipper-fabric_border', 'leather-good',
       'pill-scratch', 'toothbrush-good', 'hazelnut-crack',
       'screw-manipulated_front', 'zipper-good', 'tile-good',
       'carpet-good', 'metal_nut-good', 'bottle-contamination',
       'grid-good', 'zipper-split_teeth', 'pill-crack', 'wood-combined',
       'pill-color', 'screw-thread_top', 'cable-missing_cable',
       'capsule-squeeze', 'zipper-rough', 'capsule-crack', 'capsule-poke',
       'metal_nut-flip', 'carpet-metal_contamination', 'metal_nut-color',
       'transistor-bent_lead', 'zipper-fabric_interior', 'leather-fold',
       'tile-glue_strip', 'screw-scratch_neck', 'screw-scratch_head',
       'hazelnut-cut', 'bottle-broken_large', 'bottle-broken_small',
       'leather-cut', 'cable-cut_

In [6]:
class_num = len(train_df.encoder_label.unique())
class_num

88

In [7]:
# Warmup Learning rate scheduler
from torch.optim.lr_scheduler import _LRScheduler
class WarmUpLR(_LRScheduler):
    """warmup_training learning rate scheduler
    Args:
        optimizer: optimzier(e.g. SGD)
        total_iters: totoal_iters of warmup phase
    """
    def __init__(self, optimizer, total_iters, last_epoch=-1):
        
        self.total_iters = total_iters
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """we will use the first m batches, and set the learning
        rate to base_lr * m / total_iters
        """
        return [base_lr * self.last_epoch / (self.total_iters + 1e-8) for base_lr in self.base_lrs]

# Logging
def get_root_logger(logger_name='basicsr',
                    log_level=logging.INFO,
                    log_file=None):

    logger = logging.getLogger(logger_name)
    # if the logger has been initialized, just return it
    if logger.hasHandlers():
        return logger

    format_str = '%(asctime)s %(levelname)s: %(message)s'
    logging.basicConfig(format=format_str, level=log_level)

    if log_file is not None:
        file_handler = logging.FileHandler(log_file, 'w')
        file_handler.setFormatter(logging.Formatter(format_str))
        file_handler.setLevel(log_level)
        logger.addHandler(file_handler)

    return logger

class AvgMeter(object):
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0
        self.losses = []

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
        self.losses.append(val)

In [8]:
class RandomRotation(transforms.RandomRotation):
    def __init__(self, p: float, degrees: int):
        super(RandomRotation, self).__init__(degrees)
        self.p = p

    def forward(self, img):
        if torch.rand(1) < self.p:
            fill = self.fill
            if isinstance(img, Tensor):
                if isinstance(fill, (int, float)):
                    fill = [float(fill)] * F.get_image_num_channels(img)
                else:
                    fill = [float(f) for f in fill]
            angle = self.get_params(self.degrees)

            img = F.rotate(img, angle, self.resample, self.expand, self.center, fill)
        return img

In [9]:
class Train_Dataset(Dataset):
    def __init__(self, df, transform=None):
        self.img_path = df['file_name'].values
        self.target = df['encoder_label'].values 
        self.transform = transform

        print(f'Dataset size:{len(self.img_path)}')

    def __getitem__(self, idx):
        
        image = Image.open(opj('./open/train_add_data/', self.img_path[idx])).convert('RGB')
        image = self.transform(image)
        target = self.target[idx]

        return image, target

    def __len__(self):
        return len(self.img_path)

class Test_dataset(Dataset):
    def __init__(self, df, transform=None):
        self.img_path = df['file_name'].values
        self.transform = transform

        print(f'Test Dataset size:{len(self.img_path)}')

    def __getitem__(self, idx):

        image = Image.open(opj('./open/test/', self.img_path[idx])).convert('RGB')
        image = self.transform(image)

        return image

    def __len__(self):
        return len(self.img_path)

def get_loader(df, phase: str, batch_size, shuffle,
               num_workers, transform):
    if phase == 'test':
        dataset = Test_dataset(df, transform)
        data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers, pin_memory=True)
    else:
        dataset = Train_Dataset(df, transform)
        data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers, 
                                 pin_memory=True,
                                 drop_last=False)
    return data_loader

def get_train_augmentation(img_size, ver):
    if ver==1: # for validset
        transform = transforms.Compose([
                transforms.Resize((img_size, img_size)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225]),
            ])

    if ver == 2:

        transform = transforms.Compose([
                transforms.RandomHorizontalFlip(),
                transforms.RandomVerticalFlip(),
                transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
                transforms.RandomAffine((-20,20)),
                RandomRotation(0.7, degrees=5),
                transforms.Resize((img_size, img_size)),
                transforms.ToTensor(), 
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225]),
            ])
    
    
    return transform

In [10]:
class Network(nn.Module):
    def __init__(self, args):
        super().__init__()
        self.model_ft = timm.create_model(args.model_name, pretrained=True,  drop_path_rate=args.drop_path_rate,
                                          num_classes=88)

#         self.model_ft = coatnet_3()
#         num_ftrs = self.model_ft.fc.in_features
#         self.model_ft.fc = nn.Linear(num_ftrs, args.class_num)
        
    def forward(self, x):
        out = self.model_ft(x)
        return out

class Network_test(nn.Module):
    def __init__(self, encoder_name):
        super().__init__()
        self.model_ft = timm.create_model(args.model_name, pretrained=True, num_classes=88)

#         self.model_ft = coatnet_3()
#         num_ftrs = self.model_ft.fc.in_features
#         self.model_ft.fc = nn.Linear(num_ftrs, args.class_num)

    def forward(self, x):
        out = self.model_ft(x)
        return out

In [11]:
# # weighted crossentropy loss를 위한 weight 계산 함수

class_num = train_df.groupby(["encoder_label"])["encoder_label"].count().tolist()
class_weight = torch.tensor(np.max(class_num) / class_num).to("cuda", dtype=torch.float)
print(f"class_weight: {class_weight}")

class_weight: tensor([2.4437, 2.2216, 2.2216, 1.8708, 3.4911, 4.0729, 4.0729, 3.4911, 4.8875,
        1.7455, 4.0729, 4.8875, 4.8875, 2.0365, 2.2216, 1.7854, 2.2216, 2.0365,
        2.4437, 2.4437, 2.7153, 1.3964, 2.7153, 2.7153, 2.4437, 4.0729, 4.0729,
        4.0729, 1.4811, 4.0729, 4.0729, 2.7153, 2.7153, 1.0000, 2.7153, 2.7153,
        2.4437, 2.4437, 2.7153, 2.4437, 1.5959, 2.7153, 1.8798, 2.2216, 2.0365,
        1.7773, 2.0365, 1.8798, 2.7153, 2.2216, 1.8798, 2.4437, 1.4644, 4.8875,
        2.0365, 1.2219, 2.0365, 2.0365, 1.8798, 2.0365, 2.0365, 2.7153, 2.7153,
        1.7000, 3.0547, 2.7153, 3.0547, 1.6292, 6.5167, 4.8875, 4.8875, 4.8875,
        1.8357, 4.8875, 6.1094, 4.0729, 1.5830, 4.8875, 4.8875, 2.2216, 2.4437,
        3.0547, 2.7153, 3.0547, 1.6292, 2.7153, 2.7153, 3.0547],
       device='cuda:0')


In [12]:
class FocalLoss(nn.Module):
    """
    https://dacon.io/competitions/official/235585/codeshare/1796
    """

    def __init__(self, gamma=2.0, eps=1e-7):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        # print(self.gamma)
        self.eps = eps
        self.ce = nn.CrossEntropyLoss(reduction="none")

    def forward(self, input, target):
        logp = self.ce(input, target)
        p = torch.exp(-logp)
        loss = (1 - p) ** self.gamma * logp
        return loss.mean()

In [13]:
class Trainer():
    def __init__(self, args, save_path):
        '''
        args: arguments
        save_path: Model 가중치 저장 경로
        '''
        super(Trainer, self).__init__()
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # Logging
        log_file = os.path.join(save_path, 'log.log')
        self.logger = get_root_logger(logger_name='IR', log_level=logging.INFO, log_file=log_file)
        self.logger.info(args)
        # self.logger.info(args.tag)

        # Train, Valid Set load
        ############################################################################
        if args.step == 0 :
            df_train = pd.read_csv(opj(args.data_path, 'train_df_add_data.csv'))
        else :
            df_train = pd.read_csv(opj(args.data_path, f'train_{args.step}step.csv'))

#         if args.image_type is not None:
#             df_train['img_path'] = df_train['img_path'].apply(lambda x:x.replace('train_imgs', args.image_type))
#             df_train['img_path'] = df_train['img_path'].apply(lambda x:x.replace('test_imgs', 'test_1024'))

        kf = StratifiedKFold(n_splits=args.Kfold, shuffle=True, random_state=args.seed)
        for fold, (train_idx, val_idx) in enumerate(kf.split(range(len(df_train)), y=df_train['encoder_label'])):
            df_train.loc[val_idx, 'fold'] = fold
        val_idx = list(df_train[df_train['fold'] == int(args.fold)].index)

        df_val = df_train[df_train['fold'] == args.fold].reset_index(drop=True)
        df_train = df_train[df_train['fold'] != args.fold].reset_index(drop=True)

        # Augmentation
        self.train_transform = get_train_augmentation(img_size=args.img_size, ver=args.aug_ver)
        self.test_transform = get_train_augmentation(img_size=args.img_size, ver=1)

        # TrainLoader
        self.train_loader = get_loader(df_train, phase='train', batch_size=args.batch_size, shuffle=True,
                                       num_workers=args.num_workers, transform=self.train_transform)
        self.val_loader = get_loader(df_val, phase='train', batch_size=args.batch_size, shuffle=False,
                                       num_workers=args.num_workers, transform=self.test_transform)

        # Network
        self.model = Network(args).to(self.device)

        # Loss
#         self.criterion = nn.CrossEntropyLoss(weight=class_weight)
        self.criterion = FocalLoss()
#         self.criterion = CutMixCrossEntropyLoss(True)
        
        # Optimizer & Scheduler
#         self.optimizer = Lookahead(torch.optim.Adam(self.model.parameters(), lr=args.initial_lr), k=5, alpha=0.5)
        self.optimizer = optim.Lamb(self.model.parameters(), lr=args.initial_lr)
        
        iter_per_epoch = len(self.train_loader)
        self.warmup_scheduler = WarmUpLR(self.optimizer, iter_per_epoch * args.warm_epoch)

        if args.scheduler == 'step':
            self.scheduler = torch.optim.lr_scheduler.MultiStepLR(self.optimizer, milestones=args.milestone, gamma=args.lr_factor, verbose=True)
        elif args.scheduler == 'cos':
            tmax = args.tmax # half-cycle 
            self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, T_max = tmax, eta_min=args.min_lr, verbose=True)
        elif args.scheduler == 'cycle':
            self.scheduler = torch.optim.lr_scheduler.OneCycleLR(self.optimizer, max_lr=args.max_lr, steps_per_epoch=iter_per_epoch, epochs=args.epochs)
        else:
            self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, patience=5, factor=0.5, mode="max", verbose=True)
            
        if args.multi_gpu:
            self.model = nn.DataParallel(self.model).to(self.device)

        # Train / Validate
        best_loss = np.inf
        best_acc = 0
        best_epoch = 0
        early_stopping = 0
        start = time.time()
        for epoch in range(1, args.epochs+1):
            self.epoch = epoch

            if args.scheduler == 'cos':
                if epoch > args.warm_epoch:
                    self.scheduler.step()

            # Training
            train_loss, train_acc, train_f1 = self.training(args)

            # Model weight in Multi_GPU or Single GPU
            state_dict= self.model.module.state_dict() if args.multi_gpu else self.model.state_dict()

            # Validation
            val_loss, val_acc, val_f1 = self.validate(args, phase='val')

            # Save models
            if val_loss < best_loss:
                early_stopping = 0
                best_epoch = epoch
                best_loss = val_loss
                best_acc = val_acc
                best_f1 = val_f1

                torch.save({'epoch':epoch,
                            'state_dict':state_dict,
                            'optimizer': self.optimizer.state_dict(),
                            'scheduler': self.scheduler.state_dict(),
                    }, os.path.join(save_path, 'best_model.pth'))
                self.logger.info(f'-----------------SAVE:{best_epoch}epoch----------------')
            else:
                early_stopping += 1

            # Early Stopping
            if early_stopping == args.patience:
                break

        self.logger.info(f'\nBest Val Epoch:{best_epoch} | Val Loss:{best_loss:.4f} | Val Acc:{best_acc:.4f} | Val F1:{best_f1:.4f}')
        end = time.time()
        self.logger.info(f'Total Process time:{(end - start) / 60:.3f}Minute')

    # Training
    def training(self, args):
        self.model.train()
        train_loss = AvgMeter()
        train_acc = 0
        preds_list = []
        targets_list = []

        scaler = grad_scaler.GradScaler()
        
        for i, (images, targets) in enumerate(tqdm(self.train_loader)):
            images = torch.tensor(images, device=self.device, dtype=torch.float32)
            targets = torch.tensor(targets, device=self.device, dtype=torch.long)
            
            if self.epoch <= args.warm_epoch:
                self.warmup_scheduler.step()

            self.model.zero_grad(set_to_none=True)
    
            if args.amp:
                with autocast():
                    preds = self.model(images)
                    loss = self.criterion(preds, targets)
                    
                scaler.scale(loss).backward()

                # Gradient Clipping
                if args.clipping is not None:
                    scaler.unscale_(self.optimizer)
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), args.clipping)

                scaler.step(self.optimizer)
                scaler.update()

            else:
                preds = self.model(images)
                loss = self.criterion(preds, targets)
                loss.backward()
                nn.utils.clip_grad_norm_(self.model.parameters(), args.clipping)
                self.optimizer.step()

            if args.scheduler == 'cycle':
                if self.epoch > args.warm_epoch:
                    self.scheduler.step()

            # Metric
            train_acc += (preds.argmax(dim=1) == targets).sum().item()
            preds_list.extend(preds.argmax(dim=1).cpu().detach().numpy())
            targets_list.extend(targets.cpu().detach().numpy())
            # log
            train_loss.update(loss.item(), n=images.size(0))

        train_acc /= len(self.train_loader.dataset)
        train_f1 = f1_score(np.array(targets_list), np.array(preds_list), average='macro')

        self.logger.info(f'Epoch:[{self.epoch:03d}/{args.epochs:03d}]')
        self.logger.info(f'Train Loss:{train_loss.avg:.3f} | Acc:{train_acc:.4f} | F1:{train_f1:.4f}')
        return train_loss.avg, train_acc, train_f1
            
    # Validation or Dev
    def validate(self, args, phase='val'):
        self.model.eval()
        with torch.no_grad():
            val_loss = AvgMeter()
            val_acc = 0
            preds_list = []
            targets_list = []

            for i, (images, targets) in enumerate(self.val_loader):
                images = torch.tensor(images, device=self.device, dtype=torch.float32)
                targets = torch.tensor(targets, device=self.device, dtype=torch.long)

                preds = self.model(images)
                loss = self.criterion(preds, targets)

                # Metric
                val_acc += (preds.argmax(dim=1) == targets).sum().item()
                preds_list.extend(preds.argmax(dim=1).cpu().detach().numpy())
                targets_list.extend(targets.cpu().detach().numpy())

                # log
                val_loss.update(loss.item(), n=images.size(0))
            val_acc /= len(self.val_loader.dataset)
            val_f1 = f1_score(np.array(targets_list), np.array(preds_list), average='macro')

            self.logger.info(f'{phase} Loss:{val_loss.avg:.3f} | Acc:{val_acc:.4f} | F1:{val_f1:.4f}')
        return val_loss.avg, val_acc, val_f1

In [14]:
def main(args):
    print('<---- Training Params ---->')
    
    # Random Seed
    seed = args.seed
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = True

    save_path = os.path.join(args.model_path, (args.exp_num).zfill(3))
    
    # Create model directory
    os.makedirs(save_path, exist_ok=True)
    Trainer(args, save_path)

    return save_path

In [15]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print('Device:', device)
print('Current cuda device:', torch.cuda.current_device())
print('Count of using GPUs:', torch.cuda.device_count())

Device: cuda
Current cuda device: 0
Count of using GPUs: 4


In [16]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
sub = pd.read_csv('./open/sample_submission.csv')
df_train = pd.read_csv('./open/train_df_add_data.csv')
df_test = pd.read_csv('./open/test_df.csv')

In [17]:
import ttach as tta

def predict(encoder_name, test_loader, device, model_path):
    model = Network_test(encoder_name).to(device)
    model.load_state_dict(torch.load(opj(model_path, 'best_model.pth'))['state_dict'])
#     model.eval()
    tta_model = tta.ClassificationTTAWrapper(model, tta.aliases.d4_transform())
    tta_model.eval()
    
    preds_list = []
    with torch.no_grad():
        for images in tqdm(test_loader):
            images = torch.as_tensor(images, device=device, dtype=torch.float32)
            preds = tta_model(images)
            preds = torch.softmax(preds, dim=1)
            preds_list.extend(preds.cpu().tolist())

    return np.array(preds_list)

def ensemble_5fold(model_path_list, test_loader, device):
    predict_list = []
    for model_path in model_path_list:
        prediction = predict(encoder_name= 'tf_efficientnet_b8_ap', test_loader = test_loader, device = device, model_path = model_path)
        predict_list.append(prediction)
    ensemble = (predict_list[0] + predict_list[1] + predict_list[2] + predict_list[3] + predict_list[4])/len(predict_list)

    return ensemble


In [18]:
args.step = 0
models_path = []
for s_fold in range(5): # 5fold
    args.fold = s_fold
    args.exp_num = str(s_fold)
    save_path = main(args)
    models_path.append(save_path)

2022-05-09 11:36:33,049 INFO: {'exp_num': '0', 'data_path': './open', 'Kfold': 5, 'model_path': 'label_results_tf_efficientnet_b8_ap/', 'image_type': 'train_1024', 'class_num': 88, 'model_name': 'tf_efficientnet_b8_ap', 'drop_path_rate': 0.2, 'img_size': 672, 'batch_size': 16, 'epochs': 100, 'optimizer': 'Lamb', 'initial_lr': 0.0005, 'weight_decay': 0.001, 'aug_ver': 2, 'scheduler': 'Reduce', 'warm_epoch': 5, 'max_lr': 0.001, 'min_lr': 5e-05, 'tmax': 145, 'patience': 5, 'clipping': None, 'amp': True, 'multi_gpu': True, 'logging': False, 'num_workers': 4, 'seed': 42, 'step': 0, 'fold': 0}


<---- Training Params ---->
Dataset size:11197
Dataset size:2800


2022-05-09 11:36:34,270 INFO: Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b8_ap-00e169fa.pth)
Downloading: "https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b8_ap-00e169fa.pth" to /root/.cache/torch/hub/checkpoints/tf_efficientnet_b8_ap-00e169fa.pth
100%|██████████| 700/700 [10:28<00:00,  1.11it/s]
2022-05-09 11:48:06,969 INFO: Epoch:[001/100]
2022-05-09 11:48:06,969 INFO: Train Loss:4.126 | Acc:0.0881 | F1:0.0542
2022-05-09 11:49:06,897 INFO: val Loss:3.677 | Acc:0.1929 | F1:0.1012
2022-05-09 11:49:08,508 INFO: -----------------SAVE:1epoch----------------
100%|██████████| 700/700 [09:50<00:00,  1.19it/s]
2022-05-09 11:58:59,047 INFO: Epoch:[002/100]
2022-05-09 11:58:59,048 INFO: Train Loss:2.455 | Acc:0.3251 | F1:0.1926
2022-05-09 11:59:51,135 INFO: val Loss:1.769 | Acc:0.4625 | F1:0.3508
2022-05-09 11:59:53,487 INFO: -----------------SAVE:2epoch---

<---- Training Params ---->
Dataset size:11197
Dataset size:2800


2022-05-09 14:28:55,313 INFO: Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b8_ap-00e169fa.pth)
100%|██████████| 700/700 [09:54<00:00,  1.18it/s]
2022-05-09 14:38:50,113 INFO: Epoch:[001/100]
2022-05-09 14:38:50,114 INFO: Train Loss:4.121 | Acc:0.0894 | F1:0.0548
2022-05-09 14:39:44,054 INFO: val Loss:3.670 | Acc:0.2079 | F1:0.1071
2022-05-09 14:39:45,755 INFO: -----------------SAVE:1epoch----------------
100%|██████████| 700/700 [09:54<00:00,  1.18it/s]
2022-05-09 14:49:40,003 INFO: Epoch:[002/100]
2022-05-09 14:49:40,003 INFO: Train Loss:2.473 | Acc:0.3240 | F1:0.1884
2022-05-09 14:50:33,497 INFO: val Loss:1.747 | Acc:0.4332 | F1:0.3242
2022-05-09 14:50:35,907 INFO: -----------------SAVE:2epoch----------------
100%|██████████| 700/700 [09:57<00:00,  1.17it/s]
2022-05-09 15:00:33,210 INFO: Epoch:[003/100]
2022-05-09 15:00:33,210 INFO: Train Loss:1.081 | Acc:0.5861 | F1:0.5335
2022-05-09 15:01:26,70

<---- Training Params ---->
Dataset size:11198
Dataset size:2799


2022-05-09 17:10:25,782 INFO: Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b8_ap-00e169fa.pth)
100%|██████████| 700/700 [09:59<00:00,  1.17it/s]
2022-05-09 17:20:25,649 INFO: Epoch:[001/100]
2022-05-09 17:20:25,650 INFO: Train Loss:4.113 | Acc:0.0945 | F1:0.0599
2022-05-09 17:21:22,962 INFO: val Loss:3.685 | Acc:0.1965 | F1:0.1052
2022-05-09 17:21:24,601 INFO: -----------------SAVE:1epoch----------------
100%|██████████| 700/700 [09:48<00:00,  1.19it/s]
2022-05-09 17:31:12,674 INFO: Epoch:[002/100]
2022-05-09 17:31:12,674 INFO: Train Loss:2.468 | Acc:0.3237 | F1:0.1866
2022-05-09 17:32:03,771 INFO: val Loss:1.847 | Acc:0.4123 | F1:0.3065
2022-05-09 17:32:06,083 INFO: -----------------SAVE:2epoch----------------
100%|██████████| 700/700 [09:38<00:00,  1.21it/s]
2022-05-09 17:41:44,902 INFO: Epoch:[003/100]
2022-05-09 17:41:44,902 INFO: Train Loss:1.083 | Acc:0.5839 | F1:0.5338
2022-05-09 17:42:38,50

<---- Training Params ---->
Dataset size:11198
Dataset size:2799


2022-05-09 22:01:28,037 INFO: Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b8_ap-00e169fa.pth)
100%|██████████| 700/700 [10:00<00:00,  1.17it/s]
2022-05-09 22:11:29,051 INFO: Epoch:[001/100]
2022-05-09 22:11:29,051 INFO: Train Loss:4.113 | Acc:0.0876 | F1:0.0532
2022-05-09 22:12:23,795 INFO: val Loss:3.680 | Acc:0.2176 | F1:0.1086
2022-05-09 22:12:25,455 INFO: -----------------SAVE:1epoch----------------
100%|██████████| 700/700 [10:11<00:00,  1.15it/s]
2022-05-09 22:22:36,631 INFO: Epoch:[002/100]
2022-05-09 22:22:36,632 INFO: Train Loss:2.474 | Acc:0.3223 | F1:0.1813
2022-05-09 22:23:31,015 INFO: val Loss:1.838 | Acc:0.4105 | F1:0.3009
2022-05-09 22:23:33,330 INFO: -----------------SAVE:2epoch----------------
100%|██████████| 700/700 [10:13<00:00,  1.14it/s]
2022-05-09 22:33:47,053 INFO: Epoch:[003/100]
2022-05-09 22:33:47,054 INFO: Train Loss:1.091 | Acc:0.5850 | F1:0.5357
2022-05-09 22:34:40,91

<---- Training Params ---->
Dataset size:11198
Dataset size:2799


2022-05-10 01:38:23,214 INFO: Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b8_ap-00e169fa.pth)
100%|██████████| 700/700 [09:56<00:00,  1.17it/s]
2022-05-10 01:48:20,272 INFO: Epoch:[001/100]
2022-05-10 01:48:20,273 INFO: Train Loss:4.118 | Acc:0.0914 | F1:0.0583
2022-05-10 01:49:13,931 INFO: val Loss:3.655 | Acc:0.2165 | F1:0.1116
2022-05-10 01:49:15,788 INFO: -----------------SAVE:1epoch----------------
100%|██████████| 700/700 [09:49<00:00,  1.19it/s]
2022-05-10 01:59:04,862 INFO: Epoch:[002/100]
2022-05-10 01:59:04,862 INFO: Train Loss:2.460 | Acc:0.3300 | F1:0.1950
2022-05-10 01:59:58,319 INFO: val Loss:1.805 | Acc:0.4237 | F1:0.3140
2022-05-10 02:00:00,766 INFO: -----------------SAVE:2epoch----------------
100%|██████████| 700/700 [09:57<00:00,  1.17it/s]
2022-05-10 02:09:58,336 INFO: Epoch:[003/100]
2022-05-10 02:09:58,336 INFO: Train Loss:1.070 | Acc:0.5906 | F1:0.5399
2022-05-10 02:10:50,80

In [19]:
img_size = 672

test_transform = get_train_augmentation(img_size=img_size, ver=1)
test_dataset = Test_dataset(df_test, test_transform)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=0)

Test Dataset size:2154


In [20]:
# models_path = ['./label_results/000', './label_results/001', './label_results/002', './label_results/003', './label_results/004']

In [21]:
ensemble = ensemble_5fold(models_path, test_loader, device)

2022-05-10 04:20:37,080 INFO: Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b8_ap-00e169fa.pth)
100%|██████████| 34/34 [09:42<00:00, 17.13s/it]
2022-05-10 04:30:22,779 INFO: Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b8_ap-00e169fa.pth)
100%|██████████| 34/34 [09:34<00:00, 16.89s/it]
2022-05-10 04:40:00,169 INFO: Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b8_ap-00e169fa.pth)
100%|██████████| 34/34 [09:33<00:00, 16.87s/it]
2022-05-10 04:49:36,761 INFO: Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights/tf_efficientnet_b8_ap-00e169fa.pth)
100%|██████████| 34/34 [09:34<00:00, 16.89s/it]
2022-05-10 04:59:14,094 INFO: Loading pretrained weights from url (https

In [22]:
f_pred = ensemble.argmax(axis=1).tolist()
f_pred

[62,
 28,
 72,
 64,
 63,
 50,
 17,
 32,
 40,
 39,
 0,
 76,
 85,
 60,
 62,
 2,
 25,
 20,
 21,
 47,
 66,
 84,
 0,
 58,
 51,
 55,
 51,
 60,
 40,
 78,
 55,
 9,
 59,
 83,
 84,
 82,
 40,
 9,
 41,
 44,
 45,
 10,
 60,
 14,
 44,
 63,
 68,
 15,
 84,
 55,
 9,
 72,
 31,
 35,
 84,
 46,
 20,
 21,
 38,
 15,
 73,
 48,
 18,
 87,
 13,
 36,
 72,
 13,
 53,
 43,
 25,
 76,
 32,
 61,
 41,
 84,
 9,
 40,
 9,
 9,
 50,
 76,
 57,
 77,
 72,
 1,
 84,
 22,
 40,
 83,
 36,
 84,
 9,
 42,
 55,
 65,
 72,
 72,
 45,
 45,
 9,
 21,
 52,
 17,
 76,
 2,
 85,
 49,
 72,
 34,
 36,
 58,
 69,
 72,
 84,
 15,
 28,
 52,
 13,
 41,
 55,
 19,
 45,
 72,
 77,
 70,
 61,
 72,
 54,
 55,
 28,
 46,
 50,
 42,
 15,
 25,
 57,
 2,
 52,
 78,
 72,
 33,
 7,
 9,
 84,
 35,
 23,
 84,
 85,
 76,
 84,
 79,
 72,
 52,
 83,
 63,
 68,
 63,
 33,
 45,
 57,
 60,
 57,
 11,
 21,
 38,
 49,
 74,
 83,
 10,
 21,
 24,
 56,
 39,
 9,
 50,
 55,
 86,
 15,
 84,
 40,
 27,
 3,
 11,
 51,
 20,
 62,
 45,
 79,
 73,
 54,
 18,
 84,
 32,
 52,
 57,
 81,
 85,
 84,
 33,
 72,
 12,
 58,
 1,

In [23]:
train_y = pd.read_csv("./open/train_df_add_data.csv")

train_labels = train_y["label"]

label_unique = sorted(np.unique(train_labels))
label_unique = {key:value for key,value in zip(label_unique, range(len(label_unique)))}


In [24]:
label_decoder = {val:key for key, val in label_unique.items()}

f_result = [label_decoder[result] for result in f_pred]

In [25]:
f_result

['tile-glue_strip',
 'grid-good',
 'transistor-good',
 'tile-gray_stroke',
 'tile-good',
 'pill-crack',
 'capsule-scratch',
 'hazelnut-cut',
 'leather-good',
 'leather-glue',
 'bottle-broken_large',
 'wood-good',
 'zipper-rough',
 'screw-thread_top',
 'tile-glue_strip',
 'bottle-contamination',
 'grid-bent',
 'carpet-cut',
 'carpet-good',
 'pill-color',
 'tile-rough',
 'zipper-good',
 'bottle-broken_large',
 'screw-scratch_neck',
 'pill-faulty_imprint',
 'screw-good',
 'pill-faulty_imprint',
 'screw-thread_top',
 'leather-good',
 'wood-liquid',
 'screw-good',
 'cable-good',
 'screw-thread_side',
 'zipper-fabric_interior',
 'zipper-good',
 'zipper-fabric_border',
 'leather-good',
 'cable-good',
 'leather-poke',
 'metal_nut-flip',
 'metal_nut-good',
 'cable-missing_cable',
 'screw-thread_top',
 'capsule-faulty_imprint',
 'metal_nut-flip',
 'tile-good',
 'toothbrush-good',
 'capsule-good',
 'zipper-good',
 'screw-good',
 'cable-good',
 'transistor-good',
 'hazelnut-crack',
 'hazelnut-prin

In [26]:
submission = pd.read_csv("./open/sample_submission.csv")

submission["label"] = f_result

submission

Unnamed: 0,index,label
0,0,tile-glue_strip
1,1,grid-good
2,2,transistor-good
3,3,tile-gray_stroke
4,4,tile-good
...,...,...
2149,2149,tile-gray_stroke
2150,2150,screw-good
2151,2151,grid-good
2152,2152,cable-good


In [27]:
submission.to_csv("./submission/label_result_add_0509_label_tf_efficientnet_b8_ap_tta.csv", index = False)