In [1]:
import os
import cv2
import time
import random
import logging
import easydict
import numpy as np
import pandas as pd
from tqdm import tqdm
from os.path import join as opj
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
from PIL import Image
from natsort import natsorted

import timm
import torch
import torch.nn as nn
import torch_optimizer as optim
from torch.utils.data import Dataset, DataLoader
from torch.cuda.amp import autocast, grad_scaler
from torchvision import transforms
from torch import Tensor
from torchvision.transforms import functional as F
import torch.cuda.amp as amp
from adamp import AdamP, SGDP

import warnings
warnings.filterwarnings('ignore')

In [2]:
DATA_DIR = './open'

train_df = pd.read_csv(os.path.join(DATA_DIR, 'train_df_add2.csv'))
# train_df = train_df.drop('index', axis=1)
test_df = pd.read_csv(os.path.join(DATA_DIR, 'test_df.csv'))

print(train_df.head())
print(test_df.head())
print(train_df.shape)
print(test_df.shape)

   Unnamed: 0  file_name       class state            label  label2
0           0  10000.png  transistor  good  transistor-good      72
1           1  10001.png     capsule  good     capsule-good      15
2           2  10002.png  transistor  good  transistor-good      72
3           3  10003.png        wood  good        wood-good      76
4           4  10004.png      bottle  good      bottle-good       3
   index  file_name
0      0  20000.png
1      1  20001.png
2      2  20002.png
3      3  20003.png
4      4  20004.png
(6221, 6)
(2154, 2)


In [3]:
train_df['label'].unique()

array(['transistor-good', 'capsule-good', 'wood-good', 'bottle-good',
       'screw-good', 'cable-bent_wire', 'carpet-hole', 'hazelnut-good',
       'pill-pill_type', 'cable-good', 'metal_nut-scratch', 'pill-good',
       'screw-thread_side', 'zipper-fabric_border', 'leather-good',
       'pill-scratch', 'toothbrush-good', 'hazelnut-crack',
       'screw-manipulated_front', 'zipper-good', 'tile-good',
       'carpet-good', 'metal_nut-good', 'bottle-contamination',
       'grid-good', 'zipper-split_teeth', 'pill-crack', 'wood-combined',
       'pill-color', 'screw-thread_top', 'cable-missing_cable',
       'capsule-squeeze', 'zipper-rough', 'capsule-crack', 'capsule-poke',
       'metal_nut-flip', 'carpet-metal_contamination', 'metal_nut-color',
       'transistor-bent_lead', 'zipper-fabric_interior', 'leather-fold',
       'tile-glue_strip', 'screw-scratch_neck', 'screw-scratch_head',
       'hazelnut-cut', 'bottle-broken_large', 'bottle-broken_small',
       'leather-cut', 'cable-cut_

In [4]:
def rotation(img, angle):
    angle = int(random.uniform(-angle, angle))
    h, w = img.shape[:2]
    M = cv2.getRotationMatrix2D((int(w/2), int(h/2)), angle, 1)
    img = cv2.warpAffine(img, M, (w, h)) 
    return img

In [5]:
# import imutils
# oslabel = list(train_df['label'].unique())


# for label in tqdm(oslabel):
#     if 'good' not in label:
#         print(label)
#         idx = 0
#         one_sample = train_df[train_df['label'] == label].reset_index(drop=True)
#         images_list = natsorted(one_sample['file_name'])
#         print(images_list)
#         for _, image_name in enumerate(images_list):
#             image = np.array(Image.open(opj('./open/train_add/', image_name)).convert('RGB'))
            
#             aug_img = rotation(image, 30)  
#             aug_img = cv2.resize(aug_img, dsize=(1024, 1024))
#             aug_img = cv2.cvtColor(aug_img, cv2.COLOR_BGR2RGB)
#             save_path = opj('./open/train_add', f'{label}_{idx}.png')
#             save_name = f'{label}_{idx}.png'
#             idx += 1
#             cv2.imwrite(save_path, aug_img)
#             train_df.loc[len(train_df)] = [save_name] + one_sample.iloc[0][1:].values.tolist()

# for label in tqdm(oslabel):
#     if 'good' not in label:
#         print(label)
#         idx = 0
#         one_sample = train_df[train_df['label'] == label].reset_index(drop=True)
#         images_list = natsorted(one_sample['file_name'])
#         print(images_list)
#         for _, image_name in enumerate(images_list):
#             image = np.array(Image.open(opj('./open/train_add/', image_name)).convert('RGB'))
            
#             aug_img = cv2.flip(image, 1)
#             aug_img = cv2.resize(aug_img, dsize=(1024, 1024))
#             aug_img = cv2.cvtColor(aug_img, cv2.COLOR_BGR2RGB)
#             save_path = opj('./open/train_add', f'{label}_{idx}_flip.png')
#             save_name = f'{label}_{idx}_flip.png'
#             idx += 1
#             cv2.imwrite(save_path, aug_img)
#             train_df.loc[len(train_df)] = [save_name] + one_sample.iloc[0][1:].values.tolist()    
            
   
            
# train_df.to_csv('./open/train_df_add.csv', index=False)

In [6]:
# train_y = pd.read_csv("./open/train_df_add.csv")

# train_labels = train_y["label"]

# label_unique = sorted(np.unique(train_labels))
# label_unique = {key:value for key,value in zip(label_unique, range(len(label_unique)))}

# train_labels = [label_unique[k] for k in train_labels]
# train_labels

In [7]:
# train_df['label2'] = train_labels

In [8]:
# train_df.to_csv('./open/train_df_add2.csv')

In [9]:
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"  # Arrange GPU devices starting from 0
os.environ["CUDA_VISIBLE_DEVICES"]= "0,1"  # Set the GPUs 2 and 3 to use

In [10]:
class_num = len(train_df.label2.unique())

In [11]:
args = easydict.EasyDict(
    {'exp_num':'0',
     
     # Path settings
     'data_path':'./open',
     'Kfold':5,
     'model_path':'label_results/',
     'image_type':'train_1024', 
     'class_num' : class_num,

     # Model parameter settings
     'model_name':'regnety_040',
     'drop_path_rate':0.2,
     
     # Training parameter settings
     ## Base Parameter
     'img_size':512,
     'batch_size':16,
     'epochs':100,
     'optimizer':'Lamb',
     'initial_lr':5e-4,
     'weight_decay':1e-3,

     ## Augmentation
     'aug_ver':2,

     ## Scheduler (OnecycleLR)
     'scheduler':'Reduce',
     'warm_epoch':5,
     'max_lr':1e-3,

     ### Cosine Annealing
     'min_lr':5e-5,
     'tmax':145,

     ## etc.
     'patience': 7,
     'clipping':None,

     # Hardware settings
     'amp':True,
     'multi_gpu':True,
     'logging':False,
     'num_workers':4,
     'seed':42
     
     
    })

In [12]:
# Warmup Learning rate scheduler
from torch.optim.lr_scheduler import _LRScheduler
class WarmUpLR(_LRScheduler):
    """warmup_training learning rate scheduler
    Args:
        optimizer: optimzier(e.g. SGD)
        total_iters: totoal_iters of warmup phase
    """
    def __init__(self, optimizer, total_iters, last_epoch=-1):
        
        self.total_iters = total_iters
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        """we will use the first m batches, and set the learning
        rate to base_lr * m / total_iters
        """
        return [base_lr * self.last_epoch / (self.total_iters + 1e-8) for base_lr in self.base_lrs]

# Logging
def get_root_logger(logger_name='basicsr',
                    log_level=logging.INFO,
                    log_file=None):

    logger = logging.getLogger(logger_name)
    # if the logger has been initialized, just return it
    if logger.hasHandlers():
        return logger

    format_str = '%(asctime)s %(levelname)s: %(message)s'
    logging.basicConfig(format=format_str, level=log_level)

    if log_file is not None:
        file_handler = logging.FileHandler(log_file, 'w')
        file_handler.setFormatter(logging.Formatter(format_str))
        file_handler.setLevel(log_level)
        logger.addHandler(file_handler)

    return logger

class AvgMeter(object):
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0
        self.losses = []

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count
        self.losses.append(val)

In [13]:
class RandomRotation(transforms.RandomRotation):
    def __init__(self, p: float, degrees: int):
        super(RandomRotation, self).__init__(degrees)
        self.p = p

    def forward(self, img):
        if torch.rand(1) < self.p:
            fill = self.fill
            if isinstance(img, Tensor):
                if isinstance(fill, (int, float)):
                    fill = [float(fill)] * F.get_image_num_channels(img)
                else:
                    fill = [float(f) for f in fill]
            angle = self.get_params(self.degrees)

            img = F.rotate(img, angle, self.resample, self.expand, self.center, fill)
        return img

In [14]:
class Train_Dataset(Dataset):
    def __init__(self, df, transform=None):
        self.img_path = df['file_name'].values
        self.target = df['label2'].values 
        self.transform = transform

        print(f'Dataset size:{len(self.img_path)}')

    def __getitem__(self, idx):
#         image = cv2.imread(opj('./open/train_add/', self.img_path[idx])).astype(np.float32)
#         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0
        
#         target = self.target[idx]

#         if self.transform is not None:
#             image = self.transform(torch.from_numpy(image.transpose(2,0,1)))
        
        image = Image.open(opj('./open/train_add/', self.img_path[idx])).convert('RGB')
        image = self.transform(image)
#         augmentation = random.randint(0,2)
#             if augmentation==1:
#                 img = img[::-1].copy()
#             elif augmentation==2:
#                 img = img[:,::-1].copy()
#         img = transforms.ToTensor()(img)
        target = self.target[idx]

        return image, target

    def __len__(self):
        return len(self.img_path)

class Test_dataset(Dataset):
    def __init__(self, df, transform=None):
        self.img_path = df['file_name'].values
        self.transform = transform

        print(f'Test Dataset size:{len(self.img_path)}')

    def __getitem__(self, idx):

#         image = cv2.imread(opj('./open/test/', self.img_path[idx])).astype(np.float32)
#         image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) / 255.0

#         if self.transform is not None:
#             image = self.transform(torch.from_numpy(image.transpose(2,0,1)))
        image = Image.open(opj('./open/test/', self.img_path[idx])).convert('RGB')
        image = self.transform(image)

        return image

    def __len__(self):
        return len(self.img_path)

def get_loader(df, phase: str, batch_size, shuffle,
               num_workers, transform):
    if phase == 'test':
        dataset = Test_dataset(df, transform)
        data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers, pin_memory=True)
    else:
        dataset = Train_Dataset(df, transform)
        data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers, 
                                 pin_memory=True,
                                 drop_last=False)
    return data_loader

def get_train_augmentation(img_size, ver):
    if ver==1: # for validset
        transform = transforms.Compose([
                transforms.Resize((img_size, img_size)),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225]),
            ])

    if ver == 2:

        transform = transforms.Compose([
                transforms.RandomHorizontalFlip(p=0.3),
                transforms.RandomVerticalFlip(p=0.3),
#                 transforms.RandomCrop(500),
                transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
                transforms.RandomAffine((-20,20)),
                RandomRotation(0.5, degrees=5),
                transforms.Resize((img_size, img_size)),
                transforms.ToTensor(), 
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225]),
            ])
    
    
    return transform

In [15]:
class Network(nn.Module):
    def __init__(self, args):
        super().__init__()
        self.model_ft = timm.create_model(args.model_name, pretrained=True,
                                    drop_path_rate=args.drop_path_rate,
                                    )
        num_head = self.model_ft.head.fc.in_features
        self.model_ft.head.fc = nn.Linear(num_head, 88)

#         self.model_ft = coatnet_3()
#         num_ftrs = self.model_ft.fc.in_features
#         self.model_ft.fc = nn.Linear(num_ftrs, args.class_num)
        
    def forward(self, x):
        out = self.model_ft(x)
        return out

class Network_test(nn.Module):
    def __init__(self, encoder_name):
        super().__init__()
        self.model_ft = timm.create_model(args.model_name, pretrained=True,
                                    drop_path_rate=args.drop_path_rate,
                                    )
        num_head = self.model_ft.head.fc.in_features
        self.model_ft.head.fc = nn.Linear(num_head, 88)

#         self.model_ft = coatnet_3()
#         num_ftrs = self.model_ft.fc.in_features
#         self.model_ft.fc = nn.Linear(num_ftrs, args.class_num)

    def forward(self, x):
        out = self.model_ft(x)
        return out

In [16]:
# # weighted crossentropy loss를 위한 weight 계산 함수
# def get_class_weight():
#     return 1 / train_df['label2'].value_counts().sort_index().values

# class_weight = get_class_weight()


class_num = train_df.groupby(["label2"])["label2"].count().tolist()
class_weight = torch.tensor(np.max(class_num) / class_num).to("cuda", dtype=torch.float)
print(f"class_weight: {class_weight}")

class_weight: tensor([ 9.7750,  8.8864,  8.8864,  1.8708, 13.9643, 16.2917, 16.2917, 13.9643,
        19.5500,  1.7455, 16.2917, 19.5500, 19.5500,  8.1458,  8.8864,  1.7854,
         8.8864,  8.1458,  9.7750,  9.7750, 10.8611,  1.3964, 10.8611, 10.8611,
         9.7750, 16.2917, 16.2917, 16.2917,  1.4811, 16.2917, 16.2917, 10.8611,
        10.8611,  1.0000, 10.8611, 10.8611,  9.7750,  9.7750, 10.8611,  9.7750,
         1.5959, 10.8611,  7.5192,  8.8864,  8.1458,  1.7773,  8.1458,  7.5192,
        10.8611,  8.8864,  7.5192,  9.7750,  1.4644, 19.5500,  8.1458,  1.2219,
         8.1458,  8.1458,  7.5192,  8.1458,  8.1458, 10.8611, 10.8611,  1.7000,
        12.2188, 10.8611, 12.2188,  6.5167,  6.5167, 19.5500, 19.5500, 19.5500,
         1.8357, 19.5500, 24.4375, 16.2917,  1.5830, 19.5500, 19.5500,  8.8864,
         9.7750, 12.2188, 10.8611, 12.2188,  1.6292, 10.8611, 10.8611, 12.2188],
       device='cuda:0')


In [17]:
from collections import defaultdict
from itertools import chain
from torch.optim import Optimizer
import torch
import warnings

class Lookahead(Optimizer):
    def __init__(self, optimizer, k=5, alpha=0.5):
        self.optimizer = optimizer
        self.k = k
        self.alpha = alpha
        self.param_groups = self.optimizer.param_groups
        self.state = defaultdict(dict)
        self.fast_state = self.optimizer.state
        for group in self.param_groups:
            group["counter"] = 0
    
    def update(self, group):
        for fast in group["params"]:
            param_state = self.state[fast]
            if "slow_param" not in param_state:
                param_state["slow_param"] = torch.zeros_like(fast.data)
                param_state["slow_param"].copy_(fast.data)
            slow = param_state["slow_param"]
            slow += (fast.data - slow) * self.alpha
            fast.data.copy_(slow)
    
    def update_lookahead(self):
        for group in self.param_groups:
            self.update(group)

    def step(self, closure=None):
        loss = self.optimizer.step(closure)
        for group in self.param_groups:
            if group["counter"] == 0:
                self.update(group)
            group["counter"] += 1
            if group["counter"] >= self.k:
                group["counter"] = 0
        return loss

    def state_dict(self):
        fast_state_dict = self.optimizer.state_dict()
        slow_state = {
            (id(k) if isinstance(k, torch.Tensor) else k): v
            for k, v in self.state.items()
        }
        fast_state = fast_state_dict["state"]
        param_groups = fast_state_dict["param_groups"]
        return {
            "fast_state": fast_state,
            "slow_state": slow_state,
            "param_groups": param_groups,
        }

    def load_state_dict(self, state_dict):
        slow_state_dict = {
            "state": state_dict["slow_state"],
            "param_groups": state_dict["param_groups"],
        }
        fast_state_dict = {
            "state": state_dict["fast_state"],
            "param_groups": state_dict["param_groups"],
        }
        super(Lookahead, self).load_state_dict(slow_state_dict)
        self.optimizer.load_state_dict(fast_state_dict)
        self.fast_state = self.optimizer.state

    def add_param_group(self, param_group):
        param_group["counter"] = 0
        self.optimizer.add_param_group(param_group)

In [18]:
class FocalLoss(nn.Module):
    """
    https://dacon.io/competitions/official/235585/codeshare/1796
    """

    def __init__(self, gamma=2.0, eps=1e-7):
        super(FocalLoss, self).__init__()
        self.gamma = gamma
        # print(self.gamma)
        self.eps = eps
        self.ce = nn.CrossEntropyLoss(reduction="none")

    def forward(self, input, target):
        logp = self.ce(input, target)
        p = torch.exp(-logp)
        loss = (1 - p) ** self.gamma * logp
        return loss.mean()

In [19]:
class Trainer():
    def __init__(self, args, save_path):
        '''
        args: arguments
        save_path: Model 가중치 저장 경로
        '''
        super(Trainer, self).__init__()
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        # Logging
        log_file = os.path.join(save_path, 'log.log')
        self.logger = get_root_logger(logger_name='IR', log_level=logging.INFO, log_file=log_file)
        self.logger.info(args)
        # self.logger.info(args.tag)

        # Train, Valid Set load
        ############################################################################
        if args.step == 0 :
            df_train = pd.read_csv(opj(args.data_path, 'train_df_add2.csv'))
        else :
            df_train = pd.read_csv(opj(args.data_path, f'train_{args.step}step.csv'))

#         if args.image_type is not None:
#             df_train['img_path'] = df_train['img_path'].apply(lambda x:x.replace('train_imgs', args.image_type))
#             df_train['img_path'] = df_train['img_path'].apply(lambda x:x.replace('test_imgs', 'test_1024'))

        kf = StratifiedKFold(n_splits=args.Kfold, shuffle=True, random_state=args.seed)
        for fold, (train_idx, val_idx) in enumerate(kf.split(range(len(df_train)), y=df_train['label2'])):
            df_train.loc[val_idx, 'fold'] = fold
        val_idx = list(df_train[df_train['fold'] == int(args.fold)].index)

        df_val = df_train[df_train['fold'] == args.fold].reset_index(drop=True)
        df_train = df_train[df_train['fold'] != args.fold].reset_index(drop=True)

        # Augmentation
        self.train_transform = get_train_augmentation(img_size=args.img_size, ver=args.aug_ver)
        self.test_transform = get_train_augmentation(img_size=args.img_size, ver=1)

        # TrainLoader
        self.train_loader = get_loader(df_train, phase='train', batch_size=args.batch_size, shuffle=True,
                                       num_workers=args.num_workers, transform=self.train_transform)
        self.val_loader = get_loader(df_val, phase='train', batch_size=args.batch_size, shuffle=False,
                                       num_workers=args.num_workers, transform=self.test_transform)

        # Network
        self.model = Network(args).to(self.device)

        # Loss
        self.criterion = nn.CrossEntropyLoss(weight=class_weight)
#         self.criterion = FocalLoss()
#         self.criterion = CutMixCrossEntropyLoss(True)
        
        # Optimizer & Scheduler
#         self.optimizer = Lookahead(torch.optim.Adam(self.model.parameters(), lr=args.initial_lr), k=5, alpha=0.5)
        self.optimizer = optim.Lamb(self.model.parameters(), lr=args.initial_lr)
        
        iter_per_epoch = len(self.train_loader)
        self.warmup_scheduler = WarmUpLR(self.optimizer, iter_per_epoch * args.warm_epoch)

        if args.scheduler == 'step':
            self.scheduler = torch.optim.lr_scheduler.MultiStepLR(self.optimizer, milestones=args.milestone, gamma=args.lr_factor, verbose=True)
        elif args.scheduler == 'cos':
            tmax = args.tmax # half-cycle 
            self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, T_max = tmax, eta_min=args.min_lr, verbose=True)
        elif args.scheduler == 'cycle':
            self.scheduler = torch.optim.lr_scheduler.OneCycleLR(self.optimizer, max_lr=args.max_lr, steps_per_epoch=iter_per_epoch, epochs=args.epochs)
        else:
            self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, patience=5, factor=0.5, mode="max", verbose=True)
            
        if args.multi_gpu:
            self.model = nn.DataParallel(self.model).to(self.device)

        # Train / Validate
        best_loss = np.inf
        best_acc = 0
        best_epoch = 0
        early_stopping = 0
        start = time.time()
        for epoch in range(1, args.epochs+1):
            self.epoch = epoch

            if args.scheduler == 'cos':
                if epoch > args.warm_epoch:
                    self.scheduler.step()

            # Training
            train_loss, train_acc, train_f1 = self.training(args)

            # Model weight in Multi_GPU or Single GPU
            state_dict= self.model.module.state_dict() if args.multi_gpu else self.model.state_dict()

            # Validation
            val_loss, val_acc, val_f1 = self.validate(args, phase='val')

            # Save models
            if val_loss < best_loss:
                early_stopping = 0
                best_epoch = epoch
                best_loss = val_loss
                best_acc = val_acc
                best_f1 = val_f1

                torch.save({'epoch':epoch,
                            'state_dict':state_dict,
                            'optimizer': self.optimizer.state_dict(),
                            'scheduler': self.scheduler.state_dict(),
                    }, os.path.join(save_path, 'best_model.pth'))
                self.logger.info(f'-----------------SAVE:{best_epoch}epoch----------------')
            else:
                early_stopping += 1

            # Early Stopping
            if early_stopping == args.patience:
                break

        self.logger.info(f'\nBest Val Epoch:{best_epoch} | Val Loss:{best_loss:.4f} | Val Acc:{best_acc:.4f} | Val F1:{best_f1:.4f}')
        end = time.time()
        self.logger.info(f'Total Process time:{(end - start) / 60:.3f}Minute')

    # Training
    def training(self, args):
        self.model.train()
        train_loss = AvgMeter()
        train_acc = 0
        preds_list = []
        targets_list = []

        scaler = grad_scaler.GradScaler()
        
        for i, (images, targets) in enumerate(tqdm(self.train_loader)):
            images = torch.tensor(images, device=self.device, dtype=torch.float32)
            targets = torch.tensor(targets, device=self.device, dtype=torch.long)
            
            if self.epoch <= args.warm_epoch:
                self.warmup_scheduler.step()

            self.model.zero_grad(set_to_none=True)
    
            if args.amp:
                with autocast():
                    preds = self.model(images)
                    loss = self.criterion(preds, targets)
                    
                scaler.scale(loss).backward()

                # Gradient Clipping
                if args.clipping is not None:
                    scaler.unscale_(self.optimizer)
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(), args.clipping)

                scaler.step(self.optimizer)
                scaler.update()

            else:
                preds = self.model(images)
                loss = self.criterion(preds, targets)
                loss.backward()
                nn.utils.clip_grad_norm_(self.model.parameters(), args.clipping)
                self.optimizer.step()

            if args.scheduler == 'cycle':
                if self.epoch > args.warm_epoch:
                    self.scheduler.step()

            # Metric
            train_acc += (preds.argmax(dim=1) == targets).sum().item()
            preds_list.extend(preds.argmax(dim=1).cpu().detach().numpy())
            targets_list.extend(targets.cpu().detach().numpy())
            # log
            train_loss.update(loss.item(), n=images.size(0))

        train_acc /= len(self.train_loader.dataset)
        train_f1 = f1_score(np.array(targets_list), np.array(preds_list), average='macro')

        self.logger.info(f'Epoch:[{self.epoch:03d}/{args.epochs:03d}]')
        self.logger.info(f'Train Loss:{train_loss.avg:.3f} | Acc:{train_acc:.4f} | F1:{train_f1:.4f}')
        return train_loss.avg, train_acc, train_f1
            
    # Validation or Dev
    def validate(self, args, phase='val'):
        self.model.eval()
        with torch.no_grad():
            val_loss = AvgMeter()
            val_acc = 0
            preds_list = []
            targets_list = []

            for i, (images, targets) in enumerate(self.val_loader):
                images = torch.tensor(images, device=self.device, dtype=torch.float32)
                targets = torch.tensor(targets, device=self.device, dtype=torch.long)

                preds = self.model(images)
                loss = self.criterion(preds, targets)

                # Metric
                val_acc += (preds.argmax(dim=1) == targets).sum().item()
                preds_list.extend(preds.argmax(dim=1).cpu().detach().numpy())
                targets_list.extend(targets.cpu().detach().numpy())

                # log
                val_loss.update(loss.item(), n=images.size(0))
            val_acc /= len(self.val_loader.dataset)
            val_f1 = f1_score(np.array(targets_list), np.array(preds_list), average='macro')

            self.logger.info(f'{phase} Loss:{val_loss.avg:.3f} | Acc:{val_acc:.4f} | F1:{val_f1:.4f}')
        return val_loss.avg, val_acc, val_f1

In [20]:
def main(args):
    print('<---- Training Params ---->')
    
    # Random Seed
    seed = args.seed
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = True

    save_path = os.path.join(args.model_path, (args.exp_num).zfill(3))
    
    # Create model directory
    os.makedirs(save_path, exist_ok=True)
    Trainer(args, save_path)

    return save_path

In [21]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print('Device:', device)
print('Current cuda device:', torch.cuda.current_device())
print('Count of using GPUs:', torch.cuda.device_count())

Device: cuda
Current cuda device: 0
Count of using GPUs: 2


In [22]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
sub = pd.read_csv('./open/sample_submission.csv')
df_train = pd.read_csv('./open/train_df_add2.csv')
df_test = pd.read_csv('./open/test_df.csv')

In [23]:
def predict(encoder_name, test_loader, device, model_path):
    model = Network_test(encoder_name).to(device)
    model.load_state_dict(torch.load(opj(model_path, 'best_model.pth'))['state_dict'])
    model.eval()
    preds_list = []
    with torch.no_grad():
        for images in tqdm(test_loader):
            images = torch.as_tensor(images, device=device, dtype=torch.float32)
            preds = model(images)
            preds = torch.softmax(preds, dim=1)
            preds_list.extend(preds.cpu().tolist())

    return np.array(preds_list)

def ensemble_5fold(model_path_list, test_loader, device):
    predict_list = []
    for model_path in model_path_list:
        prediction = predict(encoder_name= 'regnety_040', test_loader = test_loader, device = device, model_path = model_path)
        predict_list.append(prediction)
    ensemble = (predict_list[0] + predict_list[1] + predict_list[2] + predict_list[3] + predict_list[4])/len(predict_list)

    return ensemble

def make_pseudo_df(train_df, test_df, ensemble, step, threshold = 0.9, z_sample = 500): 
    train_df_copy = train_df.copy()
    test_df_copy = test_df.copy()

    test_df_copy['disease'] = np.nan
    test_df_copy['disease_code'] = ensemble.argmax(axis=1)
    pseudo_test_df = test_df_copy.iloc[np.where(ensemble > threshold)[0]].reset_index(drop=True)
    z_idx  = pseudo_test_df[pseudo_test_df['disease_code'] == 0].sample(n=z_sample, random_state=42).index.tolist()
    ot_idx = pseudo_test_df[pseudo_test_df['disease_code'].isin([*range(1,8)])].index.tolist()
    pseudo_test_df = pseudo_test_df.iloc[z_idx + ot_idx]

    train_df_copy = train_df_copy.append(pseudo_test_df, ignore_index=True).reset_index(drop=True) # reset_index
    # print(f'Make train_{step}step.csv')
    train_df_copy.to_csv(f'../data/train_{step}step.csv', index=False)

In [24]:
args.step = 0
models_path = []
for s_fold in range(5): # 5fold
    args.fold = s_fold
    args.exp_num = str(s_fold)
    save_path = main(args)
    models_path.append(save_path)

2022-05-02 15:32:39,159 INFO: {'exp_num': '0', 'data_path': './open', 'Kfold': 5, 'model_path': 'label_results/', 'image_type': 'train_1024', 'class_num': 88, 'model_name': 'regnety_040', 'drop_path_rate': 0.2, 'img_size': 512, 'batch_size': 16, 'epochs': 100, 'optimizer': 'Lamb', 'initial_lr': 0.0005, 'weight_decay': 0.001, 'aug_ver': 2, 'scheduler': 'Reduce', 'warm_epoch': 5, 'max_lr': 0.001, 'min_lr': 5e-05, 'tmax': 145, 'patience': 7, 'clipping': None, 'amp': True, 'multi_gpu': True, 'logging': False, 'num_workers': 4, 'seed': 42, 'step': 0, 'fold': 0}


<---- Training Params ---->
Dataset size:4976
Dataset size:1245


2022-05-02 15:32:39,553 INFO: Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_040-f0d569f9.pth)
100%|██████████| 311/311 [02:25<00:00,  2.14it/s]
2022-05-02 15:35:04,812 INFO: Epoch:[001/100]
2022-05-02 15:35:04,813 INFO: Train Loss:4.522 | Acc:0.0163 | F1:0.0073
2022-05-02 15:35:22,923 INFO: val Loss:4.207 | Acc:0.0948 | F1:0.0273
2022-05-02 15:35:23,603 INFO: -----------------SAVE:1epoch----------------
100%|██████████| 311/311 [02:22<00:00,  2.19it/s]
2022-05-02 15:37:45,948 INFO: Epoch:[002/100]
2022-05-02 15:37:45,949 INFO: Train Loss:4.138 | Acc:0.1469 | F1:0.0637
2022-05-02 15:38:02,706 INFO: val Loss:3.349 | Acc:0.3631 | F1:0.1334
2022-05-02 15:38:03,361 INFO: -----------------SAVE:2epoch----------------
100%|██████████| 311/311 [02:20<00:00,  2.21it/s]
2022-05-02 15:40:23,955 INFO: Epoch:[003/100]
2022-05-02 15:40:23,956 INFO: Train Loss:3.509 | Acc:0.3515 | F1:0.1526
2022-05-02 15:40:40,402 INFO: val

2022-05-02 17:05:12,752 INFO: -----------------SAVE:27epoch----------------
100%|██████████| 311/311 [03:20<00:00,  1.55it/s]
2022-05-02 17:08:32,906 INFO: Epoch:[028/100]
2022-05-02 17:08:32,907 INFO: Train Loss:0.332 | Acc:0.9186 | F1:0.9049
2022-05-02 17:08:52,677 INFO: val Loss:0.270 | Acc:0.8627 | F1:0.8700
100%|██████████| 311/311 [03:19<00:00,  1.56it/s]
2022-05-02 17:12:12,409 INFO: Epoch:[029/100]
2022-05-02 17:12:12,410 INFO: Train Loss:0.316 | Acc:0.9118 | F1:0.8966
2022-05-02 17:12:33,273 INFO: val Loss:0.123 | Acc:0.9502 | F1:0.9344
2022-05-02 17:12:34,065 INFO: -----------------SAVE:29epoch----------------
100%|██████████| 311/311 [03:19<00:00,  1.56it/s]
2022-05-02 17:15:53,535 INFO: Epoch:[030/100]
2022-05-02 17:15:53,536 INFO: Train Loss:0.288 | Acc:0.9244 | F1:0.9061
2022-05-02 17:16:13,671 INFO: val Loss:0.170 | Acc:0.9703 | F1:0.9561
100%|██████████| 311/311 [03:19<00:00,  1.56it/s]
2022-05-02 17:19:33,074 INFO: Epoch:[031/100]
2022-05-02 17:19:33,075 INFO: Train Lo

2022-05-02 19:02:52,295 INFO: Train Loss:0.111 | Acc:0.9711 | F1:0.9609
2022-05-02 19:03:12,661 INFO: val Loss:0.042 | Acc:0.9847 | F1:0.9776
100%|██████████| 311/311 [03:16<00:00,  1.59it/s]
2022-05-02 19:06:28,714 INFO: Epoch:[060/100]
2022-05-02 19:06:28,715 INFO: Train Loss:0.096 | Acc:0.9749 | F1:0.9668
2022-05-02 19:06:49,548 INFO: val Loss:0.047 | Acc:0.9783 | F1:0.9796
100%|██████████| 311/311 [03:18<00:00,  1.57it/s]
2022-05-02 19:10:08,211 INFO: Epoch:[061/100]
2022-05-02 19:10:08,212 INFO: Train Loss:0.111 | Acc:0.9662 | F1:0.9596
2022-05-02 19:10:28,535 INFO: val Loss:0.136 | Acc:0.9703 | F1:0.9644
100%|██████████| 311/311 [03:16<00:00,  1.58it/s]
2022-05-02 19:13:45,199 INFO: Epoch:[062/100]
2022-05-02 19:13:45,200 INFO: Train Loss:0.110 | Acc:0.9751 | F1:0.9686
2022-05-02 19:14:05,015 INFO: val Loss:0.038 | Acc:0.9823 | F1:0.9825
2022-05-02 19:14:05,016 INFO: 
Best Val Epoch:55 | Val Loss:0.0179 | Val Acc:0.9912 | Val F1:0.9905
2022-05-02 19:14:05,017 INFO: Total Process 

<---- Training Params ---->
Dataset size:4977
Dataset size:1244


2022-05-02 19:14:05,505 INFO: Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_040-f0d569f9.pth)
100%|██████████| 312/312 [03:24<00:00,  1.53it/s]
2022-05-02 19:17:29,890 INFO: Epoch:[001/100]
2022-05-02 19:17:29,891 INFO: Train Loss:4.521 | Acc:0.0115 | F1:0.0047
2022-05-02 19:17:51,807 INFO: val Loss:4.229 | Acc:0.0804 | F1:0.0293
2022-05-02 19:17:52,622 INFO: -----------------SAVE:1epoch----------------
100%|██████████| 312/312 [03:20<00:00,  1.55it/s]
2022-05-02 19:21:13,581 INFO: Epoch:[002/100]
2022-05-02 19:21:13,581 INFO: Train Loss:4.136 | Acc:0.1577 | F1:0.0743
2022-05-02 19:21:34,561 INFO: val Loss:3.360 | Acc:0.4502 | F1:0.1409
2022-05-02 19:21:35,330 INFO: -----------------SAVE:2epoch----------------
100%|██████████| 312/312 [03:23<00:00,  1.53it/s]
2022-05-02 19:24:59,045 INFO: Epoch:[003/100]
2022-05-02 19:24:59,046 INFO: Train Loss:3.507 | Acc:0.3448 | F1:0.1503
2022-05-02 19:25:19,494 INFO: val

2022-05-02 20:57:40,049 INFO: Train Loss:0.350 | Acc:0.9166 | F1:0.8966
2022-05-02 20:58:00,727 INFO: val Loss:0.138 | Acc:0.9695 | F1:0.9574
100%|██████████| 312/312 [03:21<00:00,  1.55it/s]
2022-05-02 21:01:22,227 INFO: Epoch:[029/100]
2022-05-02 21:01:22,227 INFO: Train Loss:0.321 | Acc:0.9202 | F1:0.9036
2022-05-02 21:01:42,700 INFO: val Loss:0.138 | Acc:0.9486 | F1:0.9358
100%|██████████| 312/312 [03:23<00:00,  1.53it/s]
2022-05-02 21:05:06,299 INFO: Epoch:[030/100]
2022-05-02 21:05:06,300 INFO: Train Loss:0.287 | Acc:0.9208 | F1:0.9041
2022-05-02 21:05:27,712 INFO: val Loss:0.116 | Acc:0.9550 | F1:0.9524
2022-05-02 21:05:28,536 INFO: -----------------SAVE:30epoch----------------
100%|██████████| 312/312 [03:22<00:00,  1.54it/s]
2022-05-02 21:08:51,184 INFO: Epoch:[031/100]
2022-05-02 21:08:51,185 INFO: Train Loss:0.274 | Acc:0.9277 | F1:0.9149
2022-05-02 21:09:12,235 INFO: val Loss:0.156 | Acc:0.9630 | F1:0.9469
100%|██████████| 312/312 [03:22<00:00,  1.54it/s]
2022-05-02 21:12:3

<---- Training Params ---->
Dataset size:4977
Dataset size:1244


2022-05-02 22:19:49,586 INFO: Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_040-f0d569f9.pth)
100%|██████████| 312/312 [03:20<00:00,  1.56it/s]
2022-05-02 22:23:10,588 INFO: Epoch:[001/100]
2022-05-02 22:23:10,589 INFO: Train Loss:4.519 | Acc:0.0137 | F1:0.0051
2022-05-02 22:23:31,275 INFO: val Loss:4.229 | Acc:0.0997 | F1:0.0258
2022-05-02 22:23:32,101 INFO: -----------------SAVE:1epoch----------------
100%|██████████| 312/312 [03:24<00:00,  1.53it/s]
2022-05-02 22:26:56,599 INFO: Epoch:[002/100]
2022-05-02 22:26:56,600 INFO: Train Loss:4.136 | Acc:0.1579 | F1:0.0727
2022-05-02 22:27:16,728 INFO: val Loss:3.364 | Acc:0.4035 | F1:0.1153
2022-05-02 22:27:17,601 INFO: -----------------SAVE:2epoch----------------
100%|██████████| 312/312 [03:23<00:00,  1.54it/s]
2022-05-02 22:30:40,847 INFO: Epoch:[003/100]
2022-05-02 22:30:40,848 INFO: Train Loss:3.504 | Acc:0.3516 | F1:0.1537
2022-05-02 22:31:01,495 INFO: val

2022-05-03 00:00:30,101 INFO: -----------------SAVE:27epoch----------------
100%|██████████| 312/312 [03:18<00:00,  1.57it/s]
2022-05-03 00:03:48,585 INFO: Epoch:[028/100]
2022-05-03 00:03:48,586 INFO: Train Loss:0.355 | Acc:0.9190 | F1:0.8977
2022-05-03 00:04:08,523 INFO: val Loss:0.179 | Acc:0.9268 | F1:0.9294
100%|██████████| 312/312 [03:20<00:00,  1.55it/s]
2022-05-03 00:07:29,357 INFO: Epoch:[029/100]
2022-05-03 00:07:29,358 INFO: Train Loss:0.351 | Acc:0.9158 | F1:0.8986
2022-05-03 00:07:49,498 INFO: val Loss:0.091 | Acc:0.9727 | F1:0.9522
2022-05-03 00:07:50,278 INFO: -----------------SAVE:29epoch----------------
100%|██████████| 312/312 [03:25<00:00,  1.52it/s]
2022-05-03 00:11:15,665 INFO: Epoch:[030/100]
2022-05-03 00:11:15,666 INFO: Train Loss:0.297 | Acc:0.9255 | F1:0.9091
2022-05-03 00:11:35,684 INFO: val Loss:0.093 | Acc:0.9711 | F1:0.9610
100%|██████████| 312/312 [03:24<00:00,  1.53it/s]
2022-05-03 00:14:59,908 INFO: Epoch:[031/100]
2022-05-03 00:14:59,909 INFO: Train Lo

<---- Training Params ---->
Dataset size:4977
Dataset size:1244


2022-05-03 01:18:49,059 INFO: Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_040-f0d569f9.pth)
100%|██████████| 312/312 [03:19<00:00,  1.57it/s]
2022-05-03 01:22:08,734 INFO: Epoch:[001/100]
2022-05-03 01:22:08,735 INFO: Train Loss:4.523 | Acc:0.0157 | F1:0.0053
2022-05-03 01:22:29,413 INFO: val Loss:4.261 | Acc:0.0563 | F1:0.0211
2022-05-03 01:22:30,189 INFO: -----------------SAVE:1epoch----------------
100%|██████████| 312/312 [03:20<00:00,  1.55it/s]
2022-05-03 01:25:50,959 INFO: Epoch:[002/100]
2022-05-03 01:25:50,961 INFO: Train Loss:4.129 | Acc:0.1316 | F1:0.0667
2022-05-03 01:26:12,097 INFO: val Loss:3.460 | Acc:0.4212 | F1:0.1267
2022-05-03 01:26:12,922 INFO: -----------------SAVE:2epoch----------------
100%|██████████| 312/312 [03:21<00:00,  1.55it/s]
2022-05-03 01:29:34,702 INFO: Epoch:[003/100]
2022-05-03 01:29:34,703 INFO: Train Loss:3.518 | Acc:0.3167 | F1:0.1453
2022-05-03 01:29:54,700 INFO: val

2022-05-03 02:50:06,532 INFO: Train Loss:0.362 | Acc:0.9126 | F1:0.8902
2022-05-03 02:50:26,127 INFO: val Loss:0.130 | Acc:0.9646 | F1:0.9440
100%|██████████| 312/312 [02:24<00:00,  2.16it/s]
2022-05-03 02:52:50,548 INFO: Epoch:[028/100]
2022-05-03 02:52:50,549 INFO: Train Loss:0.346 | Acc:0.9116 | F1:0.8976
2022-05-03 02:53:10,140 INFO: val Loss:0.096 | Acc:0.9614 | F1:0.9569
2022-05-03 02:53:10,854 INFO: -----------------SAVE:28epoch----------------
100%|██████████| 312/312 [02:27<00:00,  2.11it/s]
2022-05-03 02:55:38,446 INFO: Epoch:[029/100]
2022-05-03 02:55:38,447 INFO: Train Loss:0.311 | Acc:0.9216 | F1:0.9069
2022-05-03 02:55:57,166 INFO: val Loss:0.133 | Acc:0.9622 | F1:0.9475
100%|██████████| 312/312 [02:27<00:00,  2.11it/s]
2022-05-03 02:58:24,737 INFO: Epoch:[030/100]
2022-05-03 02:58:24,738 INFO: Train Loss:0.273 | Acc:0.9253 | F1:0.9180
2022-05-03 02:58:43,487 INFO: val Loss:0.101 | Acc:0.9526 | F1:0.9568
100%|██████████| 312/312 [02:27<00:00,  2.11it/s]
2022-05-03 03:01:1

<---- Training Params ---->
Dataset size:4977
Dataset size:1244


2022-05-03 04:01:53,388 INFO: Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_040-f0d569f9.pth)
100%|██████████| 312/312 [02:27<00:00,  2.12it/s]
2022-05-03 04:04:20,991 INFO: Epoch:[001/100]
2022-05-03 04:04:20,992 INFO: Train Loss:4.527 | Acc:0.0141 | F1:0.0060
2022-05-03 04:04:39,283 INFO: val Loss:4.264 | Acc:0.0844 | F1:0.0201
2022-05-03 04:04:40,047 INFO: -----------------SAVE:1epoch----------------
100%|██████████| 312/312 [02:28<00:00,  2.10it/s]
2022-05-03 04:07:08,522 INFO: Epoch:[002/100]
2022-05-03 04:07:08,523 INFO: Train Loss:4.135 | Acc:0.1336 | F1:0.0630
2022-05-03 04:07:27,213 INFO: val Loss:3.418 | Acc:0.3923 | F1:0.1586
2022-05-03 04:07:27,927 INFO: -----------------SAVE:2epoch----------------
100%|██████████| 312/312 [02:25<00:00,  2.14it/s]
2022-05-03 04:09:53,622 INFO: Epoch:[003/100]
2022-05-03 04:09:53,623 INFO: Train Loss:3.497 | Acc:0.3518 | F1:0.1543
2022-05-03 04:10:12,808 INFO: val

100%|██████████| 312/312 [02:25<00:00,  2.14it/s]
2022-05-03 05:16:22,611 INFO: Epoch:[027/100]
2022-05-03 05:16:22,612 INFO: Train Loss:0.371 | Acc:0.9009 | F1:0.8792
2022-05-03 05:16:41,192 INFO: val Loss:0.090 | Acc:0.9751 | F1:0.9682
2022-05-03 05:16:41,943 INFO: -----------------SAVE:27epoch----------------
100%|██████████| 312/312 [02:25<00:00,  2.15it/s]
2022-05-03 05:19:07,216 INFO: Epoch:[028/100]
2022-05-03 05:19:07,216 INFO: Train Loss:0.357 | Acc:0.9170 | F1:0.8985
2022-05-03 05:19:24,940 INFO: val Loss:0.160 | Acc:0.9333 | F1:0.9435
100%|██████████| 312/312 [02:28<00:00,  2.11it/s]
2022-05-03 05:21:53,164 INFO: Epoch:[029/100]
2022-05-03 05:21:53,165 INFO: Train Loss:0.313 | Acc:0.9108 | F1:0.8997
2022-05-03 05:22:11,781 INFO: val Loss:0.078 | Acc:0.9767 | F1:0.9682
2022-05-03 05:22:12,491 INFO: -----------------SAVE:29epoch----------------
100%|██████████| 312/312 [02:26<00:00,  2.13it/s]
2022-05-03 05:24:38,679 INFO: Epoch:[030/100]
2022-05-03 05:24:38,679 INFO: Train Lo

In [25]:
img_size = 512

test_transform = get_train_augmentation(img_size=img_size, ver=1)
test_dataset = Test_dataset(df_test, test_transform)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=0)

Test Dataset size:2154


In [26]:
models_path = ['./label_results/000', './label_results/001', './label_results/002', './label_results/003', './label_results/004']

In [27]:
ensemble = ensemble_5fold(models_path, test_loader, device)

2022-05-03 06:25:19,486 INFO: Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_040-f0d569f9.pth)
100%|██████████| 34/34 [01:53<00:00,  3.34s/it]
2022-05-03 06:27:14,634 INFO: Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_040-f0d569f9.pth)
100%|██████████| 34/34 [01:45<00:00,  3.11s/it]
2022-05-03 06:29:01,988 INFO: Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_040-f0d569f9.pth)
100%|██████████| 34/34 [01:49<00:00,  3.23s/it]
2022-05-03 06:30:52,837 INFO: Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-regnet/regnety_040-f0d569f9.pth)
100%|██████████| 34/34 [01:46<00:00,  3.13s/it]
2022-05-03 06:32:40,406 INFO: Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models

In [28]:
f_pred = ensemble.argmax(axis=1).tolist()
f_pred

[62,
 28,
 72,
 64,
 63,
 50,
 14,
 32,
 40,
 39,
 0,
 76,
 85,
 60,
 62,
 2,
 25,
 22,
 21,
 47,
 66,
 84,
 0,
 58,
 51,
 55,
 51,
 60,
 40,
 78,
 55,
 9,
 59,
 83,
 84,
 82,
 40,
 9,
 37,
 44,
 45,
 10,
 60,
 14,
 44,
 63,
 68,
 13,
 84,
 55,
 9,
 72,
 31,
 35,
 84,
 46,
 20,
 21,
 38,
 15,
 69,
 48,
 18,
 87,
 13,
 36,
 72,
 13,
 53,
 43,
 25,
 76,
 32,
 63,
 41,
 84,
 9,
 40,
 5,
 9,
 50,
 76,
 57,
 75,
 72,
 1,
 84,
 22,
 40,
 83,
 36,
 84,
 9,
 42,
 55,
 65,
 72,
 72,
 45,
 45,
 9,
 24,
 52,
 17,
 76,
 2,
 86,
 49,
 72,
 34,
 36,
 58,
 72,
 72,
 84,
 15,
 28,
 52,
 15,
 41,
 55,
 19,
 45,
 72,
 77,
 72,
 61,
 72,
 54,
 55,
 28,
 46,
 50,
 42,
 15,
 25,
 57,
 2,
 52,
 78,
 73,
 33,
 7,
 9,
 84,
 35,
 23,
 84,
 85,
 76,
 84,
 79,
 72,
 52,
 83,
 63,
 68,
 63,
 33,
 45,
 57,
 60,
 57,
 6,
 21,
 38,
 49,
 74,
 83,
 10,
 21,
 24,
 56,
 39,
 9,
 52,
 55,
 86,
 15,
 84,
 40,
 27,
 3,
 11,
 51,
 20,
 62,
 45,
 79,
 70,
 52,
 18,
 84,
 32,
 52,
 57,
 83,
 85,
 84,
 33,
 72,
 12,
 58,
 1,


In [29]:
train_y = pd.read_csv("./open/train_df_add2.csv")

train_labels = train_y["label"]

label_unique = sorted(np.unique(train_labels))
label_unique = {key:value for key,value in zip(label_unique, range(len(label_unique)))}


In [30]:
label_decoder = {val:key for key, val in label_unique.items()}

f_result = [label_decoder[result] for result in f_pred]

In [31]:
f_result

['tile-glue_strip',
 'grid-good',
 'transistor-good',
 'tile-gray_stroke',
 'tile-good',
 'pill-crack',
 'capsule-faulty_imprint',
 'hazelnut-cut',
 'leather-good',
 'leather-glue',
 'bottle-broken_large',
 'wood-good',
 'zipper-rough',
 'screw-thread_top',
 'tile-glue_strip',
 'bottle-contamination',
 'grid-bent',
 'carpet-hole',
 'carpet-good',
 'pill-color',
 'tile-rough',
 'zipper-good',
 'bottle-broken_large',
 'screw-scratch_neck',
 'pill-faulty_imprint',
 'screw-good',
 'pill-faulty_imprint',
 'screw-thread_top',
 'leather-good',
 'wood-liquid',
 'screw-good',
 'cable-good',
 'screw-thread_side',
 'zipper-fabric_interior',
 'zipper-good',
 'zipper-fabric_border',
 'leather-good',
 'cable-good',
 'leather-cut',
 'metal_nut-flip',
 'metal_nut-good',
 'cable-missing_cable',
 'screw-thread_top',
 'capsule-faulty_imprint',
 'metal_nut-flip',
 'tile-good',
 'toothbrush-good',
 'capsule-crack',
 'zipper-good',
 'screw-good',
 'cable-good',
 'transistor-good',
 'hazelnut-crack',
 'hazel

In [32]:
submission = pd.read_csv("./open/sample_submission.csv")

submission["label"] = f_result

submission

Unnamed: 0,index,label
0,0,tile-glue_strip
1,1,grid-good
2,2,transistor-good
3,3,tile-gray_stroke
4,4,tile-good
...,...,...
2149,2149,tile-gray_stroke
2150,2150,screw-good
2151,2151,grid-good
2152,2152,cable-poke_insulation


In [33]:
submission.to_csv("./submission/label_result_add_0502_수정.csv", index = False)