In [5]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import matplotlib as mpl
import math

from glob import glob
from PIL import Image
from tqdm import tqdm, tqdm_notebook
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, accuracy_score
from time import sleep

import warnings
warnings.filterwarnings('ignore')

In [6]:
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from torchvision import transforms, utils
from torchvision.transforms import Resize, ToTensor, Normalize
from torch.utils.data import Dataset, DataLoader, random_split, SubsetRandomSampler, WeightedRandomSampler

## Make Label

In [7]:
df = pd.read_csv('/opt/ml/code/data/Final.csv')
df.head()

Unnamed: 0,id,path,age,gender,stem,img_path,gender_issue,mask_issue
0,1,000001_female_Asian_45,45,female,mask1,/opt/ml/input/data/train/images/000001_female_...,False,False
1,1,000001_female_Asian_45,45,female,mask2,/opt/ml/input/data/train/images/000001_female_...,False,False
2,1,000001_female_Asian_45,45,female,mask4,/opt/ml/input/data/train/images/000001_female_...,False,False
3,1,000001_female_Asian_45,45,female,mask3,/opt/ml/input/data/train/images/000001_female_...,False,False
4,1,000001_female_Asian_45,45,female,incorrect_mask,/opt/ml/input/data/train/images/000001_female_...,False,False


In [8]:
df['label'] = ''
for i in range(len(df)) :
    label = 0
    # 나이를 기준으로 점수
    if df['age'][i] < 30 :
        label = 0
    elif 30 <= df['age'][i] < 60 :
        label = 1
    elif 60 <= df['age'][i] :
        label = 2
    
    # 여자일 경우 남자의 label +3
    if df['gender'][i] == 'female' :
        label += 3
    
    # mask상태가 Not wear일 경우 + 12, Incorrect일 경우 + 6
    if df['stem'][i] == 'normal' :
        label += 12
    elif df['stem'][i] == 'incorrect_mask' :
        label += 6
    
    df['label'][i] = label

In [9]:
df.sample(10)

Unnamed: 0,id,path,age,gender,stem,img_path,gender_issue,mask_issue,label
9711,3421,003421_female_Asian_38,38,female,mask4,/opt/ml/input/data/train/images/003421_female_...,False,False,4
13655,4481,004481_male_Asian_29,29,male,mask5,/opt/ml/input/data/train/images/004481_male_As...,False,False,0
3922,1188,001188_male_Asian_60,60,male,mask4,/opt/ml/input/data/train/images/001188_male_As...,False,False,2
6775,1759,001759_female_Asian_46,46,female,normal,/opt/ml/input/data/train/images/001759_female_...,False,False,16
12722,4281,004281_female_Asian_60,60,female,mask3,/opt/ml/input/data/train/images/004281_female_...,False,False,5
12031,3981,003981_male_Asian_60,60,male,mask5,/opt/ml/input/data/train/images/003981_male_As...,False,False,2
7674,3032,003032_female_Asian_20,20,female,mask4,/opt/ml/input/data/train/images/003032_female_...,False,False,3
18077,6622,006622_male_Asian_19,19,male,incorrect_mask,/opt/ml/input/data/train/images/006622_male_As...,False,False,6
17895,6571,006571_female_Asian_21,21,female,mask3,/opt/ml/input/data/train/images/006571_female_...,False,False,3
9192,3332,003332_female_Asian_19,19,female,mask2,/opt/ml/input/data/train/images/003332_female_...,False,False,3


## Dataset Load

In [10]:
class MaskDataset(Dataset) :
    def __init__(self, path_list, label_list, transform, train = True) :
        self.train = train
        if self.train :
            self.X = []
            self.y = []
            for path, label in zip(path_list, label_list) :
                image = Image.open(path)
                self.X.append(image)
                self.y.append(label)
        else :
            self.X = []
            for path in path_list :
                image = Image.open(path)
                self.X.append(image)
        
        self.transform = transform
        self._repr_indent = 4
        if self.train :
            self.classes = list(set(self.y))
    
    def __len__(self) :
        len_dataset = len(self.X)
        return len_dataset

    def __getitem__(self, idx) :
        X = self.X[idx]
        X = self.transform(X)
        if not self.train :
            y = None
        else :
            y = self.y[idx]
        return torch.tensor(X, dtype = torch.float), torch.tensor(y, dtype = torch.long)
    
    def __repr__(self) :
        head = "(PyTorch Practice) My Custom Dataset : MASK"
        num_data = self._repr_indent*" " + "Number of datapoints : {}".format(self.__len__())
        
        if self.train :
            num_classes = self._repr_indent*" " + "Number of classes {}".format(len(self.classes))
        else :
            num_classes = self._repr_indent*" " + "Number of classes None"
            
        return '\n'.join([head, num_data, num_classes])

In [11]:
data_path = df['img_path']
target = list(df['label'])

### ResNext50_32x4d학습

In [12]:
model = torchvision.models.resnext50_32x4d(pretrained=True)
model.fc = nn.Sequential(
    nn.Linear(2048, 1024, bias = True),
    nn.ReLU(inplace = True),
    nn.Dropout(p = 0.3),
    nn.Linear(1024, 256, bias = True),
    nn.ReLU(inplace = True),
    nn.Dropout(p = 0.3),
    nn.Linear(256, 18, bias = True))

torch.nn.init.xavier_uniform_(model.fc[0].weight)
stdv = 1.0 / np.sqrt(model.fc[0].in_features)
model.fc[0].bias.data.uniform_(-stdv,stdv)

for param in model.parameters() : # frozon 해제
    param.requires_grad = True

In [13]:
torch.cuda.empty_cache()
stf = StratifiedKFold(n_splits = 4, shuffle = True, random_state =42)
foldperf = {}
for fold, (train_idx, valid_idx) in enumerate(stf.split(data_path, target)) :
    
    print('Fold {}'.format(fold + 1))
    target_array = np.array(target)
    
    dataset_train_Mask = MaskDataset(path_list = data_path[train_idx],
                                     label_list = target_array[train_idx],
                                     transform = transforms.Compose([
                                         Resize((512, 384), Image.BILINEAR),
                                         ToTensor(),
                                         Normalize(mean = (0.5,0.5,0.5), std = (0.2, 0.2, 0.2)),
                                      ]),
                                      train = True,
                                      )
    dataset_valid_Mask = MaskDataset(path_list = data_path[valid_idx],
                                     label_list = target_array[valid_idx],
                                     transform = transforms.Compose([
                                          Resize((512, 384), Image.BILINEAR),
                                          ToTensor(),
                                          Normalize(mean = (0.5,0.5,0.5), std = (0.2, 0.2, 0.2)),
                                      ]),
                                      train = True,
                                      )
    BATCH_SIZE = 64
    mask_train_dataloader = torch.utils.data.DataLoader(dataset_train_Mask,
                                                        batch_size = BATCH_SIZE,
                                                        shuffle = True)
    mask_valid_dataloader = torch.utils.data.DataLoader(dataset_valid_Mask,
                                                        batch_size = BATCH_SIZE,
                                                        shuffle = True)
    
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print(f'{device} is using !')
    sleep(1)
    
    model = model.to(device)

    LEARNING_RATE = 0.0001
    #NUM_EPOCH = 100

    loss_fn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr = LEARNING_RATE)

    dataloaders = {
        'train' : mask_train_dataloader,
        'test' : mask_valid_dataloader
    }
    
    history = {'train_loss' : [], 'test_loss' : [],
               'train_acc' : [], 'test_acc' : [],
               'train_f1' : [], 'test_f1' : []}
    
    n_epochs_stop = 3
    epochs_no_improve = 0
    early_stop = False
    min_val_loss = np.Inf
    NUM_ACCUM = 4
    
    best_test_accuracy = 0
    best_test_loss = 9999.
    for epoch in range(10) :
        for phase in ['train', 'test'] :
            running_loss = 0.
            running_acc = 0.
            running_f1 = 0.
            n_iter = 0
            
            if phase == 'train' :
                model.train()
            elif phase == 'test' :
                model.eval()

            optimizer.zero_grad()
            for ind, (images, labels) in enumerate(tqdm(dataloaders[phase])) :
                images = images.to(device)
                labels = labels.to(device)

                with torch.set_grad_enabled(phase == 'train') : # phase == 'train'일 경우에만 grad_enabled를 True
                    logits = model(images)
                    _, preds = torch.max(logits, 1)
                    loss = loss_fn(logits, labels) / NUM_ACCUM
                    
                    if phase == 'train' :
                        loss.backward()
                        
                        if ind % NUM_ACCUM == 0 :
                            optimizer.step()
                            optimizer.zero_grad()

                running_loss += loss.item() * images.size(0)
                running_acc += torch.sum(preds == labels.data)
                running_f1 += f1_score(preds.cpu().numpy(), labels.cpu().numpy(), average = 'macro')
                n_iter += 1

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_acc / len(dataloaders[phase].dataset)
            epoch_f1 = running_f1 / n_iter
            #epoch_f1_check = running_f1 / len(dataloaders[phase].dataset)
            #print('n_iter는 ', n_iter, ' dataset는 :', len(dataloaders[phase].dataset))
            #print('n_iter :', epoch_f1, ' dataset :', epoch_f1_check)
            
            if phase == 'test' :
                if epoch_loss < min_val_loss :
                    epochs_no_improve = 0
                    min_val_loss = epoch_loss
                else :
                    epochs_no_improve += 1
                
                if epochs_no_improve == n_epochs_stop :
                    print('Early Stopping!')
                    early_stop = True
                    break
            
            if phase == 'train' :
                history['train_loss'].append(epoch_loss)
                history['train_acc'].append(epoch_acc)
                history['train_f1'].append(epoch_f1)
            elif phase == 'test' :
                history['test_loss'].append(epoch_loss)
                history['test_acc'].append(epoch_acc)
                history['test_f1'].append(epoch_f1)

            print(f"현재 epoch-{epoch}의 {phase}-데이터 셋에서 평균 Loss : {epoch_loss:.4f}, 평균 Accuracy : {epoch_acc:.4f}, 평균 F1 Score : {epoch_f1: .4f}")
            #if phase == "test" and best_test_accuracy < epoch_acc: # phase가 test일 때, best accuracy 계산
            #    best_test_accuracy = epoch_acc
            #if phase == "test" and best_test_loss > epoch_loss: # phase가 test일 때, best loss 계산
            #    best_test_loss = epoch_loss
    
        if early_stop :
            print(f'fold{fold+1} Stopped')
            break
    foldperf['fold{}'.format(fold+1)] = history 
    
    #print("학습 종료!")
    #print(f"최고 accuracy : {best_test_accuracy}, 최고 낮은 loss : {best_test_loss}")

Fold 1
cuda:0 is using !


100%|██████████| 222/222 [05:03<00:00,  1.37s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-0의 train-데이터 셋에서 평균 Loss : 0.3193, 평균 Accuracy : 0.6423, 평균 F1 Score :  0.3221


100%|██████████| 74/74 [00:49<00:00,  1.50it/s]
  0%|          | 0/222 [00:00<?, ?it/s]

현재 epoch-0의 test-데이터 셋에서 평균 Loss : 0.1103, 평균 Accuracy : 0.8777, 평균 F1 Score :  0.6763


100%|██████████| 222/222 [04:20<00:00,  1.17s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-1의 train-데이터 셋에서 평균 Loss : 0.0689, 평균 Accuracy : 0.9256, 평균 F1 Score :  0.7855


100%|██████████| 74/74 [00:34<00:00,  2.14it/s]
  0%|          | 0/222 [00:00<?, ?it/s]

현재 epoch-1의 test-데이터 셋에서 평균 Loss : 0.0362, 평균 Accuracy : 0.9477, 평균 F1 Score :  0.8325


100%|██████████| 222/222 [04:20<00:00,  1.17s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-2의 train-데이터 셋에서 평균 Loss : 0.0226, 평균 Accuracy : 0.9757, 평균 F1 Score :  0.9039


100%|██████████| 74/74 [00:35<00:00,  2.10it/s]
  0%|          | 0/222 [00:00<?, ?it/s]

현재 epoch-2의 test-데이터 셋에서 평균 Loss : 0.0187, 평균 Accuracy : 0.9799, 평균 F1 Score :  0.9439


100%|██████████| 222/222 [04:20<00:00,  1.17s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-3의 train-데이터 셋에서 평균 Loss : 0.0111, 평균 Accuracy : 0.9874, 평균 F1 Score :  0.9539


100%|██████████| 74/74 [00:35<00:00,  2.10it/s]
  0%|          | 0/222 [00:00<?, ?it/s]

현재 epoch-3의 test-데이터 셋에서 평균 Loss : 0.0129, 평균 Accuracy : 0.9839, 평균 F1 Score :  0.9499


100%|██████████| 222/222 [04:21<00:00,  1.18s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-4의 train-데이터 셋에서 평균 Loss : 0.0057, 평균 Accuracy : 0.9953, 평균 F1 Score :  0.9848


100%|██████████| 74/74 [00:35<00:00,  2.11it/s]
  0%|          | 0/222 [00:00<?, ?it/s]

현재 epoch-4의 test-데이터 셋에서 평균 Loss : 0.0085, 평균 Accuracy : 0.9905, 평균 F1 Score :  0.9720


100%|██████████| 222/222 [04:21<00:00,  1.18s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-5의 train-데이터 셋에서 평균 Loss : 0.0038, 평균 Accuracy : 0.9959, 평균 F1 Score :  0.9847


100%|██████████| 74/74 [00:34<00:00,  2.12it/s]
  0%|          | 0/222 [00:00<?, ?it/s]

현재 epoch-5의 test-데이터 셋에서 평균 Loss : 0.0113, 평균 Accuracy : 0.9873, 평균 F1 Score :  0.9723


100%|██████████| 222/222 [04:20<00:00,  1.17s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-6의 train-데이터 셋에서 평균 Loss : 0.0044, 평균 Accuracy : 0.9946, 평균 F1 Score :  0.9829


100%|██████████| 74/74 [00:35<00:00,  2.08it/s]
  0%|          | 0/222 [00:00<?, ?it/s]

현재 epoch-6의 test-데이터 셋에서 평균 Loss : 0.0063, 평균 Accuracy : 0.9930, 평균 F1 Score :  0.9796


100%|██████████| 222/222 [04:22<00:00,  1.18s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-7의 train-데이터 셋에서 평균 Loss : 0.0023, 평균 Accuracy : 0.9980, 평균 F1 Score :  0.9946


100%|██████████| 74/74 [00:35<00:00,  2.08it/s]
  0%|          | 0/222 [00:00<?, ?it/s]

현재 epoch-7의 test-데이터 셋에서 평균 Loss : 0.0061, 평균 Accuracy : 0.9943, 평균 F1 Score :  0.9862


100%|██████████| 222/222 [04:21<00:00,  1.18s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-8의 train-데이터 셋에서 평균 Loss : 0.0022, 평균 Accuracy : 0.9978, 평균 F1 Score :  0.9942


100%|██████████| 74/74 [00:35<00:00,  2.11it/s]
  0%|          | 0/222 [00:00<?, ?it/s]

현재 epoch-8의 test-데이터 셋에서 평균 Loss : 0.0076, 평균 Accuracy : 0.9917, 평균 F1 Score :  0.9830


100%|██████████| 222/222 [04:21<00:00,  1.18s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-9의 train-데이터 셋에서 평균 Loss : 0.0028, 평균 Accuracy : 0.9971, 평균 F1 Score :  0.9960


100%|██████████| 74/74 [00:35<00:00,  2.11it/s]


현재 epoch-9의 test-데이터 셋에서 평균 Loss : 0.0072, 평균 Accuracy : 0.9920, 평균 F1 Score :  0.9762
Fold 2
cuda:0 is using !


100%|██████████| 222/222 [04:51<00:00,  1.31s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-0의 train-데이터 셋에서 평균 Loss : 0.0061, 평균 Accuracy : 0.9926, 평균 F1 Score :  0.9819


100%|██████████| 74/74 [00:46<00:00,  1.61it/s]
  0%|          | 0/222 [00:00<?, ?it/s]

현재 epoch-0의 test-데이터 셋에서 평균 Loss : 0.0004, 평균 Accuracy : 0.9998, 평균 F1 Score :  0.9999


100%|██████████| 222/222 [04:22<00:00,  1.18s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-1의 train-데이터 셋에서 평균 Loss : 0.0037, 평균 Accuracy : 0.9959, 평균 F1 Score :  0.9904


100%|██████████| 74/74 [00:35<00:00,  2.08it/s]
  0%|          | 0/222 [00:00<?, ?it/s]

현재 epoch-1의 test-데이터 셋에서 평균 Loss : 0.0009, 평균 Accuracy : 0.9987, 평균 F1 Score :  0.9988


100%|██████████| 222/222 [04:22<00:00,  1.18s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-2의 train-데이터 셋에서 평균 Loss : 0.0028, 평균 Accuracy : 0.9972, 평균 F1 Score :  0.9928


100%|██████████| 74/74 [00:36<00:00,  2.04it/s]
  0%|          | 0/222 [00:00<?, ?it/s]

현재 epoch-2의 test-데이터 셋에서 평균 Loss : 0.0003, 평균 Accuracy : 0.9998, 평균 F1 Score :  0.9998


100%|██████████| 222/222 [04:22<00:00,  1.18s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-3의 train-데이터 셋에서 평균 Loss : 0.0022, 평균 Accuracy : 0.9978, 평균 F1 Score :  0.9929


100%|██████████| 74/74 [00:36<00:00,  2.05it/s]
  0%|          | 0/222 [00:00<?, ?it/s]

현재 epoch-3의 test-데이터 셋에서 평균 Loss : 0.0008, 평균 Accuracy : 0.9987, 평균 F1 Score :  0.9964


100%|██████████| 222/222 [04:22<00:00,  1.18s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-4의 train-데이터 셋에서 평균 Loss : 0.0017, 평균 Accuracy : 0.9982, 평균 F1 Score :  0.9971


100%|██████████| 74/74 [00:36<00:00,  2.05it/s]
  0%|          | 0/222 [00:00<?, ?it/s]

현재 epoch-4의 test-데이터 셋에서 평균 Loss : 0.0007, 평균 Accuracy : 0.9992, 평균 F1 Score :  0.9990


100%|██████████| 222/222 [04:22<00:00,  1.18s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-5의 train-데이터 셋에서 평균 Loss : 0.0021, 평균 Accuracy : 0.9976, 평균 F1 Score :  0.9939


100%|██████████| 74/74 [00:35<00:00,  2.07it/s]


Early Stopping!
fold2 Stopped
Fold 3
cuda:0 is using !


100%|██████████| 222/222 [04:54<00:00,  1.33s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-0의 train-데이터 셋에서 평균 Loss : 0.0034, 평균 Accuracy : 0.9960, 평균 F1 Score :  0.9916


100%|██████████| 74/74 [00:47<00:00,  1.55it/s]
  0%|          | 0/222 [00:00<?, ?it/s]

현재 epoch-0의 test-데이터 셋에서 평균 Loss : 0.0029, 평균 Accuracy : 0.9966, 평균 F1 Score :  0.9932


100%|██████████| 222/222 [04:20<00:00,  1.17s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-1의 train-데이터 셋에서 평균 Loss : 0.0032, 평균 Accuracy : 0.9961, 평균 F1 Score :  0.9893


100%|██████████| 74/74 [00:35<00:00,  2.11it/s]
  0%|          | 0/222 [00:00<?, ?it/s]

현재 epoch-1의 test-데이터 셋에서 평균 Loss : 0.0003, 평균 Accuracy : 0.9998, 평균 F1 Score :  0.9999


100%|██████████| 222/222 [04:21<00:00,  1.18s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-2의 train-데이터 셋에서 평균 Loss : 0.0017, 평균 Accuracy : 0.9980, 평균 F1 Score :  0.9967


100%|██████████| 74/74 [00:35<00:00,  2.10it/s]
  0%|          | 0/222 [00:00<?, ?it/s]

현재 epoch-2의 test-데이터 셋에서 평균 Loss : 0.0001, 평균 Accuracy : 1.0000, 평균 F1 Score :  1.0000


100%|██████████| 222/222 [04:21<00:00,  1.18s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-3의 train-데이터 셋에서 평균 Loss : 0.0014, 평균 Accuracy : 0.9981, 평균 F1 Score :  0.9957


100%|██████████| 74/74 [00:34<00:00,  2.14it/s]
  0%|          | 0/222 [00:00<?, ?it/s]

현재 epoch-3의 test-데이터 셋에서 평균 Loss : 0.0002, 평균 Accuracy : 0.9998, 평균 F1 Score :  0.9995


100%|██████████| 222/222 [04:19<00:00,  1.17s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-4의 train-데이터 셋에서 평균 Loss : 0.0009, 평균 Accuracy : 0.9989, 평균 F1 Score :  0.9956


100%|██████████| 74/74 [00:34<00:00,  2.14it/s]
  0%|          | 0/222 [00:00<?, ?it/s]

현재 epoch-4의 test-데이터 셋에서 평균 Loss : 0.0007, 평균 Accuracy : 0.9989, 평균 F1 Score :  0.9971


100%|██████████| 222/222 [04:20<00:00,  1.17s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-5의 train-데이터 셋에서 평균 Loss : 0.0028, 평균 Accuracy : 0.9965, 평균 F1 Score :  0.9900


100%|██████████| 74/74 [00:35<00:00,  2.09it/s]


Early Stopping!
fold3 Stopped
Fold 4
cuda:0 is using !


100%|██████████| 222/222 [04:53<00:00,  1.32s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-0의 train-데이터 셋에서 평균 Loss : 0.0021, 평균 Accuracy : 0.9967, 평균 F1 Score :  0.9905


100%|██████████| 74/74 [00:46<00:00,  1.61it/s]
  0%|          | 0/222 [00:00<?, ?it/s]

현재 epoch-0의 test-데이터 셋에서 평균 Loss : 0.0005, 평균 Accuracy : 0.9994, 평균 F1 Score :  0.9980


100%|██████████| 222/222 [04:19<00:00,  1.17s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-1의 train-데이터 셋에서 평균 Loss : 0.0023, 평균 Accuracy : 0.9974, 평균 F1 Score :  0.9958


100%|██████████| 74/74 [00:34<00:00,  2.14it/s]
  0%|          | 0/222 [00:00<?, ?it/s]

현재 epoch-1의 test-데이터 셋에서 평균 Loss : 0.0005, 평균 Accuracy : 0.9996, 평균 F1 Score :  0.9988


100%|██████████| 222/222 [04:19<00:00,  1.17s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-2의 train-데이터 셋에서 평균 Loss : 0.0019, 평균 Accuracy : 0.9978, 평균 F1 Score :  0.9962


100%|██████████| 74/74 [00:34<00:00,  2.12it/s]
  0%|          | 0/222 [00:00<?, ?it/s]

현재 epoch-2의 test-데이터 셋에서 평균 Loss : 0.0001, 평균 Accuracy : 1.0000, 평균 F1 Score :  1.0000


100%|██████████| 222/222 [04:19<00:00,  1.17s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-3의 train-데이터 셋에서 평균 Loss : 0.0021, 평균 Accuracy : 0.9972, 평균 F1 Score :  0.9931


100%|██████████| 74/74 [00:35<00:00,  2.11it/s]
  0%|          | 0/222 [00:00<?, ?it/s]

현재 epoch-3의 test-데이터 셋에서 평균 Loss : 0.0005, 평균 Accuracy : 0.9994, 평균 F1 Score :  0.9991


100%|██████████| 222/222 [04:20<00:00,  1.17s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-4의 train-데이터 셋에서 평균 Loss : 0.0008, 평균 Accuracy : 0.9990, 평균 F1 Score :  0.9971


100%|██████████| 74/74 [00:34<00:00,  2.12it/s]
  0%|          | 0/222 [00:00<?, ?it/s]

현재 epoch-4의 test-데이터 셋에서 평균 Loss : 0.0001, 평균 Accuracy : 0.9996, 평균 F1 Score :  0.9994


100%|██████████| 222/222 [04:19<00:00,  1.17s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-5의 train-데이터 셋에서 평균 Loss : 0.0009, 평균 Accuracy : 0.9989, 평균 F1 Score :  0.9977


100%|██████████| 74/74 [00:34<00:00,  2.14it/s]
  0%|          | 0/222 [00:00<?, ?it/s]

현재 epoch-5의 test-데이터 셋에서 평균 Loss : 0.0001, 평균 Accuracy : 0.9998, 평균 F1 Score :  0.9996


100%|██████████| 222/222 [04:19<00:00,  1.17s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-6의 train-데이터 셋에서 평균 Loss : 0.0010, 평균 Accuracy : 0.9987, 평균 F1 Score :  0.9977


100%|██████████| 74/74 [00:34<00:00,  2.12it/s]
  0%|          | 0/222 [00:00<?, ?it/s]

현재 epoch-6의 test-데이터 셋에서 평균 Loss : 0.0003, 평균 Accuracy : 0.9994, 평균 F1 Score :  0.9981


100%|██████████| 222/222 [04:18<00:00,  1.16s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-7의 train-데이터 셋에서 평균 Loss : 0.0023, 평균 Accuracy : 0.9972, 평균 F1 Score :  0.9936


100%|██████████| 74/74 [00:34<00:00,  2.15it/s]
  0%|          | 0/222 [00:00<?, ?it/s]

현재 epoch-7의 test-데이터 셋에서 평균 Loss : 0.0017, 평균 Accuracy : 0.9979, 평균 F1 Score :  0.9976


100%|██████████| 222/222 [04:18<00:00,  1.16s/it]
  0%|          | 0/74 [00:00<?, ?it/s]

현재 epoch-8의 train-데이터 셋에서 평균 Loss : 0.0031, 평균 Accuracy : 0.9964, 평균 F1 Score :  0.9919


100%|██████████| 74/74 [00:34<00:00,  2.15it/s]

Early Stopping!
fold4 Stopped





In [14]:
torch.save(model, '/opt/ml/code/model/Resnext50_32x4d-SKFold4-10-EarlyStopping-Accumulation.pt')

In [15]:
# 테스트 데이터셋 폴더 경로를 지정해주세요.
test_dir = '/opt/ml/input/data/eval'

In [16]:
# meta 데이터와 이미지 경로를 불러옵니다.
submission = pd.read_csv(os.path.join(test_dir, 'info.csv'))
image_dir = os.path.join(test_dir, 'images')

In [17]:
class TestDataset(Dataset):
    def __init__(self, img_paths, transform):
        self.img_paths = img_paths
        self.transform = transform

    def __getitem__(self, index):
        image = Image.open(self.img_paths[index])

        if self.transform:
            image = self.transform(image)
        return image

    def __len__(self):
        return len(self.img_paths)

In [18]:
# Test Dataset 클래스 객체를 생성하고 DataLoader를 만듭니다.
image_paths = [os.path.join(image_dir, img_id) for img_id in submission.ImageID]
transform = transforms.Compose([
    Resize((512, 384), Image.BILINEAR),
    ToTensor(),
    Normalize(mean=(0.5, 0.5, 0.5), std=(0.2, 0.2, 0.2)),
])
dataset = TestDataset(image_paths, transform)

loader = DataLoader(
    dataset,
    shuffle=False
)

In [19]:
device = torch.device('cuda')
model = torch.load('/opt/ml/code/model/Resnext50_32x4d-SKFold4-10-EarlyStopping-Accumulation.pt').to(device)
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
      (bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(128, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1

In [20]:
# 모델이 테스트 데이터셋을 예측하고 결과를 저장합니다.
all_predictions = []
for images in tqdm(loader):
    with torch.no_grad():
        images = images.to(device)
        pred = model(images)
        pred = pred.argmax(dim=-1)
        all_predictions.extend(pred.cpu().numpy())
submission['ans'] = all_predictions

100%|██████████| 12600/12600 [03:59<00:00, 52.56it/s]


In [21]:
# 제출할 파일을 저장합니다.
submission.to_csv(os.path.join(test_dir, 'submission_ninth.csv'), index=False)
print('test inference is done!')

test inference is done!
