In [1]:
#!pip install wandb

In [2]:
import GPUtil
import os

import pandas as pd

from PIL import Image

import re

import matplotlib.pyplot as plt

import numpy as np

import torch
from torch.utils.data import Dataset, DataLoader
from torch.nn import Module

import torchvision
from torchvision import transforms

from pytorch_pretrained_vit import ViT

from tqdm import tqdm

import cv2

import imutils

import random

import wandb

In [3]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
cpu = torch.device('cpu')
print ("device:[%s]."%(device))

device:[cuda:0].


In [4]:
DATA_PATH = '/opt/ml/input/data'

## DATASET CLASS

In [5]:
class maskDataset(Dataset):
    def __init__(self, data_path, transform, n_val_per_class = 25, seed = 10, train = True):
        
        super().__init__()
        
        self.transform = transform
        self.train = train
    
        self.train_data_path = os.path.join(data_path, 'train')
        self.train_images_path = os.path.join(self.train_data_path, 'images')
        self.train_data_csv_path = os.path.join(self.train_data_path, 'train.csv')
        
        self.train_data_path_dataframe = pd.read_csv(self.train_data_csv_path)
        
        
        self.AGE_MAP = {0:'age<30', 1:'30<=age<60', 2:'age>=60'}
        self.CLASS_MAP = {'mask':0, 'incorrect_mask':6, 'normal':12,
                          'male':0, 'female':3,
                          0:0, 1:1, 2:2}
        
        self.CLASS_TO_ONEHOT_DICT = {}
        self.CLASS_TO_LABEL_DICT = {}
        
        def gen_onehot_label(gender, age, filename):
            rePattern = re.compile('[0-9]')
            one_hot_vector = torch.tensor([0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],dtype=torch.float32)
            if age>=90:
                age=89
            if (age == 58) or (age == 59):
                age = 60
            
            label = self.CLASS_MAP[gender]+self.CLASS_MAP[age//30] +self.CLASS_MAP[re.sub(rePattern,'',filename.split('.')[0]).strip()]
        
            if label not in self.CLASS_TO_ONEHOT_DICT:
                one_hot_vector[label]=1
                self.CLASS_TO_ONEHOT_DICT[label] = one_hot_vector
                self.CLASS_TO_LABEL_DICT[label] = {'gender':gender, 'age':self.AGE_MAP[age//30], 'filename':re.sub(rePattern,'',filename.split('.')[0])}
            else:
                one_hot_vector = self.CLASS_TO_ONEHOT_DICT[label]
            
            return one_hot_vector, label
        
        self.train_y = []
        self.train_X = []
        
        self.val_y = []
        self.val_X = []
        
        
        image_path_list = []
        
        for row in self.train_data_path_dataframe.iterrows():
            image_directory_path = os.path.join(self.train_images_path, row[1]['path'])
            image_file_name_list = os.listdir(image_directory_path)
            for image_file_name in image_file_name_list:
                if image_file_name[0]=='.': continue
                image_path_list.append([os.path.join(image_directory_path, image_file_name), row[1]['gender'], row[1]['age']])
        
        
        self.val_num_per_class = {0:0, 1:0, 2:0, 3:0, 4:0, 5:0, 6:0, 7:0, 8:0, 9:0, 10:0, 
                                  11:0, 12:0, 13:0, 14:0, 15:0, 16:0, 17:0}
        
        self.class_rate = {0:0, 1:0, 2:0, 3:0, 4:0, 5:0, 6:0, 7:0, 8:0, 9:0, 10:0, 
                                  11:0, 12:0, 13:0, 14:0, 15:0, 16:0, 17:0}
        random.seed(seed)
        random.shuffle(image_path_list)
        
        for i, [image_path, gender, age] in enumerate(image_path_list):
            one_hot_vector, label = gen_onehot_label(gender, age, image_path.split('/')[-1])
            if self.val_num_per_class[label] < n_val_per_class:
                self.val_X.append(image_path)
                self.val_y.append(label)
                self.val_num_per_class[label] += 1
            else:
                self.train_X.append(image_path)
                self.train_y.append(label)
            self.class_rate[label] += 1
               
        if self.train:
            del self.val_y
            del self.val_X
        else:
            del self.train_y
            del self.train_X
    
    def __len__(self):
        if self.train:
            return len(self.train_X)
        else:
            return len(self.val_X)
    
    def __getitem__(self, idx):
        X ,y = None, None
        
        if self.train:
            X = self.transform(cv2.imread(self.train_X[idx]))
            y = torch.tensor(self.CLASS_TO_ONEHOT_DICT[self.train_y[idx]],dtype=torch.float32)
            
        else:
            X = self.transform(Image.open(self.val_X[idx]))
            y = torch.tensor(self.CLASS_TO_ONEHOT_DICT[self.val_y[idx]],dtype=torch.float32)
        return y, X
    
    
    def onehot_tensor_to_class(self, onehot_tensor):
        return torch.argmax(onehot_tensor).item()
    
    def onehot_tensor_to_gender_age_filename_dict(self, onehot_tensor):
        return self.CLASS_TO_LABEL_DICT[self.onehot_list_to_class(onehot_tensor)]
    
    def gender_age_filename_to_onehot_list(self, gender, age, filename):
        rePattern = re.compile('[0-9]')
        one_hot_vector = [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]
        if age>=90:
            age=89
            
        if (age == 58) or (age == 59):
                age = 60
                
        label = self.CLASS_MAP[gender]+self.CLASS_MAP[age//30] +self.CLASS_MAP[re.sub(rePattern,'',filename.split('.')[0]).strip()]
        if label in self.CLASS_TO_ONEHOT_DICT:
            return self.CLASS_TO_ONEHOT_DICT[label]
        else:
            return -1


In [6]:
class specificClassDataModel(Dataset):
    def __init__(self, dataModel, specificClass, max_data_num):
        self.idxs = []
        
        self.dataModel = dataModel
        
        for i,(y, X) in tqdm(enumerate(dataModel),position=0,leave=True):
            if int(dataModel.onehot_tensor_to_class(y)) == specificClass:
                self.idxs.append(i)
    def __len__(self):
        return len(self.idxs)
    def __getitem__(self, idx):
        y, X = None, None
        
        idx = self.idxs[idx]
        y, X = self.dataModel[idx]
            
        return y, X

In [7]:
class pretextMaskDataset(Dataset):
    def __init__(self, data_path, transform, seed = 10):
        
        super().__init__()
        
        self.transform = transform
    
        self.train_data_path = os.path.join(data_path, 'train')
        self.train_images_path = os.path.join(self.train_data_path, 'images')
        self.train_data_csv_path = os.path.join(self.train_data_path, 'train.csv')
        
        self.train_data_path_dataframe = pd.read_csv(self.train_data_csv_path)
        
        self.y = []
        self.X = []
        
        image_path_list = []
        
        self.position = {0:(0,0), 1:(0,1), 2:(1,0), 3:(1,1)}
        
        for row in self.train_data_path_dataframe.iterrows():
            image_directory_path = os.path.join(self.train_images_path, row[1]['path'])
            image_file_name_list = os.listdir(image_directory_path)
            for image_file_name in image_file_name_list:
                if image_file_name[0]=='.': continue
                for rotation in range(4):
                    image_path_list.append([os.path.join(image_directory_path, image_file_name), rotation])
        

        random.seed(seed)
        random.shuffle(image_path_list)
        
        for i, [image_path, rotation] in enumerate(image_path_list):
            self.X.append(image_path)
            self.y.append(rotation)
               
    
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        X ,y = None, None
        rand = random.randrange(0, len(self.X))
        
        
        img = cv2.imread(self.X[idx])
        center_y = img.shape[0]//2
        center_x = img.shape[1]//2
        position = self.position[self.y[idx]]
        
        start_x = center_x*position[0]
        start_y = center_y*position[1]
        end_x = center_x*(position[0]+1)
        end_y = center_y*(position[1]+1)
        
        rand_img = cv2.imread(self.X[rand])
        img[start_y:end_y, start_x:end_x,:] = rand_img[start_y:end_y, start_x:end_x,:]
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        rand_img = cv2.cvtColor(rand_img, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        
        
        X = self.transform(img)
        
        one_hot = [0,0,0,0]
        one_hot[self.y[idx]] = 1
        y = torch.tensor(one_hot,dtype=torch.float)
            
        
        return y, X

In [8]:
class weighted_probability_cutmix_Dataset(Dataset):
    def __init__(self, data_path, dataset, transform, class_num = 18):
        self.X = []
        self.transform = transform
        
        self.dataset = maskDataset(data_path, transform = self.transform)
        self.train_dataset_per_class_list = []
        
        
        for i in tqdm(range(class_num)):
            train_dataset = self.dataset
            class_n_train_dataset = specificClassDataset(train_dataset,i,self.dataset.class_rate[i])
            self.train_dataset_per_class_list.append(class_n_train_dataset)

        cnt = 0
        for idx in range(len(self.dataset)):
            weights = sum(list(self.dataset.class_rate.values())) - np.array(list(self.dataset.class_rate.values()))
            weights = list(weights)
            mix_r_label = random.choices(range(0, 18), weights=weights)[0]
            
            mix_r_idx = random.randint(0,len(self.train_dataset_per_class_list[mix_r_label])-1)
            self.X.append({'mix_r_label':mix_r_label, 'mix_r_idx':mix_r_idx})
            
    def __len__(self):
        return len(self.X)
    def __getitem__(self, idx):
        X ,y = None, None
        mix_r_label = self.X[idx]['mix_r_label']
        mix_r_idx = self.X[idx]['mix_r_idx']
        
        
        label, img = self.dataset[idx]
        center_y = img.shape[0]//2
        center_x = img.shape[1]//2
        position = (0,0)
        
        start_x = center_x*position[0]
        start_y = center_y*position[1]
        end_x = center_x*(position[0]+1)
        end_y = center_y*(position[1]+1)
        
        rand_label, rand_img = self.train_dataset_per_class_list[mix_r_label][mix_r_idx]
        img[start_y:end_y, start_x:end_x,:] = rand_img[start_y:end_y, start_x:end_x,:]
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = Image.fromarray(img)
        
        X = self.transform(img)
        y = 0.75*label + 0.25*rand_label
            
        
        return y, X


In [9]:
class pytorchViTModel(Module):
    def __init__(self):
        super().__init__()
        self.ViTModel = ViT('B_16_imagenet1k', pretrained=True)
        self.linear = torch.nn.Linear(1000, 18, bias=True)
        
    def forward(self, x):
        x = self.ViTModel(x)
        return self.linear(x)

In [10]:
class preTaskModel(Module):
    def __init__(self, model):
        super().__init__()
        self.model = model
        self.lastLinear = torch.nn.Linear(18, 4, bias=True)
        
    def forward(self, x):
        x = self.model(x)
        return self.lastLinear(x)

In [11]:
def model_loss_acc(model, loss_fn, dataLoader):
    model.eval()
    n_correct = 0
    n_total = 0
    loss_sum = 0
    with torch.no_grad():
        for i,(y,X) in enumerate(dataLoader):
            X = X.to(device)
            y = y.to(device)
            output = model(X)
            loss = loss_fn(output, y)
            loss_sum += loss.item()
            target = torch.max(y,1)
            y_pred = torch.max(output,1)
            n_correct += (target.indices == y_pred.indices).sum()
            n_total += y.size(0)
    loss_avg = loss_sum/len(dataLoader)
    acc = n_correct/n_total
    model.train()
    return loss_avg, acc

In [12]:
def model_loss_acc_per_class(model, loss_fn, dataLoaderList):
    for i, dataLoader in enumerate(dataLoaderList):
        val_loss_avg, val_acc = model_loss_acc(model, loss_fn, dataLoader)
        print('클래스 {}에 대한 loss = [{}], acc = [{}]'.format( i, val_loss_avg, val_acc))

In [13]:
lr = 0.0001
EPOCHS = 10
BATCHS = 8

In [14]:
#학습용 데이터셋
mask_train_dataset = maskDataset(DATA_PATH,
                                 transform = transforms.Compose([]),
                                 n_val_per_class = 70,
                                 train = True
                          )
print(mask_train_dataset.class_rate)


#검증용 데이터셋 (전체 클래스)
mask_val_dataset = maskDataset(DATA_PATH, 
                               transform = transforms.Compose([transforms.Resize((384, 384)), 
                                                           transforms.ToTensor(),
                                                           transforms.Normalize(0.1, 0.5),]),
                               n_val_per_class = 70,
                               train = False
                          )
maskValDataLoader = DataLoader(mask_val_dataset, batch_size=1)

#검증용 데이터셋(특정 클래스)
val_dataloader_per_class_list = []
for i in tqdm(range(18)):
    val_dataset = mask_val_dataset
    class_n_val_dataset = specificClassDataModel(val_dataset,i,mask_val_dataset.class_rate[i])
    class_n_val_dataloader = DataLoader(class_n_val_dataset, batch_size=1)
    val_dataloader_per_class_list.append(class_n_val_dataloader)


pretext_task_dataset = pretextMaskDataset(DATA_PATH,
                                          transform = transforms.Compose([transforms.ToTensor(),
                                                                          transforms.Resize((384, 384)),
                                                                          transforms.Normalize(0.1, 0.5),])
                                         )
pretextTaskDataLoader = DataLoader(pretext_task_dataset, batch_size=BATCHS)

{0: 2745, 1: 1570, 2: 895, 3: 3660, 4: 3345, 5: 1285, 6: 549, 7: 314, 8: 179, 9: 732, 10: 669, 11: 257, 12: 549, 13: 314, 14: 179, 15: 732, 16: 669, 17: 257}


  y = torch.tensor(self.CLASS_TO_ONEHOT_DICT[self.val_y[idx]],dtype=torch.float32)
1260it [00:06, 192.99it/s]
1260it [00:06, 192.45it/s]06<01:51,  6.53s/it]
1260it [00:06, 195.17it/s]13<01:44,  6.54s/it]
1260it [00:06, 194.25it/s]19<01:37,  6.51s/it]
1260it [00:06, 190.28it/s]26<01:31,  6.51s/it]
1260it [00:06, 193.78it/s]32<01:25,  6.54s/it]
1260it [00:06, 193.74it/s]39<01:18,  6.53s/it]
1260it [00:06, 193.44it/s]45<01:11,  6.53s/it]
1260it [00:06, 194.36it/s]52<01:05,  6.52s/it]
1260it [00:06, 191.65it/s]58<00:58,  6.51s/it]
1260it [00:06, 194.68it/s]:05<00:52,  6.53s/it]
1260it [00:06, 193.77it/s]:11<00:45,  6.52s/it]
1260it [00:06, 194.05it/s]:18<00:39,  6.51s/it]
1260it [00:06, 194.19it/s]:24<00:32,  6.51s/it]
1260it [00:06, 193.58it/s]:31<00:26,  6.50s/it]
1260it [00:06, 193.08it/s]:37<00:19,  6.51s/it]
1260it [00:06, 194.35it/s]:44<00:13,  6.51s/it]
1260it [00:06, 194.00it/s]:50<00:06,  6.51s/it]
100%|██████████| 18/18 [01:57<00:00,  6.52s/it]


In [15]:
#학습용 데이터셋(특정 클래스)
train_dataloader_per_class_list = []
for i in tqdm(range(18)):
    train_dataset = mask_train_dataset
    class_n_train_dataset = specificClassDataModel(train_dataset,i,mask_train_dataset.class_rate[i])
    print(i, mask_train_dataset.class_rate[i], len(class_n_train_dataset))
    class_n_train_dataloader = DataLoader(class_n_train_dataset, batch_size=1)
    train_dataloader_per_class_list.append(class_n_train_dataloader)
    
weighted_cutmix_Dataset = weighted_probability_cutmix_Dataset(mask_train_dataset,
                                                                            train_dataloader_per_class_list,
                                                                           transform = transforms.Compose([
                                                                                 transforms.Resize((384, 384)), 
                                                                                 transforms.ToTensor(),
                                                                                 transforms.Normalize(0.1, 0.5),
                                                                             ]),)
maskTrainDataLoader = DataLoader(weighted_cutmix_Dataset, batch_size=BATCHS, shuffle=True)

  y = torch.tensor(self.CLASS_TO_ONEHOT_DICT[self.train_y[idx]],dtype=torch.float32)
17640it [00:46, 377.63it/s]
40it [00:00, 398.67it/s]0:46<13:14, 46.72s/it]

0 2745 2675


17640it [00:46, 375.40it/s]
40it [00:00, 396.91it/s]1:33<12:28, 46.80s/it]

1 1570 1500


17640it [00:46, 376.99it/s]
41it [00:00, 400.65it/s]2:20<11:41, 46.80s/it]

2 895 825


17640it [00:46, 376.84it/s]
40it [00:00, 397.96it/s]3:07<10:55, 46.80s/it]

3 3660 3590


17640it [00:46, 376.62it/s]
40it [00:00, 397.94it/s]3:54<10:08, 46.81s/it]

4 3345 3275


17640it [00:46, 377.09it/s]
40it [00:00, 396.28it/s]4:40<09:21, 46.80s/it]

5 1285 1215


17640it [00:46, 376.71it/s]
40it [00:00, 398.42it/s]5:27<08:34, 46.81s/it]

6 549 479


17640it [00:46, 376.55it/s]
40it [00:00, 397.25it/s]6:14<07:48, 46.82s/it]

7 314 244


17640it [00:46, 377.06it/s]
40it [00:00, 397.50it/s]7:01<07:01, 46.81s/it]

8 179 109


17640it [00:46, 376.33it/s]
40it [00:00, 397.68it/s]07:48<06:14, 46.83s/it]

9 732 662


17640it [00:46, 376.68it/s]
40it [00:00, 397.90it/s]08:35<05:27, 46.83s/it]

10 669 599


17640it [00:46, 376.38it/s]
41it [00:00, 401.32it/s]09:21<04:41, 46.84s/it]

11 257 187


17640it [00:46, 377.09it/s]
40it [00:00, 398.29it/s]10:08<03:54, 46.82s/it]

12 549 479


17640it [00:46, 377.35it/s]
41it [00:00, 401.60it/s]10:55<03:07, 46.80s/it]

13 314 244


17640it [00:46, 380.90it/s]
41it [00:00, 402.08it/s]11:41<02:19, 46.65s/it]

14 179 109


17640it [00:46, 378.53it/s]
41it [00:00, 400.46it/s]12:28<01:33, 46.64s/it]

15 732 662


17640it [00:46, 379.87it/s]
40it [00:00, 398.81it/s]13:14<00:46, 46.58s/it]

16 669 599


17640it [00:46, 380.60it/s]
100%|██████████| 18/18 [14:01<00:00, 46.73s/it]


17 257 187


In [16]:
model = pytorchViTModel()
model.train()
params = [param for param in model.parameters() if param.requires_grad]
optimizer = torch.optim.Adam(params, lr=lr)
loss_fn = torch.nn.MSELoss()

Loaded pretrained weights.


In [17]:
preModel = preTaskModel(model).to(device)
for e in range(1):
    train_loss_sum = 0
    n_correct, n_total = 0, 0
    for y,X in tqdm(pretextTaskDataLoader,position =0, leave=True):
        y = y.to(device)
        X = X.to(device)
        output = preModel(X)
        loss = loss_fn(output, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss_sum += loss.item()
        target = torch.max(y,1)
        y_pred = torch.max(output,1)
        n_correct += (target.indices == y_pred.indices).sum().item()
        n_total += y.size(0)
        
        del X
        del y
        del output
        
    train_loss_avg = train_loss_sum/len(maskTrainDataLoader)
    train_acc = n_correct/n_total
    print('train_loss = [{}], train_acc = [{}]'.format( train_loss_avg, train_acc))

100%|██████████| 9450/9450 [58:03<00:00,  2.71it/s]

train_loss = [0.1234074942875093], train_acc = [0.9130820105820106]





In [18]:
print('train_loss = [{}], train_acc = [{}]'.format( train_loss_avg, train_acc))

train_loss = [0.1234074942875093], train_acc = [0.9130820105820106]


In [None]:
torch.cuda.empty_cache()
GPUtil.showUtilization()

In [19]:
def f1_score(model, dataLoader):
    model.eval()
    tp, fp, tn, fn = 0, 0, 0, 0
    with torch.no_grad():
        for y, X in dataLoader:
            X = X.to(device)
            y = y.to(device)
            output = model(X)
            target = y
            y_pred = output
            tp += (target*y_pred).sum().item()
            tn += ((1-target)*(1-y_pred)).sum().item()
            fp += ((1-target)*y_pred).sum().item()
            fn += (target*(1-y_pred)).sum().item()
    
    precision = tp / (tp+fp)
    recall = tp / (tp + fn)
    epsilon = 1e-7
    f1 = (2*precision*recall)/(recall + precision + epsilon)
    
    model.train()
    return f1
                
        

In [21]:
wandb.init(project="mask_image_classification", entity="geup")
config = {"epochs":EPOCHS, "batch_size":BATCHS, "learning_rate":lr}
wandb.init(project="mask_image_classification", config=config)
wandb.watch(model, log="all")

[34m[1mwandb[0m: Currently logged in as: [33mgeup[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.12.1 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade
2021-09-02 02:14:27.162795: W tensorflow/stream_executor/platform/default/dso_loader.cc:59] Could not load dynamic library 'libcudart.so.10.1'; dlerror: libcudart.so.10.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
2021-09-02 02:14:27.163410: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.

CondaEnvException: Unable to determine environment

Please re-run this command with one of the following options:

* Provide an environment name via --name or -n
* Re-run this command inside an activated conda environment.



[34m[1mwandb[0m: wandb version 0.12.1 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade
2021-09-02 02:14:36.921766: W tensorflow/stream_executor/platform/default/dso_loader.cc:59] Could not load dynamic library 'libcudart.so.10.1'; dlerror: libcudart.so.10.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
2021-09-02 02:14:36.922388: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.

CondaEnvException: Unable to determine environment

Please re-run this command with one of the following options:

* Provide an environment name via --name or -n
* Re-run this command inside an activated conda environment.



In [36]:
torch.cuda.empty_cache()
GPUtil.showUtilization()

model.train()

for e in range(EPOCHS):
    train_loss_sum = 0
    n_correct, n_total = 0, 0
    val_loss_sum = 0
    val_n_correct, val_n_total = 0,0
    for y,X in tqdm(maskTrainDataLoader,position =0, leave=True):
        X = X.to(device)
        y = y.to(device)
        output = model(X)
        loss = loss_fn(output, y)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        train_loss_sum += loss.item()
        target = torch.max(y,1)
        y_pred = torch.max(output,1)
        n_correct += (target.indices == y_pred.indices).sum().item()
        n_total += y.size(0)
        
        del X
        del y
        del output
        del loss
        torch.cuda.empty_cache()
        
    train_loss_avg = train_loss_sum/len(maskTrainDataLoader)
    train_acc = n_correct/n_total
    val_loss_avg, val_acc = model_loss_acc(model, loss_fn, maskValDataLoader)
    f1 = f1_score(model, maskValDataLoader)
    print('train_loss = [{}], train_acc = [{}] val_loss = [{}], val_acc = [{}], val_f1 = [{}]'.format( train_loss_avg, train_acc, val_loss_avg, val_acc, f1))
    model_loss_acc_per_class(model, loss_fn, val_dataloader_per_class_list)
    wandb.log({"train_loss_avg": train_loss_avg, "train_acc":train_acc, "val_loss_avg":val_loss_avg,"val_acc":val_acc, "val_f1_score":f1}) 

  0%|          | 0/2205 [00:00<?, ?it/s]

| ID | GPU | MEM |
------------------
|  0 |  0% | 74% |


  y = torch.tensor(self.CLASS_TO_ONEHOT_DICT[self.train_y[idx]],dtype=torch.float32)
100%|██████████| 2205/2205 [16:06<00:00,  2.28it/s]
  y = torch.tensor(self.CLASS_TO_ONEHOT_DICT[self.val_y[idx]],dtype=torch.float32)


train_loss = [0.0009538077902222525], train_acc = [0.9901927437641723] val_loss = [0.005880929050796286], val_acc = [0.9547619223594666], val_f1 = [0.8615707383578879]
클래스 0에 대한 loss = [0.001258272973219781], acc = [1.0]
클래스 1에 대한 loss = [0.003411058652792625], acc = [0.9714285731315613]
클래스 2에 대한 loss = [0.004613734048169655], acc = [0.9428571462631226]
클래스 3에 대한 loss = [0.0009868984265527356], acc = [1.0]
클래스 4에 대한 loss = [0.004428680356535811], acc = [0.9714285731315613]
클래스 5에 대한 loss = [0.00352922919638721], acc = [0.9714285731315613]
클래스 6에 대한 loss = [0.003593014628651352], acc = [0.985714316368103]
클래스 7에 대한 loss = [0.005541979949235351], acc = [0.9571428894996643]
클래스 8에 대한 loss = [0.014116256270790472], acc = [0.8571428656578064]
클래스 9에 대한 loss = [0.004922349193865167], acc = [0.9571428894996643]
클래스 10에 대한 loss = [0.004625479198979779], acc = [0.9714285731315613]
클래스 11에 대한 loss = [0.009345093292150913], acc = [0.9142857193946838]
클래스 12에 대한 loss = [0.004556865955237299], acc

  0%|          | 0/2205 [00:00<?, ?it/s]

클래스 17에 대한 loss = [0.006313240770292136], acc = [0.985714316368103]


100%|██████████| 2205/2205 [16:09<00:00,  2.27it/s]


train_loss = [0.0011726550183760777], train_acc = [0.9861678004535147] val_loss = [0.004540028067545098], val_acc = [0.9698413014411926], val_f1 = [0.8890753661233047]
클래스 0에 대한 loss = [0.0019351188151111793], acc = [0.985714316368103]
클래스 1에 대한 loss = [0.0015556331366367107], acc = [1.0]
클래스 2에 대한 loss = [0.0017349457184796587], acc = [1.0]
클래스 3에 대한 loss = [0.0008060072977968957], acc = [1.0]
클래스 4에 대한 loss = [0.004432206960180858], acc = [0.9714285731315613]
클래스 5에 대한 loss = [0.002388965369547285], acc = [0.985714316368103]
클래스 6에 대한 loss = [0.003035031770819582], acc = [0.985714316368103]
클래스 7에 대한 loss = [0.006906864688166284], acc = [0.9571428894996643]
클래스 8에 대한 loss = [0.005682170605307744], acc = [0.9571428894996643]
클래스 9에 대한 loss = [0.005790118319470951], acc = [0.9428571462631226]
클래스 10에 대한 loss = [0.005507567523454782], acc = [0.9571428894996643]
클래스 11에 대한 loss = [0.006986018928104645], acc = [0.9714285731315613]
클래스 12에 대한 loss = [0.004830992006723786], acc = [0.9571428

  0%|          | 0/2205 [00:00<?, ?it/s]

클래스 17에 대한 loss = [0.004828004746585585], acc = [0.985714316368103]


100%|██████████| 2205/2205 [16:06<00:00,  2.28it/s]


train_loss = [0.0009968140870522783], train_acc = [0.9884353741496599] val_loss = [0.006450869060559882], val_acc = [0.9396825432777405], val_f1 = [0.8754101939999677]
클래스 0에 대한 loss = [0.0026728378275168193], acc = [0.985714316368103]
클래스 1에 대한 loss = [0.0051101172004044725], acc = [0.9428571462631226]
클래스 2에 대한 loss = [0.011538635458938578], acc = [0.8857142925262451]
클래스 3에 대한 loss = [0.0006849927291373855], acc = [1.0]
클래스 4에 대한 loss = [0.004974467891224776], acc = [0.9428571462631226]
클래스 5에 대한 loss = [0.0049908329639168055], acc = [0.9428571462631226]
클래스 6에 대한 loss = [0.004555361341460541], acc = [0.9571428894996643]
클래스 7에 대한 loss = [0.004588314308784902], acc = [0.9714285731315613]
클래스 8에 대한 loss = [0.008215465347062231], acc = [0.9285714626312256]
클래스 9에 대한 loss = [0.0037264877615858236], acc = [0.9714285731315613]
클래스 10에 대한 loss = [0.008278354461487782], acc = [0.9285714626312256]
클래스 11에 대한 loss = [0.010845166150413985], acc = [0.8857142925262451]
클래스 12에 대한 loss = [0.0046

  0%|          | 0/2205 [00:00<?, ?it/s]

클래스 17에 대한 loss = [0.007801280269424231], acc = [0.9428571462631226]


100%|██████████| 2205/2205 [16:07<00:00,  2.28it/s]


train_loss = [0.0008837998913532538], train_acc = [0.990249433106576] val_loss = [0.01131397735022354], val_acc = [0.8904762268066406], val_f1 = [0.7646924607663924]
클래스 0에 대한 loss = [0.0015832821443577164], acc = [1.0]
클래스 1에 대한 loss = [0.0043044274310107405], acc = [0.9714285731315613]
클래스 2에 대한 loss = [0.0018945383643897782], acc = [1.0]
클래스 3에 대한 loss = [0.002935767546919773], acc = [0.9714285731315613]
클래스 4에 대한 loss = [0.003748821640017143], acc = [0.9714285731315613]
클래스 5에 대한 loss = [0.007327061019064526], acc = [0.9428571462631226]
클래스 6에 대한 loss = [0.0054422827221320145], acc = [0.9571428894996643]
클래스 7에 대한 loss = [0.020398173271470504], acc = [0.7714285850524902]
클래스 8에 대한 loss = [0.02136186930916405], acc = [0.8142856955528259]
클래스 9에 대한 loss = [0.01049641770992561], acc = [0.8714285492897034]
클래스 10에 대한 loss = [0.02435301940921428], acc = [0.6857143044471741]
클래스 11에 대한 loss = [0.017673336561503154], acc = [0.7571428418159485]
클래스 12에 대한 loss = [0.005134836465627554], acc

  0%|          | 0/2205 [00:00<?, ?it/s]

클래스 17에 대한 loss = [0.0328148112233196], acc = [0.5857142806053162]


100%|██████████| 2205/2205 [16:14<00:00,  2.26it/s]


train_loss = [0.0010349848804117156], train_acc = [0.9877551020408163] val_loss = [0.0063792487453380115], val_acc = [0.9515873193740845], val_f1 = [0.83606002136821]
클래스 0에 대한 loss = [0.001439154486537778], acc = [1.0]
클래스 1에 대한 loss = [0.005497440429358643], acc = [0.9428571462631226]
클래스 2에 대한 loss = [0.003297561837825924], acc = [1.0]
클래스 3에 대한 loss = [0.001504159349756914], acc = [1.0]
클래스 4에 대한 loss = [0.0065740734652665975], acc = [0.9571428894996643]
클래스 5에 대한 loss = [0.0030167136601189017], acc = [0.985714316368103]
클래스 6에 대한 loss = [0.0051810473868889465], acc = [0.9714285731315613]
클래스 7에 대한 loss = [0.01050802866853441], acc = [0.9142857193946838]
클래스 8에 대한 loss = [0.0046807290915499575], acc = [0.9714285731315613]
클래스 9에 대한 loss = [0.006401527910825929], acc = [0.9428571462631226]
클래스 10에 대한 loss = [0.01037770354908259], acc = [0.9142857193946838]
클래스 11에 대한 loss = [0.013409689981199336], acc = [0.8571428656578064]
클래스 12에 대한 loss = [0.0048266436626103575], acc = [0.9571428

  0%|          | 0/2205 [00:00<?, ?it/s]

클래스 17에 대한 loss = [0.013757851334646277], acc = [0.8285714387893677]


100%|██████████| 2205/2205 [16:04<00:00,  2.29it/s]


train_loss = [0.0010006957490904184], train_acc = [0.988265306122449] val_loss = [0.006959598308247454], val_acc = [0.9460317492485046], val_f1 = [0.8315894629943965]
클래스 0에 대한 loss = [0.005168473485744991], acc = [0.9571428894996643]
클래스 1에 대한 loss = [0.007434866033664938], acc = [0.9142857193946838]
클래스 2에 대한 loss = [0.006998010922896875], acc = [0.9142857193946838]
클래스 3에 대한 loss = [0.001859053414331616], acc = [1.0]
클래스 4에 대한 loss = [0.0034341870526986896], acc = [0.985714316368103]
클래스 5에 대한 loss = [0.003135839393196095], acc = [0.985714316368103]
클래스 6에 대한 loss = [0.003257083011807741], acc = [0.985714316368103]
클래스 7에 대한 loss = [0.00727164157786839], acc = [0.9428571462631226]
클래스 8에 대한 loss = [0.012337910106206046], acc = [0.9285714626312256]
클래스 9에 대한 loss = [0.005250786618645569], acc = [0.9571428894996643]
클래스 10에 대한 loss = [0.006568291926149479], acc = [0.9571428894996643]
클래스 11에 대한 loss = [0.01437808271148242], acc = [0.8999999761581421]
클래스 12에 대한 loss = [0.0078761876275

  0%|          | 0/2205 [00:00<?, ?it/s]

클래스 17에 대한 loss = [0.008133242086374334], acc = [0.9714285731315613]


100%|██████████| 2205/2205 [16:10<00:00,  2.27it/s]


train_loss = [0.000965428048928726], train_acc = [0.9891156462585035] val_loss = [0.00668839568080575], val_acc = [0.9492063522338867], val_f1 = [0.8556064200126967]
클래스 0에 대한 loss = [0.0012763060192810371], acc = [1.0]
클래스 1에 대한 loss = [0.0034241078893371326], acc = [0.9714285731315613]
클래스 2에 대한 loss = [0.0009048678150325681], acc = [1.0]
클래스 3에 대한 loss = [0.0007034431644569849], acc = [1.0]
클래스 4에 대한 loss = [0.004064952115835955], acc = [0.9714285731315613]
클래스 5에 대한 loss = [0.0031214646677004305], acc = [0.985714316368103]
클래스 6에 대한 loss = [0.002781335275877999], acc = [0.985714316368103]
클래스 7에 대한 loss = [0.017765251188705276], acc = [0.8285714387893677]
클래스 8에 대한 loss = [0.012357728458092814], acc = [0.8999999761581421]
클래스 9에 대한 loss = [0.005263909843883344], acc = [0.9571428894996643]
클래스 10에 대한 loss = [0.008414703822096012], acc = [0.9285714626312256]
클래스 11에 대한 loss = [0.012128989346404393], acc = [0.8999999761581421]
클래스 12에 대한 loss = [0.00983101962483488], acc = [0.89999997

  0%|          | 0/2205 [00:00<?, ?it/s]

클래스 17에 대한 loss = [0.01201212776650209], acc = [0.9142857193946838]


100%|██████████| 2205/2205 [16:05<00:00,  2.28it/s]


train_loss = [0.0008447787333315485], train_acc = [0.9901360544217687] val_loss = [0.00626162231939296], val_acc = [0.9484127163887024], val_f1 = [0.8600720442467025]
클래스 0에 대한 loss = [0.0017033152982289072], acc = [0.985714316368103]
클래스 1에 대한 loss = [0.002695620090006352], acc = [0.9714285731315613]
클래스 2에 대한 loss = [0.0020416819131371866], acc = [1.0]
클래스 3에 대한 loss = [0.0007763656605675351], acc = [1.0]
클래스 4에 대한 loss = [0.007595811470985479], acc = [0.9285714626312256]
클래스 5에 대한 loss = [0.0014984582381397818], acc = [1.0]
클래스 6에 대한 loss = [0.00397198944280847], acc = [0.9714285731315613]
클래스 7에 대한 loss = [0.007935126860788191], acc = [0.9428571462631226]
클래스 8에 대한 loss = [0.009901315052827288], acc = [0.9285714626312256]
클래스 9에 대한 loss = [0.00526339406117456], acc = [0.9285714626312256]
클래스 10에 대한 loss = [0.00797377023845911], acc = [0.9285714626312256]
클래스 11에 대한 loss = [0.008139674954665159], acc = [0.9428571462631226]
클래스 12에 대한 loss = [0.006160347309923963], acc = [0.957142889

  0%|          | 0/2205 [00:00<?, ?it/s]

클래스 17에 대한 loss = [0.011383034414861219], acc = [0.8999999761581421]


100%|██████████| 2205/2205 [16:09<00:00,  2.28it/s]


train_loss = [0.000686339819682403], train_acc = [0.9930272108843538] val_loss = [0.006018825030274918], val_acc = [0.9523809552192688], val_f1 = [0.8650685299512875]
클래스 0에 대한 loss = [0.001553065086773131], acc = [1.0]
클래스 1에 대한 loss = [0.004103401703417019], acc = [0.9571428894996643]
클래스 2에 대한 loss = [0.00805398256634362], acc = [0.8857142925262451]
클래스 3에 대한 loss = [0.00447276705625492], acc = [0.9428571462631226]
클래스 4에 대한 loss = [0.003280547383889955], acc = [1.0]
클래스 5에 대한 loss = [0.0022375423650893415], acc = [1.0]
클래스 6에 대한 loss = [0.0033947591893333344], acc = [0.985714316368103]
클래스 7에 대한 loss = [0.006232112064546007], acc = [0.9571428894996643]
클래스 8에 대한 loss = [0.011928196485262431], acc = [0.8999999761581421]
클래스 9에 대한 loss = [0.0048900034335279735], acc = [0.9571428894996643]
클래스 10에 대한 loss = [0.0041926841027036844], acc = [1.0]
클래스 11에 대한 loss = [0.006677079075598158], acc = [0.9428571462631226]
클래스 12에 대한 loss = [0.004003080864068969], acc = [0.985714316368103]
클래스 13

  0%|          | 0/2205 [00:00<?, ?it/s]

클래스 17에 대한 loss = [0.010446340404866663], acc = [0.8999999761581421]


100%|██████████| 2205/2205 [16:09<00:00,  2.27it/s]


train_loss = [0.0008994349047757818], train_acc = [0.9896258503401361] val_loss = [0.005599394695800295], val_acc = [0.9563491940498352], val_f1 = [0.8721784080886156]
클래스 0에 대한 loss = [0.0009416043875847078], acc = [1.0]
클래스 1에 대한 loss = [0.0036679276087982415], acc = [0.9571428894996643]
클래스 2에 대한 loss = [0.004866769578023065], acc = [0.9428571462631226]
클래스 3에 대한 loss = [0.0019200563387130388], acc = [0.985714316368103]
클래스 4에 대한 loss = [0.003064798359680156], acc = [0.985714316368103]
클래스 5에 대한 loss = [0.0026983001328127493], acc = [0.985714316368103]
클래스 6에 대한 loss = [0.0040788665937725455], acc = [0.985714316368103]
클래스 7에 대한 loss = [0.004728732099257675], acc = [0.985714316368103]
클래스 8에 대한 loss = [0.013212710688198319], acc = [0.8857142925262451]
클래스 9에 대한 loss = [0.004240412019342849], acc = [0.9714285731315613]
클래스 10에 대한 loss = [0.004379057320017767], acc = [0.985714316368103]
클래스 11에 대한 loss = [0.006320445543055289], acc = [0.9714285731315613]
클래스 12에 대한 loss = [0.003605939

In [31]:
torch.save({'model':maskModel(), 'state_dict':model.state_dict()}, './vit_pretext_task_postion_cutmix.pt')

Loaded pretrained weights.


In [24]:
#checkpoint = torch.load('./vit_pretext_task_position.pt')
#myModel = checkpoint['model']
#myModel.load_state_dict(checkpoint['state_dict'])

FileNotFoundError: [Errno 2] No such file or directory: './vit_pretext_task_position.pt'

In [32]:
torch.cuda.empty_cache()
GPUtil.showUtilization()

| ID | GPU | MEM |
------------------
|  0 |  0% | 74% |


## 테스트

In [33]:
class TestDataset(Dataset):
    def __init__(self, img_paths, transform):
        self.img_paths = img_paths
        self.transform = transform

    def __getitem__(self, index):
        image = Image.open(self.img_paths[index])

        if self.transform:
            image = self.transform(image)
        return image

    def __len__(self):
        return len(self.img_paths)

In [34]:
# 테스트 데이터셋 폴더 경로를 지정해주세요.
test_dir = '/opt/ml/input/data/eval'

# meta 데이터와 이미지 경로를 불러옵니다.
submission = pd.read_csv(os.path.join(test_dir, 'info.csv'))
image_dir = os.path.join(test_dir, 'images')

# Test Dataset 클래스 객체를 생성하고 DataLoader를 만듭니다.
image_paths = [os.path.join(image_dir, img_id) for img_id in submission.ImageID]
transform = torchvision.transforms.Compose([
    torchvision.transforms.Resize((384,384)),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(0.1, 0.5),
])
dataset = TestDataset(image_paths, transform)

loader = DataLoader(
    dataset,
    shuffle=False
)

# 모델을 정의합니다. (학습한 모델이 있다면 torch.load로 모델을 불러주세요!)
model.eval()

# 모델이 테스트 데이터셋을 예측하고 결과를 저장합니다.
all_predictions = []
all_softpredictions = []

for images in tqdm(loader):
    with torch.no_grad():
        images = images.to(device)
        output = model(images)
        pred = output.argmax(dim=-1)
        soft_pred = torch.nn.Softmax(dim=1)(output)
        all_predictions.extend(pred.cpu().numpy())
        all_softpredictions.extend(soft_pred.cpu().numpy())
submission['ans'] = all_predictions
# 제출할 파일을 저장합니다.
submission.to_csv(os.path.join(test_dir, 'submission.csv'), index=False)

submission['ans'] = all_softpredictions
# 제출할 파일을 저장합니다.
submission.to_csv(os.path.join(test_dir, 'soft_submission.csv'), index=False)
print('test inference is done!')

100%|██████████| 12600/12600 [05:14<00:00, 40.09it/s]


test inference is done!
