In [1]:
import torch, gc
gc.collect()
torch.cuda.empty_cache()

In [2]:
import GPUtil
GPUtil.showUtilization()

| ID | GPU | MEM |
------------------
|  0 |  0% |  0% |


In [3]:
import torch
import torch.nn as nn
import torchvision
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

import numpy as np
import pandas as pd
import cv2
from PIL import Image
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import random
import os
from torchmetrics import F1Score

In [4]:
torch.__version__

'1.13.0+cu117'

In [5]:
os.chdir('input/data')
os.getcwd()

'/opt/ml/input/data'

In [6]:
!find . -regex ".*\.\_[a-zA-Z0-9._]+" -delete

In [4]:
random_seed = 12
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
torch.cuda.manual_seed_all(random_seed) # if use multi-GPU
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(random_seed)
random.seed(random_seed)

In [5]:
train_dir_path = '/opt/ml/input/data/train/'
train_image_path = '/opt/ml/input/data/train/images/'

dt_train = pd.read_csv(train_dir_path+'train.csv')
dt_train

Unnamed: 0,id,gender,race,age,path
0,000001,female,Asian,45,000001_female_Asian_45
1,000002,female,Asian,52,000002_female_Asian_52
2,000004,male,Asian,54,000004_male_Asian_54
3,000005,female,Asian,58,000005_female_Asian_58
4,000006,female,Asian,59,000006_female_Asian_59
...,...,...,...,...,...
2695,006954,male,Asian,19,006954_male_Asian_19
2696,006955,male,Asian,19,006955_male_Asian_19
2697,006956,male,Asian,19,006956_male_Asian_19
2698,006957,male,Asian,20,006957_male_Asian_20


In [6]:
def get_age_range(age):
    if age < 30:
        return 0
    elif 30 <= age < 60:
        return 1
    else:
        return 2

In [7]:
dt_train['age_range'] = dt_train['age'].apply(lambda x : get_age_range(x))

In [8]:
dt_train

Unnamed: 0,id,gender,race,age,path,age_range
0,000001,female,Asian,45,000001_female_Asian_45,1
1,000002,female,Asian,52,000002_female_Asian_52,1
2,000004,male,Asian,54,000004_male_Asian_54,1
3,000005,female,Asian,58,000005_female_Asian_58,1
4,000006,female,Asian,59,000006_female_Asian_59,1
...,...,...,...,...,...,...
2695,006954,male,Asian,19,006954_male_Asian_19,0
2696,006955,male,Asian,19,006955_male_Asian_19,0
2697,006956,male,Asian,19,006956_male_Asian_19,0
2698,006957,male,Asian,20,006957_male_Asian_20,0


In [9]:
train_idx, valid_idx = train_test_split(np.arange(len(dt_train)),
                                       test_size=0.2,
                                       shuffle=True,
                                       stratify=dt_train['age_range'])

In [10]:
train_image = []
train_label = []

for idx in train_idx:
    path = dt_train.iloc[idx]['path']
    for file_name in [i for i in os.listdir(train_image_path+path) if i[0] != '.']:
        train_image.append(train_image_path+path+'/'+file_name)
        train_label.append((path.split('_')[1], path.split('_')[3], file_name.split('.')[0]))                            

In [11]:
valid_image = []
valid_label = []

for idx in valid_idx:
    path = dt_train.iloc[idx]['path']
    for file_name in [i for i in os.listdir(train_image_path+path) if i[0] != '.']:
        valid_image.append(train_image_path+path+'/'+file_name)
        valid_label.append((path.split('_')[1], path.split('_')[3], file_name.split('.')[0]))              

In [12]:
def onehot_enc(x):
    def gender(i):
        if i == 'male':
            return 0
        elif i == 'female':
            return 3
    def age(j):
        j = int(j)
        if j < 30:
            return 0
        elif j >= 30 and j < 60:
            return 1
        elif j >= 60:
            return 2
    def mask(k):
        if k == 'normal':
            return 12
        elif 'incorrect' in k:
            return 6
        else:
            return 0
    return gender(x[0]) + age(x[1]) + mask(x[2])

In [13]:
train_data = pd.Series(train_image)
train_label = pd.Series(train_label)

valid_data = pd.Series(valid_image)
valid_label = pd.Series(valid_label)

In [14]:
class Dataset_Mask(Dataset):
    def __init__(self, data, label, encoding=True, midcrop=True, transform1=None, transform2=None):
        self.encoding = encoding
        self.midcrop = midcrop
        self.data = data
        self.label = label
        self.transform1 = transform1
        self.transform2 = transform2
        
        if encoding:
            self.label = self.label.apply(onehot_enc)
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        X = cv2.cvtColor(cv2.imread(self.data[idx]), cv2.COLOR_BGR2RGB)
        y = self.label[idx]
        
        if self.midcrop:
            X = X[70:420, 17:367]
        
        if y % 3 == 2:
            return self.transform2(X), y
        else:
            return self.transform1(X), y

In [15]:
transform1 = transforms.Compose([
    transforms.ToPILImage(),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])
transform2 = transforms.Compose([
    transforms.ToPILImage(),
    transforms.RandomHorizontalFlip(), 
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
])

In [16]:
mask_train_set = Dataset_Mask(data=train_data, label=train_label, transform1=transform1, transform2=transform2)
mask_val_set = Dataset_Mask(data=valid_data, label=valid_label, transform1=transform1, transform2=transform1)

In [17]:
batch_size = 32
train_dataloader_mask = DataLoader(dataset = mask_train_set, batch_size=batch_size, shuffle=True, num_workers=2)
val_dataloader_mask = DataLoader(dataset = mask_val_set, batch_size=batch_size, shuffle=True, num_workers=2)

In [21]:
RegNet = torchvision.models.regnet_y_16gf(weights=torchvision.models.RegNet_Y_16GF_Weights.IMAGENET1K_SWAG_E2E_V1)
RegNet

RegNet(
  (stem): SimpleStemIN(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
  )
  (trunk_output): Sequential(
    (block1): AnyStage(
      (block1-0): ResBottleneckBlock(
        (proj): Conv2dNormActivation(
          (0): Conv2d(32, 224, kernel_size=(1, 1), stride=(2, 2), bias=False)
          (1): BatchNorm2d(224, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (f): BottleneckTransform(
          (a): Conv2dNormActivation(
            (0): Conv2d(32, 224, kernel_size=(1, 1), stride=(1, 1), bias=False)
            (1): BatchNorm2d(224, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): ReLU(inplace=True)
          )
          (b): Conv2dNormActivation(
            (0): Conv2d(224, 224, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=2, bias=False)
      

In [22]:
import math

for param in RegNet.parameters():
    param.requires_grad = False

class_num = 18
RegNet.fc = nn.Sequential(
                nn.Linear(in_features=3024, out_features=1000, bias=True),
                nn.Linear(in_features=1000, out_features=class_num, bias=True)
)
nn.init.xavier_uniform_(RegNet.fc[0].weight)
stdv = 1. / math.sqrt(RegNet.fc[0].weight.size(1))
RegNet.fc[0].bias.data.uniform_(-stdv, stdv)
nn.init.xavier_uniform_(RegNet.fc[1].weight)
stdv = 1. / math.sqrt(RegNet.fc[1].weight.size(1))
RegNet.fc[1].bias.data.uniform_(-stdv, stdv)
print(RegNet.fc)

Sequential(
  (0): Linear(in_features=3024, out_features=1000, bias=True)
  (1): Linear(in_features=1000, out_features=18, bias=True)
)


In [23]:
print('필요 입력 채널 개수', RegNet.stem[0].weight.shape[1])
print('네트워크 출력 채널 개수', RegNet.fc[1].weight.shape[0])

필요 입력 채널 개수 3
네트워크 출력 채널 개수 18


In [24]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"using {device}")

model = RegNet.to(device)

LEARNING_RATE = 0.0001
NUM_EPOCH = 100

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

using cuda:0


In [25]:
best_val_acc = 0
best_val_loss = np.inf
patience = 5
cur_count = 0

f1 = F1Score(num_classes=class_num, average='macro').to(device)
best_f1_score = 0

for epoch in range(NUM_EPOCH):
    model.train()
    loss_value = 0
    matches = 0
    for train_batch in tqdm(train_dataloader_mask):
        inputs, labels = train_batch
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        outs = model(inputs)
        preds = torch.argmax(outs, dim=-1)
        loss = criterion(outs, labels)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if epoch % 10 == 0:
            torch.save(model, '/opt/ml/checkpoint/RegNet/checkpoint_ep_%d.pt'% epoch)
        
        loss_value += loss.item()
        matches += (preds == labels).sum().item()
        
    train_loss = loss_value / len(train_dataloader_mask)
    train_acc = matches / len(mask_train_set)
    
    print(f"epoch[{epoch}/{NUM_EPOCH}] training loss {train_loss:.5f}, training accuracy {train_acc:.5f}")
    
    with torch.no_grad():
        model.eval()
        val_loss_items = []
        val_acc_items = []
        f1_score = 0
        for val_batch in tqdm(val_dataloader_mask):
            inputs, labels = val_batch
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            outs = model(inputs)
            preds = torch.argmax(outs, dim=-1)
            
            loss_item = criterion(outs, labels).item()
            acc_item = (labels==preds).sum().item()
            val_loss_items.append(loss_item)
            val_acc_items.append(acc_item)
            f1_score += f1(outs, labels)
            
        val_loss = np.sum(val_loss_items) / len(val_dataloader_mask)
        val_acc = np.sum(val_acc_items) / len(mask_val_set)
        
        f1_score /= len(val_dataloader_mask)
        
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            cur_count = 0
            torch.save(model, '/opt/ml/checkpoint/RegNet/checkpoint_best.pt')
        else:
            cur_count += 1
            if cur_count >= patience:
                print("Early Stopping!")
                break
                
        if val_acc > best_val_acc:
            best_val_acc = val_acc
        
        if f1_score > best_f1_score:
            best_f1_score = f1_score    
                
        print(f"[val] acc : {val_acc:.5f}, loss : {val_loss:.5f}, f1 score : {f1_score:.5f}")
        print(f"best acc : {best_val_acc:.5f}, best loss : {best_val_loss:.5f}, best f1 : {best_f1_score:.5f}")

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


epoch[0/100] training loss 1.48001, training accuracy 0.59378


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


[val] acc : 0.70106, loss : 1.04946, f1 score : 0.51444
best acc : 0.70106, best loss : 1.04946, best f1 : 0.51444


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


epoch[1/100] training loss 0.62314, training accuracy 0.80622


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


[val] acc : 0.79206, loss : 0.66784, f1 score : 0.62077
best acc : 0.79206, best loss : 0.66784, best f1 : 0.62077


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


epoch[2/100] training loss 0.42233, training accuracy 0.86329


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


[val] acc : 0.82196, loss : 0.57302, f1 score : 0.67915
best acc : 0.82196, best loss : 0.57302, best f1 : 0.67915


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


epoch[3/100] training loss 0.33753, training accuracy 0.89332


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


[val] acc : 0.82619, loss : 0.55795, f1 score : 0.68856
best acc : 0.82619, best loss : 0.55795, best f1 : 0.68856


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


epoch[4/100] training loss 0.29635, training accuracy 0.90119


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


[val] acc : 0.84127, loss : 0.52369, f1 score : 0.70063
best acc : 0.84127, best loss : 0.52369, best f1 : 0.70063


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


epoch[5/100] training loss 0.23576, training accuracy 0.92440


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


[val] acc : 0.83201, loss : 0.54910, f1 score : 0.69248
best acc : 0.84127, best loss : 0.52369, best f1 : 0.70063


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


epoch[6/100] training loss 0.21346, training accuracy 0.93307


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


[val] acc : 0.84762, loss : 0.50999, f1 score : 0.72494
best acc : 0.84762, best loss : 0.50999, best f1 : 0.72494


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


epoch[7/100] training loss 0.18840, training accuracy 0.94147


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


[val] acc : 0.84788, loss : 0.50049, f1 score : 0.72804
best acc : 0.84788, best loss : 0.50049, best f1 : 0.72804


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


epoch[8/100] training loss 0.15887, training accuracy 0.95370


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


[val] acc : 0.84815, loss : 0.51948, f1 score : 0.73064
best acc : 0.84815, best loss : 0.50049, best f1 : 0.73064


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


epoch[9/100] training loss 0.14585, training accuracy 0.95780


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


[val] acc : 0.84974, loss : 0.50048, f1 score : 0.72907
best acc : 0.84974, best loss : 0.50048, best f1 : 0.73064


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


epoch[10/100] training loss 0.13895, training accuracy 0.96045


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


[val] acc : 0.83942, loss : 0.54107, f1 score : 0.71545
best acc : 0.84974, best loss : 0.50048, best f1 : 0.73064


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


epoch[11/100] training loss 0.12431, training accuracy 0.96574


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


[val] acc : 0.85291, loss : 0.50509, f1 score : 0.71396
best acc : 0.85291, best loss : 0.50048, best f1 : 0.73064


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


epoch[12/100] training loss 0.10952, training accuracy 0.97044


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


[val] acc : 0.84286, loss : 0.52084, f1 score : 0.72712
best acc : 0.85291, best loss : 0.50048, best f1 : 0.73064


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


epoch[13/100] training loss 0.10292, training accuracy 0.97315


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


[val] acc : 0.85847, loss : 0.51035, f1 score : 0.74378
best acc : 0.85847, best loss : 0.50048, best f1 : 0.74378


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


epoch[14/100] training loss 0.09279, training accuracy 0.97745


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


[val] acc : 0.86296, loss : 0.48334, f1 score : 0.74249
best acc : 0.86296, best loss : 0.48334, best f1 : 0.74378


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


epoch[15/100] training loss 0.08060, training accuracy 0.98228


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


[val] acc : 0.85899, loss : 0.49281, f1 score : 0.74436
best acc : 0.86296, best loss : 0.48334, best f1 : 0.74436


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


epoch[16/100] training loss 0.08238, training accuracy 0.98406


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


[val] acc : 0.85847, loss : 0.50461, f1 score : 0.72930
best acc : 0.86296, best loss : 0.48334, best f1 : 0.74436


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


epoch[17/100] training loss 0.08510, training accuracy 0.97811


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


[val] acc : 0.86190, loss : 0.49410, f1 score : 0.76210
best acc : 0.86296, best loss : 0.48334, best f1 : 0.76210


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


epoch[18/100] training loss 0.06487, training accuracy 0.98770


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


[val] acc : 0.85344, loss : 0.51779, f1 score : 0.72997
best acc : 0.86296, best loss : 0.48334, best f1 : 0.76210


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))


epoch[19/100] training loss 0.06315, training accuracy 0.98717


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=15.0), HTML(value='')))


Early Stopping!


In [26]:
torch.save({
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'epoch' : epoch
        }, '/opt/ml/checkpoint/RegNet/checkpoint.tar')

In [18]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"using {device}")

NUM_EPOCH = 200
class_num = 18

model = torchvision.models.regnet_y_16gf(weights=torchvision.models.RegNet_Y_16GF_Weights.IMAGENET1K_SWAG_E2E_V1)
model.fc = nn.Sequential(
                nn.Linear(in_features=3024, out_features=1000, bias=True),
                nn.Linear(in_features=1000, out_features=class_num, bias=True)
)
model = model.to(device)

LEARNING_RATE = 0.0001
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

using cuda:0


In [19]:
checkpoint = torch.load('/opt/ml/checkpoint/RegNet/checkpoint.tar')

model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
checkpoint_epoch = checkpoint['epoch']

In [20]:
for param in model.parameters():
    param.requires_grad = True

In [21]:
np.set_printoptions(precision=3)
n_param = 0
for p_idx, (param_name, param) in enumerate(model.named_parameters()):
    if param.requires_grad:
        param_numpy = param.detach().cpu().numpy()
        n_param += len(param_numpy.reshape(-1))
        print ("[%d] name:[%s] shape:[%s]."%(p_idx,param_name,param_numpy.shape))
        print ("    val:%s"%(param_numpy.reshape(-1)[:5]))
print ("Total number of parameters:[%s]."%(format(n_param,',d')))

[0] name:[stem.0.weight] shape:[(32, 3, 3, 3)].
    val:[-0.  0.  0.  0.  0.]
[1] name:[stem.1.weight] shape:[(32,)].
    val:[0.772 5.304 5.613 5.635 7.578]
[2] name:[stem.1.bias] shape:[(32,)].
    val:[-1.826  1.65  16.304 15.479  2.09 ]
[3] name:[trunk_output.block1.block1-0.proj.0.weight] shape:[(224, 32, 1, 1)].
    val:[ 0.     0.001 -0.    -0.001 -0.001]
[4] name:[trunk_output.block1.block1-0.proj.1.weight] shape:[(224,)].
    val:[-6.575e-03 -2.190e-05  1.392e-03 -2.287e-04  5.578e-03]
[5] name:[trunk_output.block1.block1-0.proj.1.bias] shape:[(224,)].
    val:[ 1.405  5.743 -0.396 -0.705 -0.043]
[6] name:[trunk_output.block1.block1-0.f.a.0.weight] shape:[(224, 32, 1, 1)].
    val:[-0.    -0.01  -0.364 -0.076  0.314]
[7] name:[trunk_output.block1.block1-0.f.a.1.weight] shape:[(224,)].
    val:[2.111 2.197 3.205 0.875 2.933]
[8] name:[trunk_output.block1.block1-0.f.a.1.bias] shape:[(224,)].
    val:[ 0.129  0.002 -1.065  1.929 -0.598]
[9] name:[trunk_output.block1.block1-0.f.b.

In [22]:
best_val_acc = 0
best_val_loss = np.inf
# patience = 5
# cur_count = 0
accumulation_steps = 4

f1 = F1Score(num_classes=class_num, average='macro').to(device)
best_f1_score = 0

for epoch in range(NUM_EPOCH):
    model.train()
    loss_value = 0
    matches = 0
    for idx, train_batch in enumerate(tqdm(train_dataloader_mask)):
        inputs, labels = train_batch
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        outs = model(inputs)
        preds = torch.argmax(outs, dim=-1)
        loss = criterion(outs, labels)
        
        loss.backward()
        
        if (idx+1) % accumulation_steps == 0:
            optimizer.step()
            optimizer.zero_grad()
        
        if epoch % 10 == 0:
            torch.save(model, f'/opt/ml/checkpoint/RegNet/nonfreeze_checkpoint_ep_{epoch+20}.pt')
        
        loss_value += loss.item()
        matches += (preds == labels).sum().item()
        
    train_loss = loss_value / len(train_dataloader_mask)
    train_acc = matches / len(mask_train_set)
    
    print(f"epoch[{epoch}/{NUM_EPOCH}] training loss {train_loss:.5f}, training accuracy {train_acc:.5f}")
    
    with torch.no_grad():
        model.eval()
        val_loss_items = []
        val_acc_items = []
        f1_score = 0
        for val_batch in tqdm(val_dataloader_mask):
            inputs, labels = val_batch
            inputs = inputs.to(device)
            labels = labels.to(device)
            
            outs = model(inputs)
            preds = torch.argmax(outs, dim=-1)
            
            loss_item = criterion(outs, labels).item()
            acc_item = (labels==preds).sum().item()
            val_loss_items.append(loss_item)
            val_acc_items.append(acc_item)
            
            f1_score += f1(outs, labels)
            
        val_loss = np.sum(val_loss_items) / len(val_dataloader_mask)
        val_acc = np.sum(val_acc_items) / len(mask_val_set)
        
        f1_score /= len(val_dataloader_mask)
        
        if val_loss < best_val_loss:
            best_val_loss = val_loss
#             cur_count = 0
            torch.save(model, '/opt/ml/checkpoint/RegNet/nonfreeze_checkpoint_best.pt')                     
            print('checkpoint saved!')
#         else:
#             cur_count += 1
#             if cur_count >= patience:
#                 print("Early Stopping!")
#                 break
                
        if val_acc > best_val_acc:
            best_val_acc = val_acc
        
        if f1_score > best_f1_score:
            best_f1_score = f1_score

                
        print(f"[val] acc : {val_acc:.5f}, loss : {val_loss:.5f}, f1 score : {f1_score:.5f}")
        print(f"best acc : {best_val_acc:.5f}, best loss : {best_val_loss:.5f}, best f1 : {best_f1_score:.5f}")

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


epoch[0/200] training loss 0.29734, training accuracy 0.91858


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


checkpoint saved!
[val] acc : 0.90450, loss : 0.39809, f1 score : 0.82499
best acc : 0.90450, best loss : 0.39809, best f1 : 0.82499


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


epoch[1/200] training loss 0.03674, training accuracy 0.98862


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[val] acc : 0.87566, loss : 0.64523, f1 score : 0.79393
best acc : 0.90450, best loss : 0.39809, best f1 : 0.82499


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


epoch[2/200] training loss 0.02008, training accuracy 0.99352


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[val] acc : 0.88492, loss : 0.61049, f1 score : 0.79532
best acc : 0.90450, best loss : 0.39809, best f1 : 0.82499


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


epoch[3/200] training loss 0.01905, training accuracy 0.99405


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[val] acc : 0.89259, loss : 0.64887, f1 score : 0.80171
best acc : 0.90450, best loss : 0.39809, best f1 : 0.82499


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


epoch[4/200] training loss 0.03219, training accuracy 0.98922


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[val] acc : 0.88519, loss : 0.68886, f1 score : 0.78967
best acc : 0.90450, best loss : 0.39809, best f1 : 0.82499


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


epoch[5/200] training loss 0.02529, training accuracy 0.99114


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[val] acc : 0.90661, loss : 0.61002, f1 score : 0.82530
best acc : 0.90661, best loss : 0.39809, best f1 : 0.82530


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


epoch[6/200] training loss 0.02154, training accuracy 0.99352


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[val] acc : 0.90423, loss : 0.59387, f1 score : 0.82157
best acc : 0.90661, best loss : 0.39809, best f1 : 0.82530


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


epoch[7/200] training loss 0.01637, training accuracy 0.99491


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[val] acc : 0.90635, loss : 0.61550, f1 score : 0.83741
best acc : 0.90661, best loss : 0.39809, best f1 : 0.83741


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


epoch[8/200] training loss 0.01996, training accuracy 0.99418


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[val] acc : 0.90000, loss : 0.66043, f1 score : 0.82413
best acc : 0.90661, best loss : 0.39809, best f1 : 0.83741


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


epoch[9/200] training loss 0.02839, training accuracy 0.99160


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[val] acc : 0.90423, loss : 0.81658, f1 score : 0.81813
best acc : 0.90661, best loss : 0.39809, best f1 : 0.83741


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


epoch[10/200] training loss 0.01569, training accuracy 0.99491


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[val] acc : 0.89233, loss : 0.92357, f1 score : 0.80278
best acc : 0.90661, best loss : 0.39809, best f1 : 0.83741


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


epoch[11/200] training loss 0.01151, training accuracy 0.99663


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[val] acc : 0.89815, loss : 0.73768, f1 score : 0.79353
best acc : 0.90661, best loss : 0.39809, best f1 : 0.83741


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


epoch[12/200] training loss 0.00859, training accuracy 0.99722


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[val] acc : 0.89524, loss : 0.76775, f1 score : 0.80261
best acc : 0.90661, best loss : 0.39809, best f1 : 0.83741


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


epoch[13/200] training loss 0.00839, training accuracy 0.99709


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[val] acc : 0.88915, loss : 0.83816, f1 score : 0.79659
best acc : 0.90661, best loss : 0.39809, best f1 : 0.83741


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


epoch[14/200] training loss 0.01679, training accuracy 0.99497


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[val] acc : 0.88545, loss : 0.88137, f1 score : 0.80445
best acc : 0.90661, best loss : 0.39809, best f1 : 0.83741


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


epoch[15/200] training loss 0.01751, training accuracy 0.99517


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[val] acc : 0.88704, loss : 1.00022, f1 score : 0.79441
best acc : 0.90661, best loss : 0.39809, best f1 : 0.83741


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


epoch[16/200] training loss 0.01278, training accuracy 0.99577


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[val] acc : 0.90582, loss : 0.73636, f1 score : 0.82676
best acc : 0.90661, best loss : 0.39809, best f1 : 0.83741


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


epoch[17/200] training loss 0.00882, training accuracy 0.99709


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[val] acc : 0.89471, loss : 0.77994, f1 score : 0.82200
best acc : 0.90661, best loss : 0.39809, best f1 : 0.83741


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


epoch[18/200] training loss 0.01070, training accuracy 0.99762


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[val] acc : 0.89444, loss : 0.85142, f1 score : 0.80918
best acc : 0.90661, best loss : 0.39809, best f1 : 0.83741


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))


epoch[19/200] training loss 0.00733, training accuracy 0.99769


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=119.0), HTML(value='')))


[val] acc : 0.90370, loss : 0.77627, f1 score : 0.81312
best acc : 0.90661, best loss : 0.39809, best f1 : 0.83741


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=473.0), HTML(value='')))




KeyboardInterrupt: 

In [27]:
# meta 데이터와 이미지 경로를 불러옵니다.
test_dir_path = '/opt/ml/input/data/eval/'
test_image_path = '/opt/ml/input/data/eval/images/'

best_model = torch.load('/opt/ml/checkpoint/RegNet/nonfreeze_checkpoint_best.pt')
submission = pd.read_csv(test_dir_path+'info.csv')
submission.head()

Unnamed: 0,ImageID,ans
0,cbc5c6e168e63498590db46022617123f1fe1268.jpg,0
1,0e72482bf56b3581c081f7da2a6180b8792c7089.jpg,0
2,b549040c49190cedc41327748aeb197c1670f14d.jpg,0
3,4f9cb2a045c6d5b9e50ad3459ea7b791eb6e18bc.jpg,0
4,248428d9a4a5b6229a7081c32851b90cb8d38d0c.jpg,0


In [28]:
image_paths = [os.path.join(test_image_path, img_id) for img_id in submission.ImageID]
test_image = pd.Series(image_paths)

In [29]:
class Test_Dataset(Dataset):
    def __init__(self, midcrop=True, transform=None):
        self.midcrop = midcrop
        self.data = test_image
        self.transform = transform
        
    def __len__(self):
        return len(test_image)
    
    def __getitem__(self, idx):
        img = cv2.cvtColor(cv2.imread(self.data[idx]), cv2.COLOR_BGR2RGB)
        
        if self.midcrop:
            img = img[64:448]
            
        if self.transform:
            img = self.transform(img)
            
        return img

In [30]:
dataset = Test_Dataset(transform = transform1)

loader = DataLoader(
    dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=2
)

# 모델을 정의합니다. (학습한 모델이 있다면 torch.load로 모델을 불러주세요!)
device = torch.device('cuda')
model = best_model.to(device)
model.eval()

# 모델이 테스트 데이터셋을 예측하고 결과를 저장합니다.
all_predictions = []
for images in loader:
    with torch.no_grad():
        images = images.to(device)
        pred = model(images)
        pred = pred.argmax(dim=-1)
        all_predictions.extend(pred.cpu().numpy())
submission['ans'] = all_predictions

# 제출할 파일을 저장합니다.
submission.to_csv(os.path.join(test_dir_path, 'submission_regnet_2.csv'), index=False)
print('test inference is done!')

test inference is done!
