https://sensibilityit.tistory.com/511
https://towardsdatascience.com/custom-dataset-in-pytorch-part-1-images-2df3152895
https://tutorials.pytorch.kr/beginner/data_loading_tutorial.html
https://blog.naver.com/PostView.nhn?blogId=reisei11&logNo=221733757476&redirect=Dlog&widgetTypeCall=true&directAccess=false

# 0. GPU 확인

In [1]:
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 14496162872258889210
 xla_global_id: -1,
 name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 6267797504
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 12759347098671891262
 physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 2070 SUPER, pci bus id: 0000:01:00.0, compute capability: 7.5"
 xla_global_id: 416903419]

# 1. Create Dataset

In [2]:
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
import os
import glob
import random
from skimage.io import imread, imshow
from skimage.color import rgb2lab
import numpy as np

## 1.1. data path

./data/train_data   
./data/sample_data    
./../../../../train_data

In [3]:
train_data_path = './data/train_data'
image_paths = glob.glob(os.path.join(train_data_path, '*'))
image_paths

['./data/train_data\\n01440764_10026.JPEG',
 './data/train_data\\n01440764_10027.JPEG',
 './data/train_data\\n01440764_10029.JPEG',
 './data/train_data\\n01440764_10040.JPEG',
 './data/train_data\\n01440764_10042.JPEG',
 './data/train_data\\n01440764_10043.JPEG',
 './data/train_data\\n01440764_10048.JPEG',
 './data/train_data\\n01440764_10066.JPEG',
 './data/train_data\\n01440764_10074.JPEG',
 './data/train_data\\n01440764_1009.JPEG',
 './data/train_data\\n01440764_10095.JPEG',
 './data/train_data\\n01440764_10108.JPEG',
 './data/train_data\\n01440764_10110.JPEG',
 './data/train_data\\n01440764_10120.JPEG',
 './data/train_data\\n01440764_10124.JPEG',
 './data/train_data\\n01440764_10150.JPEG',
 './data/train_data\\n01440764_10159.JPEG',
 './data/train_data\\n01440764_10162.JPEG',
 './data/train_data\\n01440764_10183.JPEG',
 './data/train_data\\n01440764_10194.JPEG',
 './data/train_data\\n01440764_10211.JPEG',
 './data/train_data\\n01440764_10218.JPEG',
 './data/train_data\\n01440764_10

## 1.2. train validation split 

In [4]:
def train_test_split_path(paths, split_ratio, seed=44) :
    random.seed(seed)
    random.shuffle(paths)
    train_paths, valid_paths = paths[:int(split_ratio*len(paths))], paths[int(split_ratio*len(paths)):] 
    return  train_paths, valid_paths

In [5]:
train_image_paths, valid_image_paths =  train_test_split_path(image_paths, 0.8)

In [6]:
len(train_image_paths)

1008957

In [7]:
len(valid_image_paths)

252240

## 1.3. Custom Dataset Class

In [8]:
class Custom_Image_Dataset(Dataset) :
    
    def __init__(self, image_paths, transforms=None):
        self.image_paths = image_paths
        self.transforms = transforms
        
    def __len__(self):
        return len(self.image_paths)

    def __getitem__(self, idx):
        img = imread(self.image_paths[idx])
        
        img_lab = rgb2lab(img)
        img_lab_norm = img_lab
        
        img_lab_norm[:, :, 0] = (img_lab_norm[:, :, 0]-50)/100
        img_lab_norm[:, :, 1] = img_lab_norm[:, :, 1]/110
        img_lab_norm[:, :, 2] = img_lab_norm[:, :, 2]/110
        
        img_l = np.expand_dims(img_lab_norm[:,:,0], axis = 2)
        img_ab = img_lab_norm[:, :, 1:]
        
        x = img_l
        y = img_ab
        
        if self.transforms is not None :
            x = self.transforms(x).float()
            y = self.transforms(y).float()
        
        return x, y

## 1.4. Data Load

In [9]:
transform = transforms.Compose([
    transforms.ToTensor()
])

In [10]:
batch_size = 32

In [11]:
train_dataset = Custom_Image_Dataset(train_image_paths, transform)
train_dataloader = DataLoader(train_dataset, batch_size = batch_size, shuffle = False)

In [12]:
valid_dataset = Custom_Image_Dataset(valid_image_paths, transform)
valid_dataloader = DataLoader(valid_dataset, batch_size = batch_size, shuffle = False)

In [13]:
sample_loader_train = next(iter(train_dataset))
print('x : ', sample_loader_train[0].size())
print('y : ', sample_loader_train[1].size())

x :  torch.Size([1, 128, 128])
y :  torch.Size([2, 128, 128])


In [14]:
sample_loader_train[0]

tensor([[[-0.0092, -0.0177, -0.0291,  ..., -0.4005, -0.3956, -0.4005],
         [ 0.0371,  0.0296,  0.0184,  ..., -0.4326, -0.4229, -0.4229],
         [ 0.0689,  0.0616,  0.0542,  ..., -0.4416, -0.4397, -0.4397],
         ...,
         [ 0.2700,  0.2700,  0.2735,  ...,  0.2560,  0.2524,  0.2452],
         [ 0.2664,  0.2628,  0.2628,  ...,  0.2524,  0.2488,  0.2452],
         [ 0.2557,  0.2521,  0.2557,  ...,  0.2524,  0.2524,  0.2524]]])

In [15]:
sample_loader_train[1]

tensor([[[ 0.2062,  0.2167,  0.2191,  ..., -0.1043, -0.1046, -0.1043],
         [ 0.2002,  0.2017,  0.2038,  ..., -0.0752, -0.0808, -0.0808],
         [ 0.1874,  0.2016,  0.2031,  ..., -0.0494, -0.0619, -0.0619],
         ...,
         [ 0.1470,  0.1470,  0.1466,  ...,  0.1112,  0.1116,  0.1124],
         [ 0.1475,  0.1479,  0.1479,  ...,  0.1116,  0.1120,  0.1124],
         [ 0.1488,  0.1492,  0.1488,  ...,  0.1116,  0.1116,  0.1116]],

        [[ 0.4295,  0.4295,  0.4299,  ...,  0.1144,  0.1172,  0.1144],
         [ 0.4400,  0.4405,  0.4412,  ...,  0.0904,  0.0960,  0.0960],
         [ 0.4635,  0.4649,  0.4653,  ...,  0.0786,  0.0809,  0.0809],
         ...,
         [ 0.4578,  0.4578,  0.4574,  ...,  0.4359,  0.4362,  0.4370],
         [ 0.4582,  0.4586,  0.4586,  ...,  0.4362,  0.4366,  0.4370],
         [ 0.4594,  0.4598,  0.4594,  ...,  0.4362,  0.4362,  0.4362]]])

# 2. Define Model

In [16]:
import torch
import torch.nn as nn
from torchinfo import summary

In [17]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

## 2.1. define model function

In [18]:
class JKPSNET(nn.Module) :
    def __init__(self) :
        super().__init__()
        
        def conv_2xs(in_dim, out_dim) :
            model = nn.Sequential(
                nn.Conv2d(in_dim, out_dim, kernel_size=3, stride=1, padding=1, bias=True),
                nn.ReLU(inplace=True),
                nn.Conv2d(out_dim, out_dim, kernel_size=3, stride=1, padding=1, bias=True),
                nn.ReLU(inplace=True),
                nn.BatchNorm2d(out_dim)
            )
            return model
        
        def conv_2x(in_dim, out_dim) :
            model = nn.Sequential(
                nn.Conv2d(in_dim, in_dim, kernel_size=3, stride=2, padding=1, bias=True),
                nn.ReLU(inplace=True),
                nn.Conv2d(in_dim, out_dim, kernel_size=3, stride=1, padding=1, bias=True),
                nn.ReLU(inplace=True),
                nn.Conv2d(out_dim, out_dim, kernel_size=3, stride=1, padding=1, bias=True),
                nn.ReLU(inplace=True),
                nn.BatchNorm2d(out_dim)
            )
            return model
        
        def conv_3xdil(dim) :
            model = nn.Sequential(
                nn.Conv2d(dim, dim, kernel_size=3, dilation=2, stride=1, padding=2, bias=True),
                nn.ReLU(inplace=True),
                nn.Conv2d(dim, dim, kernel_size=3, dilation=2, stride=1, padding=2, bias=True),
                nn.ReLU(inplace=True),
                nn.Conv2d(dim, dim, kernel_size=3, dilation=2, stride=1, padding=2, bias=True),
                nn.ReLU(inplace=True),
                nn.BatchNorm2d(dim)
            )
            return model
        
        def conv_up(in_dim, out_dim) :
            model = nn.Sequential(
                nn.Conv2d(in_dim, out_dim, kernel_size=3, stride=1, padding=1, bias=True),
                nn.ReLU(inplace=True),
                nn.Conv2d(out_dim, out_dim, kernel_size=3, stride=1, padding=1, bias=True),
                nn.ReLU(inplace=True),
                nn.BatchNorm2d(out_dim)
            )
            return model
        
        self.input = nn.Sequential()
        self.enc1 = conv_2xs(1, 64)
        self.enc2 = conv_2x(64, 128)
        self.enc3 = conv_2x(128, 256)
        self.enc4 = conv_2x(256, 512)
        self.convR = nn.Sequential(conv_3xdil(512))
       
        
        self.ups1 = nn.ConvTranspose2d(512, 256, kernel_size=4, stride=2, padding=1, bias=True)
        self.dec1 = conv_up(512, 256)
        self.ups2 = nn.ConvTranspose2d(256, 128, kernel_size=4, stride=2, padding=1, bias=True)
        self.dec2 = conv_up(256, 128)
        self.ups3 = nn.ConvTranspose2d(128, 64, kernel_size=4, stride=2, padding=1, bias=True)
        self.dec3 = conv_up(128, 64)

        self.abp = nn.Conv2d(64, 313, kernel_size=1, stride=1, padding=0, bias=True)
        self.softmax = nn.Softmax(dim=1)
        
        self.out = nn.Conv2d(313, 2, kernel_size=1, padding=0, dilation=1, stride=1, bias=False)
        
        self.print_cat = nn.Sequential()
        
        
        
    def forward(self, x):
        x = self.input(x)
        x1 = self.enc1(x)
        x2 = self.enc2(x1)
        x3 = self.enc3(x2)
        x = self.enc4(x3)
        x = self.convR(x)
        
        x = self.ups1(x)
        x = self.print_cat(torch.cat([x, x3], dim = 1))
        x = self.dec1(x)
        x = self.ups2(x)
        x = self.print_cat(torch.cat([x, x2], dim = 1))
        x = self.dec2(x)
        x = self.ups3(x)
        x = self.print_cat(torch.cat([x, x1], dim = 1))
        x = self.dec3(x)
        
        x = self.abp(x)
        
        x = self.out(self.softmax(x))
        
        
        return x

## 2.2. model compile

In [19]:
model_JKPSNET = JKPSNET().to(device)

## 2.3. model summary

In [20]:
sample_loader_train = next(iter(train_dataset))

In [21]:
input_shape_list = list(tuple(sample_loader_train[0].size()))
input_shape_list.insert(0, batch_size)
input_shape = tuple(input_shape_list)
input_shape

(32, 1, 128, 128)

In [22]:
del sample_loader_train
torch.cuda.empty_cache()

In [23]:
summary(
    model_JKPSNET,
    input_shape
)

Layer (type:depth-idx)                   Output Shape              Param #
JKPSNET                                  --                        --
├─Sequential: 1-1                        [32, 1, 128, 128]         --
├─Sequential: 1-2                        [32, 64, 128, 128]        --
│    └─Conv2d: 2-1                       [32, 64, 128, 128]        640
│    └─ReLU: 2-2                         [32, 64, 128, 128]        --
│    └─Conv2d: 2-3                       [32, 64, 128, 128]        36,928
│    └─ReLU: 2-4                         [32, 64, 128, 128]        --
│    └─BatchNorm2d: 2-5                  [32, 64, 128, 128]        128
├─Sequential: 1-3                        [32, 128, 64, 64]         --
│    └─Conv2d: 2-6                       [32, 64, 64, 64]          36,928
│    └─ReLU: 2-7                         [32, 64, 64, 64]          --
│    └─Conv2d: 2-8                       [32, 128, 64, 64]         73,856
│    └─ReLU: 2-9                         [32, 128, 64, 64]         --
│

# 3. Train Model

In [24]:
import torch
import numpy as np
import time
from tqdm.notebook import tqdm

## 3.1. Early Stopping Class

In [25]:
class EarlyStopping:
    """주어진 patience 이후로 validation loss가 개선되지 않으면 학습을 조기 중지"""
    def __init__(self, patience=10, verbose=False, delta=0, path='checkpoint.pt'):
        """
        Args:
            patience (int): validation loss가 개선된 후 기다리는 기간
                            Default: 10
            verbose (bool): True일 경우 각 validation loss의 개선 사항 메세지 출력
                            Default: False
            delta (float): 개선되었다고 인정되는 monitered quantity의 최소 변화
                            Default: 0
            path (str): checkpoint저장 경로
                            Default: 'checkpoint.pt'
        """
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = np.Inf
        self.delta = delta
        self.path = path

    def __call__(self, val_loss, model):

        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        '''validation loss가 감소하면 모델을 저장한다.'''
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

## 3.2. Training Function

**criterion 설정에서**  
onehot x -> target.squeeze().long()  
onehot o -> torch.max(train_target, 1)[1]  

In [26]:
def train_model(model, 
                train_dataloader, 
                valid_dataloader, 
                criterion, 
                optimizer, 
                n_epochs=50, 
                early_stopping=None) :
    print('----------------------------------------------------------------------------------')
    print()
    start_total = time.time()
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    model = model.float()
    
    # epoch당 평균 train loss
    train_loss_list = []
    # epoch당 평균 validation loss
    valid_loss_list = []    
    
    for epoch in range(1, n_epochs + 1):
        start_epoch = time.time()
        
        print(f"Epoch {epoch}/{n_epochs}")
        
        epoch_train_loss_list = []
        epoch_valid_loss_list = []
        
        
        # train
        model.train() # prep model for training
        
        train_dataloader_tqdm = tqdm(train_dataloader, desc="Training  ")
        for batch, (train_data, train_target) in enumerate(train_dataloader_tqdm, 1):
            train_data = train_data.to(device)
            train_target = train_target.to(device)
            # clear the gradients of all optimized variables
            optimizer.zero_grad()    
            # forward pass: 입력된 값을 모델로 전달하여 예측 출력 계산
            train_output = model(train_data)
            # calculate the loss
            train_loss = criterion(train_output, train_target)
            # backward pass: 모델의 파라미터와 관련된 loss의 그래디언트 계산
            train_loss.backward()
            # perform a single optimization step (parameter update)
            optimizer.step()
            # record train loss
            epoch_train_loss_list.append(train_loss.item())
            
            del train_data
            del train_target
            del train_output
            del train_loss
            torch.cuda.empty_cache()
            
            
        # validation
        model.eval() # prep model for evaluation
        
        valid_dataloader_tqdm = tqdm(valid_dataloader, desc="Validation  ")
        for valid_data , valid_target in valid_dataloader_tqdm :
            valid_data = valid_data.to(device)
            valid_target = valid_target.to(device)
            # forward pass: 입력된 값을 모델로 전달하여 예측 출력 계산
            valid_output = model(valid_data)
            # calculate the loss
            valid_loss = criterion(valid_output, valid_target)
            # record validation loss
            epoch_valid_loss_list.append(valid_loss.item())
            
            del valid_data
            del valid_target
            del valid_output
            del valid_loss
            torch.cuda.empty_cache()

        
        # print train/validation statistics
        # epoch당 평균 loss 계산
        epoch_train_loss_avg = np.average(epoch_train_loss_list)
        epoch_valid_loss_avg = np.average(epoch_valid_loss_list)
        train_loss_list.append(epoch_train_loss_avg)
        valid_loss_list.append(epoch_valid_loss_avg)
        
        
        epoch_len = len(str(n_epochs))
        time_taken = round((time.time() - start_epoch), 2)
        time_taken_min = round(time_taken/60, 2)
        print_msg = (
            # f'[{epoch:>{epoch_len}}/{n_epochs:>{epoch_len}}] ' +
            f'ETA: {time_taken}s ({time_taken_min}min)   ' +
            f'train_loss: {epoch_train_loss_avg:.5f}   ' +
            f'valid_loss: {epoch_valid_loss_avg:.5f} ')

        print(print_msg)
        
        e_path = early_stopping.path
        n_path = '.'.join(e_path.split('.')[:-1])+'_{}.{}'.format(epoch, e_path.split('.')[-1])
        torch.save(model.state_dict(), n_path)
        
        # early_stopping는 validation loss가 감소하였는지 확인이 필요하며,
        # 만약 감소하였을경우 현제 모델을 checkpoint로 만든다.
        if early_stopping != None :
            early_stopping(epoch_valid_loss_avg, model)
            
            if early_stopping.early_stop:
                print()
                print('----------------------------------------------------------------------------------')
                print("Early stopping")
                break

            # best model이 저장되어있는 last checkpoint를 로드한다.
            model.load_state_dict(torch.load(early_stopping.path))
        print()
        print('----------------------------------------------------------------------------------')
        print()

    time_taken_total = round((time.time() - start_total), 2)
    time_taken_total_min = round(time_taken_total/60, 2)
    print(f'time_taken_total: {time_taken_total}s ({time_taken_total_min}min) ')
    
    
    return  model, train_loss_list, valid_loss_list

## 3.3. train config

In [27]:
criterion = torch.nn.MSELoss().to(device)
optimizer = torch.optim.Adam(
    model_JKPSNET.parameters(), 
    lr= 0.001, 
    weight_decay=1e-7
)
early_stopping_JKPSNET_RGB = EarlyStopping(
    patience=10, 
    verbose=True, 
    delta=0, 
    path='./data/model_saved/checkpoint_JKPSNET_LAB.pt'
)

## 3.4. training

In [None]:
trained_model, train_loss, valid_loss = train_model(
    model = model_JKPSNET, 
    train_dataloader = train_dataloader, 
    valid_dataloader = valid_dataloader, 
    criterion = criterion, 
    optimizer = optimizer, 
    n_epochs=100, 
    early_stopping=early_stopping_JKPSNET_RGB
)   

----------------------------------------------------------------------------------

Epoch 1/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 26029.95s (433.83min)   train_loss: 0.01702   valid_loss: 0.01674 
Validation loss decreased (inf --> 0.016743).  Saving model ...

----------------------------------------------------------------------------------

Epoch 2/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 26938.46s (448.97min)   train_loss: 0.01601   valid_loss: 0.01600 
Validation loss decreased (0.016743 --> 0.016004).  Saving model ...

----------------------------------------------------------------------------------

Epoch 3/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 27482.54s (458.04min)   train_loss: 0.01547   valid_loss: 0.01555 
Validation loss decreased (0.016004 --> 0.015549).  Saving model ...

----------------------------------------------------------------------------------

Epoch 4/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 26215.43s (436.92min)   train_loss: 0.01517   valid_loss: 0.01521 
Validation loss decreased (0.015549 --> 0.015206).  Saving model ...

----------------------------------------------------------------------------------

Epoch 5/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 29420.9s (490.35min)   train_loss: 0.01489   valid_loss: 0.01493 
Validation loss decreased (0.015206 --> 0.014928).  Saving model ...

----------------------------------------------------------------------------------

Epoch 6/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 28043.14s (467.39min)   train_loss: 0.01463   valid_loss: 0.01466 
Validation loss decreased (0.014928 --> 0.014658).  Saving model ...

----------------------------------------------------------------------------------

Epoch 7/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 27400.21s (456.67min)   train_loss: 0.01446   valid_loss: 0.01439 
Validation loss decreased (0.014658 --> 0.014387).  Saving model ...

----------------------------------------------------------------------------------

Epoch 8/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 27515.13s (458.59min)   train_loss: 0.01432   valid_loss: 0.01435 
Validation loss decreased (0.014387 --> 0.014351).  Saving model ...

----------------------------------------------------------------------------------

Epoch 9/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 28439.89s (474.0min)   train_loss: 0.01420   valid_loss: 0.01424 
Validation loss decreased (0.014351 --> 0.014236).  Saving model ...

----------------------------------------------------------------------------------

Epoch 10/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 28441.04s (474.02min)   train_loss: 0.01409   valid_loss: 0.01439 
EarlyStopping counter: 1 out of 10

----------------------------------------------------------------------------------

Epoch 11/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 26532.75s (442.21min)   train_loss: 0.01410   valid_loss: 0.01410 
Validation loss decreased (0.014236 --> 0.014100).  Saving model ...

----------------------------------------------------------------------------------

Epoch 12/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 27872.65s (464.54min)   train_loss: 0.01400   valid_loss: 0.01425 
EarlyStopping counter: 1 out of 10

----------------------------------------------------------------------------------

Epoch 13/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 27515.29s (458.59min)   train_loss: 0.01399   valid_loss: 0.01420 
EarlyStopping counter: 2 out of 10

----------------------------------------------------------------------------------

Epoch 14/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 26887.15s (448.12min)   train_loss: 0.01399   valid_loss: 0.01433 
EarlyStopping counter: 3 out of 10

----------------------------------------------------------------------------------

Epoch 15/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 28496.65s (474.94min)   train_loss: 0.01400   valid_loss: 0.01443 
EarlyStopping counter: 4 out of 10

----------------------------------------------------------------------------------

Epoch 16/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 27594.09s (459.9min)   train_loss: 0.01400   valid_loss: 0.01418 
EarlyStopping counter: 5 out of 10

----------------------------------------------------------------------------------

Epoch 17/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 26821.84s (447.03min)   train_loss: 0.01400   valid_loss: 0.01415 
EarlyStopping counter: 6 out of 10

----------------------------------------------------------------------------------

Epoch 18/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 27760.88s (462.68min)   train_loss: 0.01400   valid_loss: 0.01415 
EarlyStopping counter: 7 out of 10

----------------------------------------------------------------------------------

Epoch 19/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 25973.89s (432.9min)   train_loss: 0.01399   valid_loss: 0.01408 
Validation loss decreased (0.014100 --> 0.014076).  Saving model ...

----------------------------------------------------------------------------------

Epoch 20/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 26670.23s (444.5min)   train_loss: 0.01392   valid_loss: 0.01394 
Validation loss decreased (0.014076 --> 0.013935).  Saving model ...

----------------------------------------------------------------------------------

Epoch 21/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 26509.99s (441.83min)   train_loss: 0.01387   valid_loss: 0.01382 
Validation loss decreased (0.013935 --> 0.013822).  Saving model ...

----------------------------------------------------------------------------------

Epoch 22/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 27252.04s (454.2min)   train_loss: 0.01380   valid_loss: 0.01403 
EarlyStopping counter: 1 out of 10

----------------------------------------------------------------------------------

Epoch 23/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 26979.66s (449.66min)   train_loss: 0.01381   valid_loss: 0.01390 
EarlyStopping counter: 2 out of 10

----------------------------------------------------------------------------------

Epoch 24/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 27722.71s (462.05min)   train_loss: 0.01381   valid_loss: 0.01396 
EarlyStopping counter: 3 out of 10

----------------------------------------------------------------------------------

Epoch 25/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 27909.25s (465.15min)   train_loss: 0.01381   valid_loss: 0.01380 
Validation loss decreased (0.013822 --> 0.013795).  Saving model ...

----------------------------------------------------------------------------------

Epoch 26/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 26900.73s (448.35min)   train_loss: 0.01376   valid_loss: 0.01388 
EarlyStopping counter: 1 out of 10

----------------------------------------------------------------------------------

Epoch 27/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 26926.27s (448.77min)   train_loss: 0.01376   valid_loss: 0.01381 
EarlyStopping counter: 2 out of 10

----------------------------------------------------------------------------------

Epoch 28/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 27516.82s (458.61min)   train_loss: 0.01376   valid_loss: 0.01382 
EarlyStopping counter: 3 out of 10

----------------------------------------------------------------------------------

Epoch 29/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 28253.1s (470.88min)   train_loss: 0.01376   valid_loss: 0.01393 
EarlyStopping counter: 4 out of 10

----------------------------------------------------------------------------------

Epoch 30/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 27300.89s (455.01min)   train_loss: 0.01376   valid_loss: 0.01382 
EarlyStopping counter: 5 out of 10

----------------------------------------------------------------------------------

Epoch 31/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 26148.67s (435.81min)   train_loss: 0.01376   valid_loss: 0.01379 
Validation loss decreased (0.013795 --> 0.013794).  Saving model ...

----------------------------------------------------------------------------------

Epoch 32/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 26979.23s (449.65min)   train_loss: 0.01372   valid_loss: 0.01383 
EarlyStopping counter: 1 out of 10

----------------------------------------------------------------------------------

Epoch 33/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 26607.37s (443.46min)   train_loss: 0.01372   valid_loss: 0.01375 
Validation loss decreased (0.013794 --> 0.013749).  Saving model ...

----------------------------------------------------------------------------------

Epoch 34/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 26573.37s (442.89min)   train_loss: 0.01368   valid_loss: 0.01376 
EarlyStopping counter: 1 out of 10

----------------------------------------------------------------------------------

Epoch 35/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 26016.95s (433.62min)   train_loss: 0.01368   valid_loss: 0.01385 
EarlyStopping counter: 2 out of 10

----------------------------------------------------------------------------------

Epoch 36/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 27732.08s (462.2min)   train_loss: 0.01368   valid_loss: 0.01382 
EarlyStopping counter: 3 out of 10

----------------------------------------------------------------------------------

Epoch 37/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 26721.05s (445.35min)   train_loss: 0.01368   valid_loss: 0.01380 
EarlyStopping counter: 4 out of 10

----------------------------------------------------------------------------------

Epoch 38/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 27546.95s (459.12min)   train_loss: 0.01368   valid_loss: 0.01374 
Validation loss decreased (0.013749 --> 0.013735).  Saving model ...

----------------------------------------------------------------------------------

Epoch 39/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 25935.68s (432.26min)   train_loss: 0.01364   valid_loss: 0.01380 
EarlyStopping counter: 1 out of 10

----------------------------------------------------------------------------------

Epoch 40/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 26011.36s (433.52min)   train_loss: 0.01364   valid_loss: 0.01376 
EarlyStopping counter: 2 out of 10

----------------------------------------------------------------------------------

Epoch 41/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 26260.14s (437.67min)   train_loss: 0.01365   valid_loss: 0.01371 
Validation loss decreased (0.013735 --> 0.013709).  Saving model ...

----------------------------------------------------------------------------------

Epoch 42/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 26262.52s (437.71min)   train_loss: 0.01361   valid_loss: 0.01373 
EarlyStopping counter: 1 out of 10

----------------------------------------------------------------------------------

Epoch 43/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 27786.81s (463.11min)   train_loss: 0.01360   valid_loss: 0.01371 
EarlyStopping counter: 2 out of 10

----------------------------------------------------------------------------------

Epoch 44/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 28078.0s (467.97min)   train_loss: 0.01361   valid_loss: 0.01365 
Validation loss decreased (0.013709 --> 0.013648).  Saving model ...

----------------------------------------------------------------------------------

Epoch 45/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 27574.69s (459.58min)   train_loss: 0.01358   valid_loss: 0.01385 
EarlyStopping counter: 1 out of 10

----------------------------------------------------------------------------------

Epoch 46/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 27578.36s (459.64min)   train_loss: 0.01358   valid_loss: 0.01361 
Validation loss decreased (0.013648 --> 0.013611).  Saving model ...

----------------------------------------------------------------------------------

Epoch 47/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 28468.21s (474.47min)   train_loss: 0.01355   valid_loss: 0.01365 
EarlyStopping counter: 1 out of 10

----------------------------------------------------------------------------------

Epoch 48/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 28108.51s (468.48min)   train_loss: 0.01355   valid_loss: 0.01359 
Validation loss decreased (0.013611 --> 0.013594).  Saving model ...

----------------------------------------------------------------------------------

Epoch 49/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 26232.63s (437.21min)   train_loss: 0.01352   valid_loss: 0.01351 
Validation loss decreased (0.013594 --> 0.013510).  Saving model ...

----------------------------------------------------------------------------------

Epoch 50/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 28705.46s (478.42min)   train_loss: 0.01350   valid_loss: 0.01358 
EarlyStopping counter: 1 out of 10

----------------------------------------------------------------------------------

Epoch 51/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 27294.68s (454.91min)   train_loss: 0.01350   valid_loss: 0.01366 
EarlyStopping counter: 2 out of 10

----------------------------------------------------------------------------------

Epoch 52/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 25210.02s (420.17min)   train_loss: 0.01351   valid_loss: 0.01362 
EarlyStopping counter: 3 out of 10

----------------------------------------------------------------------------------

Epoch 53/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

Validation  :   0%|          | 0/7883 [00:00<?, ?it/s]

ETA: 26373.52s (439.56min)   train_loss: 0.01350   valid_loss: 0.01365 
EarlyStopping counter: 4 out of 10

----------------------------------------------------------------------------------

Epoch 54/100


Training  :   0%|          | 0/31530 [00:00<?, ?it/s]

# 4. Visualizing Train

In [None]:
import matplotlib.pyplot as plt

## 4.1. visualizing function

In [None]:
def show_train_graph_loss(train_loss, valid_loss) :
    
    plt.plot(range(1,len(train_loss)+1),train_loss, label='train_loss')
    plt.plot(range(1,len(valid_loss)+1),valid_loss,label='valid_loss')

    # validation loss의 최저값 지점을 찾기
    minposs = valid_loss.index(min(valid_loss))+1
    plt.axvline(minposs, linestyle='--', color='r',label='Early Stopping Checkpoint')

    plt.xlabel('epochs')
    plt.ylabel('loss')
#     plt.ylim(0, 0.5) # 일정한 scale
    plt.xlim(0, len(train_loss)+1) # 일정한 scale
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    plt.show()

## 4.2. visualizing

In [None]:
show_train_graph_loss(train_loss, valid_loss)