In [1]:
# decoding JPEG images and decoding/encoding RLE datasets
# !pip3 install pylibjpeg==1.4.0
# https://github.com/pydicom/pylibjpeg

# !pip3 install python-gdcm

In [2]:
# When True, the DataFrame cell trains on a random 16-study sample instead of the full label table.
DEBUG = False

import os
import sys

In [3]:
# suitable for kaggle notebook
# sys.path = ['../ca_2',] + sys.path
# print(sys.path)

In [4]:
import argparse
import warnings

In [5]:
import gc, ast, cv2, time, pickle, random
# import pylibjpeg
# import gdcm
# import pydicom
# pydicom is a pure Python package for working with DICOM files. 
# -It lets you read, modify and write DICOM data in an easy "pythonic" way. 

In [6]:
import numpy as np
import pandas as pd
from glob import glob
from PIL import Image


# import nibabel as nib
# read / write access to some common neuroimaging file formats

In [7]:
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold, StratifiedKFold

import albumentations # python library for pixel-level augmentations 

In [8]:
%matplotlib inline

In [9]:
import timm

import segmentation_models_pytorch as smp
import torch
import torch.nn as nn
import torch.optim as optim
import torch.cuda.amp as amp
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset


  warn(f"Failed to load image Python extension: {e}")


In [10]:
from tqdm import tqdm

In [11]:
# import graphviz

In [12]:
# # pip3 install torchview
# from torchview import draw_graph

In [13]:
# Raise numpy's summarisation threshold to the largest int so arrays are printed in full.
np.set_printoptions(threshold=sys.maxsize)

In [14]:
# Show DataFrames without truncating columns, rows, sequence items or cell text.
# Option names are written out in full: pandas resolves abbreviated names by
# pattern matching, which starts failing as soon as a second option matches.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_seq_items', None)
pd.set_option('display.max_colwidth', None) # 500
pd.set_option('display.expand_frame_repr', True)

In [15]:

# Target device for the model and for the loss-weight tensor built in `criterion`.
device = torch.device('cuda')

# Enable the cuDNN auto-tuner, which benchmarks candidate algorithms and keeps the
# fastest one for the current hardware. Worth it when input sizes do not vary.
torch.backends.cudnn.benchmark = True

# Config

In [16]:
# Experiment tag; used as the stem of the log file and of every checkpoint file name.
kernel_type = '0920_1bonev2_effv2s_224_15_6ch_augv2_mixupp5_drl3_rov1p2_bs8_lr23e5_eta23e6_50ep'
# NOTE(review): load_kernel / load_last are not referenced by any cell shown here.
load_kernel = None
load_last = True

# NOTE(review): n_folds is not referenced by any cell shown here; fold ids come from the CSV.
n_folds = 5
# timm architecture name handed to timm.create_model.
backbone = 'tf_efficientnetv2_s_in21ft1k'

# Side length every slice is resized to, and the spatial size the model reshapes to.
image_size = 224
# Number of slices read per (study, vertebra) sample.
n_slice_per_c = 15
# Channels per slice fed to the encoder.
in_chans = 6

# AdamW learning rate and the floor of the cosine schedule.
init_lr = 23e-5 # 23e-5 (for first run)
eta_min = 13e-5 # 23e-6 (for first run)
batch_size = 5 # 8
# Dropout inside the timm encoder and between the LSTM layers.
drop_rate = 0.
# Dropout in the classification head.
drop_rate_last = 0.3 # 0.3
# Stochastic-depth rate passed to timm.
drop_path_rate = 0.
# Probability of applying mixup to a training batch.
p_mixup = 0.5
# Probability of shuffling the slice order of a training sample.
p_rand_order_v1 = 0.2

# Root folder holding train_seg.csv and the numpy_1/ slice arrays.
data_dir = './'
# When True a GradScaler is created for mixed-precision training.
use_amp = True
# DataLoader worker processes.
num_workers = 12 # 4
# Logits produced per slice.
out_dim = 1

# Epochs per fold; also the restart period of the cosine schedule.
n_epochs = 30 # 75

# Output folders for logs and checkpoints; model_dir_seg is where the warm-start checkpoint is read from.
log_dir = './logs'
model_dir = './models'
model_dir_seg = './kaggle'
os.makedirs(log_dir, exist_ok=True)
os.makedirs(model_dir, exist_ok=True)

In [17]:
# Albumentations is a computer vision tool that boosts the performance of deep convolutional neural networks.
# Albumentations is a Python library for image augmentation.
def _build_train_transforms(size):
    """Assemble the training-time augmentation pipeline for `size` x `size` slices."""
    # At most one blur/noise operation is applied, half of the time.
    blur_or_noise = albumentations.OneOf([
        albumentations.MotionBlur(blur_limit=3),
        albumentations.MedianBlur(blur_limit=3),
        albumentations.GaussianBlur(blur_limit=3),
        albumentations.GaussNoise(var_limit=(3.0, 9.0)),
    ], p=0.5)

    # At most one geometric warp is applied, half of the time.
    warp = albumentations.OneOf([
        albumentations.OpticalDistortion(distort_limit=1.),
        albumentations.GridDistortion(num_steps=5, distort_limit=1.),
    ], p=0.5)

    # A single rectangular hole of up to half the image side.
    max_hole = int(size * 0.5)
    cutout = albumentations.CoarseDropout(max_height=max_hole, max_width=max_hole, max_holes=1, p=0.5)

    steps = [
        albumentations.Resize(size, size),
        albumentations.HorizontalFlip(p=0.5),
        albumentations.VerticalFlip(p=0.5),
        albumentations.Transpose(p=0.5),
        albumentations.RandomBrightnessContrast(brightness_limit=0.1, p=0.7),
        # cv2.BORDER_REFLECT_101 is the OpenCV constant with value 4.
        albumentations.ShiftScaleRotate(
            shift_limit=0.3,
            scale_limit=0.3,
            rotate_limit=45,
            border_mode=cv2.BORDER_REFLECT_101,
            p=0.7,
        ),
        blur_or_noise,
        warp,
        cutout,
    ]
    return albumentations.Compose(steps)


# Training pipeline: resize followed by random flips, colour jitter, affine, blur/noise, warp and cutout.
transforms_train = _build_train_transforms(image_size)

# Validation pipeline: resize only, no randomness.
transforms_valid = albumentations.Compose([albumentations.Resize(image_size, image_size)])

  "blur_limit and sigma_limit minimum value can not be both equal to 0. "


# DataFrame

In [18]:
# Study-level label table: one row per StudyInstanceUID with a flag per vertebra
# (C1..C7) and a precomputed `fold` column.
df_train = pd.read_csv(os.path.join(data_dir, 'train_seg.csv'))
# df_train =>
#             StudyInstanceUID  patient_overall  C1  C2  C3  C4  C5  C6  C7  \
# 0   1.2.826.0.1.3680043.6200                1   1   1   0   0   0   0   0   
# 1  1.2.826.0.1.3680043.27262                1   0   1   0   0   0   0   0   
# 2  1.2.826.0.1.3680043.21561                1   0   1   0   0   0   0   0
# ...
# len(df_train) => 2018

# In DEBUG mode only a random 16-study subset is expanded.
df = df_train if not DEBUG else df_train.sample(16).reset_index(drop=True)

# Expand to long format: seven consecutive rows per study, one per vertebra id 1..7.
sid, cs, label, fold = [], [], [], []
for _, study in df.iterrows():
    vertebra_ids = [1, 2, 3, 4, 5, 6, 7]
    sid.extend([study.StudyInstanceUID] * len(vertebra_ids))
    cs.extend(vertebra_ids)
    label.extend([study[f'C{v}'] for v in vertebra_ids])
    fold.extend([study.fold] * len(vertebra_ids))

df = pd.DataFrame({
    'StudyInstanceUID': sid,
    'Cid': cs,
    'Cid_label': label,
    'fold': fold
})

df.tail()

Unnamed: 0,StudyInstanceUID,Cid,Cid_label,fold
14121,1.2.826.0.1.3680043.18786,3,0,4
14122,1.2.826.0.1.3680043.18786,4,0,4
14123,1.2.826.0.1.3680043.18786,5,0,4
14124,1.2.826.0.1.3680043.18786,6,0,4
14125,1.2.826.0.1.3680043.18786,7,1,4


In [19]:
df[5:10].head()

Unnamed: 0,StudyInstanceUID,Cid,Cid_label,fold
5,1.2.826.0.1.3680043.6200,6,0,0
6,1.2.826.0.1.3680043.6200,7,0,0
7,1.2.826.0.1.3680043.27262,1,0,0
8,1.2.826.0.1.3680043.27262,2,1,0
9,1.2.826.0.1.3680043.27262,3,0,0


In [20]:
len(df_train)

2018

# Dataset

In [21]:
class CLSDataset(Dataset):
    """Dataset yielding the stack of slices stored for one (study, vertebra) row.

    Each item is read from `<data_dir>/numpy_1/<StudyInstanceUID>_<Cid>.npy`.

    Args:
        df: table with `StudyInstanceUID`, `Cid` and `Cid_label` columns.
        mode: 'train', 'valid' or 'test'. 'test' returns images only; 'train'
            additionally enables random slice-order shuffling.
        transform: callable invoked as `transform(image=slice)['image']` on
            every slice independently.
    """

    def __init__(self, df, mode, transform):

        self.df = df.reset_index()
        self.mode = mode
        self.transform = transform

    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self, index):
        """Return `(images, labels)`, or only `images` when mode is 'test'.

        `images` is a float32 tensor laid out (slice, channel, height, width)
        and scaled by 1/255; `labels` repeats the row's `Cid_label` once per
        slice.
        """
        row = self.df.iloc[index]
        cid = row.Cid

        filepath = os.path.join(data_dir, f'numpy_1/{row.StudyInstanceUID}_{cid}.npy')
        images = np.load(filepath)
            # per the author's notes: images.shape => (15, 224, 224, 6)

        # Each slice is transformed on its own, so random augmentations are drawn per slice.
        images = np.stack([self.transform(image=images[i])['image'] for i in range(n_slice_per_c)], 0)

        # (slice, H, W, C) -> (slice, C, H, W)
        images = images.transpose(0, 3, 1, 2)

        # Dividing by 255. promotes to float before the final cast to float32.
        images = torch.tensor(images / 255.).float()

        if self.mode == 'test':
            return images

        labels = torch.tensor([row.Cid_label] * n_slice_per_c).float()

        # With probability p_rand_order_v1, shuffle the slice order of a training
        # sample. Labels are identical across slices, so they need no reordering.
        if self.mode == 'train' and random.random() < p_rand_order_v1:
            indices = torch.randperm(images.size(0))
            images = images[indices]

        return images, labels

In [22]:
# # plt.rcParams['figure.figsize'] = 20,8

# df_show = df[7:8]
# dataset_show = CLSDataset(df_show, 'train', transform=transforms_train)

In [23]:
# dataset_show[0]

# Model

In [24]:
class TimmModel(nn.Module):
    """Per-slice timm encoder followed by a bidirectional LSTM over the slice axis.

    Every slice is embedded by the encoder (its classifier is replaced with an
    identity), the embeddings of one sample are run through a 2-layer
    bidirectional LSTM, and a small MLP head emits one logit per slice.

    Args:
        backbone: timm architecture name; must contain 'efficient' or 'convnext'.
        pretrained: forwarded to `timm.create_model`.

    Raises:
        ValueError: if `backbone` belongs to neither supported family.
    """

    def __init__(self, backbone, pretrained=False):
        super(TimmModel, self).__init__()

        # Fail early, before the encoder is built, instead of hitting an unbound
        # `hdim` further down for an architecture this class cannot wire up.
        if 'efficient' not in backbone and 'convnext' not in backbone:
            raise ValueError(
                f"Unsupported backbone '{backbone}': expected a timm name containing "
                "'efficient' or 'convnext'."
            )

        self.encoder = timm.create_model(
            backbone,
            in_chans=in_chans,
            num_classes=out_dim,
            features_only=False,
            drop_rate=drop_rate,
            drop_path_rate=drop_path_rate,
            pretrained=pretrained
        )

        # Drop the encoder's own classifier so it returns pooled features, and
        # remember the feature width for the LSTM input.
        if 'efficient' in backbone:
            # e.g. conv_head => Conv2d(256, 1280, kernel_size=(1, 1), ...) => hdim 1280
            hdim = self.encoder.conv_head.out_channels
            self.encoder.classifier = nn.Identity()
        else:  # 'convnext'
            hdim = self.encoder.head.fc.in_features
            self.encoder.head.fc = nn.Identity()

        self.lstm = nn.LSTM(hdim, 256, num_layers=2, dropout=drop_rate, bidirectional=True, batch_first=True)
        self.head = nn.Sequential(
            nn.Linear(512, 256),  # 512 = 2 directions x 256 hidden units
            nn.BatchNorm1d(256),  # normalises over the bs * n_slice_per_c rows of a step
            nn.Dropout(drop_rate_last),
            nn.LeakyReLU(0.1),
            nn.Linear(256, out_dim),
        )

    def forward(self, x):  # (bs, nslice, ch, sz, sz)
        """Map `(bs, n_slice_per_c, in_chans, image_size, image_size)` to `(bs, n_slice_per_c)` logits."""
        bs = x.shape[0]

        # Fold the slice axis into the batch axis so the 2D encoder sees one slice per row.
        x = x.view(bs * n_slice_per_c, in_chans, image_size, image_size)
        feat = self.encoder(x)

        # Restore the slice axis as the sequence dimension for the LSTM.
        feat = feat.view(bs, n_slice_per_c, -1)
        feat, _ = self.lstm(feat)  # the (h_n, c_n) state tuple is not needed

        # Flatten again so the head (and its BatchNorm1d) works on 2D input.
        feat = feat.contiguous().view(bs * n_slice_per_c, -1)
        feat = self.head(feat)

        # One logit per slice; this reshape relies on out_dim == 1.
        feat = feat.view(bs, n_slice_per_c).contiguous()

        return feat


In [25]:
# m = TimmModel(backbone)
# m(torch.rand(2, n_slice_per_c, in_chans, image_size, image_size)).shape
#     # m(torch.rand(2, n_slice_per_c, in_chans, image_size, image_size)).shape => torch.Size([2, 15])

In [26]:
# draw_graph(m, input_data = torch.rand(1, 15, 6, 224, 224), expand_nested=True, save_graph=True).visual_graph

# Loss & Metric

In [27]:
bce = nn.BCEWithLogitsLoss(reduction='none')


def criterion(logits, targets, activated=False):
    """Weighted binary cross-entropy in which positive targets count twice.

    Args:
        logits: raw scores, or probabilities when `activated` is True.
        targets: tensor with the same number of elements as `logits`; entries
            greater than 0 are treated as positives.
        activated: set to True when `logits` already went through a sigmoid.

    Returns:
        Scalar tensor: sum(weight * loss) / sum(weight), with weight 2 for
        positives and 1 otherwise.
    """
    flat_logits = logits.view(-1)
    flat_targets = targets.view(-1)

    if activated:
        losses = nn.BCELoss(reduction='none')(flat_logits, flat_targets)
    else:
        losses = bce(flat_logits, flat_targets)

    # Build the weights next to the losses (same device and dtype) so the
    # function does not depend on a global device and works for CPU tensors too.
    weights = torch.ones_like(losses)
    weights[flat_targets > 0] = 2.

    return (losses * weights).sum() / weights.sum()

# Train & Valid func

In [28]:
# mixup explained in train_1.ipynb
def mixup(input, truth, clip=[0, 1]):
    """Blend every sample of a batch with a randomly chosen partner from the same batch.

    Args:
        input: batch tensor, mixed along dim 0.
        truth: labels aligned with `input`.
        clip: `[low, high]` bounds for the uniformly drawn mixing weight.

    Returns:
        `(mixed_input, truth, partner_labels, lam)` where
        `mixed_input = input * lam + partner_input * (1 - lam)`.
    """
    partner = torch.randperm(input.size(0))
    low, high = clip[0], clip[1]
    lam = np.random.uniform(low, high)

    blended = input * lam + input[partner] * (1 - lam)
    return blended, truth, truth[partner], lam


def train_func(model, loader_train, optimizer, scaler=None):
    """Run one training epoch and return the mean batch loss.

    Args:
        model: network to train; switched to train mode here.
        loader_train: iterable of `(images, targets)` batches.
        optimizer: optimizer stepped once per batch.
        scaler: optional `GradScaler`. When given, the forward pass runs under
            autocast and gradients are scaled; when None, the epoch runs in
            plain full precision.

    Returns:
        Mean of the per-batch losses as a float.
    """
    model.train()
    # Mixed precision and gradient scaling are enabled together or not at all.
    use_scaler = scaler is not None
    train_loss = []
    bar = tqdm(loader_train)
    for images, targets in bar:
        optimizer.zero_grad()
        images = images.cuda()
        targets = targets.cuda()

        do_mixup = random.random() < p_mixup
        if do_mixup:
            images, targets, targets_mix, lam = mixup(images, targets)

        with amp.autocast(enabled=use_scaler):
            logits = model(images)
            loss = criterion(logits, targets)
            if do_mixup:
                # Mixed inputs are scored against both label sets, weighted by lam.
                loss11 = criterion(logits, targets_mix)
                loss = loss * lam  + loss11 * (1 - lam)
        train_loss.append(loss.item())

        if use_scaler:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()

#         bar.set_description(f'smooth loss:{np.mean(train_loss[-30:]):.4f}')

    return np.mean(train_loss)


def valid_func(model, loader_valid):
    """Evaluate the model on the validation loader and return one overall loss.

    The loss is computed once over the concatenated predictions of the whole
    loader, not averaged over batches, so every sample carries the same
    weight regardless of batch size.

    Args:
        model: network to evaluate; switched to eval mode here.
        loader_valid: iterable of `(images, targets)` batches.

    Returns:
        Validation loss as a float.
    """
    model.eval()
    gts = []
    outputs = []
    with torch.no_grad():
        for images, targets in tqdm(loader_valid):
            logits = model(images.cuda())

            # Only the CPU copies are kept; the loss is evaluated after the loop.
            gts.append(targets.cpu())
            outputs.append(logits.cpu())

    outputs = torch.cat(outputs)
    gts = torch.cat(gts)
    valid_loss = criterion(outputs, gts).item()

    return valid_loss


In [29]:
# plt.rcParams['figure.figsize'] = 20, 2
# optimizer = optim.AdamW(m.parameters(), lr=init_lr)
# scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, n_epochs, eta_min = 23e-8)

# lrs = []
# for epoch in range(1, n_epochs+1):
#     scheduler_cosine.step(epoch-1)
#     lrs.append(optimizer.param_groups[0]["lr"])
# plt.plot(range(len(lrs)), lrs)

# Training

In [30]:
# df_debug = df.copy()
# df = df[1000:]
# df = df_debug[1006:].copy()

In [31]:
def run(fold):
    """Train on every fold except `fold`, validate on `fold`, and keep the best checkpoint.

    One line per epoch is appended to `<log_dir>/<kernel_type>.txt`, and the
    model is written to `<model_dir>/<kernel_type>_fold<fold>_best.pth`
    whenever the validation loss improves.

    Args:
        fold: id of the held-out fold, matched against the `fold` column of `df`.
    """

    log_file = os.path.join(log_dir, f'{kernel_type}.txt')
    model_file = os.path.join(model_dir, f'{kernel_type}_fold{fold}_best.pth')

    train_ = df[df['fold'] != fold].reset_index(drop=True)
    valid_ = df[df['fold'] == fold].reset_index(drop=True)
    dataset_train = CLSDataset(train_, 'train', transform=transforms_train)
    dataset_valid = CLSDataset(valid_, 'valid', transform=transforms_valid)
    # drop_last keeps every training batch at full size.
    loader_train = torch.utils.data.DataLoader(dataset_train, batch_size=batch_size, shuffle=True, num_workers=num_workers, drop_last=True)
    loader_valid = torch.utils.data.DataLoader(dataset_valid, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    model = TimmModel(backbone, pretrained=True)
    model = model.to(device)

    # Warm start from an earlier checkpoint. The load is unconditional, so the
    # file must exist, and strict=True replaces every pretrained weight.
    # NOTE(review): fold_l is fixed to 0, so every fold starts from the fold-0
    # checkpoint; if that model was trained on data that includes this fold's
    # validation studies, the validation loss is optimistic. Confirm this is intended.
    fold_l = 0
    load_model_file = os.path.join(model_dir_seg, f'{kernel_type}_fold{fold_l}_best.pth')
    # map_location='cpu' makes the load independent of the GPU the checkpoint was
    # saved from; load_state_dict then copies the values onto the model's device.
    sd = torch.load(load_model_file, map_location='cpu')
    if 'model_state_dict' in sd.keys():
        sd = sd['model_state_dict']
    # Strip the 'module.' prefix left behind by DataParallel-style wrappers.
    sd = {k[7:] if k.startswith('module.') else k: sd[k] for k in sd.keys()}
    model.load_state_dict(sd, strict=True)

    optimizer = optim.AdamW(model.parameters(), lr=init_lr)
    scaler = torch.cuda.amp.GradScaler() if use_amp else None

    metric_best = np.inf

    # The restart period equals n_epochs, so within one run the rate only decays
    # from init_lr towards eta_min.
    scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, n_epochs, eta_min=eta_min)

#     print(len(dataset_train), len(dataset_valid))

    for epoch in range(1, n_epochs+1):
        # Set the learning rate for this epoch before training starts.
        scheduler_cosine.step(epoch-1)

#         print(time.ctime(), 'Epoch:', epoch)

        train_loss = train_func(model, loader_train, optimizer, scaler)
        valid_loss = valid_func(model, loader_valid)
        metric = valid_loss

        content = time.ctime() + ' ' + f'Fold {fold}, Epoch {epoch}, lr: {optimizer.param_groups[0]["lr"]:.7f}, train_loss: {train_loss:.5f}, valid_loss: {valid_loss:.5f}, metric(valid_loss): {(metric):.6f}.'
        print(content)
        with open(log_file, 'a') as appender:
            appender.write(content + '\n')

        if metric < metric_best:
            print(f'metric_best ({metric_best:.6f} --> {metric:.6f}). Saving model ...')
#             if not DEBUG:
            torch.save(model.state_dict(), model_file)
            metric_best = metric

#         # Save Last
#         if not DEBUG:
#             torch.save(
#                 {
#                     'epoch': epoch,
#                     'model_state_dict': model.state_dict(),
#                     'optimizer_state_dict': optimizer.state_dict(),
#                     'scaler_state_dict': scaler.state_dict() if scaler else None,
#                     'score_best': metric_best,
#                 },
#                 model_file.replace('_best', '_last')
#             )

    # Release GPU memory before the next fold is started.
    del model
    torch.cuda.empty_cache()
    _ = gc.collect()


In [32]:
# Folds are trained one call at a time; only folds 2 and 4 are enabled in this cell.
# # run(0)
# run(1)
run(2)
run(4)
# run(3)

smooth loss:0.3912: 100%|███████████████████| 2261/2261 [17:35<00:00,  2.14it/s]
smooth loss:0.6242: 100%|█████████████████████| 565/565 [02:18<00:00,  4.08it/s]


Fri Feb 24 23:04:54 2023 Fold 2, Epoch 1, lr: 0.0002300, train_loss: 0.33754, valid_loss: 0.29303, metric(valid_loss): 0.293033.
metric_best (inf --> 0.293033). Saving model ...


smooth loss:0.2978: 100%|███████████████████| 2261/2261 [17:23<00:00,  2.17it/s]
smooth loss:0.6294: 100%|█████████████████████| 565/565 [02:15<00:00,  4.17it/s]


Fri Feb 24 23:24:34 2023 Fold 2, Epoch 2, lr: 0.0002297, train_loss: 0.33458, valid_loss: 0.29124, metric(valid_loss): 0.291239.
metric_best (0.293033 --> 0.291239). Saving model ...


smooth loss:0.3610: 100%|███████████████████| 2261/2261 [17:22<00:00,  2.17it/s]
smooth loss:0.7273: 100%|█████████████████████| 565/565 [02:15<00:00,  4.17it/s]


Fri Feb 24 23:44:12 2023 Fold 2, Epoch 3, lr: 0.0002289, train_loss: 0.34282, valid_loss: 0.29550, metric(valid_loss): 0.295497.


smooth loss:0.3449: 100%|███████████████████| 2261/2261 [17:24<00:00,  2.17it/s]
smooth loss:0.8650: 100%|█████████████████████| 565/565 [02:15<00:00,  4.16it/s]


Sat Feb 25 00:03:52 2023 Fold 2, Epoch 4, lr: 0.0002276, train_loss: 0.34012, valid_loss: 0.32518, metric(valid_loss): 0.325180.


smooth loss:0.2869: 100%|███████████████████| 2261/2261 [17:23<00:00,  2.17it/s]
smooth loss:0.8293: 100%|█████████████████████| 565/565 [02:15<00:00,  4.17it/s]


Sat Feb 25 00:23:32 2023 Fold 2, Epoch 5, lr: 0.0002257, train_loss: 0.34405, valid_loss: 0.31270, metric(valid_loss): 0.312698.


smooth loss:0.3439: 100%|███████████████████| 2261/2261 [17:25<00:00,  2.16it/s]
smooth loss:0.7096: 100%|█████████████████████| 565/565 [02:15<00:00,  4.16it/s]


Sat Feb 25 00:43:13 2023 Fold 2, Epoch 6, lr: 0.0002233, train_loss: 0.33175, valid_loss: 0.29336, metric(valid_loss): 0.293360.


smooth loss:0.4644: 100%|███████████████████| 2261/2261 [17:26<00:00,  2.16it/s]
smooth loss:0.7892: 100%|█████████████████████| 565/565 [02:15<00:00,  4.16it/s]


Sat Feb 25 01:02:55 2023 Fold 2, Epoch 7, lr: 0.0002205, train_loss: 0.34313, valid_loss: 0.31755, metric(valid_loss): 0.317551.


smooth loss:0.4019: 100%|███████████████████| 2261/2261 [17:24<00:00,  2.16it/s]
smooth loss:0.7275: 100%|█████████████████████| 565/565 [02:15<00:00,  4.16it/s]


Sat Feb 25 01:22:35 2023 Fold 2, Epoch 8, lr: 0.0002172, train_loss: 0.32939, valid_loss: 0.30997, metric(valid_loss): 0.309972.


smooth loss:0.3345: 100%|███████████████████| 2261/2261 [17:25<00:00,  2.16it/s]
smooth loss:0.8049: 100%|█████████████████████| 565/565 [02:15<00:00,  4.16it/s]


Sat Feb 25 01:42:16 2023 Fold 2, Epoch 9, lr: 0.0002135, train_loss: 0.34071, valid_loss: 0.31467, metric(valid_loss): 0.314669.


smooth loss:0.3675: 100%|███████████████████| 2261/2261 [17:24<00:00,  2.16it/s]
smooth loss:0.6635: 100%|█████████████████████| 565/565 [02:15<00:00,  4.16it/s]


Sat Feb 25 02:01:57 2023 Fold 2, Epoch 10, lr: 0.0002094, train_loss: 0.33247, valid_loss: 0.30406, metric(valid_loss): 0.304059.


smooth loss:0.3484: 100%|███████████████████| 2261/2261 [17:27<00:00,  2.16it/s]
smooth loss:0.7143: 100%|█████████████████████| 565/565 [02:16<00:00,  4.15it/s]


Sat Feb 25 02:21:40 2023 Fold 2, Epoch 11, lr: 0.0002050, train_loss: 0.33177, valid_loss: 0.29781, metric(valid_loss): 0.297811.


smooth loss:0.2847: 100%|███████████████████| 2261/2261 [17:26<00:00,  2.16it/s]
smooth loss:0.7270: 100%|█████████████████████| 565/565 [02:15<00:00,  4.16it/s]


Sat Feb 25 02:41:23 2023 Fold 2, Epoch 12, lr: 0.0002003, train_loss: 0.33150, valid_loss: 0.30675, metric(valid_loss): 0.306746.


smooth loss:0.2527: 100%|███████████████████| 2261/2261 [17:26<00:00,  2.16it/s]
smooth loss:0.8415: 100%|█████████████████████| 565/565 [02:16<00:00,  4.15it/s]


Sat Feb 25 03:01:05 2023 Fold 2, Epoch 13, lr: 0.0001955, train_loss: 0.32637, valid_loss: 0.31917, metric(valid_loss): 0.319166.


smooth loss:0.3675: 100%|███████████████████| 2261/2261 [17:26<00:00,  2.16it/s]
smooth loss:0.7513: 100%|█████████████████████| 565/565 [02:16<00:00,  4.15it/s]


Sat Feb 25 03:20:48 2023 Fold 2, Epoch 14, lr: 0.0001904, train_loss: 0.32891, valid_loss: 0.30972, metric(valid_loss): 0.309723.


smooth loss:0.3568: 100%|███████████████████| 2261/2261 [17:26<00:00,  2.16it/s]
smooth loss:0.7233: 100%|█████████████████████| 565/565 [02:16<00:00,  4.15it/s]


Sat Feb 25 03:40:30 2023 Fold 2, Epoch 15, lr: 0.0001852, train_loss: 0.32498, valid_loss: 0.29104, metric(valid_loss): 0.291039.
metric_best (0.291239 --> 0.291039). Saving model ...


smooth loss:0.2380: 100%|███████████████████| 2261/2261 [17:26<00:00,  2.16it/s]
smooth loss:0.7391: 100%|█████████████████████| 565/565 [02:15<00:00,  4.16it/s]


Sat Feb 25 04:00:13 2023 Fold 2, Epoch 16, lr: 0.0001800, train_loss: 0.32547, valid_loss: 0.28811, metric(valid_loss): 0.288115.
metric_best (0.291039 --> 0.288115). Saving model ...


smooth loss:0.3649: 100%|███████████████████| 2261/2261 [17:27<00:00,  2.16it/s]
smooth loss:0.6701: 100%|█████████████████████| 565/565 [02:15<00:00,  4.16it/s]


Sat Feb 25 04:19:57 2023 Fold 2, Epoch 17, lr: 0.0001748, train_loss: 0.32426, valid_loss: 0.29868, metric(valid_loss): 0.298683.


smooth loss:0.3510: 100%|███████████████████| 2261/2261 [17:27<00:00,  2.16it/s]
smooth loss:0.7659: 100%|█████████████████████| 565/565 [02:15<00:00,  4.15it/s]


Sat Feb 25 04:39:40 2023 Fold 2, Epoch 18, lr: 0.0001696, train_loss: 0.32302, valid_loss: 0.28074, metric(valid_loss): 0.280741.
metric_best (0.288115 --> 0.280741). Saving model ...


smooth loss:0.2405: 100%|███████████████████| 2261/2261 [17:26<00:00,  2.16it/s]
smooth loss:0.7121: 100%|█████████████████████| 565/565 [02:16<00:00,  4.15it/s]


Sat Feb 25 04:59:23 2023 Fold 2, Epoch 19, lr: 0.0001645, train_loss: 0.32478, valid_loss: 0.29358, metric(valid_loss): 0.293577.


smooth loss:0.3868: 100%|███████████████████| 2261/2261 [17:26<00:00,  2.16it/s]
smooth loss:0.7166: 100%|█████████████████████| 565/565 [02:16<00:00,  4.15it/s]


Sat Feb 25 05:19:06 2023 Fold 2, Epoch 20, lr: 0.0001597, train_loss: 0.32158, valid_loss: 0.28810, metric(valid_loss): 0.288097.


smooth loss:0.3602: 100%|███████████████████| 2261/2261 [17:25<00:00,  2.16it/s]
smooth loss:0.8026: 100%|█████████████████████| 565/565 [02:15<00:00,  4.16it/s]


Sat Feb 25 05:38:47 2023 Fold 2, Epoch 21, lr: 0.0001550, train_loss: 0.32051, valid_loss: 0.31761, metric(valid_loss): 0.317606.


smooth loss:0.2993: 100%|███████████████████| 2261/2261 [17:25<00:00,  2.16it/s]
smooth loss:0.6961: 100%|█████████████████████| 565/565 [02:15<00:00,  4.16it/s]


Sat Feb 25 05:58:28 2023 Fold 2, Epoch 22, lr: 0.0001506, train_loss: 0.31717, valid_loss: 0.29145, metric(valid_loss): 0.291452.


smooth loss:0.2774: 100%|███████████████████| 2261/2261 [17:25<00:00,  2.16it/s]
smooth loss:0.7439: 100%|█████████████████████| 565/565 [02:16<00:00,  4.15it/s]


Sat Feb 25 06:18:10 2023 Fold 2, Epoch 23, lr: 0.0001465, train_loss: 0.31497, valid_loss: 0.28436, metric(valid_loss): 0.284357.


smooth loss:0.3590: 100%|███████████████████| 2261/2261 [17:26<00:00,  2.16it/s]
smooth loss:0.7522: 100%|█████████████████████| 565/565 [02:16<00:00,  4.15it/s]


Sat Feb 25 06:37:54 2023 Fold 2, Epoch 24, lr: 0.0001428, train_loss: 0.31355, valid_loss: 0.28728, metric(valid_loss): 0.287283.


smooth loss:0.1682: 100%|███████████████████| 2261/2261 [17:31<00:00,  2.15it/s]
smooth loss:0.7386: 100%|█████████████████████| 565/565 [02:26<00:00,  3.86it/s]


Sat Feb 25 06:57:52 2023 Fold 2, Epoch 25, lr: 0.0001395, train_loss: 0.31377, valid_loss: 0.28069, metric(valid_loss): 0.280693.
metric_best (0.280741 --> 0.280693). Saving model ...


smooth loss:0.2703: 100%|███████████████████| 2261/2261 [18:17<00:00,  2.06it/s]
smooth loss:0.7490: 100%|█████████████████████| 565/565 [02:27<00:00,  3.82it/s]


Sat Feb 25 07:18:37 2023 Fold 2, Epoch 26, lr: 0.0001367, train_loss: 0.32133, valid_loss: 0.28505, metric(valid_loss): 0.285054.


smooth loss:0.3352: 100%|███████████████████| 2261/2261 [18:17<00:00,  2.06it/s]
smooth loss:0.7621: 100%|█████████████████████| 565/565 [02:24<00:00,  3.92it/s]


Sat Feb 25 07:39:18 2023 Fold 2, Epoch 27, lr: 0.0001343, train_loss: 0.30631, valid_loss: 0.28146, metric(valid_loss): 0.281457.


smooth loss:0.2460:   4%|▊                    | 82/2261 [00:43<19:02,  1.91it/s]


KeyboardInterrupt: 