In [1]:
# decoding JPEG images and decoding/encoding RLE datasets
# !pip3 install -q pylibjpeg==1.4.0
# https://github.com/pydicom/pylibjpeg
# !pip3 install pydicom==2.3.0
## !pip3 install -q python-gdcm==3.0.20
# !pip3 install -q torchvision==0.12.0
# !pip3 install -q torchaudio==0.11.0
# !pip3 install -q torchmetrics==0.11.0
# !pip3 install -q torchtext==0.12.0
# !pip3 install -q torch==1.13.1 # cuda 'sm_86' is only supported by 1.13.1
# !pip3 install -q timm==0.6.12 # or 0.4.12 (for train_1.ipynb and train_1_efficient.ipynb)
# !pip3 -q install monai==1.1.0
# !pip3 -q install segmentation-models-pytorch==0.2.1
# !conda install -c pytorch magma-cuda110==2.5.2
# !pip3 install opencv-python==4.5.4.60

In [2]:
DEBUG = False
import os
# os.environ["CUDA_VISIBLE_DEVICES"]="0"
# os.environ["PYTORCH_NO_CUDA_MEMORY_CACHING"]="1"


import sys

In [3]:
# suitable for kaggle notebook
# sys.path = ['../ca_2',] + sys.path
# print(sys.path)

In [4]:
import argparse
import warnings

In [5]:
import gc, ast, cv2, time, pickle, random
import pylibjpeg
# import gdcm
import pydicom
# pydicom is a pure Python package for working with DICOM files. 
# -It lets you read, modify and write DICOM data in an easy "pythonic" way. 

In [6]:
import numpy as np
import pandas as pd
from glob import glob
from PIL import Image

import nibabel as nib
# read / write access to some common neuroimaging file formats

In [7]:
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold, StratifiedKFold


In [8]:
%matplotlib inline

In [9]:
from monai.transforms import Resize

  warn(f"Failed to load image Python extension: {e}")


In [10]:
import  monai.transforms as transforms

In [11]:
import timm

import segmentation_models_pytorch as smp
import torch
import torch.nn as nn
import torch.optim as optim
import torch.cuda.amp as amp
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset


In [12]:
from tqdm import tqdm

In [13]:
# import graphviz

In [14]:
# # pip3 install torchview
# from torchview import draw_graph

In [15]:
# graphviz.set_jupyter_format('png')

In [16]:
# Print numpy arrays in full: with the summarisation threshold set to
# sys.maxsize, arrays are never abbreviated with "...". Handy for inspecting
# masks, but printing a whole volume will flood the cell output.
np.set_printoptions(threshold=sys.maxsize)

In [17]:
# Show DataFrames without truncating rows, columns, sequences or cell text.
# Full option names are spelled out: pandas resolves abbreviated keys by
# pattern matching, which starts raising as soon as an abbreviation matches
# more than one option.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_seq_items', None)
pd.set_option('display.max_colwidth', None) # 500
pd.set_option('display.expand_frame_repr', True)

In [18]:


# Prefer the GPU, but fall back to the CPU so the notebook can still be run
# (slowly) and debugged on a machine without CUDA instead of failing at the
# first `.to(device)` call.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# benchmark mode is good whenever your input sizes for your network do not vary.
# This flag allows you to enable the inbuilt cudnn auto-tuner to find the best algorithm to use for your hardware.
torch.backends.cudnn.benchmark = True

# Config

In [19]:
# Experiment identifier.
# NOTE(review): the name encodes "bs4" and "lr3e4" while batch_size / init_lr
# below are 3 and 19e-5 - confirm which values the name should reflect.
kernel_type = 'timm3d_effv2_unet4b_128_128_128_dsv2_flip12_shift333p7_gd1p5_bs4_lr3e4_20x50ep'
load_kernel = None  # presumably a checkpoint name to resume from; not used in the visible cells
load_last = True    # presumably "resume from the last checkpoint"; not used in the visible cells
n_blocks = 4        # number of encoder levels / decoder blocks used by TimmSegModel
n_folds = 5         # number of cross-validation folds

# check models performance => https://github.com/rwightman/pytorch-image-models/blob/main/results/results-imagenet.csv
backbone = 'tf_efficientnetv2_s_in21ft1k' # 'resnet18d' # 'tf_efficientnetv2_s_in21ft1k <=> tf_efficientnetv2_s.in21k_ft_in1k'

# Target volume size: load_dicom resizes every slice with image_sizes[0] and
# image_sizes[1]; load_dicom_line_par samples image_sizes[2] slices per study.
image_sizes = [128, 128, 128]
R = Resize(image_sizes, mode="area") # monai => Resize; load_sample uses it to shrink the masks to image_sizes

init_lr = 19e-5 # 19e-4
eta_min = 11e-5 # presumably the floor of the LR schedule - the scheduler is not visible here
batch_size = 3
drop_rate = 0. # dropout rate passed to the encoder; avoid a non-zero value when running over kaggle.
drop_path_rate = 0. # stochastic-depth rate passed to the encoder
loss_weights = [1, 1]
p_mixup = 0.1 # mixup probability, to avoid overfitting

# Root folder holding train.csv, segmentations/, train_images/ and the numpy/ cache.
data_dir = './' # ../input/rsna-2022-cervical-spine-fracture-detection
use_amp = True
num_workers = 12
out_dim = 7 # output channels of the segmentation head: one mask per vertebra C1..C7

n_epochs = 1000 # 1000

log_dir = './logs'
model_dir = './models'
model_dir_seg = './kaggle'
# Only the log and model folders are created here; model_dir_seg is expected to exist already.
os.makedirs(log_dir, exist_ok=True)
os.makedirs(model_dir, exist_ok=True)

In [20]:
# Training-time augmentation applied to the "image" and "mask" entries of a
# sample dict (MONAI dictionary transforms; presumably both keys receive the
# same random parameters so image and mask stay aligned - TODO confirm).
transforms_train = transforms.Compose([
    # random flips along spatial axes 1 and 2, each with probability 0.5
    transforms.RandFlipd(keys=["image", "mask"], prob=0.5, spatial_axis=1),
    transforms.RandFlipd(keys=["image", "mask"], prob=0.5, spatial_axis=2),
    # random translation; the range per axis is int(0.3 * size), i.e. 38 voxels for a 128 volume
    transforms.RandAffined(keys=["image", "mask"], translate_range=[int(x*y) for x, y in zip(image_sizes, [0.3, 0.3, 0.3])], padding_mode='zeros', prob=0.7),
    # mild grid distortion with nearest-neighbour resampling
    transforms.RandGridDistortiond(keys=("image", "mask"), prob=0.5, distort_limit=(-0.01, 0.01), mode="nearest"),    
])

# Validation uses the samples as loaded: an empty pipeline.
transforms_valid = transforms.Compose([
])

# transforms_train =''
# transforms_valid =''

# DataFrame

In [21]:
# Study-level labels, one row per study.
df_train = pd.read_csv(os.path.join(data_dir, 'train.csv'))
    #            StudyInstanceUID    patient_overall    C1  C2  C3  C4  C5  C6  C7
    # 0   1.2.826.0.1.3680043.6200                 1    1   1   0   0   0   0   0

# Segmentation files present on disk. os.path.join is used (instead of string
# concatenation) so this also works when data_dir has no trailing separator.
mask_files = os.listdir(os.path.join(data_dir, 'segmentations'))

df_mask = pd.DataFrame({'mask_file': mask_files})
    #     mask_file
    # 0   1.2.826.0.1.3680043.30487.nii

# The study id is the file name without its 4-character ".nii" suffix.
df_mask['StudyInstanceUID'] = df_mask['mask_file'].apply(lambda x: x[:-4])
    #     mask_file                       StudyInstanceUID
    # 0   1.2.826.0.1.3680043.30487.nii   1.2.826.0.1.3680043.30487

df_mask['mask_file'] = df_mask['mask_file'].apply(lambda x: os.path.join(data_dir, 'segmentations', x))
    #     mask_file                                       StudyInstanceUID
    # 0   ./segmentations/1.2.826.0.1.3680043.30487.nii   1.2.826.0.1.3680043.30487

# Left merge keeps every study; studies without a segmentation get NaN in mask_file.
df_train = df_train.merge(df_mask, on='StudyInstanceUID', how='left')
    #             StudyInstanceUID  patient_overall  C1  C2  C3  C4  C5  C6  C7  mask_file
    # 0   1.2.826.0.1.3680043.6200                1   1   1   0   0   0   0   0   NaN
del df_mask

df_train['image_folder'] = df_train['StudyInstanceUID'].apply(lambda x: os.path.join(data_dir, 'train_images', x))
    #             StudyInstanceUID  ...  mask_file  image_folder
    # 0   1.2.826.0.1.3680043.6200  ...  NaN        ./train_images/1.2.826.0.1.3680043.6200

# Replace missing mask paths by ''. The result is assigned back: an in-place
# fillna on the selected column is a chained update that does not reach
# df_train under pandas Copy-on-Write, and the query below would then keep
# every study (NaN != "" is True).
df_train['mask_file'] = df_train['mask_file'].fillna('')

# Keep only the studies that have a segmentation mask.
df_seg = df_train.query('mask_file != ""').reset_index(drop=True)

# Assign each study to the fold in which it is used for validation.
# KFold without shuffling splits the rows into n_folds consecutive chunks.
kf = KFold(n_folds)
df_seg['fold'] = -1
for fold, (train_idx, valid_idx) in enumerate(kf.split(df_seg)):
    df_seg.loc[valid_idx, 'fold'] = fold # df_seg.loc[row_index, column_fold] = fold_number
# fold column represents the split_number / fold_number in which row falls.


In [22]:
# 1.2.826.0.1.3680043.20574 not present in the dataset
df_seg[df_seg['StudyInstanceUID'] == '1.2.826.0.1.3680043.20574']

Unnamed: 0,StudyInstanceUID,patient_overall,C1,C2,C3,C4,C5,C6,C7,mask_file,image_folder,fold


In [23]:
df_seg.head(4)

Unnamed: 0,StudyInstanceUID,patient_overall,C1,C2,C3,C4,C5,C6,C7,mask_file,image_folder,fold
0,1.2.826.0.1.3680043.1363,1,0,0,0,0,1,0,0,./segmentations/1.2.826.0.1.3680043.1363.nii,./train_images/1.2.826.0.1.3680043.1363,0
1,1.2.826.0.1.3680043.25704,0,0,0,0,0,0,0,0,./segmentations/1.2.826.0.1.3680043.25704.nii,./train_images/1.2.826.0.1.3680043.25704,0
2,1.2.826.0.1.3680043.20647,0,0,0,0,0,0,0,0,./segmentations/1.2.826.0.1.3680043.20647.nii,./train_images/1.2.826.0.1.3680043.20647,0
3,1.2.826.0.1.3680043.31077,1,0,0,1,1,1,1,0,./segmentations/1.2.826.0.1.3680043.31077.nii,./train_images/1.2.826.0.1.3680043.31077,0


In [24]:
df_seg.tail(3)

Unnamed: 0,StudyInstanceUID,patient_overall,C1,C2,C3,C4,C5,C6,C7,mask_file,image_folder,fold
84,1.2.826.0.1.3680043.28025,0,0,0,0,0,0,0,0,./segmentations/1.2.826.0.1.3680043.28025.nii,./train_images/1.2.826.0.1.3680043.28025,4
85,1.2.826.0.1.3680043.21321,1,1,1,1,0,0,0,1,./segmentations/1.2.826.0.1.3680043.21321.nii,./train_images/1.2.826.0.1.3680043.21321,4
86,1.2.826.0.1.3680043.26990,1,0,0,0,0,1,1,1,./segmentations/1.2.826.0.1.3680043.26990.nii,./train_images/1.2.826.0.1.3680043.26990,4


In [25]:
# df_seg.to_csv('df_seg_train_1.csv', index=False)

# Dataset

In [26]:
# df_seg = pd.read_csv('df_seg_train_1.csv')

In [27]:
# df_seg.tail(4)

In [28]:
# StudyInstanceUIDs whose segmentation masks are inverted along the z-axis.
# load_sample checks this list and skips the z-axis flip for these studies so
# that their masks line up with the image slices.
revert_list = [
    '1.2.826.0.1.3680043.1363',
    '1.2.826.0.1.3680043.20120',
    '1.2.826.0.1.3680043.2243',
    '1.2.826.0.1.3680043.24606',
    '1.2.826.0.1.3680043.32071'
]

In [29]:
def load_dicom(path):
    """Read one DICOM slice and shrink it to the configured in-plane size.

    Parameters
    ----------
    path : str
        Path of a single DICOM file,
        e.g. ./train_images/1.2.826.0.1.3680043.1363/1.dcm

    Returns
    -------
    numpy.ndarray
        2-D array of the stored pixel values, resized with cv2 using
        dsize=(image_sizes[0], image_sizes[1]); with the configured
        128 x 128 target this is a (128, 128) array. The values are used as
        stored - the Rescale Slope / Intercept tags are not applied here.
    """
    # dcmread is the supported pydicom reader; read_file is a deprecated
    # alias of it that newer pydicom releases removed.
    dicom = pydicom.dcmread(path)
    # Example header fields of such a file:
    #     (0020, 000d) Study Instance UID      UI: 1.2.826.0.1.3680043.1363
    #     (0020, 0013) Instance Number         IS: '1'
    #     (0028, 0010) Rows                    US: 512
    #     (0028, 0011) Columns                 US: 512
    #     (0028, 0100) Bits Allocated          US: 16
    #     (0028, 0101) Bits Stored             US: 12
    #     (0028, 1052) Rescale Intercept       DS: '-1024.0'
    #     (0028, 1053) Rescale Slope           DS: '1.0'

    data = dicom.pixel_array
    # e.g. data.shape => (512, 512) i.e. (Rows, Columns), values 0 .. 3274

    # INTER_AREA is the preferred interpolation when shrinking an image
    # (INTER_LINEAR / INTER_CUBIC are better suited to enlarging).
    data = cv2.resize(data, (image_sizes[0], image_sizes[1]), interpolation=cv2.INTER_AREA)
    # e.g. data.shape => (128, 128), values 18 .. 2875

    return data


def load_dicom_line_par(path):
    """Load a study folder as a fixed-depth, min-max normalised uint8 volume.

    The slice files are ordered by the integer in their file name,
    image_sizes[2] of them are picked at evenly spaced positions (slices are
    repeated when the study has fewer), each one is loaded with load_dicom,
    and the stacked volume is rescaled to the 0..254 range.

    Parameters
    ----------
    path : str
        Study folder, e.g. ./train_images/1.2.826.0.1.3680043.1363

    Returns
    -------
    images : numpy.ndarray
        uint8 array of shape (image_sizes[2], height, width); the first axis
        is the slice (z) axis.
    indices : numpy.ndarray
        Integer positions of the selected slices in the sorted file list, so
        that callers can pick the matching mask slices.

    Raises
    ------
    FileNotFoundError
        If the folder contains no files.
    """
    # Sort numerically by file name ("2.dcm" before "10.dcm").
    # os.path.basename keeps this independent of the path separator.
    t_paths = sorted(
        glob(os.path.join(path, "*")),
        key=lambda x: int(os.path.basename(x).split(".")[0]),
    )
    # t_paths => ['./train_images/<uid>/1.dcm', './train_images/<uid>/2.dcm', ...]

    n_scans = len(t_paths)  # number of slices in the study, e.g. 199
    if n_scans == 0:
        raise FileNotFoundError(f"no DICOM slices found in {path!r}")

    # Evenly spaced slice positions: quantiles of [0, 1, ..., n_scans - 1]
    # at image_sizes[2] equidistant levels between 0 and 1.
    # e.g. n_scans = 199 => list(range(199)) = [0, 1, ..., 198]
    #      indices => [0 2 3 5 6 8 9 11 ... 198], len(indices) => 128
    # The length is always image_sizes[2]; with fewer slices, indices repeat.
    indices = np.quantile(list(range(n_scans)), np.linspace(0., 1., image_sizes[2])).round().astype(int)

    # Join the selected slices along a new leading axis => (z, rows, columns).
    images = np.stack([load_dicom(t_paths[i]) for i in indices], 0)

    # Min-max normalisation. The volume is converted to float64 first so the
    # subtraction cannot wrap around in the slices' integer dtype.
    images = images.astype(np.float64)
    images = images - np.min(images)
    images = images / (np.max(images) + 1e-4)  # 1e-4 avoids division by zero

    # Scale to the byte range; the cast truncates, so values end up in 0..254.
    images = (images * 255).astype(np.uint8)

    return images, indices


def load_sample(row, has_mask=True):
    """Load the image volume of one study and, optionally, its vertebra masks.

    Parameters
    ----------
    row : pandas.Series-like
        One row of df_seg; the attributes read are image_folder, mask_file
        and StudyInstanceUID.
    has_mask : bool, default True
        When False only the image is loaded and returned.

    Returns
    -------
    image : numpy.ndarray
        uint8 volume from load_dicom_line_par, repeated to 3 channels when it
        has fewer than 4 dimensions, e.g. (3, 128, 128, 128).
    mask : numpy.ndarray
        Only when has_mask is True: 7 channels (labels 1..7, i.e. C1..C7)
        with values 0..255, resized by R, e.g. (7, 128, 128, 128).
        Intermediate values appear at mask borders because of the area
        interpolation; callers threshold them.
    """
    image, index = load_dicom_line_par(row.image_folder)
    # image.shape => (128, 128, 128)

    if image.ndim < 4:
        # (z, y, x) -> (1, z, y, x) -> (3, z, y, x): the same volume in 3 channels
        image = np.expand_dims(image, axis=0).repeat(repeats=3, axis=0)

    if not has_mask:
        return image

    # get_fdata() returns the label volume of the .nii file,
    # e.g. shape (512, 512, 199) with labels 0..10.
    mask_org = nib.load(row.mask_file).get_fdata()

    # Bring the mask into the orientation of the image: slice axis first,
    # e.g. (512, 512, 199) -> (199, 512, 512).
    mask_org = mask_org.transpose((2, 1, 0))
    if row.StudyInstanceUID in revert_list:
        # masks stored inverted along z: flip the second axis only
        mask_org = mask_org[:, ::-1, :]
    else:
        # flip the slice axis and the second axis. The last axis is kept
        # whole: the former ":-1:" slice dropped its final column, which
        # changed the shape to (199, 512, 511) and shifted the mask slightly
        # after resizing.
        mask_org = mask_org[::-1, ::-1, :]

    # Pick the same slice positions that were selected for the image.
    mask_org = mask_org[index]
    # mask_org.shape => (128, 512, 512)

    # One binary channel per vertebra label 1..7, built directly as uint8
    # (a float64 buffer of this size would take roughly 1.8 GB), then scaled
    # to 0 / 255.
    mask = np.stack([mask_org == (cId + 1) for cId in range(7)]).astype(np.uint8) * 255
    # mask.shape => (7, 128, 512, 512), np.unique(mask) => [0 255]

    # Resize the spatial dimensions to image_sizes (channel axis untouched).
    mask = R(mask).numpy()
    # mask.shape => (7, 128, 128, 128), values 0..255

    return image, mask



class SEGDataset(torch.utils.data.Dataset):
    """Dataset of (image, mask) pairs read from the local numpy cache.

    Each item is loaded from two files under data_dir/numpy:
    <StudyInstanceUID>.npy (image) and <StudyInstanceUID>_mask.npy (mask).

    The cache was written by a code path that is currently disabled. For
    every row it did:
        image, mask = load_sample(row, has_mask=True)
        res = self.transform({'image': image, 'mask': mask})
        image = res['image'] / 255.                     # scale to 0..1
        mask = (res['mask'] > 127).astype(np.float32)   # binarise at 255 / 2
        np.save(image_file, image); np.save(mask_file, mask)
    `mode` and `transform` are stored but not used while items are served
    from the cache.
    """

    def __init__(self, df, mode, transform):
        # reset_index() gives positional row numbers and keeps the previous
        # index as an extra "index" column
        self.df = df.reset_index()
        self.mode = mode
        self.transform = transform

    def __len__(self):
        n_rows, _ = self.df.shape
        return n_rows

    def __getitem__(self, index):
        row = self.df.iloc[index]

        # image volume from the cache
        volume = np.load(os.path.join(data_dir, f'numpy/{row.StudyInstanceUID}.npy'))
        # matching mask from the cache
        target = np.load(os.path.join(data_dir, f'numpy/{row.StudyInstanceUID}_mask.npy'))

        # both are returned as float32 tensors
        return torch.tensor(volume).float(), torch.tensor(target).float()


In [30]:
# # storing image, mask => in numpy formats
# df_show = df_seg
# dataset_show = SEGDataset(df_show, 'train', transform=transforms_train)
# for index, row in df_show.iterrows():
#     img, mask = dataset_show[index]

In [31]:
# df_show = df_seg[0:4] # testing only few value
# dataset_show = SEGDataset(df_show, 'train', transform=transforms_train)

In [32]:
# # for debugging
# img, mask = dataset_show[0]

In [33]:
# # viewing some images
# plt.rcParams["figure.figsize"] = (20,8)
# for i in range(1): # 2
#     f, axarr = plt.subplots(1,8)
#     for p in range(4):
#         idx = i*4+p
#         img, mask = dataset_show[idx]
#             # img.shape, mask.shape => (3, 128, 128, 128) (7, 128, 128, 128)        
        
#         img = img[:, 60, :, :] # checking 60th dcm image for a particular patient       
#         mask = mask[:, 60, :, :] # checking corresponding 60th slice 
#             # img.shape, mask.shape => (3, 128, 128) (7, 128, 128)        
        
#         # merging 7 channels in order to reduce to 3
#         mask[0] = mask[0] + mask[3] + mask[6] # merging C1, C4 and C7
#         mask[1] = mask[1] + mask[4] # merging C2, C5
#         mask[2] = mask[2] + mask[5] # merging C3, C6
        
#         mask = mask[:3] # selecting only 3 sequence / channels out of 7.
#             # mask.shape => (3, 128, 128)


#         axarr[p].imshow(img.transpose(0, 1).transpose(1,2).squeeze()) # squeeze(); removes axes of length one.
#         axarr[p+4].imshow(mask.transpose(0, 1).transpose(1,2).squeeze())        

            
# for i in range(1): # 2
#     f, axarr = plt.subplots(1,4)
#     for p in range(4):
#         idx = i*4+p
#         img, mask = dataset_show[idx]        
#         img = img[:, 60, :, :] # checking 60th dcm image for a particular patient       
#         mask = mask[:, 60, :, :] # checking corresponding 60th slice 
        
#         # merging 7 channels in order to reduce to 3
#         mask[0] = mask[0] + mask[3] + mask[6] # merging C1, C4 and C7
#         mask[1] = mask[1] + mask[4] # merging C2, C5
#         mask[2] = mask[2] + mask[5] # merging C3, C6
        
#         mask = mask[:3] # selecting only 3 sequence / channels out of 7.
            
#         img = img * 0.7 + mask * 0.3 # merging image and its mask
#             # img.shape => (3, 128, 128)           
#         axarr[p].imshow(img.transpose(0, 1).transpose(1,2).squeeze())  # squeeze(); removes axes of length one.

# Model

In [34]:
class TimmSegModel(nn.Module):  # nn.Module: Base class for all neural network modules.
    """U-Net style segmentation network: timm encoder + smp U-Net decoder.

    The module is built from 2-D layers (the head is an nn.Conv2d), while the
    shape comments in forward describe 5-D volumes; presumably the model is
    converted to 3-D after construction (see convert_3d) - TODO confirm.

    Parameters
    ----------
    backbone : str
        Encoder architecture name passed to create_model.
    segtype : str, default 'unet'
        Decoder type. Only 'unet' creates a decoder; any other value leaves
        self.decoder undefined, so forward() would fail.
    pretrained : bool, default False
        Whether the encoder loads pretrained weights.

    Reads the module-level settings n_blocks, out_dim, drop_rate and
    drop_path_rate.
    """

    def __init__(self, backbone, segtype='unet', pretrained=False):
        super(TimmSegModel, self).__init__()

        # NOTE(review): `timm4smp` is not imported in the visible cells (only
        # `timm` is) - confirm it is provided elsewhere.
        self.encoder = timm4smp.create_model(
            backbone,
            in_chans=3, # number of channels for input
            features_only=True, # return the intermediate feature maps instead of the classifier output
            drop_rate=drop_rate, # set the dropout rate for training.
            drop_path_rate=drop_path_rate, # stochastic depth rate. to "deactivate" some layers during training.
            pretrained=pretrained
        )

        # Probe the encoder with a dummy input to read the channel count of
        # every feature level. For tf_efficientnetv2_s_in21ft1k:
        # g[i].shape => [1, 24, 32, 32], [1, 48, 16, 16], [1, 64, 8, 8],
        #               [1, 160, 4, 4], [1, 256, 2, 2]
        g = self.encoder(torch.rand(1, 3, 64, 64))

        # Leading 1 is a placeholder for the input level expected by the decoder.
        encoder_channels = [1] + [_.shape[1] for _ in g]
        # encoder_channels => [1, 24, 48, 64, 160, 256]

        decoder_channels = [160, 64, 48, 24, 16]

        if segtype == 'unet':
            self.decoder = smp.unet.decoder.UnetDecoder(
                encoder_channels=encoder_channels[:n_blocks+1],
                decoder_channels=decoder_channels[:n_blocks],
                n_blocks=n_blocks, # n_blocks=encoder_depth,
                attention_type = 'scse',
            )

        # Maps the last decoder block (decoder_channels[n_blocks-1] channels)
        # to out_dim output channels.
        self.segmentation_head = nn.Conv2d(decoder_channels[n_blocks-1], out_dim, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

    def forward(self,x):
        # x => torch.Size([1, 3, 128, 128, 128]); 1 => batch_size

        enc_features = self.encoder(x)[:n_blocks]
        # encoder output for a 128 volume:
        # [1, 24, 63, 63, 63], [1, 48, 31, 31, 31], [1, 64, 15, 15, 15], [1, 160, 7, 7, 7]

        # Append one zero slice at the end of each of the last three
        # dimensions (63 -> 64, 31 -> 32, ...). F.pad reads the sizes from
        # the tensor itself, so this does not depend on the backbone's
        # channel counts or on the input size, and keeps device and dtype.
        enc_features = [F.pad(feat, (0, 1, 0, 1, 0, 1)) for feat in enc_features]

        # Leading 0 is a placeholder for the input level expected by the decoder.
        global_features = [0] + enc_features

        seg_features = self.decoder(*global_features) # unpacked: one positional argument per level
        # seg_features has decoder_channels[n_blocks-1] channels

        seg_features = self.segmentation_head(seg_features)
        # seg_features.shape => torch.Size([1, 7, 128, 128, 128])
        return seg_features

In [35]:
from conv2d_same import Conv2dSame
from conv3d_same import Conv3dSame


def _inflate_conv_params(conv2d, conv3d):
    """Copy a 2D convolution's parameters into its 3D replacement.

    The 2D kernel is repeated ``kernel_size[0]`` times along a new trailing axis.
    The bias (if any) is copied as well; without this the 3D layer would keep a
    freshly initialised random bias and the pretrained one would be lost.
    """
    depth = conv2d.kernel_size[0]
    conv3d.weight = torch.nn.Parameter(
        conv2d.weight.unsqueeze(-1).repeat(1, 1, 1, 1, depth)
    )
    if conv2d.bias is not None:
        conv3d.bias = torch.nn.Parameter(conv2d.bias.detach().clone())


def convert_3d(module):
    """Recursively replace supported 2D layers of ``module`` with 3D counterparts.

    Handled layer types: ``BatchNorm2d``, ``Conv2dSame``, ``Conv2d``, ``MaxPool2d``
    and ``AvgPool2d``. Any other module is kept as-is, but its children are still
    converted and re-attached under the same names.

    Convolutions are assumed to be square: only element ``[0]`` of kernel size,
    stride, padding and dilation is carried over and applied to all three axes.

    Args:
        module: root ``torch.nn.Module`` to convert.

    Returns:
        The converted module (a new object for handled layer types, otherwise the
        same object with converted children).
    """
    module_output = module

    if isinstance(module, torch.nn.BatchNorm2d):
        module_output = torch.nn.BatchNorm3d(
            module.num_features,
            module.eps,
            module.momentum,
            module.affine,
            module.track_running_stats,
        )
        # weight/bias only exist when affine=True; they are dimension-agnostic, so
        # the very same Parameter objects are reused.
        if module.affine:
            with torch.no_grad():
                module_output.weight = module.weight
                module_output.bias = module.bias
        module_output.running_mean = module.running_mean
        module_output.running_var = module.running_var
        module_output.num_batches_tracked = module.num_batches_tracked
        if hasattr(module, "qconfig"):
            module_output.qconfig = module.qconfig

    # Conv2dSame is tested before Conv2d on purpose, as in the original ordering.
    elif isinstance(module, Conv2dSame):
        module_output = Conv3dSame(
            in_channels=module.in_channels,
            out_channels=module.out_channels,
            kernel_size=module.kernel_size[0],
            stride=module.stride[0],
            padding=module.padding[0],
            dilation=module.dilation[0],
            groups=module.groups,
            bias=module.bias is not None,
        )
        _inflate_conv_params(module, module_output)

    elif isinstance(module, torch.nn.Conv2d):
        # String paddings ('same' / 'valid') must be forwarded untouched;
        # indexing them with [0] would yield a single character.
        padding = module.padding if isinstance(module.padding, str) else module.padding[0]
        module_output = torch.nn.Conv3d(
            in_channels=module.in_channels,
            out_channels=module.out_channels,
            kernel_size=module.kernel_size[0],
            stride=module.stride[0],
            padding=padding,
            dilation=module.dilation[0],
            groups=module.groups,
            bias=module.bias is not None,
            padding_mode=module.padding_mode,
        )
        _inflate_conv_params(module, module_output)

    elif isinstance(module, torch.nn.MaxPool2d):
        module_output = torch.nn.MaxPool3d(
            kernel_size=module.kernel_size,
            stride=module.stride,
            padding=module.padding,
            dilation=module.dilation,
            ceil_mode=module.ceil_mode,
        )

    elif isinstance(module, torch.nn.AvgPool2d):
        # Carry over count_include_pad / divisor_override too, otherwise the 3D
        # layer silently falls back to the defaults and averages differently.
        module_output = torch.nn.AvgPool3d(
            kernel_size=module.kernel_size,
            stride=module.stride,
            padding=module.padding,
            ceil_mode=module.ceil_mode,
            count_include_pad=module.count_include_pad,
            divisor_override=module.divisor_override,
        )

    # Convert the children and attach them to the (possibly new) parent.
    for name, child in module.named_children():
        module_output.add_module(name, convert_3d(child))

    return module_output

In [36]:
# Number of encoder stages / decoder blocks used by TimmSegModel.
# It is read as a module-level global (e.g. the `[:n_blocks]` slice in forward),
# so it must be defined before the model is built or called.
n_blocks = 4

In [37]:
# Smoke test: build the 2D model, convert it to 3D, and run one random volume through it.
m = TimmSegModel(backbone)
# type(m) => <class '__main__.TimmSegModel'>; at this point the layers are still 2D, e.g.
#   TimmSegModel(
#     (encoder): FeatureListNet(
#       (conv1): Sequential(
#         (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
#   ...

# Replace the supported 2D layers with their 3D counterparts.
m = convert_3d(m)
# type(m) is unchanged, but the same layer now prints as
#   TimmSegModel(
#     (encoder): FeatureListNet(
#       (conv1): Sequential(
#         (0): Conv3d(3, 32, kernel_size=(3, 3, 3), stride=(2, 2, 2), padding=(1, 1, 1), bias=False)
#   ...

# Shape check: a (1, 3, 128, 128, 128) input should give torch.Size([1, 7, 128, 128, 128]).
m(torch.rand(1, 3, 128,128,128)).shape
# expected output => torch.Size([1, 7, 128, 128, 128])

torch.Size([1, 7, 128, 128, 128])

In [38]:
# draw_graph(m, input_data = torch.rand(1, 3, 128, 128, 128), expand_nested=True, save_graph=True).visual_graph
# draw_graph(m, input_size = (1, 3, 128, 128, 128), expand_nested=True, save_graph=True).visual_graph

# Loss & Metric

In [39]:
from typing import Any, Dict, Optional


def binary_dice_score(
    y_pred: torch.Tensor,
    y_true: torch.Tensor,
    threshold: Optional[float] = None,
    nan_score_on_empty=False,
    eps: float = 1e-7,
) -> float:
    """Dice coefficient between one predicted mask and one target mask.

    Args:
        y_pred: predicted mask (probabilities, or already binary).
        y_true: target mask of the same shape.
        threshold: when given, ``y_pred`` is binarised with ``y_pred > threshold``
            and cast to ``y_true``'s dtype before scoring.
        nan_score_on_empty: what to return when the target has no positive sum -
            ``np.nan`` if True, otherwise 1.0 for an empty prediction and 0.0 for
            a non-empty one.
        eps: added to the denominator to avoid division by zero.

    Returns:
        The score as a Python float.
    """
    if threshold is not None:
        y_pred = (y_pred > threshold).to(y_true.dtype)

    pred_sum = torch.sum(y_pred)
    true_sum = torch.sum(y_true)
    overlap = torch.sum(y_pred * y_true).item()

    # Always evaluated first, exactly like the original, before the empty-target override.
    dice = (2.0 * overlap) / ((pred_sum + true_sum).item() + eps)

    if true_sum > 0:
        return dice

    # Empty target: the plain formula is not meaningful, use the configured fallback.
    if nan_score_on_empty:
        return np.nan
    return float(not (pred_sum > 0))


def multilabel_dice_score(y_true: torch.Tensor, y_pred: torch.Tensor, threshold=None, eps=1e-7, nan_score_on_empty=False,):
    """Per-class Dice scores for one sample.

    Both tensors are indexed along dim 0, one slice per class
    (e.g. y_pred.shape => torch.Size([7, 128, 128, 128])).
    Note the argument order (y_true first), which is the reverse of binary_dice_score.

    Returns:
        A list with one binary_dice_score result per class, in class order.
    """
    return [
        binary_dice_score(
            y_pred=y_pred[class_index],
            y_true=y_true[class_index],
            threshold=threshold,
            nan_score_on_empty=nan_score_on_empty,
            eps=eps,
        )
        for class_index in range(y_pred.size(0))
    ]


def dice_loss(input, target):
    """Soft Dice loss computed over the whole batch at once.

    Args:
        input: raw logits, e.g. torch.Size([4, 7, 128, 128, 128]); a sigmoid is
            applied inside this function.
        target: mask tensor with the same number of elements as ``input``.

    Returns:
        Scalar tensor ``1 - (2*|P.T| + smooth) / (|P| + |T| + smooth)``.
    """
    probs = torch.sigmoid(input)

    # Additive smoothing keeps the ratio defined when both masks are empty.
    smooth = 1.0

    # reshape(-1) instead of view(-1): view raises on non-contiguous tensors
    # (e.g. channels-last or transposed inputs); reshape copies only when needed.
    iflat = probs.reshape(-1)
    tflat = target.reshape(-1)

    intersection = (iflat * tflat).sum()
    return 1 - ((2.0 * intersection + smooth) / (iflat.sum() + tflat.sum() + smooth))


def bce_dice(input, target, loss_weights=loss_weights): # loss_weights = [1,1] => defined in config section
    """Weighted average of BCE-with-logits loss and soft Dice loss.

    ``input`` holds raw logits: BCEWithLogitsLoss applies its own (numerically
    stable) sigmoid, and dice_loss applies one internally as well.
    ``loss_weights[0]`` scales the BCE term and ``loss_weights[1]`` the Dice term;
    the result is divided by ``sum(loss_weights)``.
    """
    bce_term = nn.BCEWithLogitsLoss()(input, target)
    dice_term = dice_loss(input, target)

    weighted_bce = loss_weights[0] * bce_term
    weighted_dice = loss_weights[1] * dice_term
    return (weighted_bce + weighted_dice) / sum(loss_weights)

# Loss function used by both train_func and valid_func (looked up as a module-level global).
criterion = bce_dice

# Train & Valid func

In [40]:
def mixup(input, truth, clip=[0, 1]):
    """Blend every sample of a batch with another, randomly chosen sample of the same batch.

    Args:
        input: image batch, indexed along dim 0.
        truth: mask batch aligned with ``input``.
        clip: ``[low, high]`` bounds for the mixing coefficient.

    Returns:
        (mixed_input, truth, shuffled_truth, lam) where
        ``mixed_input = input * lam + input[perm] * (1 - lam)`` and
        ``shuffled_truth = truth[perm]`` for one random permutation ``perm``.
        ``truth`` itself is returned untouched; the caller mixes the two losses.
    """
    # One random permutation of the batch indices, shared by images and masks.
    perm = torch.randperm(input.size(0))

    # Mixing coefficient drawn uniformly from [clip[0], clip[1]).
    lam = np.random.uniform(clip[0], clip[1])

    blended = input * lam + input[perm] * (1 - lam)
    return blended, truth, truth[perm], lam


def train_func(model, loader_train, optimizer, scaler=None):
    """Train ``model`` for one epoch and return the mean batch loss.

    Args:
        model: network to train (expected to live on the GPU already).
        loader_train: iterable yielding ``(images, gt_masks)`` batches, e.g.
            images torch.Size([4, 3, 128, 128, 128]) and
            gt_masks torch.Size([4, 7, 128, 128, 128]).
        optimizer: optimizer bound to ``model``'s parameters.
        scaler: optional GradScaler. When given, the forward pass runs under
            autocast and the scaler drives backward/step (original behaviour).
            When None, training falls back to plain full-precision
            backward/step instead of failing on ``scaler.scale``.

    Returns:
        ``np.mean`` of the per-batch losses.

    Relies on module-level ``criterion``, ``p_mixup`` and ``mixup``.
    """
    model.train()
    use_amp = scaler is not None
    train_loss = []
    bar = tqdm(loader_train)

    for images, gt_masks in bar:
        # Reset gradients accumulated by the previous step.
        optimizer.zero_grad()

        images = images.cuda()
        gt_masks = gt_masks.cuda()  # ground-truth masks

        # random.random() is uniform in [0.0, 1.0); mixup is applied with probability p_mixup.
        do_mixup = random.random() < p_mixup
        if do_mixup:
            images, gt_masks, gt_masks_sfl, lam = mixup(images, gt_masks)

        # Mixed precision only when a scaler is available to protect fp16 gradients.
        with amp.autocast(enabled=use_amp):
            logits = model(images)
            # logits.shape => torch.Size([4, 7, 128, 128, 128])

            loss = criterion(logits, gt_masks)
            if do_mixup:
                # Same logits scored against the shuffled masks, blended with the mixup factor.
                loss2 = criterion(logits, gt_masks_sfl)
                loss = loss * lam  + loss2 * (1 - lam)

        train_loss.append(loss.item())

        if use_amp:
            # Backward on the scaled loss; scaler.step() unscales the gradients and
            # skips optimizer.step() when they contain infs/NaNs; update() adapts the scale.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()

        # Running mean over the last 30 batches.
        bar.set_description(f'smooth loss:{np.mean(train_loss[-30:]):.4f}')

    return np.mean(train_loss)


def valid_func(model, loader_valid):
    """Evaluate ``model`` on the validation loader.

    Args:
        model: network to evaluate (expected to live on the GPU already).
        loader_valid: iterable yielding ``(images, gt_masks)`` batches.

    Returns:
        ``(mean validation loss, best mean Dice over the thresholds in ths)``.

    Relies on module-level ``criterion`` and ``multilabel_dice_score``.
    """
    model.eval()
    valid_loss = []

    ths = [0.5]
    # ths = [0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8]

    # One independent list per threshold. `[[]] * len(ths)` would put the SAME list
    # object in every slot, merging the scores of all thresholds together.
    batch_metrics = [[] for _ in ths]

    bar = tqdm(loader_valid)
    with torch.no_grad():
        for images, gt_masks in bar:
            images = images.cuda()
            gt_masks = gt_masks.cuda()

            logits = model(images)
            loss = criterion(logits, gt_masks)
            valid_loss.append(loss.item())

            # Threshold-independent work is done once per batch.
            probs = logits.sigmoid().cpu()
            masks_cpu = gt_masks.cpu()

            for thi, th in enumerate(ths):
                for i in range(probs.shape[0]):  # one sample at a time
                    # Per-class Dice scores of this sample (one value per class).
                    sample_scores = multilabel_dice_score(
                        y_pred=probs[i],
                        y_true=masks_cpu[i],
                        threshold=th,
                    )
                    batch_metrics[thi].extend(sample_scores)
            bar.set_description(f'smooth loss:{np.mean(valid_loss[-30:]):.4f}')

    # Mean over every (sample, class) score collected for each threshold.
    metrics = [np.mean(this_metric) for this_metric in batch_metrics]
    print('best threshold:', ths[np.argmax(metrics)], 'best dice of epoch:', np.max(metrics))

    return np.mean(valid_loss), np.max(metrics)


In [41]:
# plt.rcParams['figure.figsize'] = 20, 2
# optimizer = optim.AdamW(m.parameters(), lr=init_lr)
# scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, 1000)
# lrs = []
# for epoch in range(1, 1000+1):
#     scheduler_cosine.step(epoch-1)
#     lrs.append(optimizer.param_groups[0]["lr"])
# plt.plot(range(len(lrs)), lrs)

# Training

In [42]:
# df_run_tr = df_seg[0:4]
# df_run_val = df_seg[4:5]
def run(fold):
    """Train and validate the segmentation model for one cross-validation fold.

    Rows of ``df_seg`` whose 'fold' column equals ``fold`` form the validation set,
    all other rows the training set. After every epoch a line is appended to the
    log file, and the weights are saved whenever the validation metric improves.

    Relies on module-level configuration: log_dir, model_dir, model_dir_seg,
    kernel_type, df_seg, transforms_train, transforms_valid, batch_size,
    num_workers, backbone, device, init_lr, n_epochs, eta_min.
    """
    log_file = os.path.join(log_dir, f'{kernel_type}.txt')
    model_file = os.path.join(model_dir, f'{kernel_type}_fold{fold}_best.pth')

    train_ = df_seg[df_seg['fold'] != fold].reset_index(drop=True)
    valid_ = df_seg[df_seg['fold'] == fold].reset_index(drop=True)

    dataset_train = SEGDataset(train_, 'train', transform=transforms_train)
    dataset_valid = SEGDataset(valid_, 'valid', transform=transforms_valid)
    loader_train = torch.utils.data.DataLoader(dataset_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    loader_valid = torch.utils.data.DataLoader(dataset_valid, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    model = TimmSegModel(backbone, pretrained=True) # True. False; if loading previous model state
    model = convert_3d(model)

    # https://pytorch.org/docs/stable/notes/cuda.html
    model = model.to(device) # device = torch.device('cuda')

    # Warm start: if not first run, load previous model. On a first run the
    # checkpoint does not exist yet, so the load is skipped instead of crashing.
    # NOTE(review): every fold is warm-started from the fold-4 checkpoint; if that
    # checkpoint was trained on samples that sit in this fold's validation split,
    # the reported metric is optimistic - confirm this is intended.
    fold_l = 4
    load_model_file = os.path.join(model_dir_seg, f'{kernel_type}_fold{fold_l}_best.pth')
    if os.path.isfile(load_model_file):
        # torch.load unpickles the file: only load checkpoints from a trusted source.
        # map_location='cpu' makes loading independent of the device used when saving;
        # load_state_dict then copies the tensors onto the model's device.
        sd = torch.load(load_model_file, map_location='cpu')
        if 'model_state_dict' in sd.keys():
            sd = sd['model_state_dict']
        # Strip the 'module.' prefix left behind by DataParallel-style wrappers.
        sd = {k[7:] if k.startswith('module.') else k: sd[k] for k in sd.keys()}
        model.load_state_dict(sd, strict=True)
    else:
        print(f'No previous checkpoint at {load_model_file}; starting from the pretrained backbone.')

    optimizer = optim.AdamW(model.parameters(), lr=init_lr)
    scaler = torch.cuda.amp.GradScaler()
    metric_best = 0.

    scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, n_epochs, eta_min=eta_min)

    print(f'Length of dataset_train = {len(dataset_train)}, ', f'Length of dataset_valid = {len(dataset_valid)}')

    for epoch in range(1, n_epochs+1):
        # Set the learning rate for this epoch (0-based epoch index).
        scheduler_cosine.step(epoch-1)

        print(time.ctime(), 'Epoch:', epoch)

        train_loss = train_func(model, loader_train, optimizer, scaler)
        valid_loss, metric = valid_func(model, loader_valid)

        content = time.ctime() + ' ' + f'Fold {fold}, Epoch {epoch}, lr: {optimizer.param_groups[0]["lr"]:.7f}, train loss: {train_loss:.5f}, valid loss: {valid_loss:.5f}, metric: {(metric):.6f}.'
        print(content)
        with open(log_file, 'a') as appender:
            appender.write(content + '\n')

        # Keep only the best-scoring weights of this fold.
        if metric > metric_best:
            print(f'metric_best ({metric_best:.6f} --> {metric:.6f}). Saving model ...')
            torch.save(model.state_dict(), model_file)
            metric_best = metric

#         # Save Last
#         if not DEBUG:
#             torch.save(
#                 {
#                     'epoch': epoch,
#                     'model_state_dict': model.state_dict(),
#                     'optimizer_state_dict': optimizer.state_dict(),
#                     'scaler_state_dict': scaler.state_dict() if scaler else None,
#                     'score_best': metric_best,
#                 },
#                 model_file.replace('_best', '_last')
#             )

    # Drop the model reference and release cached GPU memory before the next fold.
    del model
    torch.cuda.empty_cache()
#     _= gc.collect()


In [43]:
# print(torch.version.cuda)
# print(torch.cuda.get_arch_list())

In [44]:
# Workflow note: this cell is meant to be executed in two passes.
# Pass 1: do not run all folds back to back; run one fold, restart the kernel, then run the next.
# Pass 2: execute the cell again, choosing the weights that performed well across all folds.
run(0)
# run(1)
# run(2)
# run(3)
# run(4)

# Rule of thumb: a gap of up to 0.10 between the train and valid "smooth loss" is OK; if it is larger, reduce the lr.

2023-02-23 11:24:52,642 - Loading pretrained weights from url (https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_s_21ft1k-d7dafa41.pth)
Length of dataset_train = 69,  Length of dataset_valid = 18
Thu Feb 23 11:24:53 2023 Epoch: 1


smooth loss:0.0384: 100%|███████████████████████| 23/23 [01:04<00:00,  2.81s/it]
smooth loss:0.0343: 100%|█████████████████████████| 6/6 [00:13<00:00,  2.26s/it]


best threshold: 0.5 best dice of epoch: 0.9307108726825309
Thu Feb 23 11:26:11 2023 Fold 0, Epoch 1, lr: 0.0003000, train loss: 0.03842, valid loss: 0.03425, metric: 0.930711.
metric_best (0.000000 --> 0.930711). Saving model ...
Thu Feb 23 11:26:12 2023 Epoch: 2


smooth loss:0.0304: 100%|███████████████████████| 23/23 [00:25<00:00,  1.11s/it]
smooth loss:0.0342: 100%|█████████████████████████| 6/6 [00:06<00:00,  1.11s/it]


best threshold: 0.5 best dice of epoch: 0.9300611238882839
Thu Feb 23 11:26:44 2023 Fold 0, Epoch 2, lr: 0.0003000, train loss: 0.03041, valid loss: 0.03424, metric: 0.930061.
Thu Feb 23 11:26:44 2023 Epoch: 3


smooth loss:0.0302: 100%|███████████████████████| 23/23 [00:25<00:00,  1.13s/it]
smooth loss:0.0341: 100%|█████████████████████████| 6/6 [00:06<00:00,  1.13s/it]


best threshold: 0.5 best dice of epoch: 0.9308472615778829
Thu Feb 23 11:27:17 2023 Fold 0, Epoch 3, lr: 0.0003000, train loss: 0.03016, valid loss: 0.03409, metric: 0.930847.
metric_best (0.930711 --> 0.930847). Saving model ...
Thu Feb 23 11:27:17 2023 Epoch: 4


smooth loss:0.0470: 100%|███████████████████████| 23/23 [00:25<00:00,  1.12s/it]
smooth loss:0.0350: 100%|█████████████████████████| 6/6 [00:06<00:00,  1.16s/it]


best threshold: 0.5 best dice of epoch: 0.9273618513745848
Thu Feb 23 11:27:50 2023 Fold 0, Epoch 4, lr: 0.0003000, train loss: 0.04697, valid loss: 0.03498, metric: 0.927362.
Thu Feb 23 11:27:50 2023 Epoch: 5


smooth loss:0.0344: 100%|███████████████████████| 23/23 [00:25<00:00,  1.10s/it]
smooth loss:0.0352: 100%|█████████████████████████| 6/6 [00:06<00:00,  1.08s/it]


best threshold: 0.5 best dice of epoch: 0.9273315913554759
Thu Feb 23 11:28:22 2023 Fold 0, Epoch 5, lr: 0.0003000, train loss: 0.03444, valid loss: 0.03517, metric: 0.927332.
Thu Feb 23 11:28:22 2023 Epoch: 6


smooth loss:0.0290: 100%|███████████████████████| 23/23 [00:24<00:00,  1.07s/it]
smooth loss:0.0347: 100%|█████████████████████████| 6/6 [00:06<00:00,  1.07s/it]


best threshold: 0.5 best dice of epoch: 0.9290590532722638
Thu Feb 23 11:28:53 2023 Fold 0, Epoch 6, lr: 0.0003000, train loss: 0.02897, valid loss: 0.03474, metric: 0.929059.
Thu Feb 23 11:28:53 2023 Epoch: 7


smooth loss:0.0324: 100%|███████████████████████| 23/23 [00:24<00:00,  1.09s/it]
smooth loss:0.0352: 100%|█████████████████████████| 6/6 [00:06<00:00,  1.07s/it]


best threshold: 0.5 best dice of epoch: 0.927584668033893
Thu Feb 23 11:29:25 2023 Fold 0, Epoch 7, lr: 0.0003000, train loss: 0.03239, valid loss: 0.03524, metric: 0.927585.
Thu Feb 23 11:29:25 2023 Epoch: 8


smooth loss:0.0412: 100%|███████████████████████| 23/23 [00:24<00:00,  1.08s/it]
smooth loss:0.0363: 100%|█████████████████████████| 6/6 [00:06<00:00,  1.08s/it]


best threshold: 0.5 best dice of epoch: 0.9247637364760257
Thu Feb 23 11:29:56 2023 Fold 0, Epoch 8, lr: 0.0003000, train loss: 0.04120, valid loss: 0.03632, metric: 0.924764.
Thu Feb 23 11:29:56 2023 Epoch: 9


smooth loss:0.0356: 100%|███████████████████████| 23/23 [00:25<00:00,  1.09s/it]
smooth loss:0.0352: 100%|█████████████████████████| 6/6 [00:06<00:00,  1.08s/it]


best threshold: 0.5 best dice of epoch: 0.9273894142530573
Thu Feb 23 11:30:28 2023 Fold 0, Epoch 9, lr: 0.0003000, train loss: 0.03558, valid loss: 0.03524, metric: 0.927389.
Thu Feb 23 11:30:28 2023 Epoch: 10


smooth loss:0.0695: 100%|███████████████████████| 23/23 [00:25<00:00,  1.09s/it]
smooth loss:0.0367: 100%|█████████████████████████| 6/6 [00:06<00:00,  1.09s/it]


best threshold: 0.5 best dice of epoch: 0.9240821711293389
Thu Feb 23 11:31:00 2023 Fold 0, Epoch 10, lr: 0.0002999, train loss: 0.06951, valid loss: 0.03667, metric: 0.924082.
Thu Feb 23 11:31:00 2023 Epoch: 11


smooth loss:0.0289: 100%|███████████████████████| 23/23 [00:24<00:00,  1.08s/it]
smooth loss:0.0362: 100%|█████████████████████████| 6/6 [00:06<00:00,  1.09s/it]


best threshold: 0.5 best dice of epoch: 0.925965181133139
Thu Feb 23 11:31:31 2023 Fold 0, Epoch 11, lr: 0.0002999, train loss: 0.02893, valid loss: 0.03616, metric: 0.925965.
Thu Feb 23 11:31:31 2023 Epoch: 12


smooth loss:0.0279:  17%|████▏                   | 4/23 [00:07<00:33,  1.75s/it]


KeyboardInterrupt: 