In [34]:
# decoding JPEG images and decoding/encoding RLE datasets
# !pip3 install pylibjpeg==1.4.0
# https://github.com/pydicom/pylibjpeg

# !pip3 install python-gdcm

In [35]:
DEBUG = False

import os
import sys

In [36]:
# suitable for kaggle notebook
# sys.path = ['../ca_2',] + sys.path
# print(sys.path)

In [37]:
import argparse
import warnings

In [38]:
import gc, ast, cv2, time, pickle, random
# import pylibjpeg
# import gdcm
# import pydicom
# pydicom is a pure Python package for working with DICOM files. 
# -It lets you read, modify and write DICOM data in an easy "pythonic" way. 

In [39]:
import numpy as np
import pandas as pd
from glob import glob
from PIL import Image


# import nibabel as nib
# read / write access to some common neuroimaging file formats

In [40]:
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold, StratifiedKFold

import albumentations # python library for pixel-level augmentations 

In [41]:
%matplotlib inline

In [42]:
import timm
# from timm0412 import timm as timm # timm0412 means timm v0.4.12

# import segmentation_models_pytorch as smp
import torch
import torch.nn as nn
import torch.optim as optim
import torch.cuda.amp as amp
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset


In [43]:
from tqdm import tqdm

In [44]:
# import graphviz

In [45]:
# # pip3 install torchview
# from torchview import draw_graph

In [46]:
np.set_printoptions(threshold=sys.maxsize)

In [47]:
# pd.set_option('display.max_column', None)
# pd.set_option('display.max_rows', None)
# pd.set_option('display.max_seq_items', None)
# pd.set_option('display.max_colwidth', None) # 500
# pd.set_option('expand_frame_repr', True)

In [48]:

device = torch.device('cuda')

random_seed = 42
random.seed(random_seed)
np.random.seed(random_seed)
torch.manual_seed(random_seed)
os.environ["PYTHONHASHSEED"] = str(random_seed)
torch.cuda.manual_seed(random_seed)
torch.cuda.manual_seed_all(random_seed)

# benchmark mode is good whenever your input sizes for your network do not vary. 
# This flag allows you to enable the inbuilt cudnn auto-tuner to find the best algorithm to use for your hardware.
torch.backends.cudnn.benchmark = False

torch.backends.cudnn.deterministic = True

# Config

In [49]:
kernel_type = '0920_2d_lstmv22headv2_convnn_224_15_6ch_8flip_augv2_drl3_rov1p2_rov3p2_bs4_lr6e5_eta6e6_lw151_50ep'
load_kernel = None
load_last = True

n_folds = 5
backbone = 'convnext_nano'

image_size = 224
n_slice_per_c = 15
in_chans = 6

init_lr = 18739e-10 # 18739e-9 , 18739e-10 (run at peak)
eta_min = 18700e-10 # 18700e-9
lw = [15, 1]
batch_size = 1
drop_rate = 0.
drop_path_rate = 0.
drop_rate_last = 0.
p_mixup = 0.5 
p_rand_order = 0.5 
p_rand_order_v1 = 0.
weight_decay=0 # default: depend upon optimizer, regularizer like dropout to prevent overfitting.
n_accumulate=1 # 

data_dir = './'
use_amp = True
num_workers = 11 
out_dim = 1

n_epochs = 81 # 80

log_dir = './logs'
model_dir = './models'
model_dir_seg = './kaggle'
# os.makedirs(log_dir, exist_ok=True)
# os.makedirs(model_dir, exist_ok=True)

In [50]:
# # Albumentations is a computer vision tool that boosts the performance of deep convolutional neural networks.
# # Albumentations is a Python library for image augmentation.
# # preferred border_mode="reflection" for all techniques.
transforms_train = albumentations.Compose([
#     albumentations.Resize(image_size, image_size),    
    albumentations.OneOf([    
        albumentations.HorizontalFlip(p=1.),
        albumentations.VerticalFlip(p=1.),
        albumentations.Transpose(p=1.),        
    ], p=0.8),
   
    albumentations.OneOf([
        albumentations.RandomGamma(gamma_limit=(100, 150), p=0.5),
        albumentations.RandomBrightnessContrast(brightness_limit=0.5, contrast_limit=0.3, p=0.5),
        albumentations.Equalize(p=0.5),
        albumentations.Sharpen(alpha=(0.5, 1.), lightness=(0.5, 1.0), p=0.5),
        albumentations.ShiftScaleRotate(shift_limit=0.1, scale_limit=0.1, rotate_limit=45, border_mode=4, p=0.5), 
        
#         albumentations.GaussNoise(var_limit=(9000.0, 9999.0), p=1.),
        albumentations.Defocus(radius=(8, 8), alias_blur=(0.4, 0.4), p=0.4),
        albumentations.MotionBlur(blur_limit=27, p=0.4),
        albumentations.MedianBlur(blur_limit=27, p=0.4),
        albumentations.GaussianBlur(blur_limit=(25,27), p=0.4),
        albumentations.GlassBlur(sigma=0.7, max_delta=4, iterations=2, mode='fast', p=0.4),
        albumentations.ElasticTransform(alpha=80, sigma=6, alpha_affine=6, p=0.4),
        albumentations.ElasticTransform(alpha=20, sigma=80, alpha_affine=80, p=0.4),
        albumentations.GridDistortion(num_steps=5, distort_limit=(-0.2,0.2), p=0.4),
        albumentations.OpticalDistortion(distort_limit=(-0.3,0.3), shift_limit=(-0.5,0.5), p=0.4),
        albumentations.CoarseDropout(max_height=int(image_size * 0.5), max_width=int(image_size * 0.5), max_holes=1, fill_value=1., p=0.4),
    ], p=1.),
])

transforms_valid = albumentations.Compose([
##     albumentations.Resize(image_size, image_size),
])

# DataFrame

In [51]:
df = pd.read_csv('train_seg.csv')
df = df.sample(16).reset_index(drop=True) if DEBUG else df

# df.head()

In [52]:
df[1:2]

Unnamed: 0,StudyInstanceUID,patient_overall,C1,C2,C3,C4,C5,C6,C7,mask_file,image_folder,w,h,d,t,fold
1,1.2.826.0.1.3680043.27262,1,0,1,0,0,0,0,0,,/data/rsna-2022-cervical-spine-fracture-detect...,512,512,406,0.5,0


In [53]:
df[10:15]

Unnamed: 0,StudyInstanceUID,patient_overall,C1,C2,C3,C4,C5,C6,C7,mask_file,image_folder,w,h,d,t,fold
10,1.2.826.0.1.3680043.4744,1,0,0,0,1,1,0,0,,/data/rsna-2022-cervical-spine-fracture-detect...,512,512,281,0.625,0
11,1.2.826.0.1.3680043.15773,1,1,1,0,0,0,0,1,,/data/rsna-2022-cervical-spine-fracture-detect...,512,512,185,1.0,0
12,1.2.826.0.1.3680043.24946,0,0,0,0,0,0,0,0,,/data/rsna-2022-cervical-spine-fracture-detect...,512,512,571,0.5,0
13,1.2.826.0.1.3680043.9290,0,0,0,0,0,0,0,0,,/data/rsna-2022-cervical-spine-fracture-detect...,512,512,223,1.0,0
14,1.2.826.0.1.3680043.5482,1,1,0,0,0,0,0,0,,/data/rsna-2022-cervical-spine-fracture-detect...,512,512,234,0.625,0


# Dataset

In [54]:
class CLSDataset(Dataset):
    def __init__(self, df, mode, transform):

        self.df = df.reset_index()
        self.mode = mode
        self.transform = transform

    def __len__(self):
        return self.df.shape[0]

    def __getitem__(self, index):
        row = self.df.iloc[index]
        
        images_lst = []
        
        tmp = list(range(7))
        ### random order v3
        if self.mode == 'train' and random.random() < p_rand_order:
            _ = random.shuffle(tmp)
        ###
        for cid in (tmp):                
            filepath = os.path.join(data_dir, f'numpy_2/{row.StudyInstanceUID}_{cid+1}.npy')
            images = np.load(filepath)            
                # type(image), image.shape => <class 'numpy.ndarray'> (15, 224, 224, 6)

            images = np.stack([self.transform(image=images[i])['image'] for i in range(n_slice_per_c)], 0)

            images = images.transpose(0,3,1,2)
                # type(image), image.shape => <class 'numpy.ndarray'> (15, 6, 224, 224)        

            images = images / 255. # trim the 'data values' between 0. and 1. 
                 
            images_lst.append(images)                
            
        images_lst = np.stack(images_lst, 0)

        images_lst = images_lst.reshape((105,6,224,224)).astype(np.float32)

        if self.mode != 'test':
            labels = []
            # tmp => [0, 1, 2, 3, 4, 5, 6]
            for i in row[[f'C{x+1}' for x in tmp]].tolist():
                labels += [i] * n_slice_per_c
                
            # labels => [1, 1, 1, 1, 1, 1, 1, 1, .........., 0, 0, 0, 0, 0, 0, 0, 0] => 105 items

            images = torch.tensor(images_lst)#.float()
            labels = torch.tensor(labels).float()
            
            if self.mode == 'train' and random.random() < p_rand_order_v1:
                indices = torch.randperm(images.size(0))
                images = images_lst[indices]
                labels = labels[indices]

            return images, labels
        else:
            return torch.tensor(images)#.float()

In [55]:
# df_show = df[163:164]
# dataset_show = CLSDataset(df_show, 'train', transform=transforms_train)

In [56]:
# aa, bb = dataset_show[0]
# # aa.shape
# # print(torch.max(aa), torch.min(aa))

In [57]:
# # check all Cid (for one patient) one by one in order to verify that images are showing correctly or not -
# # - otherwise adjust numeric value in 'msk[cid] > 0.1' in above function.
# # plotter Cid = 2
# plt.rcParams["figure.figsize"] = (20,8)
# for i in range(10,20):
#     fx, arr = plt.subplots(1,6)
    
#     for j in range(6):
#         arr[j].imshow(aa[i][j,:,:])  

# Model

In [58]:
class TimmModelType2(nn.Module):
    def __init__(self, backbone, pretrained=False):
        super(TimmModelType2, self).__init__()

        self.encoder = timm.create_model(
            backbone,
            in_chans=in_chans,
            num_classes=out_dim,
            features_only=False,
            drop_rate=drop_rate,
            drop_path_rate=drop_path_rate,
            pretrained=pretrained
        )

        if 'efficient' in backbone:
            hdim = self.encoder.conv_head.out_channels
            self.encoder.classifier = nn.Identity()
        elif 'convnext' in backbone:
            hdim = self.encoder.head.fc.in_features
            self.encoder.head.fc = nn.Identity()

        self.lstm = nn.LSTM(hdim, 256, num_layers=2, dropout=drop_rate, bidirectional=True, batch_first=True)
        self.head = nn.Sequential(
            nn.Linear(512, 256),
            nn.InstanceNorm1d(256), # replaced BatchNorm1d for training with batch_size = 1
            nn.Dropout(drop_rate_last),
            nn.LeakyReLU(0.1),#0.1
            nn.Linear(256, out_dim),
        )
        self.lstm2 = nn.LSTM(hdim, 256, num_layers=2, dropout=drop_rate, bidirectional=True, batch_first=True)
        self.head2 = nn.Sequential(
            nn.Linear(512, 256),
            nn.InstanceNorm1d(256), # replaced BatchNorm1d for training with batch_size = 1
            nn.Dropout(drop_rate_last),
            nn.LeakyReLU(0.1),#0.1
            nn.Linear(256, 1),
        )



    def forward(self, x):  # (bs, nc*7, ch, sz, sz)
        bs = x.shape[0]

        x = x.view(bs * n_slice_per_c * 7, in_chans, image_size, image_size)

        feat = self.encoder(x)
        feat = feat.view(bs, n_slice_per_c * 7, -1)
        feat1, _ = self.lstm(feat)
        feat1 = feat1.contiguous().view(bs * n_slice_per_c * 7, 512)
        feat2, _ = self.lstm2(feat)

        return self.head(feat1), self.head2(feat2[:, 0])

In [59]:
# m = TimmModelType2(backbone)
# n_sequence = 7 * n_slice_per_c
# m(torch.rand(2, n_sequence, in_chans, image_size, image_size)).shape
# #     m(torch.rand(2, n_sequence, in_chans, image_size, image_size)).shape => torch.Size([2, 15])

In [60]:
# draw_graph(m, input_data = torch.rand(1, 3, 128,128,128), expand_nested=True, save_graph=True).visual_graph

# Loss & Metric

In [61]:
bce = nn.BCEWithLogitsLoss(reduction='none')


def criterion(logits, targets, activated=False):
    # logits.view(-1) => have negative values.

    if activated:
        losses = nn.BCELoss(reduction='none')(logits.view(-1), targets.view(-1))
            # .view(-1) => return a single dimension tensor.
            # .view() => Returns a new tensor with the same data as the self tensor but of a different shape.
    else:
        losses = bce(logits.view(-1), targets.view(-1))
    losses[targets.view(-1) > 0] *= 2.
         # losses[targets.view(-1) > 0] => selecting those indices values from losses, where the targets are '> 0'.
         # losses[targets.view(-1) > 0] *= 2. =>   losses[targets.view(-1) > 0] = losses[targets.view(-1) > 0] * 2.
    norm = torch.ones(logits.view(-1).shape[0]).to(device)
    norm[targets.view(-1) > 0] *= 2

    return losses.sum() / norm.sum()

# Train & Valid func

In [62]:
# shuffle the original input then mix this shuffled with original input.
def mixup(input_, truth, clip=[0.0, 1.0]):
    # torch.randperm(n, *, ...) => Returns a random permutation of integers from 0 to n - 1.    
    indices = torch.randperm(input_.size(0))
    
    # shuffling batch in batch of images.     
    shuffled_input = input_[indices]    
    # shuffling batch in batch of masks.     
    shuffled_labels = truth[indices]

    # np.random.uniform(low=0.0, high=1.0, size=None) => draw sample(s) from a uniform distribution over the over the half-open interval [low, high).
    lam = np.random.uniform(clip[0], clip[1])
    
    # mixing 'image batch' with 'shuffled image batch' i.e., type of image transformation.   
    input_ = input_ * lam + shuffled_input * (1 - lam)

    return input_, shuffled_labels, lam


def train_func(model, loader_train, optimizer, scaler=None):    
    model.train()
    train_loss = []
    train_loss1 = []
    train_loss2 = []
    bar = tqdm(loader_train)
    
    i=0
    for images, targets in bar:
        images = images.cuda()
        targets = targets.cuda()
        
        do_mixup = False
        if random.random() < p_mixup:
            do_mixup = True
            images, targets_mix, lam = mixup(images, targets)

        with amp.autocast():            
            logits, logits2 = model(images)
                # logits => tensor([[-1.1621],[-0.7876],[-0.7744],[-0.6548],[-0.8032], ...., [-0.5107], [-0.0616]], device='cuda:0',...)  
                # logits2 => tensor([[0.3247]], device='cuda:0',..)
                
                # targets.max() => tensor(1., device='cuda:0')
                # targets.max(1) => torch.return_types.max(values=tensor([1.], device='cuda:0'),indices=tensor([0], device='cuda:0'))
                # targets.max(1).values => tensor([1.], device='cuda:0')  
                # targets.max(dim=1) => Returns a namedtuple (values, indices) where values is the maximum value of each row of the input tensor in the given dimension dim=1. 
                
            if do_mixup: targets = targets * lam + targets_mix * (1 - lam)
            
            loss1 = criterion(logits, targets)
            loss2 = criterion(logits2, targets.max(1).values)
                # loss1 => tensor(0.7388, device='cuda:0', grad_fn=<DivBackward0>) 
                # loss2 => tensor(0.7343, device='cuda:0', grad_fn=<DivBackward0>)                

            #loss = (loss1 * lw[0] + loss2 * lw[1]) / sum(lw)       
            loss = loss1 + loss2 / 2.
            
#             if do_mixup:
#                 loss11 = criterion(logits, targets_mix)
#                 loss21 = criterion(logits2, targets_mix.max(1).values)
#                 #loss = loss * lam  + (loss11 * lw[0] + loss21 * lw[1]) / sum(lw) * (1 - lam) 
#                 loss = loss * lam  + (loss11  + loss21 ) / 2. * (1 - lam) 
                
        train_loss1.append(loss1.item())
        train_loss2.append(loss2.item())
        train_loss.append(loss.item())
        scaler.scale(loss).backward() # retain_graph=True 
        
        
        if (i + 1) % n_accumulate == 0:            
            scaler.step(optimizer)
            # scaler.step() first unscales the gradients of the optimizer's assigned params.        
            # If these gradients do not contain infs or NaNs, optimizer.step() is then called,
            # otherwise, optimizer.step() is skipped.
            # optimizer's assigned params; parameters which are to be optimized by optimizer.
        
            # Updates the scale for next iteration.
            scaler.update()
            
            # to reset the gradients of model parameters.             
            optimizer.zero_grad()   
            i=-1
        i+=1        

        bar.set_description(f'smth:{np.mean(train_loss1[-30:]):.4f} {np.mean(train_loss2[-30:]):.4f}')

    return np.mean(train_loss)


def valid_func(model, loader_valid):
    model.eval()
    valid_loss = []
    valid_loss1 = []
    valid_loss2 = []
    outputs = []
    bar = tqdm(loader_valid)
    with torch.no_grad():
        for images, targets in bar:
            images = images.cuda()
            targets = targets.cuda()

            logits, logits2 = model(images)
            loss1 = criterion(logits, targets)
            loss2 = criterion(logits2, targets.max(1).values)
            
            loss = (loss1 + loss2) / 2.
            #loss = (loss1* lw[0] + loss2* lw[1]) / sum(lw)
#             valid_loss1.append(loss1.item())
#             valid_loss2.append(loss2.item())
            valid_loss.append(loss.item())
#             bar.set_description(f'smth:{np.mean(valid_loss1[-30:]):.4f} {np.mean(valid_loss2[-30:]):.4f}')

    return np.mean(valid_loss)



In [63]:
# m = TimmModelType2(backbone)

In [64]:
# optimizer = optim.AdamW(m.parameters(), lr=14e-7)
# scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, 30, eta_min = 13e-7)
# # scheduler_cosine = torch.optim.lr_scheduler.LinearLR(optimizer, start_factor=0.33, total_iters=22)

# lrs = []
# for epoch in range(1, 30+1):
#     scheduler_cosine.step(epoch-1)
#     lrs.append(optimizer.param_groups[0]["lr"])
# plt.plot(range(len(lrs)), lrs)

# Training

In [65]:
def run(fold):

    log_file = os.path.join(log_dir, f'{kernel_type}.txt')
    model_file = os.path.join(model_dir, f'{kernel_type}_fold{fold}_best.pth')

    train_ = df[df['fold'] != fold].reset_index(drop=True)
    valid_ = df[df['fold'] == fold].reset_index(drop=True)

#     # for debugging
#     train_ = df[df['fold'] != fold][:100].reset_index(drop=True)
#     valid_ = df[df['fold'] != fold][100:120].reset_index(drop=True)

    dataset_train = CLSDataset(train_, 'train', transform=transforms_train)
    dataset_valid = CLSDataset(valid_, 'valid', transform=transforms_valid)
    loader_train = torch.utils.data.DataLoader(dataset_train, batch_size=batch_size, shuffle=True, num_workers=num_workers, drop_last=True)
    loader_valid = torch.utils.data.DataLoader(dataset_valid, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    model = TimmModelType2(backbone, pretrained=True)
    model = model.to(device)

    # if not first run, load previous model
    fold_l = 4#1
    load_model_file = os.path.join(model_dir_seg, f'{kernel_type}_fold{fold_l}_last.pth')
    sd = torch.load(load_model_file)
    if 'model_state_dict' in sd.keys():
        sd = sd['model_state_dict']
    sd = {k[7:] if k.startswith('module.') else k: sd[k] for k in sd.keys()}
    model.load_state_dict(sd, strict=False)    
    
    optimizer = optim.AdamW(model.parameters(), lr=init_lr, weight_decay=weight_decay)
    #optimizer = optim.SGD(model.parameters(), lr=init_lr, weight_decay=weight_decay)
    scaler = torch.cuda.amp.GradScaler()
    from_epoch = 0
    metric_best = 0.35
    loss_min = np.inf

    scheduler_cosine = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, n_epochs, eta_min=eta_min)

#     print(len(dataset_train), len(dataset_valid))

    for epoch in range(1, n_epochs+1):
        scheduler_cosine.step(epoch-1)
        if epoch < from_epoch + 1:
            print(logs[epoch-1])
            continue

        print(time.ctime(), 'Epoch:', epoch)

        train_loss = train_func(model, loader_train, optimizer, scaler)
        valid_loss = valid_func(model, loader_valid)
        metric = valid_loss

        content = time.ctime() + ' ' + f'Fold {fold}, Epoch {epoch}, lr: {optimizer.param_groups[0]["lr"]:.7f}, train loss: {train_loss:.5f}, valid loss: {valid_loss:.5f}, metric: {(metric):.6f}.'
        print(content)
        with open(log_file, 'a') as appender:
            appender.write(content + '\n')

        if metric < metric_best:#abs(train_loss-valid_loss) <= 0.009:
            print(f'metric_best ({metric_best:.6f} --> {metric:.6f}). Saving model ...')
            if not DEBUG:
                torch.save(model.state_dict(), model_file)
                metric_best = metric

        # Save Last
        if not DEBUG and abs(train_loss-valid_loss) <= 0.015:
            torch.save(model.state_dict(), model_file.replace('_best', '_last'))
#            torch.save(
#                {
#                    'epoch': epoch,
#                     'model_state_dict': model.state_dict(),
#                     'optimizer_state_dict': optimizer.state_dict(),
#                     'scaler_state_dict': scaler.state_dict() if scaler else None,
#                     'score_best': metric_best,
#                 },
#                 model_file.replace('_best', '_last')
#             )

    del model
    torch.cuda.empty_cache()
    _ = gc.collect()


In [1]:
#run(1)
#run(2)
#run(0)
#run(3)
run(4)












#  train loss: 0.05, valid loss: 0.01, fold1 last both kaggle

#  train loss: 0.05656, valid loss: 0.05088, fold4 last both kaggle

#  train loss: 0.06990, valid loss: 0.02214, fold0 last both kaggle

