In [1]:
# Notebook-wide debug switch. It is not referenced by the cells shown here;
# NOTE(review): confirm where (or whether) later cells consume it.
DEBUG: bool = False

In [2]:
import os
import time
import random
import numpy as np
from tqdm import tqdm
import pandas as pd
import albumentations
import matplotlib.pyplot as plt
from pylab import rcParams
import timm
import torch
import torch.nn as nn
import torch.cuda.amp as amp
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import GroupKFold

In [3]:
class CFG:
    """Static namespace of hyper-parameters shared by the notebook cells.

    Within the cells shown here, ``image_size`` feeds the albumentations
    ``Resize`` steps, ``n_slice_per_c`` is the number of crops ``ClsDataset``
    loads per crop id, and ``batch_size`` / ``num_workers`` configure the
    preview ``DataLoader``. The remaining fields are consumed outside the
    visible cells; the section headings below follow the field names only.
    """

    # --- backbone / head -------------------------------------------------
    backbone = 'tf_efficientnetv2_s_in21ft1k'
    in_chans = 6
    out_dim = 1
    drop_rate = 0.0
    drop_rate_last = 0.3
    drop_path_rate = 0.0

    # --- input -----------------------------------------------------------
    image_size = 224
    n_slice_per_c = 15

    # --- optimisation ----------------------------------------------------
    p_mixup = 0.5
    init_lr = 2.3e-4
    eta_min = 2.3e-5
    n_epochs = 75
    use_amp = True

    # --- runtime / data loading -------------------------------------------
    device = 'cuda'
    batch_size = 8
    num_workers = 4

In [5]:
# Training-time augmentation: resize to the configured square size, random
# flips/transpose, brightness jitter, an affine shift/scale/rotate, and
# (half of the time) exactly one blur/noise operation.
transforms_train = albumentations.Compose([
    albumentations.Resize(CFG.image_size, CFG.image_size),
    albumentations.HorizontalFlip(p=0.5),
    albumentations.VerticalFlip(p=0.5),
    albumentations.Transpose(p=0.5),
    # `RandomBrightness` is a deprecated alias that newer albumentations
    # releases no longer ship. Brightness-only jitter is expressed through
    # RandomBrightnessContrast with the contrast range pinned to zero.
    albumentations.RandomBrightnessContrast(brightness_limit=0.1, contrast_limit=0.0, p=0.7),
    # border_mode=4 is OpenCV's BORDER_REFLECT_101 (cv2 is not imported here).
    albumentations.ShiftScaleRotate(shift_limit=0.3, scale_limit=0.3, rotate_limit=45, border_mode=4, p=0.7),

    albumentations.OneOf([
        albumentations.MotionBlur(blur_limit=3),
        albumentations.MedianBlur(blur_limit=3),
        albumentations.GaussianBlur(blur_limit=3),
        # NOTE(review): recent albumentations releases replace `var_limit`
        # with `std_range`; confirm against the installed version.
        albumentations.GaussNoise(var_limit=(3.0, 9.0)),
    ], p=0.5),

    # Disabled augmentations, kept for reference:
    # albumentations.OneOf([
    #     albumentations.OpticalDistortion(distort_limit=1.),
    #     albumentations.GridDistortion(num_steps=5, distort_limit=1.),
    # ], p=0.5),
    # albumentations.Cutout(max_h_size=int(CFG.image_size * 0.5), max_w_size=int(CFG.image_size * 0.5), num_holes=1, p=0.5),
])

# Validation-time pipeline: deterministic resize only.
transforms_valid = albumentations.Compose([
    albumentations.Resize(CFG.image_size, CFG.image_size),
])



In [6]:
class ClsDataset(Dataset):
    """Dataset returning, per dataframe row, a stack of crops and per-crop labels.

    For the requested row, ``n_slice_per_c`` ``.npy`` crops are loaded for each
    of the five crop ids in ``map_cls`` (visited in a freshly shuffled order).
    Every crop goes through ``transform``, is moved to channel-first layout,
    cast to float32 and divided by 255. Crop ids 2 and 3 both take their label
    from the ``'kidney'`` column.

    Args:
        df: DataFrame with one row per item. It must contain the label columns
            named in ``map_cls`` plus the two identifier columns below.
        mode: Stored on the instance; this class does not read it.
        transform: Callable invoked as ``transform(image=array)`` that returns a
            mapping with an ``'image'`` entry (the albumentations convention).
        data_dir: Directory holding the crops, named
            ``{patient_id}_{study_id}_{cid}_{ind}.npy``.
        n_slice_per_c: Crops to load per crop id. ``None`` (default) means
            ``CFG.n_slice_per_c``, looked up on every access.
        patient_col: Column supplying the first file-name component.
        study_col: Column supplying the second file-name component.
            NOTE(review): the default column names mirror the variable names
            the original code referenced without defining; confirm them
            against the real CSV schema.
    """

    def __init__(self, df, mode, transform, data_dir='data/stage1/crop',
                 n_slice_per_c=None, patient_col='patient_id', study_col='study_id'):
        self.df = df
        self.mode = mode
        self.transform = transform
        self.data_dir = data_dir
        self.n_slice_per_c = n_slice_per_c
        self.patient_col = patient_col
        self.study_col = study_col
        self.map_cls = {0: 'liver', 1: 'spleen', 2: 'kidney', 3: 'kidney', 4: 'bowel'}

    def __len__(self):
        """Return the number of rows in the backing dataframe."""
        return self.df.shape[0]

    def __getitem__(self, idx):
        """Load all crops for row ``idx``.

        Returns:
            ``(images, labels)`` float tensors. ``images`` stacks
            ``5 * n_slice_per_c`` crops along dim 0, each transposed with
            ``(2, 0, 1)``; ``labels`` has one entry per crop.

        Raises:
            KeyError: if an identifier or label column is missing from ``df``.
            FileNotFoundError: if an expected crop file does not exist.
        """
        # Column-wise lookups keep each column's own dtype. Materialising the
        # row with `df.iloc[idx]` would upcast mixed-dtype rows and could turn
        # an integer id into '123.0' inside the file name.
        patient_id = self.df[self.patient_col].iloc[idx]
        study_id = self.df[self.study_col].iloc[idx]
        n_slices = CFG.n_slice_per_c if self.n_slice_per_c is None else self.n_slice_per_c

        # Visit the crop ids in random order on every access.
        cids = list(self.map_cls)
        random.shuffle(cids)

        images = []
        labels = []
        for cid in cids:
            for ind in range(n_slices):
                filepath = os.path.join(self.data_dir, f'{patient_id}_{study_id}_{cid}_{ind}.npy')
                image = np.load(filepath)
                image = self.transform(image=image)['image']
                images.append(image.transpose(2, 0, 1).astype(np.float32) / 255.)
            # Every crop of this id shares the row-level label of its organ.
            label = self.df[self.map_cls[cid]].iloc[idx]
            labels.extend([label] * n_slices)

        images = torch.from_numpy(np.stack(images, 0)).float()
        labels = torch.tensor(labels).float()
        return images, labels
        

# Visual sanity check: draw the first five training items through the
# augmentation pipeline and plot one slice of each.
rcParams['figure.figsize'] = (20, 8)

df = pd.read_csv('data/stage2/train_cls.csv')
print('len', len(df))

df_show = df
dataset_show = ClsDataset(df_show, 'train', transform=transforms_train)
loader_show = DataLoader(
    dataset_show,
    batch_size=CFG.batch_size,
    shuffle=False,
    num_workers=CFG.num_workers,
)

f, axarr = plt.subplots(2, 5)
for p, (ax_top, ax_bottom) in enumerate(zip(axarr[0], axarr[1])):
    idx = p
    imgs, lbl = dataset_show[idx]
    sample = imgs[3]  # fourth crop of the stack returned for this item
    # Top row: first three channels as an RGB image; bottom row: last channel.
    ax_top.imshow(sample[:3].permute(1, 2, 0))
    ax_bottom.imshow(sample[-1])



FileNotFoundError: [Errno 2] No such file or directory: 'data/stage2/train_cls.csv'