In [1]:
sz = 384
bs = 4
nfolds = 5
SEED = 2022
N = 16 #number of tiles per image
EPOCHS = 10
N_tile = 64

In [2]:
import sys
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')

In [3]:
import os
import gc
import zipfile
import torch

import random
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from sklearn.model_selection import StratifiedKFold
import cv2
# from efficientnet_pytorch import EfficientNet
from torch.utils.data import Dataset, DataLoader
from torch.optim import lr_scheduler
from torchvision import models
import torchvision
import torch.nn as nn
from torchvision import transforms
from fastai.vision import *
from fastai.layers import AdaptiveConcatPool2d, Flatten, Mish
import timm

In [4]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
scaler = torch.cuda.amp.GradScaler()

cuda:0


In [5]:
class Model(nn.Module):
    def __init__(self, arch='tf_efficientnetv2_s', n=2, pre=True):
        super().__init__()
#         m = torch.hub.load('facebookresearch/semi-supervised-ImageNet1K-models', arch)
        m = timm.create_model(arch, pretrained=pre)
        self.enc = nn.Sequential(*list(m.children())[:-2])       
        nc = list(m.children())[-1].in_features
        self.head = nn.Sequential(AdaptiveConcatPool2d(),Flatten(),nn.Linear(2*nc,512),
                            Mish(),nn.BatchNorm1d(512), nn.Dropout(0.5),nn.Linear(512,n))
        
    def forward(self, x):
        x = [x for x in x]
        shape = x[0].shape
        n = N
        x = torch.stack(x,1).view(-1,shape[1],shape[2],shape[3])
        #x: bs*N x 3 x 128 x 128
        x = self.enc(x)
        #x: bs*N x C x 4 x 4
        shape = x.shape
#         print(x.shape)
        #concatenate the output for tiles into a single map
        x = x.view(-1,n,shape[1],shape[2],shape[3]).permute(0,2,1,3,4).contiguous()\
          .view(-1,shape[1],shape[2]*n,shape[3])
        #x: bs x C x N*4 x 4
        x = self.head(x)
        #x: bs x n
        return x

In [6]:
# #PyTorch
# class DiceBCELoss(nn.Module):
#     def __init__(self, weight=None, size_average=True):
#         super(DiceBCELoss, self).__init__()
        
#     def onehot_onezero(y_true):
#         res = []
#         for i in y_true:
#             if i == 0:
#                 res.append(torch.tensor([1., 0.]))
#             else:
#                 res.append(torch.tensor([0., 1.]))

#         return torch.stack(res)

#     def forward(self, inputs, targets, smooth=1):
        
#         #comment out if your model contains a sigmoid or equivalent activation layer
#         inputs = F.sigmoid(inputs)  
        
#         targets = DiceBCELoss.onehot_onezero(targets).to(device)
#         #flatten label and prediction tensors
#         inputs = inputs.view(-1)
#         targets = targets.view(-1)
        
#         intersection = (inputs * targets).sum()                            
#         dice_loss = 1 - (2.*intersection + smooth)/(inputs.sum() + targets.sum() + smooth)  
#         BCE = torch.nn.BCELoss()(inputs, targets)
#         Dice_BCE = BCE + dice_loss
        
#         return Dice_BCE

In [7]:
class FocalLossWithOneHot(nn.Module):
    def __init__(self, gamma=0, eps=1e-7):
        super(FocalLossWithOneHot, self).__init__()
        self.gamma = gamma
        self.eps = eps

    def forward(self, input, target):
        y = torch.nn.functional.one_hot(target.to(torch.int64), num_classes=2)

        logit = torch.nn.functional.softmax(input, dim=-1)
        logit = logit.clamp(self.eps, 1. - self.eps)

        loss = -1 * y * torch.log(logit) # cross entropy
        loss = loss * (1 - logit) ** self.gamma # focal loss

        return loss.sum()

In [8]:
model = Model()
model = model.to(device)

# クロスエントロピー損失関数使用
# loss_fn = nn.BCEWithLogitsLoss().cuda()
loss_fn = nn.CrossEntropyLoss().cuda()
# loss_fn = nn.CrossEntropyLoss(label_smoothing=0.1).cuda()

# loss_fn = FocalLossWithOneHot(gamma=2).cuda()

optimizer = torch.optim.Adam(model.parameters(), lr=0.001, weight_decay=0.0001)

# optimizer = SGD(model, 0.1)
scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
# 前処理
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToPILImage(),
    torchvision.transforms.Resize((sz, sz)),
#     torchvision.transforms.RandAugment(),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.RandomVerticalFlip(),
    torchvision.transforms.RandomRotation(45),
#     torchvision.transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

transform_val = torchvision.transforms.Compose([
    torchvision.transforms.ToPILImage(),
    torchvision.transforms.Resize((sz, sz)),
#     torchvision.transforms.RandAugment(),
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
])

In [9]:
# !pip install torchinfo

In [10]:
# from torchinfo import summary
# summary(model=model, input_size=(bs, N, 3, sz, sz))

In [11]:
# This block is reffered from https://www.kaggle.com/code/yasufuminakama/mayo-train-images-size-1024-n-16-1/notebook
train = pd.read_csv('/kaggle/input/mayo-clinic-strip-ai/train.csv')
# train = train[train["image_id"] != "2c3c06_0"]

train['image_dir'] = ''

train.loc[:100,'image_dir'] = '/kaggle/input/mayotiled64384x384/train_images/train_images_1/'
train.loc[100:200,'image_dir'] = '/kaggle/input/mayotiled64384x384/train_images/train_images_2/'
train.loc[200:300,'image_dir'] = '/kaggle/input/mayotiled64384x384/train_images/train_images_3/'
train.loc[300:400,'image_dir'] = '/kaggle/input/mayotiled64384x384/train_images/train_images_4/'
train.loc[400:500,'image_dir'] = '/kaggle/input/mayotiled64384x384/train_images/train_images_5/'
train.loc[500:600,'image_dir'] = '/kaggle/input/mayotiled64384x384/train_images/train_images_6/'
train.loc[600:700,'image_dir'] = '/kaggle/input/mayotiled64384x384/train_images/train_images_7/'
train.loc[700:,'image_dir'] = '/kaggle/input/mayotiled64384x384/train_images/train_images_8/'
# train.loc[:100,'image_dir'] = '/kaggle/input/mayo-tiled-16-384x384/train_images/train_images/train_images_1/'
# train.loc[100:200,'image_dir'] = '/kaggle/input/mayo-tiled-16-384x384/train_images/train_images/train_images_2/'
# train.loc[200:300,'image_dir'] = '/kaggle/input/mayo-tiled-16-384x384/train_images/train_images/train_images_3/'
# train.loc[300:400,'image_dir'] = '/kaggle/input/mayo-tiled-16-384x384/train_images/train_images/train_images_4/'
# train.loc[400:500,'image_dir'] = '/kaggle/input/mayo-tiled-16-384x384/train_images/train_images/train_images_5/'
# train.loc[500:600,'image_dir'] = '/kaggle/input/mayo-tiled-16-384x384/train_images/train_images/train_images_6/'
# train.loc[600:700,'image_dir'] = '/kaggle/input/mayo-tiled-16-384x384/train_images/train_images/train_images_7/'
# train.loc[700:,'image_dir'] = '/kaggle/input/mayo-tiled-16-384x384/train_images/train_images/train_images_8/'

target_mapper = {"CE": 0, "LAA": 1}

train["target"] = train["label"].map(lambda x: target_mapper[x])

splits = StratifiedKFold(n_splits=nfolds, random_state=SEED, shuffle=True)
splits = list(splits.split(train,train.center_id))
folds_splits = np.zeros(len(train)).astype(np.int)
for i in range(nfolds): folds_splits[splits[i][1]] = i
train['split'] = folds_splits

class TrainDataset(Dataset):
    def __init__(self, cfg, df, transform=None, aug=True):
        self.cfg = cfg
        self.image_ids = df['image_id'].values
        self.image_dirs = df['image_dir'].values
#         self.image_path = df["path"].values
        self.labels = df['target'].values
        self.transform = transform
        self.aug = aug

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        image_id = self.image_ids[idx]
        image_dir = self.image_dirs[idx]
        images = []
        img_indexes = random.sample(list(range(0, N_tile)), N)
        for i in img_indexes:
            path = image_dir + image_id + f'_{i}.jpg'
            image = cv2.imread(path)
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            image = self.transform(image)
            images.append(image)
        images = torch.stack(images, dim=0)
        
#         if self.aug:
#             images = torchvision.transforms.RandAugment()(images)
            
        label = torch.tensor(self.labels[idx]).long()
        return images, label

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations


In [12]:
def train_one_epoch(model, device, train_loader, val_loader, optimizer, scheduler, epoch, loss_fn):
    running_loss = 0.
    last_loss = 0.
    val_loss = 0.
    model.train(True)

    # Here, we use enumerate(training_loader) instead of
    # iter(training_loader) so that we can track the batch
    # index and do some intra-epoch reporting
    for i, (data, target) in enumerate(train_loader):
        # Every data instance is an input + label pair
        inputs, labels = data.to(device), target.to(device)
        # Zero your gradients for every batch!
        optimizer.zero_grad()

        # Make predictions for this batch
        with torch.cuda.amp.autocast():
            outputs = model(inputs)
#             loss = loss_fn(outputs, labels)

#             loss = loss_fn(torch.squeeze(outputs), labels.float())
            loss = loss_fn(outputs, labels)

#         if len(outputs) != bs:
#             print(outputs)

        # Compute the loss and its gradients
        
        scaler.scale(loss).backward()
        
        scaler.step(optimizer)
        scaler.update()

        # Gather data and report
        running_loss += loss.item()
        if i % 10 == 9:
            last_loss = running_loss / 10 # loss per batch
            print('  batch {} loss: {}'.format(i + 1, last_loss))
            tb_x = epoch * len(train_loader) + i + 1
            running_loss = 0.
#     scheduler.step()
    
#     model.requires_grad_(False)
    model.eval()
    with torch.no_grad():
        for j, (data, target) in enumerate(val_loader):
            inputs, labels = data.to(device), target.to(device)
            with torch.cuda.amp.autocast():
                outputs = model(inputs)
#                 loss = loss_fn(torch.squeeze(outputs), labels.float())
                loss = loss_fn(outputs, labels)

            val_loss += loss.item()
    
    return val_loss/j

In [13]:
# for i, one_batch in enumerate(training_loader):
#     x = one_batch[0]
#     print(x.shape)
#     y = one_batch[1]
#     x = [x for x in x]
#     shape = x[0].shape
#     print(torch.stack(x,1).view(-1,shape[1],shape[2],shape[3]).shape)
#     if i==5:
#         break

In [14]:
# y

In [15]:
for fold in range(nfolds):
    training_set = TrainDataset(None,train[train["split"]!=fold], transform=transform, aug=True)
    training_loader = torch.utils.data.DataLoader(training_set, shuffle=True, num_workers=2, batch_size=bs, drop_last=True)
    val_set = TrainDataset(None,train[train["split"]==fold], transform=transform_val, aug=False)
    val_loader = torch.utils.data.DataLoader(val_set, shuffle=True, num_workers=2, batch_size=bs, drop_last=True)

    epoch_number=0
    avg_loss = [999,]
    for epoch in range(EPOCHS):
        print('EPOCH {}:'.format(epoch_number + 1))

        epoch_loss = train_one_epoch(model=model, device=device, train_loader=training_loader, val_loader=val_loader, optimizer=optimizer, scheduler=scheduler, epoch=epoch_number, loss_fn=loss_fn)
        epoch_number += 1
        print("EPOCH ", str(epoch+1), "val loss: ", epoch_loss)
        
        if min(avg_loss) > epoch_loss:
            print("save model concat-tile-pooling-384-effv2-fold{}.pth".format(str(fold)))
            torch.save(model.state_dict(), "concat-tile-pooling-384-effv2-fold{}.pth".format(str(fold)))
        
        avg_loss.append(epoch_loss)

EPOCH 1:
  batch 10 loss: 1.3066925823688507
  batch 20 loss: 1.8727423429489136
  batch 30 loss: 0.8587545201182365
  batch 40 loss: 1.1802908092737199
  batch 50 loss: 0.9048580080270767
  batch 60 loss: 0.9885639548301697
  batch 70 loss: 0.8081225037574769
  batch 80 loss: 0.7727133899927139
  batch 90 loss: 0.7118479162454605
  batch 100 loss: 0.9034016370773316
  batch 110 loss: 1.087413950264454
  batch 120 loss: 0.7902820318937301
  batch 130 loss: 0.7376235663890839
  batch 140 loss: 0.6717137843370438
  batch 150 loss: 0.7587411075830459
EPOCH  1 val loss:  3.7815375816490917
save model concat-tile-pooling-384-effv2-fold0.pth
EPOCH 2:
  batch 10 loss: 0.5877505868673325
  batch 20 loss: 0.8544884026050568
  batch 30 loss: 1.1354012608528137
  batch 40 loss: 0.879451933503151
  batch 50 loss: 0.628269599378109
  batch 60 loss: 0.8919453918933868
  batch 70 loss: 0.7769123703241348
  batch 80 loss: 1.1661761380732059
  batch 90 loss: 0.9975090593099594
  batch 100 loss: 0.47421

In [16]:
# for input, label in training_loader:
#     break

In [17]:
# label


In [18]:
# torch.save(model.state_dict(), "concat-tile-pooling-model.pth")

In [19]:
!mkdir /kaggle/working/models
!mv /kaggle/working/*.pth /kaggle/working/models/

In [20]:
!tar -cvf /kaggle/working/models.tar /kaggle/working/models/

tar: Removing leading `/' from member names
/kaggle/working/models/
/kaggle/working/models/concat-tile-pooling-384-effv2-fold3.pth
/kaggle/working/models/concat-tile-pooling-384-effv2-fold0.pth
/kaggle/working/models/concat-tile-pooling-384-effv2-fold2.pth
/kaggle/working/models/concat-tile-pooling-384-effv2-fold4.pth
/kaggle/working/models/concat-tile-pooling-384-effv2-fold1.pth
