_____
**Credit:** 
- Parts of this notebook are based on the [great](https://www.kaggle.com/code/realneuralnetwork/coatnet-strip-ai-inference) [notebooks](https://www.kaggle.com/code/realneuralnetwork/cnn-strip-ai-inference) by [Kabir Ivan
](https://www.kaggle.com/realneuralnetwork)
_____


# [Train + Infer] CoAtNet + EfficientNet

This notebook contains steps and code to train CoAtNet and EfficientNet-B4 for the Mayo Clinic - STRIP AI competition.
In this scenario we train CoAtNet + EfficinetNet-B4, then use these models for inference

#### Setup

In [1]:
import sys
sys.path.append('../input/einops')
sys.path.append('../input/efficientnet-pytorch/EfficientNet-PyTorch/EfficientNet-PyTorch-master')
from efficientnet_pytorch import EfficientNet
# !pip install --upgrade efficientnet-pytorch

## CoAtNet: Marrying Convolution and Attention for All Data Sizes

[Paper](https://arxiv.org/pdf/2106.04803.pdf)

The CoAtNet paper attempts to effectively combine the strengths from both convolutional and transformers architectures, they present CoAtNets(pronounced "coat" nets), a family of hybrid models built from two key insights: 
- Depthwise Convolution and self-Attention can be naturally unified via simple relative attention
- Vertically stacking convolution layers and attention layers in a principled way is surprisingly effective in improving generalization, capacity and efficiency.

![](https://i.ibb.co/Sd6wj7D/Selection-998.png)

### Imports, Seed, Data loading

In [2]:
import os
import gc
import cv2
import copy
import time
import torch
import random
import string
import joblib
import tifffile
import numpy as np 
import pandas as pd 
import torch.nn as nn
import seaborn as sns
from random import randint
from einops import rearrange
from torchvision import models
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from torch.optim import lr_scheduler
from einops.layers.torch import Rearrange
from efficientnet_pytorch import EfficientNet
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import warnings; warnings.filterwarnings("ignore")
gc.enable()


def seed_everything(seed_value):
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    os.environ['PYTHONHASHSEED'] = str(seed_value)    
    if torch.cuda.is_available(): 
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = True

seed = 42
seed_everything(seed)


debug = False
generate_new = False
train_df = pd.read_csv("../input/mayo-clinic-strip-ai/train.csv").head(10 if debug else 1000)
test_df = pd.read_csv("../input/mayo-clinic-strip-ai/test.csv")
dirs = ["../input/mayo-clinic-strip-ai/train/", "../input/mayo-clinic-strip-ai/test/"]

In [3]:
max_count = max(train_df.label.value_counts())
for label in train_df.label.unique():
    df = train_df.loc[train_df.label == label]
    while(train_df.label.value_counts()[label] < max_count):
        train_df = pd.concat([train_df, df.head(max_count - train_df.label.value_counts()[label])], axis = 0)
        
if(generate_new):
    os.mkdir("./train/")
    os.mkdir("./test/")
    for i in tqdm(range(test_df.shape[0])):
        img_id = test_df.iloc[i].image_id
        img = cv2.resize(tifffile.imread(dirs[1] + img_id + ".tif"), (512, 512))
        cv2.imwrite(f"./test/{img_id}.jpg", img)
        del img
        gc.collect()
    for i in tqdm(range(train_df.shape[0])):
        img_id = train_df.iloc[i].image_id
        img = cv2.resize(tifffile.imread(dirs[0] + img_id + ".tif"), (512, 512))
        cv2.imwrite(f"./train/{img_id}.jpg", img)
        del img
        gc.collect()

### Dataset Class

In [4]:
class ImgDataset(Dataset):
    def __init__(self, df):
        self.df = df 
        self.train = 'label' in df.columns    
    def __len__(self): return len(self.df)    
    def __getitem__(self, index):
        if(generate_new): paths = ["./test/", "./train/"]
        else: paths = ["../input/jpg-images-strip-ai/test/", "../input/jpg-images-strip-ai/train/"]
        image = cv2.imread(paths[self.train] + self.df.iloc[index].image_id + ".jpg")
        if len(image.shape) == 5:
            image = image.squeeze().transpose(1, 2, 0)
        image = cv2.resize(image, (512, 512)).transpose(2, 0, 1)
        label = None
        if(self.train): label = {"CE" : 0, "LAA": 1}[self.df.iloc[index].label]
        return image, label

### CoAtNet Training

In [5]:
def train_model(model, dataloaders_dict, criterion, optimizer, num_epochs):
    best_acc = 0.0
    for epoch in range(num_epochs):
        model.cuda()       
        for phase in ['train', 'val']:
            if phase == 'train': model.train()
            else: model.eval()
               
            epoch_loss = 0.0
            epoch_acc = 0
            
            dataloader = dataloaders_dict[phase]
            for item in tqdm(dataloader, leave=False):
                images = item[0].cuda().float()
                classes = item[1].cuda().long()
                optimizer.zero_grad()                
                with torch.set_grad_enabled(phase == 'train'):
                    output = model(images)
                    loss = criterion(output, classes)
                    _, preds = torch.max(output, 1)
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                    epoch_loss += loss.item() * len(output)
                    epoch_acc += torch.sum(preds == classes.data)                    
            data_size = len(dataloader.dataset)
            epoch_loss = epoch_loss / data_size
            epoch_acc = epoch_acc.double() / data_size
            print(f'Epoch {epoch + 1}/{num_epochs} | {phase:^5} | Loss: {epoch_loss:.4f} | Acc: {epoch_acc:.4f}')    
        if epoch_acc > best_acc:
            traced = torch.jit.trace(model.cpu(), torch.rand(1, 3, 512, 512))
            traced.save('model.pth')
            best_acc = epoch_acc

#### CoAtNet Architecture

In [6]:
def conv_3x3_bn(inp, oup, image_size, downsample=False):
    stride = 1 if downsample == False else 2
    return nn.Sequential(
        nn.Conv2d(inp, oup, 3, stride, 1, bias=False),
        nn.BatchNorm2d(oup),
        nn.GELU()
    )

class PreNorm(nn.Module):
    def __init__(self, dim, fn, norm):
        super().__init__()
        self.norm = norm(dim)
        self.fn = fn
    def forward(self, x, **kwargs):
        return self.fn(self.norm(x), **kwargs)

class SE(nn.Module):
    def __init__(self, inp, oup, expansion=0.25):
        super().__init__()
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(oup, int(inp * expansion), bias=False),
            nn.GELU(),
            nn.Linear(int(inp * expansion), oup, bias=False),
            nn.Sigmoid())
    def forward(self, x):
        b, c, _, _ = x.size()
        y = self.avg_pool(x).view(b, c)
        y = self.fc(y).view(b, c, 1, 1)
        return x * y

class FeedForward(nn.Module):
    def __init__(self, dim, hidden_dim, dropout=0.):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout))
    def forward(self, x):
        return self.net(x)


class MBConv(nn.Module):
    def __init__(self, inp, oup, image_size, downsample=False, expansion=4):
        super().__init__()
        self.downsample = downsample
        stride = 1 if self.downsample == False else 2
        hidden_dim = int(inp * expansion)

        if self.downsample:
            self.pool = nn.MaxPool2d(3, 2, 1)
            self.proj = nn.Conv2d(inp, oup, 1, 1, 0, bias=False)

        if expansion == 1:
            self.conv = nn.Sequential(
                nn.Conv2d(hidden_dim, hidden_dim, 3, stride, 1, groups=hidden_dim, bias=False),
                nn.BatchNorm2d(hidden_dim),
                nn.GELU(),
                nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup))
        else:
            self.conv = nn.Sequential(
                nn.Conv2d(inp, hidden_dim, 1, stride, 0, bias=False),
                nn.BatchNorm2d(hidden_dim),
                nn.GELU(),                
                nn.Conv2d(hidden_dim, hidden_dim, 3, 1, 1, groups=hidden_dim, bias=False),
                nn.BatchNorm2d(hidden_dim),
                nn.GELU(),
                SE(inp, hidden_dim),
                nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
                nn.BatchNorm2d(oup))
        
        self.conv = PreNorm(inp, self.conv, nn.BatchNorm2d)

    def forward(self, x):
        if self.downsample: return self.proj(self.pool(x)) + self.conv(x)
        else: return x + self.conv(x)

class Attention(nn.Module):
    def __init__(self, inp, oup, image_size, heads=8, dim_head=32, dropout=0.):
        super().__init__()
        inner_dim = dim_head * heads
        project_out = not (heads == 1 and dim_head == inp)
        self.ih, self.iw = image_size
        self.heads = heads
        self.scale = dim_head ** -0.5       
        self.relative_bias_table = nn.Parameter(
            torch.zeros((2 * self.ih - 1) * (2 * self.iw - 1), heads))
        coords = torch.meshgrid((torch.arange(self.ih), torch.arange(self.iw)))
        coords = torch.flatten(torch.stack(coords), 1)
        relative_coords = coords[:, :, None] - coords[:, None, :]
        relative_coords[0] += self.ih - 1
        relative_coords[1] += self.iw - 1
        relative_coords[0] *= 2 * self.iw - 1
        relative_coords = rearrange(relative_coords, 'c h w -> h w c')
        relative_index = relative_coords.sum(-1).flatten().unsqueeze(1)
        self.register_buffer("relative_index", relative_index)
        self.attend = nn.Softmax(dim=-1)
        self.to_qkv = nn.Linear(inp, inner_dim * 3, bias=False)
        self.to_out = nn.Sequential(
            nn.Linear(inner_dim, oup),
            nn.Dropout(dropout)
        ) if project_out else nn.Identity()

    def forward(self, x):
        qkv = self.to_qkv(x).chunk(3, dim=-1)
        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h=self.heads), qkv)
        dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale
        relative_bias = self.relative_bias_table.gather(0, self.relative_index.repeat(1, self.heads))
        relative_bias = rearrange(relative_bias, '(h w) c -> 1 c h w', h=self.ih*self.iw, w=self.ih*self.iw)
        dots = dots + relative_bias
        attn = self.attend(dots)
        out = torch.matmul(attn, v)
        out = rearrange(out, 'b h n d -> b n (h d)')
        out = self.to_out(out)
        return out

class Transformer(nn.Module):
    def __init__(self, inp, oup, image_size, heads=8, dim_head=32, downsample=False, dropout=0.):
        super().__init__()
        hidden_dim = int(inp * 4)
        self.ih, self.iw = image_size
        self.downsample = downsample
        if self.downsample:
            self.pool1 = nn.MaxPool2d(3, 2, 1)
            self.pool2 = nn.MaxPool2d(3, 2, 1)
            self.proj = nn.Conv2d(inp, oup, 1, 1, 0, bias=False)
        self.attn = Attention(inp, oup, image_size, heads, dim_head, dropout)
        self.ff = FeedForward(oup, hidden_dim, dropout)
        self.attn = nn.Sequential(
            Rearrange('b c ih iw -> b (ih iw) c'),
            PreNorm(inp, self.attn, nn.LayerNorm),
            Rearrange('b (ih iw) c -> b c ih iw', ih=self.ih, iw=self.iw))
        self.ff = nn.Sequential(
            Rearrange('b c ih iw -> b (ih iw) c'),
            PreNorm(oup, self.ff, nn.LayerNorm),
            Rearrange('b (ih iw) c -> b c ih iw', ih=self.ih, iw=self.iw))

    def forward(self, x):
        if self.downsample: x = self.proj(self.pool1(x)) + self.attn(self.pool2(x))
        else: x = x + self.attn(x)
        x = x + self.ff(x)
        return x


class CoAtNet(nn.Module):
    def __init__(self, image_size, in_channels, num_blocks, channels, num_classes=1000, block_types=['C', 'C', 'T', 'T']):
        super().__init__()
        ih, iw = image_size
        block = {'C': MBConv, 'T': Transformer}

        self.s0 = self._make_layer(
            conv_3x3_bn, in_channels, channels[0], num_blocks[0], (ih // 2, iw // 2))
        self.s1 = self._make_layer(
            block[block_types[0]], channels[0], channels[1], num_blocks[1], (ih // 4, iw // 4))
        self.s2 = self._make_layer(
            block[block_types[1]], channels[1], channels[2], num_blocks[2], (ih // 8, iw // 8))
        self.s3 = self._make_layer(
            block[block_types[2]], channels[2], channels[3], num_blocks[3], (ih // 16, iw // 16))
        self.s4 = self._make_layer(
            block[block_types[3]], channels[3], channels[4], num_blocks[4], (ih // 32, iw // 32))

        self.pool = nn.AvgPool2d(ih // 32, 1)
        self.fc = nn.Linear(channels[-1], num_classes, bias=False)

    def forward(self, x):
        x = self.s0(x)
        x = self.s1(x)
        x = self.s2(x)
        x = self.s3(x)
        x = self.s4(x)

        x = self.pool(x).view(-1, x.shape[1])
        x = self.fc(x)
        return x

    def _make_layer(self, block, inp, oup, depth, image_size):
        layers = nn.ModuleList([])
        for i in range(depth):
            if i == 0:
                layers.append(block(inp, oup, image_size, downsample=True))
            else:
                layers.append(block(oup, oup, image_size))
        return nn.Sequential(*layers)

#### Architecture Definition & Running the training

In [7]:
num_blocks = [2, 2, 12, 28, 2]
channels = [64, 64, 128, 256, 512]
model = CoAtNet((512, 512), 3, num_blocks, channels, num_classes=2)
train, val = train_test_split(train_df, test_size=0.2, random_state=42, stratify = train_df.label)
batch_size = 1
train_loader = DataLoader(ImgDataset(train), batch_size=batch_size, shuffle=False, num_workers=1)
val_loader = DataLoader(ImgDataset(val), batch_size=batch_size, shuffle=False, num_workers=1)
dataloaders_dict = {"train": train_loader, "val": val_loader}
criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
train_model(model, dataloaders_dict, criterion, optimizer, 1)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
train_model(model, dataloaders_dict, criterion, optimizer, 1)

  0%|          | 0/875 [00:00<?, ?it/s]

Epoch 1/1 | train | Loss: 0.8305 | Acc: 0.4526


  0%|          | 0/219 [00:00<?, ?it/s]

Epoch 1/1 |  val  | Loss: 0.6984 | Acc: 0.4703


  0%|          | 0/875 [00:00<?, ?it/s]

Epoch 1/1 | train | Loss: 0.6918 | Acc: 0.5291


  0%|          | 0/219 [00:00<?, ?it/s]

Epoch 1/1 |  val  | Loss: 0.8364 | Acc: 0.5251


### CoAtNet Inference

In [8]:
import os
import gc
import cv2
import copy
import time
import torch
import random
import string
import joblib
import tifffile
import numpy as np 
import pandas as pd 
from torch import nn
import seaborn as sns
from torchvision import models
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

import warnings; warnings.filterwarnings("ignore")
gc.enable()

debug = False
generate_new = True
test_df = pd.read_csv("../input/mayo-clinic-strip-ai/test.csv")
if(test_df.shape[0] == 4): test_df = pd.concat([test_df for i in range(25)])
dirs = ["../input/mayo-clinic-strip-ai/train/", "../input/mayo-clinic-strip-ai/test/"]
test_df

try:
    os.mkdir("../test/")
except:
    pass
for i in tqdm(range(test_df.shape[0])):
    img_id = test_df.iloc[i].image_id
    try:
        sz = os.path.getsize(dirs[1] + img_id + ".tif")
    except:
        sz = 1000000000
    if(sz > 8e8):
        img = np.zeros((512,512,3), np.uint8)
    else:
        try:
            img = cv2.resize(tifffile.imread(dirs[1] + img_id + ".tif"), (512, 512))
        except:
            img = np.zeros((512,512,3), np.uint8)
    cv2.imwrite(f"../test/{img_id}.jpg", img)
    del img
    gc.collect()      
    
class ImgDataset(Dataset):
    def __init__(self, df):
        self.df = df 
        self.train = 'label' in df.columns
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, index):
        if(generate_new):
            paths = ["../test/", "../train/"]
        else:
            paths = ["../input/jpg-images-strip-ai/test/", "../input/jpg-images-strip-ai/train/"]
        try:
            image = cv2.imread(paths[self.train] + self.df.iloc[index].image_id + ".jpg")
        except:
            image = np.zeros((512,512,3), np.uint8)
        label = 0
        try:
            if len(image.shape) == 5:
                image = image.squeeze().transpose(1, 2, 0)
            image = cv2.resize(image, (512, 512)).transpose(2, 0, 1)
        except:
            image = np.zeros((3, 512, 512))
        if(self.train):
            label = {"CE" : 0, "LAA": 1}[self.df.iloc[index].label]
        patient_id = self.df.iloc[index].patient_id
        return image, label, patient_id       
        
def predict(model, dataloader):
    model.cuda()
    model.eval()
    dataloader = dataloader
    outputs = []
    s = nn.Softmax(dim=1)
    ids = []
    for item in tqdm(dataloader, leave=False):
        patient_id = item[2][0]
        try:
            images = item[0].cuda().float()
            ids.append(patient_id)
            output = model(images)
            outputs.append(s(output.cpu()[:,:2])[0].detach().numpy())
        except:
            ids.append(patient_id)
            outputs.append(s(torch.tensor([[1, 1]]).float())[0].detach().numpy())
    return np.array(outputs), ids       
    
model = torch.jit.load('model.pth')
batch_size = 1
test_loader = DataLoader(
    ImgDataset(test_df), 
    batch_size=batch_size, 
    shuffle=False, 
    num_workers=1
)

anss, ids = predict(model, test_loader)
prob = pd.DataFrame({"CE" : anss[:,0], "LAA" : anss[:,1], "id" : ids}).groupby("id").mean()
submission = pd.read_csv("../input/mayo-clinic-strip-ai/sample_submission.csv")
submission.CE = prob.CE.to_list()
submission.LAA = prob.LAA.to_list()
submission.to_csv("submission_coatnet.csv", index = False)

  0%|          | 0/100 [00:00<?, ?it/s]

  0%|          | 0/100 [00:00<?, ?it/s]

_____

# EfficientNet B4

EfficientNet is one of the most solid baselines known today. It is a family of convolutional neural networks that have achieved state-of-the-art accuracy on ImageNet while also being smaller and faster than other models.
The main idea of EfficientNet is scaling up CNNs in a principled way. It uses a scalable architecture, named compound scaling, which balances network depth, width, and resolution to achieve superior performance.
The image below shows the scaling method in more detail.

![](https://i.ibb.co/Y86KGDg/image4-1.png)

#### EfficientNet B4 - Training

**Setups**

In [9]:
!mkdir /root/.cache/torch
!mkdir /root/.cache/torch/hub
!mkdir /root/.cache/torch/hub/checkpoints
!cp -r ../input/torchhub-efficientnet-b4/nvidia_efficientnet-b4_210412.pth /root/.cache/torch/hub/checkpoints/

In [10]:
import os
import gc
import cv2
import copy
import time
import torch
import random
import string
import joblib
import tifffile
import numpy as np 
import pandas as pd 
from torch import nn
import seaborn as sns
import efficientnet_pytorch
from torchvision import models
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from torch.optim import lr_scheduler
from efficientnet_pytorch import EfficientNet
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import warnings; warnings.filterwarnings("ignore")

debug = False
generate_new = False
train_df = pd.read_csv("../input/mayo-clinic-strip-ai/train.csv").head(10 if debug else 1000)
test_df = pd.read_csv("../input/mayo-clinic-strip-ai/test.csv")
dirs = ["../input/mayo-clinic-strip-ai/train/", "../input/mayo-clinic-strip-ai/test/"]

if(generate_new):
    os.mkdir("./train/")
    os.mkdir("./test/")
    for i in tqdm(range(test_df.shape[0])):
        img_id = test_df.iloc[i].image_id
        img = cv2.resize(tifffile.imread(dirs[1] + img_id + ".tif"), (512, 512))
        cv2.imwrite(f"./test/{img_id}.jpg", img)
        del img
        gc.collect()
    for i in tqdm(range(train_df.shape[0])):
        img_id = train_df.iloc[i].image_id
        img = cv2.resize(tifffile.imread(dirs[0] + img_id + ".tif"), (512, 512))
        cv2.imwrite(f"./train/{img_id}.jpg", img)
        del img
        gc.collect()
        
class ImgDataset(Dataset):
    def __init__(self, df):
        self.df = df 
        self.train = 'label' in df.columns
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, index):
        if(generate_new):
            paths = ["./test/", "./train/"]
        else:
            paths = ["../input/jpg-images-strip-ai/test/", "../input/jpg-images-strip-ai/train/"]
        image = cv2.imread(paths[self.train] + self.df.iloc[index].image_id + ".jpg")
        if len(image.shape) == 5:
            image = image.squeeze().transpose(1, 2, 0)
        image = cv2.resize(image, (512, 512)).transpose(2, 0, 1)
        label = None
        if(self.train):
            label = {"CE" : 0, "LAA": 1}[self.df.iloc[index].label]
        return image, label
    
def train_model(model, dataloaders_dict, criterion, optimizer, num_epochs):
    best_acc = 0.0

    for epoch in range(num_epochs):
        model.cuda()
        
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()
                
            epoch_loss = 0.0
            epoch_acc = 0
            
            dataloader = dataloaders_dict[phase]
            for item in tqdm(dataloader, leave=False):
                images = item[0].cuda().float()
                classes = item[1].cuda().long()

                optimizer.zero_grad()
                
                with torch.set_grad_enabled(phase == 'train'):
                    output = model(images)
                    loss = criterion(output, classes)
                    _, preds = torch.max(output, 1)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                    epoch_loss += loss.item() * len(output)
                    epoch_acc += torch.sum(preds == classes.data)
                    

            data_size = len(dataloader.dataset)
            epoch_loss = epoch_loss / data_size
            epoch_acc = epoch_acc.double() / data_size

            print(f'Epoch {epoch + 1}/{num_epochs} | {phase:^5} | Loss: {epoch_loss:.4f} | Acc: {epoch_acc:.4f}')
        
        if epoch_acc > best_acc:
            traced = torch.jit.trace(model.cpu(), torch.rand(1, 3, 512, 512))
            traced.save('efficientnet_model.pth')
            best_acc = epoch_acc

#### Running the training

In [11]:
model = efficientnet_pytorch.EfficientNet.from_name("efficientnet-b4")
checkpoint = torch.load('../input/efficientnet-pytorch/efficientnet-b4-e116e8b3.pth')
model.load_state_dict(checkpoint)

train, val = train_test_split(train_df, test_size=0.2, random_state=42, stratify = train_df.label)
batch_size = 1
train_loader = DataLoader(ImgDataset(train), batch_size=batch_size, shuffle=False, num_workers=1)
val_loader = DataLoader(ImgDataset(val), batch_size=batch_size, shuffle=False, num_workers=1)
dataloaders_dict = {"train": train_loader, "val": val_loader}
criterion = nn.CrossEntropyLoss()

optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)
train_model(model, dataloaders_dict, criterion, optimizer, 1)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
train_model(model, dataloaders_dict, criterion, optimizer, 1)

  0%|          | 0/603 [00:00<?, ?it/s]

Epoch 1/1 | train | Loss: 0.8517 | Acc: 0.7081


  0%|          | 0/151 [00:00<?, ?it/s]

Epoch 1/1 |  val  | Loss: 1.5957 | Acc: 0.5563


  0%|          | 0/603 [00:00<?, ?it/s]

Epoch 1/1 | train | Loss: 0.5728 | Acc: 0.7214


  0%|          | 0/151 [00:00<?, ?it/s]

Epoch 1/1 |  val  | Loss: 1.9526 | Acc: 0.5298


### Efficientnet Inference

In [12]:
import os
import gc
import cv2
import copy
import time
import random
import torch
import string
import joblib
import tifffile
import numpy as np 
import pandas as pd 
from torch import nn
import seaborn as sns
import efficientnet_pytorch
from tqdm.notebook import tqdm
from torchvision import models
import matplotlib.pyplot as plt
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import warnings; warnings.filterwarnings("ignore")

gc.enable()
debug = False
generate_new = True
test_df = pd.read_csv("../input/mayo-clinic-strip-ai/test.csv")
dirs = ["../input/mayo-clinic-strip-ai/train/", "../input/mayo-clinic-strip-ai/test/"]
test_df

try:
    os.mkdir("../test/")
except:
    pass
for i in tqdm(range(test_df.shape[0])):
    img_id = test_df.iloc[i].image_id
    try:
        sz = os.path.getsize(dirs[1] + img_id + ".tif")
    except:
        sz = 1000000000
    if(sz > 8e8):
        img = np.zeros((512,512,3), np.uint8)
    else:
        try:
            img = cv2.resize(tifffile.imread(dirs[1] + img_id + ".tif"), (512, 512))
        except:
            img = np.zeros((512,512,3), np.uint8)
    cv2.imwrite(f"../test/{img_id}.jpg", img)
    del img
    gc.collect()
    
    
class ImgDataset(Dataset):
    def __init__(self, df):
        self.df = df 
        self.train = 'label' in df.columns
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, index):
        if(generate_new):
            paths = ["../test/", "../train/"]
        else:
            paths = ["../input/jpg-images-strip-ai/test/", "../input/jpg-images-strip-ai/train/"]
        try:
            image = cv2.imread(paths[self.train] + self.df.iloc[index].image_id + ".jpg")
        except:
            image = np.zeros((512,512,3), np.uint8)
        label = 0
        try:
            if len(image.shape) == 5:
                image = image.squeeze().transpose(1, 2, 0)
            image = cv2.resize(image, (512, 512)).transpose(2, 0, 1)
        except:
            image = np.zeros((3, 512, 512))
        if(self.train):
            label = {"CE" : 0, "LAA": 1}[self.df.iloc[index].label]
        patient_id = self.df.iloc[index].patient_id
        return image, label, patient_id
def predict(model, dataloader):
    model.cuda()
    model.eval()
    dataloader = dataloader
    outputs = []
    s = nn.Softmax(dim=1)
    ids = []
    for item in tqdm(dataloader, leave=False):
        patient_id = item[2][0]
        try:
            images = item[0].cuda().float()
            ids.append(patient_id)
            output = model(images)
            outputs.append(s(output.cpu()[:,:2])[0].detach().numpy())
        except:
            ids.append(patient_id)
            outputs.append(s(torch.tensor([[1, 1]]).float())[0].detach().numpy())
    return np.array(outputs), ids


model = torch.jit.load('efficientnet_model.pth')
batch_size = 1
test_loader = DataLoader(
    ImgDataset(test_df), 
    batch_size=batch_size, 
    shuffle=False, 
    num_workers=1
)
anss, ids = predict(model, test_loader)

prob = pd.DataFrame({"CE" : anss[:,0], "LAA" : anss[:,1], "id" : ids}).groupby("id").mean()
submission = pd.read_csv("../input/mayo-clinic-strip-ai/sample_submission.csv")
submission.CE = prob.CE.to_list()
submission.LAA = prob.LAA.to_list()
submission.to_csv("submission_efficientnet.csv", index = False)

  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

# Ensemble

Now that we got both our models ready, we can simply combine the predictions of both and use them as an ensemble!
We do this by running the predictions and averaging them. This yields a more powerful model.

In [13]:
submission_coatnet = pd.read_csv("submission_coatnet.csv")
submission_efficientnet = pd.read_csv("submission_efficientnet.csv")
sub_df = pd.read_csv('../input/mayo-clinic-strip-ai/sample_submission.csv')

sub_df['CE'] = (submission_efficientnet['CE'].values + submission_coatnet['CE'].values) / 2.0
sub_df['LAA'] = (submission_efficientnet['LAA'].values + submission_coatnet['LAA'].values) / 2.0


sub_df.to_csv('submission.csv', index = False)