In [1]:
# Make the bundled pytorch-image-models (timm) sources importable.
# (An earlier variant pointed at '../input/efficientnet-pytorch-07/efficientnet_pytorch-0.7.0'.)
import sys

package_path = '../input/pytorch-image-models/pytorch-image-models-master'
sys.path.append(package_path)

In [2]:
from glob import glob
from sklearn.model_selection import GroupKFold, StratifiedKFold
import cv2
from skimage import io
import torch
from torch import nn
import os
from datetime import datetime
import time
import random
import cv2
import torchvision
from torchvision import transforms
import pandas as pd
import numpy as np
from tqdm import tqdm

import matplotlib.pyplot as plt
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from  torch.cuda.amp import autocast, GradScaler

import sklearn
import warnings
import joblib
from sklearn.metrics import roc_auc_score, log_loss
from sklearn import metrics
import warnings
import cv2
import pydicom
import timm #from efficientnet_pytorch import EfficientNet
from scipy.ndimage.interpolation import zoom
from sklearn.metrics import log_loss

In [3]:
# Training metadata; later cells read its `image_id` and `label` columns.
TRAIN_CSV_PATH = '../input/cassava-leaf-disease-classification/train.csv'
train = pd.read_csv(TRAIN_CSV_PATH)

In [4]:
# Sample submission shipped with the competition data (loaded for reference).
SAMPLE_SUBMISSION_PATH = '../input/cassava-leaf-disease-classification/sample_submission.csv'
submission = pd.read_csv(SAMPLE_SUBMISSION_PATH)

# Helper Functions

In [5]:
def seed_everything(seed):
    """Seed every random number generator used by the notebook.

    Seeds Python's `random`, NumPy and PyTorch (CPU and all CUDA devices),
    exports PYTHONHASHSEED, and puts cuDNN into deterministic mode.

    Args:
        seed: integer seed applied to all generators.
    """
    random.seed(seed)
    # NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so setting it
    # here presumably only affects child processes - confirm if hash order matters.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one (no-op without CUDA).
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and pick different kernels from run to
    # run, which undoes the determinism requested on the line above.
    torch.backends.cudnn.benchmark = False
    
def get_img(path):
    """Read an image file with OpenCV and return it in RGB channel order.

    Args:
        path: filesystem path of the image.

    Returns:
        A contiguous array with the channel axis converted from OpenCV's BGR
        order to RGB.

    Raises:
        FileNotFoundError: if OpenCV could not read the file (`cv2.imread`
            signals failure by returning None instead of raising).
    """
    im_bgr = cv2.imread(path)
    if im_bgr is None:
        # Fail with the offending path instead of an opaque
        # "'NoneType' object is not subscriptable" further down.
        raise FileNotFoundError('Could not read image: {}'.format(path))
    # cvtColor returns a fresh contiguous array, unlike the negative-stride
    # view produced by im_bgr[:, :, ::-1].
    return cv2.cvtColor(im_bgr, cv2.COLOR_BGR2RGB)

# Dataset

In [6]:
class CassavaDataset(Dataset):
    """Map-style dataset that loads images listed in a DataFrame.

    Each row of `df` supplies an `image_id` (file name under `data_root`) and,
    when `output_label` is set, a `label`.

    Args:
        df: DataFrame with an `image_id` column (and `label` if labels are
            requested). A re-indexed copy is stored, so the caller's frame is
            not modified.
        data_root: directory that contains the image files.
        transforms: optional callable invoked as `transforms(image=img)`; its
            `'image'` entry replaces the loaded image.
        output_label: when True, `__getitem__` returns `(img, label)`;
            otherwise only `img`.
    """

    def __init__(
        self, df, data_root, transforms=None, output_label=True
    ):
        super().__init__()
        self.data_root = data_root
        self.transforms = transforms
        self.output_label = output_label
        # Positional access below relies on a clean 0..n-1 index.
        self.df = df.reset_index(drop=True).copy()

    def __len__(self):
        """Number of rows (images) in the dataset."""
        return self.df.shape[0]

    def __getitem__(self, index: int):
        """Load the image at `index`, apply transforms, and optionally add its label."""
        row = self.df.iloc[index]

        # Read the label first so a missing column fails before any image I/O.
        if self.output_label:
            target = row['label']

        img = get_img("{}/{}".format(self.data_root, row['image_id']))

        if self.transforms:
            img = self.transforms(image=img)['image']

        if self.output_label == True:
            return img, target
        return img

# Define Train/Validation Image Augmentations

In [13]:
from albumentations import (
    HorizontalFlip, VerticalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine, RandomResizedCrop,
    IAASharpen, IAAEmboss, RandomBrightnessContrast, Flip, OneOf, Compose, Normalize, Cutout, CoarseDropout, ShiftScaleRotate, CenterCrop, Resize
)

from albumentations.pytorch import ToTensorV2

from albumentations import (
    HorizontalFlip, VerticalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine, RandomResizedCrop,
    IAASharpen, IAAEmboss, RandomBrightnessContrast, Flip, OneOf, Compose, Normalize, Cutout, CoarseDropout, ShiftScaleRotate, CenterCrop, Resize
)

from albumentations.pytorch import ToTensorV2

def get_train_transforms(CFG=None):
    """Build the training augmentation pipeline.

    Args:
        CFG: config dict providing `'img_size'` (output height and width).
            When omitted, a module-level `CFG` is used if one exists, which
            keeps the old zero-argument call working.

    Returns:
        An albumentations `Compose` that ends with `Normalize`, the two
        dropout-style augmentations and `ToTensorV2`.

    Raises:
        NameError: if no config is passed and no global `CFG` is defined.
    """
    if CFG is None:
        # The notebook only defines EF_CFG / RES_CFG / ViT_CFG, so a bare global
        # lookup would fail; fall back explicitly and report clearly.
        CFG = globals().get('CFG')
        if CFG is None:
            raise NameError(
                "get_train_transforms() needs a config: pass CFG or define a global 'CFG'"
            )

    img_size = CFG['img_size']
    return Compose([
            RandomResizedCrop(img_size, img_size),
            Transpose(p=0.5),
            HorizontalFlip(p=0.5),
            VerticalFlip(p=0.5),
            ShiftScaleRotate(p=0.5),
            HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
            RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=0.5),
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
            CoarseDropout(p=0.5),
            Cutout(p=0.5),
            ToTensorV2(p=1.0),
        ], p=1.)
  
        
def get_valid_transforms(CFG=None):
    """Build the deterministic validation pipeline (crop, resize, normalize, to tensor).

    Args:
        CFG: config dict providing `'img_size'`. When omitted, a module-level
            `CFG` is used if one exists, which keeps the old zero-argument
            call working.

    Returns:
        An albumentations `Compose` of `CenterCrop`, `Resize`, `Normalize`
        and `ToTensorV2`.

    Raises:
        NameError: if no config is passed and no global `CFG` is defined.
    """
    if CFG is None:
        # The notebook only defines EF_CFG / RES_CFG / ViT_CFG, so a bare global
        # lookup would fail; fall back explicitly and report clearly.
        CFG = globals().get('CFG')
        if CFG is None:
            raise NameError(
                "get_valid_transforms() needs a config: pass CFG or define a global 'CFG'"
            )

    img_size = CFG['img_size']
    return Compose([
            CenterCrop(img_size, img_size, p=1.),
            Resize(img_size, img_size),
            Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
            ToTensorV2(p=1.0),
        ], p=1.)

def get_inference_transforms(CFG):
    """Build the randomized pipeline used for test-time augmentation.

    Args:
        CFG: config dict; only `'img_size'` is read (output height and width).

    Returns:
        An albumentations `Compose` of random crop/flip/colour transforms
        followed by `Normalize` and `ToTensorV2`.
    """
    side = CFG['img_size']
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]

    steps = [
        RandomResizedCrop(side, side),
        Transpose(p=0.5),
        HorizontalFlip(p=0.5),
        VerticalFlip(p=0.5),
        HueSaturationValue(
            hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5
        ),
        RandomBrightnessContrast(
            brightness_limit=(-0.1, 0.1), contrast_limit=(-0.1, 0.1), p=0.5
        ),
        Normalize(mean=channel_mean, std=channel_std, max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return Compose(steps, p=1.)

# Model

In [14]:
class CassvaImgClassifier(nn.Module):
    """timm backbone whose `classifier` layer is swapped for a new linear head.

    Args:
        model_arch: architecture name passed to `timm.create_model`.
        n_class: number of output units of the replacement head.
        pretrained: forwarded to `timm.create_model`.
    """

    def __init__(self, model_arch, n_class, pretrained=False):
        super().__init__()
        backbone = timm.create_model(model_arch, pretrained=pretrained)
        # Keep the backbone's feature width, replace only the output layer.
        backbone.classifier = nn.Linear(backbone.classifier.in_features, n_class)
        self.model = backbone

    def forward(self, x):
        """Run the wrapped model on `x` and return its output unchanged."""
        return self.model(x)
    
class CustomViT(nn.Module):
    """timm model whose `head` layer is swapped for a new linear head.

    Args:
        model_arch: architecture name passed to `timm.create_model`.
        num_classes: number of output units of the replacement head.
        pretrained: forwarded to `timm.create_model`.
    """

    def __init__(self, model_arch, num_classes, pretrained=False):
        super().__init__()
        backbone = timm.create_model(model_arch, pretrained=pretrained)
        # ViT-style timm models expose the final layer as `head`.
        # (A Dropout(0.3) + Linear `classifier` variant was tried and left disabled.)
        backbone.head = nn.Linear(backbone.head.in_features, num_classes)
        self.model = backbone

    def forward(self, x):
        """Run the wrapped model on `x` and return its output unchanged."""
        return self.model(x)
    
    
class CustomResNext(nn.Module):
    """timm model whose `fc` layer is swapped for a new linear head.

    Args:
        model_arch: architecture name passed to `timm.create_model`
            (e.g. 'resnext50_32x4d').
        num_classes: number of output units of the replacement head.
        pretrained: forwarded to `timm.create_model`.
    """

    def __init__(self, model_arch, num_classes, pretrained=False):
        super().__init__()
        backbone = timm.create_model(model_arch, pretrained=pretrained)
        # Keep the backbone's feature width, replace only the output layer.
        backbone.fc = nn.Linear(backbone.fc.in_features, num_classes)
        self.model = backbone

    def forward(self, x):
        """Run the wrapped model on `x` and return its output unchanged."""
        return self.model(x)

In [15]:
def _make_cfg(model_arch, img_size):
    """Return a fresh config dict; only architecture and image size vary per model."""
    return {
        'fold_num': 5,
        'seed': 719,
        'model_arch': model_arch,
        'img_size': img_size,
        'epochs': 10,
        'train_bs': 32,
        'valid_bs': 32,
        'lr': 1e-4,
        'num_workers': 0,
        'accum_iter': 1,  # support batch accumulation for backprop with an effectively larger batch size
        'verbose_step': 1,
        'device': 'cuda:0',
        'tta': 3,
        'weights': [1, 1, 1, 1, 1],  # one weight per fold; a new list for every config
    }


EF_CFG = _make_cfg('tf_efficientnet_b4_ns', 512)

RES_CFG = _make_cfg('resnext50_32x4d', 512)

ViT_CFG = _make_cfg('vit_base_patch16_384', 384)

# Main Loop

In [16]:
def inference_one_epoch(model, data_loader, device):
    """Run one full pass over `data_loader` and return softmax probabilities.

    Args:
        model: module called as `model(imgs)`; switched to eval mode here.
        data_loader: iterable yielding image batches only (no labels), e.g. a
            loader over a dataset built with `output_label=False`.
        device: device the batches are moved to before the forward pass.

    Returns:
        NumPy array with the per-batch softmax outputs (taken over dim 1)
        concatenated along axis 0.
    """
    model.eval()

    batch_probs = []

    # Disable autograd here so the function is memory-safe even when the caller
    # forgets to wrap it in torch.no_grad().
    with torch.no_grad():
        for imgs in tqdm(data_loader, total=len(data_loader)):
            imgs = imgs.to(device).float()

            logits = model(imgs)
            batch_probs.append(torch.softmax(logits, 1).cpu().numpy())

    return np.concatenate(batch_probs, axis=0)

In [25]:
if __name__ == '__main__':
    # Out-of-fold evaluation of the three-model ensemble: for every fold, each
    # model's fold checkpoint predicts the fold's validation rows with
    # test-time augmentation, and the averaged probabilities are scored and saved.

    seed_everything(EF_CFG['seed'])

    train_img_dir = '../input/cassava-leaf-disease-classification/train_images/'
    n_classes = train.label.nunique()
    device = torch.device(EF_CFG['device'])

    # (model class, its config, directory holding its fold checkpoints)
    ensemble = [
        (CassvaImgClassifier, EF_CFG, 'baseline_pytorch_efb4'),
        (CustomViT, ViT_CFG, 'baseline_pytorch_vit'),
        (CustomResNext, RES_CFG, 'baseline_pytorch_resnext'),
    ]

    folds = StratifiedKFold(n_splits=EF_CFG['fold_num']).split(np.arange(train.shape[0]), train.label.values)

    for fold, (trn_idx, val_idx) in enumerate(folds):
        print('Inference fold {} started'.format(fold))

        valid_ = train.loc[val_idx,:].reset_index(drop=True)
        val_preds = []

        for model_cls, cfg, ckpt_dir in ensemble:
            # Each model gets a loader built from its own config (image size, batch size).
            valid_ds = CassavaDataset(valid_, train_img_dir, transforms=get_inference_transforms(cfg), output_label=False)
            val_loader = torch.utils.data.DataLoader(
                valid_ds,
                batch_size=cfg['valid_bs'],
                num_workers=cfg['num_workers'],
                shuffle=False,
                pin_memory=False,
            )

            model = model_cls(cfg['model_arch'], n_classes).to(device)
            # map_location keeps loading independent of the device the checkpoint was saved from.
            model.load_state_dict(torch.load('{}/{}_fold_{}'.format(ckpt_dir, cfg['model_arch'], fold), map_location=device))

            # Every TTA pass is scaled so that all passes of all models add up to one average.
            pass_weight = 1 / cfg['tta'] / len(ensemble)
            with torch.no_grad():
                for _ in range(cfg['tta']):
                    val_preds += [pass_weight*inference_one_epoch(model, val_loader, device)]
            del model

        # The passes are already weighted, so they must be summed: taking the mean
        # would divide by the number of passes a second time and the rows would
        # no longer sum to 1.
        val_preds = np.sum(val_preds, axis=0)
        print('fold {} validation loss = {:.5f}'.format(fold, log_loss(valid_.label.values, val_preds)))
        print('fold {} validation accuracy = {:.5f}'.format(fold, (valid_.label.values==np.argmax(val_preds, axis=1)).mean()))

        # One soft-label column per predicted class (soft_label_1 .. soft_label_K).
        soft_cols = [f'soft_label_{i}' for i in range(1, val_preds.shape[1] + 1)]
        oof_ = pd.concat([valid_, pd.DataFrame(val_preds, columns=soft_cols)], axis=1)
        oof_.to_pickle(f"{EF_CFG['model_arch']}_oof{fold}.pkl")

        torch.cuda.empty_cache()

Inference fold 0 started


100%|██████████| 134/134 [01:15<00:00,  1.77it/s]
100%|██████████| 134/134 [01:15<00:00,  1.77it/s]
100%|██████████| 134/134 [01:18<00:00,  1.71it/s]
100%|██████████| 134/134 [01:34<00:00,  1.42it/s]
100%|██████████| 134/134 [01:35<00:00,  1.40it/s]
100%|██████████| 134/134 [01:34<00:00,  1.42it/s]
100%|██████████| 134/134 [01:16<00:00,  1.74it/s]
100%|██████████| 134/134 [01:16<00:00,  1.76it/s]
100%|██████████| 134/134 [01:16<00:00,  1.76it/s]


fold 0 validation loss = 0.31585
fold 0 validation accuracy = 0.92757
Inference fold 1 started


100%|██████████| 134/134 [01:18<00:00,  1.70it/s]
100%|██████████| 134/134 [01:16<00:00,  1.76it/s]
100%|██████████| 134/134 [01:16<00:00,  1.75it/s]
100%|██████████| 134/134 [01:35<00:00,  1.41it/s]
100%|██████████| 134/134 [01:35<00:00,  1.40it/s]
100%|██████████| 134/134 [01:17<00:00,  1.73it/s]
100%|██████████| 134/134 [01:17<00:00,  1.74it/s]
100%|██████████| 134/134 [01:15<00:00,  1.77it/s]


fold 1 validation loss = 0.31494
fold 1 validation accuracy = 0.92640
Inference fold 2 started


100%|██████████| 134/134 [01:15<00:00,  1.79it/s]
100%|██████████| 134/134 [01:14<00:00,  1.79it/s]
100%|██████████| 134/134 [01:14<00:00,  1.79it/s]
100%|██████████| 134/134 [01:32<00:00,  1.44it/s]
100%|██████████| 134/134 [01:33<00:00,  1.44it/s]
100%|██████████| 134/134 [01:34<00:00,  1.42it/s]
100%|██████████| 134/134 [01:16<00:00,  1.74it/s]
100%|██████████| 134/134 [01:17<00:00,  1.73it/s]


fold 2 validation loss = 0.29904
fold 2 validation accuracy = 0.93246
Inference fold 3 started


100%|██████████| 134/134 [01:15<00:00,  1.78it/s]
100%|██████████| 134/134 [01:15<00:00,  1.78it/s]
100%|██████████| 134/134 [01:17<00:00,  1.72it/s]
100%|██████████| 134/134 [01:38<00:00,  1.36it/s]
100%|██████████| 134/134 [01:37<00:00,  1.38it/s]
100%|██████████| 134/134 [01:34<00:00,  1.42it/s]
100%|██████████| 134/134 [01:18<00:00,  1.70it/s]
100%|██████████| 134/134 [01:17<00:00,  1.73it/s]


fold 3 validation loss = 0.31596
fold 3 validation accuracy = 0.92709
Inference fold 4 started


100%|██████████| 134/134 [01:15<00:00,  1.78it/s]
100%|██████████| 134/134 [01:15<00:00,  1.78it/s]
100%|██████████| 134/134 [01:15<00:00,  1.78it/s]
100%|██████████| 134/134 [01:35<00:00,  1.40it/s]
100%|██████████| 134/134 [01:34<00:00,  1.42it/s]
100%|██████████| 134/134 [01:34<00:00,  1.42it/s]
100%|██████████| 134/134 [01:18<00:00,  1.71it/s]
100%|██████████| 134/134 [01:16<00:00,  1.74it/s]
100%|██████████| 134/134 [01:15<00:00,  1.77it/s]

fold 4 validation loss = 0.31552
fold 4 validation accuracy = 0.92054





In [30]:
if __name__ == '__main__':
    # Test-set inference: every fold checkpoint of every ensemble member predicts
    # the test images with test-time augmentation. Each prediction array is stored
    # in `tst_preds`, already scaled by fold weight / TTA count / model count.

    tst_preds = []
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('device ', device)

    test_img_dir = '../input/cassava-leaf-disease-classification/test_images/'

    train = pd.read_csv('../input/cassava-leaf-disease-classification/train.csv')
    n_classes = train.label.nunique()
    test = pd.DataFrame()
    test['image_id'] = list(os.listdir(test_img_dir))

    # (model class, its config, directory holding its fold checkpoints)
    ensemble = [
        (CassvaImgClassifier, EF_CFG, 'baseline_pytorch_efb4'),
        (CustomViT, ViT_CFG, 'baseline_pytorch_vit'),
        (CustomResNext, RES_CFG, 'baseline_pytorch_resnext'),
    ]

    for model_cls, cfg, ckpt_dir in ensemble:
        seed_everything(cfg['seed'])
        test_ds = CassavaDataset(test, test_img_dir, transforms=get_inference_transforms(cfg), output_label=False)
        tst_loader = torch.utils.data.DataLoader(
            test_ds,
            batch_size=cfg['valid_bs'],
            num_workers=cfg['num_workers'],
            shuffle=False,
            pin_memory=False,
        )

        model = model_cls(cfg['model_arch'], n_classes).to(device)
        # Only the fold index is needed to locate a checkpoint, so no data split is computed.
        for fold in range(cfg['fold_num']):
            model.load_state_dict(torch.load('{}/{}_fold_{}'.format(ckpt_dir, cfg['model_arch'], fold), map_location=device))
            # Normalised fold weight, shared across TTA passes and ensemble members.
            pass_weight = cfg['weights'][fold]/sum(cfg['weights'])/cfg['tta']/len(ensemble)
            with torch.no_grad():
                for _ in range(cfg['tta']):
                    tst_preds += [pass_weight*inference_one_epoch(model, tst_loader, device)]
        del model

    torch.cuda.empty_cache()

device  cuda


100%|██████████| 1/1 [00:00<00:00, 37.14it/s]
100%|██████████| 1/1 [00:00<00:00, 35.81it/s]
100%|██████████| 1/1 [00:00<00:00, 35.81it/s]
100%|██████████| 1/1 [00:00<00:00, 38.56it/s]
100%|██████████| 1/1 [00:00<00:00, 38.56it/s]
100%|██████████| 1/1 [00:00<00:00, 38.56it/s]
100%|██████████| 1/1 [00:00<00:00, 37.14it/s]
100%|██████████| 1/1 [00:00<00:00, 40.11it/s]
100%|██████████| 1/1 [00:00<00:00, 37.14it/s]
100%|██████████| 1/1 [00:00<00:00, 40.11it/s]
100%|██████████| 1/1 [00:00<00:00, 40.11it/s]
100%|██████████| 1/1 [00:00<00:00, 34.57it/s]
100%|██████████| 1/1 [00:00<00:00, 35.81it/s]
100%|██████████| 1/1 [00:00<00:00, 35.81it/s]
100%|██████████| 1/1 [00:00<00:00, 32.34it/s]
100%|██████████| 1/1 [00:00<00:00, 25.70it/s]
100%|██████████| 1/1 [00:00<00:00, 28.65it/s]
100%|██████████| 1/1 [00:00<00:00, 33.42it/s]
100%|██████████| 1/1 [00:00<00:00, 33.42it/s]
100%|██████████| 1/1 [00:00<00:00, 32.34it/s]
100%|██████████| 1/1 [00:00<00:00, 33.42it/s]
100%|██████████| 1/1 [00:00<00:00,

In [29]:
# Combine all stored prediction arrays and pick the most probable class per
# image; the constant scale of the combined scores does not affect the argmax.
combined_probs = np.mean(tst_preds, axis=0)
test['label'] = combined_probs.argmax(axis=1)
test.to_csv('submission.csv', index=False)