In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere under /kaggle/input.
input_file_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for file_path in input_file_paths:
    print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
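# Install timm and vision_transformer_pytorch offline: --no-index disables PyPI lookups and
# --find-links points pip at the package files attached under /kaggle/input.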
! pip install --no-index --find-links /kaggle/input/timmpackage1/ timm
! pip install --no-index --find-links=/kaggle/input/visiontransformerpkg1/ vision_transformer_pytorch

In [None]:
from glob import glob
from sklearn.model_selection import GroupKFold, StratifiedKFold
import cv2
from skimage import io
import torch
from sklearn.preprocessing import LabelBinarizer
from torch import nn
import os
from datetime import datetime
import time
import random
import cv2
import torchvision
from torchvision import transforms
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from torch.cuda.amp import autocast, GradScaler
import torch.nn.functional as F
from vision_transformer_pytorch import VisionTransformer

import sklearn
import warnings
import joblib
from sklearn.metrics import roc_auc_score, log_loss
from sklearn import metrics
import warnings
import cv2
import pydicom
import timm #from efficientnet_pytorch import EfficientNet
from scipy.ndimage.interpolation import zoom
from sklearn.metrics import log_loss

In [None]:
# Configuration dictionary read by the cells below.
# Keys marked "not referenced" are not used by any cell visible in this notebook.
CFG = {
    'fold_num': 5,  # n_splits passed to StratifiedKFold
    'seed': 719,  # value passed to seed_everything
    'model_efficientnet': 'tf_efficientnet_b4_ns',  # timm architecture name for the EfficientNet member
    'model_resnext50': 'resnext50_32x4d',  # timm architecture name for the ResNeXt member
    'img_size': 384, # side length used by the albumentations pipelines (previously tried: 448, 512)
    'epochs': 10,  # not referenced
    'train_bs': 32,  # not referenced
    'valid_bs': 32,  # batch size of both the validation and the test DataLoader
    'lr': 1e-4,  # not referenced
    'num_workers': 4,  # DataLoader worker count
    'accum_iter': 1, # support for batch accumulation during backprop (effectively larger batch size); not referenced
    'verbose_step': 1,  # not referenced
    'device': 'cuda:0',  # string handed to torch.device
    'tta': 3,  # number of test-time-augmentation passes per checkpoint pair
    'used_epochs': [6,7,8,9],  # not referenced
    'weights': [1,1,1,1, 1,1]  # per-checkpoint weights, indexed by position in the checkpoint lists
}

In [None]:
# Load the competition's training metadata; later cells read its `label` column
# (for stratified splitting and scoring) and its `image_id` column (to locate image files).
train = pd.read_csv('../input/cassava-leaf-disease-classification/train.csv')
train.head()

In [None]:
def seed_everything(seed):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random``, NumPy's legacy global generator and PyTorch
    (CPU and all CUDA devices), exports ``PYTHONHASHSEED`` and configures
    cuDNN for deterministic behaviour.

    Args:
        seed: integer seed applied to every generator.
    """
    random.seed(seed)
    # Only affects hash randomisation of Python processes started after this point.
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # Also seed every other visible GPU; this is a no-op when CUDA is unavailable.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN auto-tuner may pick a different convolution algorithm on each
    # run, which defeats `deterministic`; it must be off for reproducibility.
    torch.backends.cudnn.benchmark = False
def get_img(path):
    """Read an image file with OpenCV and return it in RGB channel order.

    Args:
        path: path of the image file to read.

    Returns:
        A contiguous NumPy array holding the image with its channels in
        RGB order (OpenCV itself loads images as BGR).

    Raises:
        FileNotFoundError: if OpenCV could not read the file. ``cv2.imread``
            signals that by returning ``None`` instead of raising.
    """
    im_bgr = cv2.imread(path)
    if im_bgr is None:
        raise FileNotFoundError('Could not read image: {}'.format(path))
    # cvtColor returns a fresh contiguous array, unlike a [:, :, ::-1] view
    # whose negative strides some downstream consumers reject.
    im_rgb = cv2.cvtColor(im_bgr, cv2.COLOR_BGR2RGB)
    return im_rgb

In [None]:
class CassavaDataset(Dataset):
    """Dataset that loads the images listed in a DataFrame.

    Each row's ``image_id`` is joined to ``data_root`` to locate the image
    file. When ``output_label`` is set, the row's ``label`` is returned
    alongside the (optionally transformed) image.
    """

    def __init__(self, df, data_root, transforms=None, output_label=True):
        super().__init__()
        self.data_root = data_root
        self.transforms = transforms
        self.output_label = output_label
        # Work on a private, 0..n-1 indexed copy so positional access is stable.
        self.df = df.reset_index(drop=True).copy()

    def __len__(self):
        n_rows, _ = self.df.shape
        return n_rows

    def __getitem__(self, index: int):
        row = self.df.iloc[index]

        # Look the label up first so a missing column fails before any file I/O.
        if self.output_label:
            target = row['label']

        img = get_img("{}/{}".format(self.data_root, row['image_id']))

        # The transform is called albumentations-style: keyword in, dict out.
        if self.transforms:
            img = self.transforms(image=img)['image']

        if self.output_label == True:
            return img, target
        return img

In [None]:
from albumentations import (
    HorizontalFlip, VerticalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine, RandomResizedCrop,
    IAASharpen, IAAEmboss, RandomBrightnessContrast, Flip, OneOf, Compose, Normalize, Cutout, CoarseDropout, ShiftScaleRotate, CenterCrop, Resize
)

from albumentations.pytorch import ToTensorV2

from albumentations import (
    HorizontalFlip, VerticalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine, RandomResizedCrop,
    IAASharpen, IAAEmboss, RandomBrightnessContrast, Flip, OneOf, Compose, Normalize, Cutout, CoarseDropout, ShiftScaleRotate, CenterCrop, Resize
)

from albumentations.pytorch import ToTensorV2

def get_train_transforms():
    """Build the albumentations pipeline used for training images.

    Applies a random resized crop to ``CFG['img_size']``, random
    transpose/flips, shift-scale-rotate and colour jitter (each with
    probability 0.5), ImageNet mean/std normalisation, two random erasing
    steps, and finally conversion to a tensor.
    """
    side = CFG['img_size']
    steps = [
        RandomResizedCrop(side, side),
        Transpose(p=0.5),
        HorizontalFlip(p=0.5),
        VerticalFlip(p=0.5),
        ShiftScaleRotate(p=0.5),
        HueSaturationValue(hue_shift_limit=0.2, sat_shift_limit=0.2, val_shift_limit=0.2, p=0.5),
        RandomBrightnessContrast(brightness_limit=(-0.1,0.1), contrast_limit=(-0.1, 0.1), p=0.5),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
        CoarseDropout(p=0.5),
        Cutout(p=0.5),
        ToTensorV2(p=1.0),
    ]
    return Compose(steps, p=1.)

def get_valid_transforms():
    """Build the deterministic albumentations pipeline for validation images.

    Centre-crops to ``CFG['img_size']``, resizes to the same size,
    normalises with the ImageNet mean/std and converts to a tensor.
    """
    side = CFG['img_size']
    steps = [
        CenterCrop(side, side, p=1.),
        Resize(side, side),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return Compose(steps, p=1.)

def get_inference_transforms():
    """Build the randomised albumentations pipeline used at inference time.

    The pipeline is random (resized crop, transpose, flips), so running it
    several times over the same image yields different views for test-time
    augmentation. Colour jitter is deliberately left out here.
    """
    side = CFG['img_size']
    steps = [
        RandomResizedCrop(side, side),
        Transpose(p=0.5),
        HorizontalFlip(p=0.5),
        VerticalFlip(p=0.5),
        # HueSaturationValue / RandomBrightnessContrast are intentionally disabled for inference.
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225], max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
    ]
    return Compose(steps, p=1.)

In [None]:
# Model
class CassvaImgClassifier(nn.Module):
    """Wrap a timm backbone and replace its classification head.

    Args:
        model_arch: timm architecture name. The head is replaced only when
            the name contains 'efficientnet' (``classifier`` attribute) or
            'resnext' (``fc`` attribute); any other architecture keeps the
            head timm created.
        n_class: number of output classes of the new head.
        pretrained: forwarded to ``timm.create_model``.
    """

    def __init__(self, model_arch, n_class, pretrained=False):
        super().__init__()
        backbone = timm.create_model(model_arch, pretrained=pretrained)

        # EfficientNet variants expose their head as `classifier`.
        if 'efficientnet' in model_arch:
            backbone.classifier = nn.Linear(backbone.classifier.in_features, n_class)

        # ResNeXt variants expose their head as `fc`.
        if 'resnext' in model_arch:
            backbone.fc = nn.Linear(backbone.fc.in_features, n_class)

        self.model = backbone

    def forward(self, x):
        return self.model(x)
    
class EnsembleClassifier(nn.Module):
    """Fixed-weight ensemble of a Vision Transformer, an EfficientNet and a ResNeXt.

    The three members receive the same input batch and their raw outputs
    are blended as ``0.4 * ViT + 0.4 * EfficientNet + 0.2 * ResNeXt``.

    Args:
        model_efficientnet: timm architecture name of the EfficientNet member.
        model_resnext50: timm architecture name of the ResNeXt member.
        n_class: number of output classes for the two timm members.
        pretrained: forwarded to ``CassvaImgClassifier`` for the timm members.

    Note:
        The ViT member is always built with 5 classes and loaded from a
        fixed checkpoint path, independently of ``n_class``.
    """

    def __init__(self, model_efficientnet, model_resnext50,n_class, pretrained=False):
        super().__init__()
        # load the VisionTransformer model
        self.model1 = VisionTransformer.from_name('ViT-B_16', num_classes=5)
        # map_location='cpu' lets the checkpoint load even when it was saved from
        # a GPU that is not present; the module is moved to its device afterwards.
        self.model1.load_state_dict(
            torch.load('/kaggle/input/vitmodel2/ViT-B_16.pt', map_location='cpu')
        )
        print('load VisionTransformer model:', self.model1)
        self.model2 = CassvaImgClassifier(model_efficientnet, n_class, pretrained)
        print('load efficientnet model:', model_efficientnet)
        self.model3 = CassvaImgClassifier(model_resnext50, n_class, pretrained)
        print('Resnext model:', model_resnext50)

    def forward(self, x):
        x1 = self.model1(x)
        x2 = self.model2(x)
        x3 = self.model3(x)
        # apply weights to the models' predictions
        return 0.4 * x1 + 0.4 * x2 + 0.2 * x3

    def load_efficientnet(self, state_dict):
        """Load ``state_dict`` into the EfficientNet member."""
        self.model2.load_state_dict(state_dict)

    def load_resnet(self, state_dict):
        """Load ``state_dict`` into the ResNeXt member."""
        self.model3.load_state_dict(state_dict)

In [None]:
def smooth_one_hot(true_labels: torch.Tensor, classes: int, smoothing):
    """Turn integer class labels into label-smoothed target distributions.

    Each row puts ``1 - smoothing`` on the true class and spreads
    ``smoothing`` evenly over the remaining ``classes - 1`` classes.

    Args:
        true_labels: 1-D tensor of class indices, one per sample.
        classes: total number of classes.
        smoothing: float in ``[0, 1)``; 0 yields plain one-hot targets.

    Returns:
        Tensor of shape ``(len(true_labels), classes)`` on the same device as
        ``true_labels``. It is half precision when the labels live on a CUDA
        device and float32 otherwise.
    """
    assert 0 <= smoothing < 1
    confidence = 1.0 - smoothing
    with torch.no_grad():
        # scatter_ needs an int64 index with one column per row.
        label_index = true_labels.detach().long().unsqueeze(1)
        true_dist = torch.full(
            (label_index.size(0), classes),
            smoothing / (classes - 1),
            device=label_index.device,
        )
        # Overwrite the entry of the true class in every row with the confidence.
        true_dist.scatter_(1, label_index, confidence)
        # Keep half precision on GPU, but stay on the labels' own device and do
        # not require CUDA when the labels are on the CPU.
        if true_dist.is_cuda:
            true_dist = true_dist.half()
    return true_dist

In [None]:
# Main Loop
def inference_one_epoch(model, data_loader, device):
    """Run the model over every batch of a loader and return class probabilities.

    Args:
        model: module called on each image batch.
        data_loader: iterable of image batches (no labels); must support ``len``.
        device: device each batch is moved to before the forward pass.

    Returns:
        NumPy array with the softmax (over dim 1) of the model output for
        every sample, batches concatenated along axis 0 in loader order.

    Raises:
        ValueError: from ``np.concatenate`` if the loader yields no batches.
    """
    model.eval()
    image_preds_all = []

    # Disable autograd here so the function is safe even when the caller
    # forgets to wrap it; inference never needs gradients.
    with torch.no_grad():
        for imgs in tqdm(data_loader, total=len(data_loader)):
            imgs = imgs.to(device).float()
            image_preds = model(imgs)
            # Convert raw scores to per-class probabilities before leaving the GPU.
            image_preds_all.append(torch.softmax(image_preds, 1).detach().cpu().numpy())

    return np.concatenate(image_preds_all, axis=0)

In [None]:
if __name__ == '__main__':
    # Ensemble inference with test-time augmentation.
    # For each of the first `n_inference_folds` stratified folds, every checkpoint
    # pair is run `CFG['tta']` times over that fold's validation images and over
    # the test images; the weighted average is scored on the validation labels,
    # and the test predictions of all processed folds are averaged at the end.

    seed_everything(CFG['seed'])

    train_img_dir = '../input/cassava-leaf-disease-classification/train_images/'
    test_img_dir = '../input/cassava-leaf-disease-classification/test_images/'
    n_inference_folds = 2  # only the first two folds are processed

    device = torch.device(CFG['device'])

    # Checkpoint files, paired position by position.
    # NOTE(review): the same lists are used for every fold and the ResNeXt list
    # repeats two files -- confirm this is intended.
    model_efficientnet_b4ns = ['tf_efficientnet_b4_ns_fold_0_8', 'tf_efficientnet_b4_ns_fold_0_9','tf_efficientnet_b4_ns_fold_1_8','tf_efficientnet_b4_ns_fold_1_9']
    model_resnext5 = ['resnext50_32x4d_fold_1_6', 'resnext50_32x4d_fold_1_6', 'resnext50_32x4d_fold_1_9', 'resnext50_32x4d_fold_1_9']

    # Normalise by the weights actually used so the scaled predictions sum to a
    # proper weighted average (equals the previous hard-coded /4 for unit weights).
    ckpt_weights = CFG['weights'][:len(model_efficientnet_b4ns)]
    total_weight = float(sum(ckpt_weights))

    # The test set does not depend on the fold: list it once so every fold's
    # predictions refer to the same rows in the same order.
    test = pd.DataFrame()
    test['image_id'] = list(os.listdir(test_img_dir))
    test_ds = CassavaDataset(test, test_img_dir, transforms=get_inference_transforms(), output_label=False)
    tst_loader = torch.utils.data.DataLoader(
        test_ds,
        batch_size=CFG['valid_bs'],
        num_workers=CFG['num_workers'],
        shuffle=False,
        pin_memory=False,
    )

    folds = StratifiedKFold(n_splits=CFG['fold_num']).split(np.arange(train.shape[0]), train.label.values)
    folds_pred_lst = []
    for fold, (trn_idx, val_idx) in enumerate(folds):
        if fold >= n_inference_folds:
            break

        print('Inference fold {} started'.format(fold))

        valid_ = train.loc[val_idx,:].reset_index(drop=True)
        # Validation images go through the same random inference pipeline as the test images.
        valid_ds = CassavaDataset(valid_, train_img_dir, transforms=get_inference_transforms(), output_label=False)
        val_loader = torch.utils.data.DataLoader(
            valid_ds,
            batch_size=CFG['valid_bs'],
            num_workers=CFG['num_workers'],
            shuffle=False,
            pin_memory=False,
        )

        model = EnsembleClassifier(CFG['model_efficientnet'], CFG['model_resnext50'], train.label.nunique()).to(device)

        val_preds = []
        tst_preds = []

        for i, (model_efficientnet, model_resnext50) in enumerate(zip(model_efficientnet_b4ns, model_resnext5)):
            print(f'load model /kaggle/input/tfefficientnetb4ns6/{model_efficientnet}')
            print(f'load model /kaggle/input/resnext/{model_resnext50}')
            # map_location keeps loading independent of the device the checkpoint was saved from.
            model.load_efficientnet(torch.load('/kaggle/input/tfefficientnetb4ns6/{}'.format(model_efficientnet), map_location=device))
            model.load_resnet(torch.load('/kaggle/input/resnext/{}'.format(model_resnext50), map_location=device))

            # Share of the final average contributed by one TTA pass of this checkpoint pair.
            scale = ckpt_weights[i] / total_weight / CFG['tta']
            # no backpropagation
            with torch.no_grad():
                for _ in range(CFG['tta']):
                    val_preds += [scale * inference_one_epoch(model, val_loader, device)]
                    tst_preds += [scale * inference_one_epoch(model, tst_loader, device)]
            torch.cuda.empty_cache()

        # Each entry is already scaled by its share, so the weighted average is the
        # SUM of the entries. Taking the mean here would divide a second time and
        # leave rows that no longer sum to 1, which log_loss expects.
        val_preds = np.sum(val_preds, axis=0)
        tst_preds = np.sum(tst_preds, axis=0)
        folds_pred_lst.append(tst_preds)

        # compute log loss of the validation dataset
        print('fold {} validation loss softmax = {:.5f}'.format(fold, log_loss(valid_.label.values, val_preds)))
        # fraction of validation images whose arg-max class matches the label
        print('fold {} validation accuracy of softmax= {:.5f}'.format(fold, (valid_.label.values==np.argmax(val_preds, axis=1)).mean()))

        del model

    first_test_pred = folds_pred_lst[0]
    snd_test_pred = folds_pred_lst[1]
    # average the test predictions over the processed folds
    mean_tst_preds = np.mean(folds_pred_lst, axis=0)

    print('first_test_pred:',first_test_pred)
    print('snd_test_pred:',snd_test_pred)
    print('mean_tst_preds:',mean_tst_preds)

In [None]:
# Predicted class per test image: the column index with the highest averaged score.
test['label'] = np.argmax(mean_tst_preds, axis=1)
test.head()

In [None]:
# Write the test predictions to submission.csv in the working directory, without the index column.
test.to_csv('submission.csv', index=False)
# valid_df.to_csv('valid_entry.csv', index=False)