In [None]:
import sys
sys.path.append('../input/timm-pytorch-image-models/pytorch-image-models-master')

import cv2
import os
import timm
import torch
import time
import random
import sklearn
import warnings
import pydicom
import joblib
import logging
import pandas as pd
import numpy as np

import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from tqdm import tqdm
from datetime import datetime
from glob import glob
from skimage import io
from scipy.ndimage.interpolation import zoom

import seaborn as sns
import matplotlib.pyplot as plt
from torch.utils.data import Dataset,DataLoader
from torch.utils.data.sampler import SequentialSampler, RandomSampler
from torch.cuda.amp import autocast, GradScaler
from torch.nn.modules.loss import _WeightedLoss

from sklearn import metrics
from sklearn.model_selection import GroupKFold, StratifiedKFold
from sklearn.metrics import roc_auc_score, log_loss

from albumentations import (
    HorizontalFlip, VerticalFlip, IAAPerspective, ShiftScaleRotate, CLAHE, RandomRotate90,
    Transpose, ShiftScaleRotate, Blur, OpticalDistortion, GridDistortion, HueSaturationValue,
    IAAAdditiveGaussianNoise, GaussNoise, MotionBlur, MedianBlur, IAAPiecewiseAffine, RandomResizedCrop,
    IAASharpen, IAAEmboss, RandomBrightnessContrast, Flip, OneOf, Compose, Normalize, Cutout, CoarseDropout, ShiftScaleRotate, CenterCrop, Resize
)

from albumentations.pytorch import ToTensorV2


In [None]:
# Directory the submission file is written to.
OUTPUT_DIR = './'

# Checkpoint directory for each ensemble member, keyed by the short model name
# that is also used as the key of CFG.model_name / CFG.size.
# NOTE(review): the 'resnet' directory is named "resnet300d_1502" while the
# dataset slug says resnet200d -- confirm the path is intentional.
MODEL_DIR = {
    'vit': '../input/vit-labelsmooth-3101/vit_labelsmooth_3101',
    'effnet': '../input/effnet-taylorsmooth-3001/effnet_taylor_smooth_3001',
    'resnext': '../input/resnext50-symm-1502/resnext50_32x4d_1502',
    'resnet': '../input/resnet200d-1602/resnet300d_1502',
    'nfnet': '../input/nfnet-1702/nf_resnet50_1702',
}

# Make sure the output directory is there before anything is written to it.
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

# Image folders read by ValidDataset (train) and TestDataset (test).
TRAIN_PATH = '../input/cassava-leaf-disease-classification/train_images'
TEST_PATH = '../input/cassava-leaf-disease-classification/test_images'

In [None]:
class CFG:
    """Static configuration shared by all cells of the notebook."""

    # --- runtime -----------------------------------------------------------
    debug = False
    num_workers = 4      # DataLoader worker processes
    batch_size = 32
    seed = 2020

    # --- ensemble members --------------------------------------------------
    # Short names of the models that are blended, in evaluation order.
    model_zoo = ['effnet', 'vit', 'nfnet', 'resnet', 'resnext']
    # Short name of the model currently being processed; the loops over
    # model_zoo assign it before building a model or a transform.
    model = None
    # timm architecture name for each short name.
    model_name = {
        'vit': 'vit_base_patch16_384',
        'effnet': 'tf_efficientnet_b4_ns',
        'resnext': 'resnext50_32x4d',
        'resnet': 'resnet200d',
        'nfnet': 'nf_resnet50',
    }
    # Side length passed to the crop transform for each short name.
    size = {
        'vit': 384,
        'effnet': 512,
        'resnext': 512,
        'resnet': 512,
        'nfnet': 512,
    }
    pretrained = False

    # --- data --------------------------------------------------------------
    num_class = 5
    target_col = 'label'
    image_col_name = 'image_id'
    class_col_name = 'label'

    # --- folds -------------------------------------------------------------
    fold = 1
    num_fold = [1, 2, 3, 4, 5]   # fold ids whose checkpoints are loaded for inference
    num_folds = 5                # number of splits used for validation folds

    # --- inference ---------------------------------------------------------
    tta = 4                      # augmented passes per checkpoint
    used_epochs = [6, 7, 8, 9]
    weights = [1, 1, 1, 1]

In [None]:
def seed_torch(seed=2020):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and puts cuDNN into deterministic mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not only the current one; a no-op without CUDA.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # benchmark=True lets cuDNN auto-tune and pick different kernels from run
    # to run, which undoes the determinism requested on the line above.
    torch.backends.cudnn.benchmark = False

# Seed all generators once, up front, with the configured seed.
seed_torch(seed=CFG.seed)

In [None]:
# Submission template; its `image_id` column lists the test images to predict
# and the frame later receives the predicted `label` column.
test = pd.read_csv('../input/cassava-leaf-disease-classification/sample_submission.csv')
# Training metadata, loaded as `valid` because this notebook only uses it to
# build validation folds.
valid = pd.read_csv('../input/cassava-leaf-disease-classification/train.csv')

In [None]:
class ValidDataset(Dataset):
    """Dataset over images in ``TRAIN_PATH`` listed in a dataframe.

    ``__getitem__`` returns only the image; the labels are kept on the
    instance (``self.labels``) but are not part of the returned item.

    Args:
        df: DataFrame with ``image_id`` (file name) and ``label`` columns.
        transform: Optional callable invoked as ``transform(image=...)`` whose
            result is indexed with ``'image'``.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['image_id'].values
        self.labels = df['label'].values
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and apply the transform, if any.

        Raises:
            FileNotFoundError: If the image file is missing or unreadable.
        """
        file_path = os.path.join(TRAIN_PATH, self.file_names[idx])
        image = cv2.imread(file_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the path rather than with an opaque cvtColor error.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {file_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            image = self.transform(image=image)['image']
        return image


class TestDataset(Dataset):
    """Dataset over images in ``TEST_PATH`` listed in a dataframe.

    Args:
        df: DataFrame with an ``image_id`` column holding file names.
        transform: Optional callable invoked as ``transform(image=...)`` whose
            result is indexed with ``'image'``.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.file_names = df['image_id'].values
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB and apply the transform, if any.

        Raises:
            FileNotFoundError: If the image file is missing or unreadable.
        """
        # os.path.join keeps path construction consistent with ValidDataset.
        file_path = os.path.join(TEST_PATH, self.file_names[idx])
        image = cv2.imread(file_path)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the path rather than with an opaque cvtColor error.
        if image is None:
            raise FileNotFoundError(f'Could not read image: {file_path}')
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            image = self.transform(image=image)['image']
        return image

In [None]:
def get_transforms(*, data):
    """Build the augmentation pipeline for the model selected in ``CFG.model``.

    Both pipelines crop to ``CFG.size[CFG.model]`` and contain random
    operations. The ``'valid'`` pipeline is random as well, so repeated passes
    over the same loader yield different augmented views (this is what the
    TTA loops in this notebook rely on).

    Args:
        data: ``'train'`` or ``'valid'`` (keyword-only).

    Returns:
        An albumentations ``Compose`` ending in ``Normalize`` + ``ToTensorV2``.

    Raises:
        ValueError: If ``data`` is neither ``'train'`` nor ``'valid'``.
            Previously this case returned ``None``, which the datasets
            silently treated as "no transform".
    """
    if data not in ('train', 'valid'):
        raise ValueError(
            "get_transforms: data must be 'train' or 'valid', got {!r}".format(data)
        )

    side = CFG.size[CFG.model]

    if data == 'train':
        return Compose([
            RandomResizedCrop(side, side),
            Transpose(p=0.5),
            HorizontalFlip(p=0.5),
            VerticalFlip(p=0.5),
            ShiftScaleRotate(p=0.5),
            Normalize(
                mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225],
            ),
            ToTensorV2(),
        ])

    # data == 'valid'
    return Compose([
        RandomResizedCrop(side, side),
        HorizontalFlip(p=0.5),
        VerticalFlip(p=0.5),
        Transpose(p=0.5),
        ShiftScaleRotate(p=0.5),
        Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            max_pixel_value=255.0, p=1.0),
        ToTensorV2(p=1.0),
    ])

In [None]:
class Effnet(nn.Module):
    """
    EfficientNet model by https://arxiv.org/pdf/1905.11946.pdf

    Wraps a timm backbone and replaces its ``classifier`` layer with a new
    linear layer that has ``n_class`` outputs.
    """
    def __init__(self, model_name, n_class, pretrained=False):
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=pretrained)
        # Keep the feature width, swap only the output dimension.
        backbone.classifier = nn.Linear(backbone.classifier.in_features, n_class)
        self.model = backbone

    def forward(self, x):
        """Return the wrapped backbone's output for ``x``."""
        return self.model(x)


class ViT(nn.Module):
    """
    VisionTransformer model by https://arxiv.org/pdf/2010.11929.pdf

    Wraps a timm backbone and replaces its ``head`` layer with a new linear
    layer that has ``n_class`` outputs.
    """
    def __init__(self, model_name, n_class, pretrained=False):
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=pretrained)
        head_width = backbone.head.in_features
        backbone.head = nn.Linear(head_width, n_class)
        self.model = backbone

    def forward(self, x):
        """Return the wrapped backbone's output for ``x``."""
        return self.model(x)
    
    
class Resnext(nn.Module):
    """timm backbone whose ``fc`` layer is replaced by a new linear layer
    with ``n_class`` outputs (used here with the ResNeXt architecture name)."""

    def __init__(self, model_name, n_class, pretrained=False):
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=pretrained)
        # Keep the feature width, swap only the output dimension.
        backbone.fc = nn.Linear(backbone.fc.in_features, n_class)
        self.model = backbone

    def forward(self, x):
        """Return the wrapped backbone's output for ``x``."""
        return self.model(x)
    
    
class Resnet(nn.Module):
    """timm backbone whose ``fc`` layer is replaced by a new linear layer
    with ``n_class`` outputs (used here with the ResNet architecture name)."""

    def __init__(self, model_name, n_class, pretrained=False):
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=pretrained)
        fc_width = backbone.fc.in_features
        backbone.fc = nn.Linear(fc_width, n_class)
        self.model = backbone

    def forward(self, x):
        """Return the wrapped backbone's output for ``x``."""
        return self.model(x)


class NFNet(nn.Module):
    """timm backbone whose ``head.fc`` layer is replaced by a new linear
    layer with ``n_class`` outputs (used here with the NF-ResNet name)."""

    def __init__(self, model_name, n_class, pretrained=False):
        super().__init__()
        backbone = timm.create_model(model_name, pretrained=pretrained)
        # The classifier of this family sits one level deeper, under `head`.
        head = backbone.head
        head.fc = nn.Linear(head.fc.in_features, n_class)
        self.model = backbone

    def forward(self, x):
        """Return the wrapped backbone's output for ``x``."""
        return self.model(x)


def create_model(config):
    """Instantiate the wrapper class selected by ``config.model``.

    Args:
        config: Object exposing ``model`` (short model name), ``model_name``
            (short name -> timm architecture name), ``num_class`` and
            ``pretrained``.

    Returns:
        A new ``Effnet``, ``ViT``, ``Resnext``, ``Resnet`` or ``NFNet``.

    Raises:
        ValueError: If ``config.model`` is not one of the supported short
            names. Previously this case silently returned ``None`` and failed
            later at the first use of the model.
    """
    arch = config.model
    if arch == 'effnet':
        wrapper = Effnet
    elif arch == 'vit':
        wrapper = ViT
    elif arch == 'resnext':
        wrapper = Resnext
    elif arch == 'resnet':
        wrapper = Resnet
    elif arch == 'nfnet':
        wrapper = NFNet
    else:
        raise ValueError(
            "create_model: unknown model {!r}; expected one of "
            "'effnet', 'vit', 'resnext', 'resnet', 'nfnet'".format(arch)
        )

    # All wrappers share the same constructor signature.
    return wrapper(config.model_name[arch], config.num_class, config.pretrained)

In [None]:
def inference(model, states, test_loader, device):
    """Run one pass of ``model`` over ``test_loader`` and return class probabilities.

    The model is moved to ``device``, loaded with
    ``states['model_state_dict']`` and switched to eval mode on every call.

    Args:
        model: Network to evaluate.
        states: Checkpoint mapping containing ``'model_state_dict'``.
        test_loader: Loader yielding batches of images (no labels).
        device: Device the model and each batch are moved to.

    Returns:
        NumPy array with the softmax (over dim 1) of the model output for
        every batch, concatenated in loader order.
    """
    model.to(device)
    model.load_state_dict(states['model_state_dict'])
    model.eval()

    probabilities = []
    progress = tqdm(enumerate(test_loader), total=len(test_loader))
    with torch.no_grad():
        for _, batch in progress:
            logits = model(batch.to(device))
            probabilities.append(logits.softmax(1).to('cpu').numpy())

    return np.concatenate(probabilities)

In [None]:
def make_validation_fold(num_folds, val_df):
    """Assign a stratified fold id (1..num_folds) to every row of ``val_df``.

    The split is stratified on ``CFG.class_col_name``, shuffled and seeded
    with ``CFG.seed``. The ``fold`` column is written onto ``val_df`` itself,
    and the same frame is returned.

    Args:
        num_folds: Number of splits. (It was previously ignored in favour of
            ``CFG.num_folds``; existing callers pass that same value.)
        val_df: DataFrame containing the ``CFG.image_col_name`` and
            ``CFG.class_col_name`` columns.

    Returns:
        ``val_df`` with an integer ``fold`` column.
    """
    splitter = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=CFG.seed)
    splits = splitter.split(X=val_df[CFG.image_col_name], y=val_df[CFG.class_col_name])

    # The split yields *positional* indices, so collect them in a positional
    # array instead of using label-based `.loc`, which is only correct for a
    # default RangeIndex. This also keeps the column integer-typed instead of
    # the float column produced by filling a NaN-initialised column.
    fold_ids = np.zeros(len(val_df), dtype=int)
    for fold, (_, val_idx) in enumerate(splits, start=1):
        fold_ids[val_idx] = fold

    val_df['fold'] = fold_ids
    return val_df

In [None]:
def blend():
    """Collect the out-of-fold predictions stored in each model's checkpoints.

    For every model in ``CFG.model_zoo`` the checkpoints of folds
    ``1..CFG.num_folds`` are loaded and their ``'oof_preds'`` entries are
    concatenated in fold order.

    Side effects:
        Sets ``CFG.model`` to each model name in turn (left at the last one)
        and prints the model name and every checkpoint path.

    Returns:
        List with one concatenated array per model, in ``CFG.model_zoo`` order.
    """
    models_oof = []
    for net in CFG.model_zoo:
        CFG.model = net
        print(CFG.model)
        # No network is built here: only the predictions stored in the
        # checkpoint are read, so instantiating the model was wasted work.
        oof_pred = []
        for fold in range(1, CFG.num_folds + 1):
            checkpoint_path = os.path.join(MODEL_DIR[CFG.model],f'{CFG.model_name[CFG.model]}_fold{fold}.pt')
            print('Loading {} checkpoint oof'.format(checkpoint_path))
            # map_location lets checkpoints saved from GPU load on a CPU-only machine.
            states = torch.load(checkpoint_path, map_location='cpu')
            oof_pred.append(states['oof_preds'])
        models_oof.append(np.concatenate(oof_pred))

    return models_oof
    
    

def validate_on_fold(fold, df):
    """Predict one validation fold with test-time augmentation.

    Relies on the module-level ``model`` and ``device`` variables and on
    ``CFG.model`` having been set by the caller.

    Args:
        fold: Fold id to evaluate; selects both the rows of ``df`` and the
            checkpoint file.
        df: DataFrame with a ``fold`` column plus the columns ``ValidDataset``
            needs.

    Returns:
        Tuple ``(valid_df, tta_stack)``: the fold's rows with an added
        ``pred`` column (argmax class), and the class probabilities averaged
        over the ``CFG.tta`` passes.
    """
    valid_df = df[df["fold"] == fold].reset_index(drop=True)
    valid_dataset = ValidDataset(valid_df, transform=get_transforms(data='valid'))
    valid_loader = DataLoader(valid_dataset, batch_size=CFG.batch_size, shuffle=False,
                              num_workers=CFG.num_workers, pin_memory=True)
    # Use the `fold` argument; the original read an unrelated global `folds`.
    checkpoint_path = os.path.join(MODEL_DIR[CFG.model],f'{CFG.model_name[CFG.model]}_fold{fold}.pt')
    print('Loading {} checkpoint'.format(checkpoint_path))
    states = torch.load(checkpoint_path, map_location=device)

    # np.mean already averages over the passes; dividing each pass by
    # CFG.tta as well would scale the probabilities down by 1/tta.
    tta_passes = [inference(model, states, valid_loader, device) for _ in range(CFG.tta)]
    tta_stack = np.mean(tta_passes, axis=0)

    valid_df['pred'] = tta_stack.argmax(1)
    return valid_df, tta_stack

In [None]:
# Out-of-fold predictions of every model, read from the checkpoints.
pred = blend()

# Re-create the fold assignment and stack the rows fold by fold, presumably so
# the row order matches the fold-ordered predictions above -- TODO confirm.
valid_fold = make_validation_fold(CFG.num_folds, valid)

# Collect the per-fold frames and concatenate once, instead of growing a
# DataFrame with repeated concat calls that start from an empty frame.
fold_frames = []
for folds in range(1, CFG.num_folds + 1):
    valid_ = valid_fold[valid_fold["fold"] == folds].reset_index(drop=True)
    fold_frames.append(valid_)
validate_df = pd.concat(fold_frames, axis=0) if fold_frames else pd.DataFrame()

In [None]:
# ### Validation (disabled): per-model, per-fold TTA validation, kept commented out for reference
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# final_ensemble = []
# valid = make_validation_fold(CFG.num_folds, valid)

# for net in CFG.model_zoo[2:]:
#     fold_stack = []
#     validate_df = pd.DataFrame()
#     CFG.model = net
#     model = create_model(CFG)
    
#     print('Validating {}'.format(CFG.model))
#     for folds in (number+1 for number in range(CFG.num_folds)):
#         fold_df, tta_stack = validate_on_fold(folds, valid)
#         validate_df = pd.concat([validate_df,fold_df])
#         fold_stack += [tta_stack]
#         print("Fold {} TTA: {}".format(folds, metrics.accuracy_score(y_true=fold_df['label'], y_pred=fold_df['pred'])))
        
#     print(metrics.accuracy_score(y_true=validate_df['label'], y_pred=validate_df['pred']))
#     model_stack = np.concatenate(fold_stack, axis=0)
#     final_ensemble += [model_stack]

In [None]:
### Inference
# For every model in the zoo: run each fold checkpoint CFG.tta times over the
# test set, average all passes, then average the per-model results.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
final_ensemble = []

for net in CFG.model_zoo:
    CFG.model = net
    model = create_model(CFG)
    model_ensemble = []

    print('Evaluating {}'.format(CFG.model))

    # Rebuilt for every model because the crop size depends on CFG.model.
    test_dataset = TestDataset(test, transform=get_transforms(data='valid'))
    test_loader = DataLoader(test_dataset, batch_size=CFG.batch_size, shuffle=False,
                             num_workers=CFG.num_workers, pin_memory=True)

    print('Inference...')
    for folds in CFG.num_fold:
        checkpoint_path = os.path.join(MODEL_DIR[CFG.model],f'{CFG.model_name[CFG.model]}_fold{folds}.pt')
        print('Loading {} checkpoint'.format(checkpoint_path))
        # map_location lets checkpoints saved from GPU load on a CPU-only machine.
        states = torch.load(checkpoint_path, map_location=device)

        # The transform is random, so each pass sees a different augmentation.
        # np.mean below already averages the passes; dividing each pass by
        # CFG.tta as well would scale the probabilities down by 1/tta.
        for _ in range(CFG.tta):
            model_ensemble += [inference(model, states, test_loader, device)]

    model_ensemble = np.mean(model_ensemble, axis=0)
    final_ensemble += [model_ensemble]

final_ensemble = np.mean(final_ensemble, axis=0)
# Weighted alternative to the plain mean above (apply to the per-model list):
# final_ensemble = (0.25*final_ensemble[0]) + (0.2*final_ensemble[1]) + (0.25*final_ensemble[2]) + (0.25*final_ensemble[3]) + (0.05*final_ensemble[4])


# submission
test['label'] = final_ensemble.argmax(1)
test[['image_id', 'label']].to_csv(os.path.join(OUTPUT_DIR, 'submission.csv'), index=False)
test.head()