In [1]:
import sys

# Kaggle input folders added to the import path; presumably they hold the
# tez and efficientnet_pytorch packages imported in the next cell.
tez_path = '../input/tez-lib/'
effnet_path = '../input/efficientnet-pytorch/'

# Append both folders in one call, tez first and efficientnet second.
sys.path.extend([tez_path, effnet_path])

In [2]:
import os
import albumentations
import pandas as pd
import numpy as np

import tez
from tez.datasets import ImageDataset

import torch
import torch.nn as nn
from torch.nn import functional as F

from efficientnet_pytorch import EfficientNet

In [3]:
class LeafModel(tez.Model):
    """EfficientNet-B4 feature extractor followed by dropout and a linear head."""

    def __init__(self, num_classes):
        """Create the backbone, dropout and classification layers.

        Args:
            num_classes: number of outputs produced by the final linear layer.
        """
        super().__init__()

        # Attribute names are kept as-is: renaming them would change the
        # parameter keys of the module.
        # NOTE(review): from_name is used rather than a pretrained constructor,
        # presumably because weights are loaded from saved checkpoints later.
        self.effnet = EfficientNet.from_name("efficientnet-b4")
        self.dropout = nn.Dropout(p=0.1)
        # 1792 must equal the channel count of the feature map returned by
        # extract_features, since the pooled features feed this layer directly.
        self.out = nn.Linear(in_features=1792, out_features=num_classes)
        self.step_scheduler_after = "epoch"

    def forward(self, image, targets=None):
        """Compute class scores for a batch of images.

        Args:
            image: 4-D tensor whose first dimension is the batch size.
            targets: accepted but not used by this method.

        Returns:
            A 3-tuple ``(outputs, None, None)`` where ``outputs`` is the raw
            output of the linear head.
            NOTE(review): the two ``None`` slots presumably stand for the
            loss and metrics that tez expects - confirm against tez.Model.
        """
        # Unpacking four values also rejects inputs that are not 4-D.
        n_images, _channels, _height, _width = image.shape

        feature_map = self.effnet.extract_features(image)
        # Global average pool to 1x1, then flatten to one vector per image.
        pooled = F.adaptive_avg_pool2d(feature_map, 1)
        embedding = pooled.reshape(n_images, -1)
        logits = self.out(self.dropout(embedding))
        return logits, None, None

In [4]:
# Test-time augmentation pipeline; settings taken from:
# https://www.kaggle.com/khyeh0719/pytorch-efficientnet-baseline-inference-tta
# The crop/flip/colour steps are random, so repeated passes over the same
# dataset see different views of each image. No seed is set here.

# Geometry: random 256x256 crop plus coin-flip transpose / flips.
_geometric_steps = [
    albumentations.RandomResizedCrop(256, 256),
    albumentations.Transpose(p=0.5),
    albumentations.HorizontalFlip(p=0.5),
    albumentations.VerticalFlip(p=0.5),
]

# Colour: small hue/saturation/value and brightness/contrast jitter.
_color_steps = [
    albumentations.HueSaturationValue(
        hue_shift_limit=0.2,
        sat_shift_limit=0.2,
        val_shift_limit=0.2,
        p=0.5,
    ),
    albumentations.RandomBrightnessContrast(
        brightness_limit=(-0.1, 0.1),
        contrast_limit=(-0.1, 0.1),
        p=0.5,
    ),
]

# Always-applied per-channel normalisation.
# NOTE(review): mean/std look like the usual ImageNet statistics - confirm
# they match what was used at training time.
_normalize_step = albumentations.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
    max_pixel_value=255.0,
    p=1.0,
)

test_aug = albumentations.Compose(
    _geometric_steps + _color_steps + [_normalize_step],
    p=1.,
)

In [5]:
image_path = "../input/cassava-leaf-disease-classification/test_images/"

# The sample submission supplies the image ids to predict on.
dfx = pd.read_csv("../input/cassava-leaf-disease-classification/sample_submission.csv")
test_image_paths = [
    os.path.join(image_path, image_id) for image_id in dfx["image_id"].values
]

# Fake targets: the placeholder labels from the sample submission are passed
# only because the dataset is constructed with a targets argument.
test_targets = dfx["label"].values

# `resize` is not passed, so ImageDataset uses its own default for it.
test_dataset = ImageDataset(
    image_paths=test_image_paths,
    targets=test_targets,
    augmentations=test_aug,
)

In [6]:
# The class count is derived once from the training labels and reused for
# every fold model instead of being recomputed per model.
train_dfx = pd.read_csv("../input/cassava-leaf-disease-classification/train.csv")
num_classes = train_dfx.label.nunique()

# Build one LeafModel per fold checkpoint (model_fold0.bin ... model_fold4.bin),
# constructing and loading in the same order as before.
fold_models = []
for fold in range(5):
    fold_model = LeafModel(num_classes=num_classes)
    fold_model.load("../input/casava-training-code/model_fold{}.bin".format(fold))
    fold_models.append(fold_model)

# Keep the individual names bound: a later cell builds model_list from them.
model0, model1, model2, model3, model4 = fold_models

In [7]:
# The five fold models; run_inference (below) runs 5 prediction passes for each one.

model_list = [model0, model1, model2, model3, model4]

def run_inference(model, n_tta=5, dataset=None, batch_size=64, n_jobs=-1):
    """Average a model's outputs over several prediction passes and pick a class.

    Each pass calls ``model.predict`` on the dataset, stacks the yielded
    batches row-wise, and adds the result to a running sum. The sum is divided
    by the number of passes and the arg-max along axis 1 is returned.

    Args:
        model: object exposing ``predict(dataset, batch_size=..., n_jobs=...)``
            that yields one array of outputs per batch.
            (assumes each batch is 2-D, rows = samples - TODO confirm)
        n_tta: number of prediction passes to average (default 5, the value
            that used to be hard-coded).
        dataset: dataset to predict on; when ``None`` the module-level
            ``test_dataset`` is used, as before.
        batch_size: forwarded to ``model.predict`` (default 64).
        n_jobs: forwarded to ``model.predict`` (default -1).

    Returns:
        1-D array with the arg-max column index for every row.

    Raises:
        ValueError: if ``n_tta`` is smaller than 1 or a pass yields no batches.
    """
    if n_tta < 1:
        raise ValueError("n_tta must be at least 1, got {}".format(n_tta))
    if dataset is None:
        dataset = test_dataset

    summed_preds = None
    for _ in range(n_tta):
        # Collect the batches first and stack once, instead of re-stacking
        # the growing array after every batch.
        batch_outputs = list(
            model.predict(dataset, batch_size=batch_size, n_jobs=n_jobs)
        )
        if not batch_outputs:
            raise ValueError("model.predict returned no batches; is the dataset empty?")
        pass_preds = np.vstack(batch_outputs)

        if summed_preds is None:
            # Own copy in a floating dtype: never mutates an array handed out
            # by the model, and keeps the later division valid for integer
            # outputs. float32 inputs stay float32.
            accum_dtype = np.result_type(pass_preds.dtype, np.float32)
            summed_preds = pass_preds.astype(accum_dtype, copy=True)
        else:
            summed_preds += pass_preds

    # NOTE(review): the values averaged are whatever model.predict yields;
    # if those are raw scores rather than probabilities, confirm that is intended.
    mean_preds = summed_preds / n_tta
    return mean_preds.argmax(axis=1)




In [8]:
# One column of predicted class indices per fold model, named model0, model1, ...
# in the order of model_list. Built in a single pass instead of one
# copy-pasted assignment per model.
new_df = pd.DataFrame(
    {
        "model{}".format(fold): run_inference(fold_model)
        for fold, fold_model in enumerate(model_list)
    }
)

100%|██████████| 1/1 [00:00<00:00,  1.05it/s, stage=test]
100%|██████████| 1/1 [00:00<00:00,  6.16it/s, stage=test]
100%|██████████| 1/1 [00:00<00:00,  6.94it/s, stage=test]
100%|██████████| 1/1 [00:00<00:00,  6.98it/s, stage=test]
100%|██████████| 1/1 [00:00<00:00,  7.18it/s, stage=test]
100%|██████████| 1/1 [00:00<00:00,  6.89it/s, stage=test]
100%|██████████| 1/1 [00:00<00:00,  7.22it/s, stage=test]
100%|██████████| 1/1 [00:00<00:00,  7.13it/s, stage=test]
100%|██████████| 1/1 [00:00<00:00,  7.00it/s, stage=test]
100%|██████████| 1/1 [00:00<00:00,  7.21it/s, stage=test]
100%|██████████| 1/1 [00:00<00:00,  7.41it/s, stage=test]
100%|██████████| 1/1 [00:00<00:00,  7.17it/s, stage=test]
100%|██████████| 1/1 [00:00<00:00,  7.32it/s, stage=test]
100%|██████████| 1/1 [00:00<00:00,  7.34it/s, stage=test]
100%|██████████| 1/1 [00:00<00:00,  7.24it/s, stage=test]
100%|██████████| 1/1 [00:00<00:00,  7.22it/s, stage=test]
100%|██████████| 1/1 [00:00<00:00,  7.28it/s, stage=test]
100%|█████████

In [9]:
from collections import Counter
def most_common(lst):
    """Return the element of ``lst`` that occurs most often.

    When several elements share the highest count, the one that appears
    first in ``lst`` is returned. An empty ``lst`` makes ``max`` raise
    ``ValueError``.
    """
    occurrences = Counter(lst)
    return max(lst, key=lambda item: occurrences[item])

# Majority vote per test image across the five per-model prediction columns.
vote_columns = ['model0', 'model1', 'model2', 'model3', 'model4']
final_preds = [
    most_common([row[column] for column in vote_columns])
    for _, row in new_df.iterrows()
]
    

In [10]:
# Replace the placeholder labels with the voted predictions and write the file.
dfx["label"] = final_preds
dfx.to_csv("submission.csv", index=False)