**Training notebook: https://www.kaggle.com/iamprateek/cassava-tpu/notebook**

In [None]:
import os
import re
import numpy as np
import pandas as pd
import random
import math
from sklearn import metrics
from sklearn.model_selection import KFold
import tensorflow as tf
from tensorflow.keras import backend as K
import tensorflow_addons as tfa
from kaggle_datasets import KaggleDatasets
! pip install ../input/keras-efficientnet-whl/Keras_Applications-1.0.8-py3-none-any.whl
! pip install ../input/keras-efficientnet-whl/efficientnet-1.1.1-py3-none-any.whl

In [None]:
import efficientnet.tfkeras as efn
import glob

In [None]:
# Probe for a TPU. The resolver is called without arguments because the
# TPU_NAME environment variable is always set on Kaggle.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    # A ValueError from the resolver is treated as "no TPU available".
    tpu = None

if not tpu:
    # Fall back to TensorFlow's default strategy (CPU or a single GPU).
    strategy = tf.distribute.get_strategy()
else:
    # Connect to the TPU cluster and initialise it before building the strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)

print("REPLICAS: ", strategy.num_replicas_in_sync)

In [None]:
# Autotune sentinel handed to tf.data: used in this notebook both as
# `num_parallel_calls` for `Dataset.map` and as the `prefetch` buffer size.
AUTO = tf.data.experimental.AUTOTUNE

In [None]:
# Configuration
# NOTE(review): EPOCHS, SEED and VERBOSE are not referenced by any code visible
# in this notebook export — presumably carried over from the training notebook.
EPOCHS = 20
# Global batch size: 32 samples per replica times the number of replicas
BATCH_SIZE = 32 * strategy.num_replicas_in_sync
# Target [height, width] every image is resized to before prediction
IMAGE_SIZE = [512, 512]
# Seed
SEED = 123
# Learning rate passed to the Adam optimizer when the model is compiled
LR = 0.0001
# Test time augmentation rounds: how many augmented passes over the test set
# are averaged per model
TTA = 10
# Verbosity
VERBOSE = 2
# Number of classes (width of the softmax output layer)
N_CLASSES = 5

# Glob pattern matching the test images fed to the inference pipeline
TEST_FILENAMES = '../input/cassava-leaf-disease-classification/test_images/*.jpg'

In [None]:
def data_augment(image, image_name):
    """Apply a random augmentation chain to one image; pass the name through.

    Each stage is gated by its own uniform sample drawn from [0, 1):

    * flips: random left/right and up/down flips, plus a transpose when
      ``p_spatial > 0.75``;
    * rotation: 270, 180 or 90 degrees for the three upper quarters of
      ``p_rotate``; no rotation when ``p_rotate <= 0.25``;
    * pixel-level: saturation, contrast and brightness jitter, each applied
      when its own sample is ``>= 0.4``;
    * crop: a central crop keeping 70/80/90 % of the image for
      ``p_crop`` above 0.9/0.8/0.7, a random square crop for
      ``0.4 < p_crop <= 0.7``, and no crop otherwise.

    The result is always resized back to ``IMAGE_SIZE`` and reshaped to a
    static ``[*IMAGE_SIZE, 3]`` shape.

    Args:
        image: image tensor with 3 channels. In this notebook it is the output
            of ``read_image`` (float values, already resized to ``IMAGE_SIZE``).
        image_name: value carried alongside the image; returned untouched.

    Returns:
        Tuple ``(augmented_image, image_name)``.
    """
    # Independent random draws, one per augmentation stage.
    p_spatial = tf.random.uniform([], 0, 1.0, dtype = tf.float32)
    p_rotate = tf.random.uniform([], 0, 1.0, dtype = tf.float32)
    p_pixel_1 = tf.random.uniform([], 0, 1.0, dtype = tf.float32)
    p_pixel_2 = tf.random.uniform([], 0, 1.0, dtype = tf.float32)
    p_pixel_3 = tf.random.uniform([], 0, 1.0, dtype = tf.float32)
    p_crop = tf.random.uniform([], 0, 1.0, dtype = tf.float32)
            
    # Flips
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_flip_up_down(image)
    if p_spatial > 0.75:
        image = tf.image.transpose(image)
        
    # Rotates
    if p_rotate > 0.75:
        image = tf.image.rot90(image, k = 3) # rotate 270º
    elif p_rotate > 0.5:
        image = tf.image.rot90(image, k = 2) # rotate 180º
    elif p_rotate > 0.25:
        image = tf.image.rot90(image, k = 1) # rotate 90º
        
    # Pixel-level transforms
    # NOTE(review): the jittered values are not clipped afterwards, so they may
    # presumably leave the [0, 1] range produced by decode_image — confirm this
    # matches the training pipeline.
    if p_pixel_1 >= 0.4:
        image = tf.image.random_saturation(image, lower = 0.7, upper = 1.3)
    if p_pixel_2 >= 0.4:
        image = tf.image.random_contrast(image, lower = 0.8, upper = 1.2)
    if p_pixel_3 >= 0.4:
        image = tf.image.random_brightness(image, max_delta = 0.1)
        
    # Crops
    if p_crop > 0.7:
        if p_crop > 0.9:
            image = tf.image.central_crop(image, central_fraction = 0.7)
        elif p_crop > 0.8:
            image = tf.image.central_crop(image, central_fraction = 0.8)
        else:
            image = tf.image.central_crop(image, central_fraction = 0.9)
    elif p_crop > 0.4:
        # Random square crop whose side is drawn between 80 % of the image
        # height and the full height.
        crop_size = tf.random.uniform([], int(IMAGE_SIZE[0] * 0.8), IMAGE_SIZE[0], dtype = tf.int32)
        image = tf.image.random_crop(image, size = [crop_size, crop_size, 3])

    # Crops change the spatial size, so restore the fixed size and give the
    # tensor an explicit static shape again.
    image = tf.image.resize(image, size = IMAGE_SIZE)
    image = tf.reshape(image, [*IMAGE_SIZE, 3])
    
    return image, image_name

# Function to decode our images (normalize and reshape)
def decode_image(image_data):
    """Turn raw JPEG bytes into a float32 image of shape ``[*IMAGE_SIZE, 3]``.

    The image is decoded as 3-channel, resized to ``IMAGE_SIZE`` (same size as
    used at inference), and divided by 255 to map pixel values into [0, 1].
    """
    decoded = tf.image.decode_jpeg(image_data, channels = 3)
    resized = tf.image.resize(decoded, IMAGE_SIZE)
    scaled = tf.cast(resized, tf.float32) / 255.0
    # TPUs need a fully static shape, so state it explicitly.
    return tf.reshape(scaled, [*IMAGE_SIZE, 3])

def get_image_name(file_path):
    """Return the final path component of ``file_path`` (the image file name)."""
    # Split on the OS path separator and keep only the last piece.
    return tf.strings.split(file_path, os.path.sep)[-1]

def read_image(file_path):
    """Read one image file and return ``(decoded_image, image_name)``."""
    raw_bytes = tf.io.read_file(file_path)
    return decode_image(raw_bytes), get_image_name(file_path)

def get_test_dataset(filenames, tta = False):
    """Build the batched test pipeline yielding ``(image, image_name)`` pairs.

    Args:
        filenames: file pattern handed to ``tf.data.Dataset.list_files``;
            files are listed without shuffling so the order is stable.
        tta: when true the dataset repeats indefinitely, which is required to
            predict several test-time-augmentation rounds in one pass.

    Returns:
        A ``tf.data.Dataset`` of augmented images and their names, batched by
        ``BATCH_SIZE`` and prefetched.
    """
    files = tf.data.Dataset.list_files(filenames, shuffle = False)
    samples = (
        files
        .map(read_image, num_parallel_calls = AUTO)
        .map(data_augment, num_parallel_calls = AUTO)
    )
    if tta:
        # Repeat before batching so consecutive passes follow each other
        # in the prediction stream.
        samples = samples.repeat()
    # Prefetch the next batch while the current one is being predicted.
    return samples.batch(BATCH_SIZE).prefetch(AUTO)


# Number of test images. Counted with the same pattern the dataset pipeline
# reads (TEST_FILENAMES) rather than by listing the directory: any entry that
# is not a *.jpg would otherwise inflate the count and make it disagree with
# the number of predictions reshaped in inference().
NUM_TESTING_IMAGES = len(tf.io.gfile.glob(TEST_FILENAMES))

In [None]:
def get_model():
    """Build and compile the EfficientNetB5 classifier under ``strategy``.

    Architecture: input of shape ``(*IMAGE_SIZE, 3)`` -> EfficientNetB5
    backbone without its top and without pretrained weights -> global average
    pooling -> dropout (0.2) -> softmax dense layer with ``N_CLASSES`` units.
    The model is compiled with Adam (learning rate ``LR``), categorical
    cross-entropy with label smoothing 0.4, and categorical accuracy.

    Returns:
        The compiled ``tf.keras`` model.
    """
    with strategy.scope():
        inp = tf.keras.layers.Input(shape = (*IMAGE_SIZE, 3))

        # Layers are created in the same order as they are wired together.
        backbone = efn.EfficientNetB5(weights = None, include_top = False)
        pooled = tf.keras.layers.GlobalAveragePooling2D()(backbone(inp))
        dropped = tf.keras.layers.Dropout(0.2)(pooled)
        output = tf.keras.layers.Dense(N_CLASSES, activation = 'softmax')(dropped)

        model = tf.keras.models.Model(inputs = [inp], outputs = [output])
        model.compile(
            optimizer = tf.keras.optimizers.Adam(learning_rate = LR),
            loss = [tf.keras.losses.CategoricalCrossentropy(label_smoothing = 0.4)],
            metrics = [tf.keras.metrics.CategoricalAccuracy()]
        )

    return model
    
def inference(model_paths):
    """Ensemble test-time-augmented predictions and write ``submission.csv``.

    For every weight file a fresh model is built with ``get_model()``, the
    weights are loaded, and the (randomly augmented) test set is predicted
    ``TTA`` times. The ``TTA`` rounds are averaged per image, and the
    per-model averages are in turn averaged over all models.

    Args:
        model_paths: sized collection of paths to weight files loadable by
            the model returned from ``get_model()``.

    Returns:
        Tuple ``(image_name, prediction, sub)``:
        ``image_name`` is a NumPy array of test file names, ``prediction`` is
        the ``(NUM_TESTING_IMAGES, N_CLASSES)`` array of averaged
        probabilities, and ``sub`` is the DataFrame written to
        ``submission.csv`` (columns ``image_id`` and ``label``).

    Raises:
        ValueError: if ``model_paths`` is empty. Without this check an
            all-zero prediction (label 0 for every image) would be written.
    """
    if len(model_paths) == 0:
        raise ValueError('model_paths is empty: no trained weights to run inference with')

    # Create a numpy array to store predictions
    prediction = np.zeros((NUM_TESTING_IMAGES, N_CLASSES))

    print('Extracting test image names...')
    # Get the test dataset without tta to extract image names. Files are
    # listed unshuffled, so the names come out in the same order as the
    # images predicted below.
    test_dataset = get_test_dataset(TEST_FILENAMES, tta = False)
    image_name = test_dataset.map(lambda image, image_name: image_name).unbatch()
    image_name = next(iter(image_name.batch(NUM_TESTING_IMAGES))).numpy().astype('U')
    print('Test image names completed...')

    # Whole number of batches needed to cover TTA passes over the test set.
    # Rounding up also covers the case where the test set is smaller than one
    # batch (the public test sample). Keras expects an integer step count.
    steps = math.ceil(TTA * NUM_TESTING_IMAGES / BATCH_SIZE)

    for fold, model_path in enumerate(model_paths, start = 1):
        print('\n')
        print('-'*50)
        print(f'Predicting fold {fold}')
        # Drop the previous model's graph/state before building a new one.
        K.clear_session()
        model = get_model()
        # Load weights of pretrained model
        model.load_weights(model_path)

        # Get the test dataset with tta (repeating) and keep only the images
        test_dataset = get_test_dataset(TEST_FILENAMES, tta = True)
        image = test_dataset.map(lambda image, image_name: image)
        # The last batch may run into an extra pass; keep exactly TTA passes.
        probabilities = model.predict(image, steps = steps)[: TTA * NUM_TESTING_IMAGES]
        # Rows are laid out pass after pass (row = image + round * N), so a
        # Fortran-order reshape yields [image, round, class]; then average
        # over the rounds.
        probabilities = np.mean(probabilities.reshape((NUM_TESTING_IMAGES, TTA, N_CLASSES), order = 'F'), axis = 1)
        prediction += probabilities / len(model_paths)

    sub = pd.DataFrame({'image_id': image_name, 'label': np.argmax(prediction, axis = -1)})
    sub.to_csv('submission.csv', index = False)

    return image_name, prediction, sub
        
# Get pretrained models list for inference: every .h5 weight file found in the
# weights dataset directory (presumably one file per training fold — verify
# against the training notebook).
model_paths = glob.glob('../input/trained-cassava-weights/*.h5')
# Run the ensemble; this also writes submission.csv as a side effect.
image_name, prediction, sub = inference(model_paths)

In [None]:
# Preview the first rows of the submission DataFrame.
sub.head()