In [None]:
import numpy as np
import tensorflow as tf
import re
from keras.models import load_model

# Load Data

In [None]:
def decode_image(image):
    """Turn encoded JPEG bytes into a fixed-size float image tensor.

    Args:
        image: scalar string tensor holding JPEG-encoded bytes.

    Returns:
        A float32 tensor shaped [HEIGHT, WIDTH, 3]: the decoded 3-channel
        image divided by 255 and then resized to the module-level
        HEIGHT x WIDTH.
    """
    pixels = tf.image.decode_jpeg(image, channels=3)
    scaled = tf.cast(pixels, tf.float32) / 255
    resized = tf.image.resize(scaled, [HEIGHT, WIDTH])
    # The reshape pins the static shape to the configured size.
    return tf.reshape(resized, [HEIGHT, WIDTH, 3])

def read_tfrecord(serialized_example):
    """Parse a single serialized example into an (image, image_name) pair.

    Args:
        serialized_example: scalar string tensor containing one serialized
            record with string features 'image' and 'image_name'.

    Returns:
        Tuple of (decoded image tensor from decode_image, image name string
        tensor).
    """
    # Both features are scalar strings: raw JPEG bytes and the file name.
    schema = {
        'image': tf.io.FixedLenFeature([], tf.string),
        'image_name': tf.io.FixedLenFeature([], tf.string),
    }
    parsed = tf.io.parse_single_example(serialized_example, schema)
    return decode_image(parsed['image']), parsed['image_name']

def load_dataset(filenames, labeled=True, ordered=False):
    """Build an unbatched dataset of (image, image_name) pairs from TFRecords.

    Args:
        filenames: TFRecord file path(s) passed to tf.data.TFRecordDataset.
        labeled: accepted for interface compatibility; not used by this
            function.
        ordered: when False, the dataset options are set to allow
            non-deterministic element order; when True, the options are
            left at their defaults.

    Returns:
        A tf.data.Dataset whose elements are produced by read_tfrecord.
    """
    options = tf.data.Options()
    if not ordered:
        # Let tf.data trade element order for throughput.
        options.experimental_deterministic = False

    records = tf.data.TFRecordDataset(filenames).with_options(options)
    return records.map(read_tfrecord)

def get_dataset(filenames, ordered=True):
    """Return the parsed dataset for `filenames`, batched by BATCH_SIZE.

    Args:
        filenames: TFRecord file path(s), forwarded to load_dataset.
        ordered: forwarded to load_dataset; defaults to True here.

    Returns:
        The dataset from load_dataset grouped into batches of BATCH_SIZE.
        No shuffling or prefetching is applied.
    """
    return load_dataset(filenames, ordered=ordered).batch(BATCH_SIZE)
    
def count_data_items(filenames):
    """Sum the record counts encoded in TFRecord file names.

    Each name is expected to carry its record count as ``-<digits>.``,
    e.g. ``ld_train00-1338.tfrec`` contributes 1338.

    Args:
        filenames: iterable of file path strings.

    Returns:
        The total count as a numpy integer (0 for an empty iterable).

    Raises:
        ValueError: if a name contains no ``-<digits>.`` segment.
    """
    # Compiled once for all names; '+' (not '*') guarantees the captured
    # group is never empty, so int() cannot be handed ''.
    count_pattern = re.compile(r"-([0-9]+)\.")

    counts = []
    for filename in filenames:
        matches = count_pattern.findall(filename)
        if not matches:
            raise ValueError(
                "cannot find a '-<count>.' segment in file name: %r" % (filename,)
            )
        # Take the last occurrence: the count sits next to the extension,
        # while earlier matches may come from directory names in the path.
        counts.append(int(matches[-1]))

    # dtype=int keeps the result integral even when `counts` is empty.
    return np.sum(counts, dtype=int)

# Input pipeline settings, read by decode_image / get_dataset at call time.
AUTOTUNE = tf.data.experimental.AUTOTUNE
BATCH_SIZE = 25
HEIGHT = WIDTH = 128
IMAGE_SIZE = (HEIGHT, WIDTH)

# Test-set shards. Alternative inputs used during development:
#   all training shards:  '/kaggle/input/cassava-leaf-disease-classification/train_tfrecords/ld_train*.tfrec'
#   one training shard:   '/kaggle/input/cassava-leaf-disease-classification/train_tfrecords/ld_train00-1338.tfrec'
FILEPATHS = tf.io.gfile.glob('/kaggle/input/cassava-leaf-disease-classification/test_tfrecords/ld_test*.tfrec')

# The image count is derived from the shard file names, not by reading records.
NUM_TEST_IMAGES = int(count_data_items(FILEPATHS))
print("NUM_TEST_IMAGES:", NUM_TEST_IMAGES)

# ordered=True is passed so element order is left deterministic.
test_dataset = get_dataset(FILEPATHS, ordered=True)

In [None]:
# Visualize Data
#import matplotlib.pyplot as plt

def show_batch(image_batch):
    """Plot up to the first 25 images of a batch on a 5x5 grid.

    Requires `plt` (matplotlib.pyplot) to be in scope; note that the import
    in this cell is commented out.

    Args:
        image_batch: indexable, sized collection of images accepted by
            plt.imshow (the commented-out call below passes a numpy batch).
            Batches with fewer than 25 images are plotted in full; longer
            ones are truncated to the first 25.
    """
    grid_rows, grid_cols = 5, 5
    plt.figure(figsize=(10,10))
    # Cap at the grid capacity without indexing past a short batch
    # (e.g. the final partial batch of a dataset).
    shown = min(len(image_batch), grid_rows * grid_cols)
    for n in range(shown):
        plt.subplot(grid_rows, grid_cols, n + 1)
        plt.imshow(image_batch[n])
        plt.axis("off")

#image_batch, name_batch = next(iter(test_dataset))
#show_batch(image_batch.numpy())

# Load Model

In [None]:
# Checkpoint used for inference. Earlier checkpoints, kept for reference:
#   '/kaggle/input/my-cassava-data/resnet50_model.h5'
#   '/kaggle/input/my-cassava-data/resnet50_model_v4.h5'   (v4 takes images of size (225, 300, 3))
# NOTE(review): the note attached to this path said the v5-7 models take
# images of size (128, 128, 3); presumably v10 does as well - confirm.
MY_MODEL_FILE = '/kaggle/input/my-cassava-data/resnet50_model_v10.h5'

model = load_model(
    MY_MODEL_FILE,
    custom_objects=None,
)

# Generate Predictions

In [None]:
# Generate predictions: feed only the image component to the model and take
# the highest-scoring class index per image.
images = test_dataset.map(lambda image, image_name: image)
probabilities = model.predict(images) #, verbose=1)
predictions = np.argmax(probabilities, axis=-1)

# Collect image names with a second pass over the same dataset.
# test_dataset is created with ordered=True, so both passes are expected to
# yield records in the same order.
image_names_ds = test_dataset.map(lambda image, image_name: image_name).unbatch()
# Exhaust the dataset instead of pulling one batch of NUM_TEST_IMAGES: that
# count is parsed from file names and would truncate the names if it were
# smaller than the real number of records.
image_names = np.concatenate(
    [name_batch.numpy() for name_batch in image_names_ds.batch(BATCH_SIZE)]
).astype('U')

# Fail with a clear message rather than write misaligned rows.
if len(image_names) != len(predictions):
    raise ValueError(
        "got %d image names but %d predictions" % (len(image_names), len(predictions))
    )

# One row per image: image_id,label
np.savetxt('submission.csv', np.rec.fromarrays([image_names, predictions]), fmt=['%s', '%d'], delimiter=',', header='image_id,label', comments='')