In [1]:
import numpy as np 
import pandas as pd 


import tensorflow as tf

# kaggle_datasets is an optional dependency: if it cannot be imported the
# notebook carries on without it. Only ImportError is swallowed so that
# unrelated failures (and KeyboardInterrupt / SystemExit) are not hidden.
try:
    from kaggle_datasets import KaggleDatasets
except ImportError:
    pass

In [2]:
# Inference settings shared by the cells below.
params = dict(
    batch_size=5,          # records per batch fed to the model
    img_size=[512, 512],   # length and width will be equal
    epochs=100,            # not referenced by the cells visible in this notebook
    test_steps=3200,       # passed to model.predict(steps=...)
    test_size=16000,       # batch size used to gather all image ids at once
)

In [3]:
# Use whichever tf.distribute strategy is currently active.
strategy = tf.distribute.get_strategy()

# Show how many replicas the strategy keeps in sync.
print("REPLICAS: ", strategy.num_replicas_in_sync)

REPLICAS:  1


In [4]:
# Load the competition's sample submission and display it; its columns
# (image_id, label) are the format the final submission.csv follows.
sample_submission = pd.read_csv('../input/cassava-leaf-disease-classification/sample_submission.csv')
sample_submission

Unnamed: 0,image_id,label
0,2216849948.jpg,4


In [5]:
def decode_image_test(tfrec, img_size=None):
    '''
    function to decode a single test image (and its name) from a tfrecord
    
    args:
        tfrec: tfrecord, single serialized record holding the string
            features 'image' (JPEG bytes) and 'image_name'
        img_size: optional [height, width] to resize to; when None the
            notebook-level params['img_size'] is used
    
    returns:
        decoded_image: tensor, 3-channel image decoded from the record and
            resized to img_size
        img_name: tensor, string, Id of the decoded image
    
    '''
    
    # fall back to the shared configuration so existing one-argument
    # callers (e.g. dataset.map(decode_image_test)) behave as before
    if img_size is None:
        img_size = params['img_size']
    
    features_dictionary = {
        'image': tf.io.FixedLenFeature([], tf.string),
        "image_name": tf.io.FixedLenFeature([], tf.string),
        }
    features = tf.io.parse_single_example(tfrec, features_dictionary)
    
    # force 3 channels so every image has the same depth
    decoded_image = tf.io.decode_jpeg(features['image'], channels=3)
    decoded_image = tf.image.resize(decoded_image, img_size)
    img_name = features['image_name']
    
    return decoded_image, img_name


In [6]:
def normalize_image(decoded_image, label):
    '''
    function to rescale image tensor values from the 0 to 255 range
    into the -1 to 1 range; the second argument is passed through untouched
    
    args:
        decoded_image: tensor that is an image with values from 0 to 255
        label: tensor, returned as-is alongside the image
    
    returns: 
        image_tensor: tensor that is an image with values from -1 to 1
        label, same as input
    
    '''
    
    # (x, y, z) -> (1, x, y, z): prepend an axis at position zero ...
    with_leading_axis = tf.expand_dims(decoded_image, 0)
    # ... and take element 0 to drop it again -- kept because of the TF
    # limitation around filtered tensors / py_function noted by the author
    without_leading_axis = tf.gather(with_leading_axis, 0)

    # 0 to 255 -> -1 to 1: shift by the midpoint, then divide by it
    as_float = tf.cast(without_leading_axis, tf.float32)
    image_tensor = (as_float - 127.5) / 127.5

    return image_tensor, label

In [7]:
def get_test_ds(tfrecords, batch_size):
    '''
    function to build the inference input pipeline from tfrecord files
    
    args:
        tfrecords: list, tfrecord file paths
        batch_size: int, number of records per batch handed to the model
    returns:
        ds: tensorflow input pipeline; each element is a batch of
            normalized images together with the second value produced by
            decode_image_test (the image names)
    '''
    autotune = tf.data.experimental.AUTOTUNE

    # read the record files, letting tf.data choose the read parallelism
    # (no .cache() step: the author disabled caching when not using TPUs)
    raw_records = tf.data.TFRecordDataset(filenames=[tfrecords],
                                          num_parallel_reads=autotune)

    # record -> (resized image, name) -> (image scaled to -1..1, name)
    decoded = raw_records.map(decode_image_test, num_parallel_calls=autotune)
    normalized = decoded.map(normalize_image, num_parallel_calls=autotune)

    # keep the final short batch so no record is dropped
    batched = normalized.batch(batch_size, drop_remainder=False)
    ds = batched.prefetch(autotune)

    return ds

In [8]:
# Collect every .tfrec file in the competition's test_tfrecords directory.
test_files = tf.io.gfile.glob('../input/cassava-leaf-disease-classification/test_tfrecords/*.tfrec')

# Build the batched inference pipeline over those files.
test_ds = get_test_ds(test_files, params['batch_size'])



In [9]:
# Load the previously trained Keras model from its saved .h5 file.
model = tf.keras.models.load_model('../input/leafmodel1/final_model.h5')

In [11]:
# test_ds yields (image batch, image-name batch); only the images (the
# first element) are fed to the model.
predictions = model.predict(
    test_ds.map(lambda img, ids: img),
    steps=params['test_steps'],  # look into steps
)

In [12]:
# Keep only the image names, then re-batch them into one batch of up to
# params['test_size'] entries.
_id_batches = (
    test_ds
    .map(lambda img, ids: ids)
    .unbatch()
    .batch(params['test_size'])
)

# Pull the first (large) batch and convert it to a NumPy array of str.
prediction_ids = next(iter(_id_batches)).numpy().astype('str')

In [13]:
# Turn the model output into class labels: index of the highest score in
# each row. Assumes `predictions` is a 2-D (n_images, n_classes) array from
# a single-output model -- TODO confirm against the loaded model.
# np.argmax keeps everything in NumPy; the previous tf.concat over a single
# tensor was a no-op and handed a TF tensor to pandas.
prediction_dict = {
    'image_id': prediction_ids,
    'label': np.argmax(predictions, axis=1),
}

# One row per image, in the same column layout as the sample submission.
submission_ds = pd.DataFrame(prediction_dict)

In [14]:
# Quick visual check of the first rows before writing the file.
submission_ds.head()

Unnamed: 0,image_id,label
0,2216849948.jpg,4


In [15]:
# Write the submission file; index=False keeps the row index out of the CSV.
submission_ds.to_csv('submission.csv', index=False)