## Kaggle APTOS 2019 Diabetic Retinopathy Detection/Classification

Link to competition: https://www.kaggle.com/c/aptos2019-blindness-detection

This notebook was converted from my earlier Kaggle notebook.  It was migrated to TF 2.x, and several methods were rewritten with native TensorFlow ops, mainly so they can run inside graph-mode/mapped `tf.data` functions.

You will need to run this notebook in the Kaggle environment or download/upload the training data to Google Colab environment.  The training files are large.  I downloaded and copied them to Google drive for training.

The results below were measured on a validation split taken from the competition's labelled training images, not by Kaggle scoring.  The Kaggle score will be lower.  I did not finish the notebook to the point of submitting and scoring the model.


## Final Classification Report from Training images:

Accuracy : 0.9030694668820679

              precision    recall  f1-score   support

           0    0.97842   0.97143   0.97491       140
           1    0.85915   0.94574   0.90037       129
           2    0.89076   0.75714   0.81853       140
           3    0.84000   0.94382   0.88889        89
           4    0.93277   0.91736   0.92500       121

    accuracy                        0.90307       619
   macro avg    0.90022   0.90710   0.90154       619
weighted avg    0.90491   0.90307   0.90188       619



### Processing for using Google Drive, Kaggle and normal includes

In [None]:
#"""
# Google Colab-specific setup: mount Google Drive, list its top-level folder and
# flag that the notebook runs on Colab.  Skip this cell in any other environment.
# (The `#"""` lines at the top and bottom appear to be a toggle: removing the `#`
# from both wraps the cell body in a string and so disables it.)
from google.colab import drive
drive.mount('/content/drive')

import os
!ls "/content/drive/My Drive"

# Read by later cells to choose Colab paths
USING_COLLAB = True
# Colab-only magic that selects the TensorFlow 2.x runtime
%tensorflow_version 2.x
#"""

In [None]:
# Upload your "kaggle.json" file that you created from your Kaggle Account tab
# If you downloaded it, it would be in your "Downloads" directory

from google.colab import files
files.upload()

In [None]:
# To start, install kaggle libs
#!pip install -q kaggle

# Workaround to install the newest version
# https://stackoverflow.com/questions/58643979/google-colaboratory-use-kaggle-server-version-1-5-6-client-version-1-5-4-fai
!pip install kaggle --upgrade --force-reinstall --no-deps

In [None]:
# On your VM, create kaggle directory and modify access rights

!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/
!ls ~/.kaggle
!chmod 600 /root/.kaggle/kaggle.json

In [None]:
#!kaggle competitions list
# Takes about 4 mins to download
!kaggle competitions download -c aptos2019-blindness-detection

In [None]:
# Takes about 5 mins to unzip
!unzip -uq aptos2019-blindness-detection.zip 

In [None]:
!ls

In [None]:
# Cleanup to add some space....
!rm -r test_images
!rm aptos2019-blindness-detection.zip 

In [None]:
# Setup sys.path to find MachineLearning lib directory

try: USING_COLLAB
except NameError: USING_COLLAB = False

%load_ext autoreload
%autoreload 2

import sys
if "MachineLearning" in sys.path[0]:
    pass
else:
    print(sys.path)
    if USING_COLLAB:
        sys.path.insert(0, '/content/drive/My Drive/GitHub/MachineLearning/lib')  ###### CHANGE FOR SPECIFIC ENVIRONMENT
    else:
        sys.path.insert(0, '/Users/john/Documents/GitHub/MachineLearning/lib')  ###### CHANGE FOR SPECIFIC ENVIRONMENT
    
    print(sys.path)

In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import os, sys, random, warnings, time, copy, csv, gc
import numpy as np 

import IPython.display as display
from PIL import Image

import matplotlib.pyplot as plt
%matplotlib inline

import cv2
from tqdm import tqdm_notebook, tnrange, tqdm
import pandas as pd

import tensorflow as tf
print(tf.__version__)

from tensorflow.keras.models import load_model 

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

AUTOTUNE = tf.data.experimental.AUTOTUNE
print("AUTOTUNE: ", AUTOTUNE)

from TrainingUtils import *

#warnings.filterwarnings("ignore", category=DeprecationWarning)
#warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", "(Possibly )?corrupt EXIF data", UserWarning)

## Examine and understand data


In [None]:
# GLOBALS/CONFIG ITEMS

# Set root directory path to data
if USING_COLLAB:
    #ROOT_PATH = "/content/drive/My Drive/ImageData/KaggleDiabeticRetinopathy/Data"  ###### CHANGE FOR SPECIFIC ENVIRONMENT
    ROOT_PATH = ""  ###### CHANGE FOR SPECIFIC ENVIRONMENT
else:
    ROOT_PATH = ""

# Establish global dictionary
# CLASS_NAMES is indexed by the competition's diagnosis grade, so the order must be
# 0 - No DR, 1 - Mild, 2 - Moderate, 3 - Severe, 4 - Proliferative DR.
parms = GlobalParms(MODEL_NAME="model-Eye-V02.h5",
                    ROOT_PATH=ROOT_PATH,
                    TRAIN_DIR="train_images",
                    MODEL_PATH="/content/drive/My Drive/ImageData/KaggleDiabeticRetinopathy/Models",
                    NUM_CLASSES=5,
                    CLASS_NAMES=['Normal', 'Mild', 'Moderate', 'Severe', 'Proliferative'],
                    IMAGE_ROWS=224,
                    IMAGE_COLS=224,
                    IMAGE_CHANNELS=3,
                    BATCH_SIZE=16,
                    EPOCS=20,
                    IMAGE_EXT=".png",
                    FINAL_ACTIVATION='sigmoid',
                    LOSS='binary_crossentropy',
                    METRICS=['accuracy'])

parms.print_contents()

In [None]:
# Simple helper method to display batches of images with labels....        
def show_batch(image_batch, label_batch, number_to_show=25, r=5, c=5, print_shape=False):
    """Plot images from a batch, each titled with its class name.

    Args:
        image_batch: batch of images (tensor or array); a single un-batched
            image is accepted when `label_batch` is a single label vector.
        label_batch: ordinal multi-hot labels matching `image_batch`
            (e.g. [1, 1, 0, 0, 0] for grade 1).
        number_to_show: upper bound on the number of images drawn.
        r, c: subplot grid rows/columns; forced to 4 x 2 when fewer than
            8 images are shown.
        print_shape: when True, print each image's shape and value range.
    """
    images = np.asarray(image_batch)
    labels = np.asarray(label_batch)

    # A 1-D label means a single sample was passed: promote it to a batch of one.
    if labels.ndim == 1:
        images = np.expand_dims(images, axis=0)
        labels = np.expand_dims(labels, axis=0)

    # Size the loop from the real batch: the final batch of a dataset can be
    # shorter than parms.BATCH_SIZE.
    show_number = min(number_to_show, len(images))

    if show_number < 8: #if small number, then change row, col and figure size
        if parms.IMAGE_COLS > 64 or parms.IMAGE_ROWS > 64:
            plt.figure(figsize=(25,25))
        else:
            plt.figure(figsize=(10,10))
        r = 4
        c = 2
    else:
        plt.figure(figsize=(10,10))

    # Never request more subplots than the grid can hold.
    show_number = min(show_number, r * c)

    for n in range(show_number):
        image = images[n]
        if print_shape:
            print("Image shape: {}  Max: {}  Min: {}".format(image.shape, np.max(image), np.min(image)))
        plt.subplot(r, c, n + 1)
        cmap = "gray"
        if image.ndim == 3 and image.shape[2] == 3:
            cmap = "viridis"
        plt.imshow(tf.keras.preprocessing.image.array_to_img(image), cmap=plt.get_cmap(cmap))
        # Ordinal label -> class index: number of ones minus one.  Clamp at 0 so an
        # all-zero label does not wrap around to the last class name.
        class_index = max(int(labels[n].astype(int).sum(axis=0)) - 1, 0)
        plt.title(parms.CLASS_NAMES[class_index])
        plt.axis('off')

# Total Pipeline:

Create training and validation Pandas dataframes:

- Read the training CSV file into a pandas DataFrame -> add file_path and diagnosis_multi -> create additional training examples and a balanced sample -> split into train and val dataframes -> remove duplicates from val


Create training and validation datasets:

- For both: Use the pandas DataFrame to create a dataset, passing file_path and diagnosis_multi -> read & load image and create label -> remove any black borders using the midpoint and resize -> apply image_add_weighted to the image -> apply cache

- For training only: After cache -> apply additional image augmentation, random rotate and random zoom.

### Load csv file

- Load list of filenames and diagnosis
- Perform initial analysis on dataframe

In [None]:
# Read the competition's training index and attach the path of each image file.
all_df = pd.read_csv(os.path.join(parms.ROOT_PATH, "train.csv")).assign(
    file_path=lambda df: parms.TRAIN_PATH + "/" + df["id_code"] + ".png"
)
print(f"Training set is {len(all_df)}")
all_df.head()


In [None]:
# Apply method to create the label, every row will be processed
def build_label(x):
    """Encode a diagnosis grade as an ordinal multi-hot string.

    Grade k in 0..3 yields k + 1 leading ones followed by zeros, for example
    2 -> "1, 1, 1, 0, 0".  Any other value yields five ones.
    """
    ones = 5
    for grade in range(4):
        if x == grade:
            ones = grade + 1
            break
    return ", ".join(["1"] * ones + ["0"] * (5 - ones))

all_df['diagnosis_multi'] = all_df['diagnosis'].apply(build_label)
all_df.head()


In [None]:
#"""
#0-1805, 1-370, 2-999, 3-193, 4-295
# Oversample the minority grades by appending whole copies of their rows.
# NOTE: this cell reassigns all_df, so re-running it compounds the oversampling;
# re-run the cells that load the CSV and build the labels first.

# 1, 370 * 2 = 740
all_df = pd.concat([all_df, all_df.loc[all_df["diagnosis"] == 1]])

# 3, 193 * 3 = 579
only_3_df = all_df.loc[all_df["diagnosis"] == 3]
all_df = pd.concat([all_df, only_3_df, only_3_df])


# 4, 295 * 3 = 885
only_4_df = all_df.loc[all_df["diagnosis"] == 4]
all_df = pd.concat([all_df, only_4_df, only_4_df])

# reset_index returns a new frame; assign it back, otherwise the duplicated
# index labels created by the concats above are kept.
all_df = all_df.reset_index(drop=True)

# Select some number per group for training
SAMPLES_PER_GROUP = 700
balanced_all_df = all_df.groupby('diagnosis').apply(lambda x: x.sample(SAMPLES_PER_GROUP) if len(x) > SAMPLES_PER_GROUP else x)
#"""

In [None]:
#"""
balanced_all_df['diagnosis'].hist()
balanced_all_df['diagnosis'].value_counts()
#"""

## Build an input pipeline using dataframes

In [None]:
# Create training and validation dataframes
# balanced_all_df contains oversampled (duplicated) rows.  Splitting it row by row
# would put copies of the same image in both train and validation, so the split is
# done on unique images instead.
unique_images_df = balanced_all_df.drop_duplicates(subset='id_code', keep='first')
train_ids, valid_ids = train_test_split(unique_images_df['id_code'],
                                        test_size = 0.2,
                                        stratify = unique_images_df['diagnosis'])

# Training keeps every row (including the oversampled copies) of its own images
train_df = balanced_all_df[balanced_all_df['id_code'].isin(train_ids)]
train_df = shuffle(train_df) # Shuffle

# Validation holds each of its images exactly once, and none that are used for training
valid_df = unique_images_df[unique_images_df['id_code'].isin(valid_ids)]
valid_df = valid_df.sort_values('id_code')

print('Training len: ', train_df.shape[0], "  Validation len: ", valid_df.shape[0])

In [None]:
# set lengths and steps
train_len = len(train_df)
val_len = len(valid_df)
images_list_len = train_len + val_len

# Round up using true division so the final, partial batch is counted
# (ceil of a floor division never rounds up), and pass Keras plain ints.
steps_per_epoch = int(np.ceil(train_len / parms.BATCH_SIZE)) # set step sizes based on train & batch
validation_steps = int(np.ceil(val_len / parms.BATCH_SIZE)) # set step sizes based on val & batch

print("Total number: ", images_list_len, "  Train number: ", train_len, "  Val number: ", val_len)
print("Steps/EPOC: ", steps_per_epoch, "  Steps/Validation: ", validation_steps)

In [None]:
# final check on the numbers, because we dropped duplicate validation, numbers may not be 20%
print(train_df['diagnosis'].value_counts())
print(valid_df['diagnosis'].value_counts())

### Methods for dataset processing

In [None]:
"""
# Did not use this, but left it in notebook.  Makea the image a square by cropping from the smallest side
# I used this in the old Kaggle notebook, but found that it removed too much details
def smallest_side_center_crop(image: tf.Tensor) -> tf.Tensor:
    h = tf.shape(image)[0]
    w = tf.shape(image)[1]
    # if h == w, then zoom in a bit cause the image is square
    smallest_side = tf.math.minimum(h, w)
    offset_height = tf.cast(tf.math.floor((h - smallest_side) / 2), dtype=tf.int32)
    offset_width =  tf.cast(tf.math.floor((w - smallest_side) / 2), dtype=tf.int32)
    image = tf.image.crop_to_bounding_box(image, offset_height, offset_width, smallest_side, smallest_side)   
    return image
"""

# Will remove black rows looking at the MIDPOINT ONLY, and then crop image
def remove_black_boarder_from_midpoint(image, mask_threshold=0.1):
    """Crop the dark border off an image, measured along its middle row and column only.

    Pixels whose grayscale value is below `mask_threshold` count as black.  The
    left/top crop edges are the first non-black pixel along the middle row/column;
    the right/bottom edges are found the same way on a copy rotated by 180 degrees.

    Args:
        image: RGB image tensor (presumably float values in [0, 1], given the
            default threshold -- confirm against the caller).
        mask_threshold: grayscale value below which a pixel is treated as black.

    Returns:
        The cropped image.
    """
    dims = tf.shape(image)
    h, w = dims[0], dims[1]
    mh = h // 2
    mw = w // 2

    # 0 where the pixel is black, 1 elsewhere
    mask = tf.where(tf.image.rgb_to_grayscale(image) < mask_threshold, 0, 1)

    # argmax returns the first position holding the maximum, i.e. the first non-black pixel
    left = tf.math.argmax(mask[mh, :], axis=0, output_type=tf.dtypes.int32)
    top = tf.math.argmax(mask[:, mw], axis=0, output_type=tf.dtypes.int32)

    # On the 180-degree rotated mask the same scan counts black pixels from the far edges
    flipped = tf.image.rot90(mask, 2)
    right = w - tf.math.argmax(flipped[mh, :], axis=0, output_type=tf.dtypes.int32)
    bottom = h - tf.math.argmax(flipped[:, mw], axis=0, output_type=tf.dtypes.int32)

    return tf.image.crop_to_bounding_box(image,
                                         top[0],
                                         left[0],
                                         bottom[0] - top[0],
                                         right[0] - left[0])


# Read, decode the image, convert to float
def read_decode_image(file_path: tf.Tensor, label_string: tf.Tensor) -> tf.Tensor:
    """Load one PNG and parse its label string.

    Args:
        file_path: scalar string tensor with the path of the PNG file.
        label_string: comma-separated label, e.g. "1, 1, 0, 0, 0".

    Returns:
        An (image, label) pair: the image converted to parms.IMAGE_DTYPE,
        border-cropped and resized to parms.IMAGE_ROWS x parms.IMAGE_COLS, and the
        label as an int32 vector.  (The `-> tf.Tensor` annotation is left as it was.)
    """
    png_bytes = tf.io.read_file(file_path)
    decoded = tf.image.decode_png(png_bytes, channels=parms.IMAGE_CHANNELS)
    # convert_image_dtype rescales integer pixel values when the target dtype is a float
    scaled = tf.image.convert_image_dtype(decoded, parms.IMAGE_DTYPE)

    # "1, 1, 0, 0, 0" -> [1, 1, 0, 0, 0]
    label = tf.strings.to_number(tf.strings.split(label_string, sep=","), tf.int32)

    # Same crop + resize for training and validation images
    cropped = remove_black_boarder_from_midpoint(scaled)
    resized = tf.image.resize(cropped, [parms.IMAGE_ROWS, parms.IMAGE_COLS])

    return resized, label

# Augmentations for training dataset, done after cache
def image_aug(image: tf.Tensor) -> tf.Tensor:
    """Randomly rotate and, part of the time, zoom a training image.

    Args:
        image: image of size parms.IMAGE_ROWS x parms.IMAGE_COLS with
            parms.IMAGE_CHANNELS channels.

    Returns:
        The augmented image, resized back to parms.IMAGE_ROWS x parms.IMAGE_COLS
        when the zoom is applied.
    """
    # 0-3 quarter turns (maxval is exclusive): 0, 90, 180 or 270 degrees
    quarter_turns = tf.random.uniform(shape=[], minval=0, maxval=4, dtype=tf.int32)
    image = tf.image.rot90(image, quarter_turns)

    #######################################################
    # random zoom => use random crop + resize which will zoom the image
    #######################################################
    if tf.random.uniform(()) > 0.4:
        w = parms.IMAGE_COLS
        h = parms.IMAGE_ROWS
        p = 0.85
        # Take the channel count from the config instead of hard-coding 3 so the
        # crop stays valid if IMAGE_CHANNELS changes.
        crop_size = (int(h*p), int(w*p), parms.IMAGE_CHANNELS)
        image = tf.image.resize(tf.image.random_crop(image, crop_size), (h, w))
    #######################################################

    return image


from skimage.filters import gaussian
"""
Ben Graham won the competition 5 years ago and his augmentation was widely used
in the last competition.  He applied a weighted average to all images.
https://github.com/btgraham/SparseConvNet/tree/kaggle_Diabetic_Retinopathy_competition

The main approach used in this competition was to apply CV2.add_weighted.  
This is an approximation of CV2.add_weighted.  CV2 gave me weird results as a 
graph operation, realy did not work....so I used skimage for the gaussian blur and 
native TF for weighted average.  This will bring out the features of the eye images 
to make them similar. (and runs under graph processing). Assume image is color 
and already scaled between 0 and 1.

https://docs.opencv.org/2.4/modules/core/doc/operations_on_arrays.html
Add weighted uses this formula: dst = src1*alpha + src2*beta + gamma, where src1 
is the original image and src2 is a blurred image.
"""
def image_add_weighted(image, sigmaX=50, alpha=4.0, beta=-4.0, gamma=0.5):
    """Blend an image with a Gaussian-blurred copy of itself and clip to [0, 1].

    Computes image*alpha + blur(image)*beta + gamma.

    Args:
        image: colour image, channels last, assumed already scaled to [0, 1].
        sigmaX: standard deviation of the Gaussian blur.
        alpha: weight of the original image.
        beta: weight of the blurred image.
        gamma: constant added to the weighted sum.

    Returns:
        The blended image, clipped to [0, 1].
    """
    image = tf.convert_to_tensor(image)
    pixels = image.numpy()
    try:
        # scikit-image >= 0.19 names the channel dimension with `channel_axis`
        image_blur = gaussian(pixels, sigma=sigmaX, channel_axis=-1)
    except TypeError:
        # Older scikit-image releases only accept the `multichannel` flag
        image_blur = gaussian(pixels, sigma=sigmaX, multichannel=True)
    # Keep both operands of the weighted sum in the same dtype
    image_blur = tf.cast(image_blur, image.dtype)

    image = tf.add(tf.multiply(image, alpha), tf.multiply(image_blur, beta))
    image = tf.add(image, gamma)
    image = tf.clip_by_value(image, clip_value_min=0, clip_value_max=1)

    return image

# pre-cache mapped method
def process_train_pre_cache(file_path: tf.Tensor, label_string: tf.Tensor) -> tf.Tensor:
    """Training map function applied before the cache: load, crop, resize and filter.

    Returns the (image, label) pair produced by read_decode_image, with
    image_add_weighted applied to the image.
    """
    decoded, label = read_decode_image(file_path, label_string)

    # The py_function output has no static shape, so remember it and restore it afterwards
    static_shape = decoded.shape
    filtered = tf.py_function(image_add_weighted, [decoded], [tf.float32])[0]
    filtered.set_shape(static_shape)

    return filtered, label

# post-cache mapped method, does augmentation
def process_train_post_cache(image: tf.Tensor, label: tf.Tensor) -> tf.Tensor:
    """Training map function applied after the cache: augment the image, pass the label through."""
    return image_aug(image), label

# method mapped to load val
def process_val(file_path: tf.Tensor, label_string: tf.Tensor) -> tf.Tensor:
    """Validation map function: load, crop, resize and filter one image (no augmentation).

    Returns the (image, label) pair produced by read_decode_image, with
    image_add_weighted applied to the image.
    """
    image, label = read_decode_image(file_path, label_string)

    # Restore the static shape that is lost when going through py_function
    known_shape = image.shape
    outputs = tf.py_function(image_add_weighted, [image], [tf.float32])
    image = outputs[0]
    image.set_shape(known_shape)

    return image, label
  

### Training setup - build train and val datasets

In [None]:
# Build the training dataset from the dataframe's (file path, label string) columns
train_dataset = tf.data.Dataset.from_tensor_slices(
    (train_df["file_path"].values, train_df["diagnosis_multi"].values)
)

# Sanity check: the first two path/label pairs
for file_path, label in train_dataset.take(2):
    print("File path: ", file_path.numpy().decode("utf-8"), "  Label: ", label.numpy())

# Map the pre-cache work: load, crop, resize and filter each image
train_dataset = train_dataset.map(process_train_pre_cache, num_parallel_calls=AUTOTUNE)

# Sanity check: one processed image and its label
for image, label in train_dataset.take(1):
    some_image = image.numpy()
    some_label = label.numpy()
    print("Image shape: {}  Max: {}  Min: {}".format(some_image.shape, np.max(some_image), np.min(some_image)))
    print("Label: ", some_label)

# Cache the processed images, then augment, batch and repeat.
# Remove cache if running under Kaggle
# NOTE(review): nothing in this pipeline shuffles, so every epoch sees the rows in
# the order train_df was shuffled once; consider adding .shuffle() after the cache.
train_dataset = (
    train_dataset.cache("./eye_train.tfcache")
    .map(process_train_post_cache, num_parallel_calls=AUTOTUNE)
    .batch(parms.BATCH_SIZE)
    .prefetch(1)
    .repeat()
)

# Show the images, execute this cell multiple times to see the images
for image, label in train_dataset.take(1):
    sample_image, sample_label = image, label
show_batch(sample_image, sample_label)

### Validation setup

In [None]:
# Build the validation dataset from the dataframe's (file path, label string) columns
val_dataset = tf.data.Dataset.from_tensor_slices(
    (valid_df["file_path"].values, valid_df["diagnosis_multi"].values)
)

# Sanity check: the first two path/label pairs
for file_path, label in val_dataset.take(2):
    print("File path: ", file_path.numpy().decode("utf-8"), "  Label: ", label.numpy())

# Map the validation images to processing (no augmentation)
val_dataset = val_dataset.map(process_val, num_parallel_calls=AUTOTUNE)

# Sanity check: one processed image and its label
for image, label in val_dataset.take(1):
    some_image = image.numpy()
    some_label = label.numpy()
    print("Image shape: {}  Max: {}  Min: {}".format(some_image.shape, np.max(some_image), np.min(some_image)))
    print("Label: ", some_label)

# Cache the processed images, then batch and repeat.
# Remove cache if running under Kaggle
val_dataset = (
    val_dataset.cache("./eye_val.tfcache")
    .batch(parms.BATCH_SIZE)
    .prefetch(1)
    .repeat()
)


In [None]:
# Final check before model training: display one batch with its class names to
# confirm that images and labels still line up after preprocessing.
# NOTE(review): the previous comment here talked about mask non-zero counts; nothing
# in this notebook builds masks, so that text looked like a leftover from another notebook.

# Switch between validation and training by changing the dataset in the loop header

#for image, label in train_dataset.take(1):
for image, label in val_dataset.take(1):
    show_batch(image, label)  # number shown is capped inside show_batch


## Build and compile model


In [None]:
# Create any call backs for training...These are the most common.

from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, CSVLogger

# Lower the learning rate after 4 epochs without val_loss improvement
reduce_lr = ReduceLROnPlateau(monitor='val_loss', patience=4, verbose=1, min_lr=1e-6)
# Stop training after 8 epochs without val_loss improvement
earlystopper = EarlyStopping(monitor='val_loss', patience=8, verbose=1)
# Keep the model with the best *validation* eye_metric.  Monitoring the training
# metric would pick the checkpoint that fits the training data best.
checkpointer = ModelCheckpoint(parms.MODEL_PATH, monitor='val_eye_metric', verbose=1, mode="max", save_best_only=True)


In [None]:
# Create model and compile it

from tensorflow.keras.models import Sequential, load_model, Model
#from tensorflow.keras.layers import Dense, Dropout, Flatten, Input, Conv2D, MaxPooling2D, BatchNormalization, UpSampling2D, Conv2DTranspose, Concatenate, Activation
from tensorflow.keras.losses import binary_crossentropy, categorical_crossentropy
from tensorflow.keras.optimizers import Adadelta, Adam, Nadam, SGD
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout

# https://www.tensorflow.org/api_docs/python/tf/keras/applications
densenet = tf.keras.applications.DenseNet121(include_top=False, input_shape=(224,224,3))

# Simple metric to track improvements, can reduce the number of statements, left as is for readability 
# Just helps to know when to save the model, it will be higher than actual results, but I found it very helpful
# than just using normal accuracy.
def eye_metric(y_true, y_pred):
    """Per-sample grade accuracy for the ordinal multi-hot encoding.

    Each row of `y_true` / `y_pred` is reduced to a grade: the index of the last
    output above 0.5, or 0 when no output is above 0.5.  The metric is 1.0 for a
    sample whose predicted grade equals the true grade and 0.0 otherwise.

    Both the reverse and the argmax run along the class axis (the last axis);
    running them along axis 0 would compare positions within the batch instead
    of grades within each sample.

    Args:
        y_true: labels, shape (batch, classes).
        y_pred: sigmoid outputs, shape (batch, classes).

    Returns:
        Tensor of shape (batch,) with dtype of `y_true`.
    """
    def _grade(y):
        on = tf.cast(tf.cast(y, tf.float32) > 0.5, tf.int32)
        last_index = tf.shape(on)[-1] - 1
        # The first "on" entry of the class-reversed row is the last "on" entry of the row
        first_on_reversed = tf.math.argmax(tf.reverse(on, axis=[-1]), axis=-1, output_type=tf.int32)
        highest_on = last_index - first_on_reversed
        # Nothing above 0.5 -> grade 0
        return tf.where(tf.reduce_any(on > 0, axis=-1), highest_on, tf.zeros_like(highest_on))

    return tf.cast(tf.equal(_grade(y_true), _grade(y_pred)), dtype=y_true.dtype)

# Build and compile model.  I used this model before, did not adjust parms.
# You can change to try different configurations.  (DO percentages, Dense layers, etc)
def build_compile_model():
    """Build and compile the classifier: DenseNet121 base plus a small dense head.

    Returns:
        A compiled Sequential model with 5 sigmoid outputs, binary cross-entropy
        loss, the Adam optimizer, and the 'accuracy' and eye_metric metrics.
    """
    model = Sequential()
    model.add(densenet)
    model.add(GlobalAveragePooling2D())

    model.add(Dropout(0.3))
    model.add(Dense(1024,activation='relu'))
    model.add(Dropout(0.4))

    #model.add(Dropout(0.5))
    # One sigmoid output per position of the 5-element ordinal label from build_label
    model.add(Dense(5, activation='sigmoid'))

    model.compile(
        loss='binary_crossentropy',
        # `learning_rate` is the TF 2.x argument name; `lr` is a deprecated alias
        optimizer=Adam(learning_rate=0.00005),  #0.00005 orig
        metrics=['accuracy', eye_metric])

    return model



## Train model

In [None]:
# if you stop execution during training, a lock file may need to be removed
!ls
#!rm eye_val.tfcache_0.lockfile
#!ls

In [None]:
# Reload the model from prior run
#model = load_model(parms.MODEL_PATH, custom_objects={"eye_metric": eye_metric})


In [None]:
## Train model

model = build_compile_model()

history = model.fit(train_dataset,
                    validation_data=val_dataset,
                    epochs=parms.EPOCS, 
                    steps_per_epoch=steps_per_epoch,
                    validation_steps=validation_steps,
                    callbacks=[reduce_lr, earlystopper, checkpointer] 
                    )

In [None]:
# Plot the training history
history_df = pd.DataFrame(history.history)

# DataFrame.plot opens its own figure, so no plt.figure() call is needed
# (calling it first only produced an extra, empty figure).
for columns, title, y_label in [(['eye_metric', 'val_eye_metric'], "eye_metric", 'Accuracy'),
                                (['loss', 'val_loss'], "Loss", 'Loss'),
                                (['accuracy', 'val_accuracy'], "Accuracy", 'Accuracy')]:
    ax = history_df[columns].plot(title=title)
    ax.set_xlabel('Epocs')
    ax.set_ylabel(y_label)
plt.show()

## Validate model's predictions
- Create actual_labels and predict_labels
- Calculate Confusion Matrix & Accuracy
- Display results


In [None]:
# Reload the model from prior run
model = load_model(parms.MODEL_PATH, custom_objects={"eye_metric": eye_metric})


In [None]:
# Easy to modify to predict the given test files that do not have a mask

# Method to be applied to all testing images
def process_test_image_id(image_id, file_path: tf.Tensor, label_string: tf.Tensor) -> tf.Tensor:
    """Test map function: same processing as validation, with the image id passed through.

    Returns an (image_id, image, label) triple.
    """
    decoded, label = read_decode_image(file_path, label_string)

    # Restore the static shape that is lost when going through py_function
    static_shape = decoded.shape
    filtered = tf.py_function(image_add_weighted, [decoded], [tf.float32])[0]
    filtered.set_shape(static_shape)

    return image_id, filtered, label

# Create Dataset from pd, could use validation or training
test_df = shuffle(valid_df)
test_dataset = tf.data.Dataset.from_tensor_slices(
    (test_df["id_code"].values, test_df["file_path"].values, test_df["diagnosis_multi"].values)
)

# Sanity check: the first two id/label pairs (the middle element is still the file path here)
for image_id, image, label in test_dataset.take(2):
    print(image_id.numpy().decode("utf-8"), label.numpy())

# Map the test images to processing (no augmentation)
test_dataset = test_dataset.map(process_test_image_id, num_parallel_calls=AUTOTUNE)

# Sanity check: one processed image
for image_id, image, label in test_dataset.take(1):
    some_image = image.numpy()
    print("Image Id: ", image_id.numpy().decode("utf-8"))
    print("Image shape: {}  Max: {}  Min: {}".format(some_image.shape, np.max(some_image), np.min(some_image)))
    print("Label: ", label.numpy())

# One image per batch, repeated
test_dataset = test_dataset.batch(1).repeat()

In [None]:
def predictions_using_dataset_eye(model_actual,
                              dataset,
                              steps,
                              batch_size,
                              create_bad_results_list=True):
    """
      Runs the model over `steps` batches of a dataset.  Builds actual_labels,
      predict_labels and predict_probabilities.

      Labels use the ordinal multi-hot encoding: the actual grade is the number
      of ones minus one, the predicted grade is the index of the last output
      above 0.5 (grade 0 when no output is above 0.5).

      Args:
        model_actual : trained model to use for predictions
        dataset : dataset yielding (image_id_batch, image_batch, label_batch)
        steps : number of batches to process
        batch_size : maximum number of samples read from each batch; a
            shorter final batch is handled
        create_bad_results_list : bool default True.  Lets you turn on/off
            the creation of the bad results lists.

      Returns:
        actual_labels : list of actual labels
        predict_labels : list of predicted labels
        predict_probabilities : list of predicted probability array
        bad_results : list of bad results [image_id, actual_labels,
                      predict_labels, predict_probabilities, image]
    """

    bad_cnt = 0.0
    good_cnt = 0.0
    total_cnt = 0
    actual_labels = []
    predict_labels = []
    predict_probabilities = []
    bad_results = []

    for image_id_batch, image_batch, label_batch in tqdm(dataset.take(steps), total=steps):
        # One forward pass per batch instead of one per image
        batch_probabilities = np.asarray(model_actual.predict(image_batch, verbose=0))
        batch_labels = np.asarray(label_batch)

        # The final batch of a dataset can hold fewer than batch_size samples
        samples_in_batch = min(batch_size, len(batch_probabilities))

        for j in range(samples_in_batch):
            image_id = image_id_batch[j]

            total_cnt += 1

            # Number of ones minus one; an all-zero label is treated as grade 0
            actual_label = max(int(batch_labels[j].astype(int).sum(axis=0)) - 1, 0)

            predict_probabilities_tmp = batch_probabilities[j]
            predict_probabilities_actual = predict_probabilities_tmp > 0.5

            if not predict_probabilities_actual.any():  # if nothing scored, default to a normal eye...what else can you do :)
                predict_label = 0
            else:
                # Index of the last output above 0.5, found from the reversed vector
                predict_label = int((len(predict_probabilities_actual) - np.argmax(np.flip(predict_probabilities_actual))) - 1)

            actual_labels.append(actual_label)
            predict_labels.append(predict_label)
            predict_probabilities.append(predict_probabilities_tmp)

            if actual_label == predict_label:
                good_cnt = good_cnt + 1
            else:
                bad_cnt = bad_cnt + 1
                if create_bad_results_list:
                    bad_results.append([image_id,
                                        [actual_label],
                                        [predict_label],
                                        predict_probabilities_tmp,
                                        np.expand_dims(np.asarray(image_batch[j]), axis=0)])

    # Avoid a ZeroDivisionError when no sample was processed
    percent_good = good_cnt / total_cnt if total_cnt else 0.0
    print(" ")
    print("total: ", total_cnt, "  Good: ", good_cnt, "  Bad: ",
          bad_cnt, "  percent good: ", str(percent_good))

    return actual_labels, predict_labels, predict_probabilities, \
        bad_results

In [None]:
# test_dataset yields one image per batch, so val_len steps covers every validation
# image once.  For a quick smoke test, temporarily set `steps` to a small number.
steps = val_len
# Keep the misclassified samples: the cell that builds bad_results_df reads them.
actual_labels, predict_labels, predict_probabilities, bad_results = predictions_using_dataset_eye(model,
                              test_dataset,
                              steps,
                              1,
                              create_bad_results_list=True)

In [None]:
show_confusion_matrix(actual_labels, predict_labels, parms.CLASS_NAMES, show_graph=True)

In [None]:
# Graph the results
display_prediction_results(actual_labels, predict_labels, predict_probabilities, parms.NUM_CLASSES, parms.CLASS_NAMES)


In [None]:
#Create a df from the bad results list, can save as csv or use for further analysis
bad_results_df = pd.DataFrame(bad_results, columns =['image_id', 'actual', 'predict', 'prob', 'image'])
bad_results_df.head()

In [None]:
# Last check, spot check some entries as needed....
#all_df.loc[all_df['id_code'] == "e07045d7c5f7"]