# In [None]:
import tensorflow as tf

import math
import numpy as np
import matplotlib.pyplot as plt
import logging
import seaborn as sns

import tensorflow_datasets as tfds
# Turn off the tensorflow_datasets progress bars.
tfds.disable_progress_bar()

# Restrict TensorFlow's logger to messages at ERROR level and above.
logger = tf.get_logger()
logger.setLevel(logging.ERROR)




# INITIALIZING VARIABLES

# The whole 'train' split together with the dataset metadata; both are only
# consumed by the example preview at the end of this section.
ds, info = tfds.load(
    'malaria',
    split='train',
    shuffle_files=True,
    with_info=True,
)

# A 70% / 30% partition of the 'train' split, loaded as (image, label) pairs.
train_ds, test_ds = tfds.load(
    'malaria',
    split=['train[:70%]', 'train[70%:]'],
    shuffle_files=True,
    as_supervised=True,
)

# Example counts; reused later for the shuffle buffer and the step counts.
NUM_TRAIN_IMAGES = tf.data.experimental.cardinality(train_ds).numpy()
NUM_TEST_IMAGES = tf.data.experimental.cardinality(test_ds).numpy()
print(NUM_TRAIN_IMAGES)
print(NUM_TEST_IMAGES)

# Preview a handful of examples from the full split.
vis = tfds.visualization.show_examples(ds, info)




#PROCESSING DATA


# Inspect a single raw example: print the image's array shape and its label value.
for image, label in train_ds.take(1):
    print("Image size: ", image.numpy().shape)
    print("Label: ", label.numpy())

#To clean the data, we will crop or pad the images to 200 x 200 pixels
# and invert the labels so that 0 represents uninfected cells and 1 represents parasitized ones


# Number of examples per batch (used for the preview batch, training and evaluation).
BATCH_SIZE = 32
# Target image height and width.
# NOTE(review): not referenced anywhere in the visible code; pad() and the model
# input shape hard-code 200 instead -- confirm whether this should be wired in.
IMAGE_SIZE = [200, 200]

def convert(image, label):
  """Return the image converted to ``tf.float32`` alongside the untouched label.

  Args:
    image: Image tensor accepted by ``tf.image.convert_image_dtype``.
    label: Label associated with the image; passed through as-is.

  Returns:
    A ``(float_image, label)`` tuple.
  """
  float_image = tf.image.convert_image_dtype(image, dtype=tf.float32)
  return float_image, label

# bringing each image to 200 x 200 by cropping or padding
def pad(image, label):
  """Convert the image to float32 and crop or pad it to 200 x 200.

  Args:
    image: Image tensor for one example.
    label: Label for that example; returned unchanged.

  Returns:
    A ``(image_200x200, label)`` tuple.
  """
  # Same dtype conversion that convert() performs, applied inline.
  as_float = tf.image.convert_image_dtype(image, tf.float32)
  return tf.image.resize_with_crop_or_pad(as_float, 200, 200), label

# switching the 0 and 1 around, as mentioned above
def invert_labels(image, label):
  """Flip a binary label (0 <-> 1) and pass the image through unchanged.

  The TFDS `malaria` dataset declares its class names in the order
  ['parasitized', 'uninfected'], so raw label 0 means parasitized. The rest
  of this script (plot titles, positive-class metrics) treats 1 as
  parasitized, which is why the labels are inverted here.

  Args:
    image: Image for one example; returned as-is.
    label: Binary label (0 or 1) as an int, NumPy value or tensor.

  Returns:
    A ``(image, 1 - label)`` tuple.
  """
  # `1 - label` works element-wise for tensors/arrays and keeps their dtype.
  return image, 1 - label

# Run both splits through the same two-stage cleaning: pad() then invert_labels().
clean_train_ds = train_ds.map(pad).map(invert_labels)
clean_test_ds = test_ds.map(pad).map(invert_labels)




#THE CLEAN DATA

#Visualizing the data
# Take the first batch of BATCH_SIZE cleaned training examples for the preview grid.
image_batch, label_batch = next(iter(clean_train_ds.batch(BATCH_SIZE)))

def show_batch(image_batch, label_batch):
    """Plot up to 25 images from a batch in a 5 x 5 grid, titled by class.

    Args:
        image_batch: Indexable, sized batch of images that ``plt.imshow``
            can render.
        label_batch: Indexable, sized batch of labels aligned with
            ``image_batch``; a truthy label is titled as parasitized,
            a falsy one as uninfected.
    """
    plt.figure(figsize=(10, 10))
    # The grid holds 25 cells, but a batch may be shorter than that; never
    # index past the end of either sequence.
    num_shown = min(25, len(image_batch), len(label_batch))
    for n in range(num_shown):
        plt.subplot(5, 5, n + 1)
        plt.imshow(image_batch[n])
        if label_batch[n]:
            plt.title("parasitized (1) ")
        else:
            plt.title("uninfected (0) ")
        plt.axis("off")
# Hand the preview batch to show_batch() as NumPy arrays.
show_batch(image_batch.numpy(), label_batch.numpy())

#Sending the data in batches to the model for training and testing
# Shuffle BEFORE repeating: that way each pass over the data yields every
# example exactly once. Repeating first lets the shuffle buffer mix examples
# from consecutive passes, so some images can show up twice before others
# have been seen at all.
clean_train_ds = clean_train_ds.shuffle(NUM_TRAIN_IMAGES).repeat().batch(BATCH_SIZE)
# The test data is only batched; its order does not matter for evaluation.
clean_test_ds = clean_test_ds.batch(BATCH_SIZE)




# THE MODEL

model = tf.keras.Sequential([
    # CNN: the convolutional part of the network, i.e. how the computer "sees" the cell.
    # Two identical conv + max-pool stages; the first one also fixes the input shape.
    tf.keras.layers.Conv2D(
        32, (3, 3),
        padding='same',
        activation=tf.nn.relu,
        input_shape=(200, 200, 3),
    ),
    tf.keras.layers.MaxPooling2D((2, 2), strides=2),
    tf.keras.layers.Conv2D(
        32, (3, 3),
        padding='same',
        activation=tf.nn.relu,
    ),
    tf.keras.layers.MaxPooling2D((2, 2), strides=2),

    # Dense and output layers: flatten, then a stack of ReLU layers of
    # decreasing width, finished by a single sigmoid unit.
    tf.keras.layers.Flatten(),
    *(
        tf.keras.layers.Dense(width, activation=tf.nn.relu)
        for width in (300, 300, 200, 200, 100)
    ),
    tf.keras.layers.Dense(1, activation=tf.nn.sigmoid),
])

# Print a quick summary of the model.
model.summary()




# COMPILING THE MODEL

# Adam optimizer, binary cross-entropy loss, and the four raw confusion-matrix
# counts as metrics (instantiated in the order TP, TN, FP, FN).
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss='binary_crossentropy',
    metrics=[
        metric_cls()
        for metric_cls in (
            tf.keras.metrics.TruePositives,
            tf.keras.metrics.TrueNegatives,
            tf.keras.metrics.FalsePositives,
            tf.keras.metrics.FalseNegatives,
        )
    ],
)




# TRAINING

NUMBER_OF_EPOCHS = 5

# One epoch is defined as enough batches to cover NUM_TRAIN_IMAGES examples.
model.fit(
    clean_train_ds,
    epochs=NUMBER_OF_EPOCHS,
    steps_per_epoch=math.ceil(NUM_TRAIN_IMAGES / BATCH_SIZE),
)

#Results from training
# NOTE: the text below is a hard-coded transcript of a training log, printed
# verbatim; it is not produced by the model.fit() call in this script.
print("""Epoch 1/5
603/603 [==============================] - 94s 99ms/step - loss: 0.4499 - true_positives: 7784.0000 - true_negatives: 7205.0000 - false_positives: 2407.0000 - false_negatives: 1900.0000
Epoch 2/5
603/603 [==============================] - 62s 103ms/step - loss: 0.2367 - true_positives: 9111.0000 - true_negatives: 8731.0000 - false_positives: 912.0000 - false_negatives: 542.0000
Epoch 3/5
603/603 [==============================] - 61s 102ms/step - loss: 0.1923 - true_positives: 9105.0000 - true_negatives: 9032.0000 - false_positives: 585.0000 - false_negatives: 574.0000
Epoch 4/5
603/603 [==============================] - 61s 100ms/step - loss: 0.1716 - true_positives: 9220.0000 - true_negatives: 9075.0000 - false_positives: 604.0000 - false_negatives: 397.0000
Epoch 5/5
603/603 [==============================] - 62s 102ms/step - loss: 0.1569 - true_positives: 9167.0000 - true_negatives: 9132.0000 - false_positives: 596.0000 - false_negatives: 401.0000
<keras.callbacks.History at 0x7f0ac2a31050>""")




# PERFORMANCE ON TEST DATA

# Beginning testing: the result is unpacked as the loss followed by the four
# confusion-matrix counts.
(
    test_loss,
    test_tp,
    test_tn,
    test_fp,
    test_fn,
) = model.evaluate(
    clean_test_ds,
    steps=math.ceil(NUM_TEST_IMAGES / BATCH_SIZE),
)

#Creating a confusion matrix using Seaborn
def draw_confusion_matrix(tp, tn, fp, fn):
  """Draw a 2 x 2 confusion-matrix heatmap annotated with counts and shares.

  The cells are laid out as::

      [[True Pos,  False Pos],
       [False Neg, True Neg ]]

  Each cell shows its name, its count, and its share of all four counts.

  Args:
    tp: Number of true positives.
    tn: Number of true negatives.
    fp: Number of false positives.
    fn: Number of false negatives.
  """
  cf_matrix = np.array([[tp, fp], [fn, tn]])
  group_names = ['True Pos','False Pos','False Neg','True Neg']
  group_counts = ["{0:0.0f}".format(value) for value in cf_matrix.flatten()]
  group_percentages = ["{0:.2%}".format(value) for value in cf_matrix.flatten()/np.sum(cf_matrix)]
  # One three-line annotation per cell, reshaped to match the matrix layout.
  labels = [f"{v1}\n{v2}\n{v3}" for v1, v2, v3 in zip(group_names,group_counts,group_percentages)]
  labels = np.asarray(labels).reshape(2,2)
  sns.heatmap(cf_matrix, annot = labels, fmt = '', cmap = 'Blues', xticklabels = False, yticklabels = False)


# This call has to sit at module level. Indented into the function body it was
# never executed by the script, and would have recursed without end had the
# function ever been called.
draw_confusion_matrix(test_tp, test_tn, test_fp, test_fn)

# Calculating Accuracy: correct predictions divided by all predictions.
accuracy = (test_tp + test_tn) / sum((test_tp, test_tn, test_fp, test_fn))
print(
    "The accuracy of this model is %.7f, or about %d%%."
    % (accuracy, round(accuracy * 100))
)
