In [5]:
import tensorflow as tf
import numpy as np

In [6]:
def preprocessDataset(dataset):
    """
    Preprocesses an entire set of datapoints so that they are usable by a neural network

    The images are reshaped to (count, height, width, 3) and scaled by 1 / 255, and the
    labels are flattened and one-hot encoded over 10 classes.

    :param dataset: The dataset that is to be preprocessed in the form (images, labels),
                    where both elements are NumPy arrays
    :return: The preprocessed dataset in the form (processedImages, processedLabels)
    """

    # Unpacks the dataset into the images (input data) and labels (expected output data)
    images, labels = dataset

    # Preprocesses the input data: makes the trailing 3-channel axis explicit
    newImages = images.reshape(images.shape[0], images.shape[1], images.shape[2], 3)
    # Condenses the values of the images so that they fall within 0 and 1
    # (assumes the raw pixel values lie in 0 - 255)
    newImages = newImages / 255.0

    # Preprocesses the expected output data: collapses the labels into a 1D array
    newLabels = labels.flatten()

    # Converts the labels from a 1D array of class indices ranging from 0 - 9 to a 2D
    # array of 0s in which each label has its own row, and the value of the label is the
    # (zero-based) index within that row that is set to 1
    #
    # For example (shown with a depth of 5 for brevity):
    # 1D array: [ 1, 2, 2, 4, 3 ]
    # One Hotted 2D Array:
    # [ [ 0, 1, 0, 0, 0 ],   // 1
    #   [ 0, 0, 1, 0, 0 ],   // 2
    #   [ 0, 0, 1, 0, 0 ],   // 2
    #   [ 0, 0, 0, 0, 1 ],   // 4
    #   [ 0, 0, 0, 1, 0 ] ]  // 3
    #
    # NumPy's conversion method is `astype` (all lowercase); `asType` does not exist
    newLabels = tf.one_hot(newLabels.astype(np.int32), depth=10)

    return newImages, newLabels


def getRawDatasets():
    """
    Loads the raw CIFAR-10 data through Keras' built-in dataset loader

    :return: The unprocessed datasets that the neural network is to be trained on and
             tested against, exactly as returned by tf.keras.datasets.cifar10.load_data()
    """

    # The CIFAR-10 dataset ships with Keras, so its loader is used directly
    cifar10 = tf.keras.datasets.cifar10
    rawDatasets = cifar10.load_data()
    return rawDatasets