In [1]:
import tensorflow as tf
import numpy as np
from datetime import datetime

INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2


In [2]:
def preprocessDataset(dataset, numClasses=10):
    """
    Preprocesses an entire set of datapoints so that they are usable by a neural network

    The images are given an explicit trailing channel axis and divided by 255 (which maps
    8-bit pixel values into the range 0 to 1), and the integer labels are one-hot encoded.

    :param dataset: The dataset that is to be preprocessed in the form (images, labels), where
                    images is a NumPy array shaped (count, height, width[, channels]) and
                    labels is a NumPy array of zero-based integer class ids
    :param numClasses: The number of distinct classes, i.e. the length of each one-hot row;
                       defaults to 10
    :return: The preprocessed dataset in the form (processedImages, processedLabels)
    """

    # Unpacks the dataset into the images (input data) and labels (expected output data)
    images, labels = dataset

    # Preprocesses the input data
    # The channel count is derived from the data instead of being fixed at 3, so colour
    # images keep their shape and grayscale (count, height, width) images gain a
    # channel axis of size 1. An empty trailing shape multiplies out to 1.
    channels = int(np.prod(images.shape[3:]))
    newImages = images.reshape(images.shape[0], images.shape[1], images.shape[2], channels)
    # Condenses the values of the images so that they fall within 0 and 1.
    # Casting to float32 first avoids the float64 array that dividing integers by a
    # Python float would otherwise produce, halving the memory used by the images.
    newImages = newImages.astype(np.float32) / 255.0

    # Preprocesses the expected output data
    newLabels = labels.flatten()

    # Converts the labels from a 1D array of zero-based class ids (0 to numClasses - 1) to a
    # 2D array of 0s in which each id in the 1D array has a respective row within the 2D
    # array, and the value of said id is the index in that row that is a 1
    #
    # For example (with 4 classes):
    # 1D array: [ 0, 1, 1, 3, 2 ]
    # One Hotted 2D Array:
    # [ [ 1, 0, 0, 0 ],   // 0
    #   [ 0, 1, 0, 0 ],   // 1
    #   [ 0, 1, 0, 0 ],   // 1
    #   [ 0, 0, 0, 1 ],   // 3
    #   [ 0, 0, 1, 0 ] ]  // 2
    newLabels = tf.one_hot(newLabels.astype(np.int32), depth=numClasses)

    return newImages, newLabels


def getRawDatasets():
    """
    Fetches the raw CIFAR-10 data through the dataset loader bundled with Keras

    :return: The result of tf.keras.datasets.cifar10.load_data(), passed through untouched
             (the unprocessed datasets the neural network is trained on and tested against)
    """

    # CIFAR-10 is the dataset of choice; swap the loader here to use a different one
    cifar10 = tf.keras.datasets.cifar10
    rawDatasets = cifar10.load_data()

    return rawDatasets

In [3]:
def getTrainingConfigurations():
    """
    Builds the settings used to train the neural network

    Epochs: The amount of times the neural network trains against the entire training dataset
    Batch Size: The amount of samples ran through before the gradient update is applied
    Callbacks: The set of callbacks that will be applied during the training (e.g. saving)
    Optimizer: The method that will be used in training the neural network
    Loss Function: The loss function the neural network will use to judge its performance
    Metrics: The metrics that will allow the user to understand the performance of the neural network

    Every call creates new log and save directory names from the current time, so separate
    calls made at least a second apart do not share directories.

    :return: A tuple of the configurations described above, in order
    """

    epochs = 300
    batchSize = 32

    # Gets the datetime now as to create unique folders
    timeNow = datetime.now().strftime("%Y%m%d-%H%M%S")
    # The folder the fitness logs will be saved to
    logDir = "logs/fit/" + timeNow
    # The directory the neural network will be saved to
    # NOTE(review): newer Keras releases appear to require checkpoint paths ending in
    # ".keras" or ".h5" - confirm against the installed version before upgrading
    saveDir = "save files/" + timeNow

    callbacks = [tf.keras.callbacks.TensorBoard(log_dir=logDir, histogram_freq=1),
                 tf.keras.callbacks.ModelCheckpoint(filepath=saveDir, verbose=1)]

    # "learning_rate" is the supported keyword; the old "lr" alias is deprecated and is
    # rejected by newer Keras releases
    optimizer = tf.keras.optimizers.SGD(learning_rate=0.001, momentum=0.9)

    loss = "categorical_crossentropy"

    metrics = ["accuracy"]

    return epochs, batchSize, callbacks, optimizer, loss, metrics

In [4]:
# Main function
# Runs only when this code is executed directly (when __name__ is "__main__"): builds the
# training configuration, loads the raw datasets and preprocesses both of them. Every
# name bound here is created at module level.
if __name__ == "__main__":
    # Training configurations
    # (fetched first, so the timestamp inside the log/save paths is taken before the
    # datasets are loaded)
    epochs, batchSize, callbacks, optimizer, loss, metrics = getTrainingConfigurations()

    # Raw datasets
    # (unpacked into a training dataset and a testing dataset)
    rawTrainingDataset, rawTestingDataset = getRawDatasets()

    # Preprocessed datasets
    # (both datasets go through the same preprocessing function)
    trainingImages, trainingLabels = preprocessDataset(rawTrainingDataset)
    testingImages, testingLabels = preprocessDataset(rawTestingDataset)

