## Installs for new environments

In [1]:
# import sys
# !{sys.executable} -m pip install tensorflow
# !{sys.executable} -m pip install matplotlib
# !{sys.executable} -m pip install numpy
# !{sys.executable} -m pip install pillow
# !{sys.executable} -m pip install scipy

In [2]:
%load_ext tensorboard

## Imports

In [3]:
import tensorflow as tf
import matplotlib.pyplot as plt
import os
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
from tensorflow.keras.preprocessing import image
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
import math

import datetime


## Config

In [4]:
# --- Image geometry ---
image_dim = 256  # every image is resized to image_dim x image_dim pixels
input_shape = (image_dim, image_dim, 3)  # (height, width, RGB channels)

# --- Training hyper-parameters ---
epochs = 5
batch_size = 32
learning_rate = .001

# --- Dataset locations (each is expected to hold one sub-directory per class) ---
train_dir = 'dataset/train'
validate_dir = 'dataset/validate'

# Root directory for TensorBoard logs. Kept relative to the notebook so the
# run does not try to write at the filesystem root (the previous '/logs').
base_log_dir = 'logs'

## Copied Funcs

In [5]:
# Just for the initial validation step. Feel free to ignore.

def get_class_labels(dir):
    """
    Gets the name of each sub-directory in the given directory.

    Plain files (for example hidden OS metadata files) are skipped, and the
    names are returned in sorted order so the result is deterministic and
    lines up with the alphanumeric class ordering Keras'
    flow_from_directory uses for class_indices.

    dir: Directory to search.
    return: A sorted list of the names of the sub-directories in dir.
    """

    # Keep only the entries that really are directories; os.listdir also
    # returns regular files and gives no ordering guarantee.
    classes = sorted(
        entry for entry in os.listdir(dir)
        if os.path.isdir(os.path.join(dir, entry))
    )

    return classes
    
def get_class_images(classes, dir):
    """
    Gets the paths of all images in each class directory.

    classes: Name of each class (each must be a sub-directory of dir).
    dir: Directory to search.
    return: A list with one numpy array per class, in the same order as
            classes, each holding the paths of the entries in that class
            directory (sorted by file name).
    """

    # One array of image paths per class, in the order of `classes`
    class_paths = []

    for label in classes:

        # Directory holding the images of this class
        class_path = os.path.join(dir, label)

        # Build the full list first and convert once: calling np.append per
        # image re-allocates the array every time (quadratic overall).
        # Sorting makes the order reproducible across file systems.
        image_paths = [
            os.path.join(class_path, file_name)
            for file_name in sorted(os.listdir(class_path))
        ]

        class_paths.append(np.array(image_paths))

    return class_paths


In [6]:
def predict(batch_size, image_paths, model, rescale=1./255):
    """
    Makes predictions with the model

    batch_size: number of images the model processes per prediction batch
    image_paths: paths to the images to predict on
    model: image classifier model
    rescale: factor every pixel value is multiplied by before prediction.
             Defaults to 1/255, the same rescaling the training and
             validation generators apply, so the model sees inputs in the
             range it is trained on.
    return: resulting predictions, one entry per image path
    """

    images_arr = []

    for image_path in image_paths:
        # load the image resized to the model's input size;
        # target_size is (height, width) - the channel count is not part of it
        image_pil = load_img(image_path, interpolation='nearest', target_size=(image_dim, image_dim))

        # turn it into an array and apply the same rescaling as the generators
        image_arr = img_to_array(image_pil) * rescale

        images_arr.append(image_arr)

    # stack into a single numpy array so it can be fed to the model as a batch
    images = np.array(images_arr)

    # make predictions on the whole set, batch_size images at a time
    predictions = model.predict(images, batch_size=batch_size)

    return predictions


In [7]:
def predictions_accuracy(class_keys, label, predictions):
    """
    Determine the accuracy of a set of image predictions

    class_keys: list of class keys, indexed by the model's output position
    label: true class of all the predictions
    predictions: array of image predictions (one score vector per image)
    return: fraction of predictions whose most likely class equals label;
            0.0 when predictions is empty
    """

    # number of predictions made
    n_predictions = len(predictions)

    # nothing to score: avoid dividing by zero
    if n_predictions == 0:
        return 0.0

    # count the predictions whose highest-scoring class is the true label
    correct_predictions = sum(
        1 for prediction in predictions
        if class_keys[np.argmax(prediction)] == label
    )

    return correct_predictions / n_predictions


## Augmentation Definition

In [8]:
# Generator for the training dataset: multiplies pixel values by 1/255 and
# applies random augmentation (flip, rotation, zoom, shear).
train = ImageDataGenerator(
    horizontal_flip=True,
    rotation_range=20,
    zoom_range=0.2,
    shear_range=0.2,
    rescale=1./255,
)

# Generator for the validation dataset: rescaling only, no augmentation.
validate = ImageDataGenerator(rescale=1./255)

## Dataset Loading

In [13]:
# Loads the datasets. Each one goes through its own generator: the training
# set is augmented, the validation set is only rescaled so that validation
# metrics are measured on unmodified images.
train_dataset = train.flow_from_directory(train_dir,
                                          target_size=(image_dim, image_dim),
                                          batch_size=batch_size,
                                          class_mode='categorical')
validate_dataset = validate.flow_from_directory(validate_dir,
                                                target_size=(image_dim, image_dim),
                                                batch_size=batch_size,
                                                class_mode='categorical')

# tf.keras.preprocessing.image_dataset_from_directory - this can be used when no augmentation is needed

n_train = train_dataset.samples # number of images in training dataset
n_validate = validate_dataset.samples # number of images in validation dataset


Found 8000 images belonging to 10 classes.
Found 2000 images belonging to 10 classes.


## CNN Design

In [14]:
# Class names in the order of the model's output positions
class_keys = list(train_dataset.class_indices)
n_classes = len(class_keys)


def _build_cnn(input_shape, n_classes):
    """
    Builds the CNN: four stages of two 3x3 'same' ReLU convolutions followed
    by 2x2 max pooling (16, 32, 64 and 128 filters), then a 256-unit dense
    layer and a softmax output with one unit per class.

    input_shape: shape of a single input image
    n_classes: number of output classes
    return: the (uncompiled) Sequential model
    """
    cnn_layers = []

    # only the very first layer declares the input shape
    first_layer_kwargs = {'input_shape': input_shape}

    for n_filters in (16, 32, 64, 128):
        cnn_layers.append(Conv2D(n_filters, (3, 3), activation='relu', padding='same', **first_layer_kwargs))
        first_layer_kwargs = {}
        cnn_layers.append(Conv2D(n_filters, (3, 3), activation='relu', padding='same'))
        cnn_layers.append(MaxPooling2D(pool_size=(2,2), strides=None, padding='valid'))

    # classifier head
    cnn_layers.append(Flatten())
    cnn_layers.append(Dense(256, activation='relu'))
    cnn_layers.append(Dense(n_classes, activation='softmax'))

    return tf.keras.models.Sequential(cnn_layers)


# defines the layers of the cnn
model = _build_cnn(input_shape, n_classes)

## Initial Accuracy Measurement

In [15]:
# Finds how accurate the model is w/o training

# Use the class names in the model's own output order (class_keys) rather
# than the order os.listdir happens to return, so that image_paths[i] always
# holds the images of class_keys[i]. The validation directory is expected to
# contain one sub-directory per class.
classes = list(class_keys)

# Get the paths of all the images of every class, as one array per class.
image_paths = get_class_images(classes, validate_dir)

for single_class, single_class_image_paths in zip(classes, image_paths):
    # predict on every image of this class, batch_size images at a time
    single_class_predictions = predict(batch_size, single_class_image_paths, model)

    # get the accuracy of the predictions on this class
    single_class_accuracy = predictions_accuracy(class_keys, single_class, single_class_predictions)

    print(f"Current accuracy of model for class {single_class}: {single_class_accuracy}")

Current accuracy of model for class butterfly: 0.15
Current accuracy of model for class cat: 0.0
Current accuracy of model for class chicken: 0.0
Current accuracy of model for class cow: 0.0
Current accuracy of model for class dog: 0.0
Current accuracy of model for class elephant: 0.0
Current accuracy of model for class horse: 0.11
Current accuracy of model for class sheep: 0.0
Current accuracy of model for class spider: 0.635
Current accuracy of model for class squirrel: 0.0


## Compilation

In [16]:
# Every run logs to its own timestamped sub-directory of base_log_dir.
# os.path.join inserts the path separator that plain concatenation left out.
log_dir = os.path.join(base_log_dir, datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
model.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=learning_rate, momentum=0.9),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Both step counts are numbers of BATCHES, not images. Validation rounds up
# so every validation image is evaluated exactly once per epoch.
model.fit(train_dataset,
          steps_per_epoch=math.floor(n_train/batch_size),
          validation_data=validate_dataset,
          validation_steps=math.ceil(n_validate/batch_size),
          epochs=epochs,
          callbacks=[tensorboard])

  model.fit_generator(train_dataset,


Epoch 1/5
 22/250 [=>............................] - ETA: 3:34:51 - loss: 2.3021 - accuracy: 0.1108