## 1. Load packages

In [1]:
import os
import tensorflow as tf
## Generate batches of tensor image data with real-time data augmentation.
## https://www.tensorflow.org/api_docs/python/tf/keras/preprocessing/image/ImageDataGenerator
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator

from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
## The paths sub-module of imutils includes a function to recursively find images 
## based on a root directory.
from imutils import paths
import numpy as np
## Parser for command-line options, arguments and sub-commands
import argparse
## OpenCV
import cv2


# Enable memory growth BEFORE anything initialises the TensorFlow runtime.
# tf.config.list_physical_devices() only enumerates devices, whereas
# tf.test.gpu_device_name() initialises them; once that has happened
# set_memory_growth() raises RuntimeError, so growth must be configured first.
physical_devices = tf.config.list_physical_devices('GPU')
for gpu in physical_devices:
    try:
        # Apply the setting to every GPU so the devices stay consistent.
        tf.config.experimental.set_memory_growth(gpu, True)
    except (ValueError, RuntimeError) as err:
        # Invalid device or cannot modify virtual devices once initialized.
        # Report it instead of hiding it, but keep the notebook running.
        print("Could not enable memory growth for {}: {}".format(gpu.name, err))

if physical_devices:
    print('GPU found')
else:
    print("No GPU found")

GPU found


## 2. Define load_data function 
`load_data` loads every image under a given directory and assigns all of them the same label.

In [2]:
## Function to load all images in a specific directory
def load_data(path, label, target_size=(224, 224)):
    """Load every image found under ``path`` and tag each one with ``label``.

    Parameters
    ----------
    path : str
        Root directory that is searched recursively for image files.
    label : object
        Class label assigned to every image loaded from ``path``.
    target_size : tuple of int, optional
        ``(width, height)`` each image is resized to. Defaults to ``(224, 224)``.

    Returns
    -------
    data : numpy.ndarray
        Array of shape ``(n_images, height, width, 3)`` with dtype ``float``
        (float64). Pixel values are NOT rescaled; they keep the raw 0-255 range
        in the channel order produced by ``cv2.imread``.
    labels : numpy.ndarray
        Array of shape ``(n_images,)`` where every entry equals ``label``.

    Notes
    -----
    Files that OpenCV cannot decode are skipped with a warning instead of
    aborting the whole load.
    """
    print("[INFO] loading images...")
    data = []
    labels = []
    # grab the image paths in sorted order so the result is deterministic
    imagePaths = sorted(paths.list_images(path))
    # loop over the input images
    for imagePath in imagePaths:
        # cv2.imread signals an unreadable/corrupt file by returning None
        # rather than raising, so check before handing it to cv2.resize.
        img = cv2.imread(imagePath)
        if img is None:
            print("[WARN] skipping unreadable image: {}".format(imagePath))
            continue
        # resize to a fixed size and convert to a float array
        img = cv2.resize(img, target_size)
        img = tf.keras.preprocessing.image.img_to_array(img)
        data.append(img)

        # every image in this directory shares the label given by the caller
        labels.append(label)

    if not data:
        # Keep the rank consistent so an empty class can still be concatenated
        # with the other batches along axis 0.
        width, height = target_size
        empty_data = np.empty((0, height, width, 3), dtype="float")
        empty_labels = np.empty((0,), dtype=np.asarray(label).dtype)
        return empty_data, empty_labels

    # stack into arrays; pixel intensities are left unscaled
    data = np.array(data, dtype="float")
    labels = np.array(labels)
    return data, labels

## 3. Specify image locations and assign labels
Here we use Imagenette, a 10-class subset of ImageNet, as an example.
Refer to https://github.com/fastai/imagenette for download links.

In [3]:
# Human-readable class name -> integer label.
classes = {'tench': 0, 'English springer': 1, 'cassette player': 2, 'chain saw': 3, 'church': 4,
           'French horn': 5, 'garbage truck': 6, 'gas pump': 7, 'golf ball': 8, 'parachute': 9}

# Class name -> sub-folder name used for that class inside the dataset.
SYNSET_DIRS = {'tench': 'n01440764', 'English springer': 'n02102040',
               'cassette player': 'n02979186', 'chain saw': 'n03000684',
               'church': 'n03028079', 'French horn': 'n03394916',
               'garbage truck': 'n03417042', 'gas pump': 'n03425413',
               'golf ball': 'n03445777', 'parachute': 'n03888257'}

# Dataset root. Set the IMAGENETTE_DIR environment variable to point somewhere
# else; the default keeps the original location.
IMAGENETTE_DIR = os.environ.get(
    "IMAGENETTE_DIR",
    "C:\\Users\\hli36\\OneDrive\\Learning\\Resources\\imagenette2")
TRAIN_DIR = os.path.join(IMAGENETTE_DIR, "train")

# Load one (data, labels) batch per class, in label order, taking each label
# from `classes` so it cannot drift from the table above.
train_batches = []
for class_name, class_label in sorted(classes.items(), key=lambda item: item[1]):
    print("Load {} images".format(class_name))
    path = os.path.join(TRAIN_DIR, SYNSET_DIRS[class_name])
    train_batches.append(load_data(path, class_label))

# Keep the per-class names that the following cells expect.
((data1, labels1), (data2, labels2), (data3, labels3), (data4, labels4), (data5, labels5),
 (data6, labels6), (data7, labels7), (data8, labels8), (data9, labels9), (data10, labels10)) = train_batches

Load tench images
[INFO] loading images...
Load English springer images
[INFO] loading images...
Load cassette player images
[INFO] loading images...
Load chain saw images
[INFO] loading images...
Load church images
[INFO] loading images...
Load French horn images
[INFO] loading images...
Load garbage truck images
[INFO] loading images...
Load gas pump images
[INFO] loading images...
Load golf ball images
[INFO] loading images...
Load parachute images
[INFO] loading images...


Consolidate all training batches into a single training set

In [4]:
# Stack the ten per-class batches into one array of images and one of labels.
train_image_batches = [data1, data2, data3, data4, data5, data6, data7, data8, data9, data10]
train_label_batches = [labels1, labels2, labels3, labels4, labels5, labels6, labels7, labels8, labels9, labels10]

x_train_full = np.concatenate(train_image_batches, axis=0)
y_train_full = np.concatenate(train_label_batches, axis=0)

# Show both shapes to confirm images and labels line up.
for stacked in (x_train_full, y_train_full):
    print(stacked.shape)

(9469, 224, 224, 3)
(9469,)


Shuffle the training examples randomly

In [5]:
def unison_shuffled_copies(a, b, seed=None):
    """Return shuffled copies of ``a`` and ``b`` that share one permutation.

    Parameters
    ----------
    a, b : array-like
        Sequences of equal length along the first axis. They are converted
        with ``np.asarray`` so plain lists are accepted too.
    seed : int or None, optional
        When given, the permutation comes from a private
        ``np.random.RandomState(seed)`` and is reproducible. When ``None``
        (default) NumPy's global random state is used.

    Returns
    -------
    tuple of numpy.ndarray
        ``(a[p], b[p])`` for a single random permutation ``p``, so element
        ``i`` of the first result still corresponds to element ``i`` of the
        second.

    Raises
    ------
    ValueError
        If ``a`` and ``b`` differ in length. This is an explicit check rather
        than ``assert`` because asserts are removed under ``python -O``, and a
        shorter ``a`` would then silently pair with the wrong part of ``b``.
    """
    a = np.asarray(a)
    b = np.asarray(b)
    if len(a) != len(b):
        raise ValueError(
            "a and b must have the same length, got {} and {}".format(len(a), len(b)))
    # Use the global state unless the caller asked for a reproducible shuffle.
    rng = np.random if seed is None else np.random.RandomState(seed)
    p = rng.permutation(len(a))
    return a[p], b[p]

# Seed NumPy's global RNG so the shuffle below is reproducible. Without this,
# the order fed to train_test_split changes on every run, and random_state on
# the split alone does not fix which images land in the validation set.
SEED = 42
np.random.seed(SEED)

x_shuffled, y_shuffled = unison_shuffled_copies(x_train_full, y_train_full)
print(y_shuffled)
# Split the training set into a training set and a validation set with a split ratio of 9 : 1.
(x_train, x_valid, y_train, y_valid) = train_test_split(
    x_shuffled, y_shuffled, test_size=0.1, stratify=y_shuffled, random_state=SEED)

# Sanity check: the split must neither drop nor duplicate examples.
assert len(x_train) + len(x_valid) == len(x_train_full)
assert len(y_train) == len(x_train) and len(y_valid) == len(x_valid)

[2 0 7 ... 0 6 4]


In [6]:
# Class name -> sub-folder name used for that class inside the dataset.
SYNSET_DIRS = {'tench': 'n01440764', 'English springer': 'n02102040',
               'cassette player': 'n02979186', 'chain saw': 'n03000684',
               'church': 'n03028079', 'French horn': 'n03394916',
               'garbage truck': 'n03417042', 'gas pump': 'n03425413',
               'golf ball': 'n03445777', 'parachute': 'n03888257'}

# Dataset root. Set the IMAGENETTE_DIR environment variable to point somewhere
# else; the default keeps the original location.
IMAGENETTE_DIR = os.environ.get(
    "IMAGENETTE_DIR",
    "C:\\Users\\hli36\\OneDrive\\Learning\\Resources\\imagenette2")
VAL_DIR = os.path.join(IMAGENETTE_DIR, "val")

# Load the held-out "val" folder of each class (used as the test set), in
# label order, taking each label from the `classes` table.
test_batches = []
for class_name, class_label in sorted(classes.items(), key=lambda item: item[1]):
    print("Load {} images".format(class_name))
    path = os.path.join(VAL_DIR, SYNSET_DIRS[class_name])
    test_batches.append(load_data(path, class_label))

# Rebind the per-class names that the next cell expects; this replaces the
# training batches previously held under the same names.
((data1, labels1), (data2, labels2), (data3, labels3), (data4, labels4), (data5, labels5),
 (data6, labels6), (data7, labels7), (data8, labels8), (data9, labels9), (data10, labels10)) = test_batches

Load tench images
[INFO] loading images...
Load English springer images
[INFO] loading images...
Load cassette player images
[INFO] loading images...
Load chain saw images
[INFO] loading images...
Load church images
[INFO] loading images...
Load French horn images
[INFO] loading images...
Load garbage truck images
[INFO] loading images...
Load gas pump images
[INFO] loading images...
Load golf ball images
[INFO] loading images...
Load parachute images
[INFO] loading images...


Consolidate all test batches into a single test set

In [7]:
# Stack the ten per-class test batches, then shuffle images and labels together.
test_image_batches = [data1, data2, data3, data4, data5, data6, data7, data8, data9, data10]
test_label_batches = [labels1, labels2, labels3, labels4, labels5, labels6, labels7, labels8, labels9, labels10]
x_test, y_test = unison_shuffled_copies(
    np.concatenate(test_image_batches, axis=0),
    np.concatenate(test_label_batches, axis=0),
)

# Report the size of each split.
n_train, n_valid, n_test = x_train.shape[0], x_valid.shape[0], x_test.shape[0]
print('The number of training examples is:', n_train)
print('The number of validation examples is:', n_valid)
print('The number of test examples is:', n_test)

The number of training examples is: 8522
The number of validation examples is: 947
The number of test examples is: 3925


In [None]:
# Persist each split as its own compressed archive named after the split.
# The arrays are saved individually rather than bundled into one dictionary
# file, and the `classes` mapping is not saved here.
splits_to_save = {
    'x_train': x_train,
    'y_train': y_train,
    'x_valid': x_valid,
    'y_valid': y_valid,
    'x_test': x_test,
    'y_test': y_test,
}
for split_name in ('x_train', 'y_train', 'x_valid', 'y_valid', 'x_test', 'y_test'):
    np.savez_compressed(split_name, splits_to_save[split_name])