In [None]:
# use python 3.6.6 or version problems may occur

# commonly img recognition systems use imgs between 128-512px wide
# larger than that gets it slow

# Inference Phase (A.K.A Prediction)

# solution for translation invariance is convolution layer

# Max Pooling: A pooling operation that selects the maximum element from the region of the feature map covered by the filter
# The objective is to DOWN-SAMPLE an input representation (image, hidden-layer output matrix, etc.), 
# reducing its dimensionality and allowing for assumptions to be made about features contained in the sub-regions binned.

# dropout discourages the NN from simply memorizing the input data

# 1. Convolutional layers adds translational invariance
# 2. Max pooling layers downsample the data
# 3. Dropout forces the NN to learn in a more robust way
# 4. Dense layers maps the output of previous layers to the output layer.

# 1-3 makes a convolutional block

# Latest Design uses branching pathways, shortcuts between groups of layers & others.

# Datasets:   CIFAR-10 [(32*32px, 3 color channels), (10 classes), (60,000 imgs)]




In [None]:
# Main Code
# Explore dataset

from keras.datasets import cifar10
import matplotlib.pyplot as plt

# Human-readable name for each CIFAR-10 class id (0 through 9)
cifar10_class_names = dict(enumerate([
    "Plane",
    "Car",
    "Bird",
    "Cat",
    "Deer",
    "Dog",
    "Frog",
    "Horse",
    "Boat",
    "Truck",
]))

# Fetch the full CIFAR-10 data set as (train, test) pairs of images and labels
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Preview only the first five training images
for idx in range(5):
    # Each label row holds a single class id; map it to its display name
    caption = cifar10_class_names[y_train[idx][0]]

    # Render the picture with its class name as the title, then display it
    plt.imshow(x_train[idx])
    plt.title(caption)
    plt.show()

In [None]:
import keras
from tensorflow.keras.utils import to_categorical # () removed from new version
from keras.datasets import cifar10
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from pathlib import Path

# Fetch the CIFAR-10 training and test splits
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Cast the pixel data to float32 and rescale it from 0-255 down to 0-1
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# One-hot encode the labels: each class id (0 to 9) becomes a 10-element
# vector with a single 1 at the position of that class and 0 everywhere else.
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

In [None]:
# when doing classification with more than 1 type of obj,
# the output layer will almost always use a softmax activation func
# It makes sure all class values add up to 100% or 1.

# print model summary
# model = model
# model.summary()

# for sound wave we can use 1d convolutional layer but for img we use 2d

# each filter detects 1 patterns, 
# we use padding if the img has left over px after spliting it with (3,3)
# the name same has a historical reason. better to memorize it.
# conv2d(numOfDifferentFilters, sizeOfWindowforImgTiles(e.g. (3,3)px), padding="same")

# to make our NN more powerful we can add more Conv Layer.
# for 2nd conv layer we wont have the img so we dont need to use padding.

# we use a flatten layer for transition between conv & dense layer

# MaxPooling2D(sizeOfAreaWeWannaPoolTogether(e.g. (2,2) squares))

# Dropout(percentageOfConnectionsToRandomlyCut) # Usually 25-50% works well

'2.5.0'

In [None]:
# in keras compiling means we actually want to create the NN in memory.

# the loss func defines how to measure how right/wrong the guesses are.
# for loss func we can use: categorical_crossentropy, binary_crossentropy

# a good optimization algo to start with is adam


'2.5.0'

In the fit() method we need to pass in the batch size - how many img we want to feed in the NN at once during training.

Setting the batch size too low makes training take a long time, but setting it too high can make the computer run out of memory. Typical batch size: 32 to 128 imgs.

1 full pass through the entire dataset is called an epoch.
Eventually u'll hit a point where additional training doesn't help anymore.

The larger the dataset is, the fewer training passes u'll do on it. (e.g. with 1 million imgs, u might do only 5 passes)

It's important to randomize training data order. It's usually default but set explicitly if future version changes it.

Save training results to reuse

In [None]:
# Write a small placeholder text file; write_text returns the number of
# characters written, which the notebook displays as this cell's output.
dummy_file = Path("/content/dummy.txt")
dummy_file.write_text("Hello there!")

#hdf5 binary format is designed for saving & loading large binary files efficiently.

# when training, in console the lower the loss number is the better our NN is performing.


12

In [None]:
# Warning! - Make Adjustments
# Main Code


import keras
from tensorflow.keras.utils import to_categorical # () removed from new version
from keras.datasets import cifar10
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from pathlib import Path

# Fetch the CIFAR-10 training and test splits
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# Cast the pixel data to float32 and rescale it from 0-255 down to 0-1
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

# One-hot encode the class ids (10 classes)
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)

# Define the network as an ordered list of layers
model = Sequential([
    # Convolutional block 1: two 32-filter convolutions, pooling, dropout
    Conv2D(32, (3, 3), padding='same', input_shape=(32, 32, 3), activation="relu"),
    Conv2D(32, (3, 3), activation="relu"),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Convolutional block 2: two 64-filter convolutions, pooling, dropout
    Conv2D(64, (3, 3), padding='same', activation="relu"),
    Conv2D(64, (3, 3), activation="relu"),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),

    # Classifier head: flatten the feature maps, one hidden dense layer,
    # then a 10-way softmax (one output per class)
    Flatten(),
    Dense(512, activation="relu"),
    Dropout(0.5),
    Dense(10, activation="softmax"),
])

# Build the network with its loss function, optimizer and reported metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Fit on the training split, shuffling each epoch and reporting
# loss/accuracy on the test split after every epoch
model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=3,  # was 30 epochs
    validation_data=(x_test, y_test),
    shuffle=True,
)

# Persist the architecture as JSON
model_structure = model.to_json()
f = Path("/content/model_structure.json")
f.write_text(model_structure)

# Persist the trained weights in HDF5 format
model.save_weights("/content/model_weights.h5")

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [None]:
# Main Code
# Make Predictions

from keras.models import model_from_json
from pathlib import Path
from keras.preprocessing import image
import numpy as np

# These are the CIFAR10 class labels from the training data (in order from 0 to 9)
class_labels = [
    "Plane",
    "Car",
    "Bird",
    "Cat",
    "Deer",
    "Dog",
    "Frog",
    "Horse",
    "Boat",
    "Truck"
]

# Load the json file that contains the model's structure
f = Path("/content/model_structure.json")
model_structure = f.read_text()

# Recreate the Keras model object from the json data
model = model_from_json(model_structure)

# Re-load the model's trained weights
model.load_weights("/content/model_weights.h5")

# Load an image file to test, resizing it to 32x32 pixels (as required by this model)
img = image.load_img("/content/frog.png", target_size=(32, 32))

# Convert the image to a numpy array and scale it to the 0-to-1 range.
# The model was trained on pixel values divided by 255, so the test image
# must be normalized the same way or the prediction is meaningless.
image_to_test = image.img_to_array(img) / 255

# Add a fourth dimension to the image (since Keras expects a list of images, not a single image)
list_of_images = np.expand_dims(image_to_test, axis=0)

# Make a prediction using the model
results = model.predict(list_of_images)

# Since we are only testing one image, we only need to check the first result
single_result = results[0]

# We will get a likelihood score for all 10 possible classes. Find out which class had the highest score.
most_likely_class_index = int(np.argmax(single_result))
class_likelihood = single_result[most_likely_class_index]

# Get the name of the most likely class
class_label = class_labels[most_likely_class_index]

# Print the result
print("This is image is a {} - Likelihood: {:2f}".format(class_label, class_likelihood))

This is image is a Plane - Likelihood: 0.999982


We can use pretrained NN in keras to perform img recognition of any of the 1000 types of obj they're already trained on.


Transfer Learning: Adapt existing model to recognize new types of objects instead of starting from scratch.

In [None]:
# Main Code
# Using a pretrained model
# Warning!
# The first time this runs, Keras will download the pre-trained VGG16 weights.
# The full model (including the top layers) is a large download - roughly 500 MB.

import numpy as np
from keras.preprocessing import image
from keras.applications import vgg16

# Instantiate VGG16 with its default ImageNet-trained weights
model = vgg16.VGG16()

# Read the test picture from disk, scaled to the 224x224 input this model expects
img = image.load_img("/content/bay.jpg", target_size=(224, 224))

# Turn the picture into an array, wrap it in a batch of one (Keras predicts
# on batches of images), and apply VGG16's own input preprocessing
x = image.img_to_array(img)
x = vgg16.preprocess_input(np.expand_dims(x, axis=0))

# Score the batch with the network
predictions = model.predict(x)

# Translate the raw scores into the nine most likely class names per image
predicted_classes = vgg16.decode_predictions(predictions, top=9)

print("Top predictions for this image:")

# Entry zero holds the results for the first (and only) image in the batch
first_image_predictions = predicted_classes[0]
for class_id, name, likelihood in first_image_predictions:
    print("Prediction: {} - {:2f}".format(name, likelihood))

Top predictions for this image:
Prediction: seashore - 0.395213
Prediction: promontory - 0.326128
Prediction: lakeside - 0.119613
Prediction: breakwater - 0.062801
Prediction: sandbar - 0.045267
Prediction: cliff - 0.011845
Prediction: dock - 0.009196
Prediction: boathouse - 0.003278
Prediction: valley - 0.003194


Transfer Learning: Using a model trained on one set of data as a starting point for modelling a new set of data.

A typical CNN is made up of a series of convolutional layers connected to a dense layer.

The training process teaches each of those convolutional layers to be activated when it sees certain patterns in the input image. The patterns it looks for get more & more complex in the later layers.


-----------

To reuse pretrained NN with new data, we slice off the last layer. We'll keep all the layers that detect patterns, but remove the part that maps those patterns to specific objects. We'll call THIS pretrained NN a Feature Extractor cuz we're using it to extract training features from images.

Next we'll create a new NN to replace the last layer in the original network. Our new NN just has to tell which patterns map to which objects, which is why it can learn to do it with a small amount of training data.


Transfer learning is very useful when u don't have a lot of training data but already have a model that solves a similar problem.


In keras terminology the top is the last layer of the NN. We need to also tell what size imgs we're using as training data. The images we're using here are 64*64pixels with 3 color channels.

---------------------

We're using small img sizes in this example to keep the training time as quick as possible, when building your own img recog systems, u can use larger sized imgs like 224*224px.

In [None]:
# Main Code
# Transfer Learning - Feature Extraction

from pathlib import Path
import numpy as np
import joblib
from keras.preprocessing import image
from keras.applications import vgg16

# Folders holding the two classes of training images
training_data_path = Path("/content/training_data")
dog_path = training_data_path / "dogs"
not_dog_path = training_data_path / "not_dogs"

# (height, width) the feature extractor below is built for. Images are
# resized to this on load so the stacked array always matches the network's
# input shape; images already at this size are unaffected.
image_size = (64, 64)


def load_labelled_images(folder, label, target_size):
    """Load every PNG in `folder` and pair it with `label`.

    Args:
        folder: pathlib.Path of the directory to scan for "*.png" files.
        label: expected value to record for each image found.
        target_size: (height, width) each image is resized to on load.

    Returns:
        A tuple (arrays, labels): a list of image arrays and a list of the
        same length containing `label` repeated.
    """
    arrays = []
    # Sort the file names so the sample order is the same on every run
    for image_file in sorted(folder.glob("*.png")):
        loaded = image.load_img(image_file, target_size=target_size)
        arrays.append(image.img_to_array(loaded))
    return arrays, [label] * len(arrays)


# 'Not dog' images get the expected value 0, 'dog' images get 1
not_dog_images, not_dog_labels = load_labelled_images(not_dog_path, 0, image_size)
dog_images, dog_labels = load_labelled_images(dog_path, 1, image_size)

images = not_dog_images + dog_images
labels = not_dog_labels + dog_labels

# Fail early with a clear message instead of an obscure shape error later on
if not images:
    raise FileNotFoundError(
        "No training images (*.png) found under {}".format(training_data_path)
    )

# Create a single numpy array with all the images we loaded
x_train = np.array(images)

# Also convert the labels to a numpy array
y_train = np.array(labels)

# Apply VGG16's own input preprocessing. Note this is NOT a 0-to-1 rescale:
# per the Keras docs it converts RGB to BGR and zero-centers each channel
# using the ImageNet means, matching how the network was trained.
x_train = vgg16.preprocess_input(x_train)

# Load a pre-trained neural network (without its top layers) to use as a feature extractor
pretrained_nn = vgg16.VGG16(weights='imagenet', include_top=False, input_shape=image_size + (3,))

# Extract features for each image (all in one pass)
features_x = pretrained_nn.predict(x_train)

# Save the array of extracted features to a file
joblib.dump(features_x, "/content/x_train.dat")

# Save the matching array of expected values to a file
joblib.dump(y_train, "/content/y_train.dat")

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


['/content/y_train.dat']

In [None]:
# Main Code
# Transfer Learning - Training with Extracted Features


from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from pathlib import Path
import joblib

# Read back the extracted features and their expected values
x_train = joblib.load("/content/x_train.dat")
y_train = joblib.load("/content/y_train.dat")

# Small classifier that sits on top of the extracted features:
# flatten each feature block (everything after the batch dimension),
# one hidden dense layer with dropout, then a single sigmoid output
model = Sequential([
    Flatten(input_shape=x_train.shape[1:]),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid'),
])

# Build the network with a binary loss, the adam optimizer and accuracy reporting
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=['accuracy'],
)

# Fit for ten passes over the data, shuffling the samples each epoch
model.fit(
    x_train,
    y_train,
    epochs=10,
    shuffle=True,
)

# Persist the architecture as JSON
model_structure = model.to_json()
f = Path("/content/model_structure.json")
f.write_text(model_structure)

# Persist the trained weights in HDF5 format
model.save_weights("/content/model_weights.h5")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
# print(x_train.shape)
# x_train.shape = (numOfImg, pixels, pixels, channels)
# x_train.shape[1:] = (pixels, pixels, channels)
# (1,2,3,4)[1:] == (2,3,4)

(58, 2, 2, 512)


In [None]:
# Main Code
# Transfer Learning - Make Predictions




from keras.models import model_from_json
from pathlib import Path
from keras.preprocessing import image
import numpy as np
from keras.applications import vgg16

# Read the saved model structure (JSON) from disk
f = Path("/content/model_structure.json")
model_structure = f.read_text()

# Rebuild the Keras model from that JSON, then restore its trained weights
model = model_from_json(model_structure)
model.load_weights("/content/model_weights.h5")

# Read the test picture, scaled to the 64x64 input size used by the feature extractor
img = image.load_img("/content/dog.png", target_size=(64, 64))

# Turn the picture into an array and wrap it in a batch of one
# (Keras expects a batch of images, not a single image)
image_array = image.img_to_array(img)
images = np.expand_dims(image_array, axis=0)

# Apply the same VGG16 input preprocessing that was applied to the training images
images = vgg16.preprocess_input(images)

# Run the picture through the pre-trained network (top layers removed) to get
# its features, exactly as was done for the training data
feature_extraction_model = vgg16.VGG16(weights='imagenet', include_top=False, input_shape=(64, 64, 3))
features = feature_extraction_model.predict(images)

# Feed the extracted features to our own classifier for the final prediction
results = model.predict(features)

# One image and one output value: take the first result's first element
single_result = results[0][0]

# Report the score as a whole-number percentage
likelihood_percent = int(single_result * 100)
print("Likelihood that this image contains a dog: {}%".format(likelihood_percent))

Likelihood that this image contains a dog: 100%
