In [1]:
# Load Pre-Built Historic Models
import numpy as np
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input, decode_predictions
from tensorflow.keras.preprocessing import image

In [2]:
# Initialize Model
# Build ResNet50 with its pre-trained ImageNet weights (the weights file is downloaded on first use).
resnet_model = ResNet50(weights='imagenet')

2023-03-25 17:33:19.917982: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels.h5


## Test the ResNet50 Model on a Sample Image

In [5]:
# Read the sample image at 224x224 and turn it into a preprocessed
# batch that contains just this one image.
img_path = './images/imagenet_images/dog.jpg'
img = image.load_img(img_path, target_size=(224, 224))
x = preprocess_input(np.expand_dims(image.img_to_array(img), axis=0))

# Run the classifier on the single-image batch
preds = resnet_model.predict(x)

# decode_predictions yields, for every sample in the batch, a list of
# (class, description, probability) tuples; only the first sample is shown.
top_three = decode_predictions(preds, top=3)[0]
print(f'Predicted:\t{top_three}')

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/imagenet_class_index.json
Predicted:	[('n02106662', 'German_shepherd', 0.9987184), ('n02105412', 'kelpie', 0.0005585266), ('n02105162', 'malinois', 0.00041019832)]


## How About Multiple Images - Display Results Over the Image

In [6]:
import cv2
from os import listdir
from os.path import isfile, join

In [7]:
# Helper function to show an image with cv2
def cv_show_img(title, image, wait=0):
    '''Show `image` in a cv2 window named `title`, then close all cv2 windows.

    title: window name passed to cv2.namedWindow and cv2.imshow.
    image: the image handed to cv2.imshow.
    wait:  delay passed to the first cv2.waitKey call (default 0).
    '''
    cv2.namedWindow(title)
    cv2.startWindowThread()
    cv2.imshow(title, image)
    cv2.waitKey(wait)
    # NOTE(review): the extra 1 ms waitKey calls around destroyAllWindows presumably give the
    # GUI event loop a chance to really close the window - confirm on the target platform.
    cv2.waitKey(1)
    cv2.destroyAllWindows()
    cv2.waitKey(1)

# Helper function to show multiple images at the same time
def cv_show_mult_img(titleArr, imageArr, wait=0):
    '''Open one cv2 window per entry of `titleArr`, showing the image at the
    same position in `imageArr`, then wait and close all cv2 windows.

    titleArr: window names.
    imageArr: images, indexed in parallel with `titleArr`.
    wait:     delay passed to the first cv2.waitKey call (default 0).
    '''
    for idx, window_name in enumerate(titleArr):
        cv2.namedWindow(window_name)
        cv2.startWindowThread()
        cv2.imshow(window_name, imageArr[idx])

    # Same wait / teardown sequence as the single-image helper
    cv2.waitKey(wait)
    cv2.waitKey(1)
    cv2.destroyAllWindows()
    cv2.waitKey(1)

In [9]:
# OpenCV function that will display an image and its predicted labels
def draw_label(name, preds, img):
    '''Display `img` with a heading and one text line per prediction beside it.

    name:  heading text; str(name) is also used as the window title.
    preds: iterable of decoded predictions; for each entry, pred[1] and pred[2]
           (label and score in the tuples produced by decode_predictions) are drawn.
    img:   image array; a black border is added above and to the right of it so
           the text has room.

    The result is shown through cv_show_img.
    '''
    BLACK = [0,0,0]
    TEXT_COLOR = (0,0,255)
    FONT = cv2.FONT_HERSHEY_COMPLEX_SMALL

    width = img.shape[1]
    expanded_img = cv2.copyMakeBorder(img, 300, 0, 0, width+300, cv2.BORDER_CONSTANT, value=BLACK)

    # The heading does not depend on the predictions, so draw it exactly once
    # (it used to be redrawn on every iteration and was skipped for empty preds).
    cv2.putText(expanded_img, str(name), (width + 50, 50), FONT, 2, TEXT_COLOR, 2)

    # One line per prediction, stacked 50 px apart under the heading
    for (i, pred) in enumerate(preds):
        cv2.putText(expanded_img, f'{pred[1]} {pred[2]}', (width + 50, 50+((i+1)*50)), FONT, 2, TEXT_COLOR, 2)

    cv_show_img(str(name), expanded_img)

In [10]:
imagenet_path = './images/imagenet_images/'
# Sorted so the images are processed in a stable order (listdir order is arbitrary)
files = sorted(f for f in listdir(imagenet_path) if isfile(join(imagenet_path, f)))

for f in files:
    # join() builds the path whether or not imagenet_path ends with a separator
    file_path = join(imagenet_path, f)

    # Load image with tensorflow
    img = image.load_img(file_path, target_size=(224,224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)

    # Load image with openCV
    img2 = cv2.imread(file_path)
    if img2 is None:
        # cv2.imread returns None instead of raising when it cannot decode a file
        print(f'Skipping {f}: OpenCV could not read it')
        continue
    imageL = cv2.resize(img2, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_CUBIC)

    # Get preds
    preds = resnet_model.predict(x)
    predictions = decode_predictions(preds, top=3)[0]
    draw_label('Predictions', predictions, imageL)
    



## Let's Compare ResNet to VGG16

In [11]:
from tensorflow.keras.applications import vgg16

# Build VGG16 with its pre-trained ImageNet weights (the weights file is downloaded on first use)
vgg_model = vgg16.VGG16(weights='imagenet')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels.h5


In [13]:
# Similar loop to before, but also get VGG16 preds
for f in files:
    # join() builds the path whether or not imagenet_path ends with a separator
    file_path = join(imagenet_path, f)

    # Load image with tensorflow
    # NOTE(review): the same preprocessed batch (ResNet50's preprocess_input) is fed to both
    # models - confirm VGG16 expects identical preprocessing.
    img = image.load_img(file_path, target_size=(224,224))
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)

    # Load image with openCV
    img2 = cv2.imread(file_path)
    if img2 is None:
        # cv2.imread returns None instead of raising when it cannot decode a file
        print(f'Skipping {f}: OpenCV could not read it')
        continue
    imageL = cv2.resize(img2, None, fx=0.5, fy=0.5, interpolation=cv2.INTER_CUBIC)

    # Get VGG16 preds
    preds_vgg16 = vgg_model.predict(x)
    predictions_vgg16 = decode_predictions(preds_vgg16, top=3)[0]
    draw_label('VGG 16 Predictions', predictions_vgg16, imageL)

    # Get ResNet preds
    preds_resnet = resnet_model.predict(x)
    predictions_resnet = decode_predictions(preds_resnet, top=3)[0]
    draw_label('ResNet Predictions', predictions_resnet, imageL)



## Transfer Learning

We can use previously trained models as a starting point for training our own models. To do this, we freeze the convolutional weights of the pre-trained model and train a new output (classification) layer on top of those weights.

In [14]:
# To demonstrate, we'll use the CIFAR-10 dataset
import os
from __future__ import print_function
from tensorflow.keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import SGD

In [15]:
# Constants
batch_size = 32    # mini-batch size passed to model.fit
num_classes = 10   # width of the one-hot labels and of the final Dense layer
epochs = 10        # number of training epochs passed to model.fit

In [34]:
# Download Data
# load_data() returns the train and test splits as (images, labels) pairs
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

In [35]:
# Data Dimensions
# The first axis of each image array is the sample count
n_train, n_test = x_train.shape[0], x_test.shape[0]
print(f'X Train Shape:\t{x_train.shape}')
print(f'Train Samples:\t{n_train}')
print(f'Test Samples:\t{n_test}')

X Train Shape:	(50000, 32, 32, 3)
Train Samples:	50000
Test Samples:	10000


In [36]:
# Normalize Data
# Cast to float32 and divide by 255 in a single expression per split.
# Note: re-running this cell rescales the already-rescaled arrays again.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255

print(f'Min:\t{x_train.min()}\nMax:\t{x_train.max()}')

Min:	0.0
Max:	1.0


In [37]:
# One-Hot Encoding for Output Labels
# Both label arrays are replaced by their num_classes-wide one-hot versions
y_train, y_test = (to_categorical(labels, num_classes) for labels in (y_train, y_test))

print(f'Y Train Shape:\t{y_train.shape}')

Y Train Shape:	(50000, 10)


In [38]:
# Build Model
# Same layer stack as before, declared as a single list instead of repeated add() calls
model = Sequential([
    # Convolutional feature extractor
    Conv2D(32, (3,3), padding='same', input_shape=x_train.shape[1:]),
    Activation('relu'),
    Conv2D(32, (3,3)),
    Activation('relu'),
    MaxPooling2D(pool_size=(2,2)),
    Dropout(0.25),

    # Dense classifier ending in a num_classes-way softmax
    Flatten(),
    Dense(512),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax'),
])

In [39]:
# Compile Model
# Categorical cross-entropy pairs with the one-hot labels and the softmax output layer
model.compile(
    loss='categorical_crossentropy',
    optimizer=SGD(learning_rate=0.01),
    metrics=['accuracy']
)

# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" to the output.
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_2 (Conv2D)           (None, 32, 32, 32)        896       
                                                                 
 activation_4 (Activation)   (None, 32, 32, 32)        0         
                                                                 
 conv2d_3 (Conv2D)           (None, 30, 30, 32)        9248      
                                                                 
 activation_5 (Activation)   (None, 30, 30, 32)        0         
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 15, 15, 32)       0         
 2D)                                                             
                                                                 
 dropout_2 (Dropout)         (None, 15, 15, 32)        0         
                                                      

In [40]:
# Train the Model
# The test split is passed as validation data; shuffling and progress output are on.
fit_options = dict(
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_test, y_test),
    shuffle=True,
    verbose=1,
)
trained = model.fit(x_train, y_train, **fit_options)

# Save the trained network so later cells can reload it from disk
model.save('./model_objects/cifar_simple_cnn.h5')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [41]:
# Score Model
# evaluate() returns the loss first, followed by the compiled metrics (accuracy here)
scores = model.evaluate(x_test, y_test, verbose=1)
test_loss, test_accuracy = scores[0], scores[1]
print(f'Test Loss:\t{test_loss}')
print(f'Test Accuracy:\t{test_accuracy}')

Test Loss:	1.0072827339172363
Test Accuracy:	0.6442999839782715


In [45]:
# Display Some of Our Guesses

# Constants
img_row = 32
img_height = 32
img_depth = 3
color = True
scale = 8

# CIFAR-10 class names: position i is the name shown for predicted class index i
guess_list = ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']

# Load Model
classifier = load_model('./model_objects/cifar_simple_cnn.h5')

In [46]:
# Helper function to draw our guess with the image
def draw_guess(name, res, img, scale=scale, img_row=img_row, img_height=img_height):
    '''Show `img` with the class name for index `res` written to its right.

    name: window title passed to cv_show_img.
    res:  class index (anything int() accepts); looked up in guess_list.
    img:  image array; a black border twice img.shape[0] wide is added on the right.
    scale, img_row, img_height: accepted but not used in the body.

    Reads the module-level `color` flag: when it is falsy the padded image is
    converted with cv2.COLOR_BGR2GRAY before the text is drawn.
    '''
    label = guess_list[int(res)]

    right_pad = img.shape[0] * 2
    canvas = cv2.copyMakeBorder(img, 0, 0, 0, right_pad, cv2.BORDER_CONSTANT, value=[0, 0, 0])
    if not color:
        canvas = cv2.cvtColor(canvas, cv2.COLOR_BGR2GRAY)

    cv2.putText(canvas, label, (300, 80), cv2.FONT_HERSHEY_COMPLEX_SMALL, 3, (0, 255, 0), 2)
    cv_show_img(name, canvas)

In [52]:
# Checkout preds for 10 random images
for i in range(10):
    rand = np.random.randint(0, x_test.shape[0])
    img = x_test[rand]

    # Enlarge a copy for display. The Keras CIFAR-10 arrays are RGB while OpenCV
    # windows interpret channels as BGR, so swap the channels to show true colours.
    img_copy = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
    img_copy = cv2.cvtColor(img_copy, cv2.COLOR_RGB2BGR)

    # The classifier expects a batch axis in front of the image
    batch = img.reshape(1, img_row, img_height, img_depth)

    # Get Prediction (index of the highest-scoring class, kept as an int)
    res = int(np.argmax(classifier.predict(batch, batch_size=1, verbose=0)[0]))

    # Display
    draw_guess("Prediction", res, img_copy)

### Preds Aren't Great... Applying Transfer Learning

In [69]:
# More imports
from tensorflow.keras import callbacks, optimizers, Model
from tensorflow.keras.layers import GlobalAveragePooling2D, BatchNormalization
import scipy.misc

In [55]:
# Constants
BATCH_SIZE = 32       # batch size for the data generators and the steps-per-epoch computation
NUM_CLASSES = 10      # width of the one-hot labels and of the softmax output layer
EPOCHS = 1            # number of epochs for the transfer-learning run
LEARNING_RATE = 1e-4  # presumably the Adam learning rate (the compile cell uses the same value)
MOMENTUM = 0.9        # NOTE(review): not referenced by any cell shown in this notebook - confirm whether it is needed

In [56]:
# Download Data
# Reload the raw arrays: the earlier cells rescaled x_* to [0, 1] and one-hot encoded y_* in place
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

In [57]:
def _resize_all(images, size=(48, 48)):
    '''Resize every image in `images` to `size` with INTER_AREA and stack the results.'''
    return np.array([cv2.resize(sample, size, interpolation=cv2.INTER_AREA) for sample in images])

# 48x48 versions of both splits, matching the input_shape used for the VGG16 base model
X_train = _resize_all(x_train)
X_test = _resize_all(x_test)

In [58]:
# One-Hot Encoding for Output Labels
# New capitalised names are used, so the integer label arrays stay available
Y_train, Y_test = (to_categorical(labels, NUM_CLASSES) for labels in (y_train, y_test))

In [60]:
# Import base vgg16 model
# VGG16 with ImageNet weights and no classification top, built for 48x48 3-channel inputs
base_model = vgg16.VGG16(
    weights='imagenet',
    include_top=False,      # Leave out the model's own top layers so that a head with the shape we need can be added later
    input_shape=(48,48,3)
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [61]:
# Take the output of the `block3_pool` layer as the feature tensor for the new head.
# NOTE(review): the layer is picked by name, so it is not necessarily the final layer of base_model - confirm this cut point is intended.
last_layer = base_model.get_layer('block3_pool').output

In [66]:
# Now we add a classification head on top of it:
# global average pooling -> batch norm -> two 256-unit ReLU layers -> dropout
head = GlobalAveragePooling2D()(last_layer)
head = BatchNormalization()(head)
for _ in range(2):
    head = Dense(256, activation='relu')(head)
x = Dropout(0.5)(head)

In [70]:
# Add in a new top layer sized for our NUM_CLASSES outputs
top_layer = Dense(NUM_CLASSES, activation='softmax')(x)

# Construct model: from the base model's input through to the new softmax layer
model = Model(inputs=base_model.input, outputs=top_layer)

In [71]:
# Freeze every layer that belongs to the base model so training leaves their weights untouched;
# the layers added on top are not part of base_model.layers and stay trainable.
for base_layer in base_model.layers:
    base_layer.trainable = False

In [75]:
# Compile the New Transferred Model
# The learning rate comes from the LEARNING_RATE constant instead of a duplicated literal.
# NOTE(review): for a NUM_CLASSES-way softmax head trained on one-hot labels,
# 'categorical_crossentropy' is the conventional loss. 'binary_crossentropy' is kept here
# because switching is only safe once the label arrays fed to training are confirmed to be
# one-hot with NUM_CLASSES columns - verify and change both together.
model.compile(
    loss='binary_crossentropy',
    optimizer=optimizers.Adam(learning_rate=LEARNING_RATE),
    metrics=['accuracy']
)

model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 48, 48, 3)]       0         
                                                                 
 block1_conv1 (Conv2D)       (None, 48, 48, 64)        1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 48, 48, 64)        36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 24, 24, 64)        0         
                                                                 
 block2_conv1 (Conv2D)       (None, 24, 24, 128)       73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 24, 24, 128)       147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 12, 12, 128)       0     

In [76]:
# Notice how the trainable params are much lower than the total params - we froze the other ones because they're already learned
# Generator configuration for the training images: only the 1/255 rescale is applied,
# horizontal flipping is explicitly switched off.
train_datagen = ImageDataGenerator(rescale=1. / 255, horizontal_flip=False)

In [77]:
# Use our Data Generator to get our data
# The one-hot arrays Y_train / Y_test are fed in so each target has NUM_CLASSES columns,
# matching the model's softmax output; the raw y_train / y_test hold integer class ids.
# ImageDataGenerator.fit() is not called: it only computes statistics for featurewise
# centering / normalisation or ZCA whitening, none of which are enabled on these generators.
train_generator = train_datagen.flow(X_train,
                                     Y_train,
                                     batch_size=BATCH_SIZE
                                    )

val_datagen = ImageDataGenerator(rescale=1. / 255, horizontal_flip=False)

val_generator = val_datagen.flow(X_test,
                                 Y_test,
                                 batch_size=BATCH_SIZE
                                )

In [78]:
# Now train the model
# Whole batches per epoch; the final partial batch of each split is not counted
train_steps_per_epoch = X_train.shape[0] // BATCH_SIZE
val_steps_per_epoch = X_test.shape[0] // BATCH_SIZE

# Model.fit accepts generators directly; Model.fit_generator is deprecated and
# emitted a warning when this cell ran.
history = model.fit(
    train_generator,
    steps_per_epoch=train_steps_per_epoch,
    validation_data=val_generator,
    validation_steps=val_steps_per_epoch,
    epochs=EPOCHS,
    verbose=1
)

  history = model.fit_generator(




: 