### Notebook 03: Feature Extraction

In [16]:
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import os
# load VGG-16 model: 23 layers, 138,357,544 params, 528MB
from keras.applications.vgg16 import VGG16

# load and evaluate a saved model 
import tensorflow as tf
#from tensorflow import keras

from keras import models
from keras.models import load_model, Sequential, Model
from tensorflow.keras.models import Sequential, Model

from tensorflow.keras.layers import Input, Dense, Dropout, Flatten #, Conv2D, MaxPooling2D
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [3]:
print(tf.__version__)

2.3.1


In [4]:
# Force CPU-only execution: TensorFlow's GPU support has caused many errors on this machine.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
sess_cpu = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(device_count={'GPU': 0}))
print(tf.__version__)

# Query the GPU device name once and report it once.
gpu_device_name = tf.test.gpu_device_name()
if gpu_device_name:
    print('Default GPU Device:{}'.format(gpu_device_name))
else:
    # Expected outcome here, because the GPU was deliberately hidden above.
    print("Please install GPU version of TF")

2.3.1
Please install GPU version of TF
Please install GPU version of TF


In [17]:
%%time
# Restore the training images and labels that the previous notebooks pickled
# into the assets folder (kept just outside of the Github repo).
print('Loading binary X_train from pkl...')
with open('../../assets/X_training_image_data.pkl', 'rb') as images_pkl:
    X_train = pickle.load(images_pkl)

print('Loading binary y_train from pkl...')
with open('../../assets/y_training_labels.pkl', 'rb') as labels_pkl:
    y_train = pickle.load(labels_pkl)

# Completion message followed by one blank line
print('Training data loaded!', end='\n\n')

Loading binary X_train from pkl...
Loading binary y_train from pkl...
Training data loaded!

Wall time: 2min 9s


In [6]:
%%time
# Restore the validation images and labels that the previous notebooks pickled
# into the assets folder (kept just outside of the Github repo).
print('Loading binary X_valid from pkl...')
with open('../../assets/X_validation_image_data.pkl', 'rb') as images_pkl:
    X_valid = pickle.load(images_pkl)

print('Loading binary y_valid from pkl...')
with open('../../assets/y_validation_labels.pkl', 'rb') as labels_pkl:
    y_valid = pickle.load(labels_pkl)

# Completion message followed by one blank line
print('Validation data loaded!', end='\n\n')

Loading binary X_valid from pkl...
Loading binary y_valid from pkl...
Validation data loaded!

Wall time: 1min 1s


In [7]:
# Sanity check: each split should report the same count for images and labels.
valid_summary = f'Length of Validation data: {len(X_valid):,}\nLength of Validation labels: {len(y_valid):,}'
train_summary = f'Length of Training data: {len(X_train):,}\nLength of Training labels: {len(y_train):,}'
# A blank line separates the two summaries
print(valid_summary, train_summary, sep='\n\n')

Length of Validation data: 43,733
Length of Validation labels: 43,733

Length of Training data: 64,346
Length of Training labels: 64,346


In [8]:
# Compare the shape of the first image in each split.
valid_shape, train_shape = X_valid[0].shape, X_train[0].shape
print(f'Validation data shape: {valid_shape}\nTraining data shape:   {train_shape}')

Validation data shape: (224, 224, 3)
Training data shape:   (224, 224, 3)


#### List objects must be converted to numpy arrays before feeding the transfer model

In [9]:
%%time
# Stack the validation images into a single ndarray for the transfer model.
X_valid2 = np.array(X_valid)
print('Loaded!', end='\n\n')

Loaded!


In [10]:
%%time
# Stack the training images into a single ndarray for the transfer model.
X_train2 = np.array(X_train)
print('Loaded!', end='\n\n')

Loaded!


In [11]:
# Convert the validation labels to a numpy array, alongside X_valid2.
y_valid2 = np.array(y_valid)
print('Loaded!')

Loaded!


In [12]:
# Convert the training labels to a numpy array, alongside X_train2.
y_train2 = np.array(y_train)
print('Loaded!')

Loaded!


In [13]:
# Confirm the conversion: shape of one sample, its type, and the container type.
print(X_valid2[0].shape, type(X_valid2[0]), type(X_valid2), sep='\n')

(224, 224, 3)
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [14]:
# Confirm the conversion: shape of one sample, its type, and the container type.
print(X_train2[0].shape, type(X_train2[0]), type(X_train2), sep='\n')

(224, 224, 3)
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [15]:
# References:
# https://keras.io/api/applications/vgg/#vgg16-function
# https://towardsdatascience.com/a-demonstration-of-transfer-learning-of-vgg-convolutional-neural-network-pre-trained-model-with-c9f5b8b1ab0a

# Explicit input tensor, required because the model is loaded with include_top=False
new_input = Input(shape=(224, 224, 3))

# Load VGG-16 without the fully-connected output layers used to make predictions.
# The "imagenet" weights are downloaded automatically when the model is
# instantiated (Keras Applications cache: ~/.keras/models/).
# input_shape, pooling, classes and classifier_activation are left at their
# defaults (None, None, 1000, "softmax").
base_model = VGG16(
    include_top=False,
    weights="imagenet",
    input_tensor=new_input,  # --> MUST INCLUDE THIS PARAM TO FEED CLASSIFIER VGG-16 WEIGHTS
)

# Freeze every layer of the base model so that training leaves it unchanged
for frozen_layer in base_model.layers:
    frozen_layer.trainable = False

# Summarize the loaded model now that the dense top layers have been dropped
base_model.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

In [4]:
1000*1000

1000000

In [16]:
# Stack a new binary-classification head on top of the loaded base_model.
transfer_model = Sequential([
    base_model,
    Flatten(),                        # 4D --> 2D for the "fully-connected" dense layers
    Dense(8192, activation='relu'),   # hidden dense layer with 8,192 nodes
    Dropout(0.3),
    Dense(2048, activation='relu'),   # hidden dense layer with 2,048 nodes
    Dropout(0.1),
    Dense(2048, activation='relu'),   # hidden dense layer with 2,048 nodes
    Dense(1, activation='sigmoid'),   # single sigmoid output unit
])

# Summarize the new binary classifier to check that everything looks ok
transfer_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Functional)           (None, 7, 7, 512)         14714688  
_________________________________________________________________
flatten (Flatten)            (None, 25088)             0         
_________________________________________________________________
dense (Dense)                (None, 8192)              205529088 
_________________________________________________________________
dropout (Dropout)            (None, 8192)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 2048)              16779264  
_________________________________________________________________
dropout_1 (Dropout)          (None, 2048)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 2048)              4

In [17]:
[2**i for i in range(10)]

[1, 2, 4, 8, 16, 32, 64, 128, 256, 512]

In [18]:
# Training hyper-parameters
# Batch size chosen as a power of two; 128 was too large for memory
# (both CoLab's and mine) -- NEED BATCH GENERATOR!!!
bs = 64
nb_epochs = 1

# Compile the model before training
compile_options = dict(
    optimizer='adam',             # Adam: an extension of stochastic gradient descent
    loss='binary_crossentropy',   # objective function used to fit the model
    metrics=['accuracy'],         # accuracy helps with interpreting model performance
)
transfer_model.compile(**compile_options)

# Checkpoint callback: writes to transfer.h5 in the assets folder, with
# save_best_only=True so that only improving epochs are saved
checkpoint_saver = ModelCheckpoint(
    filepath='../../assets/transfer.h5',
    verbose=1,
    save_best_only=True,
)

In [None]:
# Train the transfer model on the training arrays, validating on the validation
# arrays and checkpointing through checkpoint_saver
# (optimizer 'Adam': extension of stochastic gradient descent (SGD)).
transfer_history = transfer_model.fit(
    X_train2,
    y_train2,
    batch_size=bs,
    epochs=nb_epochs,
    verbose=1,
    callbacks=[checkpoint_saver],
    validation_data=(X_valid2, y_valid2),
)



#### Future work: Batch Generator - the GPU can't store the entire dataset in memory

In [2]:
# ImageDataGenerator is used by the datagen definition in the next cell.
from keras.preprocessing.image import ImageDataGenerator

In [None]:
# References:
# https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly
# https://blog.keras.io/building-powerful-image-classification-models-using-very-little-data.html

# Augmentation settings handed to the image generator
augmentation_options = dict(
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)
datagen = ImageDataGenerator(**augmentation_options)
# Legacy reference from the car-simulation project, kept for future work only.
# It is wrapped in a string on purpose: the pasted lines are not valid Python
# (each original line sits next to a half-finished adaptation) and must not run.
'''
# Generator from car simulation was more complex than needed here
model.fit_generator(batch_generator(args.data_dir, X_train, y_train, args.batch_size, True),
model.fit_generator(batch_generator('data', X_train, y_train, 40, True),
                        args.samples_per_epoch, # steps_per_epoch
                        args.nb_epoch,  # epochs
                        max_q_size=1,
                        validation_data=batch_generator(args.data_dir, X_valid, y_valid, args.batch_size, False),
                        validation_data=batch_generator('data', X_valid, y_valid, 40, False),
                        nb_val_samples=len(X_valid), # validation_steps
                        callbacks=[checkpoint], # callback objects can write logs to monitor metrics, save model to disk, early stopping, or view internal states/stats after each batch or epoch 
                        verbose=1)
'''


def batch_generator(data_dir, image_paths, steering_angles, batch_size, is_training):
    """
    Endlessly yield (images, steering_angles) batches drawn in random order.

    Carried over from a car-simulation project. It depends on names that are
    not defined in this notebook and must exist before the generator is used:
    IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS, load_image, augument, preprocess.

    Parameters
    ----------
    data_dir
        Passed through unchanged to load_image / augument.
    image_paths
        Array whose rows unpack as (center, left, right); its .shape[0] is the
        number of samples.
    steering_angles
        Indexable of targets, aligned with the rows of image_paths.
    batch_size
        Maximum number of samples per yielded batch.
    is_training
        When true, a sample goes through augument whenever a fresh
        np.random.rand() draw is below 0.6; otherwise the center image is
        loaded as-is.

    Yields
    ------
    (images, steers)
        The first holds the preprocessed images, the second the matching
        steering angles. A batch has batch_size samples, or fewer when
        image_paths has fewer than batch_size rows. Both arrays are views of
        buffers that are reused for the next batch.
    """
    images = np.empty([batch_size, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS])
    steers = np.empty(batch_size)
    while True:
        i = 0
        # A fresh random ordering is drawn for every batch
        for index in np.random.permutation(image_paths.shape[0]):
            center, left, right = image_paths[index]
            steering_angle = steering_angles[index]
            # augmentation
            if is_training and np.random.rand() < 0.6:
                image, steering_angle = augument(data_dir, center, left, right, steering_angle)
            else:
                image = load_image(data_dir, center)
            # add the image and steering angle to the batch
            images[i] = preprocess(image)
            steers[i] = steering_angle
            i += 1
            if i == batch_size:
                break
        # Yield only the rows filled in this pass, so a short dataset never
        # exposes uninitialised np.empty memory
        yield images[:i], steers[:i]

#### From General Assembly Notes Lesson 04 Week 07 Topic: CNN

In [None]:
# Visualize the training and validation loss of a model after it has been fit
# CREDIT DIRECT FROM NOTES LESSON 7p04 CNN
def plot_loss(history, model_name, loss_name='Binary Crossentropy'):
    """
    Plot training and validation loss by epoch for a fitted model.

    Parameters
    ----------
    history
        Object exposing .history (a dict with 'loss' and 'val_loss' lists) and
        .epoch (the epoch indices), such as the value returned by fit.
    model_name
        Text shown in the figure title.
    loss_name
        Label for the y axis. Defaults to 'Binary Crossentropy', the loss the
        transfer model in this notebook is compiled with.
    """
    train_loss = history.history['loss']
    test_loss = history.history['val_loss']
    epoch_labels = history.epoch
    # Set figure size
    plt.figure(figsize=(12, 8))
    # Plot against the recorded epoch indices so the curves line up with the
    # x ticks; markers keep a single-epoch history visible
    plt.plot(epoch_labels, train_loss, label='Training Loss', color='#185fad', marker='o')
    plt.plot(epoch_labels, test_loss, label='Validation Loss', color='orange', marker='o')
    # Set title and axis labels
    plt.title(f'Model: {model_name}\nTrain & Validation Loss by Epoch', fontsize=25)
    plt.xlabel('Epoch', fontsize=18)
    plt.ylabel(loss_name, fontsize=18)
    plt.xticks(epoch_labels, epoch_labels)
    plt.legend(fontsize=18)

In [None]:
# CREDIT DIRECT FROM NOTES LESSON 7p04 CNN
# Plot the history object returned by transfer_model.fit (stored as transfer_history)
plot_loss(transfer_history, 'Transfer from VGG-16 to Binary Classifier')

In [None]:
'''
# load weights into new model
loaded_model = '../../assets/model_vgg16_flatten.h5'

# Load base model weights
loaded_model = load_model(loaded_model)

#loaded_model.load_weights("~\.keras\models.h5")
print("Loaded model from disk")
'''

In [None]:
'''
# Cut-Off the VGG-16 Model after the last Conv2D layer (18)
dogg16 = models.Model(inputs=base_model.input,
                           outputs=base_model.get_layer('flatten').output
                          )
'''
# Extract features
#flatten_features = model_vgg16.predict(x)
# save model and architecture to single file
#dogg16.save('../../model_dogg16.h5') 
#print("Saved model to disk")
'''
transfer_model.save('../../assets/transfer_model.h5')
print("Saved model to disk")
del transfer_model
print("Transfer model deleted from memory!")
'''

In [6]:
%store

Stored variables and their in-db values:
X_train                    -> '<unavailable>'
X_valid                    -> '<unavailable>'
dog_data_part2             -> ['2377385', '2377411', '2377429', '2377466', '2377
y_valid                    -> [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0


In [2]:
!ipython --version

7.16.1


In [12]:
%store -r X_valid

In [None]:
%store X_train

In [None]:
# Persist the training images and labels as pkl files in the assets folder
# (just outside of Github repo) so that Notebook 3 can load them.
print('Saving binary X_train to pkl...')
with open('../../assets/X_training_image_data.pkl', 'wb') as images_out:
    pickle.dump(X_train, images_out, protocol=pickle.DEFAULT_PROTOCOL)

print('Saving binary y_train to pkl...')
with open('../../assets/y_training_labels.pkl', 'wb') as labels_out:
    pickle.dump(y_train, labels_out, protocol=pickle.DEFAULT_PROTOCOL)

# Blank line, then the completion message
print('', 'Training data Pickled!', sep='\n')

In [None]:
import os
import subprocess

# Run `conda list` for every entry found in the local miniconda envs folder.
envs_root = '/Users/me/miniconda3/envs'
for env_name in os.listdir(envs_root):
    subprocess.call(['conda', 'list', '-n', env_name])

In [1]:
import pickle

In [None]:

pickle.dump(
            obj,
            file,
            protocol=None,
            *,
            fix_imports=True,
            buffer_callback=None,
        )

In [3]:
10.86-1.6

9.26