### Notebook 03: Feature Extraction on Google Colab
> * This Notebook requires a high-RAM processor, the bat

In [1]:
# Install PyDrive into the runtime; the following cell imports GoogleAuth and GoogleDrive from it.
!pip install PyDrive



In [1]:
import os
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

In [2]:
# Imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import os
# load VGG-16 model: 23 layers, 138,357,544 params, 528MB
from keras.applications.vgg16 import VGG16

# load and evaluate a saved model 
import tensorflow as tf
from keras import models
from keras.models import load_model, Sequential, Model
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Input, Dense, Dropout, Flatten
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [5]:
# Mount Google Drive at /content/drive. The pickled datasets are read from, and the model
# checkpoint is written to, /content/drive/MyDrive in later cells.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [4]:
%tensorflow_version 2.x
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

Found GPU at: /device:GPU:0


In [5]:
from psutil import virtual_memory

# Total memory reported by psutil, converted from bytes to decimal gigabytes.
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

# 20 GB is the threshold this notebook uses to tell a high-RAM runtime from a standard one.
if ram_gb >= 20:
  print('You are using a high-RAM runtime!')
else:
  for hint in ('To enable a high-RAM runtime, select the Runtime > "Change runtime type"',
               'menu, and then select High-RAM in the Runtime shape dropdown. Then, ',
               're-execute this cell.'):
    print(hint)

Your runtime has 27.4 gigabytes of available RAM

You are using a high-RAM runtime!


In [6]:
# Record the TensorFlow version this notebook was run with.
print(tf.__version__)

2.3.0


In [7]:
# Record the Python interpreter version available on the runtime's shell.
!python --version

Python 3.6.9


In [8]:
# Record the IPython version available on the runtime's shell.
!ipython --version

5.5.0


In [9]:
# TensorFlow sometimes tries to use GPU support when it is not wanted, which has caused many errors.
# To force CPU-only execution, the following (currently disabled) lines can be re-enabled:
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
# #tf.compat.v1.Session(), .compat.v1.
# sess_cpu = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(device_count={'GPU': 0}))
# print(tf.__version__)

# Report the default GPU device name, or ask for a GPU build when none is found.
default_gpu = tf.test.gpu_device_name()
print('Default GPU Device:{}'.format(default_gpu) if default_gpu
      else "Please install GPU version of TF")

Default GPU Device:/device:GPU:0


In [3]:
%%time
# Load training data
# SECURITY NOTE: pickle.load can execute arbitrary code while unpickling; only load .pkl
# files that you created yourself.
print('Loading binary X_train from pkl...')
# Load the X_train and y_train lists from their .pkl files on the mounted G-Drive
# and convert each to a NumPy array.
with open('/content/drive/MyDrive/X_training_image_data.pkl', 'rb') as file:
    X_train = np.array(pickle.load(file))

# Progress message now names y_train, the object actually being loaded here.
print('Attempting to load binary y_train from pkl...')
with open('/content/drive/MyDrive/y_training_labels.pkl', 'rb') as file:
    y_train = np.array(pickle.load(file))

print('Training data loaded!')
print()

Loading binary X_train from pkl...
Attempting to load binary y_valid from pkl...
Training data loaded!

CPU times: user 5.33 s, sys: 21.1 s, total: 26.5 s
Wall time: 2min 27s


In [4]:
%%time
# Load validation data
# Both the images and the labels are unpickled from the mounted G-Drive and converted
# to NumPy arrays.
print('Attempting to load binary X_valid from pkl...')
with open('/content/drive/MyDrive/X_validation_image_data.pkl', 'rb') as images_pkl:
    X_valid = np.array(pickle.load(images_pkl))

print('Attempting to load binary y_valid from pkl...')
with open('/content/drive/MyDrive/y_validation_labels.pkl', 'rb') as labels_pkl:
    y_valid = np.array(pickle.load(labels_pkl))

# Trailing blank line separates this message from the %%time report.
print('Validation data loaded!', end='\n\n')

Attempting to load binary X_valid from pkl...
Attempting to load binary y_valid from pkl...
Validation data loaded!

CPU times: user 4.63 s, sys: 13.4 s, total: 18.1 s
Wall time: 1min 40s


In [8]:
# Sanity-check the training split: sample and label counts, container type, and the
# shape/type of the first training image (previously read from X_valid by mistake).
len(X_train), len(y_train), type(X_train), X_train[0].shape, type(X_train[0])

(64346, 64346, numpy.ndarray, (224, 224, 3), numpy.ndarray)

In [9]:
# Sanity-check the validation split: sample and label counts, container type, and the
# shape/type of the first validation image (previously read from X_train by mistake).
len(X_valid), len(y_valid), type(X_valid), X_valid[0].shape, type(X_valid[0])

(43733, 43733, numpy.ndarray, (224, 224, 3), numpy.ndarray)

In [10]:
# Distinct label values present in the validation labels and in the training labels.
set(y_valid), set(y_train)

({0, 1}, {0, 1})

In [11]:
# Shape and type of the first validation image, then the type of the whole container,
# one per line.
print(X_valid[0].shape, type(X_valid[0]), type(X_valid), sep='\n')

(224, 224, 3)
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [12]:
# Shape and type of the first training image, then the type of the whole container,
# one per line.
print(X_train[0].shape, type(X_train[0]), type(X_train), sep='\n')

(224, 224, 3)
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>


In [4]:
# References:
#   https://keras.io/api/applications/vgg/#vgg16-function
#   https://towardsdatascience.com/a-demonstration-of-transfer-learning-of-vgg-convolutional-neural-network-pre-trained-model-with-c9f5b8b1ab0a

# Input tensor for the headless model: 224 x 224 images with 3 channels.
new_input = Input(shape=(224, 224, 3))

# Load VGG-16 without its fully-connected prediction layers (include_top=False), using
# the ImageNet weights that Keras downloads automatically on first use.
# input_tensor MUST be passed so the classifier built on top is fed by the VGG-16 weights.
# The remaining arguments (input_shape, pooling, classes, classifier_activation) are
# left at their defaults.
base_model = VGG16(
    include_top=False,
    weights="imagenet",
    input_tensor=new_input,
)

# Freeze every base layer so that training leaves the loaded weights unchanged.
for layer in base_model.layers:
    layer.trainable = False

# Show the architecture that remains after dropping the dense top layers.
base_model.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

In [5]:
# Stack a fully-connected binary classifier on top of the loaded base_model.
batch_transfer_model = Sequential([
    base_model,
    # Flatten the 4D feature maps to 2D for the "fully-connected" dense layers
    Flatten(),
    # Hidden dense layer with 8,192 nodes, followed by 30% dropout
    Dense(8192, activation='relu'),
    Dropout(0.3),
    # Hidden dense layer with 2,048 nodes, followed by 10% dropout
    Dense(2048, activation='relu'),
    Dropout(0.1),
    # Second hidden dense layer with 2,048 nodes
    Dense(2048, activation='relu'),
    # Single sigmoid unit for the binary output
    Dense(1, activation='sigmoid'),
])

# Summarize the new binary classifier to check that everything looks ok
batch_transfer_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Functional)           (None, 7, 7, 512)         14714688  
_________________________________________________________________
flatten (Flatten)            (None, 25088)             0         
_________________________________________________________________
dense (Dense)                (None, 8192)              205529088 
_________________________________________________________________
dropout (Dropout)            (None, 8192)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 2048)              16779264  
_________________________________________________________________
dropout_1 (Dropout)          (None, 2048)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 2048)              4

In [None]:
# Powers of two from 2**0 through 2**9 — presumably a list of candidate batch sizes
# (the training setup cell notes that b_size should be base-2); confirm.
[2**i for i in range(10)]

[1, 2, 4, 8, 16, 32, 64, 128, 256, 512]

In [6]:
# Training hyper-parameters
nb_epochs = 10
b_size = 64  # batch size; choose base-2

# Image generator used for data augmentation during training.
# Augmentation acts as a form of regularization: images are randomly rotated, zoomed,
# shifted, sheared and flipped on the fly.
# Augmentor from https://www.sunjackson.com/post/700/ with values & name changed
augmentor = ImageDataGenerator(
    rotation_range=32,
    zoom_range=0.16,
    shear_range=0.16,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    fill_mode="nearest",
)

# Compile the model before training:
#   optimizer - Adam, an extension of stochastic gradient descent
#   loss      - binary cross-entropy, the objective used to fit the model
#   metrics   - accuracy, to help interpret model performance
batch_transfer_model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Checkpoint callback: with save_best_only=True the file is only rewritten when an epoch
# improves on the best result so far.
# Checkpoint file names used across runs:
#   transfer_dog.h5       ---> 10epochs/256batch/no augmentor
#   batch_transfer_dog.h5 ---> 10/128 with augmentor
checkpoint_saver = ModelCheckpoint(
    filepath='/content/drive/MyDrive/batch_transfer_dog.h5',
    save_best_only=True,
    verbose=1,
)

#### Notes from General Assembly-DSI Lessons 

In [23]:
# Fit the transfer model directly on the in-memory arrays (no augmentation), validating on
# (X_valid, y_valid) after each epoch and checkpointing through checkpoint_saver.
# NOTE(review): `transfer_model` is not defined in any cell visible in this notebook (the
# model built above is `batch_transfer_model`), and the saved output below refers to
# transfer_dog.h5 rather than the path configured in checkpoint_saver. Presumably this
# cell is left over from an earlier session — confirm before running on a fresh kernel.
transfer_history = transfer_model.fit(x=X_train, 
                                   y=y_train,
                                   validation_data=(X_valid, y_valid),
                                   batch_size=b_size,
                                   epochs=nb_epochs,
                                   callbacks = [checkpoint_saver],
                                   verbose=1)

Epoch 1/10
Epoch 00001: val_loss improved from inf to 0.10588, saving model to /content/drive/MyDrive/transfer_dog.h5
Epoch 2/10
Epoch 00002: val_loss improved from 0.10588 to 0.09952, saving model to /content/drive/MyDrive/transfer_dog.h5
Epoch 3/10
Epoch 00003: val_loss did not improve from 0.09952
Epoch 4/10
Epoch 00004: val_loss did not improve from 0.09952
Epoch 5/10
Epoch 00005: val_loss did not improve from 0.09952
Epoch 6/10
Epoch 00006: val_loss did not improve from 0.09952
Epoch 7/10
Epoch 00007: val_loss did not improve from 0.09952
Epoch 8/10
Epoch 00008: val_loss did not improve from 0.09952
Epoch 9/10
Epoch 00009: val_loss did not improve from 0.09952
Epoch 10/10
Epoch 00010: val_loss did not improve from 0.09952


In [None]:
# Train on augmented data: augmentor.flow(...) is a generator that yields batches of
# b_size images/labels, which Keras pulls from during each epoch.
# Model.fit accepts generators directly in TF 2.1+ (this notebook runs 2.3.0);
# Model.fit_generator is deprecated, so fit is used instead. batch_size is not passed to
# fit because the generator already produces batches.
batch_transfer_history = batch_transfer_model.fit(
    augmentor.flow(X_train, y_train, batch_size=b_size),
    validation_data=(X_valid, y_valid),
    steps_per_epoch=len(X_train) // b_size,  # number of full batches per epoch
    epochs=nb_epochs,
    # callback objects can write logs to monitor metrics, save the model to disk, stop
    # early, or view internal states/stats after each batch or epoch
    callbacks=[checkpoint_saver],
    verbose=1,
)

In [None]:
# Disabled snippet: the code below is wrapped in a bare string literal, so none of it
# executes. It sketches loading a previously saved model from an .h5 file.
'''
# load weights into new model
loaded_model = '../../assets/model_vgg16_flatten.h5'

# Load base model weights
loaded_model = load_model(loaded_model)

#loaded_model.load_weights("~\.keras\models.h5") 
print("Loaded model from disk")
'''

In [None]:
# Disabled snippet: the code below is wrapped in a bare string literal, so none of it
# executes. It sketches a feature-extractor model that outputs the 'flatten' layer.
# NOTE(review): in this notebook the Flatten layer is added to batch_transfer_model, not
# to base_model — presumably get_layer('flatten') should target that model; confirm
# before re-enabling.
'''
# Cut-Off the VGG-16 Model after the last Conv2D layer (18)
dogg16 = models.Model(inputs=base_model.input,
                           outputs=base_model.get_layer('flatten').output
                          )
'''
# Remaining disabled steps: extract features with predict, then save the model and its
# architecture to a single file.
# Extract features
#flatten_features = model_vgg16.predict(x)
# save model and architecture to single file
#dogg16.save('../../model_dogg16.h5') 
#print("Saved model to disk")