In [94]:
import tensorflow as tf
import keras
from keras.applications.mobilenet import MobileNet
# from tensorflow.contrib.keras.applications import MobileNet
from keras.models import Model, load_model
from keras.layers import Flatten, Dense, Dropout, Input
from keras.layers.normalization import BatchNormalization
from keras import optimizers
import numpy as np
import matplotlib.pyplot as plt

import os
import pickle



In [95]:
prefix='emotions_en_de'

In [96]:
# Load training and validation data
dataset = pickle.load(open(os.path.join('Datasets', prefix+'.pkl'), 'rb'))

train_indices = [ i for i,r in enumerate(dataset['rand']) if r<=0.9 ]
check_indices = [ i for i,r in enumerate(dataset['rand']) if r>0.9 ]

print("Training and Validation(='check_') data loaded, %d items total " % (len(dataset['stamp']),))

Training and Validation(='check_') data loaded, 644 items total 


In [97]:
x_train = np.array( dataset['stamp'] )[train_indices]
y_train = np.array( dataset['label'] )[train_indices]

x_test = np.array( dataset['stamp'] )[check_indices]
y_test = np.array( dataset['label'] )[check_indices]

# x_train = [dataset['stamp']][train_indices]
# y_train = [dataset['label']][train_indices]

# x_test = [dataset['stamp']][check_indices]
# y_test = [dataset['label']][check_indices]

In [98]:
dataset = None

In [99]:
def to_rgb(im):
    # I think this will be slow
    print(im.shape)
    n, w, h = im.shape
    ret = np.empty((n, w, h, 3))
    ret[:, :, :, 0] = im
    ret[:, :, :, 1] = im
    ret[:, :, :, 2] = im
    return ret

In [100]:
x_train.shape

(570, 224, 224)

In [101]:
# x_train = np.expand_dims( x_train, -1)
x_train = to_rgb(x_train)

(570, 224, 224)


In [102]:
# x_test = np.expand_dims( x_test, -1)
x_test = to_rgb(x_test)

(74, 224, 224)


In [103]:
x_train

array([[[[   1.,    1.,    1.],
         [   1.,    1.,    1.],
         [   1.,    1.,    1.],
         ..., 
         [  42.,   42.,   42.],
         [  42.,   42.,   42.],
         [  42.,   42.,   42.]],

        [[  11.,   11.,   11.],
         [  11.,   11.,   11.],
         [  11.,   11.,   11.],
         ..., 
         [  70.,   70.,   70.],
         [  70.,   70.,   70.],
         [  70.,   70.,   70.]],

        [[  33.,   33.,   33.],
         [  33.,   33.,   33.],
         [  33.,   33.,   33.],
         ..., 
         [  88.,   88.,   88.],
         [  88.,   88.,   88.],
         [  88.,   88.,   88.]],

        ..., 
        [[  39.,   39.,   39.],
         [  39.,   39.,   39.],
         [  39.,   39.,   39.],
         ..., 
         [  62.,   62.,   62.],
         [  62.,   62.,   62.],
         [  62.,   62.,   62.]],

        [[  23.,   23.,   23.],
         [  23.,   23.,   23.],
         [  23.,   23.,   23.],
         ..., 
         [  62.,   62.,   62.],
       

In [104]:
np.amax(x_train)

255.0

In [105]:
np.amax(x_test)

255.0

In [106]:
x_train = x_train / 255.0
x_test = x_test / 255.0

In [107]:
x_train

array([[[[ 0.00392157,  0.00392157,  0.00392157],
         [ 0.00392157,  0.00392157,  0.00392157],
         [ 0.00392157,  0.00392157,  0.00392157],
         ..., 
         [ 0.16470588,  0.16470588,  0.16470588],
         [ 0.16470588,  0.16470588,  0.16470588],
         [ 0.16470588,  0.16470588,  0.16470588]],

        [[ 0.04313725,  0.04313725,  0.04313725],
         [ 0.04313725,  0.04313725,  0.04313725],
         [ 0.04313725,  0.04313725,  0.04313725],
         ..., 
         [ 0.2745098 ,  0.2745098 ,  0.2745098 ],
         [ 0.2745098 ,  0.2745098 ,  0.2745098 ],
         [ 0.2745098 ,  0.2745098 ,  0.2745098 ]],

        [[ 0.12941176,  0.12941176,  0.12941176],
         [ 0.12941176,  0.12941176,  0.12941176],
         [ 0.12941176,  0.12941176,  0.12941176],
         ..., 
         [ 0.34509804,  0.34509804,  0.34509804],
         [ 0.34509804,  0.34509804,  0.34509804],
         [ 0.34509804,  0.34509804,  0.34509804]],

        ..., 
        [[ 0.15294118,  0.15294118,

In [108]:
np.amax(x_train)

1.0

In [109]:
y_train[0:10]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [110]:
num_classes = 2

In [111]:
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

In [112]:
y_train[0:10]

array([[ 1.,  0.],
       [ 1.,  0.],
       [ 1.,  0.],
       [ 1.,  0.],
       [ 1.,  0.],
       [ 1.,  0.],
       [ 1.,  0.],
       [ 1.,  0.],
       [ 1.,  0.],
       [ 1.,  0.]])

In [113]:
batch_size = 8
training_epochs = 20

In [114]:
mobilenet_base_model = MobileNet(
    input_shape=(224, 224, 3),
    alpha=1.0,
    depth_multiplier=1,
    dropout=0.001,
    include_top=False,
    weights='imagenet',
    input_tensor=None,
#     pooling=None,
    pooling='avg',
    classes=2
)

In [115]:
mobilenet_base_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
conv1 (Conv2D)               (None, 112, 112, 32)      864       
_________________________________________________________________
conv1_bn (BatchNormalization (None, 112, 112, 32)      128       
_________________________________________________________________
conv1_relu (Activation)      (None, 112, 112, 32)      0         
_________________________________________________________________
conv_dw_1 (DepthwiseConv2D)  (None, 112, 112, 32)      288       
_________________________________________________________________
conv_dw_1_bn (BatchNormaliza (None, 112, 112, 32)      128       
_________________________________________________________________
conv_dw_1_relu (Activation)  (None, 112, 112, 32)      0         
__________

In [116]:
bottleneck_feature_train = mobilenet_base_model.predict(x_train)
np.save('bottleneck_features_train.npy', bottleneck_feature_train)

In [117]:
x_test.shape

(74, 224, 224, 3)

In [118]:
bottleneck_feature_test = mobilenet_base_model.predict(x_test)
np.save('bottleneck_feature_test.npy', bottleneck_feature_test)

In [119]:
bottleneck_train_data = np.load('bottleneck_features_train.npy')
bottleneck_validation_data = np.load('bottleneck_feature_test.npy')

In [120]:
# New top layers after mobilenet_base_model
# x = Flatten()(mobilenet_base_model.output)
Inp = Input(shape=(1024,))
# x = Dropout(0.2)(mobilenet_base_model.output)
x = Dropout(0.2)(Inp)
x = BatchNormalization()(x)
x = Dense(1024, activation='relu', name = "Dense_1")(x)
x = Dropout(0.5)(x)
x = BatchNormalization()(x)
predictions = Dense(num_classes, activation = 'softmax')(x)

In [121]:
top_model = Model(input=Inp, output=predictions)
top_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         (None, 1024)              0         
_________________________________________________________________
dropout_3 (Dropout)          (None, 1024)              0         
_________________________________________________________________
batch_normalization_3 (Batch (None, 1024)              4096      
_________________________________________________________________
Dense_1 (Dense)              (None, 1024)              1049600   
_________________________________________________________________
dropout_4 (Dropout)          (None, 1024)              0         
_________________________________________________________________
batch_normalization_4 (Batch (None, 1024)              4096      
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 2050      
Total para

  """Entry point for launching an IPython kernel.


In [122]:
top_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [123]:
bottleneck_train_data.shape

(570, 1024)

In [124]:
y_train.shape

(570, 2)

In [125]:
bottleneck_validation_data.shape

(74, 1024)

In [126]:
top_model_history = top_model.fit(x=bottleneck_train_data, y=y_train,
                    batch_size=32,
                    epochs=100,
                    verbose=1, # This is for what we want it to display out as it trains 
                    validation_data=(bottleneck_validation_data, y_test))

Train on 570 samples, validate on 74 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Ep

In [127]:
top_model.save_weights('bottleneck_top_model.h5')

In [128]:
# # New top layers after mobilenet_base_model
# # x = Flatten()(mobilenet_base_model.output)
# Inp = Input(shape=(1024,))
# # x = Dropout(0.2)(mobilenet_base_model.output)
# x = Dropout(0.2)(Inp)
# x = BatchNormalization()(x)
# x = Dense(1024, activation='relu', name = "Dense_1")(x)
# x = Dropout(0.5)(x)
# x = BatchNormalization()(x)
# predictions = Dense(num_classes, activation = 'softmax')(x)

In [129]:
# top_model = Model(input=Inp, output=predictions)
# top_model.summary()

In [130]:
# top_model.load_weights('bottleneck_top_model.h5')

In [131]:
top_model.load_weights('bottleneck_top_model.h5')

In [132]:
top_model.evaluate(bottleneck_validation_data, y_test)



[1.0356611453399465, 0.72972972972972971]

In [133]:
top_model.evaluate(mobilenet_base_model.predict(x_test), y_test)



[1.0356611453399465, 0.72972972972972971]

In [134]:
# whole_model = mobilenet_base_model
# whole_model.add(top_model)

In [135]:
whole_model = Model(input=mobilenet_base_model.input, output=top_model(mobilenet_base_model.output))

  """Entry point for launching an IPython kernel.


In [136]:
whole_model.compile(loss='categorical_crossentropy',
              optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
              metrics=['accuracy'])

In [137]:
whole_model.evaluate(x_test, y_test)



[1.0356611453399465, 0.72972972972972971]

In [138]:
whole_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
conv1 (Conv2D)               (None, 112, 112, 32)      864       
_________________________________________________________________
conv1_bn (BatchNormalization (None, 112, 112, 32)      128       
_________________________________________________________________
conv1_relu (Activation)      (None, 112, 112, 32)      0         
_________________________________________________________________
conv_dw_1 (DepthwiseConv2D)  (None, 112, 112, 32)      288       
_________________________________________________________________
conv_dw_1_bn (BatchNormaliza (None, 112, 112, 32)      128       
_________________________________________________________________
conv_dw_1_relu (Activation)  (None, 112, 112, 32)      0         
__________

In [139]:
whole_model.layers[10]

<keras.applications.mobilenet.DepthwiseConv2D at 0x7f4438671128>

In [140]:
first_trainable_layer_index = 22
print(len(whole_model.layers))
print("first_trainable_layer :", whole_model.layers[first_trainable_layer_index])

84
first_trainable_layer : <keras.applications.mobilenet.DepthwiseConv2D object at 0x7f44383f5ba8>


In [141]:
for layer in whole_model.layers[:first_trainable_layer_index]:
    print(layer)
    layer.trainable = False

<keras.engine.topology.InputLayer object at 0x7f446167b3c8>
<keras.layers.convolutional.Conv2D object at 0x7f446167b668>
<keras.layers.normalization.BatchNormalization object at 0x7f446167b5f8>
<keras.layers.core.Activation object at 0x7f446167b908>
<keras.applications.mobilenet.DepthwiseConv2D object at 0x7f446167bd68>
<keras.layers.normalization.BatchNormalization object at 0x7f44662f0ac8>
<keras.layers.core.Activation object at 0x7f443874bcc0>
<keras.layers.convolutional.Conv2D object at 0x7f443874bda0>
<keras.layers.normalization.BatchNormalization object at 0x7f44386f4320>
<keras.layers.core.Activation object at 0x7f4438671e80>
<keras.applications.mobilenet.DepthwiseConv2D object at 0x7f4438671128>
<keras.layers.normalization.BatchNormalization object at 0x7f44385fff60>
<keras.layers.core.Activation object at 0x7f443869c2e8>
<keras.layers.convolutional.Conv2D object at 0x7f4438619a58>
<keras.layers.normalization.BatchNormalization object at 0x7f44385c3160>
<keras.layers.core.Activ

In [142]:
for layer in whole_model.layers[first_trainable_layer_index:]:
    print(layer)
    layer.trainable = True

<keras.applications.mobilenet.DepthwiseConv2D object at 0x7f44383f5ba8>
<keras.layers.normalization.BatchNormalization object at 0x7f44383a3a58>
<keras.layers.core.Activation object at 0x7f443839ed30>
<keras.layers.convolutional.Conv2D object at 0x7f443839ef28>
<keras.layers.normalization.BatchNormalization object at 0x7f443834f908>
<keras.layers.core.Activation object at 0x7f44382c8c50>
<keras.applications.mobilenet.DepthwiseConv2D object at 0x7f44382c8860>
<keras.layers.normalization.BatchNormalization object at 0x7f4438275710>
<keras.layers.core.Activation object at 0x7f44381f0a20>
<keras.layers.convolutional.Conv2D object at 0x7f44381f09e8>
<keras.layers.normalization.BatchNormalization object at 0x7f443821d5c0>
<keras.layers.core.Activation object at 0x7f4438198dd8>
<keras.applications.mobilenet.DepthwiseConv2D object at 0x7f4438198e48>
<keras.layers.normalization.BatchNormalization object at 0x7f4438141588>
<keras.layers.core.Activation object at 0x7f44380c2c18>
<keras.layers.con

In [143]:
# # fine-tuning should be done with a very slow learning rate, and typically with the SGD optimizer rather than an 
# # adaptative learning rate optimizer such as RMSProp. This is to make sure that the magnitude of the updates stays 
# # very small, so as not to wreck the previously learned features

# # whole_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
# whole_model.compile(loss='categorical_crossentropy',
#               optimizer=optimizers.SGD(lr=1e-4, momentum=0.9),
#               metrics=['accuracy'])

In [144]:
whole_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_5 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
conv1 (Conv2D)               (None, 112, 112, 32)      864       
_________________________________________________________________
conv1_bn (BatchNormalization (None, 112, 112, 32)      128       
_________________________________________________________________
conv1_relu (Activation)      (None, 112, 112, 32)      0         
_________________________________________________________________
conv_dw_1 (DepthwiseConv2D)  (None, 112, 112, 32)      288       
_________________________________________________________________
conv_dw_1_bn (BatchNormaliza (None, 112, 112, 32)      128       
_________________________________________________________________
conv_dw_1_relu (Activation)  (None, 112, 112, 32)      0         
__________

In [145]:
# head_model.fit(x, y, batch_size = batch_size, verbose = 1)

In [146]:
whole_model.evaluate(x_test, y_test)



[1.0356611453399465, 0.72972972972972971]

In [147]:
# history = whole_model.fit(x=x_train, y=y_train,
#                     batch_size=batch_size,
#                     epochs=training_epochs,
#                     verbose=1, # This is for what we want it to display out as it trains 
#                     validation_data=(x_test, y_test))

In [148]:
from keras.callbacks import *

class CyclicLR(Callback):
    """This callback implements a cyclical learning rate policy (CLR).
    The method cycles the learning rate between two boundaries with
    some constant frequency, as detailed in this paper (https://arxiv.org/abs/1506.01186).
    The amplitude of the cycle can be scaled on a per-iteration or 
    per-cycle basis.
    This class has three built-in policies, as put forth in the paper.
    "triangular":
        A basic triangular cycle w/ no amplitude scaling.
    "triangular2":
        A basic triangular cycle that scales initial amplitude by half each cycle.
    "exp_range":
        A cycle that scales initial amplitude by gamma**(cycle iterations) at each 
        cycle iteration.
    For more detail, please see paper.
    
    # Example
        ```python
            clr = CyclicLR(base_lr=0.001, max_lr=0.006,
                                step_size=2000., mode='triangular')
            model.fit(X_train, Y_train, callbacks=[clr])
        ```
    
    Class also supports custom scaling functions:
        ```python
            clr_fn = lambda x: 0.5*(1+np.sin(x*np.pi/2.))
            clr = CyclicLR(base_lr=0.001, max_lr=0.006,
                                step_size=2000., scale_fn=clr_fn,
                                scale_mode='cycle')
            model.fit(X_train, Y_train, callbacks=[clr])
        ```    
    # Arguments
        base_lr: initial learning rate which is the
            lower boundary in the cycle.
        max_lr: upper boundary in the cycle. Functionally,
            it defines the cycle amplitude (max_lr - base_lr).
            The lr at any cycle is the sum of base_lr
            and some scaling of the amplitude; therefore 
            max_lr may not actually be reached depending on
            scaling function.
        step_size: number of training iterations per
            half cycle. Authors suggest setting step_size
            2-8 x training iterations in epoch.
        mode: one of {triangular, triangular2, exp_range}.
            Default 'triangular'.
            Values correspond to policies detailed above.
            If scale_fn is not None, this argument is ignored.
        gamma: constant in 'exp_range' scaling function:
            gamma**(cycle iterations)
        scale_fn: Custom scaling policy defined by a single
            argument lambda function, where 
            0 <= scale_fn(x) <= 1 for all x >= 0.
            mode paramater is ignored 
        scale_mode: {'cycle', 'iterations'}.
            Defines whether scale_fn is evaluated on 
            cycle number or cycle iterations (training
            iterations since start of cycle). Default is 'cycle'.
    """

    def __init__(self, base_lr=0.001, max_lr=0.006, step_size=2000., mode='triangular',
                 gamma=1., scale_fn=None, scale_mode='cycle'):
        super(CyclicLR, self).__init__()

        self.base_lr = base_lr
        self.max_lr = max_lr
        self.step_size = step_size
        self.mode = mode
        self.gamma = gamma
        if scale_fn == None:
            if self.mode == 'triangular':
                self.scale_fn = lambda x: 1.
                self.scale_mode = 'cycle'
            elif self.mode == 'triangular2':
                self.scale_fn = lambda x: 1/(2.**(x-1))
                self.scale_mode = 'cycle'
            elif self.mode == 'exp_range':
                self.scale_fn = lambda x: gamma**(x)
                self.scale_mode = 'iterations'
        else:
            self.scale_fn = scale_fn
            self.scale_mode = scale_mode
        self.clr_iterations = 0.
        self.trn_iterations = 0.
        self.history = {}

        self._reset()

    def _reset(self, new_base_lr=None, new_max_lr=None,
               new_step_size=None):
        """Resets cycle iterations.
        Optional boundary/step size adjustment.
        """
        if new_base_lr != None:
            self.base_lr = new_base_lr
        if new_max_lr != None:
            self.max_lr = new_max_lr
        if new_step_size != None:
            self.step_size = new_step_size
        self.clr_iterations = 0.
        
    def clr(self):
        cycle = np.floor(1+self.clr_iterations/(2*self.step_size))
        x = np.abs(self.clr_iterations/self.step_size - 2*cycle + 1)
        if self.scale_mode == 'cycle':
            return self.base_lr + (self.max_lr-self.base_lr)*np.maximum(0, (1-x))*self.scale_fn(cycle)
        else:
            return self.base_lr + (self.max_lr-self.base_lr)*np.maximum(0, (1-x))*self.scale_fn(self.clr_iterations)
        
    def on_train_begin(self, logs={}):
        logs = logs or {}

        if self.clr_iterations == 0:
            K.set_value(self.model.optimizer.lr, self.base_lr)
        else:
            K.set_value(self.model.optimizer.lr, self.clr())        
            
    def on_batch_end(self, epoch, logs=None):
        
        logs = logs or {}
        self.trn_iterations += 1
        self.clr_iterations += 1
        K.set_value(self.model.optimizer.lr, self.clr())

        self.history.setdefault('lr', []).append(K.get_value(self.model.optimizer.lr))
        self.history.setdefault('iterations', []).append(self.trn_iterations)

        for k, v in logs.items():
            self.history.setdefault(k, []).append(v)

In [149]:
clr = CyclicLR(base_lr=0.00005, max_lr=0.0002,
                    step_size=1500., mode='triangular2')

In [150]:
history = whole_model.fit(x=x_train, y=y_train,
                    batch_size=batch_size,
                    epochs=training_epochs,
                    verbose=1, # This is for what we want it to display out as it trains 
                    callbacks=[clr],
                    validation_data=(x_test, y_test))

Train on 570 samples, validate on 74 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [151]:
def plot_train(hist):
    h = hist.history
    if 'acc' in h:
        meas='acc'
        loc='lower right'
    else:
        meas='loss'
        loc='upper right'
    plt.plot(hist.history[meas])
    plt.plot(hist.history['val_'+meas])
    plt.title('model '+meas)
    plt.ylabel(meas)
    plt.xlabel('epoch')
    plt.legend(['train', 'validation'], loc=loc)
    

In [152]:
plot_train(history)

In [153]:
whole_model.predict_on_batch(x_test[10:15])

array([[  9.99981999e-01,   1.80099341e-05],
       [  9.85866845e-01,   1.41331861e-02],
       [  9.99754369e-01,   2.45599338e-04],
       [  8.91232491e-01,   1.08767517e-01],
       [  7.51295447e-01,   2.48704538e-01]], dtype=float32)

In [154]:
score = whole_model.evaluate(x_test[10:15], y_test[10:15], verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

Test loss: 0.0831207633018
Test accuracy: 1.0


In [155]:
whole_model.evaluate(x_test, y_test)



[1.3269044884631562, 0.68918918918918914]

In [156]:
whole_model.save("mobilenet_voice_sentiment_model_SGD_1_clr.h5")

In [157]:
whole_model.evaluate(x_test, y_test)



[1.3269044884631562, 0.68918918918918914]

In [158]:
from keras.utils.generic_utils import CustomObjectScope

with CustomObjectScope({'relu6': keras.applications.mobilenet.relu6,'DepthwiseConv2D': keras.applications.mobilenet.DepthwiseConv2D}):    
    loaded_model = load_model("mobilenet_voice_sentiment_model_SGD_1_clr.h5")



In [159]:
loaded_model.evaluate(x_test, y_test)



[1.3269044884631562, 0.68918918918918914]

Training more lower layers

In [160]:
# Re-initializing mobilenet_base_model & top_model

top_model.load_weights('bottleneck_top_model.h5')

In [161]:
mobilenet_base_model = MobileNet(
    input_shape=(224, 224, 3),
    alpha=1.0,
    depth_multiplier=1,
    dropout=0.001,
    include_top=False,
    weights='imagenet',
    input_tensor=None,
#     pooling=None,
    pooling='avg',
    classes=2
)

In [162]:
whole_model_2 = Model(input=mobilenet_base_model.input, output=top_model(mobilenet_base_model.output))

  """Entry point for launching an IPython kernel.


In [163]:
whole_model_2.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_7 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
conv1 (Conv2D)               (None, 112, 112, 32)      864       
_________________________________________________________________
conv1_bn (BatchNormalization (None, 112, 112, 32)      128       
_________________________________________________________________
conv1_relu (Activation)      (None, 112, 112, 32)      0         
_________________________________________________________________
conv_dw_1 (DepthwiseConv2D)  (None, 112, 112, 32)      288       
_________________________________________________________________
conv_dw_1_bn (BatchNormaliza (None, 112, 112, 32)      128       
_________________________________________________________________
conv_dw_1_relu (Activation)  (None, 112, 112, 32)      0         
__________

In [164]:
1e-4

0.0001

In [165]:
# fine-tuning should be done with a very slow learning rate, and typically with the SGD optimizer rather than an 
# adaptative learning rate optimizer such as RMSProp. This is to make sure that the magnitude of the updates stays 
# very small, so as not to wreck the previously learned features

# whole_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
whole_model_2.compile(loss='categorical_crossentropy',
              optimizer=optimizers.Adam(lr=1e-4, decay=0.1),
              metrics=['accuracy'])

In [166]:
whole_model_2.evaluate(x_test, y_test)



[1.0356611453399465, 0.72972972972972971]

In [167]:
first_trainable_layer_index = 22
print(len(whole_model_2.layers))
print("first_trainable_layer :", whole_model_2.layers[first_trainable_layer_index])
print("first_trainable_layer name :", whole_model_2.layers[first_trainable_layer_index].name)

84
first_trainable_layer : <keras.applications.mobilenet.DepthwiseConv2D object at 0x7f43ffb01fd0>
first_trainable_layer name : conv_dw_4


In [168]:
for layer in whole_model_2.layers[:first_trainable_layer_index]:
    print(layer)
    layer.trainable = False

<keras.engine.topology.InputLayer object at 0x7f44046c94e0>
<keras.layers.convolutional.Conv2D object at 0x7f44046c96d8>
<keras.layers.normalization.BatchNormalization object at 0x7f44046c92b0>
<keras.layers.core.Activation object at 0x7f44046c9da0>
<keras.applications.mobilenet.DepthwiseConv2D object at 0x7f44046c9d30>
<keras.layers.normalization.BatchNormalization object at 0x7f43ffe1a748>
<keras.layers.core.Activation object at 0x7f43ffe46d68>
<keras.layers.convolutional.Conv2D object at 0x7f43ffe46e48>
<keras.layers.normalization.BatchNormalization object at 0x7f43ffdec3c8>
<keras.layers.core.Activation object at 0x7f43ffd68c50>
<keras.applications.mobilenet.DepthwiseConv2D object at 0x7f43ffd68f98>
<keras.layers.normalization.BatchNormalization object at 0x7f43ffd11400>
<keras.layers.core.Activation object at 0x7f43ffc90f28>
<keras.layers.convolutional.Conv2D object at 0x7f43ffc90c50>
<keras.layers.normalization.BatchNormalization object at 0x7f43ffcb9390>
<keras.layers.core.Activ

In [169]:
for layer in whole_model_2.layers[first_trainable_layer_index:]:
    print(layer)
    layer.trainable = True

<keras.applications.mobilenet.DepthwiseConv2D object at 0x7f43ffb01fd0>
<keras.layers.normalization.BatchNormalization object at 0x7f43ffa14cf8>
<keras.layers.core.Activation object at 0x7f43ffaad0f0>
<keras.layers.convolutional.Conv2D object at 0x7f43ffa2c940>
<keras.layers.normalization.BatchNormalization object at 0x7f43ff9d5518>
<keras.layers.core.Activation object at 0x7f43ff9b9e80>
<keras.applications.mobilenet.DepthwiseConv2D object at 0x7f43ff955e10>
<keras.layers.normalization.BatchNormalization object at 0x7f43ff97f2b0>
<keras.layers.core.Activation object at 0x7f43ff8e1c18>
<keras.layers.convolutional.Conv2D object at 0x7f43ff8f9780>
<keras.layers.normalization.BatchNormalization object at 0x7f43ff8a5240>
<keras.layers.core.Activation object at 0x7f43ff820c88>
<keras.applications.mobilenet.DepthwiseConv2D object at 0x7f43ff888ba8>
<keras.layers.normalization.BatchNormalization object at 0x7f43ff7cd080>
<keras.layers.core.Activation object at 0x7f43ff7c8630>
<keras.layers.con

In [170]:
whole_model_2.evaluate(x_test, y_test)



[1.0356611453399465, 0.72972972972972971]

In [171]:
whole_model_2.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_7 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
conv1 (Conv2D)               (None, 112, 112, 32)      864       
_________________________________________________________________
conv1_bn (BatchNormalization (None, 112, 112, 32)      128       
_________________________________________________________________
conv1_relu (Activation)      (None, 112, 112, 32)      0         
_________________________________________________________________
conv_dw_1 (DepthwiseConv2D)  (None, 112, 112, 32)      288       
_________________________________________________________________
conv_dw_1_bn (BatchNormaliza (None, 112, 112, 32)      128       
_________________________________________________________________
conv_dw_1_relu (Activation)  (None, 112, 112, 32)      0         
__________

In [173]:
history = whole_model_2.fit(x=x_train, y=y_train,
                    batch_size=batch_size,
                    epochs=training_epochs,
                    verbose=1, # This is for what we want it to display out as it trains 
                    callbacks=[clr],
                    validation_data=(x_test, y_test))

Train on 570 samples, validate on 74 samples
Epoch 1/20
 32/570 [>.............................] - ETA: 776s - loss: 1.8554 - acc: 0.6250 

ResourceExhaustedError: OOM when allocating tensor with shape[1024,1024]
	 [[Node: training_10/Adam/Square_83 = Square[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/gpu:0"](training_10/Adam/gradients/model_5_2/Dense_1/MatMul_grad/MatMul_1)]]

Caused by op 'training_10/Adam/Square_83', defined at:
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2698, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2802, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-172-4fb7b84081d6>", line 6, in <module>
    validation_data=(x_test, y_test))
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/site-packages/keras/engine/training.py", line 1575, in fit
    self._make_train_function()
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/site-packages/keras/engine/training.py", line 960, in _make_train_function
    loss=self.total_loss)
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/site-packages/keras/legacy/interfaces.py", line 87, in wrapper
    return func(*args, **kwargs)
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/site-packages/keras/optimizers.py", line 433, in get_updates
    v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g)
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py", line 1358, in square
    return tf.square(x)
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py", line 447, in square
    return gen_math_ops.square(x, name=name)
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/site-packages/tensorflow/python/ops/gen_math_ops.py", line 2591, in square
    result = _op_def_lib.apply_op("Square", x=x, name=name)
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op
    op_def=op_def)
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 2630, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/home/cheeseprata/anaconda3/envs/TF13-PY3.6/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1204, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[1024,1024]
	 [[Node: training_10/Adam/Square_83 = Square[T=DT_FLOAT, _device="/job:localhost/replica:0/task:0/gpu:0"](training_10/Adam/gradients/model_5_2/Dense_1/MatMul_grad/MatMul_1)]]


In [None]:
history = whole_model_2.fit(x=x_train, y=y_train,
                    batch_size=batch_size,
                    epochs=10,
                    verbose=1, # This is for what we want it to display out as it trains 
                    callbacks=[clr],
                    validation_data=(x_test, y_test))

In [None]:
whole_model_2.evaluate(x_test, y_test)

In [None]:
whole_model_2.save("mobilenet_voice_sentiment_model_3_clr.h5")

In [None]:
from keras.utils.generic_utils import CustomObjectScope

with CustomObjectScope({'relu6': keras.applications.mobilenet.relu6,'DepthwiseConv2D': keras.applications.mobilenet.DepthwiseConv2D}):    
    loaded_model = load_model("mobilenet_voice_sentiment_model_3.h5")

In [None]:
loaded_model.evaluate(x_test, y_test)

In [None]:
training_epochs = 30

In [None]:
# Re-initializing mobilenet_base_model & top_model

top_model.load_weights('bottleneck_top_model.h5')

mobilenet_base_model = MobileNet(
    input_shape=(224, 224, 3),
    alpha=1.0,
    depth_multiplier=1,
    dropout=0.001,
    include_top=False,
    weights='imagenet',
    input_tensor=None,
#     pooling=None,
    pooling='avg',
    classes=2
)

whole_model_3 = Model(input=mobilenet_base_model.input, output=top_model(mobilenet_base_model.output))

whole_model_3.summary()

# fine-tuning should be done with a very slow learning rate, and typically with the SGD optimizer rather than an 
# adaptative learning rate optimizer such as RMSProp. This is to make sure that the magnitude of the updates stays 
# very small, so as not to wreck the previously learned features

# whole_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
whole_model_3.compile(loss='categorical_crossentropy',
              optimizer=optimizers.Adam(lr=1e-5, decay=0.1),
              metrics=['accuracy'])

whole_model_3.evaluate(x_test, y_test)

In [None]:
first_trainable_layer_index = 22
print(len(whole_model_3.layers))
print("first_trainable_layer :", whole_model_3.layers[first_trainable_layer_index])
print("first_trainable_layer name :", whole_model_3.layers[first_trainable_layer_index].name)

for layer in whole_model_3.layers[:first_trainable_layer_index]:
    print(layer)
    layer.trainable = False

for layer in whole_model_3.layers[first_trainable_layer_index:]:
    print(layer)
    layer.trainable = True

In [None]:
history = whole_model_3.fit(x=x_train, y=y_train,
                    batch_size=batch_size,
                    epochs=training_epochs,
                    verbose=1, # This is for what we want it to display out as it trains 
                    callbacks=[clr],
                    validation_data=(x_test, y_test))

In [None]:
# Re-initializing mobilenet_base_model & top_model

top_model.load_weights('bottleneck_top_model.h5')

mobilenet_base_model = MobileNet(
    input_shape=(224, 224, 3),
    alpha=1.0,
    depth_multiplier=1,
    dropout=0.001,
    include_top=False,
    weights='imagenet',
    input_tensor=None,
#     pooling=None,
    pooling='avg',
    classes=2
)

whole_model_4 = Model(input=mobilenet_base_model.input, output=top_model(mobilenet_base_model.output))

whole_model_4.summary()

# fine-tuning should be done with a very slow learning rate, and typically with the SGD optimizer rather than an 
# adaptative learning rate optimizer such as RMSProp. This is to make sure that the magnitude of the updates stays 
# very small, so as not to wreck the previously learned features

# whole_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
whole_model_4.compile(loss='categorical_crossentropy',
              optimizer=optimizers.Adam(lr=1e-4, decay=0.1),
              metrics=['accuracy'])

whole_model_4.evaluate(x_test, y_test)

first_trainable_layer_index = 34
print(len(whole_model_4.layers))
print("first_trainable_layer :", whole_model_4.layers[first_trainable_layer_index])
print("first_trainable_layer name :", whole_model_4.layers[first_trainable_layer_index].name)

for layer in whole_model_4.layers[:first_trainable_layer_index]:
    print(layer)
    layer.trainable = False

for layer in whole_model_4.layers[first_trainable_layer_index:]:
    print(layer)
    layer.trainable = True

history = whole_model_4.fit(x=x_train, y=y_train,
                    batch_size=batch_size,
                    epochs=training_epochs,
                    verbose=1, # This is for what we want it to display out as it trains 
                    callbacks=[clr],
                    validation_data=(x_test, y_test))

In [None]:
# Re-initializing mobilenet_base_model & top_model

top_model.load_weights('bottleneck_top_model.h5')

mobilenet_base_model = MobileNet(
    input_shape=(224, 224, 3),
    alpha=1.0,
    depth_multiplier=1,
    dropout=0.001,
    include_top=False,
    weights='imagenet',
    input_tensor=None,
#     pooling=None,
    pooling='avg',
    classes=2
)

whole_model_5 = Model(input=mobilenet_base_model.input, output=top_model(mobilenet_base_model.output))

whole_model_5.summary()

# fine-tuning should be done with a very slow learning rate, and typically with the SGD optimizer rather than an 
# adaptative learning rate optimizer such as RMSProp. This is to make sure that the magnitude of the updates stays 
# very small, so as not to wreck the previously learned features

# whole_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
whole_model_5.compile(loss='categorical_crossentropy',
              optimizer=optimizers.Adam(lr=1e-5, decay=0.1),
              metrics=['accuracy'])

whole_model_5.evaluate(x_test, y_test)

first_trainable_layer_index = 34
print(len(whole_model_5.layers))
print("first_trainable_layer :", whole_model_5.layers[first_trainable_layer_index])
print("first_trainable_layer name :", whole_model_5.layers[first_trainable_layer_index].name)

for layer in whole_model_5.layers[:first_trainable_layer_index]:
    print(layer)
    layer.trainable = False

for layer in whole_model_5.layers[first_trainable_layer_index:]:
    print(layer)
    layer.trainable = True

history = whole_model_5.fit(x=x_train, y=y_train,
                    batch_size=batch_size,
                    epochs=training_epochs,
                    verbose=1, # This is for what we want it to display out as it trains 
                    callbacks=[clr],
                    validation_data=(x_test, y_test))

In [None]:
history = whole_model_5.fit(x=x_train, y=y_train,
                    batch_size=batch_size,
                    epochs=20,
                    verbose=1, # This is for what we want it to display out as it trains 
                    callbacks=[clr],
                    validation_data=(x_test, y_test))

In [None]:
whole_model_5.evaluate(x_test, y_test)