### Understanding Convolutions

In [1]:
from theano.sandbox import cuda

Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)


In [2]:
%matplotlib inline
import utils; reload(utils)
from utils import *
from __future__ import division, print_function

Using Theano backend.


In [3]:
# Root folder of the data set; the cached convolutional features are stored
# under its models/ sub-folder.
path = "dogscats/"
model_path = path + 'models/'
# makedirs (unlike mkdir) also creates the parent folder when it is missing,
# so this cell no longer fails on a machine where dogscats/ does not exist yet.
if not os.path.exists(model_path):
    os.makedirs(model_path)

In [4]:
# Mini-batch size, used both for the image generators and for fitting the dense model.
batch_size = 64

#### Up until now, the validation accuracy of the neural network has been higher than the training accuracy.

- That happens because of dropout. Dropout is a layer that randomly zeroes each activation of the previous layer with probability 0.5. It is only applied during training, not during validation, which explains the accuracy difference.

- Dropout is employed to avoid the overfitting of the model. It ensures that no one portion of the network is able to overfit to one part of the training set.

- However, if overused, dropout can cause underfitting of the model.

### Removing Dropout  from VGG16.

##### Steps
- Loading the previously fine-tuned cats-vs-dogs model.
- Splitting the model into its convolutional and dense layers.
- Pre-calculating the output of the conv layers to avoid redundant calculations.
- Creating a new model with only the dense layers, eliminating dropout.
- Training the new model using the output of the convolutional layers as training data.


In [5]:
# Build the model through the vgg_ft helper.
# NOTE(review): vgg_ft is not defined in this notebook (presumably it comes from the
# star-imported utils module) and the argument 2 is presumably the number of output
# classes (cats, dogs) -- confirm against utils.
model = vgg_ft(2)

  .format(self.name, input_shape))


In [6]:
# Load the previously fine-tuned weights from an .h5 file into the model.
# NOTE(review): this path is hard-coded and does not use `path` / `model_path` -- confirm it is intended.
model.load_weights('data/redux/results/ft3.h5')

In [7]:
# `layers` is the model's list of layers; the printed type shows it is a plain Python list,
# so it can be indexed and sliced below.
layers = model.layers
print(type(layers))

# model.summary()

<type 'list'>


In [8]:


# Collect the position of every Convolution2D layer; the last position marks
# where the convolutional part of the network ends.
conv_positions = []
for index, layer in enumerate(layers):
    if type(layer) is Convolution2D:
        conv_positions.append(index)
last_conv_idx = conv_positions[-1]

model.summary()



____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
lambda_1 (Lambda)                (None, 3, 224, 224)   0           lambda_input_1[0][0]             
____________________________________________________________________________________________________
zeropadding2d_1 (ZeroPadding2D)  (None, 3, 226, 226)   0           lambda_1[0][0]                   
____________________________________________________________________________________________________
convolution2d_1 (Convolution2D)  (None, 64, 224, 224)  1792        zeropadding2d_1[0][0]            
____________________________________________________________________________________________________
zeropadding2d_2 (ZeroPadding2D)  (None, 64, 226, 226)  0           convolution2d_1[0][0]            
___________________________________________________________________________________________

In [9]:
# Display the index of the last convolutional layer within `layers` (30 for this model).
last_conv_idx

30

In [10]:
# Print, in order: the container type of `layers`, how many layers the model
# has, and the layer object stored at index last_conv_idx.
for info in (type(layers), len(layers), layers[last_conv_idx]):
    print(info)

<type 'list'>
40
<keras.layers.convolutional.Convolution2D object at 0x7fbf75b9c590>


In [11]:
# Everything up to and including the last Convolution2D layer is the
# convolutional part; the layers after it are the fully connected part
# (the part that holds the dropout layers).
split_at = last_conv_idx + 1
conv_layers, fc_layers = layers[:split_at], layers[split_at:]
# Stand-alone model made of the convolutional layers only.
conv_model = Sequential(conv_layers)

In [41]:
# get_batches calls the function with arguments of the file directory and retrieves info in batches.
# The are an iterable directory and are in "Keras" format.
?? get_batches()
# This helps in the augmentation process of the data in realtime.
?? image.ImageDataGenerator()

In [12]:
# Both generators are created with the same options. shuffle=False presumably
# keeps the sample order fixed so that features predicted from these batches
# line up with `.classes` -- confirm against get_batches.
gen_kwargs = dict(shuffle=False, batch_size=batch_size)
batches = get_batches(path + 'train', **gen_kwargs)
val_batches = get_batches(path + 'valid', **gen_kwargs)

# Integer class ids per sample, then their one-hot encoded counterparts.
trn_classes, val_classes = batches.classes, val_batches.classes
print(type(val_classes))
val_labels = onehot(val_classes)
print(type(val_labels))
trn_labels = onehot(trn_classes)

Found 23000 images belonging to 2 classes.
Found 2000 images belonging to 2 classes.
<type 'numpy.ndarray'>
<type 'numpy.ndarray'>


In [13]:
# Sample counts of both sets, then the first two validation labels and the
# last two training labels.
for item in (len(val_classes), len(trn_classes), val_labels[:2], trn_labels[-2:]):
    print(item)

2000
23000
[[ 1.  0.]
 [ 1.  0.]]
[[ 0.  1.]
 [ 0.  1.]]


In [58]:
?? conv_model.predict_generator()

In [14]:
# Push every validation image through the convolutional layers.
# val_batches is the generator returned by get_batches; nb_sample is passed
# as the number of samples to predict. The result is a numpy array.
n_val = val_batches.nb_sample
val_features = conv_model.predict_generator(val_batches, n_val)
print(type(val_features))

<type 'numpy.ndarray'>


In [15]:
# val_features holds one entry per validation image (2000 of them). Each entry
# is the output of the convolutional layers for that image, not a class
# prediction; its first dimension has length 512.
sample_idx = 2
print(len(val_features))
print(len(val_features[sample_idx]))
print(val_labels[sample_idx])

2000
512
[ 1.  0.]


In [17]:
# Run all training images through the convolutional layers once and keep the resulting features.
trn_features = conv_model.predict_generator(batches, batches.nb_sample)

In [18]:
# Persist the pre-computed features so the convolutional pass need not be repeated.
for fname, feats in (('train_convlayer_features.bc', trn_features),
                     ('valid_convlayer_features.bc', val_features)):
    save_array(model_path + fname, feats)

In [13]:
# Reload the convolutional features (not models) that were saved to disk earlier.
feature_files = ('train_convlayer_features.bc', 'valid_convlayer_features.bc')
trn_features, val_features = [load_array(model_path + fname) for fname in feature_files]

In [14]:
# Type and shape of the pre-computed training features: one 512 x 14 x 14
# output of the last convolutional layer per training image.
print(type(trn_features))
trn_features.shape

<type 'numpy.ndarray'>


(23000, 512, 14, 14)

In [73]:
?? layer.get_weights()

In [15]:
# Copy the weights from the pre-trained model.
# NB: Since we're removing dropout, we want to halve the weights
def proc_wgts(layer):
    """Return the weights of ``layer`` with every entry divided by 2.

    Args:
        layer: object whose ``get_weights()`` returns an iterable of weights.

    Returns:
        A new list holding one halved entry per item of ``layer.get_weights()``.
    """
    halved = []
    for weights in layer.get_weights():
        halved.append(weights / 2)
    return halved

In [16]:
# Optimizer used when compiling the dense-only model.
# TODO: open question from the author -- RMSprop is said to be a good choice
# for RNNs; why is it used for this CNN?
opt = RMSprop(rho=0.7, lr=1e-5)
# Leftover inspection: at module level `l1` is a function (see the output), not a layer.
print(l1)

<function l1 at 0x7fbf7624bb90>


In [103]:
# Inspect the last convolutional layer: the layer object, the full model
# summary, and the layer's output shape without the leading batch dimension.
last_conv = conv_layers[-1]
print(last_conv)
model.summary()
print(last_conv.output_shape[1:])

<keras.layers.convolutional.Convolution2D object at 0x7f26630e2ad0>
____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
lambda_2 (Lambda)                (None, 3, 224, 224)   0           lambda_input_2[0][0]             
____________________________________________________________________________________________________
zeropadding2d_14 (ZeroPadding2D) (None, 3, 226, 226)   0           lambda_2[0][0]                   
____________________________________________________________________________________________________
convolution2d_14 (Convolution2D) (None, 64, 224, 224)  1792        zeropadding2d_14[0][0]           
____________________________________________________________________________________________________
zeropadding2d_15 (ZeroPadding2D) (None, 64, 226, 226)  0           convolution2d_14[0][0]           
_______________________

In [128]:
# Debugging aid: compare the layer list of the full model with fc_layers.
# Note that `model` here is the full network, so the final zip pairs its first
# layers with fc_layers; it is not the pairing done inside get_fc_model.
sep = "#############################################"

print(type(model.layers))
print(type(fc_layers))

print("MODEL LAYERS", model.layers)
print(sep)
print(len(model.layers))
print("\n".join([sep] * 2))
print("Fully_Connected Layers", fc_layers)
print(sep)
print(len(fc_layers))

print("\n".join([sep] * 4))

print(zip(model.layers, fc_layers))

<type 'list'>
<type 'list'>
MODEL LAYERS [<keras.layers.core.Lambda object at 0x7f2666f31050>, <keras.layers.convolutional.ZeroPadding2D object at 0x7f2663387dd0>, <keras.layers.convolutional.Convolution2D object at 0x7f26632c0910>, <keras.layers.convolutional.ZeroPadding2D object at 0x7f26632d0490>, <keras.layers.convolutional.Convolution2D object at 0x7f266327a990>, <keras.layers.pooling.MaxPooling2D object at 0x7f2663387d90>, <keras.layers.convolutional.ZeroPadding2D object at 0x7f26632a9a50>, <keras.layers.convolutional.Convolution2D object at 0x7f26632368d0>, <keras.layers.convolutional.ZeroPadding2D object at 0x7f266323d810>, <keras.layers.convolutional.Convolution2D object at 0x7f2663267dd0>, <keras.layers.pooling.MaxPooling2D object at 0x7f26632a9bd0>, <keras.layers.convolutional.ZeroPadding2D object at 0x7f2663218450>, <keras.layers.convolutional.Convolution2D object at 0x7f2663221a10>, <keras.layers.convolutional.ZeroPadding2D object at 0x7f26862bd750>, <keras.layers.convolut

In [17]:
# Below is the model created to remove the dropout layers
def get_fc_model():
    """Build and compile a dense-only classifier seeded from ``fc_layers``.

    The new network is expected to mirror the fully connected part of the
    fine-tuned model (``fc_layers``) layer for layer, but with both Dropout
    rates set to 0. Its input is the output of the last convolutional layer,
    so it can be trained directly on the pre-computed convolutional features.

    Returns:
        The compiled Sequential model (optimizer ``opt``, categorical
        cross-entropy loss, accuracy metric).

    Raises:
        ValueError: if the new model and ``fc_layers`` differ in length.
            Weights are copied pairwise, and ``zip`` would otherwise silently
            skip the unmatched layers and leave them with their initial weights.
    """
    model = Sequential([
        # Input shape is the per-sample output shape of the last conv layer
        # (output_shape without its leading batch dimension), i.e. (512, 14, 14).
        MaxPooling2D(input_shape=conv_layers[-1].output_shape[1:]),
        Flatten(),
        Dense(4096, activation='relu'),
        BatchNormalization(),
        # Dropout is kept with rate 0 so that the layer list still lines up
        # one-to-one with fc_layers for the weight copy below.
        Dropout(0.),
        Dense(4096, activation='relu'),
        BatchNormalization(),
        Dropout(0.),
        Dense(2, activation='softmax')
        ])

    if len(model.layers) != len(fc_layers):
        raise ValueError(
            "layer count mismatch: new model has %d layers, fc_layers has %d"
            % (len(model.layers), len(fc_layers)))

    # Copy the (halved) weights of each fine-tuned layer into its counterpart.
    # NOTE(review): proc_wgts halves every weight array of every layer, including
    # those of the BatchNormalization layers -- confirm that this is intended.
    for new_layer, src_layer in zip(model.layers, fc_layers):
        print("l1",new_layer)
        print("l2",src_layer)
        new_layer.set_weights(proc_wgts(src_layer))

    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [121]:
# Leftover inspection cell.
# NOTE(review): at module level `l1` is a function (see the output), not a layer;
# the `l1` loop variable inside get_fc_model is local to that function.
print(l1)
# `model` here is the full fine-tuned network, not the dense-only model built by get_fc_model.
model.summary()

<function l1 at 0x7f2666fc8500>
____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
lambda_2 (Lambda)                (None, 3, 224, 224)   0           lambda_input_2[0][0]             
____________________________________________________________________________________________________
zeropadding2d_14 (ZeroPadding2D) (None, 3, 226, 226)   0           lambda_2[0][0]                   
____________________________________________________________________________________________________
convolution2d_14 (Convolution2D) (None, 64, 224, 224)  1792        zeropadding2d_14[0][0]           
____________________________________________________________________________________________________
zeropadding2d_15 (ZeroPadding2D) (None, 64, 226, 226)  0           convolution2d_14[0][0]           
___________________________________________________________

In [18]:
# Build the dense-only model; get_fc_model prints each (l1, l2) layer pair while copying the weights.
fc_model = get_fc_model()

l1 <keras.layers.pooling.MaxPooling2D object at 0x7fbf71908ad0>
l2 <keras.layers.pooling.MaxPooling2D object at 0x7fbf75c274d0>
l1 <keras.layers.core.Flatten object at 0x7fbf719089d0>
l2 <keras.layers.core.Flatten object at 0x7fbf75b41650>
l1 <keras.layers.core.Dense object at 0x7fbf7173b9d0>
l2 <keras.layers.core.Dense object at 0x7fbf75abec10>
l1 <keras.layers.normalization.BatchNormalization object at 0x7fbf7173b8d0>
l2 <keras.layers.normalization.BatchNormalization object at 0x7fbf75ac94d0>
l1 <keras.layers.core.Dropout object at 0x7fbf8b162d10>
l2 <keras.layers.core.Dropout object at 0x7fbf75a33410>
l1 <keras.layers.core.Dense object at 0x7fbf717b8710>
l2 <keras.layers.core.Dense object at 0x7fbf75a018d0>
l1 <keras.layers.normalization.BatchNormalization object at 0x7fbf717b8910>
l2 <keras.layers.normalization.BatchNormalization object at 0x7fbf75a17b10>
l1 <keras.layers.core.Dropout object at 0x7fbf717b8610>
l2 <keras.layers.core.Dropout object at 0x7fbf757e22d0>
l1 <keras.layers

In [19]:
# Train the dense-only model on the pre-computed convolutional features,
# validating on the pre-computed validation features after each epoch.
val_data = (val_features, val_labels)
fc_model.fit(trn_features, trn_labels,
             validation_data=val_data,
             batch_size=batch_size,
             nb_epoch=3)

Train on 23000 samples, validate on 2000 samples
Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x7fbd1268bc50>