## Implementation of capsule networks

3D- object dataset created using MAYA

In [0]:
!pip install pydrive


In [2]:
from __future__ import print_function
from keras import backend as K
from keras.layers import Layer
from keras import activations,optimizers
from keras import utils
from keras.datasets import cifar10
from keras.models import Model
from keras.optimizers import Adam
from keras.layers import *
from keras.preprocessing.image import ImageDataGenerator

#--------------linear algebra, data processing----------#
import numpy as np 
import pandas as pd
import glob,string
import codecs
from tqdm import tqdm

#--------------Google authentication--------------------#
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
import tensorflow as tf
from oauth2client.client import GoogleCredentials

Using TensorFlow backend.


### 1. Authenticate and create the PyDrive client.


In [0]:
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [0]:
zip_file = drive.CreateFile({'id': '1Kc-NFM_mfvnrx31rcpsnrMWarEb2Gnmc'})
zip_file.GetContentFile('3D_object_dataset.zip')

In [5]:
!unzip '3D_object_dataset.zip'

Archive:  3D_object_dataset.zip
   creating: 3D_object_dataset/
   creating: 3D_object_dataset/Testing/
   creating: 3D_object_dataset/Testing/Headphones/
  inflating: 3D_object_dataset/Testing/Headphones/Electronics_Headphone_X135_Y0_Z0.png  
  inflating: 3D_object_dataset/Testing/Headphones/Electronics_Headphone_X135_Y0_Z135.png  
  inflating: 3D_object_dataset/Testing/Headphones/Electronics_Headphone_X135_Y0_Z180.png  
  inflating: 3D_object_dataset/Testing/Headphones/Electronics_Headphone_X135_Y0_Z360.png  
  inflating: 3D_object_dataset/Testing/Headphones/Electronics_Headphone_X135_Y0_Z45.png  
  inflating: 3D_object_dataset/Testing/Headphones/Electronics_Headphone_X135_Y0_Z90.png  
  inflating: 3D_object_dataset/Testing/Headphones/Electronics_Headphone_X135_Y135_Z0.png  
  inflating: 3D_object_dataset/Testing/Headphones/Electronics_Headphone_X135_Y135_Z180.png  
  inflating: 3D_object_dataset/Testing/Headphones/Electronics_Headphone_X135_Y135_Z225.png  
  inflating: 3D_object_dat

## 2. Data Processing
The dataset is initially divided into three categories. Training, Testing and validation. The split for training validation is 15%. The image augmentation is used to create more images to assist the training process

In [0]:
training_directory = '3D_object_dataset/Training'
validation_directory = '3D_object_dataset/validation'
testing_directory = '3D_object_dataset/Testing'

In [0]:
batch_size = 15
epochs = 30

In [9]:
train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=20,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   horizontal_flip=True)

val_datagen = ImageDataGenerator(rescale=1./255,
                                 rotation_range=20,
                                 width_shift_range=0.2,
                                 height_shift_range=0.2,
                                 horizontal_flip=True)

train_generator = train_datagen.flow_from_directory(
        training_directory,
        target_size=(128,128),
        batch_size=batch_size,
        color_mode="grayscale",
        class_mode='categorical',
        shuffle=True)

validation_generator = val_datagen.flow_from_directory(
        validation_directory,
        target_size=(128,128),
        batch_size=batch_size,
        color_mode="grayscale",
        class_mode='categorical',
        shuffle = True)

Found 1605 images belonging to 4 classes.
Found 264 images belonging to 4 classes.


In [11]:
# the squashing function.
# we use 0.5 in stead of 1 in hinton's paper.
# if 1, the norm of vector will be zoomed out.
# if 0.5, the norm will be zoomed in while original norm is less than 0.5
# and be zoomed out while original norm is greater than 0.5.
def squash(x, axis=-1):
    s_squared_norm = K.sum(K.square(x), axis, keepdims=True) + K.epsilon()
    scale = K.sqrt(s_squared_norm) / (0.5 + s_squared_norm)
    return scale * x


# define our own softmax function instead of K.softmax
# because K.softmax can not specify axis.
def softmax(x, axis=-1):
    ex = K.exp(x - K.max(x, axis=axis, keepdims=True))
    return ex / K.sum(ex, axis=axis, keepdims=True)


# define the margin loss like hinge loss
def margin_loss(y_true, y_pred):
    lamb, margin = 0.5, 0.1
    return K.sum(y_true * K.square(K.relu(1 - margin - y_pred)) + lamb * (
        1 - y_true) * K.square(K.relu(y_pred - margin)), axis=-1)


class Capsule(Layer):
    """A Capsule Implement with Pure Keras
    There are two vesions of Capsule.
    One is like dense layer (for the fixed-shape input),
    and the other is like timedistributed dense (for various length input).

    The input shape of Capsule must be (batch_size,
                                        input_num_capsule,
                                        input_dim_capsule
                                       )
    and the output shape is (batch_size,
                             num_capsule,
                             dim_capsule
                            )

    Capsule Implement is from https://github.com/bojone/Capsule/
    Capsule Paper: https://arxiv.org/abs/1710.09829
    """

    def __init__(self,
                 num_capsule,
                 dim_capsule,
                 routings=3,
                 share_weights=True,
                 activation='squash',
                 **kwargs):
        super(Capsule, self).__init__(**kwargs)
        self.num_capsule = num_capsule
        self.dim_capsule = dim_capsule
        self.routings = routings
        self.share_weights = share_weights
        if activation == 'squash':
            self.activation = squash
        else:
            self.activation = activations.get(activation)

    def build(self, input_shape):
        input_dim_capsule = input_shape[-1]
        if self.share_weights:
            self.kernel = self.add_weight(
                name='capsule_kernel',
                shape=(1, input_dim_capsule,
                       self.num_capsule * self.dim_capsule),
                initializer='glorot_uniform',
                trainable=True)
        else:
            input_num_capsule = input_shape[-2]
            self.kernel = self.add_weight(
                name='capsule_kernel',
                shape=(input_num_capsule, input_dim_capsule,
                       self.num_capsule * self.dim_capsule),
                initializer='glorot_uniform',
                trainable=True)

    def call(self, inputs):
        """Following the routing algorithm from Hinton's paper,
        but replace b = b + <u,v> with b = <u,v>.

        This change can improve the feature representation of Capsule.

        However, you can replace
            b = K.batch_dot(outputs, hat_inputs, [2, 3])
        with
            b += K.batch_dot(outputs, hat_inputs, [2, 3])
        to realize a standard routing.
        """

        if self.share_weights:
            hat_inputs = K.conv1d(inputs, self.kernel)
        else:
            hat_inputs = K.local_conv1d(inputs, self.kernel, [1], [1])

        batch_size = K.shape(inputs)[0]
        input_num_capsule = K.shape(inputs)[1]
        hat_inputs = K.reshape(hat_inputs,
                               (batch_size, input_num_capsule,
                                self.num_capsule, self.dim_capsule))
        hat_inputs = K.permute_dimensions(hat_inputs, (0, 2, 1, 3))

        b = K.zeros_like(hat_inputs[:, :, :, 0])
        for i in range(self.routings):
            c = softmax(b, 1)
            o = self.activation(K.batch_dot(c, hat_inputs, [2, 2]))
            if i < self.routings - 1:
                b = K.batch_dot(o, hat_inputs, [2, 3])
                if K.backend() == 'theano':
                    o = K.sum(o, axis=1)

        return o

    def compute_output_shape(self, input_shape):
        return (None, self.num_capsule, self.dim_capsule)


batch_size = 16
num_classes = 100
epochs = 50

# A common Conv2D model
input_image = Input(shape=(128,128, 1))
x = Conv2D(64, (5, 5), activation='relu')(input_image)
#x= MaxPooling2D((2,2))(x)
x = Conv2D(128, (3, 3), activation='relu')(x)
x = Dropout(0.2)(x)
x = Conv2D(256, (3, 3), activation='relu')(x)
x = MaxPooling2D((3, 3))(x)
x = Conv2D(256, (3, 3), activation='relu')(x)
x = Dropout(0.2)(x)
x = BatchNormalization()(x)
#x = MaxPooling2D((2, 2))(x)
x = Conv2D(512, (3, 3), activation='relu')(x)
x = Dense((128))(x)
x = Dense((512))(x)

"""now we reshape it as (batch_size, input_num_capsule, input_dim_capsule)
then connect a Capsule layer.

the output of final model is the lengths of 10 Capsule, whose dim=16.

the length of Capsule is the proba,
so the problem becomes a 10 two-classification problem.
"""

x = Reshape((-1, 128))(x)
capsule = Capsule(len(train_generator.class_indices), 32, 3, True)(x)
output = Lambda(lambda z: K.sqrt(K.sum(K.square(z), 2)))(capsule)
model = Model(inputs=input_image, outputs=output)

# we use a margin loss
#adam = K.optimizers.Adam(lr=0.001)
model.compile(loss=margin_loss, optimizer='adam', metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 128, 128, 1)       0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 124, 124, 64)      1664      
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 122, 122, 128)     73856     
_________________________________________________________________
dropout_3 (Dropout)          (None, 122, 122, 128)     0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 120, 120, 256)     295168    
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 40, 40, 256)       0         
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 38, 38, 256)       590080    
__________

In [0]:
from keras.callbacks import EarlyStopping,ModelCheckpoint,ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator
patience =10

#log_file_path = base_path + dataset_name + '_emotion_training.log'
#csv_logger = CSVLogger(log_file_path, append=False)
early_stop = EarlyStopping('val_loss', patience=patience)
reduce_lr = ReduceLROnPlateau('val_loss', factor=0.1,
                                  patience=int(patience/4), verbose=1)
trained_models_path = 'trained_model_2'
model_names = trained_models_path + '.{epoch:02d}.{val_acc:02f}.hdf5'
model_checkpoint = ModelCheckpoint(model_names, 'val_loss', verbose=1,
                                                    save_best_only=True)
callbacks = [model_checkpoint, early_stop, reduce_lr]

In [14]:
model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.001), metrics=['accuracy'])

model.fit_generator(generator=train_generator,
	                    steps_per_epoch=66,
	                    validation_data=validation_generator,
	                    validation_steps=16,
	                    epochs=epochs,
                      callbacks= callbacks)

Epoch 1/50

Epoch 00001: val_loss improved from inf to 1.38629, saving model to trained_model_2.01.0.200855.hdf5
Epoch 2/50

Epoch 00002: val_loss improved from 1.38629 to 1.38629, saving model to trained_model_2.02.0.243590.hdf5
Epoch 3/50

Epoch 00003: val_loss improved from 1.38629 to 1.38629, saving model to trained_model_2.03.0.290598.hdf5

Epoch 00003: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
Epoch 4/50

Epoch 00004: val_loss improved from 1.38629 to 1.38629, saving model to trained_model_2.04.0.316239.hdf5
Epoch 5/50

Epoch 00005: val_loss improved from 1.38629 to 1.38629, saving model to trained_model_2.05.0.307692.hdf5

Epoch 00005: ReduceLROnPlateau reducing learning rate to 1.0000000474974514e-05.
Epoch 6/50

Epoch 00006: val_loss did not improve from 1.38629
Epoch 7/50

Epoch 00007: val_loss did not improve from 1.38629

Epoch 00007: ReduceLROnPlateau reducing learning rate to 1.0000000656873453e-06.
Epoch 8/50

Epoch 00008: val_loss did not impro

<keras.callbacks.History at 0x7f7fd045a2b0>