In [1]:
# We took some code and adapted from: https://software.intel.com/en-us/articles/understanding-capsule-network-architecture

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from keras import layers, models, optimizers
from keras.layers import Input, Conv1D, Dense
from keras.layers import Reshape, Layer, Lambda
from keras.models import Model
from keras.utils import to_categorical
from keras import initializers
from keras.optimizers import Adam
from keras.datasets import mnist
from keras import backend as K

import pandas as pd
import numpy as np
import tensorflow as tf

Using TensorFlow backend.


In [3]:
def squash(output_vector, axis=-1):
    norm = tf.reduce_sum(tf.square(output_vector), axis, keep_dims=True)
    return output_vector * norm / ((1 + norm) * tf.sqrt(norm + 1.0e-10))

In [4]:
class MaskingLayer(Layer):
    def call(self, inputs, **kwargs):
        input, mask = inputs
        return K.batch_dot(input, mask, 1)

    def compute_output_shape(self, input_shape):
        output_shape = input_shape[0][-1]
        return (None, output_shape)

In [5]:
def PrimaryCapsule(n_vector, n_channel, n_kernel_size, n_stride, padding='valid'):
    def builder(inputs):
        output = Conv1D(filters=n_vector * n_channel, kernel_size=n_kernel_size, strides=n_stride, padding=padding)(inputs)
        output = Reshape( target_shape=[-1, n_vector], name='primary_capsule_reshape')(output)
        return Lambda(squash, name='primary_capsule_squash')(output)
    return builder

In [6]:
class CapsuleLayer(Layer):
    def __init__(self, n_capsule, n_vec, n_routing, **kwargs):
        super(CapsuleLayer, self).__init__(**kwargs)
        self.n_capsule = n_capsule
        self.n_vector = n_vec
        self.n_routing = n_routing
        self.kernel_initializer = initializers.get('he_normal')
        self.bias_initializer = initializers.get('zeros')

    def build(self, input_shape): # input_shape is a 4D tensor
        self.input_n_capsule = input_shape[1]
        self.input_n_vector = input_shape[2]
        
        self.W = self.add_weight(shape=[self.input_n_capsule, self.n_capsule, self.input_n_vector, self.n_vector], initializer=self.kernel_initializer, name='W')
        self.bias = self.add_weight(shape=[1, self.input_n_capsule, self.n_capsule, 1, 1], initializer=self.bias_initializer, name='bias', trainable=False)
        self.built = True

    def call(self, inputs, training=None):
        input_expand = tf.expand_dims(tf.expand_dims(inputs, 2), 2)
        input_tiled = tf.tile(input_expand, [1, 1, self.n_capsule, 1, 1])
        input_hat = tf.scan(lambda ac, x: K.batch_dot(x, self.W, [3, 2]), elems=input_tiled, initializer=K.zeros( [self.input_n_capsule, self.n_capsule, 1, self.n_vector]))
        for i in range(self.n_routing): # routing
            c = tf.nn.softmax(self.bias, dim=2)
            outputs = squash(tf.reduce_sum( c * input_hat, axis=1, keep_dims=True))
            if i != self.n_routing - 1:
                self.bias += tf.reduce_sum(input_hat * outputs, axis=-1, keep_dims=True)
        return tf.reshape(outputs, [-1, self.n_capsule, self.n_vector])

    def compute_output_shape(self, input_shape):
        # output current layer capsules
        return (None, self.n_capsule, self.n_vector)

In [7]:
class LengthLayer(Layer):
    def call(self, inputs, **kwargs):
        return tf.sqrt(tf.reduce_sum(tf.square(inputs), axis=-1, keep_dims=False))

    def compute_output_shape(self, input_shape):
        output_shape = input_shape[0:-1]
        return tuple(output_shape)

In [8]:
def margin_loss(y_ground_truth, y_prediction):
    _m_plus = 0.9
    _m_minus = 0.1
    _lambda = 0.5
    L = y_ground_truth * tf.square(tf.maximum(0., _m_plus - y_prediction)) + _lambda * ( 1 - y_ground_truth) * tf.square(tf.maximum(0., y_prediction - _m_minus))
    return tf.reduce_mean(tf.reduce_sum(L, axis=1))

In [9]:
def load_data():
    df = pd.read_csv("../dataset/embeddings-ft1-clean.csv", sep=";")
    df = df.drop('id', axis=1)
    all_but_target = list(set(df.columns.values)-set(["target"]))
    X = df[all_but_target]
    y = df[["target"]]
    return X.as_matrix(), pd.get_dummies(y).as_matrix()

In [10]:
X, y = load_data()

  import sys


In [11]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

In [12]:
def reshapeX(X):
    return X.reshape((-1, 300, 1)).astype('float32')
def reshapeY(y):
    return y.reshape((-1, 4, 1))

In [13]:
x_train = reshapeX(x_train)
x_test = reshapeX(x_test)

In [14]:
print(x_train.shape)
print(y_train.shape)

(337850, 300, 1)
(337850, 4)


In [15]:
input_shape = [300, 1]
n_class = 4
n_routing = 3

In [16]:
x = Input(shape=input_shape)
conv1 = Conv1D(filters=256, kernel_size=10, strides=1, padding='valid', activation='relu', name='conv1')(x)
primary_capsule = PrimaryCapsule( n_vector=8, n_channel=32, n_kernel_size=10, n_stride=2)(conv1)
target_capsule = CapsuleLayer( n_capsule=n_class, n_vec=16, n_routing=n_routing, name='target_capsule')(primary_capsule)
output_capsule = LengthLayer(name='output_capsule')(target_capsule)

Instructions for updating:
keep_dims is deprecated, use keepdims instead
Instructions for updating:
dim is deprecated, use axis instead


In [17]:
mask_input = Input(shape=(n_class, ))
mask = MaskingLayer()([target_capsule, mask_input])  # two inputs
dec = Dense(512, activation='relu')(mask)
dec = Dense(1024, activation='relu')(dec)
dec = Dense(300, activation='sigmoid')(dec)
dec = Reshape(input_shape)(dec)

In [18]:
model = Model([x, mask_input], [output_capsule, dec])
model.compile(optimizer='adam', loss=[ margin_loss, 'mae' ], metrics=[ margin_loss, 'mae', 'accuracy'])

In [19]:
model.fit([x_train, y_train], [y_train, x_train], batch_size=500, epochs=10, validation_split=0.2)

Train on 270280 samples, validate on 67570 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7f819b4c6ed0>

In [20]:
predictions, decoded = model.predict([x_test, y_test])

In [21]:
preds = np.argmax(predictions, axis=1)
ytest = np.argmax(y_test, axis=1)

In [22]:
accuracy = accuracy_score(ytest, preds)

In [23]:
accuracy

0.8900702082568699