In [1]:
import os
import keras, gzip, pickle

import numpy as np
from keras import backend as K
from keras.layers import *
from keras.optimizers import *
from keras.models import Model, load_model
from keras.callbacks import *
from LBC import LBC
from sklearn.preprocessing import OneHotEncoder

Using TensorFlow backend.


In [2]:
def load_data(path=None):
    """Loads the pickled MNIST splits and shapes them for the network.

    # Arguments
        path: location of the gzipped pickle holding the
            `(train_set, valid_set, test_set)` triple, each element being
            an `(images, labels)` pair. Defaults to
            `~/.keras/datasets/mnist.pkl.gz`.

    # Returns
        `(x_train, y_train), (x_val, y_val), (x_test, y_test)` where each
        `x_*` is a float32 array reshaped to `(-1, 28, 28, 1)` and each
        `y_*` is a dense float32 one-hot matrix.
    """
    if path is None:
        path = os.path.join(os.path.expanduser('~'), '.keras', 'datasets', 'mnist.pkl.gz')

    # NOTE(security): unpickling can execute arbitrary code; only point
    # `path` at a file you trust.
    with gzip.open(path, 'rb') as f:
        train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
    x_train, y_train = train_set
    x_val, y_val = valid_set
    x_test, y_test = test_set

    def as_images(flat):
        # Add the trailing channel axis expected by the convolutional layers.
        return np.array(flat, dtype='float32').reshape(-1, 28, 28, 1)

    def as_column(labels):
        # The encoder wants a 2D (n_samples, 1) input.
        return np.asarray(labels).reshape(-1, 1)

    x_train, x_val, x_test = as_images(x_train), as_images(x_val), as_images(x_test)

    # Fit the encoder once on the training labels and reuse it, so that all
    # three splits share the same column <-> class mapping even if a split
    # happens to lack one of the classes.
    encoder = OneHotEncoder(dtype=np.float32, sparse=False)
    y_train = encoder.fit_transform(as_column(y_train))
    y_val = encoder.transform(as_column(y_val))
    y_test = encoder.transform(as_column(y_test))

    return (x_train, y_train), (x_val, y_val), (x_test, y_test)

In [3]:
# Load the three splits once, then unpack each (inputs, labels) pair.
train, val, test = load_data()
(x_train, y_train), (x_val, y_val), (x_test, y_test) = train, val, test

In [4]:
# Cache of LBC layers keyed by their configuration, so that calls to `conv`
# with the same configuration share one LBC layer instance.
lbc_s = {}

def conv(ip, filters, kernel_size, stride=1, padding='same', activation='sigmoid', dilation=1, name=None, point_filters=0, sparsity=0.5):
    """Applies a (cached) LBC layer followed by a bias-free 1x1 convolution.

    # Arguments
        ip: input Keras tensor.
        filters: number of filters of the LBC layer.
        kernel_size: kernel size of the LBC layer.
        stride: stride of the LBC layer.
        padding: padding mode of the LBC layer.
        activation: accepted for interface compatibility but not used;
            the LBC layer is always built with `activation='relu'`.
        dilation: dilation of the LBC layer.
        name: accepted for interface compatibility but not used.
        point_filters: number of filters of the 1x1 convolution; values
            `<= 0` mean "same as `filters`".
        sparsity: sparsity passed to the LBC layer.

    # Returns
        The output tensor of the 1x1 convolution.
    """
    if point_filters <= 0:
        point_filters = filters

    # Every argument handed to the LBC constructor must be part of the key;
    # otherwise two calls differing only in `padding` or `dilation` would
    # silently reuse a layer built with the wrong settings.
    in_channels = K.int_shape(ip)[-1]
    key = '_'.join(str(part) for part in (in_channels, filters, kernel_size, stride, sparsity, padding, dilation))
    if key not in lbc_s:
        lbc_s[key] = LBC(filters=filters, kernel_size=kernel_size, stride=stride, padding=padding, activation='relu', dilation=dilation, sparsity=sparsity)

    x = lbc_s[key](ip)
    x = Conv2D(filters=point_filters, kernel_size=1, strides=1, padding='same', use_bias=False)(x)
    return x

def residual(ip, f, kernel_size=3, name=None, sparsity=0.5):
    """Residual block: 1x1 convolution, then `conv`, added back onto `ip`.

    # Arguments
        ip: input Keras tensor; it is summed with the branch output, so its
            shape must match what the branch produces (`f` channels).
        f: number of filters of the leading 1x1 convolution and of the
            point-wise convolution inside `conv`.
        kernel_size: kernel size forwarded to `conv`.
        name: accepted but not used.
        sparsity: sparsity forwarded to `conv`.

    # Returns
        The tensor `ip + branch(ip)`.
    """
    pointwise = Conv2D(filters=f, kernel_size=1, padding='same')
    branch = conv(
        pointwise(ip),
        filters=512,
        point_filters=f,
        kernel_size=kernel_size,
        sparsity=sparsity,
    )
    return Add()([ip, branch])

In [5]:
def LBCNN(include_top=True, weights=None,
          input_tensor=None, input_shape=None,
          pooling=None,
          classes=10):
    """Instantiates the LBCNN architecture.

    The network is a batch-normalised stem followed by three stride-2
    stages. Each stage is one `conv` block and then a number of `residual`
    blocks (1, 2 and 4 respectively).

    # Arguments
        include_top: whether to append the classification head
            (global average pooling -> tanh -> softmax `Dense`).
        weights: `None` (random initialization) or the path to a
            weights file to be loaded into the finished model.
        input_tensor: optional tensor to use as image input for the model.
            A tensor that is not already a Keras tensor is wrapped in an
            `Input` layer.
        input_shape: optional shape tuple for the `Input` layer; defaults
            to `(28, 28, 1)`. Not used when `input_tensor` is already a
            Keras tensor.
        pooling: Optional pooling mode for feature extraction
            when `include_top` is `False`.
            - `None` means that the output of the model will be
                the 4D tensor output of the last block.
            - `avg` means that global average pooling
                will be applied to that output, and thus
                the output of the model will be a 2D tensor.
            - `max` means that global max pooling will
                be applied.
        classes: number of units of the softmax layer; only used when
            `include_top` is True.

    # Returns
        A Keras model instance.

    # Raises
        ValueError: if `weights` is given but is not an existing path.
    """
    if weights is not None and not os.path.exists(weights):
        raise ValueError('The `weights` argument should be either '
                         '`None` (random initialization) '
                         'or the path to the weights file to be loaded.')

    # Fall back to single-channel 28x28 images (the shape `load_data`
    # produces) only when the caller did not ask for another shape.
    if input_shape is None:
        input_shape = (28, 28, 1)

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    elif not K.is_keras_tensor(input_tensor):
        img_input = Input(tensor=input_tensor, shape=input_shape)
    else:
        img_input = input_tensor

    # One [point-wise filters, number of residual blocks] pair per stage.
    structure = [[64, 1], [64, 2], [128, 4]]
    # Block 1
    x = BatchNormalization()(img_input)
    x = conv(x, filters=512, kernel_size=3, sparsity=0.5, point_filters=32)
    # Blocks (Rest)
    for f, n in structure:
        # The stride-2 block halves the spatial resolution and sets the
        # channel count that the following residual blocks preserve.
        x = conv(x, filters=512, kernel_size=3, stride=2, sparsity=0.5, point_filters=f)
        for _ in range(n):
            x = residual(x, f=f, sparsity=0.5)

    if include_top:
        # Classification block
        x = GlobalAveragePooling2D(name='avg_pool')(x)
        x = Activation('tanh')(x)
        x = Dense(classes, activation='softmax', name='predictions')(x)
    else:
        if pooling == 'avg':
            x = GlobalAveragePooling2D()(x)
        elif pooling == 'max':
            x = GlobalMaxPooling2D()(x)

    # Ensure that the model takes into account
    # any potential predecessors of `input_tensor`.
    if input_tensor is not None:
        # `get_source_inputs` is not imported explicitly anywhere in this
        # notebook, and its home differs between Keras 2.x releases, so
        # resolve it here.
        try:
            from keras.utils import get_source_inputs
        except ImportError:
            from keras.engine.topology import get_source_inputs
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input
    # Create model.
    model = Model(inputs, x, name='CNN')

    if weights is not None:
        model.load_weights(weights)

    return model

In [6]:
# Build the network with all defaults: classification head included,
# randomly initialised weights, 10 output classes.
model = LBCNN()

In [7]:
# Print the layer-by-layer table (output shapes, parameter counts, connections).
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 28, 28, 1)    0                                            
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 28, 28, 1)    4           input_1[0][0]                    
__________________________________________________________________________________________________
lbc_1 (LBC)                     (None, 28, 28, 512)  0           batch_normalization_1[0][0]      
__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, 28, 28, 32)   16384       lbc_1[0][0]                      
__________________________________________________________________________________________________
lbc_2 (LBC

In [8]:
# Nadam with a 0.01 learning rate; one-hot targets call for categorical cross-entropy.
model.compile(
    optimizer=Nadam(0.01),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [9]:
# Train for three epochs, evaluating on the validation split after each one.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=64,
    epochs=3,
    validation_data=(x_val, y_val),
)

Train on 50000 samples, validate on 10000 samples
Epoch 1/3

KeyboardInterrupt: 