### Objective

`Maxout` Layer introduced in [Maxout Networks](https://arxiv.org/pdf/1302.4389.pdf) is a awesome activation layer. However, unlike normal activation (such as tanh, relu, sigmoid...), it does not operate in elementwise, but in pairwise, implementation technique is required.



In [1]:
%matplotlib inline

import numpy as np
import tensorflow as tf

import matplotlib.pyplot as plt

## Create Custom Maxout Layer

In [2]:
from tensorflow.keras import backend as K
from tensorflow.keras.layers import Layer, Reshape
from tensorflow.keras.utils import get_custom_objects

class MaxOut(Layer):
    def __init__(self, num_factor=2, **kwargs):
        """
        initialize maxout layer
        
        * num factor : the number of elements to compare 
                       when calculating max operation,
                       default = 2
        """
        self.num_factor = num_factor
        super().__init__(**kwargs)
    
    def build(self, input_shape):
        if input_shape[-1] % self.num_factor != 0:
            raise ValueError(
                f"the number of features on the last axis "
                f"should be a multiple of num_factor.\n"
                f"* input shape : {input_shape}\n"
                f"* num factor : {self.num_factor}\n")
        target_shape = (
            input_shape[1:-1] 
            + [input_shape[-1]//self.num_factor] 
            + [self.num_factor])
        self.reshape = Reshape(target_shape)        
        super().build(input_shape)
    
    def call(self, inputs):
        """
        reduce by max operation after reshape 
        based on the last axis(feature axis)
        """
        reshaped = self.reshape(inputs)
        return K.max(reshaped, axis=-1)
    
    def get_config(self):
        """
        For saving and loading MaxOut layer
        """
        config = super().get_config()
        config.update({
            "num_factor":self.num_factor
        })
        return config

# register MaxOut Layer
get_custom_objects().update({"MaxOut":MaxOut})    

### Test Maxout's operation

In [3]:
inputs = tf.constant([[1, 2, 3, 4, 5, 6]], tf.float32)

In [4]:
num_factor = 2

outputs = MaxOut(num_factor)(inputs).numpy()
print("num factor=",num_factor)
print("-> result : ", outputs)

num factor= 2
-> result :  [[2. 4. 6.]]


In [5]:
num_factor = 3

outputs = MaxOut(num_factor)(inputs).numpy()
print("num factor=",num_factor)
print("-> result : ", outputs)

num factor= 3
-> result :  [[3. 6.]]


In [6]:
num_factor = 4

outputs = MaxOut(num_factor)(inputs).numpy()
print("num factor=",num_factor)
print("-> result : ", outputs)

ValueError: ignored

## Maxout Network for Fashion-MNIST
---

### Load Data

In [7]:
from tensorflow.keras.datasets import fashion_mnist

train, test = fashion_mnist.load_data()

train_X, train_y = train
train_X = train_X / 255.
train_X = train_X[..., None]

test_X, test_y = test
test_X = test_X / 255.
test_X = test_X[..., None]

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz


### build Model

In [14]:
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Conv2D, Dropout
from tensorflow.keras.layers import Dense, MaxPool2D
from tensorflow.keras.layers import Flatten
from tensorflow.keras.models import Model

inputs = Input((28,28, 1))

# 4 blocks of (Conv - Maxout- Dropout)

# Block 1
hidden = Conv2D(32, (3,3))(inputs)
hidden = MaxOut(2)(hidden)
hidden = Dropout(0.3)(hidden)

# Block 2 with Maxpool
hidden = Conv2D(32, (3,3))(hidden)
hidden = MaxOut(2)(hidden)
hidden = Dropout(0.3)(hidden)
hidden = MaxPool2D((2,2))(hidden)

# Block 3
hidden = Conv2D(64, (3,3))(hidden)
hidden = MaxOut(2)(hidden)
hidden = Dropout(0.3)(hidden)

# Block 4 with Maxpool
hidden = Conv2D(64, (3,3))(hidden)
hidden = MaxOut(2)(hidden)
hidden = Dropout(0.3)(hidden)
hidden = MaxPool2D((2,2))(hidden)

flat = Flatten()(hidden)
fc = Dense(40)(flat)
fc = MaxOut(2)(fc)
outputs = Dense(10)(fc)

model = Model(inputs, outputs)

### Compile Model

In [15]:
from tensorflow.keras.losses import SparseCategoricalCrossentropy
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.metrics import SparseCategoricalAccuracy

model.compile(optimizer=Adam(1e-3), 
              loss=SparseCategoricalCrossentropy(from_logits=True),
              metrics=[SparseCategoricalAccuracy()])

### Train Model

In [16]:
model.fit(x=train_X, y=train_y, 
          batch_size=256, epochs=20, 
          validation_split=0.1,)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7fb778708c88>

### Evaluate Model

In [18]:
model.evaluate(test_X, test_y)



[0.2596690058708191, 0.9132999777793884]