## Import packages

In [1]:
import colorcet
import numpy as np
import os
import pandas as pd
import tensorflow as tf

from copy import copy
from bokeh.io import output_notebook, export_png, reset_output
from bokeh.layouts import row, column
from bokeh.palettes import linear_palette
from bokeh.plotting import figure, output_file, show, ColumnDataSource
from bokeh.models import Range1d, CustomJS, Slider, ColorBar, LinearColorMapper
from tensorflow.keras import models
from tensorflow.keras import layers

# Clear any previously configured Bokeh output target, then render plots inline in the notebook.
reset_output()
output_notebook()

# Allow more than one OpenMP runtime to be loaded in the same process.
# NOTE(review): this is set after TensorFlow has already been imported above; if the
# duplicate-library check fires at import time the variable may need to be set earlier — confirm.
os.environ["KMP_DUPLICATE_LIB_OK"]="TRUE"

## Define activation functions

In [2]:
def activation_functions(x, method, **kwargs):
    """Evaluate an activation function and its derivative on the samples in ``x``.

    Parameters
    ----------
    x : array-like
        1-D sample points. Integer/boolean input is promoted to float64 so that
        it can be differentiated.
    method : str
        One of 'Identity', 'Sigmoid', 'Hyperbolic Tangent', 'ReLU', 'Leaky ReLU',
        'ELU', 'Softmax', 'Maxout'.
    **kwargs
        alpha : float, optional
            Negative-side coefficient. Defaults to 0.3 for 'Leaky ReLU' and
            1.0 for 'ELU'; ignored by the other methods.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``(values, gradient)``.

        * Element-wise methods: both arrays have the shape of ``x``.
        * 'Softmax': evaluated on the 2-input grid ``np.meshgrid(x, x)``;
          ``values`` has shape (2, n, n) (one plane per output) and
          ``gradient[0]`` / ``gradient[1]`` are the derivatives of the first
          output with respect to the first / second input.
        * 'Maxout': ``values`` has shape (n, n) (max of the two grid inputs) and
          ``gradient`` has shape (2, n, n) (derivative w.r.t. each input).

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported names.
    """
    VALID_METHOD = {'Identity', 'Sigmoid', 'Hyperbolic Tangent', 'ReLU', 'Leaky ReLU', 'ELU', 'Softmax', 'Maxout'}
    if method not in VALID_METHOD:
        raise ValueError(f"Method must be one of {VALID_METHOD}.")

    # Integer/boolean tensors cannot be mixed with float constants or differentiated,
    # so promote them up front; floating inputs keep their dtype.
    x = np.asarray(x)
    if x.dtype.kind in 'biu':
        x = x.astype(np.float64)

    # Softmax and Maxout are two-input functions: evaluate them on the (x1, x2) grid.
    if method in {'Softmax', 'Maxout'}:
        tx = tf.Variable(initial_value=np.stack(np.meshgrid(x, x)))
    else:
        tx = tf.Variable(initial_value=x)

    with tf.GradientTape() as tape:
        if method == 'Identity':
            ty = 1. * tx
        elif method == 'Sigmoid':
            ty = 1. / (1. + tf.math.exp(-1 * tx))
        elif method == 'Hyperbolic Tangent':
            ty = (tf.math.exp(tx) - tf.math.exp(-1. * tx)) / (tf.math.exp(tx) + tf.math.exp(-1. * tx))
        elif method == 'ReLU':
            ty = tf.where(tx < 0, tf.zeros_like(tx), tx)
        elif method == 'Leaky ReLU':
            alpha = kwargs.get('alpha', 0.3)
            ty = tf.where(tx < 0, alpha * tx, tx)
        elif method == 'ELU':
            alpha = kwargs.get('alpha', 1.0)
            # Clamp the exponent to the negative side: tf.where back-propagates through
            # BOTH branches, so an overflowing exp() in the unselected branch would turn
            # the gradient into NaN for large positive inputs.
            ty = tf.where(tx < 0., alpha * (tf.math.exp(tf.minimum(tx, 0.)) - 1.), tx)
        elif method == 'Softmax':
            ty = tf.exp(tx) / tf.reduce_sum(tf.exp(tx), 0)
        elif method == 'Maxout':
            ty = tf.where(tx[0,:,:] < tx[1,:,:], tx[1,:,:], tx[0,:,:])

    if method == 'Softmax':
        # tape.gradient would sum over both outputs (which always add up to 1), so the
        # Jacobian entries of the first output are written out analytically instead:
        # d s1 / d x1 = s1 * (1 - s1),  d s1 / d x2 = -s1 * s2.
        ty_grad = tf.stack([ty[0,:,:] * (1 - ty[0,:,:]),
                            -1 * ty[0,:,:] * ty[1,:,:]])
    else:
        ty_grad = tape.gradient(ty, tx)

    return ty.numpy(), ty_grad.numpy()

## Visualization 
#### Identity, Sigmoid, Hyperbolic Tangent, ReLU, Leaky ReLU and ELU

In [3]:
# One figure pair (function | derivative) per element-wise activation, each in its own colour.
colors = ['#000000', '#f59042', '#7ef542', '#42d1f5', '#9c33ff', '#ff33a7']
methods = ['Identity', 'Sigmoid', 'Hyperbolic Tangent', 'ReLU', 'Leaky ReLU', 'ELU']

for method, color in zip(methods, colors):

    # Visible window of the function plot; the saturating functions use the default window.
    if method == 'Identity':
        (xmin, xmax), (ymin, ymax) = (-3, 3), (-3, 3)
    elif method in ('ReLU', 'Leaky ReLU', 'ELU'):
        (xmin, xmax), (ymin, ymax) = (-3, 3), (-2, 4)
    else:
        (xmin, xmax), (ymin, ymax) = (-5, 5), (-1.25, 1.25)

    # Sample well beyond the visible window so panning still shows the curve.
    x = np.linspace(-30, 30, num=2000)
    y, y_grad = activation_functions(x, method)

    # Left panel: the activation itself.
    p1 = figure(
        title=method,
        tooltips=[('x, f(x)', '(@x, @y)')],
        tools=['pan', 'wheel_zoom', 'reset', 'save'],
        x_axis_label='x',
        y_axis_label='f(x)',
    )
    p1.line(x, y, line_width=5, line_color=color)
    p1.x_range = Range1d(xmin, xmax)
    p1.y_range = Range1d(ymin, ymax)

    # Right panel: its derivative, always drawn in the same fixed window.
    p2 = figure(
        title=f'{method} (Derivative)',
        tooltips=[("x, f'(x)", '(@x, @y)')],
        tools=['pan', 'wheel_zoom', 'reset', 'save'],
        x_axis_label='x',
        y_axis_label="f'(x)",
    )
    p2.line(x, y_grad, line_width=5, line_color=color)
    p2.x_range = Range1d(-5, 5)
    p2.y_range = Range1d(-0.25, 1.25)

    # Shared typography for both panels.
    for plot in (p1, p2):
        plot.title.text_font_size = "16pt"
        plot.xaxis.axis_label_text_font_size = "16pt"
        plot.yaxis.axis_label_text_font_size = "16pt"
        plot.title.align = 'center'

    show(row(p1, p2))
    export_png(row(p1, p2), filename=f'{method}.png')

## Interactive Visualization
#### Leaky ReLU

In [4]:
# Leaky ReLU and its derivative, pre-computed for the slider's initial alpha (0.3).
x = np.linspace(-30, 30, num=2000)
y = copy(x)
y_grad = np.ones(y.shape)
y[x < 0] = 0.3 * x[x < 0]
y_grad[x < 0] = 0.3

xmin, xmax = -3, 3
ymin, ymax = -2, 4

# Both panels read from one shared source so a single JS callback updates them together.
source = ColumnDataSource(data=dict(x=x, y=y, y_grad=y_grad))

alpha_slider = Slider(start=0.01, end=1, value=0.3, step=0.01, title="Alpha")

# Browser-side recomputation of the negative half of the curve; the positive half
# (f(x) = x, f'(x) = 1) does not depend on alpha and is left untouched.
callback = CustomJS(args=dict(source=source, alpha=alpha_slider),
                    code="""
    const data = source.data;
    const A = alpha.value;
    const x = data['x']
    const y = data['y']
    const y_grad = data['y_grad']
    for (var i = 0; i < x.length; i++) {
        if(x[i] < 0){
            y[i] = A * x[i]
            y_grad[i] = A
        }
    }
    source.change.emit();
""")

p1 = figure(title='Leaky ReLU', tooltips=[('x, f(x)', '(@x, @y)')], tools=['save'], x_axis_label='x', y_axis_label="f(x)")
p1.line('x', 'y', source=source, line_width=5, line_color='#9c33ff')
p1.x_range=Range1d(xmin, xmax)
p1.y_range=Range1d(ymin, ymax)
p1.title.text_font_size = '16pt'
p1.xaxis.axis_label_text_font_size = "16pt"
p1.yaxis.axis_label_text_font_size = "16pt"
p1.title.align = 'center'

# The derivative lives in the 'y_grad' column of the shared source, so the hover
# tooltip must reference @y_grad (with @y it would display f(x) under the f'(x) label).
p2 = figure(title='Leaky ReLU (Derivative)', tooltips=[("x, f'(x)", '(@x, @y_grad)')], tools=['save'], x_axis_label='x', y_axis_label="f'(x)")
p2.line(x='x', y='y_grad', source=source, line_width=5, line_color='#9c33ff')
p2.x_range=Range1d(-5, 5)
p2.y_range=Range1d(-0.25, 1.25)
p2.title.text_font_size = '16pt'
p2.xaxis.axis_label_text_font_size = "16pt"
p2.yaxis.axis_label_text_font_size = "16pt"
p2.title.align = 'center'

alpha_slider.js_on_change('value', callback)

# Write the interactive figure to a standalone HTML file, then switch back to inline output.
output_file("Leaky ReLU.html")
show(column(row(p1, p2), alpha_slider))
reset_output()
output_notebook()

## Interactive Visualization
#### ELU

In [5]:
# ELU and its derivative, pre-computed for the slider's initial alpha (1).
x = np.linspace(-30, 30, num=2000)
y = copy(x)
y_grad = np.ones(y.shape)
y[x < 0] = 1 * (np.e ** x[x < 0] - 1)
y_grad[x < 0] = 1 * np.e ** x[x < 0]

xmin, xmax = -3, 3
ymin, ymax = -2, 4

# Both panels read from one shared source so a single JS callback updates them together.
source = ColumnDataSource(data=dict(x=x, y=y, y_grad=y_grad))

alpha_slider = Slider(start=0.01, end=1, value=1, step=0.01, title="Alpha")

# Browser-side recomputation of the negative half of the curve; the positive half
# (f(x) = x, f'(x) = 1) does not depend on alpha and is left untouched.
callback = CustomJS(args=dict(source=source, alpha=alpha_slider),
                    code="""
    const data = source.data;
    const A = alpha.value;
    const x = data['x']
    const y = data['y']
    const y_grad = data['y_grad']
    for (var i = 0; i < x.length; i++) {
        if(x[i] < 0){
            y[i] = A * (Math.exp(x[i]) - 1)
            y_grad[i] = A * Math.exp(x[i])
        }
    }
    source.change.emit();
""")

p1 = figure(title='ELU', tooltips=[('x, f(x)', '(@x, @y)')], tools=['save'], x_axis_label='x', y_axis_label="f(x)")
p1.line('x', 'y', source=source, line_width=5, line_color='#ff33a7')
p1.x_range=Range1d(xmin, xmax)
p1.y_range=Range1d(ymin, ymax)
p1.title.text_font_size = '16pt'
p1.xaxis.axis_label_text_font_size = "16pt"
p1.yaxis.axis_label_text_font_size = "16pt"
p1.title.align = 'center'

# The derivative lives in the 'y_grad' column of the shared source, so the hover
# tooltip must reference @y_grad (with @y it would display f(x) under the f'(x) label).
p2 = figure(title='ELU (Derivative)', tooltips=[("x, f'(x)", '(@x, @y_grad)')], tools=['save'], x_axis_label='x', y_axis_label="f'(x)")
p2.line(x='x', y='y_grad', source=source, line_width=5, line_color='#ff33a7')
p2.x_range=Range1d(-5, 5)
p2.y_range=Range1d(-0.25, 1.25)
p2.title.text_font_size = '16pt'
p2.xaxis.axis_label_text_font_size = "16pt"
p2.yaxis.axis_label_text_font_size = "16pt"
p2.title.align = 'center'

alpha_slider.js_on_change('value', callback)

# Write the interactive figure to a standalone HTML file, then switch back to inline output.
output_file("ELU.html")
show(column(row(p1, p2), alpha_slider))
reset_output()
output_notebook()

## Visualization 
#### Softmax

In [6]:
# Softmax over two inputs: first output plus its two partial derivatives, drawn as heatmaps.
x = np.linspace(0., 10., num=200)
y, y_grad = activation_functions(x, 'Softmax')

# (title, tooltip label, image plane, colour-scale low, colour-scale high) for each panel.
# Both derivative panels share one colour scale spanning the whole gradient array.
panel_specs = [
    ('Softmax (1st)', 'f(x)', y[0,:,:], np.min(y), np.max(y)),
    ('Softmax (Derivative of x1)', "f'(x)", y_grad[0,:,:], np.min(y_grad), np.max(y_grad)),
    ('Softmax (Derivative of x2)', "f'(x)", y_grad[1,:,:], np.min(y_grad), np.max(y_grad)),
]

panels = []
for panel_title, value_label, plane, low, high in panel_specs:
    color_mapper = LinearColorMapper(linear_palette(colorcet.bmy, 256), low=low, high=high)
    color_bar = ColorBar(color_mapper=color_mapper, border_line_color=None, location=(0,0))

    panel = figure(
        title=panel_title,
        tooltips=[('x1', '$x'), ('x2', '$y'), (value_label, '@image')],
        tools=['pan', 'wheel_zoom', 'reset', 'save'],
        x_axis_label='x1',
        y_axis_label='x2',
    )
    panel.image(image=[plane], x=0, y=0, dw=10, dh=10, color_mapper=color_mapper)

    # No padding around the image so it fills the plot area.
    panel.x_range.range_padding = 0
    panel.y_range.range_padding = 0
    panel.add_layout(color_bar, 'right')

    panel.title.text_font_size = "16pt"
    panel.xaxis.axis_label_text_font_size = "16pt"
    panel.yaxis.axis_label_text_font_size = "16pt"
    panel.title.align = 'center'

    panels.append(panel)

p1, p2, p3 = panels

show(row(p1, p2, p3))
_ = export_png(row(p1, p2, p3), filename='Softmax.png')

## Visualization 
#### Maxout

In [7]:
# Maxout over two inputs (max(x1, x2)) and its derivative with respect to x1, as heatmaps.
x = np.linspace(0., 10., num=200)
y, y_grad = activation_functions(x, 'Maxout')

color_mapper = LinearColorMapper(linear_palette(colorcet.bmy, 256), low=np.min(y), high=np.max(y))
color_bar = ColorBar(color_mapper=color_mapper, border_line_color=None, location=(0,0))
p1 = figure(title='Maxout', tooltips=[('x1', '$x'), ('x2', '$y'), ('f(x)', '@image')], tools=['pan', 'wheel_zoom', 'reset', 'save'], x_axis_label='x1', y_axis_label='x2')
p1.image(image=[y], x=0, y=0, dw=10, dh=10, color_mapper=color_mapper)
p1.x_range.range_padding = p1.y_range.range_padding = 0
p1.add_layout(color_bar, 'right')

# y_grad[0] is the derivative with respect to the first grid input (x1, the horizontal
# axis). The image must use the same plane as the colour scale and the panel title.
grad_x1 = y_grad[0,:,:]
color_mapper = LinearColorMapper(linear_palette(colorcet.bmy, 256), low=np.min(grad_x1), high=np.max(grad_x1))
color_bar = ColorBar(color_mapper=color_mapper, border_line_color=None, location=(0,0))
p2 = figure(title='Maxout (Derivative of x1)', tooltips=[('x1', '$x'), ('x2', '$y'), ("f'(x)", '@image')], tools=['pan', 'wheel_zoom', 'reset', 'save'], x_axis_label='x1', y_axis_label='x2')
p2.image(image=[grad_x1], x=0, y=0, dw=10, dh=10, color_mapper=color_mapper)
p2.x_range.range_padding = p2.y_range.range_padding = 0
p2.add_layout(color_bar, 'right')

p1.title.text_font_size = p1.xaxis.axis_label_text_font_size = p1.yaxis.axis_label_text_font_size = "16pt"
p2.title.text_font_size = p2.xaxis.axis_label_text_font_size = p2.yaxis.axis_label_text_font_size = "16pt"
p1.title.align = p2.title.align = 'center'

show(row(p1, p2))
_ = export_png(row(p1, p2), filename='Maxout.png')

## Why Non-linear Activation Functions Are Important

In [8]:
def colormap(x):
    """Map a class label to a hex colour: orange for label 0, purple for anything else."""
    return '#f58742' if x == 0 else '#8d42f5'
# Element-wise version so a whole label array can be coloured in one call.
colormap = np.vectorize(colormap)

In [9]:
# Dedicated, seeded generator so the toy dataset is identical on every Restart & Run All
# and does not depend on (or disturb) NumPy's global random state.
data_rng = np.random.default_rng(42)

# XOR-style layout: four Gaussian blobs (std 0.25) centred on the corners (2|8, 2|8).
# Column 0 is x1 (first 100 points near 2, last 100 near 8); column 1 is x2
# (alternating blocks of 50 near 2 and near 8).
train_x = np.array([np.concatenate([data_rng.normal(2, 0.25, 100), data_rng.normal(8, 0.25, 100)]),
                    np.concatenate([data_rng.normal(2, 0.25,  50), data_rng.normal(8, 0.25, 50),
                                    data_rng.normal(2, 0.25,  50), data_rng.normal(8, 0.25, 50)])]).T

# Label 1 for the blobs on the diagonal ((2, 2) and (8, 8)), label 0 for the other two.
train_y = np.zeros(200)
train_y[0:50] = 1
train_y[150:200] = 1

# Dense 200 x 200 grid over [0, 10]^2 on which the decision surfaces are evaluated.
test_x = np.array(np.meshgrid(np.linspace(0., 10., num=200), np.linspace(0., 10., num=200)))

In [10]:
# Baseline classifier: the hidden layer has no non-linearity, so the whole model
# collapses to a single linear map followed by a sigmoid.
linear_network = models.Sequential([
    layers.Dense(64, activation='linear', input_shape=(train_x.shape[-1], )),
    layers.Dense(1, activation='sigmoid'),
])
linear_network.compile(optimizer='adam', loss='binary_crossentropy')
_ = linear_network.fit(train_x, train_y, epochs=100, verbose=0)

In [11]:
# Same task with ELU hidden layers; the 2-unit bottleneck gives a 2-D hidden
# representation ahead of the sigmoid output unit.
nonlinear_network = models.Sequential([
    layers.Dense(64, activation='elu', input_shape=(train_x.shape[-1], )),
    layers.Dense(2, activation='elu'),
    layers.Dense(1, activation='sigmoid'),
])
nonlinear_network.compile(optimizer='adam', loss='binary_crossentropy')
_ = nonlinear_network.fit(train_x, train_y, epochs=100, verbose=0)

In [12]:
# Left panel: predicted probability of the linear network over the [0, 10]^2 grid,
# with the training points drawn on top.
test_y = linear_network.predict(np.c_[test_x[0].ravel(), test_x[1].ravel()]).reshape(test_x[0].shape)
color_mapper = LinearColorMapper(linear_palette(colorcet.bmy, 256), low=0, high=1)
color_bar = ColorBar(color_mapper=color_mapper, border_line_color=None, location=(0, 0))
p1 = figure(title='Decision Boundary (Linear)', tools=['pan', 'wheel_zoom', 'reset', 'save'], x_axis_label='x1', y_axis_label='x2')
p1.image(image=[test_y], x=0, y=0, dw=10, dh=10, color_mapper=color_mapper)
p1.circle(train_x[:,0], train_x[:,1], size=10, color=colormap(train_y), line_color='black', alpha=0.8)
p1.x_range.range_padding = p1.y_range.range_padding = 0
p1.add_layout(color_bar, 'right')

# Right panel: same view for the non-linear network. Its hidden layers use the 'elu'
# activation, so the title names ELU rather than ReLU.
test_y = nonlinear_network.predict(np.c_[test_x[0].ravel(), test_x[1].ravel()]).reshape(test_x[0].shape)
color_mapper = LinearColorMapper(linear_palette(colorcet.bmy, 256), low=0, high=1)
color_bar = ColorBar(color_mapper=color_mapper, border_line_color=None, location=(0, 0))
p2 = figure(title='Decision Boundary (ELU)', tools=['pan', 'wheel_zoom', 'reset', 'save'], x_axis_label='x1', y_axis_label='x2')
p2.image(image=[test_y], x=0, y=0, dw=10, dh=10, color_mapper=color_mapper)
p2.circle(train_x[:,0], train_x[:,1], size=10, color=colormap(train_y), line_color='black', alpha=0.8)
p2.x_range.range_padding = p2.y_range.range_padding = 0
p2.add_layout(color_bar, 'right')

p1.title.text_font_size = p1.xaxis.axis_label_text_font_size = p1.yaxis.axis_label_text_font_size = "16pt"
p2.title.text_font_size = p2.xaxis.axis_label_text_font_size = p2.yaxis.axis_label_text_font_size = "16pt"
p1.title.align = p2.title.align = 'center'

show(row(p1, p2))
_ = export_png(row(p1, p2), filename='Linear vs Nonlinear.png')

In [13]:
# Pull the kernel matrix and bias vector of the two hidden Dense layers out of the
# trained non-linear network (get_weights() returns [kernel, bias] for each layer).
weights_0 = nonlinear_network.layers[0].get_weights()
weights_1 = nonlinear_network.layers[1].get_weights()

transform_0, bias_0 = np.c_[weights_0[0]], weights_0[1]
transform_1, bias_1 = np.c_[weights_1[0]], weights_1[1]

In [14]:
# Re-create the hidden representation of the trained network by hand.
# Both hidden layers were built with activation='elu' (alpha = 1), so the forward
# pass applies ELU: z for z > 0, exp(z) - 1 otherwise. The exponent is clamped to
# z <= 0 so the unselected branch of np.where cannot overflow.

# First hidden layer: affine map, then ELU (x_hat_0 keeps the pre-activation values).
x_hat_0 = np.matmul(train_x, transform_0) + bias_0
output_0 = np.where(x_hat_0 > 0, x_hat_0, np.expm1(np.minimum(x_hat_0, 0)))

# Second hidden layer consumes the ACTIVATED output of the first one.
x_hat_1 = np.matmul(output_0, transform_1) + bias_1
output_1 = np.where(x_hat_1 > 0, x_hat_1, np.expm1(np.minimum(x_hat_1, 0)))

In [15]:
# Purely linear classifier (linear hidden layer + sigmoid) trained on the transformed
# points in output_1 instead of on the raw inputs.
nonlinear_subnetwork = models.Sequential([
    layers.Dense(2, activation='linear', input_shape=(output_1.shape[-1], )),
    layers.Dense(1, activation='sigmoid'),
])
nonlinear_subnetwork.compile(optimizer='adam', loss='binary_crossentropy')
_ = nonlinear_subnetwork.fit(output_1, train_y, epochs=100, verbose=0)

In [16]:
p1.title.text = 'Before Nonlinear Transformation'

# Evaluate the classifier on a grid that spans exactly the square the image is drawn
# on ([-1, 24] on both axes: origin -1, width/height 25). Re-using the [0, 10]^2 input
# grid here would stretch predictions for one region over a different one.
transformed_axis = np.linspace(-1., 24., num=200)
transformed_grid = np.array(np.meshgrid(transformed_axis, transformed_axis))

test_y = nonlinear_subnetwork.predict(np.c_[transformed_grid[0].ravel(), transformed_grid[1].ravel()]).reshape(transformed_grid[0].shape)
color_mapper = LinearColorMapper(linear_palette(colorcet.bmy, 256), low=0, high=1)
color_bar = ColorBar(color_mapper=color_mapper, border_line_color=None, location=(0, 0))
p2 = figure(title='After Nonlinear Transformation', tools=['pan', 'wheel_zoom', 'reset', 'save'], x_axis_label="x'1", y_axis_label="x'2")
p2.image(image=[test_y], x=-1, y=-1, dw=25, dh=25, color_mapper=color_mapper)
# Training points in the transformed (hidden-layer) coordinates, coloured by label.
p2.circle(output_1[:,0], output_1[:,1], size=10, color=colormap(train_y), line_color='black', alpha=0.8)
p2.x_range.range_padding = p2.y_range.range_padding = 0
p2.add_layout(color_bar, 'right')

p1.title.text_font_size = p1.xaxis.axis_label_text_font_size = p1.yaxis.axis_label_text_font_size = "16pt"
p2.title.text_font_size = p2.xaxis.axis_label_text_font_size = p2.yaxis.axis_label_text_font_size = "16pt"
p1.title.align = p2.title.align = 'center'

show(p2)
_ = export_png(p2, filename='Nonlinear Transformation.png')