In [None]:
"""some notes in optimizing activation functions"""

In [1]:
from functions import *

import numpy as np
import scipy.special

In [8]:
# Integer test matrices spanning negative, zero and positive values,
# so every branch of each activation is exercised.
small = np.arange(-4, 5).reshape(3, 3)            # 3x3, values -4 .. 4
print(small)
large = np.arange(-5000, 5000).reshape(100, 100)  # 100x100, values -5000 .. 4999
print(large)

[[-4 -3 -2]
 [-1  0  1]
 [ 2  3  4]]
[[-5000 -4999 -4998 ... -4903 -4902 -4901]
 [-4900 -4899 -4898 ... -4803 -4802 -4801]
 [-4800 -4799 -4798 ... -4703 -4702 -4701]
 ...
 [ 4700  4701  4702 ...  4797  4798  4799]
 [ 4800  4801  4802 ...  4897  4898  4899]
 [ 4900  4901  4902 ...  4997  4998  4999]]


In [9]:
# relu, leaky relu, elu, tanh, softmax

# forward:  x -> y            (e.g. relu(x))
# backward: (x, y, dy) -> dx  (e.g. relu_(x, y, dy); dy is the gradient w.r.t. y)

In [10]:
def relu(x):
    """ReLU forward pass built from basic operators only.

    Multiplies ``x`` by the boolean mask ``x >= 0``, so negative entries
    become zero while non-negative entries pass through unchanged.

    Caveat: ``-inf`` maps to ``nan`` (``-inf * 0`` is ``nan``).
    """
    keep = x >= 0
    return x * keep
def relu_(x, y, dy):
    """ReLU backward pass: let ``dy`` through wherever ``x >= 0``, else zero.

    x  -- forward input
    y  -- forward output (unused; kept so that every backward function
          shares the ``(x, y, dy)`` signature)
    dy -- gradient with respect to ``y``

    The boolean mask is multiplied in directly instead of being converted
    with ``.astype(int)``: that avoids an extra array copy, keeps the dtype
    of ``dy`` (an integer mask promotes float32 gradients to float64) and
    also works when ``x`` is a plain Python scalar, which has no ``.astype``.
    """
    return dy * (x >= 0)

def single_relu(x):
    """Scalar ReLU kernel for ``np.vectorize``.

    The zero returned for negative input has the same type as ``x``.
    ``np.vectorize`` picks the output dtype from the result of the first
    element only, so a bare int ``0`` would make a float array whose first
    entry is negative come back truncated to integers.
    """
    return x if x >= 0 else type(x)(0)

# Element-by-element ReLU (the slow reference implementation in the timings).
relu2 = np.vectorize(single_relu)
def single_relu_(x, y, dy):
    """Scalar ReLU backward kernel for ``np.vectorize``.

    Returns ``dy`` where ``x >= 0`` and a zero of ``dy``'s own type
    otherwise; ``y`` is unused. The typed zero matters because
    ``np.vectorize`` infers the output dtype from the first element: a bare
    int ``0`` would truncate float gradients whenever the first ``x`` is
    negative.
    """
    return dy if x >= 0 else type(dy)(0)

# Element-by-element ReLU gradient (the slow reference implementation).
relu2_ = np.vectorize(single_relu_)

def relu3(x):
    """ReLU forward pass via NumPy's element-wise maximum.

    The lower bound is the float literal ``0.0``, so integer arrays come
    back as floating point.
    """
    floor = 0.0
    return np.maximum(x, floor)

In [30]:
# Forward-pass timings on the 100x100 matrix `large`:
# operator mask (relu), np.vectorize (relu2) and np.maximum (relu3).
%timeit relu(large)
%timeit relu2(large)
%timeit relu3(large)

26.4 µs ± 3.17 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)
1.23 ms ± 43.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)
30.4 µs ± 751 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [31]:
# Backward-pass timings: operator mask (relu_) vs np.vectorize (relu2_).
# `large` is passed for x, y and dy alike -- presumably because only the
# running time matters here, not the numerical result.
%timeit relu_(large, large, large)
%timeit relu2_(large, large, large)

22.3 µs ± 909 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)
1.9 ms ± 54.1 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [None]:
def lrelu(x, alpha=0.001):
    """Leaky ReLU forward pass: element-wise maximum of ``x`` and ``alpha * x``.

    For ``0 <= alpha <= 1`` this yields ``x`` where ``x >= 0`` and
    ``alpha * x`` where ``x < 0``.
    """
    leaked = alpha * x
    return np.maximum(x, leaked)
def lrelu_(x, y, dy, alpha=0.001):
    """Leaky ReLU backward pass.

    x     -- forward input
    y     -- forward output (unused; kept for the shared ``(x, y, dy)``
             backward signature)
    dy    -- gradient with respect to ``y``
    alpha -- slope used for negative inputs

    Returns ``dy`` scaled by the local slope of the forward function:
    1 where ``x >= 0`` and ``alpha`` where ``x < 0``. The earlier expression
    ``(x<0)*alpha + 1-alpha`` had the two cases swapped (1 for negative
    inputs, ``1 - alpha`` otherwise).
    """
    slope = np.where(x >= 0, 1.0, alpha)
    return dy * slope

In [None]:
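# speed ranking (slowest -> fastest): np.vectorize < NumPy built-in (np.maximum) < basic operators
# still, np.vectorize is used for elu so that exp is evaluated only for the negative elements, not for the entire array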

In [None]:
# ELU forward pass, evaluated element by element so the exponential is only
# computed for negative inputs:  x if x >= 0 else alpha * (exp(x) - 1).
# - np.expm1 is called explicitly: it does not depend on a star-imported
#   `exp` and stays accurate for x close to 0, where exp(x) - 1 cancels.
# - otypes=[float] fixes the output dtype; otherwise np.vectorize infers it
#   from the first element and an integer array starting with a non-negative
#   value would have its negative branch truncated to integers.
elu = np.vectorize(
    lambda x, alpha=1.0: x if x >= 0 else alpha * np.expm1(x),
    otypes=[float],
)
# ELU backward pass, element by element:
#   dx = dy                      where x >= 0
#   dx = alpha * exp(x) * dy     where x <  0
# The alpha factor was previously missing, so the gradient was only right for
# alpha == 1. np.exp is called explicitly rather than relying on a
# star-imported `exp`; otypes=[float] keeps np.vectorize from inferring an
# integer output dtype from the first element. `y` is unused.
elu_ = np.vectorize(
    lambda x, y, dy, alpha=1.0: dy if x >= 0 else alpha * np.exp(x) * dy,
    otypes=[float],
)

In [None]:
def single_elu(x, alpha=1.0):
    """Scalar ELU forward: ``x`` for ``x >= 0``, else ``alpha * (exp(x) - 1)``.

    The negative branch uses ``np.expm1``, which computes ``exp(x) - 1``
    without the cancellation error the literal subtraction suffers for ``x``
    close to zero. It also avoids depending on a star-imported ``exp``.
    """
    if x >= 0:
        return x
    return alpha * np.expm1(x)

def single_elu_(x, y, dy, alpha=1.0):
    """Scalar ELU backward: gradient with respect to ``x`` given ``dy``.

    x     -- forward input
    y     -- forward output (unused; kept for the shared ``(x, y, dy)``
             backward signature)
    dy    -- gradient with respect to ``y``
    alpha -- ELU scale for the negative branch

    Returns ``dy`` for ``x >= 0`` and ``alpha * exp(x) * dy`` for ``x < 0``.
    The ``alpha`` factor was previously dropped, so the result was only
    correct for ``alpha == 1``. Branching on ``x >= 0`` mirrors the forward
    pass and keeps the result at ``x == 0`` equal to ``dy`` for every alpha.
    """
    if x >= 0:
        return dy
    return alpha * np.exp(x) * dy

# Array versions of the scalar ELU kernels.
# otypes=[float] pins the output dtype: without it np.vectorize infers the
# dtype from the first element, so an integer input whose first entry takes
# the pass-through branch would truncate every exponential result to an int.
elu  = np.vectorize(single_elu, otypes=[float])
elu_ = np.vectorize(single_elu_, otypes=[float])

In [None]:
def tanh(x):
    """Hyperbolic tangent activation; a thin wrapper around ``np.tanh``."""
    return np.tanh(x)
def sech(x):
    """Hyperbolic secant, computed as the reciprocal of ``np.cosh(x)``.

    NOTE(review): presumably meant for the tanh gradient (sech(x)**2) --
    no backward function for tanh is defined alongside it; confirm.
    """
    cosh_x = np.cosh(x)
    return 1 / cosh_x

In [38]:
# Time a 100x100 integer matrix product -- presumably a reference point for
# judging the cost of the activation functions against a layer's matmul.
%timeit np.dot(large, large)

507 µs ± 2.22 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [None]:
# NOTE(review): bare name only -- looks like a placeholder for the softmax
# notes; no softmax is defined or timed in the cells above, so evaluating
# this relies on `softmax` being provided from elsewhere. TODO confirm.
softmax