# Obtain the SELU parameters for arbitrary fixed points

*Author:* Guenter Klambauer, 2017

Tested under Python 3.5.


In [17]:
import numpy as np
from scipy.special import erf,erfc
from sympy import Symbol, solve, nsolve

### Function to obtain the parameters of the SELU for an arbitrary fixed point (mean, variance)

In [18]:
def getSeluParameters(fixedpointMean=0, fixedpointVar=1):
    """Find the SELU parameters alpha and lambda for a desired fixed point.

    Two equations are set up from the closed-form expressions below: the
    output mean must equal ``fixedpointMean`` and the output variance must
    equal ``fixedpointVar``. They are solved numerically for the two unknowns.

    Args:
        fixedpointMean: Desired fixed-point mean (``nu`` in the formulas).
        fixedpointVar: Desired fixed-point variance (``tau`` in the
            formulas). Must be strictly positive.

    Returns:
        A tuple ``(alpha, lambda)`` of Python floats.

    Raises:
        ValueError: If ``fixedpointVar`` is not strictly positive.
        Exceptions raised by ``sympy.nsolve`` (e.g. when the solver does not
        converge) propagate unchanged.
    """
    nu = fixedpointMean
    tau = fixedpointVar

    # The expressions divide by tau and take its square root; without this
    # check a zero variance surfaces as ZeroDivisionError and a negative one
    # as NaNs inside the solver. `not tau > 0` also rejects NaN.
    if not tau > 0:
        raise ValueError("fixedpointVar must be positive, got %r" % (tau,))

    # Imported locally so the function is usable on its own.
    from sympy import Symbol, nsolve

    aa = Symbol('aa')  # unknown alpha
    ll = Symbol('ll')  # unknown lambda

    # Purely numeric terms that appear in both expressions.
    sqrt_2tau = np.sqrt(2*tau)
    gauss = np.exp(-nu**2/(2*tau))
    erf_nu = erf(nu/sqrt_2tau)
    erfc_nu = erfc(nu/sqrt_2tau)
    erfc_nu_tau = erfc((nu + tau)/sqrt_2tau)
    erfc_nu_2tau = erfc((nu + 2*tau)/sqrt_2tau)

    # Output mean as a symbolic expression in alpha and lambda.
    mean = 0.5*ll*(nu + gauss*np.sqrt(2/np.pi)*np.sqrt(tau)
                   + nu*erf_nu - aa*erfc_nu
                   + np.exp(nu + tau/2)*aa*erfc_nu_tau)

    # Output variance: second moment minus the squared mean.
    var = 0.5*ll**2*(gauss*np.sqrt(2/np.pi*tau)*nu
                     + (nu**2 + tau)*(1 + erf_nu)
                     + aa**2*erfc_nu
                     - aa**2*2*np.exp(nu + tau/2)*erfc_nu_tau
                     + aa**2*np.exp(2*(nu + tau))*erfc_nu_2tau) - mean**2

    # Fixed-point conditions: both residuals must vanish.
    eq1 = mean - nu
    eq2 = var - tau

    # Numerical root search; the tuple is the starting point for (alpha, lambda).
    res = nsolve((eq2, eq1), (aa, ll), (1.67, np.sqrt(tau)))
    return float(res[0]), float(res[1])


In [19]:
### Recover the standard SELU parameters (fixed point: mean 0, variance 1)
getSeluParameters(fixedpointMean=0, fixedpointVar=1)

(1.6732632423543774, 1.0507009873554802)

In [20]:
### To obtain new parameters for mean zero and variance 2
# Solve once, keep the result for the cells below, and display the stored
# values instead of running the numerical solver a second time.
myAlpha, myLambda = getSeluParameters(0, 2)
(myAlpha, myLambda)

(1.9712557503462642, 1.0607090761030131)

### Adjust the SELU function and Dropout to your new parameters

In [5]:
def selu(x):
    """Apply the SELU activation using the module-level myAlpha / myLambda.

    Non-negative inputs are passed through, negative inputs are mapped to
    ``myAlpha * elu(x)``, and the whole result is multiplied by ``myLambda``.
    """
    with ops.name_scope('elu'):
        is_non_negative = x >= 0.0
        negative_branch = myAlpha * tf.nn.elu(x)
        return myLambda * tf.where(is_non_negative, x, negative_branch)

In [6]:
def dropout_selu(x, rate, alpha=- myAlpha*myLambda, noise_shape=None, seed=None, name=None, training=False):
    """Dropout to a value with rescaling.

    Dropped units are set to `alpha` instead of zero, and the result is then
    passed through the affine map ``a * ret + b``. With the formulas used
    below, an input with zero mean and unit variance keeps zero mean and unit
    variance after dropout.

    Args:
        x: Input; converted to a tensor.
        rate: Drop probability; units are kept with probability ``1 - rate``.
        alpha: Value assigned to dropped units. Defaults to
            ``-myAlpha*myLambda``, evaluated once when this function is
            defined.
        noise_shape: Shape of the random keep/drop mask; defaults to the
            shape of `x`.
        seed: Seed forwarded to the uniform random op.
        name: Optional name for the name scope.
        training: Forwarded to ``utils.smart_cond``; when false the input is
            returned through an identity op.

    Returns:
        The tensor after dropout and rescaling, or `x` unchanged (identity)
        when not training.

    Raises:
        ValueError: If ``1 - rate`` is a real number outside ``(0, 1]``.
    """

    def dropout_selu_impl(x, rate, alpha, noise_shape, seed, name):
        keep_prob = 1.0 - rate
        x = ops.convert_to_tensor(x, name="x")
        if isinstance(keep_prob, numbers.Real) and not 0 < keep_prob <= 1:
            raise ValueError("keep_prob must be a scalar tensor or a float in the "
                             "range (0, 1], got %g" % keep_prob)
        keep_prob = ops.convert_to_tensor(keep_prob, dtype=x.dtype, name="keep_prob")
        keep_prob.get_shape().assert_is_compatible_with(tensor_shape.scalar())

        alpha = ops.convert_to_tensor(alpha, dtype=x.dtype, name="alpha")

        # Do nothing if we know keep_prob == 1
        if tensor_util.constant_value(keep_prob) == 1:
            return x

        noise_shape = noise_shape if noise_shape is not None else array_ops.shape(x)
        # uniform [keep_prob, 1.0 + keep_prob)
        random_tensor = keep_prob
        random_tensor += random_ops.random_uniform(noise_shape, seed=seed, dtype=x.dtype)
        # 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob)
        binary_tensor = math_ops.floor(random_tensor)
        # Kept units retain their value; dropped units become alpha.
        ret = x * binary_tensor + alpha * (1 - binary_tensor)

        # Scale: a = sqrt(1 / (q + alpha^2 * q * (1 - q))) with q = keep_prob.
        # The parentheses matter: the whole sum is the denominator and
        # (1 - q) is a single factor.
        a = tf.sqrt(1.0 / (keep_prob + tf.pow(alpha, 2) * keep_prob * (1.0 - keep_prob)))

        # Shift: cancels the mean (1 - q) * alpha introduced by the dropped units.
        b = -a * (1 - keep_prob) * alpha
        ret = a * ret + b
        ret.set_shape(x.get_shape())
        return ret

    with ops.name_scope(name, "dropout", [x]) as name:
        return utils.smart_cond(training,
            lambda: dropout_selu_impl(x, rate, alpha, noise_shape, seed, name),
            lambda: array_ops.identity(x))


### For completeness: These are the correct expressions for mean zero and unit variance

In [7]:
# Closed-form alpha and lambda for the fixed point with zero mean and unit
# variance. The two helper terms below appear several times in the formulas.
_erfc_inv_sqrt2 = erfc(1/np.sqrt(2))
_sqrt_e = np.sqrt(np.exp(1))

myAlpha = np.sqrt(2/np.pi) / (1 - np.exp(0.5)*_erfc_inv_sqrt2)

_lambda_denominator = (2 + np.pi
                       - 2*_sqrt_e*(2 + np.pi)*_erfc_inv_sqrt2
                       + np.exp(1)*np.pi*_erfc_inv_sqrt2**2
                       + 2*np.exp(2)*erfc(np.sqrt(2)))
myLambda = (1 - _sqrt_e*_erfc_inv_sqrt2) * np.sqrt(2*np.pi / _lambda_denominator)

In [8]:
# Report both parameters, one per line.
for label, value in (("Alpha parameter of the SELU: ", myAlpha),
                     ("Lambda parameter of the SELU: ", myLambda)):
    print(label, value)

Alpha parameter of the SELU:  1.67326324235
Lambda parameter of the SELU:  1.05070098736
