In [1]:
import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

def tanh(x):
    return np.tanh(x)

def tanh_derivative(x):
    return 1 - np.tanh(x)**2

def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return np.where(x > 0, 1, 0)

def leaky_relu(x, alpha=0.01):
    return np.where(x > 0, x, alpha * x)

def leaky_relu_derivative(x, alpha=0.01):
    return np.where(x > 0, 1, alpha)

def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=-1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=-1, keepdims=True)

# Example usage:
x = np.array([-2, -1, 0, 1, 2])

# Sigmoid
sigmoid_result = sigmoid(x)
sigmoid_derivative_result = sigmoid_derivative(sigmoid_result)

# Hyperbolic Tangent (tanh)
tanh_result = tanh(x)
tanh_derivative_result = tanh_derivative(tanh_result)

# Rectified Linear Unit (ReLU)
relu_result = relu(x)
relu_derivative_result = relu_derivative(x)

# Leaky Rectified Linear Unit (Leaky ReLU)
leaky_relu_result = leaky_relu(x)
leaky_relu_derivative_result = leaky_relu_derivative(x)

# Softmax
softmax_result = softmax(x)

# Print results
print("Sigmoid:", sigmoid_result)
print("Sigmoid Derivative:", sigmoid_derivative_result)

print("\nTanh:", tanh_result)
print("Tanh Derivative:", tanh_derivative_result)

print("\nReLU:", relu_result)
print("ReLU Derivative:", relu_derivative_result)

print("\nLeaky ReLU:", leaky_relu_result)
print("Leaky ReLU Derivative:", leaky_relu_derivative_result)

print("\nSoftmax:", softmax_result)


Sigmoid: [0.11920292 0.26894142 0.5        0.73105858 0.88079708]
Sigmoid Derivative: [0.10499359 0.19661193 0.25       0.19661193 0.10499359]

Tanh: [-0.96402758 -0.76159416  0.          0.76159416  0.96402758]
Tanh Derivative: [0.44338254 0.58781675 1.         0.58781675 0.44338254]

ReLU: [0 0 0 1 2]
ReLU Derivative: [0 0 0 1 1]

Leaky ReLU: [-0.02 -0.01  0.    1.    2.  ]
Leaky ReLU Derivative: [0.01 0.01 0.01 1.   1.  ]

Softmax: [0.01165623 0.03168492 0.08612854 0.23412166 0.63640865]


ReLU has been the best activation function in the deep learning community for a long time, but Google’s brain team announced Swish as an alternative to ReLU in 2017. Research by the authors of the papers shows that simply be substituting ReLU units with Swish units improves the classification accuracy on ImageNet by 0.6% for Inception-ResNet-v2, hence, it outperforms ReLU in many deep neural nets.

SWEDISH ACTIVATION FUNCTION:

Mathematical formula: Y = X * sigmoid(X)
Bounded below but Unbounded above: Y approach to constant value at X approaches negative infinity but Y approach to infinity as X approaches infinity.
Derivative of Swish, Y’ = Y + sigmoid(X) * (1-Y)
Soft curve and non-monotonic function.

In [2]:
import numpy as np

def swish(x, beta=1.0):
    return x * (1 / (1 + np.exp(-beta * x)))

def swish_derivative(x, beta=1.0):
    swish_x = swish(x, beta)
    return swish_x + beta * x * swish_x * (1 - swish_x)

# Example usage:
x = np.array([-2, -1, 0, 1, 2])

# Swish
swish_result = swish(x)
swish_derivative_result = swish_derivative(x)

# Print results
print("Swish:", swish_result)
print("Swish Derivative:", swish_derivative_result)


Swish: [-0.23840584 -0.26894142  0.          0.73105858  1.76159416]
Swish Derivative: [ 0.35208054  0.07232949  0.          0.92767051 -0.92164547]
