In [336]:
import numpy as np

# 1 Softmax
### $\sigma(z_i)=\frac{e^{z_i}}{\sum_{j=1}^{K} e^{z_j}}$ for $i=1,\ldots,K$ and $z=(z_1,\ldots,z_K)\in \mathbb{R}^K$

In [337]:
def softmax(L, axis=None):
    """Return the softmax of ``L``: ``exp(L_i) / sum_j exp(L_j)``.

    The maximum score is subtracted before exponentiating. This leaves the
    result mathematically unchanged but keeps ``np.exp`` from overflowing to
    ``inf`` (and the quotient from becoming ``nan``) for large scores.

    Parameters
    ----------
    L : array_like
        Scores (logits). Converted to a float64 array.
    axis : int or None, optional
        Axis along which to normalise. The default ``None`` normalises over
        all elements, which is what a plain ``softmax(L)`` call has always done.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``L`` whose entries sum to 1 along
        ``axis`` (or over the whole array when ``axis`` is ``None``).
    """
    scores = np.asarray(L, dtype=float)
    if scores.size == 0:
        # np.max raises on empty input; there is nothing to normalise.
        return scores
    # Shift so the largest exponent is 0: exp() then stays within (0, 1].
    shifted = scores - np.max(scores, axis=axis, keepdims=True)
    exp_l = np.exp(shifted)
    return exp_l / np.sum(exp_l, axis=axis, keepdims=True)

In [338]:
# Three increasing scores; softmax maps them to probabilities that sum to 1,
# with the largest score receiving the largest share.
L = [5,6,7]
softmax(L)

array([0.09003057, 0.24472847, 0.66524096])

# 2 Sigmoid

In [339]:
def sigmoid(X):
    """Element-wise logistic sigmoid ``1 / (1 + exp(-X))``.

    Evaluated in a numerically stable piecewise form so that large-magnitude
    inputs never make ``np.exp`` overflow (the naive formula computes
    ``exp(1000)`` for ``X = -1000`` and emits a RuntimeWarning).

    Parameters
    ----------
    X : array_like or scalar
        Input value(s). Converted to float64.

    Returns
    -------
    numpy.ndarray or numpy.float64
        Sigmoid of each element, same shape as ``X``; a scalar for scalar input.
    """
    x = np.asarray(X, dtype=float)
    # exp(-|x|) always lies in [0, 1], so it cannot overflow.
    decay = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + e^-x).  x < 0: the same value rewritten as e^x / (1 + e^x).
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # np.where always returns an ndarray; indexing with () unwraps a 0-d result
    # so scalar input still yields a scalar, and is a no-op for real arrays.
    return result[()]

In [340]:
# Probe the sigmoid deep in both tails and at zero, where it equals exactly 0.5.
X = np.array([-100, -10, 0, 10, 100])
sigmoid(X)

array([3.72007598e-44, 4.53978687e-05, 5.00000000e-01, 9.99954602e-01,
       1.00000000e+00])

# 3 Cross Entropy
### $\text{Cross-Entropy} = -\sum_{i=1}^{n} \left[\, y_i \ln(p_i) + (1-y_i)\ln(1-p_i) \,\right]$

In [341]:
def cross_entropy(Y, P, eps=1e-15):
    """Binary cross-entropy summed over all samples.

    Computes ``-sum(Y * ln(P) + (1 - Y) * ln(1 - P))``.

    Parameters
    ----------
    Y : array_like
        True labels (0 or 1).
    P : array_like
        Predicted probabilities of the positive class, same shape as ``Y``.
    eps : float, optional
        Probabilities are clipped to ``[eps, 1 - eps]`` before taking logs so
        that a prediction of exactly 0 or 1 yields a large finite loss rather
        than ``inf``/``nan``.

    Returns
    -------
    numpy.float64
        The total cross-entropy.
    """
    # np.asarray(..., dtype=float) replaces np.float_, which NumPy 2.0 removed.
    labels = np.asarray(Y, dtype=float)
    probs = np.clip(np.asarray(P, dtype=float), eps, 1.0 - eps)
    return -np.sum(labels * np.log(probs) + (1.0 - labels) * np.log(1.0 - probs))

In [342]:
# True labels and predicted positive-class probabilities for three samples.
Y=[1,1,0]
p=[0.8,0.7,0.1]
cross_entropy(Y,p) # Expected ~0.69: -(ln 0.8 + ln 0.7 + ln 0.9) = 0.685...

0.6851790109107685

# Prediction formula
$\hat{y}=\sigma(Wx+b)=\sigma(w_1x_1+w_2x_2+\cdots+w_nx_n+b)$

In [343]:
# Output (prediction) formula
def output_formula(features, weights, bias):
    """Return the prediction ``sigmoid(features . weights + bias)``.

    ``features`` and ``weights`` are combined with ``np.matmul``; ``bias`` is
    added to that product before the sigmoid is applied.
    """
    linear_score = np.matmul(features, weights)
    return sigmoid(linear_score + bias)

In [344]:
# Two unit features with unit weights and bias -1: the linear score is
# 1*1 + 1*1 - 1 = 1, so `output` holds sigmoid(1).
weights=[1,1]
features=[1,1]
bias=-1
output = output_formula(features, weights, bias)

In [345]:
# Error (log-loss) formula
def error_formula(y, output, eps=1e-15):
    """Element-wise log-loss ``-y*ln(output) - (1 - y)*ln(1 - output)``.

    Parameters
    ----------
    y : array_like or scalar
        True label(s), 0 or 1. Lists are accepted and converted to arrays.
    output : array_like or scalar
        Predicted probability of the positive class.
    eps : float, optional
        ``output`` is clipped to ``[eps, 1 - eps]`` before taking logs so a
        prediction of exactly 0 or 1 gives a large finite loss instead of
        ``inf``/``nan`` (``0 * ln(0)`` evaluates to ``nan`` in floating point).

    Returns
    -------
    numpy.ndarray or numpy.float64
        Per-sample loss, with the broadcast shape of ``y`` and ``output``.
    """
    # Coerce first: unary minus and `1 - y` are not defined for plain lists.
    y = np.asarray(y, dtype=float)
    output = np.clip(np.asarray(output, dtype=float), eps, 1.0 - eps)
    return -y * np.log(output) - (1.0 - y) * np.log(1.0 - output)

In [346]:
# Log-loss of the `output` prediction from the earlier cell against a positive
# label; y is a one-element array, so the result is a one-element array too.
y=np.array([1])
error_formula(y,output)

array([0.31326169])

In [347]:
# Gradient descent step
def update_weights(x, y, weights, bias, learnrate):
    """Perform one gradient-descent update for a single sample.

    The prediction for ``x`` is obtained from ``output_formula``; the residual
    ``y - prediction`` scaled by ``learnrate`` is then added to the bias and,
    multiplied by ``x``, to the weights.

    Returns the pair ``(updated_weights, updated_bias)``; the arguments
    themselves are not modified.
    """
    residual = y - output_formula(x, weights, bias)
    step = learnrate * residual
    updated_weights = weights + step * x
    updated_bias = bias + step
    return updated_weights, updated_bias

In [348]:
# One gradient step on a single positive sample, starting from random weights.
x=np.array([1,1])
# Draw the initial weights from a seeded generator so the displayed result is
# reproducible on Restart & Run All (an unseeded draw changes on every run).
weights = np.random.default_rng(42).normal(scale=1 / 2**.5, size=2)
y=np.array([1])
bias=1
update_weights(x,y,weights, bias, 0.001)

(array([-1.00349853,  1.24460059]), array([1.00022432]))