In [1]:
import numpy as np

# 1 Softmax
### $\sigma(z_i)=\frac{e^{z_i}}{\sum_{j=1}^K e^{z_j}}$ for $i=1,\ldots,K$ and $z=(z_1,\ldots,z_K)\in \mathbb{R}^K$

### Python Implementation

In [2]:
def softmax(L):
    """Return the softmax of the scores in ``L``.

    Args:
        L: Array-like of real-valued scores.

    Returns:
        A float ndarray with the same shape as ``L`` holding
        ``exp(L) / sum(exp(L))``. The normalisation runs over *all*
        elements of ``L`` (no axis is applied). An empty input yields an
        empty array.
    """
    scores = np.asarray(L, dtype=float)
    if scores.size == 0:
        # Nothing to normalise; np.max would raise on an empty array.
        return scores
    # Softmax is unchanged when a constant is subtracted from every score.
    # Shifting by the maximum makes the largest exponent exactly 0, so
    # np.exp can no longer overflow to inf (which would produce nan).
    exp_l = np.exp(scores - np.max(scores))
    return exp_l / exp_l.sum()

### Usage

In [3]:
# Three example scores; the cell's last expression displays their softmax.
L = [5,6,7]
softmax(L)

array([0.09003057, 0.24472847, 0.66524096])

# 2 Sigmoid

### Python Implementation

In [4]:
# Activation (sigmoid) function
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    Args:
        x: Scalar or array-like of real numbers (plain Python lists are
            accepted).

    Returns:
        A NumPy float scalar for scalar input, otherwise a float ndarray
        with the same shape as ``x``.
    """
    x = np.asarray(x, dtype=float)
    # exp(-|x|) always lies in (0, 1], so it cannot overflow the way
    # exp(-x) does for very negative x.
    e = np.exp(-np.abs(x))
    # For x >= 0: 1 / (1 + e^-x).  For x < 0 the algebraically equal
    # form e^x / (1 + e^x) is used, where e^x == exp(-|x|).
    out = np.where(x >= 0, 1 / (1 + e), e / (1 + e))
    # Indexing with () turns a 0-d array back into a scalar and leaves
    # arrays of higher rank unchanged.
    return out[()]

def sigmoid_v2(X):
    """Alternative sigmoid written with explicit NumPy calls.

    Computes ``1 / (1 + exp(-X))`` element-wise; ``X`` may be a scalar,
    a list or an ndarray.
    """
    negated = np.dot(-1, X)
    denominator = 1 + np.exp(negated)
    return np.divide(1, denominator)

### Usage

In [5]:
# Inputs ranging from very negative to very positive, with 0 in the middle.
X = np.array([-100, -10, 0, 10, 100])
sigmoid(X)

array([3.72007598e-44, 4.53978687e-05, 5.00000000e-01, 9.99954602e-01,
       1.00000000e+00])

# 3 Sigmoid derivative

### Python Implementation

In [6]:
def sigmoid_prime(x):
    """Return the derivative of the sigmoid at ``x``.

    Uses the identity sigma'(x) = sigma(x) * (1 - sigma(x)).
    """
    activation = sigmoid(x)
    return activation * (1 - activation)

### Usage

# 4 Prediction Formula
$\hat{y}=\sigma(WX+b)=\sigma(w_1x_1+w_2x_2+\ldots+w_nx_n+b)$

### Python Implementation

In [7]:
# Output (prediction) formula
def predict(features, weights, bias):
    """Return ``sigmoid(features @ weights + bias)``.

    The linear score is computed with ``np.matmul`` and then passed
    through the sigmoid activation.
    """
    linear_score = np.matmul(features, weights) + bias
    return sigmoid(linear_score)

### Usage

In [8]:
# With all-zero weights, features and bias the linear score is 0,
# so the prediction is sigmoid(0).
weights =[0,0]
features=[0,0]

bias=0

predict(features, weights, bias)

0.5

# 5 Log Loss Function
### $loss=-y_iln(p_i)-(1-y_i)ln(1-p_i)$

### Python Implementation

In [9]:
def error_formula(y, predicted, eps=1e-15):
    """Return the element-wise log loss ``-y*ln(p) - (1-y)*ln(1-p)``.

    Args:
        y: Scalar or array-like of target labels.
        predicted: Scalar or array-like of predicted probabilities; must
            broadcast against ``y``.
        eps: Predictions are clipped to ``[eps, 1 - eps]`` before the
            logarithm is taken.

    Returns:
        The per-element loss, with the broadcast shape of ``y`` and
        ``predicted``.
    """
    y = np.asarray(y, dtype=float)
    # A prediction of exactly 0 or 1 would give log(0) = -inf, and
    # 0 * -inf = nan even when the prediction is correct. Clipping keeps
    # every term finite.
    p = np.clip(np.asarray(predicted, dtype=float), eps, 1 - eps)
    return -y * np.log(p) - (1 - y) * np.log(1 - p)

### Usage

In [10]:
# Two samples (the rows of `features`) with labels 1 and 0.
y = np.array([[1, 0]])

weights  = np.array([1,1])
features = np.array([[1,1],[0.1, 0.5]])

bias = -1

predicted = predict(features, weights, bias)

# y has shape (1, 2) and predicted has shape (2,), so the result
# broadcasts to shape (1, 2): one loss value per sample.
error_formula(y,predicted)

array([[0.31326169, 0.51301525]])

# 6 Cross Entropy
### $\text{Cross-Entropy}= -\sum_{i=1}^n \left[y_i\ln(p_i)+(1-y_i)\ln(1-p_i)\right]$

### Python Implementation

In [11]:
def cross_entropy(y, predicted):
    """Return the total cross entropy: the sum of the per-element log losses."""
    per_element_loss = error_formula(y, predicted)
    return np.sum(per_element_loss)
    
def cross_entropy_v2(Y, P, eps=1e-15):
    """Return the cross entropy ``-sum(Y*ln(P) + (1-Y)*ln(1-P))``.

    Self-contained variant that accepts plain Python sequences.

    Args:
        Y: Array-like of target labels.
        P: Array-like of predicted probabilities; must broadcast
            against ``Y``.
        eps: Probabilities are clipped to ``[eps, 1 - eps]`` so that a
            prediction of exactly 0 or 1 cannot produce ``log(0)``.

    Returns:
        The summed cross entropy as a NumPy float scalar.
    """
    # np.float_ was removed in NumPy 2.0; np.asarray with an explicit
    # dtype performs the same conversion on every NumPy version.
    Y = np.asarray(Y, dtype=np.float64)
    P = np.clip(np.asarray(P, dtype=np.float64), eps, 1 - eps)
    return -np.sum(Y * np.log(P) + (1 - Y) * np.log(1 - P))

### Usage

In [12]:
# Two samples (the rows of `features`) with labels 1 and 0.
y = np.array([[1, 0]])

weights  = np.array([1,1])
features = np.array([[1,1],[0.1, 0.5]])

bias = -1

predicted = predict(features, weights, bias)

# cross_entropy adds up the individual log losses of the two samples.
cross_entropy(y,predicted)

0.8262769399181754

In [13]:
# Gradient descent step
def update_weights(x, y, weights, bias, learnrate):
    """Apply one gradient-descent update and return the new parameters.

    The prediction error ``y - predict(x, weights, bias)`` is scaled by
    ``learnrate``; the scaled error times ``x`` is added to the weights
    and the scaled error itself is added to the bias.

    Returns:
        A ``(weights, bias)`` tuple with the updated values.
    """
    residual = y - predict(x, weights, bias)
    scaled_residual = learnrate * residual
    return weights + scaled_residual * x, bias + scaled_residual

In [14]:
# The initial weights are drawn at random, so use a seeded generator:
# without a fixed seed the displayed result changes on every run.
rng = np.random.default_rng(42)
x=np.array([1,1])
weights = rng.normal(scale=1 / 2**.5, size=2)
y=np.array([1])
bias=1
update_weights(x,y,weights, bias, 0.001)

(array([-0.03178006, -0.33806202]), array([1.00034763]))

# 7 Back Propagation error term

$\delta=(y-\hat{y})f^{'}(h)=(y-\hat{y})f^{'}\left(\sum_i w_ix_i\right)$<br>
Remember that $(y-\hat{y})$  is the output error and $f^{'}(h)$ refers to the derivative of the activation function, $f(h)$.<br>
In general in a neural network the error term is:<br>
$\delta_j=(y_j-\hat{y_j})f^{'}(h_j)$ where $h_j$ is the j-th hidden layer<br>
Then we can figure out one weight update $\Delta{w_{ij}}$ as:<br>
$\Delta{w_{ij}}=\alpha\delta_jx_i$ where $\alpha$ is the learning rate and i the i-th input of the j-th layer of the network

In [15]:
def error_term_formula(x, y, output):
    """Return the error term ``(y - output) * sigmoid_prime(x)``.

    Args:
        x: Value at which the sigmoid derivative is evaluated.
        y: Target value.
        output: Value subtracted from ``y`` to form the output error.
    """
    output_error = y - output
    return output_error * sigmoid_prime(x)

### Usage
Now I'll write this out in code for the case of only one output unit. We'll also be using the sigmoid as the activation function $f(h)$.

In [22]:
# Worked example: one gradient-descent step for a single sigmoid output unit.
learnrate = 0.5
x = np.array([1, 2, 3, 4])
y = np.array(0.5)

# Initial weights
w = np.array([0.5, -0.5, 0.3, 0.1])

### Calculate one gradient descent step for each weight
### Note: Some steps have been consolidated, so there are
###       fewer variable names than in the above sample code

# Linear combination of inputs and weights (the input to the output unit)
h = np.matmul(x, w)

# Output of the network: the sigmoid activation applied to h
nn_output = sigmoid(h)

# Output error: target minus network output
error = y - nn_output

# Error term: the output error scaled by the sigmoid gradient at h.
# The gradient has no variable of its own; it is computed inline.
error_term = (error) * sigmoid_prime(h)

# Change in weights: learning rate * error term * input, one entry per weight
del_w = learnrate * error_term * x

print('Neural Network output:')
print(nn_output)
print('Amount of Error:')
print(error)
print('Change in Weights:')
print(del_w)

Neural Network output:
0.6899744811276125
Amount of Error:
-0.1899744811276125
Change in Weights:
[-0.02031869 -0.04063738 -0.06095608 -0.08127477]


We can also compute the error term with the `error_term_formula` function defined above:

In [24]:
# Error term computed through the helper instead of inline.
error_term = error_term_formula(h, y, nn_output)
# One weight change per input. Iterating over x avoids hard-coding the
# number of inputs, so the cell keeps working if x changes length.
del_w = [learnrate * error_term * x_i for x_i in x]
# Vectorised alternative (yields an ndarray instead of a list):
# del_w = learnrate * error_term * x
del_w

[-0.020318691802303994,
 -0.04063738360460799,
 -0.06095607540691198,
 -0.08127476720921598]