# Activation function

\begin{align*}
Binary \space Step \space Function (x) = 
\begin{cases}
1 \quad if \quad \sum_{i=1}^{m} w_i x_i + b \ge threshold \\
0 \quad if \quad \sum_{i=1}^{m} w_i x_i + b \lt threshold \\
\end{cases}
\end{align*}

\begin{align*}
ReLU (x) = 
\begin{cases}
0 \quad if \quad x \lt 0 \\
x \quad if \quad x \ge 0
\end{cases}
\end{align*}

\begin{align*}
Sigmoid (x) = 

\frac{1}{1 + e^{-x}} \in (0, 1)

\end{align*}

\begin{align*}
Tanh (x) = 

\frac{ e^{x} - e^{-x}  }{ e^{x} + e^{-x}} \in (-1, 1)

\end{align*}

# Derivative / Gradient

## Derivative

$$
f'(x) = \lim_{\Delta x \rightarrow 0} \frac { f(x+\Delta x) - f(x) } {\Delta x} \\ 
y' = \lim_{\Delta x \rightarrow 0} \frac { \Delta y } {\Delta x} \\
\frac {dy} {dx} = \lim_{\Delta x \rightarrow 0} \frac { \Delta y } {\Delta x}
$$

## Gradient

$$
\nabla f = [ \frac{\partial f}{\partial x} , \frac{\partial f}{\partial y} , \frac{\partial f}{\partial z} ]  \\ 
\nabla f = [ \frac{\partial f}{\partial x}i + \frac{\partial f}{\partial y}j + \frac{\partial f}{\partial z}k ]
$$

## Direction of increase of function

$$
D_{\vec{b}}f = \nabla f \cdot \vec{b} = \| \nabla f \| \| \vec{b} \| \cos{\theta}
$$

## Gradient Descent

$$
\vec{x_0} = (x_0, y_0) \\
\vec{x_{n+1}} = \vec{x_n} - \eta \nabla f(\vec{x_n})
$$

$w_{j, i}$ là trọng số kết nối từ ngõ vào neural thứ i đến neural thứ j ở lớp sau nó
$$
a_j = \sum_{i=1}^{n}x_i w_{j,i} + b_j \\
o_j = \sigma (a_j) = \frac{1}{1 + e^{-a_j}} \\
$$

# Cost function 

$$
MSE = \frac{1}{n} \sum_{i=1}^{n} (y_i - (ax_i+b))^2 \\
$$

# Update

$$
J = Costfunction() \\
w = w - \eta\frac{\partial J}{\partial w}
$$

# Train new neural network with gradient descent

$$
a_j = \sum_{i=1}^{20}x_i w_{j, i} + b_j \\
o_j = \sigma(a_j)=\frac{1}{1+e^{-a_j}}  \\
J = \frac{1}{5} \sum_{t=1}^{5} \sum_{k=1}^{5} (y_k^t - o_k^t)^2 \quad \text{trong đó } k \text{ là số ngõ ra, } t \text{ là số mẫu}
$$

In [76]:
import numpy as np  
import matplotlib.pyplot as plt 

In [77]:
# Training inputs: five binary glyphs (the digits 1-5), each drawn on a
# 5-row by 4-column grid and listed here one grid row per string.
_glyph_rows = (
    ("0010", "0110", "0010", "0010", "1111"),  # digit 1
    ("1111", "0001", "1111", "1000", "1111"),  # digit 2
    ("1111", "0001", "1111", "0001", "1111"),  # digit 3
    ("1001", "1001", "1111", "0001", "0001"),  # digit 4
    ("1111", "1000", "1111", "0001", "1111"),  # digit 5
)

# Flatten every glyph row-major into a 20-pixel vector; one sample per matrix row.
char = np.matrix(
    [[int(pixel) for pixel in "".join(rows)] for rows in _glyph_rows],
    dtype=np.int8,
)

# One-hot targets: sample k should activate output k only (5x5 identity).
target = np.matrix(np.eye(5, dtype=int))

In [None]:
# Preview the five training glyphs side by side in a single row of subplots.
for i in range(1, 6):
    glyph = char[i - 1, :].reshape(5, 4)  # unflatten the 20 pixels into 5 rows x 4 columns
    plt.subplot(1, 5, i)  # subplot positions are 1-based, hence the 1..5 range
    imgplot = plt.imshow(glyph, cmap='gray')

In [79]:
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), applied element-wise.

    Args:
        x: A scalar or NumPy array/matrix.

    Returns:
        The sigmoid of ``x``; array inputs keep their shape.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

In [80]:
# State for the gradient-descent run.
alpha = 0.1  # learning rate: scales each weight update
J = np.zeros(100)  # buffer for recorded cost values
# 5x20 weight matrix (one row per output, one column per input pixel),
# initialised with small uniform random values drawn from [-0.1, 0.1).
w = np.matrix(np.random.uniform(low=-0.1, high=0.1, size=(5, 20)))

In [None]:
# Batch gradient descent for a single-layer sigmoid network (no bias term).
epochs = 2
# One cost slot per epoch, so the plot below contains only values that were
# actually computed and the buffer can never be too small for `epochs`.
J = np.zeros(epochs)
for epoch in range(epochs):
    # Accumulates the descent direction (negative gradient) over the whole batch.
    dJ_dw = np.zeros_like(w)
    print(dJ_dw)
    for sample in range(char.shape[0]):  # iterate over every training sample
        X = char[sample, :]      # 1 x 20 input row
        y = target[sample, :]    # 1 x 5 one-hot target row
        y_pred = sigmoid(np.dot(X, w.T))

        # Output delta: error (y - o) times the sigmoid derivative o * (1 - o).
        d = np.multiply(np.multiply((y - y_pred), y_pred), (1 - y_pred))
        # Outer product (5 x 1) * (1 x 20) gives this sample's 5 x 20 contribution.
        dJ_dw += d.T * X

    # Update: dJ_dw already points downhill (it uses y - o), hence "+=".
    w += alpha * dJ_dw
    print(w)

    # Evaluate the cost on the full training set with the updated weights.
    # Compare against the whole `target` matrix (not just the last sample's
    # row) and store the result at this epoch's index.
    A = sigmoid(char * w.T)
    J[epoch] = np.mean(np.power((target - A), 2))

    print(dJ_dw)

plt.plot(J)
plt.ylabel('Cost function')
plt.xlabel('Iteration')
plt.show()  # must be called; a bare `plt.show` does nothing in a script