In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np

Linear regression and logistic regression can be viewed as the simplest neural networks: networks without a hidden layer.

The basic steps are similar: **feedforward**, **compute cost**, **backpropagate**, **adjust weights**.

Neural network units can act as logic gates, and with billions of logic gates we can build a computer.

| x1| x2|and|or |nand|nor|xor|
|:--:|:-:|:-:|:-:|:-:|:-:|:-:|
| 0 | 0 | 0 | 0 | 1 | 1 | 0 |  
| 0 | 1 | 0 | 1 | 1 | 0 | 1 | 
| 1 | 0 | 0 | 1 | 1 | 0 | 1 | 
| 1 | 1 | 1 | 1 | 0 | 0 | 0 | 

## Neural network

Build logic gates with a neural network (using the sigmoid as the activation function).

### Step 1: single unit

### 1. Feedforward
\begin{equation}
z = XW
\end{equation}

\begin{equation}
\hat{y} = \sigma(z)
\end{equation}

\begin{equation}
\sigma(z) = \frac{1}{1+e^{-z}}
\end{equation}

### 2. Compute cost function
\begin{equation}
Loss(y, \hat{y}) = -\frac{1}{n}\left(y^T\log(\hat{y})+(1-y)^T\log(1-\hat{y})\right)
\end{equation}

### 3. Backpropagation
\begin{eqnarray}
\frac{\partial Loss(y, \hat{y})}{\partial W} &=& \frac{\partial Loss(y, \hat{y})}{\partial \hat{y}}\cdot\frac{\partial \hat{y}}{\partial z}\cdot\frac{\partial z}{\partial W} \\
&=& \frac{1}{n}X^T(\hat{y}-y)
\end{eqnarray}

### 4. Gradient descent
\begin{equation}
W = W - \alpha  \frac{\partial Loss(y, \hat{y})}{\partial W}
\end{equation}

In [None]:
class NeuralNetwork:
    """Single sigmoid unit (logistic regression) trained by batch gradient descent.

    A column of ones is prepended to the inputs so that the first weight acts
    as the bias term.

    Attributes:
        input: Training inputs with the bias column prepended.
        y: Training targets, stored as passed in. The gradient and the loss
            assume one column with one row per sample.
        weights: Column vector of weights (bias first), initialised with
            ``np.random.rand``.
        output: Predictions from the most recent forward pass; all zeros
            until a forward pass has run.
    """

    # Predictions are clipped into [_EPS, 1 - _EPS] before taking logarithms
    # so that an exact 0 or 1 cannot turn the loss into inf/NaN.
    _EPS = 1e-12

    def __init__(self, x, y, iterations, learning_rate):
        """Store the training data and hyper-parameters and initialise the weights.

        Args:
            x: 2-D array of inputs, one sample per row.
            y: Targets, one row per sample.
            iterations: Number of gradient-descent steps run by ``train``.
            learning_rate: Step size applied to the gradient.
        """
        self.input       = np.c_[np.ones((x.shape[0], 1)), x]
        self.y           = y
        self._iterations = iterations
        self._rate       = learning_rate
        self.weights     = np.random.rand(self.input.shape[1], 1)
        self.output      = np.zeros(y.shape)

    def _sigmoid(self, x):
        """Return the element-wise logistic function of ``x``."""
        return 1.0 / (1 + np.exp(-x))

    def _feedforward(self):
        """Recompute ``output`` from the current weights."""
        self.output = self._sigmoid(self.input.dot(self.weights))

    def _backprop(self):
        """Take one gradient-descent step on the cross-entropy loss."""
        # With a sigmoid output and cross-entropy loss the gradient reduces
        # to X^T (y_hat - y) / n.
        delta_weights = self.input.T.dot(self.output - self.y) / len(self.y)
        self.weights -= self._rate * delta_weights

    def get_loss(self):
        """Return the mean binary cross-entropy between ``y`` and ``output``.

        The sum over samples is divided by the number of samples; taking
        ``.mean()`` of the 1x1 matrix product would leave the sum unscaled.
        """
        y_hat = np.clip(self.output, self._EPS, 1 - self._EPS)
        log_likelihood = np.sum(
            self.y * np.log(y_hat) + (1 - self.y) * np.log(1 - y_hat)
        )
        return -float(log_likelihood) / len(self.y)

    def train(self):
        """Run the configured number of gradient-descent iterations."""
        for _ in range(self._iterations):
            self._feedforward()
            self._backprop()
        # Refresh the predictions so that `output` (and therefore
        # `get_loss`) reflects the weights produced by the final update.
        self._feedforward()
            
# Training hyper-parameters shared by every gate.
iterations, learning_rate = 5000, 0.5

# Every combination of the two binary inputs, one sample per row.
x = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

# Expected output of each gate as a column aligned row by row with `x`.
gates = {
    gate: np.array(truth_column).reshape(-1, 1)
    for gate, truth_column in (
        ('AND',  [0, 0, 0, 1]),
        ('OR',   [0, 1, 1, 1]),
        ('NAND', [1, 1, 1, 0]),
        ('NOR',  [1, 0, 0, 0]),
        ('XOR',  [0, 1, 1, 0]),
    )
}

# Fit a fresh network for each gate, then report its predictions and loss.
for key, value in gates.items():
    nn = NeuralNetwork(
        x=x,
        y=value,
        iterations=iterations,
        learning_rate=learning_rate,
    )
    nn.train()
    print(f"{key}:\n", nn.output)
    print(f'loss: {nn.get_loss()}')

A neural network without a hidden layer can be viewed as a single logistic regression unit. It is a simple linear classifier, which is why it cannot learn XOR: the XOR outputs are not linearly separable.

## Neural network

Build logic gates with a neural network.

### Step 2: with 1 hidden layer

### 1. Feedforward
\begin{equation}
z_1 = XW_1
\end{equation}

\begin{equation}
a_1 = \sigma(z_1)
\end{equation}

\begin{equation}
z_2 = a_1W_2
\end{equation}

\begin{eqnarray}
\hat{y} &=& \sigma(z_2) \\
&=& \sigma(\sigma(XW_1)W_2)
\end{eqnarray}

\begin{equation}
\sigma(z) = \frac{1}{1+e^{-z}}
\end{equation}

### 2. Compute cost function
\begin{equation}
Loss(y, \hat{y}) = \frac{1}{2n}(\hat{y} - y)^T \cdot (\hat{y} - y)
\end{equation}

### 3. Backpropagation
\begin{eqnarray}
\frac{\partial Loss(y, \hat{y})}{\partial W_2} &=& \frac{\partial Loss(y, \hat{y})}{\partial \hat{y}}\cdot\frac{\partial \hat{y}}{\partial z_2}\cdot\frac{\partial z_2}{\partial W_2} \\
&=& \frac{1}{n}a_1^T\left((\hat{y}-y)\odot \hat{y}\odot(1-\hat{y})\right)
\end{eqnarray}

\begin{eqnarray}
\frac{\partial Loss(y, \hat{y})}{\partial W_1} &=& \frac{\partial Loss(y, \hat{y})}{\partial \hat{y}}\cdot\frac{\partial \hat{y}}{\partial z_2}\cdot\frac{\partial z_2}{\partial a_1}\cdot\frac{\partial a_1}{\partial z_1}\cdot\frac{\partial z_1}{\partial W_1} \\
&=& \frac{1}{n}X^T\left(\left[\left((\hat{y}-y)\odot \hat{y}\odot(1-\hat{y})\right)W_2^T\right]\odot a_1\odot(1-a_1)\right)
\end{eqnarray}

### 4. Gradient descent
\begin{equation}
W_1 = W_1 - \alpha  \frac{\partial Loss(y, \hat{y})}{\partial W_1}
\end{equation}

\begin{equation}
W_2 = W_2 - \alpha  \frac{\partial Loss(y, \hat{y})}{\partial W_2}
\end{equation}

In [None]:
class NeuralNetwork:
    """Sigmoid network with one hidden layer, trained by batch gradient descent.

    The loss is the squared error. A column of ones is prepended both to the
    inputs and to the hidden activations, so the first row of each weight
    matrix acts as the bias of the layer it feeds.

    Attributes:
        input: Training inputs with the bias column prepended.
        y: Training targets, stored as passed in; one row per sample.
        weights1: Input-to-hidden weights, one column per hidden neuron.
        weights2: Hidden-to-output weights (bias row first), a single column.
        layer1: Hidden activations with the bias column prepended; set by
            the forward pass.
        output: Predictions from the most recent forward pass; all zeros
            until a forward pass has run.
    """

    def __init__(self, x, y, neurons, iterations, learning_rate):
        """Store the training data and hyper-parameters and initialise the weights.

        Args:
            x: 2-D array of inputs, one sample per row.
            y: Targets, one row per sample.
            neurons: Number of hidden units.
            iterations: Number of gradient-descent steps run by ``train``.
            learning_rate: Step size applied to the gradients.
        """
        self.input       = np.c_[np.ones((x.shape[0], 1)), x]
        self.y           = y
        self._neurons    = neurons
        self._iterations = iterations
        self._rate       = learning_rate
        self.weights1    = np.random.rand(self.input.shape[1], self._neurons)
        # One extra row for the bias unit prepended to the hidden layer.
        self.weights2    = np.random.rand(self._neurons + 1, 1)
        self.output      = np.zeros(y.shape)

    def _sigmoid(self, x):
        """Return the element-wise logistic function of ``x``."""
        return 1.0 / (1 + np.exp(-x))

    def _sigmoid_derivative(self, x):
        """Return the sigmoid derivative given ``x``, an already-activated value."""
        return x * (1 - x)

    # Original (misspelled) name, kept so existing references keep working.
    _sigmoid_deritive = _sigmoid_derivative

    def _feedforward(self):
        """Recompute ``layer1`` and ``output`` from the current weights."""
        _layer1 = self._sigmoid(self.input.dot(self.weights1))
        self.layer1 = np.c_[np.ones((_layer1.shape[0], 1)), _layer1]
        self.output = self._sigmoid(self.layer1.dot(self.weights2))

    def _backprop(self):
        """Take one gradient-descent step on both weight matrices."""
        # Error signal at the output pre-activation, already scaled by 1/n.
        e = 1 / len(self.y) * (self.output - self.y) * self._sigmoid_derivative(self.output)
        delta_weights2 = self.layer1.T.dot(e)
        # The bias unit of the hidden layer is a constant with no incoming
        # weights, so its row of weights2 and its column of layer1 are
        # skipped when propagating the error back to weights1.
        delta_weights1 = self.input.T.dot(
            e.dot(self.weights2[1:].T) * self._sigmoid_derivative(self.layer1[:, 1:])
        )

        self.weights1 -= self._rate * delta_weights1
        self.weights2 -= self._rate * delta_weights2

    def get_loss(self):
        """Return the squared-error loss ``sum((output - y)**2) / (2 * n)``.

        ``n`` is the number of samples in this instance's own targets.
        """
        residual = self.output - self.y
        return float(np.sum(residual ** 2)) / (2 * len(self.y))

    def train(self):
        """Run the configured number of gradient-descent iterations."""
        for _ in range(self._iterations):
            self._feedforward()
            self._backprop()
        # Refresh the predictions so that `output` (and therefore
        # `get_loss`) reflects the weights produced by the final update.
        self._feedforward()
            
# Training hyper-parameters shared by every gate.
iterations, learning_rate = 5000, 0.5
# Number of units in the hidden layer.
neurons = 3

# Every combination of the two binary inputs, one sample per row.
x = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])

# Expected output of each gate as a column aligned row by row with `x`.
gates = {
    gate: np.array(truth_column).reshape(-1, 1)
    for gate, truth_column in (
        ('AND',  [0, 0, 0, 1]),
        ('OR',   [0, 1, 1, 1]),
        ('NAND', [1, 1, 1, 0]),
        ('NOR',  [1, 0, 0, 0]),
        ('XOR',  [0, 1, 1, 0]),
    )
}

# Fit a fresh network for each gate, then report its predictions and loss.
for key, value in gates.items():
    nn = NeuralNetwork(
        x=x,
        y=value,
        neurons=neurons,
        iterations=iterations,
        learning_rate=learning_rate,
    )
    nn.train()
    print(f"{key}:\n", nn.output)
    print(f'loss: {nn.get_loss()}')