# Chapter1 Review of Neural Network

## 1.1 Math and Python

## 1.2 Prediction of Neural Network

In [6]:
import numpy as np

def sigmoid(x):
    """Return the element-wise logistic sigmoid, 1 / (1 + exp(-x))."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

# Random input of shape (10, 2) drawn from a standard normal distribution.
x = np.random.randn(10, 2)
# First affine transform maps 2 columns to 4.
W1 = np.random.randn(2, 4)
b1 = np.random.randn(4)
# Second affine transform maps 4 columns to 3.
W2 = np.random.randn(4, 3)
b2 = np.random.randn(3)

# Forward pass: affine -> sigmoid -> affine (b1 and b2 broadcast over the 10 rows).
h = np.dot(x, W1) + b1
a = sigmoid(h)
s = np.dot(a, W2) + b2

# s has shape (10, 3); no random seed is set in this cell, so the values change on every run.
print(s)

[[ 2.10134872 -1.41417644 -1.16212914]
 [ 2.43040025 -1.29084118 -1.66664124]
 [ 2.63500701 -1.4937869  -1.69107609]
 [ 2.37488515 -1.54861461 -1.36167413]
 [ 1.68977795 -0.48500454 -1.26002134]
 [ 2.36920073 -1.55192972 -1.35136696]
 [ 1.03172442  0.87037616 -1.08451248]
 [ 1.88929268 -0.78081083 -1.33039661]
 [ 1.93177135 -1.12932423 -1.15879173]
 [ 0.97835534  0.36948306 -0.83115496]]


In [7]:
import numpy as np

class Sigmoid:
    """Sigmoid activation layer; it holds no parameters."""

    def __init__(self):
        # Empty list so this layer exposes the same `params` attribute as Affine.
        self.params = list()

    def forward(self, x):
        """Apply 1 / (1 + exp(-x)) element-wise and return the result."""
        denominator = 1 + np.exp(-x)
        return 1 / denominator
    

class Affine:
    """Affine layer computing `np.dot(x, W) + b`."""

    def __init__(self, W, b):
        # Stored as [weight, bias]; forward unpacks them in this order.
        self.params = [W, b]

    def forward(self, x):
        """Return the affine transform of `x` using the stored weight and bias."""
        weight, bias = self.params
        return np.dot(x, weight) + bias

In [8]:
class TwoLayerNet:
    """Network made of three layers applied in order: Affine -> Sigmoid -> Affine."""

    def __init__(self, input_size, hidden_size, output_size):
        # Parameters are drawn from np.random.randn in the order W1, b1, W2, b2.
        first_weight = np.random.randn(input_size, hidden_size)
        first_bias = np.random.randn(hidden_size)
        second_weight = np.random.randn(hidden_size, output_size)
        second_bias = np.random.randn(output_size)

        self.layers = [
            Affine(first_weight, first_bias),
            Sigmoid(),
            Affine(second_weight, second_bias),
        ]

        # Flat list of every layer's parameters, in layer order.
        self.params = [param for layer in self.layers for param in layer.params]

    def predict(self, x):
        """Feed `x` through each layer in order and return the final output."""
        out = x
        for layer in self.layers:
            out = layer.forward(out)
        return out


In [9]:
# Random input of shape (10, 2), matching the network's input_size of 2.
x = np.random.randn(10, 2)
model = TwoLayerNet(2, 4, 3) # input_size, hidden_size, output_size
s = model.predict(x)
# Printed values change on every run because both x and the model parameters come from np.random.randn.
print(s)

[[-1.60663214  1.72760197  1.8463626 ]
 [-1.50555447  1.5165633   1.51227392]
 [-1.39396086  0.85685637  1.09181215]
 [-1.56936315  1.26091425  1.81409576]
 [-1.62468107  1.71787897  1.91875439]
 [-1.53733119  1.53035004  1.69287601]
 [-1.50490494  1.44085546  1.57030694]
 [-1.40561903  0.77334277  1.17102664]
 [-1.4519405   1.13861015  1.36879361]
 [-1.58880335  1.79276497  1.66510344]]


## 1.3 Learning of Neural Network

### 1.3.1 Loss Function
![img](./fig/1_3_1.drawio.svg)

The softmax function $y_k$ and the cross-entropy loss $L$ are defined as follows:
$$
    \begin{align}
        y_k &= \frac{\exp(s_k)}{\sum_{i=1}^{n} \exp(s_i)} \\
        L &= -\sum_{k=1}^{n} t_k \log{y_k}
    \end{align}
$$


In [10]:
def softmax(x):
    """Compute the softmax over the last axis of `x`.

    Args:
        x: Array-like of scores. For 1-D input the whole vector is
            normalised; for 2-D (and higher-rank) input each slice along
            the last axis is normalised independently.

    Returns:
        A new array with the same shape as `x` whose entries along the last
        axis are non-negative and sum to 1. The input is not modified.
        A 0-d (scalar) input has no axis to normalise over and is returned
        unchanged.
    """
    scores = np.asarray(x)
    if scores.ndim == 0:
        return x

    # Subtracting the maximum does not change the result mathematically but
    # keeps exp() from overflowing on large scores.
    shifted = scores - scores.max(axis=-1, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / exp_scores.sum(axis=-1, keepdims=True)

def cross_entropy_error(y, t):
    """Mean cross-entropy between predicted probabilities and targets.

    Args:
        y: Predicted probabilities, shape (N, K) for a batch or (K,) for a
            single sample.
        t: Targets, given either as integer class indices (one per sample)
            or as one-hot rows with the same number of elements as `y`.

    Returns:
        The cross-entropy averaged over the batch, as a scalar.
    """
    y = np.asarray(y)
    t = np.asarray(t)
    if y.ndim == 1:
        # Treat a single sample as a batch of one.
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # One-hot targets have as many elements as y; reduce them to class
    # indices so they can be used to pick the predicted probability.
    if t.size == y.size:
        t = t.reshape(y.shape).argmax(axis=1)

    batch_size = y.shape[0]
    # Flatten the labels so that (N,), (N, 1) and (1, 1) shapes all select
    # exactly one probability per sample instead of broadcasting.
    labels = t.reshape(batch_size)
    # The small constant keeps log() finite when a probability is exactly 0.
    return -np.sum(np.log(y[np.arange(batch_size), labels] + 1e-7)) / batch_size


class SoftmaxWithLoss:
    """Softmax followed by mean cross-entropy loss.

    Targets may be integer class indices (one per sample) or one-hot rows
    with the same number of elements as the scores.
    """

    def __init__(self):
        self.params = []
        self.grads = []
        self.loss = None  # loss returned by the most recent forward()
        self.y = None     # softmax output cached for backward()
        self.t = None     # targets exactly as passed to forward()

    def forward(self, x, t):
        """Compute and return the loss for scores `x` and targets `t`.

        Args:
            x: Scores, shape (N, K) or (K,).
            t: Class indices or one-hot targets (NumPy array).

        Returns:
            The mean cross-entropy loss, also stored in `self.loss`.
        """
        self.t = t
        self.y = softmax(x)
        # The loss indexes y by class label, so one-hot targets are reduced
        # to class indices before it is called.
        if self.t.size == self.y.size:
            labels = self.t.argmax(axis=-1)
        else:
            labels = self.t
        self.loss = cross_entropy_error(self.y, labels)
        return self.loss

    def backward(self, dout=1):
        """Return the gradient of the loss with respect to the scores.

        Args:
            dout: Upstream gradient of the loss (defaults to 1).

        Returns:
            An array shaped like the cached softmax output holding
            `(y - one_hot(t)) * dout / batch_size`. The cached `self.y` is
            not modified.
        """
        y, t = self.y, self.t
        # A 1-D output is a single sample, matching how the loss averages.
        batch_size = y.shape[0] if y.ndim > 1 else 1

        if t.size == y.size:
            # One-hot targets: subtract them directly.
            dx = y - t.reshape(y.shape)
        else:
            # Class-index targets: subtract 1 at each sample's target class.
            # Work on a copy so the cached softmax output stays intact.
            dx = y.reshape(batch_size, -1).copy()
            dx[np.arange(batch_size), t.reshape(batch_size)] -= 1
            dx = dx.reshape(y.shape)

        return dx * dout / batch_size

### 1.3.2 Differentiation and Gradient


### 1.3.4 Calculation Graph

#### 1.3.4.2 Branch(Copy) Node

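The result of backpropagation through a branch (copy) node is the sum of the gradients flowing back from the branched outputs. Writing the two copies of $x$ as $x_1$ and $x_2$:
$$
    \frac{\partial L}{\partial x} = \frac{\partial L}{\partial x_1} + \frac{\partial L}{\partial x_2}
$$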

#### 1.3.4.3 Repeat node
