In [31]:
import numpy as np
import matplotlib.pyplot as plt
trace = False
trace_forward = False
class FC:
    def __init__(self, in_num, out_num, lr = 0.001):
        self._in_num = in_num
        self._out_num = out_num
        self.w = np.random.randn(out_num, in_num)
        self.b = np.zeros((out_num,1))
        self.lr = lr
    
    def _sigmoid(self, in_data):
        return 1/(1 + np.exp(-in_data))
    def forward(self, in_data):
        # Z = W * X + b    X = sigmoid(Z) 
        self.topVal = self._sigmoid(np.dot(self.w, in_data) + self.b)
        self.bottomVal = in_data
        return self.topVal
        
    def backward(self, loss):
        residual_z = loss * self.topVal * (1 - self.topVal)
        grad_w = np.dot(residual_z, self.bottomVal.T)
        # sum and mean are all right , but sum needs less iteration times
        grad_b = np.sum(residual_z)
        self.w -= self.lr * grad_w
        self.b -= self.lr * grad_b
        residual_x = np.dot(self.w.T, residual_z)
        return residual_x

class SquareLoss:
    '''
    Same as above, not thread safe
    '''
    def forward(self, y, t):
        self.loss = y - t
        if trace:
            print '=== Loss ==='.format(self.loss.shape)
            print self.loss
        return np.sum(self.loss * self.loss) /  self.loss.shape[1] / 2
    def backward(self):
        if trace:
            print '=== loss {0} ==='.format(self.loss.shape)
            print self.loss
        return self.loss
class Net:
    def __init__(self, input_num=2, hidden_num=4, out_num = 1, lr = 0.1):
        self.fc1 = FC(input_num, hidden_num, lr)
        self.fc2 = FC(hidden_num, out_num, lr)
        self.loss = SquareLoss()
    
    def train(self, X, y):
        for i in range(10000):
            layer1out = self.fc1.forward(X)
            layer2out = self.fc2.forward(layer1out)
            loss = self.loss.forward(layer2out, y)
            if i % 1000 == 0:
                print 'iter = {0}, loss ={1}'.format(i, loss)
                print '=== Label vs Prediction ==='
                print 't={0}'.format(y)
                print 'y={0}'.format(layer2out)
            layer2loss = self.loss.backward()
            layer1loss = self.fc2.backward(layer2loss)
            saliency = self.fc1.backward(layer1loss)
        layer1out = self.fc1.forward(X)
        layer2out = self.fc2.forward(layer1out)
        print '=== Final ==='
        print 'X={0}'.format(X)
        print 't={0}'.format(y)
        print 'y={0}'.format(layer2out)
        

def conv2(X, k):
    x_row,x_col = X.shape
    k_row,k_col = k.shape
    ret_row, ret_col = x_row - k_row + 1, x_col - k_col + 1
    ret = np.empty((ret_row,ret_col))
    for y in range(ret_row):
        for x in range(ret_col):
            sub = X[y : y + k_row, x:x+k_col]
            ret[y,x] = np.sum(sub * k)
    return ret

class ConvLayer:
    def __init__(self, in_channel, out_channel, kernel_size):
        self.w = np.random.randn(in_channel, out_channel, kernel_size, kernel_size)
        self.b = np.zeros((out_channel))
        
    def _relu(self):
        x[x<0] = 0
        return x
    def forward(self, in_data):
        in_channel, in_row, in_col = in_data.shape()
        out_channel, kernel_row, kernel_col = self.w.shape[1:]
        self.top_val = np.zeros((out_channel, in_row - kernel_row + 1, in_col - kernel_col + 1))
        for j in range(out_channel):
            for i in range(in_channel):
                self.top_val[j] += conv2(in_data[i], self.w[i,j])
            self.top_val[j] += self.b[j]
            self.top_val[j] = self.relu(self.top_val[j])
        return self.top_val
    
    
# example from https://mattmazur.com/2015/03/17/a-step-by-step-backpropagation-example/

# and operation
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]).T
y = np.array([[0],[0],[0],[1]]).T

net = Net(2,4,1,0.1)
net.train(X,y)

iter = 0, loss =0.269525940243
=== Label vs Prediction ===
t=[[0 0 0 1]]
y=[[ 0.80380606  0.93660952  0.79029445  0.90889228]]
iter = 1000, loss =0.0223421469579
=== Label vs Prediction ===
t=[[0 0 0 1]]
y=[[ 0.06629745  0.21128364  0.20459654  0.70361967]]
iter = 2000, loss =0.00690946210576
=== Label vs Prediction ===
t=[[0 0 0 1]]
y=[[ 0.03179877  0.1210863   0.118635    0.84022397]]
iter = 3000, loss =0.00339689604832
=== Label vs Prediction ===
t=[[0 0 0 1]]
y=[[ 0.0218827   0.08561778  0.08418313  0.88918884]]
iter = 4000, loss =0.00212307956075
=== Label vs Prediction ===
t=[[0 0 0 1]]
y=[[ 0.01736496  0.06791147  0.06694525  0.91288249]]
iter = 5000, loss =0.00150529011182
=== Label vs Prediction ===
t=[[0 0 0 1]]
y=[[ 0.01474725  0.05728456  0.05656752  0.92690121]]
iter = 6000, loss =0.00115057276106
=== Label vs Prediction ===
t=[[0 0 0 1]]
y=[[ 0.01301421  0.05013786  0.04957145  0.93624988]]
iter = 7000, loss =0.000923820946912
=== Label vs Prediction ===
t=[[0 0 0 1]]
y=[