In [1]:
import numpy as np
import sklearn.datasets
import sklearn.linear_model

In [21]:
# Generate the toy dataset
np.random.seed(0)  # seed NumPy's global RNG before sampling
X, y = sklearn.datasets.make_moons(n_samples=200, noise=0.2)
# X: one row per sample (200 rows), one column per input feature
# y: the true label of each sample; only 0 or 1, i.e. a yes/no class

In [25]:
# Show the feature matrix, then the label vector, each on its own line(s)
print(X, y, sep="\n")

[[ 0.74346118  0.46465633]
 [ 1.65755662 -0.63203157]
 [-0.15878875  0.25584465]
 [-1.088752   -0.39694315]
 [ 1.768052   -0.25443213]
 [ 1.95416454 -0.12850579]
 [ 0.93694537  0.36597075]
 [ 0.88446589 -0.47595401]
 [ 0.80950246  0.3505231 ]
 [ 1.2278091  -0.64785108]
 [-0.38454276  0.50916381]
 [ 0.09252135 -0.31618454]
 [ 1.79531658 -0.32235591]
 [ 1.43861749 -0.15796611]
 [-0.82364866  0.86822754]
 [ 0.99633397  0.1731019 ]
 [ 0.66388701  0.94659669]
 [ 0.13229471 -0.26032619]
 [ 0.2482245   0.7860477 ]
 [-1.00392102  1.15207238]
 [ 2.08208438  0.00715606]
 [ 0.87081342 -0.4366643 ]
 [ 0.37268327  1.01743002]
 [ 1.26735927 -0.11813675]
 [-0.13270154  1.26653562]
 [ 0.20331     0.19519454]
 [ 1.98373996 -0.11222315]
 [ 1.82749513 -0.03085446]
 [-0.03857867  0.0838378 ]
 [ 0.03351023  0.63113817]
 [ 0.94193283  0.63204507]
 [-0.39131894  0.40925201]
 [ 0.88357043 -0.35868845]
 [-0.01141219  0.30437635]
 [ 0.75877114  0.76057045]
 [ 1.79414416  0.28323389]
 [ 0.56116634 -0.0330033 ]
 

In [26]:
# Size of the training set: one sample per row of X
num_examples = X.shape[0]
num_examples

200

In [4]:
nn_input_dim = 2   # number of input-layer units, i.e. the number of features fed in per sample

In [5]:
# Number of output-layer units; build_model uses it as the column count of W2 and b2
nn_output_dim = 2

In [6]:
# Gradient-descent step size applied to every parameter update in build_model
lr = 0.01

In [7]:
# Presumably the regularisation strength.
# NOTE(review): neither calculate_loss nor build_model in this section reads it - confirm it is still needed
reg_lambda = 0.01

In [18]:
def calculate_loss(model):
    """Return the mean cross-entropy loss of ``model`` on the training set.

    Runs the forward pass (tanh hidden layer, softmax output) over the
    module-level feature matrix ``X`` and averages the negative
    log-probability assigned to each sample's label in the module-level ``y``.

    Parameters
    ----------
    model : dict
        Network parameters stored under the keys ``'W1'``, ``'b1'``,
        ``'W2'`` and ``'b2'``.

    Returns
    -------
    float
        Sum of the per-sample negative log-likelihoods divided by the number
        of rows in ``X``.
    """
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']

    # Forward pass: input -> tanh hidden layer -> output scores
    z1 = X.dot(W1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(W2) + b2

    # Log-softmax via the log-sum-exp trick. Subtracting each row's maximum
    # leaves the softmax unchanged but keeps np.exp from overflowing to inf
    # (which would turn the loss into nan), and working in log space avoids
    # taking log(0) when a probability underflows.
    shifted = z2 - np.max(z2, axis=1, keepdims=True)
    log_probs = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))

    # Take the sample count from X itself so the result cannot be skewed by a
    # stale global counter left over from another cell.
    n_samples = X.shape[0]
    loss = -np.sum(log_probs[np.arange(n_samples), y])

    return 1. / n_samples * loss

In [19]:
def build_model(nn_hdim, num_passes = 30000, print_loss = False):
    """Train a one-hidden-layer softmax classifier with batch gradient descent.

    The network is: input (``nn_input_dim`` units) -> tanh hidden layer
    (``nn_hdim`` units) -> softmax output (``nn_output_dim`` units). The
    training data ``X``/``y`` and the step size ``lr`` are read from module
    scope. Gradients are summed over all samples (not averaged) and no
    regularisation term is applied.

    Parameters
    ----------
    nn_hdim : int
        Number of units in the hidden layer.
    num_passes : int, optional
        Number of full-batch gradient-descent passes over the training data.
    print_loss : bool, optional
        If true, print the loss from ``calculate_loss`` every 1000 passes.

    Returns
    -------
    dict
        The parameters under the keys ``'W1'``, ``'b1'``, ``'W2'`` and
        ``'b2'``. The dict is fully populated even when ``num_passes`` is 0
        (it then holds the initial parameters).
    """
    # Random Gaussian weights scaled by 1/sqrt(fan-in); biases start at zero.
    # W1 maps the input layer to the hidden layer.
    W1 = np.random.randn(nn_input_dim, nn_hdim) / np.sqrt(nn_input_dim)
    b1 = np.zeros((1, nn_hdim))
    # W2 maps the hidden layer to the output layer.
    W2 = np.random.randn(nn_hdim, nn_output_dim) / np.sqrt(nn_hdim)
    b2 = np.zeros((1, nn_output_dim))

    # Built once up front: every update below is in place, so these entries
    # always refer to the current parameters, and the caller still gets a
    # usable model when the loop body never runs.
    model = {'W1':W1, 'b1':b1, 'W2':W2, 'b2':b2}

    # Row indices taken from X itself rather than from a separate global count.
    sample_idx = np.arange(X.shape[0])

    # Gradient descent
    for i in range(0, num_passes):
        # Forward pass
        z1 = X.dot(W1) + b1   # hidden-layer pre-activations, one row per sample
        a1 = np.tanh(z1)      # hidden-layer outputs
        z2 = a1.dot(W2) + b2  # output-layer scores
        # Softmax; subtracting the row maximum leaves the result unchanged
        # but prevents np.exp from overflowing on large scores.
        exp_scores = np.exp(z2 - np.max(z2, axis = 1, keepdims = True))
        probs = exp_scores / np.sum(exp_scores, axis = 1, keepdims = True)

        # Backpropagation
        # Gradient of the cross-entropy loss w.r.t. the scores: the softmax
        # output with 1 subtracted at each sample's true-label column.
        # (delta3 aliases probs; probs is not used again in this pass.)
        delta3 = probs
        delta3[sample_idx, y] -= 1

        dW2 = (a1.T).dot(delta3)
        db2 = np.sum(delta3, axis = 0, keepdims = True)
        delta2 = delta3.dot(W2.T) * (1 - np.power(a1, 2)) # tanh derivative
        dW1 = np.dot(X.T, delta2)
        db1 = np.sum(delta2, axis = 0, keepdims = True)

        # In-place parameter update
        W1 += -lr * dW1
        b1 += -lr * db1
        W2 += -lr * dW2
        b2 += -lr * db2

        if print_loss and i % 1000 ==  0:
            print("Loss after iteration %i: %f" % (i, calculate_loss(model)))
    return model


In [20]:
# Train a network whose hidden layer has 10 units, printing the loss as training proceeds
model = build_model(nn_hdim=10, print_loss=True)

Loss after iteration 0: 0.362723
Loss after iteration 1000: 0.072866
Loss after iteration 2000: 0.053184
Loss after iteration 3000: 0.041005
Loss after iteration 4000: 0.029380
Loss after iteration 5000: 0.023095
Loss after iteration 6000: 0.018241
Loss after iteration 7000: 0.015106
Loss after iteration 8000: 0.013222
Loss after iteration 9000: 0.012009
Loss after iteration 10000: 0.011162
Loss after iteration 11000: 0.010518
Loss after iteration 12000: 0.009996
Loss after iteration 13000: 0.009550
Loss after iteration 14000: 0.009156
Loss after iteration 15000: 0.008800
Loss after iteration 16000: 0.008466
Loss after iteration 17000: 0.008137
Loss after iteration 18000: 0.007796
Loss after iteration 19000: 0.007427
Loss after iteration 20000: 0.007017
Loss after iteration 21000: 0.006557
Loss after iteration 22000: 0.006059
Loss after iteration 23000: 0.005545
Loss after iteration 24000: 0.005027
Loss after iteration 25000: 0.004517
Loss after iteration 26000: 0.004028
Loss after ite