## pre-define

In [16]:
import numpy as np

In [17]:
# Defining the sigmoid function for activations 
# 定义 sigmoid 激活函数
def sigmoid(x):
    return 1/(1+np.exp(-x))

# Derivative of the sigmoid function
# 激活函数的导数
def sigmoid_prime(x):
    return sigmoid(x) * (1 - sigmoid(x))

# Input data
# 输入数据
x = np.array([0.1, 0.3])
# Target
# 目标
y = 0.2
# Input to output weights
# 输入到输出的权重
weights = np.array([-0.8, 0.5])

# The learning rate, eta in the weight step equation
# 权重更新的学习率
learnrate = 0.5

# the linear combination performed by the node (h in f(h) and f'(h))
# 输入和权重的线性组合
h = x[0]*weights[0] + x[1]*weights[1]
# or h = np.dot(x, weights)

# The neural network output (y-hat)
# 神经网络输出
nn_output = sigmoid(h)

# output error (y - y-hat)
# 输出误差
error = y - nn_output

# output gradient (f'(h))
# 输出梯度
output_grad = sigmoid_prime(h)

# error term (lowercase delta)
error_term = error * output_grad

# Gradient descent step 
# 梯度下降一步
del_w = [ learnrate * error_term * x[0],
          learnrate * error_term * x[1]]
# or del_w = learnrate * error_term * x

## gradient

In [18]:
def sigmoid(x):
    """
    Calculate sigmoid
    """
    return 1/(1+np.exp(-x))

def sigmoid_prime(x):
    """
    # Derivative of the sigmoid function
    """
    return sigmoid(x) * (1 - sigmoid(x))

learnrate = 0.5
x = np.array([1, 2, 3, 4])
y = np.array(0.5)

# Initial weights
w = np.array([0.5, -0.5, 0.3, 0.1])

### Calculate one gradient descent step for each weight
### Note: Some steps have been consolidated, so there are fewer variable names than in the above sample code

# TODO: Calculate the node's linear combination of inputs and weights
h = np.dot(x, w)

# TODO: Calculate output of neural network
nn_output = sigmoid(h)

# TODO: Calculate error of neural network
error = y-nn_output

# TODO: Calculate the error term
#       Remember, this requires the output gradient, which we haven't specifically added a variable for.
error_term = error*sigmoid_prime(h)

# TODO: Calculate change in weights
del_w = learnrate*error_term*x

print('Neural Network output:')
print(nn_output)
print('Amount of Error:')
print(error)
print('Change in Weights:')
print(del_w)

Neural Network output:
0.6899744811276125
Amount of Error:
-0.1899744811276125
Change in Weights:
[-0.02031869 -0.04063738 -0.06095608 -0.08127477]


## description

In [19]:
import numpy as np
import pandas as pd

admissions = pd.read_csv('binary.csv')

# Make dummy variables for rank
data = pd.concat([admissions, pd.get_dummies(admissions['rank'], prefix='rank')], axis=1)
data = data.drop('rank', axis=1)

# Standarize features
for field in ['gre', 'gpa']:
    mean, std = data[field].mean(), data[field].std()
    data.loc[:,field] = (data[field]-mean)/std
    
# Split off random 10% of the data for testing
np.random.seed(42)
sample = np.random.choice(data.index, size=int(len(data)*0.9), replace=False)
data, test_data = data.iloc[sample], data.drop(sample)

# Split into features and targets
features, targets = data.drop('admit', axis=1), data['admit']
features_test, targets_test = test_data.drop('admit', axis=1), test_data['admit']

用梯度下降来更新权重的算法概述：

权重步长设定为 0： \Delta w_i = 0Δw 
i
​	 =0
对训练数据中的每一条记录：
通过网络做正向传播，计算输出 \hat y = f(\sum_i w_i x_i) 
y
^
​	 =f(∑ 
i
​	 w 
i
​	 x 
i
​	 )
计算输出单元的误差项（error term） \delta = (y - \hat y) * f'(\sum_i w_i x_i)δ=(y− 
y
^
​	 )∗f 
′
 (∑ 
i
​	 w 
i
​	 x 
i
​	 )
更新权重步长 \Delta w_i = \Delta w_i + \delta x_iΔw 
i
​	 =Δw 
i
​	 +δx 
i
​	 
更新权重 w_i = w_i + \eta \Delta w_i / mw 
i
​	 =w 
i
​	 +ηΔw 
i
​	 /m。其中 \etaη 是学习率， mm 是数据点个数。这里我们对权重步长做了平均，为的是降低训练数据中大的变化。
重复 ee 代（epoch）。

In [21]:
import numpy as np

def sigmoid(x):
    """
    Calculate sigmoid
    """
    return 1 / (1 + np.exp(-x))

# TODO: We haven't provided the sigmoid_prime function like we did in
#       the previous lesson to encourage you to come up with a more
#       efficient solution. If you need a hint, check out the comments
#       in solution.py from the previous lecture.

# Use to same seed to make debugging easier
np.random.seed(42)

n_records, n_features = features.shape
last_loss = None

# Initialize weights
weights = np.random.normal(scale=1 / n_features**.5, size=n_features)

# Neural Network hyperparameters
epochs = 1000
learnrate = 0.5

for e in range(epochs):
    del_w = np.zeros(weights.shape)
    for x, y in zip(features.values, targets):
        # Loop through all records, x is the input, y is the target

        # Note: We haven't included the h variable from the previous
        #       lesson. You can add it if you want, or you can calculate
        #       the h together with the output

        # TODO: Calculate the output---#RIGHT
        output = sigmoid(np.dot(x, weights))

        # TODO: Calculate the error---#WRONG
#         error = np.abs(y-output)
        error = y-output

        # TODO: Calculate the error term---#RIGHT
        error_term = error*output*(1-output)

        # TODO: Calculate the change in weights for this sample
        #       and add it to the total weight change---#RIGHT
        del_w += error_term*x

    # TODO: Update weights using the learning rate and the average change in weights
    weights += learnrate*del_w/n_records

    # Printing out the mean square error on the training set
    if e % (epochs / 10) == 0:
        out = sigmoid(np.dot(features, weights))
        loss = np.mean((out - targets) ** 2)
        if last_loss and last_loss < loss:
            print("Train loss: ", loss, "  WARNING - Loss Increasing")
        else:
            print("Train loss: ", loss)
        last_loss = loss


# Calculate accuracy on test data
tes_out = sigmoid(np.dot(features_test, weights))
predictions = tes_out > 0.5
accuracy = np.mean(predictions == targets_test)
print("Prediction accuracy: {:.3f}".format(accuracy))

Train loss:  0.26276093849966364
Train loss:  0.20928619409324895
Train loss:  0.20084292908073417
Train loss:  0.1986215647552789
Train loss:  0.19779851396686018
Train loss:  0.19742577912189863
Train loss:  0.19723507746241065
Train loss:  0.19712945625092465
Train loss:  0.19706766341315074
Train loss:  0.19703005801777368
Prediction accuracy: 0.725


## self MLP

要实现一个 4x3x2 网络的正向传播，用 sigmoid 作为两层的激活函数。

要做的事情：


> 1.计算隐藏层的输入       
> 2.计算隐藏层输出         
> 3.计算输出层的输入       
> 4.计算神经网络的输出    

In [22]:
import numpy as np

def sigmoid(x):
    """
    Calculate sigmoid
    """
    return 1/(1+np.exp(-x))

# Network size
N_input = 4
N_hidden = 3
N_output = 2

np.random.seed(42)
# Make some fake data
X = np.random.randn(4)

weights_input_to_hidden = np.random.normal(0, scale=0.1, size=(N_input, N_hidden))
weights_hidden_to_output = np.random.normal(0, scale=0.1, size=(N_hidden, N_output))


# TODO: Make a forward pass through the network
hidden_layer_in = np.dot(X, weights_input_to_hidden)
hidden_layer_out = sigmoid(hidden_layer_in)

print('Hidden-layer Output:')
print(hidden_layer_out)

output_layer_in = np.dot(hidden_layer_out, weights_hidden_to_output)
output_layer_out = sigmoid(output_layer_in)

print('Output-layer Output:')
print(output_layer_out)

Hidden-layer Output:
[0.41492192 0.42604313 0.5002434 ]
Output-layer Output:
[0.49815196 0.48539772]


## backpropagation

In [319]:
import numpy as np


def sigmoid(x):
    """
    Calculate sigmoid
    """
    return 1 / (1 + np.exp(-x))


x = np.array([0.5, 0.1, -0.2])
target = 0.6
learnrate = 0.5

weights_input_hidden = np.array([[0.5, -0.6],
                                 [0.1, -0.2],
                                 [0.1, 0.7]])

weights_hidden_output = np.array([0.1, -0.3])
print(x.shape, weights_input_hidden.shape, weights_hidden_output.shape)

## Forward pass
hidden_layer_input = np.dot(x, weights_input_hidden)
print('*'*50+'1')
print(x.shape)
print(hidden_layer_input.shape)
hidden_layer_output = sigmoid(hidden_layer_input)
print(hidden_layer_output)

output_layer_in = np.dot(hidden_layer_output, weights_hidden_output)
print('*'*50+'2')
print(output_layer_in)
output = sigmoid(output_layer_in)
print(output)

## Backwards pass
## TODO: Calculate output error
error = target - output

# TODO: Calculate error term for output layer
print('*'*50+'output_et')
output_error_term = error * output * (1-output)
print(output_error_term, output_error_term.shape)

# TODO: Calculate error term for hidden layer
# hidden_error = np.dot(output_error_term, weights_hidden_output)
print('*'*50)
print(output_error_term, weights_hidden_output, hidden_layer_output)
print(output_error_term.shape, weights_hidden_output.shape, hidden_layer_output.shape)
hidden_error_term = np.dot(output_error_term, weights_hidden_output) * hidden_layer_output * (1 - hidden_layer_output)
print('*'*50+'hidden_et')
print(np.dot(output_error_term, weights_hidden_output), np.dot(output_error_term, weights_hidden_output).shape)
print(hidden_layer_output, (1 - hidden_layer_output))
print(hidden_error_term, hidden_error_term.shape)
print(np.dot(output_error_term, weights_hidden_output).shape, hidden_layer_output.shape)

# TODO: Calculate change in weights for hidden layer to output layer
delta_w_h_o = learnrate * output_error_term * hidden_layer_output

# TODO: Calculate change in weights for input layer to hidden layer
print('*'*50)
print(hidden_error_term.shape, x.shape)
print(hidden_error_term, x)
delta_w_i_h = learnrate * hidden_error_term * x[:, None]

print('*'*50)
print('Change in weights for hidden layer to output layer:')
print(delta_w_h_o)
print('Change in weights for input layer to hidden layer:')
print(delta_w_i_h)

(3,) (3, 2) (2,)
**************************************************1
(3,)
(2,)
[0.55971365 0.38698582]
**************************************************2
-0.06012438223148006
0.48497343084992534
**************************************************output_et
0.028730669543515018 ()
**************************************************
0.028730669543515018 [ 0.1 -0.3] [0.55971365 0.38698582]
() (2,) (2,)
**************************************************hidden_et
[ 0.00287307 -0.0086192 ] (2,)
[0.55971365 0.38698582] [0.44028635 0.61301418]
[ 0.00070802 -0.00204471] (2,)
(2,) (2,)
**************************************************
(2,) (3,)
[ 0.00070802 -0.00204471] [ 0.5  0.1 -0.2]
**************************************************
Change in weights for hidden layer to output layer:
[0.00804047 0.00555918]
Change in weights for input layer to hidden layer:
[[ 1.77005547e-04 -5.11178506e-04]
 [ 3.54011093e-05 -1.02235701e-04]
 [-7.08022187e-05  2.04471402e-04]]


## backpropagation II

In [28]:
import numpy as np

np.random.seed(21)

def sigmoid(x):
    """
    Calculate sigmoid
    """
    return 1 / (1 + np.exp(-x))


# Hyperparameters
n_hidden = 2  # number of hidden units
epochs = 900
# learnrate = 0.005
learnrate = 0.1

n_records, n_features = features.shape
last_loss = None
# Initialize weights
weights_input_hidden = np.random.normal(scale=1 / n_features ** .5,
                                        size=(n_features, n_hidden))
weights_hidden_output = np.random.normal(scale=1 / n_features ** .5,
                                         size=n_hidden)

for e in range(epochs):
    del_w_input_hidden = np.zeros(weights_input_hidden.shape)
    del_w_hidden_output = np.zeros(weights_hidden_output.shape)
    for x, y in zip(features.values, targets):
        ## Forward pass ##
        # TODO: Calculate the output
        hidden_input = np.dot(x, weights_input_hidden)
        hidden_output = sigmoid(hidden_input)

        output = np.dot(hidden_output,
                                weights_hidden_output))

        ## Backward pass ##
        # TODO: Calculate the network's prediction error
        error = y - output

        # TODO: Calculate error term for the output unit
        output_error_term = error * output * (1 - output)

        ## propagate errors to hidden layer

        # TODO: Calculate the hidden layer's contribution to the error
        hidden_error = np.dot(output_error_term, weights_hidden_output)

        # TODO: Calculate the error term for the hidden layer
        hidden_error_term = hidden_error * hidden_output * (1 - hidden_output)

        # TODO: Update the change in weights
        del_w_hidden_output += output_error_term * hidden_output
        del_w_input_hidden += hidden_error_term * x[:, None]

    # TODO: Update weights
    weights_input_hidden += learnrate * del_w_input_hidden / n_records
    weights_hidden_output += learnrate * del_w_hidden_output / n_records

    # Printing out the mean square error on the training set
    if e % (epochs / 10) == 0:
        hidden_output = sigmoid(np.dot(x, weights_input_hidden))
        out = sigmoid(np.dot(hidden_output,
                             weights_hidden_output))
        loss = np.mean((out - targets) ** 2)

        if last_loss and last_loss < loss:
            print("Train loss: ", loss, "  WARNING - Loss Increasing")
        else:
            print("Train loss: ", loss)
        last_loss = loss

# Calculate accuracy on test data
hidden = sigmoid(np.dot(features_test, weights_input_hidden))
out = sigmoid(np.dot(hidden, weights_hidden_output))
predictions = out > 0.5
accuracy = np.mean(predictions == targets_test)
print("Prediction accuracy: {:.3f}".format(accuracy))

Train loss:  0.2759952337979974
Train loss:  0.2536039891630312
Train loss:  0.2406311012332461
Train loss:  0.23311403345410195
Train loss:  0.22865891204402225
Train loss:  0.22593795573755224
Train loss:  0.22422426346337285
Train loss:  0.22311377786966663
Train loss:  0.22237609891584925
Train loss:  0.22187608079923876
Prediction accuracy: 0.750


## test normal datas

In [251]:
import numpy as np

inputs = np.array([[0.5, -0.2, 0.1]])
targets = np.array([[0.4]])
learnrate = 0.5

# 3*2
weights_input_hidden = np.array([[0.1, -0.2],
                                 [0.4, 0.5],
                                 [-0.3, 0.2]])
# 2*1
weights_hidden_output = np.array([[0.3],
                                  [-0.1]])
# x = np.array([0.5, 0.1, -0.2])
# target = 0.6
# learnrate = 0.5

# weights_input_hidden = np.array([[0.5, -0.6],
#                                  [0.1, -0.2],
#                                  [0.1, 0.7]])

# weights_hidden_output = np.array([0.1, -0.3])

delta_weights_i_h = np.zeros(weights_input_hidden.shape)
delta_weights_h_o = np.zeros(weights_hidden_output.shape)

In [252]:
# (3,) float (3, 2) (2,)
print(inputs.shape, targets.shape, weights_input_hidden.shape, weights_hidden_output.shape)

(1, 3) (1, 1) (3, 2) (2, 1)


In [254]:
import numpy as np


def sigmoid(x):
    """
    Calculate sigmoid
    """
    return 1 / (1 + np.exp(-x))

for x, target in zip(inputs, targets):
    print('*'*50+'start')
    print(x, target)
    ## Forward pass
    hidden_inputs = np.dot(x, weights_input_hidden) # signals into hidden layer
    hidden_outputs = sigmoid(hidden_inputs) # signals from hidden layer

    # TODO: Output layer - Replace these values with your calculations.
    final_inputs = np.dot(hidden_outputs, weights_hidden_output) # signals into final output layer
    final_outputs = sigmoid(final_inputs) # signals from final output layer

    #### Implement the backward pass here ####
    ### Backward pass ###

    # TODO: Output error - Replace this value with your calculations.
    error = target - final_outputs # Output layer error is the difference between desired target and actual output.
    print('*'*50+'error')
    print(error)
    
    # TODO: Calculate the hidden layer's contribution to the error
    hidden_error = np.dot(weights_hidden_output, error)
    print('*'*50+'hidden_error')
    print(hidden_error)

    # TODO: Backpropagated error terms - Replace these values with your calculations.
    output_error_term = error * final_outputs * (1-final_outputs)
    hidden_error_term = hidden_error * hidden_outputs * (1-hidden_outputs)

    # Weight step (input to hidden)
    delta_weights_i_h += hidden_error_term * x[:, None]
    # Weight step (hidden to output)
    delta_weights_h_o += (output_error_term * hidden_outputs)[:, None]
    print('*'*50+'delta')
    print('Change in weights for hidden layer to output layer:')
    print(delta_weights_h_o)
    print('Change in weights for input layer to hidden layer:')
    print(delta_weights_i_h)
weights_hidden_output += learnrate * delta_weights_h_o / 1
weights_input_hidden += learnrate * delta_weights_i_h / 1
print('*'*50+'END')
print(weights_hidden_output, weights_hidden_output.shape)
print(weights_input_hidden, weights_input_hidden.shape)

**************************************************start
[ 0.5 -0.2  0.1] [0.4]
**************************************************error
[-0.12322849]
**************************************************hidden_error
[-0.0360372   0.01319681]
**************************************************delta
Change in weights for hidden layer to output layer:
[[-0.03001429]
 [-0.02817863]]
Change in weights for input layer to hidden layer:
[[-0.00918281  0.003186  ]
 [ 0.00367312 -0.0012744 ]
 [-0.00183656  0.0006372 ]]
**************************************************END
[[ 0.27743499]
 [-0.12118151]] (2, 1)
[[ 0.09306739 -0.19763219]
 [ 0.40277304  0.49905288]
 [-0.30138652  0.20047356]] (3, 2)


In [249]:
print(weights_hidden_output, weights_hidden_output.shape)
print(weights_input_hidden, weights_input_hidden.shape)

[[ 0.19039158]
 [-0.20307588]] (2, 1)
[[ 0.06789587 -0.18699981]
 [ 0.41284165  0.49479992]
 [-0.30642083  0.20260004]] (3, 2)


In [250]:
np.allclose(weights_hidden_output, np.array([[ 0.37275328], 
                                              [-0.03172939]]))

False

## END

In [379]:
x = np.array([[0.5, -0.2, 0.1]])
target = np.array([[0.4]])
learnrate = 0.5

# 3*2
weights_input_hidden = np.array([[0.1, -0.2],
                                 [0.4, 0.5],
                                 [-0.3, 0.2]])
# 2*1
weights_hidden_output = np.array([[0.3],
                                  [-0.1]])

delta_weights_i_h = np.zeros(weights_input_hidden.shape)
delta_weights_h_o = np.zeros(weights_hidden_output.shape)

In [380]:
print(x, x.shape)
print(target, target.shape)

[[ 0.5 -0.2  0.1]] (1, 3)
[[0.4]] (1, 1)


In [381]:
print(delta_weights_i_h, delta_weights_i_h.shape)
print(delta_weights_h_o, delta_weights_h_o.shape)

[[0. 0.]
 [0. 0.]
 [0. 0.]] (3, 2)
[[0.]
 [0.]] (2, 1)


In [382]:
weights_input_hidden, weights_input_hidden.shape

(array([[ 0.1, -0.2],
        [ 0.4,  0.5],
        [-0.3,  0.2]]), (3, 2))

In [383]:
weights_hidden_output, weights_hidden_output.shape

(array([[ 0.3],
        [-0.1]]), (2, 1))

In [384]:
hidden_output = sigmoid(np.dot(x, weights_input_hidden))

hidden_output, hidden_output.shape

(array([[0.4850045 , 0.45512111]]), (1, 2))

In [385]:
weights_hidden_output, weights_hidden_output.shape

(array([[ 0.3],
        [-0.1]]), (2, 1))

In [386]:
final_output = sigmoid(np.dot(hidden_layer_output, weights_hidden_output))

final_output, final_output.shape

(array([0.53225901]), (1,))

In [387]:
error = target - output

target, error

(array([[0.4]]), array([[-0.08497343]]))

In [388]:
hidden_error = np.dot(weights_hidden_output, error)

hidden_error, hidden_error.shape

(array([[-0.02549203],
        [ 0.00849734]]), (2, 1))

In [389]:
output_error_term = error * final_output * (1-final_output)

output_error_term

array([[-0.02115493]])

In [390]:
hidden_error, hidden_output, hidden_output[0]

(array([[-0.02549203],
        [ 0.00849734]]),
 array([[0.4850045 , 0.45512111]]),
 array([0.4850045 , 0.45512111]))

In [391]:
hidden_error.shape, hidden_output.shape, hidden_output[0].shape

((2, 1), (1, 2), (2,))

In [392]:
hidden_error_term = hidden_error * hidden_output[0][:, None] * (1-hidden_output[0][:, None])

hidden_error_term

array([[-0.00636728],
       [ 0.00210722]])

In [393]:
x[0][:, None]

array([[ 0.5],
       [-0.2],
       [ 0.1]])

In [394]:
delta_weights_i_h += x[0][:, None] * hidden_error_term.reshape(1,2)

delta_weights_i_h

array([[-0.00318364,  0.00105361],
       [ 0.00127346, -0.00042144],
       [-0.00063673,  0.00021072]])

In [395]:
delta_weights_h_o += hidden_outputs[:, None] * output_error_term

delta_weights_h_o

array([[-0.01025281],
       [-0.00963049]])

In [398]:
weights_hidden_output -= learnrate * delta_weights_h_o / 1
weights_input_hidden -= learnrate * delta_weights_i_h / 1

In [399]:
weights_hidden_output, weights_input_hidden

(array([[ 0.3034176 ],
        [-0.09678984]]), array([[ 0.10106121, -0.2003512 ],
        [ 0.39957551,  0.50014048],
        [-0.29978776,  0.19992976]]))