#!/usr/bin/env python
"""
nn_notes.py
Code taken from:
https://pyimagesearch.com/2021/05/06/backpropagation-from-scratch-with-python/
Backpropagation from scratch with Python
by Adrian Rosebrock on May 6, 2021
"""
# import the necessary packages
import numpy as np
class NeuralNetwork:
    def __init__(self, layers, alpha=0.1):
        # initialize the list of weights matrices, then store the
        # network architecture and learning rate
        self.W = []
        self.layers = layers
        self.alpha = alpha
        # start looping from the index of the first layer but
        # stop before we reach the last two layers
        for i in np.arange(0, len(layers) - 2):
            # randomly initialize a weight matrix connecting the
            # number of nodes in each respective layer together,
            # adding an extra node for the bias
            w = np.random.randn(layers[i] + 1, layers[i + 1] + 1)
            # scale w by the square root of the number of nodes in
            # the current layer, normalizing the variance of each
            # neuron's output
            self.W.append(w / np.sqrt(layers[i]))
        # the last two layers are a special case where the input
        # connections need a bias term but the output does not
        w = np.random.randn(layers[-2] + 1, layers[-1])
        self.W.append(w / np.sqrt(layers[-2]))
    def __repr__(self):
        # construct and return a string that represents the network
        # architecture
        return "NeuralNetwork: {}".format(
            "-".join(str(l) for l in self.layers))
    def sigmoid(self, x):
        # compute and return the sigmoid activation value for a
        # given input value
        return 1.0 / (1 + np.exp(-x))
    def sigmoid_deriv(self, x):
        # compute the derivative of the sigmoid function ASSUMING
        # that x has already been passed through the 'sigmoid'
        # function -- if s = sigmoid(z), then ds/dz = s * (1 - s)
        return x * (1 - x)
    def fit(self, X, y, epochs=1000, displayUpdate=100):
        # insert a column of 1's as the last entry in the feature
        # matrix -- this little trick allows us to treat the bias
        # as a trainable parameter within the weight matrix
        X = np.c_[X, np.ones((X.shape[0]))]
        # loop over the desired number of epochs
        for epoch in np.arange(0, epochs):
            # loop over each individual data point and train
            # our network on it
            for (x, target) in zip(X, y):
                self.fit_partial(x, target)
            # check to see if we should display a training update
            if epoch == 0 or (epoch + 1) % displayUpdate == 0:
                loss = self.calculate_loss(X, y)
                print("[INFO] epoch={}, loss={:.7f}".format(
                    epoch + 1, loss))
    def fit_partial(self, x, y):
        # construct our list of output activations for each layer
        # as our data point flows through the network; the first
        # activation is a special case -- it's just the input
        # feature vector itself
        A = [np.atleast_2d(x)]
        # FEEDFORWARD:
        # loop over the layers in the network
        for layer in np.arange(0, len(self.W)):
            # feedforward the activation at the current layer by
            # taking the dot product between the activation and
            # the weight matrix -- this is called the "net input"
            # to the current layer
            net = A[layer].dot(self.W[layer])
            # computing the "net output" is simply applying our
            # nonlinear activation function to the net input
            out = self.sigmoid(net)
            # once we have the net output, add it to our list of
            # activations
            A.append(out)
        # BACKPROPAGATION
        # the first phase of backpropagation is to compute the
        # difference between our *prediction* (the final output
        # activation in the activations list) and the true target
        # value
        error = A[-1] - y
        # from here, we need to apply the chain rule and build our
        # list of deltas 'D'; the first entry in the deltas is
        # simply the error of the output layer times the derivative
        # of our activation function for the output value
        D = [error * self.sigmoid_deriv(A[-1])]
        # once you understand the chain rule it becomes super easy
        # to implement with a 'for' loop -- simply loop over the
        # layers in reverse order (ignoring the last two since we
        # already have taken them into account)
        for layer in np.arange(len(A) - 2, 0, -1):
            # the delta for the current layer is equal to the delta
            # of the *previous layer* dotted with the weight matrix
            # of the current layer, followed by multiplying the delta
            # by the derivative of the nonlinear activation function
            # for the activations of the current layer
            delta = D[-1].dot(self.W[layer].T)
            delta = delta * self.sigmoid_deriv(A[layer])
            D.append(delta)
        # since we looped over our layers in reverse order we need to
        # reverse the deltas
        D = D[::-1]
        # WEIGHT UPDATE PHASE
        # loop over the layers
        for layer in np.arange(0, len(self.W)):
            # update our weights by taking the dot product of the layer
            # activations with their respective deltas, then multiplying
            # this value by some small learning rate and adding to our
            # weight matrix -- this is where the actual "learning" takes
            # place
            self.W[layer] += -self.alpha * A[layer].T.dot(D[layer])
    def predict(self, X, addBias=True):
        # initialize the output prediction as the input features -- this
        # value will be (forward) propagated through the network to
        # obtain the final prediction
        p = np.atleast_2d(X)
        # check to see if the bias column should be added
        if addBias:
            # insert a column of 1's as the last entry in the feature
            # matrix (bias)
            p = np.c_[p, np.ones((p.shape[0]))]
        # loop over our layers in the network
        for layer in np.arange(0, len(self.W)):
            # computing the output prediction is as simple as taking
            # the dot product between the current activation value 'p'
            # and the weight matrix associated with the current layer,
            # then passing this value through a nonlinear activation
            # function
            p = self.sigmoid(np.dot(p, self.W[layer]))
        # return the predicted value
        return p
    def calculate_loss(self, X, targets):
        # make predictions for the input data points then compute
        # the loss
        targets = np.atleast_2d(targets)
        predictions = self.predict(X, addBias=False)
        loss = 0.5 * np.sum((predictions - targets) ** 2)
        # return the loss
        return loss
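

# ----------------------------------------------------------------------
# Usage sketch (not part of the original notes): train the network on the
# XOR problem, mirroring the companion example from the same pyimagesearch
# tutorial. The 2-2-1 architecture, learning rate, and epoch count below
# are illustrative assumptions rather than values taken from this file.
if __name__ == "__main__":
    # construct the XOR dataset (inputs and corresponding labels)
    X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
    y = np.array([[0], [1], [1], [0]])
    # define a 2-2-1 network (one hidden layer) and train it
    nn = NeuralNetwork([2, 2, 1], alpha=0.5)
    nn.fit(X, y, epochs=20000)
    # loop over the XOR data points and report the predictions
    for (x, target) in zip(X, y):
        pred = nn.predict(x)[0][0]
        step = 1 if pred > 0.5 else 0
        print("[INFO] data={}, ground-truth={}, pred={:.4f}, step={}".format(
            x, target[0], pred, step))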