-
Notifications
You must be signed in to change notification settings - Fork 0
/
NeuralNetwork.py
212 lines (184 loc) · 9.01 KB
/
NeuralNetwork.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
import pickle
import numpy as np
import numpy.linalg as la
import matplotlib.pyplot as plt
np.set_printoptions(linewidth=300)
image_size = 28 # width and length
no_of_different_labels = 10 # i.e. 0, 1, 2, 3, ..., 9
image_pixels = image_size * image_size
# Edit data path to your own directory if needed
data_path = ""
# Saves train and test data to pickle file
# Makes loading faster for future runs
try:
train_data = pickle.load(open("train_data.pickle", "rb"))
test_data = pickle.load(open("test_data.pickle", "rb"))
except (OSError, IOError) as e:
train_data = np.loadtxt(data_path + "mnist_train.csv", delimiter=",")
test_data = np.loadtxt(data_path + "mnist_test.csv", delimiter=",")
pickle.dump(train_data, open("train_data.pickle", "wb"))
pickle.dump(test_data, open("test_data.pickle", "wb"))
fac = .99 / 255
# separate images and labels, map each pixel value into range [.01,1]
train_imgs = np.asfarray(train_data[:, 1:]) * fac + .01
test_imgs = np.asfarray(test_data[:, 1:]) * fac + .01
train_labels = np.asfarray(train_data[:, :1])
test_labels = np.asfarray(test_data[:, :1])
lr = np.arange(no_of_different_labels)
# transform labels into one hot representation
train_labels = (lr == train_labels).astype(np.float)
test_labels = (lr == test_labels).astype(np.float)
# Sigmoid activation function. Takes input in range (-inf,inf) and outputs in range (0,1)
# This is a variation of the traditional y=1/(1+exp(x)) function.
# I found this version to work slightly better
def sigmoid(input_vector):
return np.arctan(input_vector) / np.pi + .5
# derivative of the sigmoid function
def deriv_sigmoid(input_vector):
return 1. / (np.pi * (1 + input_vector ** 2))
# shuffles a pair of vectors together,
# i.e. both vectors are shuffled exactly the same way
def shuffle_together(a, b):
rng_state = np.random.get_state()
np.random.shuffle(a)
np.random.set_state(rng_state)
np.random.shuffle(b)
class NeuralNet:
# Constructor
# Initalizes all neurons, weights and biases to zero.
def __init__(self, input_params=[image_pixels,100,no_of_different_labels], batch_size=1):
# "params" defines the network architecture.
# by default, the net has 3 layers with 784 neurons
# in the first layer, 100 in the 2nd, and 10 in the 3rd.
# A network can be created with any number and size of layers,
# but for the task of classifying MNIST digits the first layer
# must have 784 neurons and the last layer must have 10.
self.params = input_params
#
self.neurons = []
self.weighted_inputs = []
self.weights = []
self.biases = []
for i in range(len(self.params) - 1):
self.weights.append(np.zeros((self.params[i + 1], self.params[i])))
self.biases.append(np.zeros((self.params[i + 1])))
self.weighted_inputs.append(np.zeros((self.params[i+1], batch_size)))
for i in range(len(self.params)):
self.neurons.append(np.zeros((self.params[i], batch_size)))
self.errors = np.zeros((self.params[-1], batch_size))
# randomly initializes all weights and biases normally with variance 1
def random_init(self):
for i in range(len(self.params) - 1):
self.weights[i] = np.random.randn(self.params[i + 1], self.params[i])
self.biases[i] = np.random.randn(self.params[i + 1])
# calculates the cost (sum of squared errors) or average cost of a batch
def get_cost(self):
if 1 in self.errors.shape:
return la.norm(self.errors)
else:
return sum(la.norm(self.errors, axis=0))/self.errors.shape[1]
# add weights and biases of two networks
def adjust_net(self, gradient):
for i in range(len(self.params) - 1):
self.weights[i] += gradient.weights[i]
self.biases[i] += gradient.biases[i]
# scale all weights and biases by some factor
def scale_net(self, factor):
for i in range(len(self.params) - 1):
self.weights[i] *= factor
self.biases[i] *= factor
return self
# Classifies an input image or batch of images by propagating it forward through the net.
# Multiple image vectors can be classified in one pass using matrix methods.
# The network's "choice" of an image's classification can be
# determined directly by np.argmax(self.neurons[-1]).
def classify(self, input_imgs, input_labels):
if input_imgs.ndim == 1:
input_imgs.shape = (1, self.params[0])
input_labels.shape = (1, self.params[-1])
self.neurons[0] = input_imgs.T
for i in range(len(self.params) - 1):
self.weighted_inputs[i] = (self.weights[i] @ self.neurons[i]) + np.outer(self.biases[i], np.ones((self.neurons[0].shape[1])))
self.neurons[i+1] = sigmoid(self.weighted_inputs[i])
self.errors = self.neurons[-1] - input_labels.T
# Uses the backpropagation algorithm to compute the negative gradient w.r.t the cost
# stored in neural net object "gradient". This object is not meant
# to be used as a neural net, it is simply a convenient way to store
# the adjustments needed for each of the weights and biases.
# This implementation is capable of back-propagating multiple error vectors
# in one pass using matrix methods.
def backprop(self):
batch_size = self.errors.shape[1]
gradient = NeuralNet(input_params=self.params, batch_size=batch_size)
for i in range(-1, -len(self.params), -1):
# neurons error (derivative of the cost w.r.t each neuron activation)
if i != -1:
gradient.neurons[i] = (self.weights[i + 1].T @ gradient.neurons[i + 1]) * deriv_sigmoid(self.weighted_inputs[i])
else:
gradient.neurons[-1] = 2 * self.errors * deriv_sigmoid(self.weighted_inputs[-1])
# bias error (derivative of the cost w.r.t each bias)
gradient.biases[i] = np.sum(gradient.neurons[i], 1) / batch_size
# weight error (derivative of the cost w.r.t. each weight)
temp = np.zeros((batch_size, self.params[i], self.params[i-1]))
for j in range(batch_size):
temp[j] = np.outer(gradient.neurons[i][:, j], self.neurons[i - 1][:, j])
gradient.weights[i] = np.sum(temp, 0) / batch_size
# negative gradient:
gradient.scale_net(-1)
return gradient
# Train the network using stochastic gradient descent,
# with stochastic batches of size "batch_size",
# taking steps proportional to the learn_rate.
# A learn_rate of 3 worked well for the default network
# This will run through the entire training set
# "epochs" number of times
def train(self, epochs=10, learn_rate=3.0, batch_size=10, init=True):
# randomly initialize the network
if init:
self.random_init()
print("Displaying avg cost each epoch:")
for k in range(epochs):
shuffle_together(train_imgs, train_labels)
avg_cost = 0.0
# divide the training set into (10000/batch_size) batches
for i in range(10000 // batch_size):
# classify a batch of images
self.classify(train_imgs[i * batch_size:i * batch_size + batch_size], train_labels[i * batch_size:i * batch_size + batch_size])
# calculate the average cost for the batch
avg_cost += self.get_cost()
# calculate the gradient vector via backprop(),
# then scale the gradient by learn_rate and add to the net
self.adjust_net(self.backprop().scale_net(learn_rate))
# average over the number of batches
avg_cost /= (10000/batch_size)
print('%.5f'%avg_cost, "\tepoch", k+1)
# Tests the network on all test_imgs.
# prints the confusion matrix, the average least-squares cost, and the accuracy.
def test(self):
avg_cost = 0.0
cm = np.zeros((10,10), dtype= int)
for i in range(10000):
self.classify(test_imgs[i], test_labels[i])
avg_cost += self.get_cost()
cm[np.argmax(test_labels[i]),np.argmax(self.neurons[-1])] += 1
avg_cost /= 10000
accuracy = cm.trace()/10000
print("Confusion Matrix:")
print(cm)
print("average cost: ", avg_cost)
print("accuracy: ", accuracy*100, "%")
# --------- end Neural Net class ----------
# displays an image in grayscale.
def show_img(index, type="train"):
if type == "train":
img = train_imgs[index].reshape((28, 28))
plt.imshow(img, cmap="Greys")
plt.show()
print(train_labels[index])
elif type == "test":
img = test_imgs[index].reshape((28, 28))
plt.imshow(img, cmap="Greys")
plt.show()
print(test_labels[index])
else:
print("parameter error")