In [1]:
import numpy as np

In [2]:
def sigmoid(x, derivative=False):
    """
    Parameters:
        x: array_like
        derivative: bool to specify whether to calculate derivative or not
        
    Returns:
        numpy array of sigmoid of x
    """
    return 1. / (1. + np.exp(-x)) if not derivative else x * (1. - x)

In [3]:
class Dense:
    def __init__(self, nodes, func=sigmoid):
        """
        Parameters:
            nodes: array_like. Number of neurons in each layer
            func: activation funcion
            
        Returns:
            Dense object
        """
        self.__func = func
        self.__nodes = np.array(nodes)
        
        self.__neurons = [np.empty([node, 1]) for node in nodes]
        self.__weights = [np.random.uniform(-1., 1., [nodes[i + 1], nodes[i]]) for i in range(len(nodes) - 1)]
        self.__biases = [np.random.uniform(-1., 1., [nodes[i + 1], 1]) for i in range(len(nodes) - 1)]
        
        self.__reset_deltas()
        
    def __reset_deltas(self):
        self.__delta_weights = [np.zeros([self.__nodes[i + 1], self.__nodes[i]]) for i in range(len(self.__nodes) - 1)]
        self.__delta_biases = [np.zeros([self.__nodes[i + 1], 1]) for i in range(len(self.__nodes) - 1)]
    
    def get_delta_weights(self):
        return self.__delta_weights
        
    def guess(self, x):
        """
        Parameters:
            x: 2d-array. Inputs of the network
            
        Returns:
            numpy array. Output of the network
        """
        output = np.empty([len(x), self.__nodes[-1], 1])
        for i, cur_input in enumerate(x):
            self.__neurons[0] = np.array(cur_input).reshape(-1, 1)
            for j in range(len(self.__neurons) - 1):
                self.__neurons[j + 1] = self.__func(self.__weights[j] @ self.__neurons[j] + self.__biases[j])

            output[i] = self.__neurons[-1]
        return output
    
    def train(self, x, y, batch_size=128, epochs_num=1, lr=0.01):
        """
        Parameters:
            x: 2d-array. Inputs of the network
            y: 2d-array. Expected outputs of the network
            batch_size: int. Number of inputs used during each training step
            epochs_num: int. Number of epochs
            lr: float. Learning rate of the training process
        """
        assert len(x) == len(y)
        
        for _ in range(epochs_num):
            for step in range(len(x) // batch_size):
                self.__reset_deltas()
                for cur_input, cur_output in zip(x[batch_size * step: batch_size * (step + 1)], \
                                                 y[batch_size * step: batch_size * (step + 1)]):
                    self.__neurons[0] = np.array(cur_input).reshape(-1, 1)
                    for i in range(len(self.__neurons) - 1):
                        self.__neurons[i + 1] = self.__func(self.__weights[i] @ self.__neurons[i] + self.__biases[i])
                        
                    error = cur_output.reshape(-1, 1) - self.__neurons[-1]
                    grad = self.__func(self.__neurons[-1], True) * error
                    self.__delta_weights[-1] += (grad @ self.__neurons[-2].transpose())
                    self.__delta_biases[-1] += grad

                    for i in range(len(self.__nodes) - 3, -1, -1):
                        error = self.__weights[i + 1].transpose() @ error
                        grad = self.__func(self.__neurons[i + 1], True) * error
                        self.__delta_weights[i] += (grad @ self.__neurons[i].transpose())
                        self.__delta_biases[i] += grad
                        
                for i in range(len(self.__weights)):
                    self.__weights[i] += (self.__delta_weights[i] * lr / batch_size)
                    self.__biases[i] += (self.__delta_biases[i] * lr / batch_size)

In [4]:
possible_inputs  = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
possible_outputs = np.array([[0]   , [1]   , [1]   , [0]])

indexes = np.random.uniform(0, 4, [1280]).astype(int)
inputs = possible_inputs[indexes]
outputs = possible_outputs[indexes]

In [5]:
nn = Dense([2, 4, 4, 1])
for i in range(1, 16):
    nn.train(inputs, outputs, batch_size=16, epochs_num=5, lr = 1)
    print("after %d epochs:\n" % (5 * i))
    print(nn.guess(possible_inputs))

after 5 epochs:

[[[0.13185176]]

 [[0.76097299]]

 [[0.3605699 ]]

 [[0.39736284]]]
after 10 epochs:

[[[0.04182574]]

 [[0.91651299]]

 [[0.36150503]]

 [[0.36950932]]]
after 15 epochs:

[[[0.02972532]]

 [[0.938102  ]]

 [[0.35630381]]

 [[0.3605327 ]]]
after 20 epochs:

[[[0.02426596]]

 [[0.94728557]]

 [[0.35391956]]

 [[0.35674377]]]
after 25 epochs:

[[[0.02101142]]

 [[0.95239332]]

 [[0.3533081 ]]

 [[0.35540273]]]
after 30 epochs:

[[[0.0187663 ]]

 [[0.95561314]]

 [[0.35390572]]

 [[0.35555277]]]
after 35 epochs:

[[[0.01706382]]

 [[0.95779739]]

 [[0.35535811]]

 [[0.35670153]]]
after 40 epochs:

[[[0.01568693]]

 [[0.95935817]]

 [[0.35740377]]

 [[0.35852702]]]
after 45 epochs:

[[[0.01452507]]

 [[0.96052556]]

 [[0.35983611]]

 [[0.36079211]]]
after 50 epochs:

[[[0.01351885]]

 [[0.96144144]]

 [[0.36249183]]

 [[0.36331639]]]
after 55 epochs:

[[[0.01263461]]

 [[0.96219875]]

 [[0.36524503]]

 [[0.36596369]]]
after 60 epochs:

[[[0.01185167]]

 [[0.96285984]]

 [[

In [6]:
possible_inputs  = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
possible_outputs = np.array([[0, 1], [1, 0], [1, 0], [0, 1]])

indexes = np.random.uniform(0, 4, [1280]).astype(int)
inputs = possible_inputs[indexes]
outputs = possible_outputs[indexes]

In [7]:
nn = Dense([2, 4, 4, 2])
for i in range(1, 16):
    nn.train(inputs, outputs, batch_size=16, epochs_num=5, lr = 1)
    print("after %d epochs:\n" % (5 * i))
    print(nn.guess(possible_inputs))

after 5 epochs:

[[[0.48167563]
  [0.51635853]]

 [[0.48341771]
  [0.51580755]]

 [[0.48217983]
  [0.52165033]]

 [[0.48506693]
  [0.5264281 ]]]
after 10 epochs:

[[[0.0822002 ]
  [0.92439053]]

 [[0.91501297]
  [0.07853528]]

 [[0.93952111]
  [0.05117118]]

 [[0.07919931]
  [0.92777569]]]
after 15 epochs:

[[[0.03768323]
  [0.96402004]]

 [[0.96713114]
  [0.03055213]]

 [[0.9724372 ]
  [0.02947524]]

 [[0.03715861]
  [0.96452279]]]
after 20 epochs:

[[[0.02761292]
  [0.97269753]]

 [[0.97700899]
  [0.0229009 ]]

 [[0.97986236]
  [0.02299805]]

 [[0.02739691]
  [0.97288149]]]
after 25 epochs:

[[[0.0227137 ]
  [0.97694747]]

 [[0.98160983]
  [0.01922261]]

 [[0.98335684]
  [0.01935234]]

 [[0.02258864]
  [0.97704706]]]
after 30 epochs:

[[[0.01968656]
  [0.97964026]]

 [[0.9843277 ]
  [0.01685975]]

 [[0.98545611]
  [0.01693822]]

 [[0.01960272]
  [0.97970462]]]
after 35 epochs:

[[[0.01759294]
  [0.98156274]]

 [[0.98613406]
  [0.01516094]]

 [[0.98689609]
  [0.01520149]]

 [[0.017531

In [8]:
test_imgs_filename = "mnist/t10k-images.idx3-ubyte"
test_labels_filename = "mnist/t10k-labels.idx1-ubyte"
train_imgs_filename = "mnist/train-images.idx3-ubyte"
train_labels_filename = "mnist/train-labels.idx1-ubyte"

test_imgs_file = open(test_imgs_filename, "rb")
test_labels_file = open(test_labels_filename, "rb")
train_imgs_file = open(train_imgs_filename, "rb")
train_labels_file = open(train_labels_filename, "rb")

LABEL_OFFSET = 8
IMG_OFFSET = 16
IMG_SIZE = 784
LABEL_SIZE = 1
TEST_SIZE = 10000
TRAIN_SIZE = 60000

In [9]:
test_imgs = np.empty([TEST_SIZE, IMG_SIZE], dtype=np.uint8)
test_labels = np.empty([TEST_SIZE, 10], dtype=np.float64)
train_imgs = np.empty([TRAIN_SIZE, IMG_SIZE], dtype=np.uint8)
train_labels = np.empty([TRAIN_SIZE, 10], dtype=np.float64)

In [10]:
test_imgs_file.seek(IMG_OFFSET)
for i in range(len(test_imgs)):
    cur_img = test_imgs_file.read1(IMG_SIZE)
    cur_img = np.array([cur_img[j] for j in range(len(cur_img))])
    test_imgs[i] = cur_img
    
test_labels_file.seek(LABEL_OFFSET)
for i in range(len(test_labels)):
    cur_label = test_labels_file.read1(LABEL_SIZE)
    cur_label = np.array([cur_label[j] for j in range(len(cur_label))])
    test_labels[i] = np.zeros([10])
    test_labels[i][cur_label] = 1.
    
train_imgs_file.seek(IMG_OFFSET)
for i in range(len(train_imgs)):
    cur_img = train_imgs_file.read1(IMG_SIZE)
    cur_img = np.array([cur_img[j] for j in range(len(cur_img))])
    train_imgs[i] = cur_img
    
train_labels_file.seek(LABEL_OFFSET)
for i in range(len(train_labels)):
    cur_label = train_labels_file.read1(LABEL_SIZE)
    cur_label = np.array([cur_label[j] for j in range(len(cur_label))])
    train_labels[i] = np.zeros([10])
    train_labels[i][cur_label] = 1.

In [11]:
test_imgs = test_imgs / 255.0
train_imgs = train_imgs / 255.0

In [12]:
# https://stackoverflow.com/questions/4601373/better-way-to-shuffle-two-numpy-arrays-in-unison
p = np.random.permutation(len(train_imgs))
train_imgs = train_imgs[p]
train_labels = train_labels[p]

p = np.random.permutation(len(test_imgs))
test_imgs = test_imgs[p]
test_labels = test_labels[p]

del p

In [13]:
nn = Dense([784, 128, 128, 10])
np.set_printoptions(suppress=True)

outputs = nn.guess(test_imgs)
accuracy = 0
for j in range(len(test_imgs)):
    if np.argmax(outputs[j]) == np.argmax(test_labels[j]):
        accuracy += 1
accuracy /= len(test_imgs)
print("Accuracy after %d epoch(s): %f" %((0), accuracy))

for i in range(1, 16):
    nn.train(train_imgs, train_labels, batch_size=256, epochs_num=1, lr=.1)
    outputs = nn.guess(test_imgs)
    
    accuracy = 0
    for j in range(len(test_imgs)):
        if np.argmax(outputs[j]) == np.argmax(test_labels[j]):
            accuracy += 1
    accuracy /= len(test_imgs)
    print("Accuracy after %d epoch(s): %f" %((i), accuracy))

Accuracy after 0 epoch(s): 0.094000
Accuracy after 1 epoch(s): 0.860100
Accuracy after 2 epoch(s): 0.893400
Accuracy after 3 epoch(s): 0.907600
Accuracy after 4 epoch(s): 0.915800
Accuracy after 5 epoch(s): 0.921400
Accuracy after 6 epoch(s): 0.926100
Accuracy after 7 epoch(s): 0.930500
Accuracy after 8 epoch(s): 0.934200
Accuracy after 9 epoch(s): 0.937000
Accuracy after 10 epoch(s): 0.940100
Accuracy after 11 epoch(s): 0.941100
Accuracy after 12 epoch(s): 0.943300
Accuracy after 13 epoch(s): 0.944500


KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt

%matplotlib inline

fig, axs = plt.subplots(5, 5, figsize=(25, 25))
for i in range(5):
    for j in range(5):
        index = j + i * 5
        axs[i][j].imshow(test_imgs[index].reshape(28, 28), cmap="gray")
        axs[i][j].set_title(np.argmax(test_labels[index]))

In [None]:
%matplotlib inline

fig, axs = plt.subplots(5, 5, figsize=(25, 25))
for i in range(5):
    for j in range(5):
        index = j + i * 5
        axs[i][j].imshow(train_imgs[index].reshape(28, 28), cmap="gray")
        axs[i][j].set_title(np.argmax(train_labels[index]))