In [1]:
import gzip
import struct
import numpy as np
import pandas as pd

# Load the MNIST "t10k" images and labels from gzip-compressed IDX files.
# Forward slashes keep the relative paths valid on Windows as well as on
# Linux/macOS (the previous 'MNIST\\...' form only resolved on Windows).
with gzip.open('MNIST/t10k-images-idx3-ubyte', 'rb') as f:
    # IDX3 header: four big-endian uint32 values -> magic, image count, rows, columns.
    magic, size, nrows, ncols = struct.unpack(">IIII", f.read(16))
    if magic != 2051:  # 0x00000803: unsigned-byte payload with 3 dimensions
        raise ValueError(f"Unexpected magic number {magic} in image file (expected 2051)")
    # Pixels are single bytes, so byte order is irrelevant for the payload.
    data = np.frombuffer(f.read(), dtype=np.uint8).reshape((size, nrows, ncols))

with gzip.open('MNIST/t10k-labels-idx1-ubyte', 'rb') as f:
    # IDX1 header: two big-endian uint32 values -> magic, label count.
    # Note: `magic` and `size` now describe the label file.
    magic, size = struct.unpack(">II", f.read(8))
    if magic != 2049:  # 0x00000801: unsigned-byte payload with 1 dimension
        raise ValueError(f"Unexpected magic number {magic} in label file (expected 2049)")
    labels = np.frombuffer(f.read(), dtype=np.uint8).reshape((size,))

# Every image needs exactly one label; fail early instead of mis-indexing later.
if labels.shape[0] != data.shape[0]:
    raise ValueError(f"Image/label count mismatch: {data.shape[0]} images, {labels.shape[0]} labels")

In [2]:
# Chooser network layout (layer sizes):
# Input layer: image[28*28 = 784]
# Hidden layer 1: [16]
# Hidden layer 2: [8]
# Output: [10]
CHOOSER_HIDDEN1_SIZE = 16
CHOOSER_HIDDEN2_SIZE = 8
CHOOSER_OUTPUT_SIZE = 10



class Chooser:
    """Fully connected classifier: input -> hidden 1 -> hidden 2 -> output, ReLU on every layer.

    Attributes:
        weights1, weights2, weights3: weight matrices, each shaped
            (layer size, previous layer size).
        hidden_layer1, hidden_layer2, output_layer: the *bias* vectors of the
            three layers (despite the names, they do not hold activations).
        weights: tuple with the three weight matrices (the same array objects).
        input_size: number of values expected in one input sample.

    At run time the layer sizes are taken from the array shapes, so networks
    loaded from files are not tied to the module-level CHOOSER_* constants.
    """

    def __init__(self, paths=None, input_size=784):
        """Create a randomly initialised network or load one from CSV files.

        Args:
            paths: None for random initialisation; otherwise a sequence of six
                CSV paths in the order weights1, weights2, weights3, biases of
                hidden layer 1, biases of hidden layer 2, output biases. The
                first column of every file is read as the index and dropped.
            input_size: number of values per input sample (784 = 28 * 28).
        """
        if paths is None:
            self.hidden_layer1 = np.zeros(CHOOSER_HIDDEN1_SIZE)
            self.hidden_layer2 = np.zeros(CHOOSER_HIDDEN2_SIZE)
            self.output_layer = np.zeros(CHOOSER_OUTPUT_SIZE)
            # `scale` is the standard deviation: 1 / fan_in**2 for each matrix.
            # NOTE(review): this is far smaller than common initialisation
            # schemes (e.g. sqrt(2 / fan_in)) -- confirm it is intentional.
            self.weights1 = np.random.normal(loc=0, scale=1/input_size**2,
                                             size=(CHOOSER_HIDDEN1_SIZE, input_size))
            self.weights2 = np.random.normal(loc=0, scale=1/CHOOSER_HIDDEN1_SIZE**2,
                                             size=(CHOOSER_HIDDEN2_SIZE, CHOOSER_HIDDEN1_SIZE))
            self.weights3 = np.random.normal(loc=0, scale=1/CHOOSER_HIDDEN2_SIZE**2,
                                             size=(CHOOSER_OUTPUT_SIZE, CHOOSER_HIDDEN2_SIZE))
        else:
            # use_grad() updates these arrays in place, so they have to be
            # writable float arrays. A plain to_numpy() can return a read-only
            # view (pandas Copy-on-Write) or an integer array.
            self.weights1 = pd.read_csv(paths[0], index_col=0).to_numpy(dtype=float, copy=True)
            self.weights2 = pd.read_csv(paths[1], index_col=0).to_numpy(dtype=float, copy=True)
            self.weights3 = pd.read_csv(paths[2], index_col=0).to_numpy(dtype=float, copy=True)
            # flatten() always returns a fresh, writable copy.
            self.hidden_layer1 = pd.read_csv(paths[3], index_col=0).to_numpy(dtype=float).flatten()
            self.hidden_layer2 = pd.read_csv(paths[4], index_col=0).to_numpy(dtype=float).flatten()
            self.output_layer = pd.read_csv(paths[5], index_col=0).to_numpy(dtype=float).flatten()

        self.weights = (self.weights1, self.weights2, self.weights3)
        self.input_size = input_size

    def _forward(self, input_data):
        """Run one sample through the network.

        Args:
            input_data: array-like with exactly `input_size` values (any shape;
                it is flattened). Values are divided by 255 before use.

        Returns:
            Tuple (x, z1, a1, z2, a2, z_out, a_out): the scaled input followed
            by the pre-activation and ReLU activation of each layer.

        Raises:
            ValueError: if the sample does not contain `input_size` values.
        """
        x = np.asarray(input_data).reshape(-1)
        if x.size != self.input_size:
            raise ValueError("Chooser input len error")

        x = x / 255

        z1 = np.dot(self.weights1, x) + self.hidden_layer1
        a1 = Chooser.relu(z1)
        z2 = np.dot(self.weights2, a1) + self.hidden_layer2
        a2 = Chooser.relu(z2)
        z_out = np.dot(self.weights3, a2) + self.output_layer
        a_out = Chooser.relu(z_out)
        return x, z1, a1, z2, a2, z_out, a_out

    def train(self, input_data, right_answer):
        """Back-propagate the squared-error cost for one labelled sample.

        The cost is sum((output - one_hot(right_answer)) ** 2). The ReLU
        derivative is taken as 1 for z >= 0 (np.heaviside(z, 1)).

        Args:
            input_data: array-like with `input_size` values in the 0..255 range.
            right_answer: index of the correct output neuron.

        Returns:
            1-D object array of length 6 holding, in this order, the gradients
            for: hidden-layer-1 biases, hidden-layer-2 biases, output biases,
            weights1, weights2, weights3. This is the layout use_grad() expects.

        Raises:
            ValueError: if the sample does not contain `input_size` values.
        """
        x, z1, a1, z2, a2, z_out, a_out = self._forward(input_data)

        desired_answers = np.zeros(self.output_layer.shape[0])
        desired_answers[right_answer] = 1

        # Output layer: dC/dz = ReLU'(z) * dC/da.
        delta_out = np.heaviside(z_out, 1) * (2 * (a_out - desired_answers))
        gradient_weights3 = np.outer(delta_out, a2)

        # Second hidden layer: push the error back through weights3.
        delta_hl2 = np.heaviside(z2, 1) * np.dot(self.weights3.T, delta_out)
        gradient_weights2 = np.outer(delta_hl2, a1)

        # First hidden layer: push the error back through weights2.
        delta_hl1 = np.heaviside(z1, 1) * np.dot(self.weights2.T, delta_hl2)
        gradient_weights1 = np.outer(delta_hl1, x)

        # Fill a 1-D object array slot by slot. np.array([...], dtype=object)
        # tries to build a multi-dimensional array and fails when the layer
        # sizes happen to coincide.
        parts = (delta_hl1, delta_hl2, delta_out,
                 gradient_weights1, gradient_weights2, gradient_weights3)
        gradient = np.empty(len(parts), dtype=object)
        for position, part in enumerate(parts):
            gradient[position] = part
        return gradient

    def use_grad(self, gradient, learning_rate):
        """Apply one gradient-descent step in place.

        Args:
            gradient: six gradients in the order returned by train().
            learning_rate: step size multiplied into every gradient.

        The arrays are modified in place, so `self.weights` keeps referring to
        the updated matrices.
        """
        self.hidden_layer1 -= gradient[0] * learning_rate
        self.hidden_layer2 -= gradient[1] * learning_rate
        self.output_layer -= gradient[2] * learning_rate
        self.weights1 -= gradient[3] * learning_rate
        self.weights2 -= gradient[4] * learning_rate
        self.weights3 -= gradient[5] * learning_rate

    def test(self, data_x, data_y):
        """Return the mean squared-error cost over a labelled data set.

        Args:
            data_x: sequence of samples, each holding `input_size` values.
            data_y: sequence of correct output indices, aligned with data_x.

        Returns:
            Sum of squared output errors per sample, averaged over len(data_x).

        Raises:
            ZeroDivisionError: if data_x is empty.
            ValueError: if a sample does not contain `input_size` values.
        """
        n_outputs = self.output_layer.shape[0]
        loss = 0
        for i in range(len(data_x)):
            desired_answers = np.zeros(n_outputs)
            desired_answers[data_y[i]] = 1
            loss += np.sum((self._process(data_x[i]) - desired_answers)**2)
        return loss/len(data_x)

    def process(self, input_data):
        """Return the index of the strongest output neuron for one sample."""
        return np.argmax(self._process(input_data))

    def _process(self, input_data):
        """Return the output-layer activations for one sample.

        Raises:
            ValueError: if the sample does not contain `input_size` values.
        """
        return self._forward(input_data)[-1]

    def get_init_variance(self):
        """Print mean, variance and standard deviation over all current weights.

        The statistics pool every entry of the three weight matrices in
        `self.weights`; biases are not included. Nothing is returned.
        """
        values = np.concatenate([weights.reshape(-1) for weights in self.weights])
        mean = values.sum() / values.size
        variance = np.sum((values - mean)**2) / values.size

        print(f"Mean = {mean};")
        print(f"Variance = {variance}")
        print(f"Standard deviation = {variance**0.5}")

    @staticmethod
    def relu(x):
        """Rectified linear unit, max(0, x); works on scalars and on arrays."""
        return np.maximum(x, 0)

In [3]:
import pickle

# Restore a previously saved network object from disk.
# - presumably nn1.pickle holds a Chooser instance, in which case the Chooser
#   class must already be defined when this cell runs -- confirm.
# - SECURITY: pickle.load can execute arbitrary code while loading; only open
#   pickle files that come from a trusted source.
with open("nn1.pickle", "rb") as f:
    nn = pickle.load(f)

In [4]:
def training(neironka, iterations=10_000, batch=200, test_size=20, images=None, targets=None,
             n_batches=5, learning_rate=0.1, log_every=30):
    """Train a network with mini-batch gradient descent, cycling over a fixed set of batches.

    Iteration k trains on samples [batch * (k % n_batches), batch * (k % n_batches + 1)),
    so only the first batch * n_batches samples are ever used for updates.

    Args:
        neironka: network object providing train(x, y), use_grad(gradient, learning_rate)
            and test(xs, ys).
        iterations: number of gradient steps.
        batch: number of samples averaged into one gradient step.
        test_size: number of held-out samples used for the logged "lossTest".
        images: samples to train on; defaults to the notebook-level `data` array.
        targets: labels aligned with `images`; defaults to the notebook-level `labels`.
        n_batches: number of distinct mini-batches that are cycled through.
        learning_rate: step size passed to use_grad().
        log_every: print the losses every this many iterations.

    The logged "lossTest" is measured on the `test_size` samples that follow the
    last training batch, so it never overlaps with samples used for updates.
    """
    if images is None:
        images = data
    if targets is None:
        targets = labels

    # First index that no mini-batch ever touches.
    held_out_start = batch * n_batches

    for iteration in range(iterations):
        if iteration % log_every == 0:
            loss_train = neironka.test(images[:batch], targets[:batch])
            loss_test = neironka.test(images[held_out_start:held_out_start + test_size],
                                      targets[held_out_start:held_out_start + test_size])
            print(f'Iteration {iteration}, lossTrain = {loss_train}, lossTest = {loss_test}')

        # Six slots, matching the gradient layout returned by neironka.train().
        gradient = np.zeros(6, dtype='object')
        first_sample = batch * (iteration % n_batches)
        for sample_num in range(first_sample, first_sample + batch):
            gradient += neironka.train(images[sample_num].reshape(-1), targets[sample_num])
        gradient /= batch
        neironka.use_grad(gradient, learning_rate=learning_rate)

# nn = Chooser()
# training(nn)

In [5]:
# Evaluate on samples 1000 and up: training() with its default batch=200 cycles
# over 5 batches, i.e. samples 0..999, so this slice is never used for updates
# by that default loop.
nn.test(data[1_000:], labels[1_000:])

0.35186080185491636