In [0]:
import numpy as np
import matplotlib.pyplot as plt
from tensorflow import keras
import pickle as pickle

In [0]:
class MathUtils():
    """Stateless numeric helpers: activations, their derivatives, loss and label encoding.

    All helpers operate on NumPy arrays. The column-wise helpers (``softmax``,
    ``cross_entropy``, ``hotOne``) treat axis 0 as the class axis and axis 1 as the
    sample axis.
    """

    @staticmethod
    def sigmoid(x):
        """Return the element-wise logistic function ``1 / (1 + exp(-x))``."""
        return 1 / (1 + np.exp(-x))

    @staticmethod
    def sigmoid_deriv(x):
        """Return the element-wise derivative of ``sigmoid`` evaluated at ``x``."""
        s = MathUtils.sigmoid(x)  # evaluate once instead of twice
        return s * (1 - s)

    @staticmethod
    def relu(x):
        """Return ``max(0, x)`` element-wise."""
        return np.maximum(0, x)

    @staticmethod
    def relu_deriv(x):
        """Return the ReLU derivative: 1 where ``x > 0``, 0 elsewhere.

        A new array with the dtype of ``x`` is returned; ``x`` itself is left
        untouched so that cached pre-activations remain valid after the call.
        """
        x = np.asarray(x)
        return np.greater(x, 0).astype(x.dtype)

    @staticmethod
    def softmax(x):
        """Return the softmax of ``x`` taken over axis 0 (one distribution per column).

        The per-column maximum is subtracted before exponentiation. This does not
        change the result mathematically but prevents ``exp`` from overflowing
        to ``inf`` (and the output from becoming NaN) for large logits.
        """
        shifted = x - np.max(x, axis=0, keepdims=True)
        x_exp = np.exp(shifted)
        x_sum = np.sum(x_exp, axis=0, keepdims=True)
        return x_exp / x_sum

    @staticmethod
    def hotOne(array, output_size):
        """One-hot encode integer labels.

        Args:
            array: 2-D integer array; only row 0 is read, one label per column.
            output_size: number of classes (rows of the result).

        Returns:
            Float array of shape ``(output_size, array.shape[1])`` that is zero
            everywhere except at ``[label, column]``, which holds 0.999999999
            (the value used by the original encoding, kept for compatibility).
        """
        assert len(array.shape) == 2, "Input has to have shape (data,data_size)"
        sample_count = array.shape[1]
        Y = np.zeros((output_size, sample_count))
        # One fancy-index assignment sets the "hot" entry of every column at once.
        Y[array[0], np.arange(sample_count)] = 0.999999999
        return Y

    @staticmethod
    def softmax_deriv(x):
        """Return ``s * (1 - s)`` for ``s = softmax(x)``.

        This is only the diagonal of the softmax Jacobian (d s_i / d x_i).
        """
        s = MathUtils.softmax(x)  # evaluate once instead of twice
        return s * (1 - s)

    @staticmethod
    def cross_entropy(A, Y):
        """Return the mean categorical cross-entropy ``-sum(Y * log(A)) / M``.

        Args:
            A: predicted probabilities, shape ``(classes, M)``.
            Y: target distribution with the same shape as ``A``.

        Returns:
            The cost as a Python float.
        """
        M = A.shape[1]
        # Clamp away from zero: log(0) is -inf and -inf * 0 would poison the sum with NaN.
        logprobs = np.multiply(np.log(np.maximum(A, 1e-12)), Y)
        cost = - np.sum(logprobs) / M
        return float(np.squeeze(cost))

    @staticmethod
    def cross_entropy_deriv(A, Y):
        """Return the element-wise derivative of binary cross-entropy w.r.t. ``A``.

        For the loss ``-[Y*log(A) + (1-Y)*log(1-A)]`` the derivative is
        ``-(Y / A) + (1 - Y) / (1 - A)``. ``A`` is clamped to the open interval
        (0, 1) to avoid division by zero.
        """
        A = np.clip(A, 1e-12, 1 - 1e-12)
        return -(Y / A) + (1 - Y) / (1 - A)

In [0]:
def loadMnist():
    """Download MNIST through Keras and return it in the layout the network expects.

    Each split's images are flattened to one row per sample, scaled by 1/255 and
    transposed so that every column is one sample. Labels are reshaped into a single
    row and one-hot encoded into 10 classes with ``MathUtils.hotOne``.

    Returns:
        ``((x_train, y_train), (x_test, y_test))``.
    """
    prepared = []
    for images, labels in keras.datasets.mnist.load_data():
        sample_count = images.shape[0]
        features = (images.reshape(sample_count, -1) / 255.0).T
        label_row = labels.reshape(labels.shape[0], 1).T
        prepared.append((features, MathUtils.hotOne(label_row, 10)))
    train_split, test_split = prepared
    return train_split, test_split


def loadCifar10():
    """Download CIFAR-10 through Keras and return it in the layout the network expects.

    Images are flattened to one row per sample, scaled by 1/255 and transposed so
    that every column is one sample. Labels are reshaped into a single row and
    one-hot encoded into 10 classes with ``MathUtils.hotOne``.

    Returns:
        ``((x_train, y_train), (x_test, y_test))``.
    """
    (x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()
    x_train = (x_train.reshape(x_train.shape[0], -1) / 255.0).T
    x_test = (x_test.reshape(x_test.shape[0], -1) / 255.0).T
    y_train = (y_train.reshape(y_train.shape[0], 1)).T
    y_test = (y_test.reshape(y_test.shape[0], 1)).T
    y_train = MathUtils.hotOne(y_train, 10)
    y_test = MathUtils.hotOne(y_test, 10)
    # The leftover debug print of the whole training matrix was removed: it flooded
    # the cell output on every call and had no effect on the returned data.
    return (x_train, y_train), (x_test, y_test)

# Load MNIST once into module-level variables used by the cells below.
# loadMnist returns the images flattened, scaled by 1/255 and transposed (one column
# per sample) and the labels one-hot encoded into 10 rows.
(x_train, y_train), (x_test, y_test) = loadMnist()

In [0]:
class NN():
    """Fully connected network (ReLU hidden layers, softmax output) trained by full-batch gradient descent.

    Data layout: inputs are ``(n_features, n_samples)`` and targets are
    ``(n_classes, n_samples)``, i.e. one column per sample. Constructing an
    instance trains it immediately. After construction:

    * ``self.S`` is the list of layer sizes, input layer first.
    * ``self.params`` maps ``"W<i>"`` / ``"B<i>"`` to the weights and biases of layer ``i``.
    """

    def init_params(self, S):
        """Create He-initialised weights and zero biases for layer sizes ``S``.

        Returns:
            dict with ``"W<i>"`` of shape ``(S[i], S[i-1])`` and ``"B<i>"`` of
            shape ``(S[i], 1)`` for ``i = 1 .. len(S) - 1``.
        """
        parameters = {}
        for i in range(1, len(S)):
            # sqrt(2 / fan_in) scaling keeps ReLU activations from vanishing/exploding.
            parameters["W" + str(i)] = np.random.randn(S[i], S[i - 1]) * (np.sqrt(2 / S[i - 1]))
            parameters["B" + str(i)] = np.zeros((S[i], 1))
        return parameters

    def forward_propagate(self, X, params, S):
        """Run a forward pass and return every intermediate value.

        Hidden layers use ReLU; the final layer uses softmax.

        Returns:
            dict with pre-activations ``"Z<i>"`` and activations ``"A<i>"`` for
            every layer ``i = 1 .. len(S) - 1``.
        """
        cache = {}
        last = len(S) - 1
        activation = X
        for i in range(1, last + 1):
            z = np.dot(params["W" + str(i)], activation) + params["B" + str(i)]
            activation = MathUtils.softmax(z) if i == last else MathUtils.relu(z)
            cache["Z" + str(i)] = z
            cache["A" + str(i)] = activation
        return cache

    def back_propagate(self, X, Y, cache, parameters, S):
        """Compute gradients of the mean softmax cross-entropy loss.

        ``cache`` is read only; it is not modified.

        Returns:
            dict with ``"dz<i>"``, ``"dw<i>"``, ``"db<i>"`` for every layer and
            ``"da<i>"`` for every hidden layer.
        """
        gradients = {}
        M = Y.shape[1]
        last = len(S) - 1
        # Softmax + cross-entropy collapses to (prediction - target); divide by M for the mean.
        gradients["dz" + str(last)] = (cache["A" + str(last)] - Y) / M
        for layer in range(last - 1, 0, -1):
            gradients["da" + str(layer)] = np.dot(parameters["W" + str(layer + 1)].T,
                                                  gradients["dz" + str(layer + 1)])
            # ReLU derivative as a boolean mask. Computed here without touching the cached
            # pre-activation, so the cache stays valid for the caller.
            gradients["dz" + str(layer)] = gradients["da" + str(layer)] * (cache["Z" + str(layer)] > 0)

        for i in range(1, len(S)):
            layer_input = X if i == 1 else cache["A" + str(i - 1)]
            # The dot product sums the per-sample outer products over the sample axis.
            gradients["dw" + str(i)] = np.dot(gradients["dz" + str(i)], layer_input.T)
            gradients["db" + str(i)] = np.sum(gradients["dz" + str(i)], axis=1, keepdims=True)

        return gradients

    def learn(self, gradients, parameters, learning_rate, network_dims):
        """Apply one gradient-descent step to ``parameters`` in place."""
        for i in range(1, len(network_dims)):
            parameters["W" + str(i)] -= learning_rate * gradients["dw" + str(i)]
            parameters["B" + str(i)] -= learning_rate * gradients["db" + str(i)]

    def get_accuracy(self, Yhat, Y):
        """Return the percentage (0-100) of columns whose argmax agrees in ``Yhat`` and ``Y``."""
        m = Yhat.shape[1]
        hits = np.count_nonzero(np.argmax(Yhat, axis=0) == np.argmax(Y, axis=0))
        return (hits / m) * 100.0

    def evaluate(self, dataset):
        """Forward ``dataset = (X, Y)`` through the network.

        Returns:
            dict with the forward ``"cache"``, the cross-entropy ``"cost"`` and
            the ``"accuracy"`` in percent.
        """
        (X, Y) = dataset
        cache = self.forward_propagate(X, self.params, self.S)
        output = cache["A" + str(len(self.S) - 1)]
        cost = MathUtils.cross_entropy(output, Y)
        accuracy = self.get_accuracy(output, Y)
        return {"cache": cache, "cost": cost, "accuracy": accuracy}

    def predict(self, input_data):
        """Return the index of the most probable class for a single sample.

        Args:
            input_data: one sample with ``self.S[0]`` features, given as a column
                ``(S[0], 1)``, a row ``(1, S[0])`` or a flat vector ``(S[0],)``.
        """
        X = np.asarray(input_data)
        n_features = self.S[0]
        assert X.shape in ((n_features, 1), (1, n_features), (n_features,)), (
            "input_data has to hold exactly one sample with {} features".format(n_features))
        # The network works on column vectors, so normalise the accepted layouts.
        X = X.reshape(n_features, 1)

        cache = self.forward_propagate(X, self.params, self.S)
        label = np.argmax(cache["A" + str(len(self.S) - 1)])
        return label

    def validate_dataset(self, dataset, name):
        """Assert that ``dataset`` is a ``(X, Y)`` tuple of 2-D arrays with equal sample counts.

        Raises:
            AssertionError: if any of the checks fails; ``name`` prefixes the message.
        """
        assert isinstance(dataset, tuple) and len(dataset) == 2, (
                name + " has to be a tuple of size 2 -> (x_train,y_train)")
        assert isinstance(dataset[0], np.ndarray) and isinstance(dataset[1], np.ndarray), (
                name + " data has to be numpy array")
        assert len(dataset[0].shape) == 2 and len(dataset[1].shape) == 2, (
                name + " data has to be of shape (data_size,data) and labels (label_size,label)")
        # Samples are columns; a mismatch would otherwise fail (or silently broadcast) deep in training.
        assert dataset[0].shape[1] == dataset[1].shape[1], (
                name + " data and labels have to contain the same number of samples")

    def save(self, path="neural_network"):
        """Pickle this network to ``path``."""
        with open(path, 'wb') as outfile:
            pickle.dump(self, outfile)

    @staticmethod
    def load(path="neural_network"):
        """Unpickle and return a network previously written by ``save``.

        NOTE: unpickling can execute arbitrary code; only load files you trust.
        """
        with open(path, 'rb') as infile:
            return pickle.load(infile)

    def __init__(self, dataset, val_dataset=None, epochs=2000, learning_rate=0.5, shape=None,
                 print_costs=True,
                 stop_when_loss=True,
                 report_every=100):
        """Validate the data, initialise the parameters and train the network.

        Args:
            dataset: ``(X, Y)`` training data, one column per sample.
            val_dataset: optional ``(X, Y)`` validation data in the same layout.
            epochs: maximum number of full-batch gradient steps.
            learning_rate: gradient-descent step size.
            shape: layer sizes, input first; must have more than two entries and
                match the data. Defaults to one hidden layer of 30 units.
            print_costs: print cost/accuracy every ``report_every`` epochs.
            stop_when_loss: stop training as soon as the monitored accuracy is
                lower than at the previous checkpoint (see ``_train``).
            report_every: checkpoint interval in epochs (positive integer).
        """
        self.validate_dataset(dataset, "Data set")
        if val_dataset is not None:
            self.validate_dataset(val_dataset, "Validation set")
        assert report_every >= 1, "report_every has to be a positive integer"

        (X, Y) = dataset
        if shape is not None:
            self.S = list(shape)
            assert len(self.S) > 2
            assert self.S[0] == X.shape[0] and self.S[-1] == Y.shape[0]
        else:
            # Size the output layer from the labels rather than assuming 10 classes.
            self.S = [X.shape[0], 30, Y.shape[0]]
        self.params = self.init_params(self.S)

        self._train(dataset, val_dataset, epochs, learning_rate,
                    print_costs, stop_when_loss, report_every)

    def _train(self, dataset, val_dataset, epochs, learning_rate,
               print_costs, stop_when_loss, report_every):
        """Run the gradient-descent loop with periodic reporting and early stopping.

        Every ``report_every`` epochs a checkpoint is taken. The monitored
        accuracy is the validation accuracy when ``val_dataset`` is given and the
        training accuracy otherwise. With ``stop_when_loss`` the loop ends at the
        first checkpoint whose monitored accuracy is lower than the previous
        checkpoint's; parameters are left as they are at that point.
        """
        (X, Y) = dataset
        output_key = "A" + str(len(self.S) - 1)
        # Validation metrics are needed for printing or for early stopping.
        needs_validation = val_dataset is not None and (print_costs or stop_when_loss)
        previous_accuracy = None

        for epoch in range(epochs):
            cache = self.forward_propagate(X, self.params, self.S)
            grads = self.back_propagate(X, Y, cache, self.params, self.S)
            self.learn(grads, self.params, learning_rate, self.S)

            if epoch % report_every != 0:
                continue

            # Training metrics describe the forward pass made before this epoch's update.
            cost = MathUtils.cross_entropy(cache[output_key], Y)
            monitored_accuracy = self.get_accuracy(cache[output_key], Y)
            if print_costs:
                print("Epoch:{},Cost:{}, Accuracy:{}".format(epoch, cost, monitored_accuracy))

            if needs_validation:
                validation = self.evaluate(val_dataset)
                monitored_accuracy = validation["accuracy"]
                if print_costs:
                    print("Validation Cost:{}, Validation Accuracy:{}".format(
                        validation["cost"], validation["accuracy"]))

            if stop_when_loss:
                # Compare first, update afterwards; otherwise the check can never trigger.
                if previous_accuracy is not None and monitored_accuracy < previous_accuracy:
                    break
                previous_accuracy = monitored_accuracy

In [12]:
# Train a 784-200-80-10 network on the MNIST training split; the test split is
# passed as validation data so its metrics are reported during training.
nn = NN(
    (x_train, y_train),
    val_dataset=(x_test, y_test),
    shape=[784, 200, 80, 10],
    epochs=10000,
    learning_rate=0.01,
)

Shape A1[0]: 200 , Shape A1[1]:60000
Epoch:0,Cost:2.428848745603354, Accuracy:9.415
Validation Cost:2.4077156537790256, Validation Accuracy:10.09
Shape A1[0]: 200 , Shape A1[1]:60000
Shape A1[0]: 200 , Shape A1[1]:60000
Shape A1[0]: 200 , Shape A1[1]:60000
Shape A1[0]: 200 , Shape A1[1]:60000
Shape A1[0]: 200 , Shape A1[1]:60000
Shape A1[0]: 200 , Shape A1[1]:60000
Shape A1[0]: 200 , Shape A1[1]:60000
Shape A1[0]: 200 , Shape A1[1]:60000
Shape A1[0]: 200 , Shape A1[1]:60000
Shape A1[0]: 200 , Shape A1[1]:60000
Shape A1[0]: 200 , Shape A1[1]:60000
Shape A1[0]: 200 , Shape A1[1]:60000
Shape A1[0]: 200 , Shape A1[1]:60000
Shape A1[0]: 200 , Shape A1[1]:60000
Shape A1[0]: 200 , Shape A1[1]:60000
Shape A1[0]: 200 , Shape A1[1]:60000
Shape A1[0]: 200 , Shape A1[1]:60000
Shape A1[0]: 200 , Shape A1[1]:60000
Shape A1[0]: 200 , Shape A1[1]:60000
Shape A1[0]: 200 , Shape A1[1]:60000


KeyboardInterrupt: ignored

In [0]:
# Persist the trained network. The context manager closes the file even if
# pickling raises, which the manual open()/close() pair did not guarantee.
filename = 'neural_network'
with open(filename, 'wb') as outfile:
    pickle.dump(nn, outfile)

In [0]:
# Reload the pickled network and classify one test sample.
# NOTE: unpickling can execute arbitrary code; only load files you trust.
with open('neural_network', 'rb') as infile:
    nn = pickle.load(infile)

index = 49
# The data is laid out as (features, samples), so sample `index` is a column.
sample = x_test[:, index]
# predict() expects a single column vector of shape (n_features, 1).
d = nn.predict(sample.reshape(-1, 1))

# Labels are one-hot columns; argmax recovers the digit.
print("Predicted {}, Result:{}".format(d, np.argmax(y_test[:, index])))
plt.imshow(sample.reshape(28, 28))
plt.show()