In [1]:
import numpy as np
import matplotlib.pyplot as plt
import random
import pandas as pd

In [2]:
data = pd.read_csv("mnist_train.csv")
test = pd.read_csv("mnist_test.csv")

In [3]:
test.head()

Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
0,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [76]:
data = np.array(data)
test = np.array(test)
m,n = data.shape
labels = data[:,0]
values = data[:,1:n]/255
test_labels = test[:,0]
test_values = test[:,1:n]/255

In [117]:
class Layer:
    def __init__(self):
        self.inputNum = None
        self.outputNum = None

class Dense(Layer):
    def __init__(self,inputNum,outputNum):
        self.inputNum = inputNum
        self.outputNum = outputNum
        self.weigths = np.random.rand(self.outputNum,self.inputNum) - 0.5
        self.bias = np.random.rand(self.outputNum,1) - 0.5

    def forward(self,inputs):
        self.inputs = inputs
        return np.dot(self.weigths,inputs) + self.bias
    
    def backward(self,outputsGradient,learningRate):
        weights_gradient = np.dot(outputsGradient,self.inputs.T)
        self.weigths -= learningRate*weights_gradient
        self.bias -= learningRate*outputsGradient
        return np.dot(self.weigths.T,outputsGradient)
    

class NeuralNetwork():
    def __init__(self,layers:list[Layer])->list[int]:
        self.layers = layers
    


    def batch_train(self,dataset_values,dataset_labels, epochs, learningRate):
        # meanError = np.reshape([float(0)]*10,(10,1))
        for epoch in range(epochs):
            for i,(img,answer) in enumerate(zip(dataset_values,dataset_labels)):
                desiredOutput = np.reshape([1 if x==answer else 0 for x in range(10)],(10,1)) 
                currentImg = np.reshape(img,(n-1,1))
                input = currentImg
                for li,layer in enumerate(self.layers):
                    # print(f"Camada {li}, Resultado: {input}")
                    # if isinstance(layer, Dense):
                        # print(f"Pesos: {layer.weigths}")
                    input = layer.forward(input)
                # print(f"ResultadoFinal: {input}")
                # print("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX")
                error = (input-desiredOutput)
                # meanError += 1/i * (error-meanError) if i != 0 else error
                backwardsInput = error
                if np.isnan(backwardsInput).any():
                    raise KeyError("NaN found in backwardsInput")
                for layer in reversed(self.layers):
                    backwardsInput = layer.backward(backwardsInput,0.1)


    def test(self,img):
        pass

In [118]:
class Activation(Layer):
    def function(self,inputs):
        pass
    def function_prime(self,inputs):
        pass

    def forward(self,inputs):
        self.inputs = inputs
        return self.function(self.inputs)

    def backward(self,outputGradient,learningRate):
        return np.multiply(outputGradient,self.function_prime(self.inputs))
    

class Sigmoid(Activation):
    def function(self,inputs):
        return 1/(1+np.exp(-inputs))
    
    def function_prime(self,inputs):
        temp = self.function(inputs)
        return temp*(1-temp)

class Relu(Activation):
    def function(self, inputs):
        return np.maximum(0,inputs)
    
    def function_prime(self, inputs):
        return inputs > 0


class Softmax(Activation):
    
    def function(self,z):
        assert len(z.shape) == 2
        s = np.max(z, axis=1)
        s = s[:, np.newaxis] # necessary step to do broadcasting
        e_x = np.exp(z - s)
        div = np.sum(e_x, axis=1)
        div = div[:, np.newaxis] # dito
        return e_x / div

    def function_prime(self, inputs):
        return inputs



class Tanh(Activation):
    def __init__(self):
        self.tanh = lambda x: np.tanh(x)
        self.tanh_prime = lambda x: 1- np.tanh(x)**2
        
    def forward(self,inputs):
        self.inputs = inputs
        return self.tanh(self.inputs)

    def backward(self,outputGradient,learningRate):
        return np.multiply(outputGradient,self.tanh_prime(self.inputs))

In [119]:

network = NeuralNetwork([
    Dense(784,64),
    Relu(),
    Dense(64,10),
    Relu(),
    Softmax()
])

In [120]:
network.batch_train(values,labels,epochs=5,learningRate=0.01)

In [107]:
correct = 0
for num in range(len(test_labels)):
    input = np.reshape(values[num],(n-1,1))
    for layer in network.layers:
        input = layer.forward(input)
    if input.argmax() == labels[num]:
        correct+=1
print(f"porcentagem de acerto: {correct/len(test_labels)*100:.6f}%")

porcentagem de acerto: 10.010000%


In [91]:
np.random.rand(10,10) - 0.5

array([[ 0.31459492,  0.23333024, -0.23644032,  0.48718017,  0.3938476 ,
         0.24288346,  0.24924377, -0.19102264, -0.39158794,  0.18124085],
       [-0.47933132,  0.44465571,  0.29832451, -0.48010703, -0.08102057,
        -0.39667936, -0.2231252 , -0.26293234, -0.29723882, -0.14613893],
       [-0.44969978, -0.49582499, -0.25296551,  0.23170741, -0.25352456,
        -0.33547671,  0.35967052,  0.46125314,  0.34648881,  0.31062922],
       [-0.4994559 , -0.18599081, -0.38621358, -0.22872259,  0.19630237,
        -0.44175974,  0.25013626,  0.46285402, -0.32670655, -0.46386791],
       [ 0.18102111, -0.02907684,  0.07595399,  0.18241464,  0.07498263,
         0.18640231, -0.01420094,  0.29723065, -0.28193557,  0.14065089],
       [ 0.43631269, -0.26911207,  0.12089175, -0.38585435, -0.30825124,
         0.04382397,  0.08687756, -0.22495661,  0.06175823,  0.01284097],
       [ 0.25646436, -0.04302567,  0.4667354 , -0.13872297, -0.30090192,
        -0.04401194, -0.38518124, -0.40376112

In [27]:
teste = np.array([
 [  0.        ],
 [233.187544  ],
 [  0.        ],
 [  0.        ],
 [182.66424031],
 [  0.        ],
 [ 72.79797629],
 [  0.        ],
 [  0.        ],
 [  0.        ]])

soft = Softmax()

soft.forward(teste)

TypeError: Softmax.function() takes 1 positional argument but 2 were given

In [220]:
teste = np.array([
 [ 546.78040258],
 [ 522.71577662],
 [1983.2688931 ],
 [ 268.79432226],
 [ 321.28196723],
 [ 448.98075504],
 [1088.70506932],
 [ 219.95386547],
 [ 514.59306463],
 [1161.43603811]])

dense = Softmax()

dense.forward(teste)

  exp_values = np.exp(inputs)
  return exp_values / np.sum(exp_values)


array([[ 0.],
       [ 0.],
       [nan],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.]])