In [20]:
import numpy as np
import matplotlib.pyplot as plt
import random
import pandas as pd
np.random.seed(0)

In [21]:
data = pd.read_csv("mnist_train.csv")
test = pd.read_csv("mnist_test.csv")

In [22]:
test.head()

Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
0,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [23]:
data = np.array(data)
test = np.array(test)
m,n = data.shape
labels = data[:,0]
values = data[:,1:n]/255
test_labels = test[:,0]
test_values = test[:,1:n]

In [24]:
def oneHotEncode(len,value):
    vector = np.zeros((len,1))
    vector[value,0] = 1
    return vector

def checkAccuracy(prediction,value):
    return prediction.argmax() == value

def calculateCategoricalCrossEntropyLoss(prediction,true):
    y_pred_clipped = np.clip(prediction,1e-7,1-1e-7)
    return -np.log(np.sum(y_pred_clipped*oneHotEncode(10,true)))

def calculateError(prediction,true):
    return prediction - true

In [30]:

class Layer:
    def __init__(self):
        self.inputNum = None
        self.outputNum = None

class Dense(Layer):        
    def __init__(self,n_inputs,n_neurons):
        self.weights = np.random.rand(n_neurons,n_inputs) -0.5
        self.bias = np.zeros((n_neurons,1))
        self.layerName = "Dense"

    def foward(self, input):
        self.inputs = input
        return np.dot(self.weights,self.inputs) + self.bias
    
    def backward(self,gradient,learningRate):
        weights_gradient = np.dot(gradient,self.inputs.T)
        self.weights -= learningRate*weights_gradient
        self.bias -= learningRate*gradient
        return np.dot(self.weights.T,gradient)
    

class NeuralNetwork():
    def __init__(self,layers):
        self.layers = layers

    def fowardStep(self,input):
        inputData = input
        for layer in self.layers:
            inputData = layer.foward(inputData)
            # print(inputData)
        return inputData
    
    def backwardStep(self,gradient,learningRate):
        gradientData = gradient
        for layer in reversed(self.layers):
            # print(f"layer : {layer.layerName} - gradientData: {gradientData}")
            gradientData = layer.backward(gradientData,learningRate)
        return gradientData


    def train(self,images,labels,epochs,learningRate):
        for epoch in range(epochs):
            preEpochWeights = self.layers[0].weights.copy()
            correctGuesses = 0
            finalGradient = np.zeros((10,1))
            for imgIndex ,(img,label) in enumerate(zip(images,labels)):
                currImg = np.reshape(img,(n-1,1))
                fowardResult = self.fowardStep(currImg)
                correctGuesses += 1 if checkAccuracy(fowardResult,label) else 0
                if np.isnan(fowardResult).any():
                    raise ValueError("O array contém valores NaN!")
                # print(f"Label: {label} \nPrediction: {fowardResult} \nGradient: {gradient}")
                gradient = calculateError(fowardResult,oneHotEncode(10,label))
                finalGradient += 1/(imgIndex) * (gradient-finalGradient) if (imgIndex) != 0 else gradient
            # print(finalGradient)
                # if imgIndex % 10 == 0:
                self.backwardStep(finalGradient,learningRate)
                finalGradient = np.zeros((10,1))
            postEpochWeights = self.layers[0].weights.copy()
            if np.array_equal(preEpochWeights, postEpochWeights):
                raise AttributeError("Os pesos das camada 1 não estao sendo atualizados")
            print(f"Epoch {epoch} --> Accuracy = {correctGuesses/len(labels)*100:.2f}%")




In [31]:
class Activation_Relu:
    def __init__(self):
        self.layerName = "Relu"
    def foward(self,inputs):
        self.inputs = inputs
        self.outputs = np.maximum(0,inputs)
        return self.outputs
    
    def backward(self,gradient,learningRate):
        return np.multiply(gradient,(self.inputs > 0).astype(float))
    
class Activation_Softmax:
    def __init__(self):
        self.layerName = "Softmax"
    def foward(self,inputs):
        self.inputs = inputs
        normalizedInput = inputs-np.max(inputs)
        tmp = np.exp(normalizedInput)
        self.output = tmp/np.sum(tmp)
        return self.output
    def backward(self,gradient,learningRate):
        return gradient


In [32]:
network = NeuralNetwork([
    Dense(784,10),
    Activation_Relu(),
    Dense(10,10),
    Activation_Softmax()
])

In [56]:
network.train(values,labels,10,0.5)

Epoch 0 --> Accuracy = 85.44%
Epoch 1 --> Accuracy = 85.76%
Epoch 2 --> Accuracy = 86.03%
Epoch 3 --> Accuracy = 86.25%
Epoch 4 --> Accuracy = 86.48%
Epoch 5 --> Accuracy = 86.70%
Epoch 6 --> Accuracy = 86.90%
Epoch 7 --> Accuracy = 87.07%
Epoch 8 --> Accuracy = 87.21%
Epoch 9 --> Accuracy = 87.36%


In [57]:
correct = 0
wrong = []
for num in range(len(test_labels)):
    input = np.reshape(values[num],(n-1,1))
    for layer in network.layers:
        input = layer.foward(input)
    if input.argmax() == labels[num]:
        correct+=1
    else:
        wrong.append(num)

    
print(f"porcentagem de acerto: {correct/len(test_labels)*100:.6f}%")

porcentagem de acerto: 89.010000%


In [58]:
len(wrong)

1099