In [1]:
import numpy as np

In [2]:
# Load an MNIST-style dataset stored as a pair of IDX files
def load_mnist(image_file, label_file):
    """Read an IDX image file and its matching IDX label file.

    The label file is expected to start with an 8-byte header (magic number
    and item count) followed by one unsigned byte per label. The image file
    is expected to start with a 16-byte header (magic number, image count,
    rows, columns -- each a big-endian unsigned 32-bit integer) followed by
    one unsigned byte per pixel.

    Args:
        image_file: Path to the IDX image file.
        label_file: Path to the IDX label file.

    Returns:
        A tuple ``(images, labels)``. ``images`` is a ``uint8`` array of shape
        ``(len(labels), rows * cols)`` holding one flattened image per row;
        ``labels`` is a 1-D ``uint8`` array.

    Raises:
        ValueError: If the image header is shorter than 16 bytes, or the
            amount of pixel data is not ``len(labels) * rows * cols`` bytes.
    """
    with open(label_file, 'rb') as lbpath:
        lbpath.read(8)  # skip the header: magic number + item count
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8)

    with open(image_file, 'rb') as imgpath:
        header = imgpath.read(16)
        if len(header) != 16:
            raise ValueError('image file header is truncated (expected 16 bytes)')
        # Take the image size from the header instead of assuming 28x28, so
        # the same loader also reads IDX image sets of other resolutions.
        _, _, n_rows, n_cols = (int(v) for v in np.frombuffer(header, dtype='>u4'))
        pixels = np.frombuffer(imgpath.read(), dtype=np.uint8)

    # The sample count still comes from the label file, so a mismatch between
    # the two files surfaces as a reshape ValueError.
    images = pixels.reshape(len(labels), n_rows * n_cols)

    return images, labels

In [3]:
# One-hot encoding
def one_hot_convert(vec, num_classes=10):
    """Encode integer class labels as one-hot column vectors.

    Args:
        vec: Sequence or array of integer class indices, one per sample.
        num_classes: Length of each one-hot vector. Defaults to 10, the
            value that was previously hard-coded.

    Returns:
        A float64 array of shape ``(len(vec), num_classes, 1)`` where entry
        ``[i, vec[i], 0]`` is 1 and every other entry is 0. An empty ``vec``
        yields an array of shape ``(0, num_classes, 1)``.

    Raises:
        IndexError: If a label falls outside ``[-num_classes, num_classes)``
            or is not an integer.
    """
    labels = np.asarray(vec)
    n_samples = labels.shape[0]
    encoded = np.zeros((n_samples, num_classes, 1))
    if n_samples == 0:
        return encoded

    # One fancy-indexed assignment replaces the per-sample Python loop.
    # Labels are viewed as (n_samples, k) so that a column-shaped input
    # (n_samples, 1) is handled exactly like a flat one.
    rows = np.arange(n_samples)[:, np.newaxis]
    encoded[rows, labels.reshape(n_samples, -1), 0] = 1
    return encoded

In [10]:
# DATA LOADING
train_images, train_labels = load_mnist(
    image_file='dataset/train-images.idx3-ubyte',
    label_file='dataset/train-labels.idx1-ubyte',
)
test_images, test_labels = load_mnist(
    image_file='dataset/t10k-images.idx3-ubyte',
    label_file='dataset/t10k-labels.idx1-ubyte',
)

# Print the shape of every loaded array as a quick sanity check
print('ORIGINAL')
print('train_images', train_images.shape)
print('train_labels', train_labels.shape)
print('test_images', test_images.shape)
print('test_labels', test_labels.shape)

ORIGINAL
train_images (60000, 784)
train_labels (60000,)
test_images (10000, 784)
test_labels (10000,)


In [12]:
# Flatten every image into one row vector and append a constant column of 1s
# so the bias (intercept) is learned as an ordinary weight.
# Note: no normalization is applied here; pixel values are used as loaded.
X_train = np.hstack((
    train_images.reshape(len(train_images), -1),
    np.ones((len(train_images), 1)),
))
X_test = np.hstack((
    test_images.reshape(len(test_images), -1),
    np.ones((len(test_images), 1)),
))

# Training targets are one-hot rows; test targets stay as a column of class ids
y_train = one_hot_convert(train_labels).reshape(len(train_labels), -1)
y_test = test_labels.reshape(len(test_labels), -1)

print(X_train.shape, y_train.shape)
print(X_test.shape, y_test.shape)

(60000, 785) (60000, 10)
(10000, 785) (10000, 1)


In [34]:
# Activation functions for the neuron
def activate_functions(type, matrix):
    """Apply the activation function selected by ``type`` to ``matrix``.

    Args:
        type: Name of the activation: 'sigmoid', 'softmax' or 'tanh'.
        matrix: NumPy array of pre-activation values. 'softmax' normalises
            along axis 1, so it needs an input with at least two dimensions.

    Returns:
        The activated values, with the same shape as ``matrix``.

    Raises:
        ValueError: If ``type`` is not one of the supported names (this used
            to fall through and silently return ``None``).
    """
    if type == 'sigmoid':
        return 1 / (1 + np.exp(-matrix))
    elif type == 'softmax':
        # Subtracting the row maximum leaves the result unchanged but keeps
        # np.exp from overflowing on large logits.
        exp_matrix = np.exp(matrix - np.max(matrix, axis=1, keepdims=True))
        return exp_matrix / np.sum(exp_matrix, axis=1, keepdims=True)
    elif type == 'tanh':
        return np.tanh(matrix)

    raise ValueError(
        "Unknown activation function {!r}; expected 'sigmoid', 'softmax' or 'tanh'".format(type)
    )

# Training routine for the logistic (softmax) perceptron classifier
def train_logistic_perceptron(X, y, epochs, l_rate):
    """Fit a softmax classifier with full-batch gradient descent.

    Args:
        X: Feature matrix of shape ``(n_samples, n_features)``.
        y: Target matrix of shape ``(n_samples, n_classes)``; the caller in
            this notebook passes one-hot rows.
        epochs: Number of gradient-descent updates to perform.
        l_rate: Learning rate (step size) applied to every update.

    Returns:
        The learned weight matrix of shape ``(n_classes, n_features)``.
    """
    n_samples = len(X)
    # Small random initial weights, shape: num_classes x num_features
    weights = np.random.randn(y.shape[1], X.shape[1]) * 0.1

    for epoch in range(epochs):  # one full-batch update per epoch
        if epoch % 5 == 0:
            print('Epoch: ', epoch)

        # Everything below uses the X argument. The previous version read the
        # notebook-level X_train here, so the function ignored its own input.
        z = X @ weights.T
        result = activate_functions('softmax', z)
        error = result - y  # per-class difference between prediction and target
        grad = error / n_samples

        # grad.T @ X is the gradient of the mean cross-entropy loss with
        # respect to the weights; all class rows are updated at once.
        weights -= l_rate * np.dot(grad.T, X)

    return weights

# Fit the classifier on the training set and report the weight matrix shape
weights = train_logistic_perceptron(
    X=X_train,
    y=y_train,
    epochs=700,
    l_rate=0.01,
)
print(weights.shape)

Epoch:  0
Epoch:  5
Epoch:  10
Epoch:  15
Epoch:  20
Epoch:  25
Epoch:  30
Epoch:  35
Epoch:  40
Epoch:  45
Epoch:  50
Epoch:  55
Epoch:  60
Epoch:  65
Epoch:  70
Epoch:  75
Epoch:  80
Epoch:  85
Epoch:  90
Epoch:  95
Epoch:  100
Epoch:  105
Epoch:  110
Epoch:  115
Epoch:  120
Epoch:  125
Epoch:  130
Epoch:  135
Epoch:  140
Epoch:  145
Epoch:  150
Epoch:  155
Epoch:  160
Epoch:  165
Epoch:  170
Epoch:  175
Epoch:  180
Epoch:  185
Epoch:  190
Epoch:  195
Epoch:  200
Epoch:  205
Epoch:  210
Epoch:  215
Epoch:  220
Epoch:  225
Epoch:  230
Epoch:  235
Epoch:  240
Epoch:  245
Epoch:  250
Epoch:  255
Epoch:  260
Epoch:  265
Epoch:  270
Epoch:  275
Epoch:  280
Epoch:  285
Epoch:  290
Epoch:  295
Epoch:  300
Epoch:  305
Epoch:  310
Epoch:  315
Epoch:  320
Epoch:  325
Epoch:  330
Epoch:  335
Epoch:  340
Epoch:  345
Epoch:  350
Epoch:  355
Epoch:  360
Epoch:  365
Epoch:  370
Epoch:  375
Epoch:  380
Epoch:  385
Epoch:  390
Epoch:  395
Epoch:  400
Epoch:  405
Epoch:  410
Epoch:  415
Epoch:  420
Ep

In [35]:
# Prediction with the trained softmax classifier
def predict_logistic_perceptron(X, W):
    """Predict a class index for every row of ``X`` using weights ``W``.

    Args:
        X: Feature matrix of shape ``(n_samples, n_features)``.
        W: Weight matrix of shape ``(n_classes, n_features)``.

    Returns:
        An integer array of shape ``(n_samples, 1)`` holding, for each sample,
        the index of the class with the highest softmax score.
    """
    # Use the W argument. The previous version read the notebook-level
    # `weights` here, so whatever the caller passed was ignored.
    z = X @ W.T
    result = activate_functions('softmax', z)

    # The predicted class is the position of the largest value in each row
    classe = np.argmax(result, axis=1)
    print(classe.shape)  # kept so the cell output stays the same as before

    return np.expand_dims(classe, axis=1)

# Run the classifier on the test set
y_pred_test = predict_logistic_perceptron(X=X_test, W=weights)

# Predictions and targets must have matching shapes for the comparison below
print(y_pred_test.shape)
print(y_test.shape)

(10000,)
(10000, 1)
(10000, 1)


In [36]:
# Accuracy: fraction of test samples whose predicted class equals the label
accuracy = (y_pred_test == y_test).mean()
print("Acurácia:", accuracy)

# Error rate: share of test samples that were misclassified
erro = 1 - ((y_pred_test == y_test).sum() / len(y_test))
print("Taxa de erro: {}".format(erro))

Acurácia: 0.9048
Taxa de erro: 0.09519999999999995
