In [25]:
import numpy as np
from sklearn.decomposition import PCA
from sklearn.linear_model import LinearRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import accuracy_score

In [14]:
# Load an MNIST-style dataset stored in the IDX binary format
def _read_idx_header(handle, expected_magic, n_fields, path):
    """Read `n_fields` big-endian uint32 header fields and check the magic number.

    Returns the header fields that follow the magic number as Python ints.
    Raises ValueError if the header is truncated or the magic number differs
    from `expected_magic`.
    """
    raw = handle.read(4 * n_fields)
    if len(raw) != 4 * n_fields:
        raise ValueError('{}: truncated IDX header'.format(path))
    fields = np.frombuffer(raw, dtype='>u4')
    if int(fields[0]) != expected_magic:
        raise ValueError(
            '{}: unexpected IDX magic number {} (expected {})'.format(
                path, int(fields[0]), expected_magic
            )
        )
    return [int(value) for value in fields[1:]]


def load_mnist(image_file, label_file):
    """Read an IDX image file and its matching IDX label file.

    Parameters
    ----------
    image_file : path of the IDX3 file (magic 2051) holding the uint8 pixels.
    label_file : path of the IDX1 file (magic 2049) holding the uint8 labels.

    Returns
    -------
    (images, labels) : `images` is a uint8 array of shape
        (n_samples, n_rows * n_cols), one flattened image per row; `labels`
        is a uint8 array of shape (n_samples,). Both are read-only views over
        the bytes read from disk.

    Raises
    ------
    ValueError
        If a header is truncated, a magic number is wrong, or the item counts
        and payload sizes do not agree with the headers.
    """
    with open(label_file, 'rb') as lbpath:
        (n_labels,) = _read_idx_header(lbpath, 2049, 2, label_file)
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8)
    if labels.size != n_labels:
        raise ValueError(
            '{}: header declares {} labels but {} were found'.format(
                label_file, n_labels, labels.size
            )
        )

    with open(image_file, 'rb') as imgpath:
        n_images, n_rows, n_cols = _read_idx_header(imgpath, 2051, 4, image_file)
        pixels = np.frombuffer(imgpath.read(), dtype=np.uint8)
    if n_images != n_labels:
        raise ValueError(
            'image file has {} items but label file has {}'.format(n_images, n_labels)
        )
    if pixels.size != n_images * n_rows * n_cols:
        raise ValueError(
            '{}: pixel payload does not match the {}x{}x{} header'.format(
                image_file, n_images, n_rows, n_cols
            )
        )

    # Image size comes from the header instead of a hard-coded 784
    images = pixels.reshape(n_images, n_rows * n_cols)
    return images, labels

In [15]:
# one-hot encoding
def one_hot_convert(vec, num_classes=10):
    """Turn a 1-D sequence of integer class labels into one-hot column vectors.

    Parameters
    ----------
    vec : 1-D sequence or array of integer labels, expected in
        0 .. num_classes - 1.
    num_classes : length of each one-hot vector (defaults to 10).

    Returns
    -------
    A float64 array of shape (len(vec), num_classes, 1) where entry
    [i, vec[i], 0] is 1 and every other entry is 0. An empty input yields an
    array of shape (0, num_classes, 1).

    Raises
    ------
    IndexError
        If a label falls outside the valid index range for `num_classes`.
    """
    labels = np.asarray(vec)
    n_samples = labels.shape[0]
    matrix = np.zeros((n_samples, num_classes, 1))
    # Guard the empty case: an empty list becomes a float array, which cannot
    # be used as an index.
    if n_samples:
        # One vectorized assignment replaces the per-sample Python loop
        matrix[np.arange(n_samples), labels, 0] = 1
    return matrix

In [16]:
# LOAD THE DATA
# Training split and test split are read from the local `dataset/` folder.
train_images, train_labels = load_mnist(
    image_file='dataset/train-images.idx3-ubyte',
    label_file='dataset/train-labels.idx1-ubyte',
)
test_images, test_labels = load_mnist(
    image_file='dataset/t10k-images.idx3-ubyte',
    label_file='dataset/t10k-labels.idx1-ubyte',
)

# Report the shape of every array exactly as it was loaded
print('ORIGINAL')
for nome, dados in (
    ('train_images', train_images),
    ('train_labels', train_labels),
    ('test_images', test_images),
    ('test_labels', test_labels),
):
    print(nome, dados.shape)

ORIGINAL
train_images (60000, 784)
train_labels (60000,)
test_images (10000, 784)
test_labels (10000,)


In [17]:
# Flatten every image into one row vector and append a constant column of 1s
# so that a linear model can learn a bias (intercept) term.
# Note: no pixel scaling/normalization is applied in this cell.
# NOTE(review): the training cell visible in this notebook fits on
# train_images / train_labels, not on X_train / y_train - confirm these
# arrays are still needed.
n_train = train_images.shape[0]
n_test = test_images.shape[0]

X_train = np.hstack((train_images.reshape(n_train, -1), np.ones((n_train, 1))))
X_test = np.hstack((test_images.reshape(n_test, -1), np.ones((n_test, 1))))

# Training targets become one-hot rows; test targets stay as a column of
# integer labels.
y_train = one_hot_convert(train_labels).reshape(len(train_labels), -1)
y_test = test_labels.reshape(len(test_labels), -1)

for features, targets in ((X_train, y_train), (X_test, y_test)):
    print(features.shape, targets.shape)

(60000, 785) (60000, 10)
(10000, 785) (10000, 1)


In [18]:
# Project every image onto 100 principal components. The projection is fitted
# on the training images only and then reused unchanged for the test images.
# random_state pins the result when the randomized SVD solver is used, which
# svd_solver='auto' may select depending on data shape and library version.
# NOTE: this cell overwrites train_images / test_images, so re-running it
# without first re-running the data-loading cell applies PCA to data that has
# already been reduced.
pca = PCA(n_components=100, random_state=0)
train_images = pca.fit_transform(train_images)
test_images = pca.transform(test_images)

In [23]:
# Base estimator: ordinary least-squares linear regression
regress = LinearRegression()
# One-vs-rest wrapper around the base estimator, used as the digit classifier
modelo = OneVsRestClassifier(estimator=regress)


In [28]:
# Fit the one-vs-rest model on the training images and their digit labels
modelo.fit(train_images, train_labels)

# Predict a digit for every test image
pred = modelo.predict(test_images)

# Fraction of test images whose predicted digit equals the true label
acuracia = accuracy_score(y_true=test_labels, y_pred=pred)

# Mask of the test images whose true label is the digit 1
is_one = test_labels == 1

# Class-1 images that were NOT predicted as 1 (misses for the digit 1)
erros_positivos = (is_one & (pred != 1)).sum()

# Total number of correct predictions over all digits
acertos = (pred == test_labels).sum()

# Class-1 images that were correctly predicted as 1
acertos_positivos = (is_one & (pred == 1)).sum()

# Report the accuracy as a percentage
print('Acuracia:', acuracia * 100)

Acuracia: 86.18
