## Homework 8_1_MINST_10_classes_classification_using_logistic_oop_to_do

In [1]:
import numpy as np

class LogisticRegression:
    """Multi-output logistic regression trained with batch gradient descent.

    One independent sigmoid output is fitted per column of ``Y``; ``predict``
    returns the index of the column with the highest score.

    Attributes:
        lr: Step size used for every gradient-descent update.
        n_iters: Number of gradient-descent updates performed by ``fit``.
        weights: ``None`` until ``fit`` runs, then an array of shape
            ``(X.shape[1], Y.shape[1])``.
    """

    def __init__(self, learning_rate=0.00001, n_iters=1000):
        self.lr = learning_rate
        self.n_iters = n_iters
        self.weights = None

    @staticmethod
    def sigmoid(z):
        """Return ``1 / (1 + exp(-z))`` element-wise without overflowing.

        ``logaddexp(0, -z)`` equals ``log(1 + exp(-z))`` but never evaluates
        ``exp`` of a large positive number, so very negative ``z`` yields 0.0
        instead of an overflow warning.
        """
        return np.exp(-np.logaddexp(0, -z))

    def forward(self, X):
        """Return the sigmoid activations for every row of ``X``."""
        return self.sigmoid(X @ self.weights)

    def predict(self, X):
        """Return the winning column index per row as an ``(n, 1)`` array.

        The sigmoid is monotonic, so ranking the raw scores selects the same
        column as ranking the activations.  Using the raw scores avoids
        spurious ties when several activations round to exactly 1.0 (or 0.0).
        ``reshape`` is used instead of ``argmax(keepdims=True)`` so that older
        NumPy releases work as well.
        """
        scores = X @ self.weights
        return np.argmax(scores, axis=1).reshape(-1, 1)

    def loss(self, X, Y):
        """Return the summed binary cross-entropy divided by ``X.shape[0]``.

        The logs are computed from the raw scores:
        ``-log(sigmoid(z)) == logaddexp(0, -z)`` and
        ``-log(1 - sigmoid(z)) == logaddexp(0, z)``.
        This stays finite even when an activation saturates at 0 or 1, where
        taking ``log`` of the activation would give ``inf`` or ``nan``.
        """
        z = X @ self.weights
        per_element = Y * np.logaddexp(0, -z) + (1 - Y) * np.logaddexp(0, z)
        return np.sum(per_element) / X.shape[0]

    def gradient(self, X, Y):
        """Return the gradient of ``loss`` with respect to ``weights``."""
        return X.T @ (self.forward(X) - Y) / X.shape[0]

    def report(self, iteration, X, Y):
        """Print the loss and accuracy (in percent) of the model on ``X, Y``.

        ``Y`` is expected to be one-hot encoded: the true class of each row is
        taken to be the index of its largest entry.
        """
        expected = np.argmax(Y, axis=1).reshape(-1, 1)
        accuracy = np.count_nonzero(self.predict(X) == expected) * 100.0 / Y.shape[0]
        print(f'iteration: {iteration}, loss: {self.loss(X, Y)}, accuracy: {accuracy}%')

    def fit(self, X, Y):
        """Train from zero-initialised weights for ``n_iters`` updates.

        Progress is printed every 10th iteration and once more after the
        last update.
        """
        self.weights = np.zeros((X.shape[1], Y.shape[1]))
        for i in range(self.n_iters):
            self.weights -= self.lr * self.gradient(X, Y)
            if i % 10 == 0:
                self.report(i, X, Y)
        self.report(self.n_iters, X, Y)
        

In [2]:
# Load the MNIST data and prepare the training and testing data sets
import gzip
import struct

def load_images(filename):
    """Read a gzip-compressed image file into a 2-D ``uint8`` array.

    The first 16 bytes are parsed as four big-endian unsigned 32-bit
    integers: an ignored leading value, the image count and the two image
    dimensions.  Everything after the header is treated as raw pixel bytes.

    Returns:
        An array with one row per image and one column per pixel.
    """
    with gzip.open(filename, 'rb') as stream:
        header = stream.read(16)
        payload = stream.read()
    # Only the product of the two dimensions is needed to flatten each image.
    _magic, n_images, dim_a, dim_b = struct.unpack('>IIII', header)
    pixels = np.frombuffer(payload, dtype=np.uint8)
    return pixels.reshape(n_images, dim_a * dim_b)

def prepend_bias(X):
    """Return a copy of ``X`` with a column of 1s added as the first column.

    ``axis=1`` makes ``np.insert`` add a column rather than a row.
    """
    return np.insert(X, obj=0, values=1, axis=1)

def load_labels(filename):
    """Read a gzip-compressed label file into an ``(n, 1)`` ``uint8`` array.

    The first 8 bytes are skipped as header; every remaining byte is one
    label.
    """
    with gzip.open(filename, 'rb') as stream:
        stream.read(8)  # discard the header bytes
        raw_labels = stream.read()
    labels = np.frombuffer(raw_labels, dtype=np.uint8)
    # Turn the flat label vector into a one-column matrix.
    return labels.reshape(-1, 1)

def one_hot_encode(Y, n_classes=10):
    """Return the one-hot encoding of the integer labels in ``Y``.

    Args:
        Y: Integer labels, either 1-D or with one row per sample (for
            example an ``(n, 1)`` column).  Every label found in row ``i`` is
            marked in row ``i`` of the result.
        n_classes: Width of the encoding.  Defaults to 10.

    Returns:
        A float array of shape ``(len(Y), n_classes)`` holding 1 at each
        label position and 0 elsewhere.

    Raises:
        IndexError: If a label falls outside ``n_classes``.
    """
    labels = np.asarray(Y)
    n_labels = labels.shape[0]
    encoded_Y = np.zeros((n_labels, n_classes))
    if n_labels == 0:
        # Nothing to mark; reshape(0, -1) below would be ambiguous.
        return encoded_Y
    columns = labels.reshape(n_labels, -1)
    rows = np.arange(n_labels).reshape(-1, 1)
    # A single fancy-indexed assignment replaces the per-row Python loop.
    encoded_Y[rows, columns] = 1
    return encoded_Y
    
# Training images: 60000 rows; once the bias column is prepended each row has
# 785 elements (1 bias + 28 * 28 pixels), so X_train is (60000, 785).
x_train = load_images("train-images-idx3-ubyte.gz")
X_train = prepend_bias(x_train)

# Test images: 10000 rows laid out exactly like X_train, so X_test is
# (10000, 785).
x_test = load_images("t10k-images-idx3-ubyte.gz")
X_test = prepend_bias(x_test)

# Training labels: 60K single digits (0 to 9), then their one-hot form with
# 10 elements per label.
Y_train_unencoded = load_labels("train-labels-idx1-ubyte.gz")
Y_train = one_hot_encode(Y_train_unencoded)

# Test labels: 10000 single digits (0 to 9), kept unencoded so they can be
# compared directly with predicted class indices.
Y_test = load_labels("t10k-labels-idx1-ubyte.gz")

In [3]:
# Testing

# Train on the training set, then measure accuracy on the held-out test set.
clf = LogisticRegression(learning_rate=0.00001, n_iters=200)
clf.fit(X_train, Y_train)

# Percentage of test rows whose predicted class equals the true label.
predictions = clf.predict(X_test)
Accuracy = 100.0 * np.count_nonzero(predictions == Y_test) / Y_test.shape[0]
print(f"Testing accuracy: {Accuracy}%")

iteration: 0, loss: 8.434456875083336, accuracy: 67.17833333333333%
iteration: 10, loss: 1.4547390723537275, accuracy: 83.60166666666667%
iteration: 20, loss: 1.2398963816638573, accuracy: 85.65666666666667%
iteration: 30, loss: 1.1396556120791268, accuracy: 86.51666666666667%
iteration: 40, loss: 1.0782380125640527, accuracy: 87.06%
iteration: 50, loss: 1.0358904545227758, accuracy: 87.44833333333334%
iteration: 60, loss: 1.0045795052261213, accuracy: 87.75333333333333%
iteration: 70, loss: 0.9802869242363641, accuracy: 88.04333333333334%
iteration: 80, loss: 0.9607633942382621, accuracy: 88.265%
iteration: 90, loss: 0.9446448405352792, accuracy: 88.44666666666667%
iteration: 100, loss: 0.9310525665216144, accuracy: 88.60166666666667%
iteration: 110, loss: 0.9193931113524326, accuracy: 88.74833333333333%
iteration: 120, loss: 0.9092500542315094, accuracy: 88.86%
iteration: 130, loss: 0.9003218435711583, accuracy: 88.99%
iteration: 140, loss: 0.8923842516815339, accuracy: 89.0783333333