## Homework 8_2_MNIST_10_classes_classification_using_softmax_oop_to_do

In [1]:
import numpy as np

class SofmaxRegression:
    """Multiclass softmax (multinomial logistic) regression.

    The model is a single weight matrix trained with full-batch gradient
    descent on the cross-entropy loss. Inputs ``X`` are 2-D arrays with one
    sample per row; targets ``Y`` are one-hot encoded with one column per
    class. No bias term is added here, so callers who want one must supply
    it as a column of ``X``.
    """

    def __init__(self, learning_rate=0.00001, n_iters=1000):
        """Store the hyper-parameters; weights are created by ``fit``.

        Args:
            learning_rate: Step size applied to each gradient update.
            n_iters: Number of gradient-descent iterations run by ``fit``.
        """
        self.lr = learning_rate
        self.n_iters = n_iters
        self.weights = None

    def softmax(self, logits):
        """Return row-wise softmax probabilities for a 2-D array of logits."""
        # Subtracting the row maximum leaves the result unchanged
        # mathematically but keeps np.exp from overflowing to inf (which
        # would turn the whole row into nan).
        shifted = logits - np.max(logits, axis=1, keepdims=True)
        exps = np.exp(shifted)
        return exps / np.sum(exps, axis=1, keepdims=True)

    def _log_softmax(self, logits):
        """Return row-wise log-softmax, computed without taking log(0)."""
        shifted = logits - np.max(logits, axis=1, keepdims=True)
        return shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))

    def forward(self, X):
        """Return class probabilities, one row per sample in ``X``."""
        return self.softmax(X @ self.weights)

    def predict(self, X):
        """Return the most probable class index per sample as a column vector."""
        probs = self.forward(X)
        return np.argmax(probs, axis=1, keepdims=True)

    def loss(self, X, Y):
        """Return the mean cross-entropy between predictions and one-hot ``Y``."""
        N = X.shape[0]
        # Work in log space so a probability that underflows to 0 cannot
        # produce 0 * -inf = nan in the sum.
        log_probs = self._log_softmax(X @ self.weights)
        return -np.sum(Y * log_probs) / N

    def gradient(self, X, Y):
        """Return the gradient of the mean cross-entropy w.r.t. the weights."""
        N = X.shape[0]
        probs = self.forward(X)
        return np.dot(X.T, (probs - Y)) / N

    def report(self, iteration, X, Y):
        """Print the current loss and accuracy (in percent) on ``X``/``Y``."""
        expected = np.argmax(Y, axis=1, keepdims=True)
        hits = np.count_nonzero(self.predict(X) == expected)
        accuracy = hits * 100.0 / Y.shape[0]
        print(f'iteration: {iteration}, loss: {self.loss(X, Y)}, accuracy: {accuracy}%')

    def fit(self, X, Y):
        """Train the weights on ``X``/``Y`` with full-batch gradient descent.

        Weights start at zero and progress is printed every 20 iterations
        and once more after the final update.

        Args:
            X: Sample matrix, one row per sample.
            Y: One-hot target matrix with the same number of rows as ``X``.
        """
        # Size the weights from the arguments, not from notebook globals,
        # so the model can be fitted on any data set.
        self.weights = np.zeros((X.shape[1], Y.shape[1]))
        for i in range(self.n_iters):
            if i % 20 == 0:
                self.report(i, X, Y)
            self.weights -= self.gradient(X, Y) * self.lr
        self.report(self.n_iters, X, Y)


In [2]:
# Load the MNIST data and prepare the training and testing data sets
import gzip
import struct

def load_images(filename):
    """Read a gzip-compressed image file into a 2-D ``uint8`` array.

    The first 16 bytes are parsed as four big-endian unsigned 32-bit
    integers: an ignored leading value, the image count, and the two image
    dimensions. Every remaining byte is treated as one pixel.

    Returns:
        Array of shape ``(image_count, dim_a * dim_b)``, one image per row.
    """
    with gzip.open(filename, 'rb') as stream:
        header = stream.read(16)
        payload = stream.read()
    # Only the product of the two dimensions matters for the flat layout.
    _magic, image_count, dim_a, dim_b = struct.unpack('>IIII', header)
    pixels = np.frombuffer(payload, dtype=np.uint8)
    return pixels.reshape(image_count, dim_a * dim_b)

def prepend_bias(X):
    """Return a copy of ``X`` with a new first column filled with 1s.

    The extra column acts as the bias input, so a weight row can learn an
    intercept. ``axis=1`` makes ``np.insert`` add a column rather than a row.
    """
    bias_column_index = 0
    bias_value = 1
    return np.insert(arr=X, obj=bias_column_index, values=bias_value, axis=1)

def load_labels(filename):
    """Read a gzip-compressed label file into a one-column ``uint8`` matrix.

    The first 8 bytes (the header) are discarded; every remaining byte is
    taken as one label.

    Returns:
        Array of shape ``(n_labels, 1)``.
    """
    with gzip.open(filename, 'rb') as stream:
        stream.read(8)  # header bytes are not needed
        raw_labels = stream.read()
    labels = np.frombuffer(raw_labels, dtype=np.uint8)
    return labels.reshape(-1, 1)

def one_hot_encode(Y, n_classes=10):
    """Convert integer class labels into a one-hot encoded float matrix.

    Args:
        Y: Integer label array whose first axis indexes the samples, e.g. a
            column vector of shape ``(n_labels, 1)``.
        n_classes: Number of columns in the result. Defaults to 10, the
            number of digit classes used in this file.

    Returns:
        Array of shape ``(n_labels, n_classes)`` holding 1 in the column
        given by each label and 0 everywhere else.

    Raises:
        IndexError: If a label is not a valid column index for ``n_classes``.
    """
    labels = np.asarray(Y)
    n_labels = labels.shape[0]
    encoded_Y = np.zeros((n_labels, n_classes))
    if n_labels == 0:
        # Nothing to mark; reshaping an empty array with -1 would fail.
        return encoded_Y
    # One vectorized assignment replaces the per-row Python loop. Row
    # indices broadcast against the labels of each row.
    rows = np.arange(n_labels).reshape(-1, 1)
    encoded_Y[rows, labels.reshape(n_labels, -1)] = 1
    return encoded_Y
    
# --- Training split ---
# Raw pixels, one flattened image per row.
x_train = load_images("train-images-idx3-ubyte.gz")
# Same images with a leading bias column of 1s; per the original notes this
# is expected to be (60000, 785): 1 bias + 28 * 28 pixels.
X_train = prepend_bias(x_train)
# One label per training image, each a single digit from 0 to 9 ...
Y_train_unencoded = load_labels("train-labels-idx1-ubyte.gz")
# ... and the same labels as rows of 10 one-hot encoded elements.
Y_train = one_hot_encode(Y_train_unencoded)

# --- Testing split ---
x_test = load_images("t10k-images-idx3-ubyte.gz")
# Same structure as X_train; expected to be (10000, 785).
X_test = prepend_bias(x_test)
# Test labels stay as single digits (not one-hot) for direct comparison
# with predicted class indices.
Y_test = load_labels("t10k-labels-idx1-ubyte.gz")

In [3]:
# Train on the training split, then measure accuracy on the testing split.
clf = SofmaxRegression(learning_rate=0.00001, n_iters=200)
clf.fit(X_train, Y_train)

# Predicted class indices come back as a column, matching Y_test's layout.
predictions = clf.predict(X_test)
n_correct = np.count_nonzero(predictions == Y_test)
Accuracy = n_correct * 100.0 / Y_test.shape[0]
print("Testing accuracy: %.2f%%" % (Accuracy))

iteration: 0, loss: 2.3025850929940437, accuracy: 9.871666666666666%
iteration: 20, loss: 0.552313271160402, accuracy: 86.44333333333333%
iteration: 40, loss: 0.45615562085755945, accuracy: 88.16333333333333%
iteration: 60, loss: 0.4161933858511783, accuracy: 88.88%
iteration: 80, loss: 0.39283271098835054, accuracy: 89.43%
iteration: 100, loss: 0.3770472958645998, accuracy: 89.72666666666667%
iteration: 120, loss: 0.3654537491480679, accuracy: 90.00166666666667%
iteration: 140, loss: 0.3564651899859296, accuracy: 90.17666666666666%
iteration: 160, loss: 0.3492264959093779, accuracy: 90.36833333333334%
iteration: 180, loss: 0.3432308075538642, accuracy: 90.52666666666667%
iteration: 200, loss: 0.33815601481437285, accuracy: 90.61666666666666%
Testing accuracy: 91.10%
