
<br>
This file is for fashion mnist classification<br>


In [14]:
import numpy as np
import matplotlib.pyplot as plt
from util import get_mnist_data
from logistic_np import add_one, LogisticClassifier
# import time
#import pdb

In [15]:
class SoftmaxClassifier(LogisticClassifier):
    def __init__(self, w_shape):
        """__init__
        
        :param w_shape: create w with shape w_shape using normal distribution
        """
        super(SoftmaxClassifier, self).__init__(w_shape)

In [16]:
    def softmax(self, x):
        """softmax
        Compute softmax on the second axis of x
    
        :param x: input
        """
        # [TODO 2.3]
        # Compute softmax
        z =  np.dot(x,self.w)
        z_max = np.empty((z.shape[0],1))
        z_max = np.amax(z, axis = 1)
        z_max = np.tile(z_max, (10,1)).T
        z_max = z - z_max
        r = np.exp(z_max)
        # for i in range(z.shape[0]):
        #     a = np.max(z[i,:])
        #     for j in range(z.shape[1]):
        #         z[i,j] = np.exp(z[i,j]-a)
        return r

In [17]:
    def feed_forward(self, x):
        """feed_forward
        This function compute the output of your softmax regression model
        
        :param x: input
        """
        # [TODO 2.3]
        # Compute a feed forward pass
        x = self.softmax(x)
        s = np.sum(x,axis=1)
        for i in range(x.shape[0]):
            x[i,:] = x[i,:]/s[i]
        return x

In [18]:
    def compute_loss(self, y, y_hat):
        """compute_loss
        Compute the loss using y (label) and y_hat (predicted class)
        :param y:  the label, the actual class of the samples
        :param y_hat: the class probabilities of all samples in our data
        """
        # [TODO 2.4]
        # Compute categorical loss
        for i in range (y.shape[0]):
            for j in range (y.shape[1]):
                y[i,j] = np.float32(y[i,j]*np.log(y_hat[i,j]) )
        # print(y)
        return -1/(y.shape[0])*np.sum(np.sum(y,axis = 1))   
        

In [19]:
    def get_grad(self, x, y, y_hat):
        """get_grad
        Compute and return the gradient of w
        :param loss: computed loss between y_hat and y in the train dataset
        :param y_hat: predicted y
        """ 
        # [TODO 2.5]
        # Compute gradient of the loss function with respect to w
        grad = np.dot(x.T,(y_hat-y))
        return grad/x.shape[0]

In [20]:
def plot_loss(train_loss, val_loss):
    plt.figure(1)
    plt.clf()
    plt.plot(train_loss, color='b')
    plt.plot(val_loss, color='g')

In [21]:
def draw_weight(w):
    label_names = ['T-shirt', 'Trouser', 'Pullover', 'Dress', 'Coat', 'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
    plt.figure(2, figsize=(8, 6))
    plt.clf()
    w = w[0:(28*28),:].reshape(28, 28, 10)
    for i in range(10):
        ax = plt.subplot(3, 4, i+1)
        plt.imshow(w[:,:,i], interpolation='nearest')
        plt.axis('off')
        ax.set_title(label_names[i])

In [22]:
def normalize(train_x, val_x, test_x):
    """normalize
    This function computes train mean and standard deviation on all pixels then applying data scaling on train_x, val_x and test_x using these computed values
    Note that in this classification problem, the data is already flatten into a shape of (num_samples, image_width*image_height)
    :param train_x: train images, shape=(num_train, image_height*image_width)
    :param val_x: validation images, shape=(num_val, image_height*image_width)
    :param test_x: test images, shape=(num_test, image_height*image_width)
    """
    # [TODO 2.1]
    # train_mean and train_std should have the shape of (1, 1)
    train_mean = np.mean(train_x) #single value
    train_std = np.std(train_x)
    train_x = (train_x - train_mean)/train_std
    val_x = (val_x - train_mean)/train_std
    test_x = (test_x - train_mean)/train_std
    # print(self.w_shape)
    return train_x,val_x,test_x

In [23]:
def create_one_hot(labels, num_k=10):
    """create_one_hot
    This function creates a one-hot (one-of-k) matrix based on the given labels
    :param labels: list of labels, each label is one of 0, 1, 2,... , num_k - 1
    :param num_k: number of classes we want to classify
    """
    # [TODO 2.2]
    # Create the one-hot label matrix here based on labels
    one_hot_labels = np.zeros((labels.shape[0],num_k))
    z = 0
    for i in labels:
        one_hot_labels[z,i] = 1
        z += 1
    return one_hot_labels

In [24]:
def test(y_hat, test_y):
    """test
    Compute the confusion matrix based on labels and predicted values 
    :param classifier: the trained classifier
    :param y_hat: predicted probabilites, output of classifier.feed_forward
    :param test_y: test labels
    """
    
    confusion_mat = np.zeros((10,10))
    
    # [TODO 2.7]
    # Compute the confusion matrix here
    y_hat_label = np.argmax(y_hat,axis=1)
    test_y_label = np.argmax(test_y,axis=1)
    for i in range(10):
        for j in range(10):
            confusion_mat[i,j] = np.intersect1d(np.array(np.where(test_y_label==i)),np.array(np.where(y_hat_label==j))).shape[0]
    confusion_mat = confusion_mat/np.sum(confusion_mat,axis=1)
    #confusion_mat = confusion_mat/np.sum(confusion_mat,axis=1)
    np.set_printoptions(precision=2)
    print('Confusion matrix:')
    print(confusion_mat)
    print('Diagonal values:')
    print(confusion_mat.flatten()[0::11])

In [25]:
if __name__ == "__main__":
    np.random.seed(2020)

    # Load data from file
    # Make sure that fashion-mnist/*.gz files is in data/
    train_x, train_y, val_x, val_y, test_x, test_y = get_mnist_data()
    num_train = train_x.shape[0]
    num_val = val_x.shape[0]
    num_test = test_x.shape[0]  

    #generate_unit_testcase(train_x.copy(), train_y.copy()) 

    # Convert label lists to one-hot (one-of-k) encoding
    train_y = create_one_hot(train_y)
    val_y = create_one_hot(val_y)
    test_y = create_one_hot(test_y)

    # Normalize our data
    train_x, val_x, test_x = normalize(train_x, val_x, test_x)
    
    # Pad 1 as the last feature of train_x and test_x
    train_x = add_one(train_x) 
    val_x = add_one(val_x)
    test_x = add_one(test_x)
    
    # Create classifier
    num_feature = train_x.shape[1]
    dec_classifier = SoftmaxClassifier((num_feature, 10))
    momentum = np.zeros_like(dec_classifier.w)

    # Define hyper-parameters and train-related parameters
    num_epoch = 3347
    learning_rate = 0.01
    momentum_rate = 0.9
    epochs_to_draw = 10
    all_train_loss = []
    all_val_loss = []
    plt.ion()
    for e in range(num_epoch):    
        # tic = time.clock()
        train_y_hat = dec_classifier.feed_forward(train_x)
        val_y_hat = dec_classifier.feed_forward(val_x)
        train_loss = dec_classifier.compute_loss(train_y, train_y_hat)
        val_loss = dec_classifier.compute_loss(val_y, val_y_hat)
        grad = dec_classifier.get_grad(train_x, train_y, train_y_hat)
       
        # dec_classifier.numerical_check(train_x, train_y, grad)
        # Updating weight: choose either normal SGD or SGD with momentum
        dec_classifier.update_weight(grad, learning_rate)
        #dec_classifier.update_weight_momentum(grad, learning_rate, momentum, momentum_rate)
        all_train_loss.append(train_loss) 
        all_val_loss.append(val_loss)
        # toc = time.clock()
        # print(toc-tic)
        # [TODO 2.6]
        # print(val_loss)
        # Propose your own stopping condition here
        best_val_loss = float("inf")
        patience = 6  # Number of consecutive increases in validation loss before stopping
        consecutive_increases = 0
        if val_loss[-1] < best_val_loss:
            best_val_loss = val_loss
                # Optionally, save the model's parameters
                
        else:
            consecutive_increases += 1
        
        if consecutive_increases >= patience:
            print("Stopped due to overfitting.")
            break
    y_hat = dec_classifier.feed_forward(test_x)
    # print(y_hat)
    test(y_hat, test_y)

Reading fashion MNIST data...
<class 'numpy.ndarray'>
Done reading
Confusion matrix:
[[0.93 0.02 0.   0.03 0.   0.   0.04 0.   0.   0.  ]
 [0.02 0.93 0.   0.07 0.   0.   0.   0.   0.   0.  ]
 [0.04 0.   0.61 0.05 0.23 0.   0.08 0.   0.   0.  ]
 [0.02 0.03 0.02 0.8  0.07 0.   0.02 0.   0.   0.  ]
 [0.   0.02 0.09 0.05 0.74 0.   0.08 0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.89 0.   0.1  0.02 0.  ]
 [0.24 0.   0.11 0.03 0.23 0.   0.45 0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.04 0.   0.84 0.   0.12]
 [0.   0.   0.   0.03 0.   0.04 0.   0.   0.94 0.  ]
 [0.   0.   0.   0.   0.   0.02 0.   0.02 0.   0.96]]
Diagonal values:
[0.93 0.93 0.61 0.8  0.74 0.89 0.45 0.84 0.94 0.96]
