In [84]:
import numpy as np
import pandas as pd

In [85]:
# Load the MNIST Fashion dataset
train = pd.read_csv('fashion-mnist_train.csv')
test = pd.read_csv('fashion-mnist_test.csv')


In [86]:
# Split data to x, labels and test, train
y_train, x_train = train.iloc[:, 0], train.iloc[:, 1:]
y_test, x_test = test.iloc[:, 0], test.iloc[:, 1:]


In [87]:
# Check the shape of data
print("train data shape: ", x_train.shape)
print("train label shape: ", y_train.shape)
print("test data shape: ", x_test.shape)
print("test label shape: ", y_test.shape)


train data shape:  (60000, 784)
train label shape:  (60000,)
test data shape:  (10000, 784)
test label shape:  (10000,)


In [88]:
# Show data
x_train.head(5)


Unnamed: 0,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,pixel10,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,5,0,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,1,2,0,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [89]:
y_train.head(5)


0    2
1    9
2    6
3    0
4    3
Name: label, dtype: int64

In [90]:
# Convert dataframe to numpy array
x_train = x_train.to_numpy()
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()
x_test = x_test.to_numpy()



In [91]:
#Normalize data
x_train = x_train / 255
x_test = x_test / 255


In [92]:
#initialize the size of layers
input_size = x_train.shape[1]
hidden_size = 128
output_size = 10


In [93]:

def initialize_params(input_size, hidden_size, output_size):
    #W1 = np.zeros((input_size, hidden_size))
    b1 = np.zeros((1,hidden_size))
    #W2 = np.zeros((hidden_size, output_size))
    b2 = np.zeros((1,output_size))

    W1 = np.random.rand(input_size, hidden_size)*np.sqrt(2.0/input_size)
    #b1 = np.full((1, hidden_size),0.1)
    W2 = np.random.rand(hidden_size, output_size)*np.sqrt(2.0/hidden_size)
    #b2 = np.full((1, output_size),0.1)

   
    return W1, b1, W2, b2


def relu(x):
    return np.maximum(0, x)


def svm_loss(X, y, W1, b1, W2, b2, reg):
    N = X.shape[0]
    
    scores1 = np.dot(X,W1) + b1
    h = relu(scores1)
    scores2 = np.dot(h, W2) + b2

    margins = np.maximum(0, scores2 - scores2[np.arange(N), y].reshape(-1, 1) + 1)
    margins[np.arange(N), y] = 0
    loss = np.sum(margins) / N

    reg_loss = 0.5 * reg * (np.sum(W1 * W1) + np.sum(W2 * W2))
    loss += reg_loss

    dscores2 = np.zeros_like(scores2)
    dscores2[margins > 0] = 1
    dscores2[np.arange(N), y] -= np.sum(dscores2, axis=1)
    dscores2 /= N

    dW2 = np.dot(h.T, dscores2)
    db2 = np.sum(dscores2, axis=0, keepdims=True)

    dh = np.dot(dscores2, W2.T)
    dscores1 = dh * (scores1 > 0)

    dW1 = np.dot(X.T, dscores1)
    db1 = np.sum(dscores1, axis=0, keepdims=True)

    dW2 += reg * W2
    dW1 += reg * W1

    return loss, dW1, db1, dW2, db2


def train(X, y, learning_rate, reg, epochs):

    W1, b1, W2, b2 = initialize_params(input_size, hidden_size, output_size)

    for epoch in range(epochs):
        loss, dW1, db1, dW2, db2 = svm_loss(X, y, W1, b1, W2, b2, reg)
        W1 -= learning_rate * dW1
        b1 -= learning_rate * db1
        W2 -= learning_rate * dW2
        b2 -= learning_rate * db2

        if epoch % 100 == 0:
            print("Epoch %d: loss %f" % (epoch, loss))

    return W1, b1, W2, b2


def predict(X, W1, b1, W2, b2):
    scores1 = np.dot(X, W1) + b1
    h = relu(scores1)
    scores2 = np.dot(h, W2) + b2
    return np.argmax(scores2, axis=1)


def accuracy(X, y, W1, b1, W2, b2):
    y_pred = predict(X, W1, b1, W2, b2)
    return np.mean(y_pred == y)


W1, b1, W2, b2 = train(x_train, y_train, 0.001, 0.001, 2000)
b = accuracy(x_test, y_test, W1, b1, W2, b2)
print("Accuracy: ", b*100)



Epoch 0: loss 16.107119
Epoch 100: loss 8.344527
Epoch 200: loss 6.169683
Epoch 300: loss 4.317952
Epoch 400: loss 3.519326
Epoch 500: loss 2.822504
Epoch 600: loss 2.574321
Epoch 700: loss 2.204226
Epoch 800: loss 1.836140
Epoch 900: loss 1.752090
Epoch 1000: loss 1.679184
Epoch 1100: loss 1.617811
Epoch 1200: loss 1.563888
Epoch 1300: loss 1.518158
Epoch 1400: loss 1.477784
Epoch 1500: loss 1.437182
Epoch 1600: loss 1.396726
Epoch 1700: loss 1.343698
Epoch 1800: loss 1.318889
Epoch 1900: loss 1.291116
Accuracy:  69.32000000000001
