In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version=1, cache=True)
mnist.keys()

dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

In [3]:
_X, _y = mnist["data"], mnist["target"]
_y = _y.astype(np.uint8)

In [4]:
X_train, X_test, y_train, y_test = _X[:60000], _X[60000:], _y[:60000], _y[60000:]

In [5]:
X_train = X_train / 255
X_test = X_test / 255

In [6]:
y_train_large = (y_train >= 7)
y_train_odd = (y_train % 2 == 1)
y_train_multilabel = (np.c_[y_train_large, y_train_odd]).astype(np.uint8)

y_test_large = (y_test >= 7)
y_test_odd = (y_test % 2 == 1)
y_test_multilabel = (np.c_[y_test_large, y_test_odd]).astype(np.uint8)

In [7]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [8]:
def predict(X, W):
    return np.round(sigmoid(X @ W))

In [9]:
def compute_cost(X, T, W):
    epsilon = 1e-5
    N = len(T)
    K = np.size(T, 1)
    cost = - (1/N) * (np.ones((1,N)) @ (np.multiply(np.log(sigmoid(X @ W) + epsilon), T)) @ np.ones((K,1)) +
                      np.ones((1,N)) @ (np.multiply(np.log(1 - sigmoid(X @ W) + epsilon), (1 - T))) @ np.ones((K,1)))
    return cost

In [10]:
def batch_gd(X, T, W, learning_rate, iterations, batch_size):
    N = len(T)
    cost_history = np.zeros((iterations,1))
    shuffled_indices = np.random.permutation(N)
    X_shuffled = X[shuffled_indices]
    T_shuffled = T[shuffled_indices]

    for i in range(iterations):
        j = i % N
        X_batch = X_shuffled[j:j+batch_size]
        T_batch = T_shuffled[j:j+batch_size]
        # batch가 epoch 경계를 넘어가는 경우, 앞 부분으로 채워줌
        if X_batch.shape[0] < batch_size:
            X_batch = np.vstack((X_batch, X_shuffled[:(batch_size - X_batch.shape[0])]))
            T_batch = np.vstack((T_batch, T_shuffled[:(batch_size - T_batch.shape[0])]))
        W = W - (learning_rate/batch_size) * (X_batch.T @ (sigmoid(X_batch @ W) - T_batch))
        cost_history[i] = compute_cost(X_batch, T_batch, W)
        if i % 10 == 0:
            print(cost_history[i][0])
    return (cost_history, W)

In [11]:
X = np.hstack((np.ones((np.size(X_train, 0),1)),X_train))
T = y_train_multilabel

K = np.size(T, 1)
M = np.size(X, 1)
W = np.zeros((M,K))

iterations = 2000
learning_rate = 0.01

initial_cost = compute_cost(X, T, W)

print("Initial Cost is: {} \n".format(initial_cost[0][0]))

(cost_history, W_optimal) = batch_gd(X, T, W, learning_rate, iterations, 256)

Initial Cost is: 1.3862543615198037 

1.3687927680890142
1.265294299222866
1.20327120110395
1.1600066702280398
1.1244887128003864
1.0791078341301592
1.0474679357601144
1.023012893166785
0.9877663661673508
0.9714981707334052
0.95166815510206
0.9286685362142428
0.9192198018158195
0.9006387253481967
0.8828029913434877
0.875013497191967
0.8653407463724194
0.8565952275254338
0.8287656665492664
0.8221723403487936
0.8115768777940852
0.8126560989421755
0.8013599217060996
0.7907437340969996
0.8086087053338149
0.7983180152162588
0.7771057296750794
0.7755509315598472
0.7543896200563878
0.7468329521911677
0.7405568335090087
0.7294130686387184
0.7289102217622789
0.7231930680541832
0.7137333586281058
0.7161050317992516
0.7124439476998532
0.7223657872534275
0.7254432247001
0.7141880522748352
0.7125708854953822
0.702938508884878
0.6999819136959257
0.7043001035876821
0.7113910036841927
0.7067566381551484
0.6914993047259426
0.6797644261108942
0.675945078018479
0.6538186862716295
0.6456015103992669
0.634

In [None]:
## Accuracy
X_ = np.hstack((np.ones((np.size(X_test, 0),1)),X_test))
y_pred = predict(X_, W_optimal)
score = sum(y_pred == y_test_multilabel)/ len(y_test_multilabel)

print(score)