In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version=1, cache=True)
mnist.keys()

dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

In [3]:
_X, _y = mnist["data"], mnist["target"]
_y = _y.astype(np.uint8)

In [4]:
X_train, X_test, y_train, y_test = _X[:60000], _X[60000:], _y[:60000], _y[60000:]

In [5]:
X_train = X_train / 255
X_test = X_test / 255

In [6]:
y_train_large = (y_train >= 7)
y_train_odd = (y_train % 2 == 1)
y_train_multilabel = (np.c_[y_train_large, y_train_odd]).astype(np.uint8)

y_test_large = (y_test >= 7)
y_test_odd = (y_test % 2 == 1)
y_test_multilabel = (np.c_[y_test_large, y_test_odd]).astype(np.uint8)

In [7]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [8]:
def predict(X, W):
    return np.round(sigmoid(X @ W))

In [28]:
def compute_cost(X, T, W):
    epsilon = 1e-5
    N = len(T)
    K = np.size(T, 1)
    cost = - (1/N) * np.ones((1,N)) @ (np.multiply(np.log(sigmoid(X @ W) + epsilon), T)) @ np.ones((K,1)) + \
          (- (1/N) * np.ones((1,N)) @ (np.multiply(np.log(1 - sigmoid(X @ W) + epsilon), (1 - T))) @ np.ones((K,1)))
    return cost

In [24]:
def batch_gd(X, T, W, learning_rate, iterations, batch_size):
    N = len(T)
    cost_history = np.zeros((iterations,1))
    shuffled_indices = np.random.permutation(N)
    X_shuffled = X[shuffled_indices]
    T_shuffled = T[shuffled_indices]

    for i in range(iterations):
        j = i % N
        X_batch = X_shuffled[j:j+batch_size]
        T_batch = T_shuffled[j:j+batch_size]
        # batch가 epoch 경계를 넘어가는 경우, 앞 부분으로 채워줌
        if X_batch.shape[0] < batch_size:
            X_batch = np.vstack((X_batch, X_shuffled[:(batch_size - X_batch.shape[0])]))
            T_batch = np.vstack((T_batch, T_shuffled[:(batch_size - T_batch.shape[0])]))
        W = W - (learning_rate/batch_size) * (X_batch.T @ (sigmoid(X_batch @ W) - T_batch))
        cost_history[i] = compute_cost(X_batch, T_batch, W)
        if i % 10 == 0:
            print(cost_history[i][0])

    return (cost_history, W)

In [25]:
y_train_multilabel

array([[0, 1],
       [0, 0],
       [0, 0],
       ...,
       [0, 1],
       [0, 0],
       [1, 0]], dtype=uint8)

In [30]:
X = np.hstack((np.ones((np.size(X_train, 0),1)),X_train))
T = y_train_multilabel

K = np.size(T, 1)
M = np.size(X, 1)
W = np.zeros((M,K))

iterations = 2000
learning_rate = 0.01

initial_cost = compute_cost(X, T, W)
print("Initial Cost is: {} \n".format(initial_cost[0][0]))

(cost_history, W_optimal) = batch_gd(X, T, W, learning_rate, iterations, 25)

Initial Cost is: 1.386254361519962 

1.371562344118162
1.2443026797808128
1.12087734641946
0.9130497710356509
0.9525008778287885
1.0455213553853304
0.9271487813319529
0.7610318533451474
0.7214337094981859
0.7370809970719829
0.7612369147418179
0.8425573965769991
0.8276409345842012
0.8715756355788393
0.903016275266288
0.9005939492607414
0.9502275800250233
0.8629534378467929
0.8631589342070811
0.8537932508100096
0.814319069426589
0.7299088312182276
0.7381904286470109
0.8443522772476361
0.9629685323712587
0.8747018639794758
1.0841284094068342
0.8156068779844285
0.8601381822644083
0.8743313614760715
0.8600976847871312
0.753522352266142
0.7875417568434826
0.6547383086141267
0.6775649937105073
0.6932264197153619
0.650004035335418
0.6292532200312051
0.7543904875061268
0.746159376496436
0.6239647264205174
0.544686470710119
0.4450078732719553
0.6313497480214934
0.685702022483454
0.6684816211968114
0.6571717321046839
0.6239414252970119
0.7075690110749082
0.7842679822312991
0.915852514800968
1.015

In [31]:
## Accuracy: 각각의 클래스에 대한 정확도를 구하는 방식으로 측정하는 코드임
X_ = np.hstack((np.ones((np.size(X_test, 0),1)),X_test))
y_pred = predict(X_, W_optimal)
score = sum(y_pred == y_test_multilabel)/ len(y_test_multilabel)

print(score)

[0.8852 0.8643]
