In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
import seaborn as sns

In [2]:
from sklearn.datasets import fetch_openml
mnist = fetch_openml('mnist_784', version=1, cache=True)
mnist.keys()

dict_keys(['data', 'target', 'frame', 'categories', 'feature_names', 'target_names', 'DESCR', 'details', 'url'])

In [3]:
X, y = mnist["data"], mnist["target"]

In [4]:
y = y.astype(np.uint8)

In [5]:
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder()

In [6]:
enc.fit(y[:,np.newaxis])

  """Entry point for launching an IPython kernel.


OneHotEncoder()

In [7]:
Y = enc.transform(y[:,np.newaxis]).toarray()

  """Entry point for launching an IPython kernel.


In [8]:
X_train, X_test, y_train, y_test = X[:60000], X[60000:], Y[:60000], Y[60000:]

In [9]:
X_train = X_train / 255
X_test = X_test / 255

In [10]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [12]:
def softmax(X, W):
    K = np.size(W, 1) # W의 열의 갯수 -> w의 갯수 
    A = np.exp(X @ W)
    B = np.diag(1 / (np.reshape(A @ np.ones((K,1)), -1)))
    Y = B @ A
    return Y

In [13]:
def compute_cost(X, T, W):
    epsilon = 1e-5
    N = len(T)
    K = np.size(T, 1)
    cost = - (1/N) * np.ones((1,N)) @ (np.multiply(np.log(softmax(X, W) + epsilon), T)) @ np.ones((K,1))
    return cost

In [14]:
def compute_cost_L2(X, T, W, lambd = 0.005):
    epsilon = 1e-5
    N = len(T)
    K = np.size(T, 1)
    L2 = (lambd/(2*N))* np.sqrt(np.sum(np.square(W)))
    cost = - (1/N) * np.ones((1,N)) @ (np.multiply(np.log(softmax(X, W) + epsilon), T)) @ np.ones((K,1)) + L2
    return cost

In [15]:
def predict(X, W):
    return np.argmax((X @ W), axis=1)

In [63]:
def batch_gd(X, T, W, learning_rate, iterations, batch_size):
    N = len(T)
    cost_history = np.zeros((iterations,1))
    shuffled_indices = np.random.permutation(N)
    X_shuffled = X[shuffled_indices]
    T_shuffled = T[shuffled_indices]

    for i in range(iterations):
        j = i % N
        X_batch = X_shuffled[j:j+batch_size]
        T_batch = T_shuffled[j:j+batch_size]
        # batch가 epoch 경계를 넘어가는 경우, 앞 부분으로 채워줌
        if X_batch.shape[0] < batch_size:
            X_batch = np.vstack((X_batch, X_shuffled[:(batch_size - X_batch.shape[0])]))
            T_batch = np.vstack((T_batch, T_shuffled[:(batch_size - T_batch.shape[0])]))
        W = W - (learning_rate/batch_size) * (X_batch.T @ (softmax(X_batch, W) - T_batch))
        
        #cost_history[i] = compute_cost(X_batch, T_batch, W)
        cost_history[i] = compute_cost_L2(X_batch, T_batch, W)

        
        if i % 1000 == 0:
            print(cost_history[i][0])

    return (cost_history, W)

In [64]:
X = np.hstack((np.ones((np.size(X_train, 0),1)),X_train))
T = y_train

K = np.size(T, 1)
M = np.size(X, 1)
W = np.zeros((M,K))

iterations = 50000
learning_rate = 0.01

#initial_cost = compute_cost(X, T, W)
initial_cost = compute_cost_L2(X, T, W)

print("Initial Cost is: {} \n".format(initial_cost[0][0]))

(cost_history, W_optimal) = batch_gd(X, T, W, learning_rate, iterations, 64)

Initial Cost is: 2.3024850979937352 

2.279741995806013
0.6182768877809083
0.5514645913646093
0.4545403329078704
0.35784193352448185
0.364801196599361
0.39206022461462897
0.4475444628925033
0.35147482038082306
0.3786321829633911
0.29892088837177516
0.15372934320518922
0.14013897856124427
0.26139058767500123
0.35197766306306194
0.2073447956682141
0.22904291183153433
0.22088602404036486
0.5116792335748496
0.33015862568209003
0.28420283114919004
0.22761625062770682
0.22863237164336825
0.2962303958809342
0.32575073893865353
0.38459263709030156
0.3846721113166033
0.42454800296344214
0.2859113209942503
0.22395807610282173
0.3008997294200552
0.22621391182586423
0.17117058900762222
0.37117040763155457
0.3481700023921881
0.20734752764240677
0.2506801063149976
0.3253197749356844
0.2936978305174381
0.367444583893912
0.3049192151136238
0.24665525716417708
0.22785657830338826
0.2860168816329977
0.2445766001350602
0.16417057801395046
0.27786906884099816
0.19780658387195477
0.37272265950394007
0.1586

In [65]:
## Accuracy
X_ = np.hstack((np.ones((np.size(X_test, 0),1)),X_test))
T_ = y_test
y_pred = predict(X_, W_optimal)
score = float(sum(y_pred == np.argmax(T_, axis=1)))/ float(len(y_test))

print(score)

0.918


In [None]:
from sklearn.base import BaseEstimator 
from sklearn.base import RegressorMixin
    N = len(T)
    cost_history = np.zeros((iterations,1))
    shuffled_indices = np.random.permutation(N)
    X_shuffled = X[shuffled_indices]
    T_shuffled = T[shuffled_indices]

    for i in range(iterations):
        j = i % N
        X_batch = X_shuffled[j:j+batch_size]
        T_batch = T_shuffled[j:j+batch_size]
        # batch가 epoch 경계를 넘어가는 경우, 앞 부분으로 채워줌
        if X_batch.shape[0] < batch_size:
            X_batch = np.vstack((X_batch, X_shuffled[:(batch_size - X_batch.shape[0])]))
            T_batch = np.vstack((T_batch, T_shuffled[:(batch_size - T_batch.shape[0])]))
        W = W - (learning_rate/batch_size) * (X_batch.T @ (softmax(X_batch, W) - T_batch))
        
        #cost_history[i] = compute_cost(X_batch, T_batch, W)
        cost_history[i] = compute_cost_L2(X_batch, T_batch, W)

        
        if i % 1000 == 0:
            print(cost_history[i][0])

    return (cost_history, W)

class TSEstimatory(BaseEstimator, RegressorMixin):

    def __init__(self, **model_hyper_parameters):
        """
        """
        super().__init__()

    def fit(self, X, Y=None):
        """
        Fit global model on X features to minimize 
        a given function on Y.

        @param X
        @param Y
        """
		# TODO
        return self

    def predict(self, X):
        """
        @param X: features vector the model will be evaluated on
        """
		# TODO
        y_pred = predict(X_, W_optimal)
        np.argmax((X @ W), axis=1)
		return None