In [47]:
from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from keras.datasets import mnist
import numpy as np

np.random.seed(34)

In [38]:
def sigmoid(x):

    return np.exp(np.minimum(x, 0))/(1+np.exp(-np.abs(x)))

In [39]:
#ORのデータセット
x_train_or = np.array([[0, 1], [1, 0], [0, 0], [1, 1]])
y_train_or = np.array([[1], [1], [0], [1]])
x_valid_or, y_valid_or = x_train_or, y_train_or
x_test_or, y_test_or = x_train_or, y_train_or

W_or = np.random.uniform(low=-0.08, high=0.08, size=(2,1)).astype("float32")
b_or = np.zeros(shape=(1,)).astype("float32")

In [40]:
def np_log(x):
    return np.log(np.clip(a=x, a_max=1e+10, a_min=1e-10))

In [43]:
def train_or(x, y, epa = 1.0):

    global W_or, b_or

    batch_size = x.shape[0]

    y_hat = sigmoid(np.matmul(x, W_or)+ b_or)

    cost = -(y*np_log(y_hat) + (1-y)*np_log(1-y_hat)).mean()
    delta = y_hat - y

    #この２行理解不足
    dW = np.matmul(x.T, delta) /batch_size 
    db = np.matmul(np.ones(shape=(batch_size, )), delta)/batch_size

    W_or -= dW * epa
    b_or -= db * epa

    return cost, y_hat

def valid_or(x, y):
    y_hat = sigmoid(np.matmul(x, W_or)+b_or)
    cost = -(y*np_log(y_hat)+ (1-y)*np_log(1-y_hat)).mean()

    return cost, y_hat


    

In [44]:
for epoch in range(1000):
    x_train_or, y_train_or = shuffle(x_train_or, y_train_or)
    cost = train_or(x_train_or, y_train_or)
    cost, y_pred = valid_or(x_valid_or, y_valid_or)
print(y_pred)

[[0.99593311]
 [0.99593304]
 [0.01017459]
 [0.99999983]]


### MNIST(ソフトマックス回帰の実装)

In [46]:
def softmax(x):
    x -= x.max(axis=1, keepdims=True)
    x_exp = np.exp(x)
    return x_exp/np.sum(x_exp, axis=1, keepdims=True)


In [51]:
(x_mnist_1, y_mnist_1), (x_mnist_2, y_mnist_2) = mnist.load_data()

x_mnist = np.r_[x_mnist_1, x_mnist_2]   #縦方向に結合
y_mnist = np.r_[y_mnist_1, y_mnist_2]

x_mnist = x_mnist.astype("float32")/255.
y_mnist = np.eye(N=10)[y_mnist.astype("int32").flatten()]   #eye:単位行列を作成

x_mnist=x_mnist.reshape(x_mnist.shape[0], -1)

x_train_mnist, x_test_mnist, y_train_mnist, y_test_mnist = train_test_split(x_mnist, y_mnist, test_size=10000)
x_train_mnist, x_valid_mnist, y_train_mnist, y_valid_mnist = train_test_split(x_train_mnist, y_train_mnist, test_size=10000)

In [55]:
W_mnist = np.random.uniform(low=-0.08, high=0.08, size=(784, 10)).astype("float32")
b_mnist = np.zeros(shape=(10,)).astype("float32")

In [57]:
def train_mnist(x, y, eps=1.0):

    global W_mnist, b_mnist
    batch_size = x.shape[0]

    y_hat = sigmoid(np.matmul(x, W_mnist) + b_mnist)

    cost = (-y*np_log(y_hat)).sum(axis=1).mean()
    delta = y_hat - y

    dW = np.matmul(x.T, delta) /batch_size
    db = np.matmul(np.ones(shape=(batch_size, )), delta) /batch_size
    W_mnist -= dW*eps
    b_mnist -= db*eps

    return cost

def valid_mnist(x, y):
    y_hat = sigmoid(np.matmul(x, W_mnist) +b_mnist)
    cost = (-y*np_log(y_hat)).sum(axis=1).mean()

    return cost, y_hat

In [58]:
for epoch in range(100):
    x_train_mnist, y_train_mnist = shuffle(x_train_mnist, y_train_mnist)
    cost = train_mnist(x_train_mnist, y_train_mnist)
    cost, y_pred = valid_mnist(x_valid_mnist, y_valid_mnist)
    if epoch % 10 == 9 or epoch == 0:
        print('EPOCH: {}, Valid Cost: {:.3f}, Valid Accuracy: {:.3f}'.format(
            epoch + 1,
            cost,
            accuracy_score(y_valid_mnist.argmax(axis=1), y_pred.argmax(axis=1))
        ))

EPOCH: 1, Valid Cost: 12.809, Valid Accuracy: 0.115
EPOCH: 10, Valid Cost: 1.314, Valid Accuracy: 0.729
EPOCH: 20, Valid Cost: 0.565, Valid Accuracy: 0.833
EPOCH: 30, Valid Cost: 0.511, Valid Accuracy: 0.863
EPOCH: 40, Valid Cost: 0.501, Valid Accuracy: 0.877
EPOCH: 50, Valid Cost: 0.497, Valid Accuracy: 0.884
EPOCH: 60, Valid Cost: 0.490, Valid Accuracy: 0.889
EPOCH: 70, Valid Cost: 0.484, Valid Accuracy: 0.892
EPOCH: 80, Valid Cost: 0.479, Valid Accuracy: 0.894
EPOCH: 90, Valid Cost: 0.475, Valid Accuracy: 0.896
EPOCH: 100, Valid Cost: 0.472, Valid Accuracy: 0.897
