In [1]:
import cupy as cp
from dataset import load_mnist
from sklearn.model_selection import train_test_split

cp.cuda.Device(0).use()

In [2]:
train_data, train_label, test_data, test_label = load_mnist()
train_data, valid_data, train_label, valid_label = train_test_split(train_data, train_label, test_size=0.2)

In [3]:
train_data = cp.asarray(train_data)
test_data = cp.asarray(test_data)
valid_data = cp.asarray(valid_data)
train_label = cp.asarray(train_label)
test_label = cp.asarray(test_label)
valid_label = cp.asarray(valid_label)

In [17]:
def label_to_one_hot(y, n_class):
    one_hot = cp.zeros((y.shape[0], n_class))
    for i in range(len(y)):
        one_hot[i][y[i]] = 1
    
    return one_hot

    
def softmax_with_loss(out, y):
    max_val = cp.max(out, axis=1).reshape(-1, 1)
    exp_out = cp.exp(out - max_val)
    sum_exp_out = cp.sum(exp_out, axis=1).reshape(-1, 1)
    out = exp_out / sum_exp_out
    
    log_out = cp.log(out + 1e-7)

    loss = cp.sum(-log_out * y)
    
    return loss, out


class LogisticRegression(object):
    def __init__(self):
        self.params = {}
        self.params['W'] = cp.random.randn(784, 10)
        
        self.params['b'] = cp.random.randn(10)
        
        self.grads = {}
    
    def forward(self, x, y):
        
        y = label_to_one_hot(y, 10)
        
        self.x = x

        out = cp.dot(x, self.params['W']) + self.params['b']
    
        loss, out = softmax_with_loss(out, y)
        
        self.loss_grad = out / batch_size
        
        return loss / x.shape[0]
        
    def backward(self):
        self.grads['W'] = cp.dot(self.x.T, self.loss_grad)
        self.grads['b'] = cp.sum(self.loss_grad, axis=0)

In [18]:
batch_size = 3
batch_train_count = int(train_data.shape[0] / batch_size)
batch_train_inputs, batch_train_targets = [], []
for i in range(batch_train_count):
    batch_train_inputs.append(train_data[i*batch_size : (i+1)*batch_size])
    batch_train_targets.append(train_label[i*batch_size : (i+1)*batch_size])

In [19]:
model = LogisticRegression()

In [20]:
epoch = 10
learning_rate = 5e-3
for i in range(epoch):
    for j in range(batch_train_count):
        inputs = cp.array(batch_train_inputs[j])
        targets = cp.array(batch_train_targets[j])
        
        loss = model.forward(inputs, targets)
        
        if j % 500 == 0:
            print('loss', loss)
        
        model.backward()
        
        for key in model.params.keys():
            print(model.grads[key])
            model.params[key] -= learning_rate * model.grads[key]

loss 16.11809565095832
loss 16.11809565095832
loss 16.11809565095832
loss 16.11809565095832
loss 16.11809565095832
loss 10.745397067305548
loss 16.11809565095832
loss 10.745403909985077
loss 16.11809565095832
loss 16.11809565095832
loss 16.11809565095832
loss 16.11809565095832
loss 16.11809565095832
loss 10.745397067305548
loss 16.11809565095832
loss 16.11809565095832
loss 16.11809565095832
loss 16.11809565095832
loss 16.11809565095832
loss 16.11809565095832
loss 16.11809565095832
loss 16.11809565095832
loss 16.11809565095832
loss 16.11809565095832
loss 10.745397067305548
loss 16.11809565095832
loss 16.11809565095832
loss 16.11809565095832
loss 16.11809565095832


KeyboardInterrupt: 