In [19]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from collections import OrderedDict

%matplotlib inline

In [20]:

def cross_entropy_error(y,t,eps=1e-8):
    """Mean cross-entropy between predictions and targets (cf. common/functions.py).

    Args:
        y: Array of predicted probabilities. A 1-D array is handled as a
            batch holding a single row.
        t: Targets, either one-hot encoded (same number of elements as ``y``)
            or integer class indices.
        eps: Small constant added inside the logarithm so that log(0) is
            never evaluated.

    Returns:
        The sum over rows of ``-log(y[row, target] + eps)`` divided by the
        number of rows.
    """
    # Promote a single sample to a one-row batch.
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # One-hot targets are replaced by the index of the correct class.
    if t.size == y.size:
        t = t.argmax(axis=1)

    n_rows = y.shape[0]
    row_ids = np.arange(n_rows)
    picked = y[row_ids, t]  # probability assigned to the correct class, per row
    return -np.sum(np.log(picked + eps)) / n_rows

def softmax(x):
    """Numerically stable softmax (cf. common/functions.py).

    Args:
        x: Array of scores. A 2-D array is normalised row by row
            (one row per sample, e.g. for MNIST batches); any other
            dimensionality is normalised over the whole array.

    Returns:
        Array with the same shape as ``x`` holding the softmax values.
    """
    if x.ndim != 2:
        # Subtracting the maximum keeps np.exp from overflowing.
        shifted = x - np.max(x)
        weights = np.exp(shifted)
        return weights / np.sum(weights)

    # Work on the transpose so the per-sample reductions along axis 0
    # broadcast back against the (classes, samples) array.
    columns = x.T
    columns = columns - np.max(columns, axis=0)
    weights = np.exp(columns)
    probs = weights / np.sum(weights, axis=0)
    return probs.T

def sigmoid(x):
    """Sigmoid function: returns 1 / (1 + exp(-x)), element-wise for arrays."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator



In [21]:

def numerical_gradient(f,x,eps=1e-8):
    """Forward-difference estimate of the gradient of ``f`` at ``x`` (cf. common/gradient.py).

    Each element of ``x`` is perturbed in place by ``eps``, ``f`` is
    re-evaluated, and the element is then restored to its exact original
    value, so ``x`` is unchanged when the function returns.

    Args:
        f: Callable taking ``x`` and returning a scalar.
        x: NumPy array at which to differentiate; modified temporarily in place.
        eps: Step size of the finite difference.

    Returns:
        Array created with ``np.zeros_like(x)`` whose element ``i`` holds
        ``(f(x + eps * e_i) - f(x)) / eps``.
    """
    f0 = f(x)
    grad = np.zeros_like(x)

    # http://www.aipacommander.com/entry/2017/05/14/172220
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        i = it.multi_index

        # Remember the exact value: (v + eps) - eps is not guaranteed to
        # round back to v, which would let the parameters drift a little on
        # every call.
        original = x[i]
        x[i] = original + eps
        try:
            f1 = f(x)
        finally:
            # Restore even if f raises so the caller's array is never left perturbed.
            x[i] = original
        grad[i] = (f1-f0)/eps

        it.iternext()

    return grad


In [22]:
class MulLayer:
    """Multiplication node of a computational graph.

    The forward pass remembers both operands because the backward pass
    needs them: the gradient with respect to one operand is the upstream
    gradient times the other operand.
    """

    def __init__(self):
        # Operands of the most recent forward call.
        self.x = None
        self.y = None

    def forward(self,x,y):
        """Store both operands and return their product."""
        self.x, self.y = x, y
        return x * y

    def backward(self,dout):
        """Return ``(dx, dy)`` given ``dout``, the derivative w.r.t. the output."""
        grad_x = dout * self.y
        grad_y = dout * self.x
        return grad_x, grad_y

In [23]:
# Forward pass of a small computational graph:
#   price = (apple * apple_num) * tax
apple = 100
apple_num = 2
tax = 1.1

# layer
mul_apple_layer = MulLayer()
mul_tax_layer = MulLayer()

# forward
apple_price = mul_apple_layer.forward(apple,apple_num)
price = mul_tax_layer.forward(apple_price,tax)

# print() with a single argument behaves the same on Python 2 and Python 3,
# unlike the Python 2-only print statement.
print(price)

220.0


In [24]:
# Backward pass through the two multiplication layers, in the reverse of
# the forward order (tax layer first, then the apple layer).
dprice = 1 # seed gradient for the final output; apparently this value may be chosen freely
dapple_price,dtax = mul_tax_layer.backward(dprice)
dapple,dapple_num = mul_apple_layer.backward(dapple_price)

print(dapple,dapple_num,dtax)

(2.2, 110.00000000000001, 200)


In [25]:
class AddLayer:
    """Addition node of a computational graph.

    Addition passes the upstream gradient through unchanged to both inputs,
    so no state from the forward pass is needed.
    """

    def __init__(self):
        pass

    def forward(self,x,y):
        """Return ``x + y``."""
        out = x+y
        return out

    def backward(self,dout):
        """Return ``(dx, dy)``, both equal to the upstream gradient ``dout``.

        Objects that provide ``copy()`` (e.g. NumPy arrays) are copied so the
        two returned gradients do not alias each other or ``dout``. Plain
        Python numbers have no ``copy()`` and are immutable, so they are
        returned as they are.
        """
        if hasattr(dout, "copy"):
            return dout.copy(),dout.copy() #dx,dy
        return dout,dout #dx,dy

In [26]:
class Relu:
    """ReLU activation layer.

    The forward pass records which input elements were ``<= 0``; the
    backward pass zeroes the gradient at exactly those positions.
    """

    def __init__(self):
        # Boolean mask of the non-positive inputs from the last forward call.
        self.mask = None

    def forward(self,x):
        """Return a copy of ``x`` with every element ``<= 0`` set to 0."""
        nonpositive = (x <= 0)
        self.mask = nonpositive
        activated = x.copy()  # copy so the caller's array is not modified
        activated[nonpositive] = 0
        return activated

    def backward(self,dout):
        """Return a copy of ``dout`` with the masked positions set to 0."""
        grad = dout.copy()
        grad[self.mask] = 0
        return grad


In [27]:
class Sigmoid:
    """Sigmoid activation layer.

    The forward pass caches its output because the derivative of the
    sigmoid can be written in terms of that output: ``out * (1 - out)``.
    """

    def __init__(self):
        # Output of the most recent forward call, reused by backward.
        self.out = None

    def forward(self,x):
        """Return ``1 / (1 + exp(-x))`` and cache it for the backward pass."""
        out = 1.0/(1.0+np.exp(-x))
        self.out = out

        return out

    def backward(self,dout):
        """Return the gradient w.r.t. the input given the upstream gradient ``dout``."""
        # This method must not be named `forward`: a second `forward`
        # definition would replace the real forward pass in the class body.
        dx = dout * self.out * ( 1.0- self.out )
        return dx
        

In [28]:
class Affine:
    """Affine layer computing ``np.dot(x, W) + b``.

    ``W`` and ``b`` are stored by reference (not copied). The backward pass
    stores the parameter gradients on the instance as ``dW`` and ``db``.
    """

    def __init__(self,W,b):
        self.W, self.b = W, b
        self.x = None   # input of the most recent forward call
        self.dW = None  # gradient w.r.t. W, set by backward
        self.db = None  # gradient w.r.t. b, set by backward

    def forward(self,x):
        """Remember ``x`` and return ``np.dot(x, W) + b``."""
        self.x = x
        return np.dot(x, self.W) + self.b

    def backward(self,dout):
        """Store ``dW`` and ``db`` and return the gradient w.r.t. the input."""
        grad_input = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        # The bias is added to every row, so its gradient sums over axis 0.
        self.db = np.sum(dout, axis=0)
        return grad_input

In [29]:
class SoftmaxWithLoss:
    """Softmax activation combined with the cross-entropy loss.

    ``forward`` caches the softmax output and the targets; ``backward``
    uses them to compute the gradient of the loss w.r.t. the scores.
    """

    def __init__(self):
        self.loss = None  # loss value of the most recent forward call
        self.y = None     # softmax output of the most recent forward call
        self.t = None     # targets passed to the most recent forward call

    def forward(self,x,t):
        """Return the cross-entropy loss of ``softmax(x)`` against ``t``.

        Args:
            x: Scores passed to ``softmax``.
            t: Targets, one-hot or class indices (both forms are accepted
                by ``cross_entropy_error``).
        """
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y,self.t)

        return self.loss # scalar

    def backward(self,dout=1):
        """Return the gradient of the loss w.r.t. the scores, scaled by ``dout``.

        Mirrors the target handling of the forward pass: targets with as
        many elements as the softmax output are treated as one-hot,
        anything else as integer class indices. A 1-D softmax output is
        treated as a batch of one sample, so the gradient is not divided
        by the number of classes.
        """
        y = self.y
        t = np.asarray(self.t)

        single_sample = (y.ndim == 1)
        if single_sample:
            y = y.reshape(1, y.size)
            t = t.reshape(1, t.size)

        batch_size = y.shape[0]
        if t.size == y.size:
            # One-hot targets.
            dx = (y - t)/batch_size
        else:
            # Class-index targets: subtract 1 at the correct class of each row.
            dx = y.copy()
            dx[np.arange(batch_size), t.reshape(batch_size)] -= 1
            dx = dx/batch_size

        if single_sample:
            dx = dx.reshape(self.y.shape)

        return dx * dout

In [30]:
class TwoLayerNet:
    """Two-layer fully connected classifier: Affine -> Relu -> Affine -> SoftmaxWithLoss.

    Parameters live in ``self.params`` under the keys "W1", "b1", "W2" and
    "b2". The Affine layers hold references to the very same arrays, so
    in-place updates of ``self.params[key]`` (e.g. ``-=``) are seen by the
    layers.
    """

    def __init__(self,input_size,hidden_size,output_size,weight_init_std=0.01):
        """Create the parameters and layers.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output classes.
            weight_init_std: Factor applied to the standard-normal initial weights.
        """
        self.params={}
        self.params["W1"] = weight_init_std * np.random.randn(input_size,hidden_size)
        self.params["W2"] = weight_init_std * np.random.randn(hidden_size,output_size)
        self.params["b1"] = np.zeros(hidden_size)
        self.params["b2"] = np.zeros(output_size)

        # Ordered so that the forward pass can simply iterate over the layers.
        self.layers = OrderedDict()
        self.layers["Affine1"] = Affine(self.params["W1"],self.params["b1"])
        self.layers["Relu1"] = Relu()
        self.layers["Affine2"] = Affine(self.params["W2"],self.params["b2"])
        self.lastLayer = SoftmaxWithLoss()

    def predict(self,x):
        """Run ``x`` through every layer except the loss layer and return the scores."""
        for layer in self.layers.values():
            x = layer.forward(x)
        return x

    def loss(self,x,t):
        """Return the softmax cross-entropy loss of the network on ``(x, t)``."""
        y = self.predict(x)
        return self.lastLayer.forward(y,t)

    def accuracy(self,x,t):
        """Return the fraction of rows of ``x`` whose predicted class matches ``t``.

        ``t`` may be one-hot encoded or a 1-D array of class indices.
        """
        y = self.predict(x)
        y = np.argmax(y,axis=1)
        # Only one-hot targets need converting; 1-D targets are already indices.
        if t.ndim != 1:
            t = np.argmax(t,axis=1)

        accuracy = np.sum(y==t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self,x,t):
        """Return finite-difference gradients of the loss for every parameter.

        Uses the module-level ``numerical_gradient`` function, which perturbs
        the parameter arrays in place; the loss closure ignores its argument
        and re-evaluates the network with the perturbed parameters.
        """
        loss_W = lambda W:self.loss(x,t)

        grads = {}
        for key in ["W1","b1","W2","b2"]:
            grads[key] = numerical_gradient(loss_W,self.params[key])
        return grads

    def gradient(self,x,t):
        """Return the gradients of the loss for every parameter via backpropagation."""
        # forward (populates the state cached inside each layer)
        self.loss(x,t)

        # backward
        dout =1
        dout = self.lastLayer.backward(dout)

        # list(...) is needed because dict views cannot be sliced or
        # reversed directly on every supported Python version.
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        grads = {}
        grads["W1"] = self.layers["Affine1"].dW
        grads["b1"] = self.layers["Affine1"].db
        grads["W2"] = self.layers["Affine2"].dW
        grads["b2"] = self.layers["Affine2"].db

        return grads

In [31]:
# Load the dataset.
dataset = pd.read_csv("./train.csv")
# The first column is taken positionally as the label and the remaining
# columns as pixel values (assumes the CSV is laid out that way — verify).
# .iloc selects by position regardless of the column names.
t_temp = dataset.iloc[:,0].values.ravel().astype(np.uint8)
# float32 halves the memory of the pixel matrix compared with float64;
# every value k/256 with k a uint8 is exactly representable in float32.
x_dataset = dataset.iloc[:,1:].values.astype(np.uint8).astype(np.float32)
x_dataset /= 256.0  # in place, avoids one more full-size temporary
n_dataset = len(t_temp)
t_dataset = np.zeros((n_dataset,10),np.uint8)
t_dataset[np.arange(n_dataset),t_temp]=1 # one-hot encoding

# Shuffle samples and labels with the same random permutation.
index = np.arange(n_dataset)
np.random.shuffle(index)

x_dataset = x_dataset[index]
t_dataset = t_dataset[index]

# Split into training data and test data.
# Floor division keeps the sizes integral so they can be used as slice bounds.
n_test = n_dataset // 4
n_train = n_dataset - n_test 
x_train , t_train, x_test, t_test = \
    x_dataset[:n_train],t_dataset[:n_train],x_dataset[n_train:],t_dataset[n_train:]

MemoryError: 

In [None]:
# Gradient check: compare backpropagation against the finite-difference
# gradient on a tiny batch.
network = TwoLayerNet(input_size= 784,hidden_size=100,output_size=10)

x_batch = x_train[:3]
t_batch = t_train[:3]

grad_numerical = network.numerical_gradient(x_batch,t_batch)
# Must come from gradient() (backprop); computing both sides with
# numerical_gradient() would compare the numerical gradient with itself.
grad_backprop = network.gradient(x_batch,t_batch)

# Mean absolute difference per parameter.
for key in grad_numerical.keys():
    diff = np.average(np.abs(grad_backprop[key]-grad_numerical[key]))
    print(key + ":" + str(diff))

In [None]:
# Mini-batch SGD training of the two-layer network.
network = TwoLayerNet(input_size= 784,hidden_size=100,output_size=10)

iters_num = 5000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

# Floor division keeps this an integer, so the `i % iter_per_epoch == 0`
# test below fires once per epoch even when train_size is not a multiple
# of batch_size.
iter_per_epoch = max(train_size // batch_size,1)

for i in range(iters_num):
    # Draw a random mini-batch (sampling with replacement).
    batch_mask = np.random.choice(train_size,batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    
    grad = network.gradient(x_batch,t_batch)
    
    # In-place update: the layers reference these same parameter arrays.
    for key in ["W1","b1","W2","b2"]:
        network.params[key] -= learning_rate*grad[key]
    
    # Loss on the current mini-batch, measured after the update.
    loss = network.loss(x_batch,t_batch)
    train_loss_list.append(loss)
    
    # Record accuracies once per epoch.
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train,t_train)
        test_acc = network.accuracy(x_test,t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        
        print(train_acc,test_acc)