In [1]:
import numpy as np
import matplotlib.pyplot as plt
import keras
from keras.datasets import mnist

In [2]:
(x_train, t_train), (x_test,t_test) = mnist.load_data()
x_train  = x_train.reshape(60000, 784)
x_test   = x_test.reshape(10000, 784)
x_train  = x_train.astype('float32')
x_test   = x_test.astype('float32')
x_train /= 255
x_test  /= 255
t_train  = keras.utils.to_categorical(t_train, 10)
t_test   = keras.utils.to_categorical(t_test, 10)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [3]:
def Softmax(a):
    c=np.max(a)
    exp_a=np.exp(a-c)
    sum_a=np.sum(exp_a)
    y=exp_a/sum_a
    return y

def ReLu(x):
    return np.maximum(x,0)

def dReLu(u):
    tmp=u>=0
    return tmp.astype('int64')

def cross_entropy(y,t):
    delta=1e-7
    return -np.sum(t*np.log(y+delta))

In [4]:
class Affine:
    def __init__(self,w,b,e=0.0001,myu=0.8,l=0.01):
        self.w=w #重みの行列
        self.b=b #バイアス（スカラー）
        self.x=None #入力ベクトル、一個前の層の出力
        self.u=None #入力の和
        self.z=None #出力
        self.dw=None #BPのための重みの更新量
        self.db=None #BPのためのバイアスの更新量
        self.delta=None
        self.e=e #学習率
        self.myu=myu #モメンタムのパラメーター0.5~0.9
        self.l=l #重み減衰のパラメーター0.01~0.0001
        self.w_before=None
        self.b_before=None
    
    def forward(self,x):
        self.x=x
        self.u=np.dot(self.w.T,x)+self.b
        
    def relu(self):
        self.z=ReLu(self.u)
        
    def softmax(self):
        self.z=Softmax(self.u)
        
    def backward(self,delta,w):
        self.delta=np.dot(w,delta)*dReLu(self.u)
        self.dw=np.dot(self.x,self.delta.T)
        self.db=np.sum(self.delta)

    def set_before(self):
        self.w_before=self.w.copy()
        self.b_before=self.b
        
    def update(self):
        self.w=(self.w-self.e*self.dw)
        self.b=(self.b-self.e*self.db)

    def update_momentum(self):
      dw_before=self.w-self.w_before
      db_before=self.b-self.b_before

      self.w=(self.w-self.e*self.dw+self.myu*dw_before)
      self.b=(self.b-self.e*self.db+self.myu*db_before)

    def update_atte(self): #重み減衰はバイアスには適用しないのが普通
      self.w=(self.w-self.e*(self.dw+self.l*self.w))
      self.b=(self.b-self.e*self.db)

    def update_mom_atte(self):
      dw_before=self.w-self.w_before
      db_before=self.b-self.b_before

      self.w=(self.w-self.e*(self.dw+self.l*self.w)+self.myu*dw_before)
      self.b=(self.b-self.e*self.db+self.myu*db_before)


In [8]:
class OneLayer:
  def __init__(self,u0,u1):
    self.params={}
    self.params['W1']=np.random.rand(u0,u1)*np.sqrt(2/u0)
    self.params['b1']=np.random.rand()

class ThreeLayers:
    def __init__(self,u0,u1,u2,u3):
        self.params={}
        self.params['W1']=np.random.rand(u0,u1)*np.sqrt(2/u0)
        self.params['b1']=np.random.rand()
        self.params['W2']=np.random.rand(u1,u2)*np.sqrt(2/u1)
        self.params['b2']=np.random.rand()
        self.params['W3']=np.random.rand(u2,u3)*np.sqrt(2/u2)
        self.params['b3']=np.random.rand()

class FourLayers:
  def __init__(self,u0,u1,u2,u3,u4):
    self.params={}
    self.params['W1']=np.random.rand(u0,u1)*np.sqrt(2/u0)
    self.params['b1']=np.random.rand()
    self.params['W2']=np.random.rand(u1,u2)*np.sqrt(2/u1)
    self.params['b2']=np.random.rand()
    self.params['W3']=np.random.rand(u2,u3)*np.sqrt(2/u2)
    self.params['b3']=np.random.rand()
    self.params['W4']=np.random.rand(u3,u4)*np.sqrt(2/u3)
    self.params['b4']=np.random.rand()

class TwoLayers:
    def __init__(self,u0,u1,u2):
        self.params={}
        self.params['W1']=np.random.rand(u0,u1)*np.sqrt(2/u0)
        self.params['b1']=np.random.rand()
        self.params['W2']=np.random.rand(u1,u2)*np.sqrt(2/u1)
        self.params['b2']=np.random.rand()

In [25]:
layers=ThreeLayers(784,50,100,10)
affine1=Affine(layers.params['W1'],layers.params['b1'])
affine2=Affine(layers.params['W2'],layers.params['b2'])
affine3=Affine(layers.params['W3'],layers.params['b3'])

In [26]:
times=0
epoch=20
list_e=[]
stop=0
stop_flag=False
learning_flag=False

while True:
    index_train=np.arange(60000)
    random_index_train=np.random.permutation(index_train)
    list_e_tmp=[]

    for i in random_index_train:
        x=x_train[i].reshape(784,1)
        t=t_train[i].reshape(10,1)
    
        affine1.forward(x)
        affine1.relu()
    
        affine2.forward(affine1.z)
        affine2.relu()
    
        affine3.forward(affine2.z)
        affine3.softmax()
    
        affine3.delta=affine3.z-t
        affine3.dw=np.dot(affine3.x,affine3.delta.T)
        affine3.db=np.sum(affine3.delta)
    
        affine2.backward(affine3.delta,affine3.w)
        affine1.backward(affine2.delta,affine2.w)

        #if learning_flag:
          #affine1.update_mom_atte()
          #affine2.update_mom_atte()
          #affine3.update_mom_atte()
        #else:
          #affine1.update()
          #affine2.update()
          #affine3.update()

        
        #if learning_flag:
          #affine1.update_momentum()
          #affine2.update_momentum()
          #affine3.update_momentum()
        #else:
          #affine1.update()
          #affine2.update()
          #affine3.update()
       
        affine1.update_atte()
        affine2.update_atte()
        affine3.update_atte()

        #affine1.update()
        #affine2.update()
        #affine3.update()

        list_e_tmp.append(cross_entropy(affine3.z,t))
        learning_flag=True
        #affine1.set_before()
        #affine2.set_before()
        #affine3.set_before()
        #print(affine3.z.reshape(1,10))
    
    list_e.append(list_e_tmp)
        
    times+=1
    print(times)
    if times>=epoch:
        break

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20


In [27]:
count=0
for i in range(10000):
    x=x_test[i].reshape(784,1)
    t=np.argmax(t_test[i])
    
    affine1.forward(x)
    affine1.relu()
    affine2.forward(affine1.z)
    affine2.relu()
    affine3.forward(affine2.z)
    affine3.softmax()
    y=np.argmax(affine3.z)
    if t==y:
        count+=1

print(count)
accuracy=count*100/10000
print(accuracy)

9196
91.96


In [28]:
count2=0
for i in range(60000):
    x=x_train[i].reshape(784,1)
    t=np.argmax(t_train[i])
    affine1.forward(x)
    affine1.relu()
    affine2.forward(affine1.z)
    affine2.relu()
    affine3.forward(affine2.z)
    affine3.softmax()
    y=np.argmax(affine3.z)
    if t==y:
        count2+=1

acc=count2*100/60000
print(acc)
    

91.80166666666666


In [29]:
d_array=np.array([0.05,0.1,0.15,0.2,0.25])

for d in d_array:
  count=0
  for i in range(10000):
      r=np.random.rand(784)
      x_noise=x_test[i].copy()
      x_noise[np.where(r<d)]=np.random.rand()
      x=x_noise.reshape(784,1)
      t=np.argmax(t_test[i])
    
      affine1.forward(x)
      affine1.relu()
      affine2.forward(affine1.z)
      affine2.relu()
      affine3.forward(affine2.z)
      affine3.softmax()
      y=np.argmax(affine3.z)
      if t==y:
          count+=1

  accuracy=count*100/10000
  print('d:',d,'accu:',accuracy)

d: 0.05 accu: 91.38
d: 0.1 accu: 89.18
d: 0.15 accu: 84.15
d: 0.2 accu: 78.49
d: 0.25 accu: 71.94


In [None]:
layers1=ThreeLayers(784,200,100,10)
affine1_1=Affine(layers1.params['W1'],layers1.params['b1'])
affine1_2=Affine(layers1.params['W2'],layers1.params['b2'])
affine1_3=Affine(layers1.params['W3'],layers1.params['b3'])

In [None]:
times=0
epoch=10
list_e=[]
stop=0
stop_flag=False
learning_flag=False

while True:
    index_train=np.arange(60000)
    random_index_train=np.random.permutation(index_train)
    list_e_tmp=[]

    for i in random_index_train:
        x=x_train[i].reshape(784,1)
        t=t_train[i].reshape(10,1)
    
        affine1_1.forward(x)
        affine1_1.relu()
    
        affine1_2.forward(affine1_1.z)
        affine1_2.relu()
    
        affine1_3.forward(affine1_2.z)
        affine1_3.softmax()
    
        affine1_3.delta=affine1_3.z-t
        affine1_3.dw=np.dot(affine1_3.x,affine1_3.delta.T)
        affine1_3.db=np.sum(affine1_3.delta)
    
        affine1_2.backward(affine1_3.delta,affine1_3.w)
        affine1_1.backward(affine1_2.delta,affine1_2.w)

        if learning_flag:
          affine1_1.update_mom_atte()
          affine1_2.update_mom_atte()
          affine1_3.update_mom_atte()
        else:
          affine1_1.update()
          affine1_2.update()
          affine1_3.update()

        
        #if learning_flag:
          #affine1.update_momentum()
          #affine2.update_momentum()
          #affine3.update_momentum()
        #else:
          #affine1.update()
          #affine2.update()
          #affine3.update()
       
        #affine1.update_atte()
        #affine2.update_atte()
        #affine3.update_atte()

        #affine1.update()
        #affine2.update()
        #affine3.update()

        list_e_tmp.append(cross_entropy(affine1_3.z,t))
        learning_flag=True
        affine1_1.set_before()
        affine1_2.set_before()
        affine1_3.set_before()
        #print(affine3.z.reshape(1,10))
    
    list_e.append(list_e_tmp)
        
    times+=1
    print(times)
    if times>=epoch:
        break

1
2
3
4
5
6
7
8
9
10


In [16]:
count=0
for i in range(10000):
    x=x_test[i].reshape(784,1)
    t=np.argmax(t_test[i])
    
    affine1_1.forward(x)
    affine1_1.relu()
    affine1_2.forward(affine1_1.z)
    affine1_2.relu()
    affine1_3.forward(affine1_2.z)
    affine1_3.softmax()
    y=np.argmax(affine1_3.z)
    if t==y:
        count+=1

print(count)
accuracy=count*100/10000
print(accuracy)

3497
34.97


In [17]:
count2=0
for i in range(60000):
    x=x_train[i].reshape(784,1)
    t=np.argmax(t_train[i])
    affine1_1.forward(x)
    affine1_1.relu()
    affine1_2.forward(affine1_1.z)
    affine1_2.relu()
    affine1_3.forward(affine1_2.z)
    affine1_3.softmax()
    y=np.argmax(affine1_3.z)
    if t==y:
        count2+=1

acc=count2*100/60000
print(acc)
    

35.11833333333333


In [23]:
d_array=np.array([0.05,0.1,0.15,0.2,0.25])

for d in d_array:
  count=0
  for i in range(10000):
      r=np.random.rand(784)
      x_noise=x_test[i].copy()
      x_noise[np.where(r<d)]=np.random.rand()
      x=x_noise.reshape(784,1)
      t=np.argmax(t_test[i])
    
      affine1_1.forward(x)
      affine1_1.relu()
      affine1_2.forward(affine1_1.z)
      affine1_2.relu()
      affine1_3.forward(affine1_2.z)
      affine1_3.softmax()
      y=np.argmax(affine1_3.z)
      if t==y:
          count+=1

  accuracy=count*100/10000
  print('d:',d,'accu:',accuracy)

d: 0.05 accu: 31.89
d: 0.1 accu: 27.89
d: 0.15 accu: 25.02
d: 0.2 accu: 23.1
d: 0.25 accu: 20.52


In [15]:
layers2=FourLayers(784,50,100,80,10)
affine2_1=Affine(layers2.params['W1'],layers2.params['b1'])
affine2_2=Affine(layers2.params['W2'],layers2.params['b2'])
affine2_3=Affine(layers2.params['W3'],layers2.params['b3'])
affine2_4=Affine(layers2.params['W4'],layers2.params['b4'])

In [16]:
times=0
epoch=20
list_e=[]
learning_flag=False

while True:
    index_train=np.arange(60000)
    random_index_train=np.random.permutation(index_train)
    list_e_tmp=[]

    for i in random_index_train:
        x=x_train[i].reshape(784,1)
        t=t_train[i].reshape(10,1)
    
        affine2_1.forward(x)
        affine2_1.relu()
    
        affine2_2.forward(affine2_1.z)
        affine2_2.relu()
    
        affine2_3.forward(affine2_2.z)
        affine2_3.relu()

        affine2_4.forward(affine2_3.z)
        affine2_4.softmax()

        affine2_4.delta=affine2_4.z-t
        affine2_4.dw=np.dot(affine2_4.x,affine2_4.delta.T)
        affine2_4.db=np.sum(affine2_4.delta)
    
        affine2_3.backward(affine2_4.delta,affine2_4.w)
        affine2_2.backward(affine2_3.delta,affine2_3.w)
        affine2_1.backward(affine2_2.delta,affine2_2.w)

        if learning_flag:
          affine2_1.update_mom_atte()
          affine2_2.update_mom_atte()
          affine2_3.update_mom_atte()
          affine2_4.update_mom_atte()
        else:
          affine2_1.update()
          affine2_2.update()
          affine2_3.update()
          affine2_4.update()
    
        list_e_tmp.append(cross_entropy(affine2_4.z,t))
        affine2_1.set_before()
        affine2_2.set_before()
        affine2_3.set_before()
        affine2_4.set_before()
        learning_flag=True
        #print(affine3.z.reshape(1,10))
    
    list_e.append(list_e_tmp)
        
    times+=1
    print(times)
    if times>=epoch:
        break

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20


In [17]:
count=0
for i in range(10000):
    x=x_test[i].reshape(784,1)
    t=np.argmax(t_test[i])
    
    affine2_1.forward(x)
    affine2_1.relu()
    affine2_2.forward(affine2_1.z)
    affine2_2.relu()
    affine2_3.forward(affine2_2.z)
    affine2_3.relu()
    affine2_4.forward(affine2_3.z)
    affine2_4.softmax()

    y=np.argmax(affine2_4.z)
    if t==y:
        count+=1

print(count)
accuracy=count*100/10000
print(accuracy)

8767
87.67


In [18]:
count2=0
for i in range(60000):
    x=x_train[i].reshape(784,1)
    t=np.argmax(t_train[i])
    affine2_1.forward(x)
    affine2_1.relu()
    affine2_2.forward(affine2_1.z)
    affine2_2.relu()
    affine2_3.forward(affine2_2.z)
    affine2_3.relu()
    affine2_4.forward(affine2_3.z)
    affine2_4.softmax()
    y=np.argmax(affine2_4.z)
    if t==y:
        count2+=1

acc=count2*100/60000
print(acc)

87.91


In [19]:
d_array=np.array([0.05,0.1,0.15,0.2,0.25])

for d in d_array:
  count=0
  for i in range(10000):
      r=np.random.rand(784)
      x_noise=x_test[i].copy()
      x_noise[np.where(r<d)]=np.random.rand()
      x=x_noise.reshape(784,1)
      t=np.argmax(t_test[i])
    
      affine2_1.forward(x)
      affine2_1.relu()
      affine2_2.forward(affine2_1.z)
      affine2_2.relu()
      affine2_3.forward(affine2_2.z)
      affine2_3.relu()
      affine2_4.forward(affine2_3.z)
      affine2_4.softmax()
      y=np.argmax(affine2_4.z)
      if t==y:
          count+=1

  accuracy=count*100/10000
  print('d:',d,'accu:',accuracy)

d: 0.05 accu: 85.6
d: 0.1 accu: 74.57
d: 0.15 accu: 64.04
d: 0.2 accu: 53.44
d: 0.25 accu: 47.81


In [20]:
layers3=TwoLayers(784,100,10)
affine3_1=Affine(layers3.params['W1'],layers3.params['b1'])
affine3_2=Affine(layers3.params['W2'],layers3.params['b2'])

In [21]:
times=0
epoch=20
list_e=[]
learning_flag=False

while True:
    index_train=np.arange(60000)
    random_index_train=np.random.permutation(index_train)
    list_e_tmp=[]

    for i in random_index_train:
        x=x_train[i].reshape(784,1)
        t=t_train[i].reshape(10,1)
    
        affine3_1.forward(x)
        affine3_1.relu()
    
        affine3_2.forward(affine3_1.z)
        affine3_2.softmax()

        affine3_2.delta=affine3_2.z-t
        affine3_2.dw=np.dot(affine3_2.x,affine3_2.delta.T)
        affine3_2.db=np.sum(affine3_2.delta)
    
        affine3_1.backward(affine3_2.delta,affine3_2.w)

        if learning_flag:
          affine3_1.update_mom_atte()
          affine3_2.update_mom_atte()
        else:
          affine3_1.update()
          affine3_2.update()
    
        list_e_tmp.append(cross_entropy(affine3_2.z,t))
        affine3_1.set_before()
        affine3_2.set_before()
        learning_flag=True
        #print(affine3.z.reshape(1,10))
    
    list_e.append(list_e_tmp)
        
    times+=1
    print(times)
    if times>=epoch:
        break

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20


In [22]:
count=0
for i in range(10000):
    x=x_test[i].reshape(784,1)
    t=np.argmax(t_test[i])
    
    affine3_1.forward(x)
    affine3_1.relu()
    affine3_2.forward(affine3_1.z)
    affine3_2.softmax()
    y=np.argmax(affine3_2.z)
    if t==y:
        count+=1

print(count)
accuracy=count*100/10000
print(accuracy)

9144
91.44


In [23]:
count2=0
for i in range(60000):
    x=x_train[i].reshape(784,1)
    t=np.argmax(t_train[i])
    affine3_1.forward(x)
    affine3_1.relu()
    affine3_2.forward(affine3_1.z)
    affine3_2.softmax()
    y=np.argmax(affine3_2.z)
    if t==y:
        count2+=1

acc=count2*100/60000
print(acc)

91.025


In [24]:
d_array=np.array([0.05,0.1,0.15,0.2,0.25])

for d in d_array:
  count=0
  for i in range(10000):
      r=np.random.rand(784)
      x_noise=x_test[i].copy()
      x_noise[np.where(r<d)]=np.random.rand()
      x=x_noise.reshape(784,1)
      t=np.argmax(t_test[i])
    
      affine3_1.forward(x)
      affine3_1.relu()
      affine3_2.forward(affine3_1.z)
      affine3_2.softmax()
      y=np.argmax(affine3_2.z)
      if t==y:
          count+=1

  accuracy=count*100/10000
  print('d:',d,'accu:',accuracy)

d: 0.05 accu: 91.11
d: 0.1 accu: 90.07
d: 0.15 accu: 87.57
d: 0.2 accu: 83.23
d: 0.25 accu: 78.09


In [9]:
layer=OneLayer(784,10)
affine4_1=Affine(layer.params['W1'],layer.params['b1'])

In [10]:
times=0
epoch=10
list_e=[]
learning_flag=False

while True:
    index_train=np.arange(60000)
    random_index_train=np.random.permutation(index_train)
    list_e_tmp=[]

    for i in random_index_train:
        x=x_train[i].reshape(784,1)
        t=t_train[i].reshape(10,1)
    
        affine4_1.forward(x)
        affine4_1.softmax()

        affine4_1.delta=affine4_1.z-t
        affine4_1.dw=np.dot(affine4_1.x,affine4_1.delta.T)
        affine4_1.db=np.sum(affine4_1.delta)
    
        if learning_flag:
          affine4_1.update_mom_atte()
        else:
          affine4_1.update()
    
        list_e_tmp.append(cross_entropy(affine4_1.z,t))
        affine4_1.set_before()
        learning_flag=True
        #print(affine3.z.reshape(1,10))
    
    list_e.append(list_e_tmp)
        
    times+=1
    print(times)
    if times>=epoch:
        break

1
2
3
4
5
6
7
8
9
10


In [12]:
count=0
for i in range(10000):
    x=x_test[i].reshape(784,1)
    t=np.argmax(t_test[i])
    
    affine4_1.forward(x)
    affine4_1.softmax()
    y=np.argmax(affine4_1.z)
    if t==y:
        count+=1

print(count)
accuracy=count*100/10000
print(accuracy)

8992
89.92


In [13]:
count2=0
for i in range(60000):
    x=x_train[i].reshape(784,1)
    t=np.argmax(t_train[i])
    affine4_1.forward(x)
    affine4_1.softmax()
    y=np.argmax(affine4_1.z)
    if t==y:
        count2+=1

acc=count2*100/60000
print(acc)

89.09833333333333


In [14]:
d_array=np.array([0.05,0.1,0.15,0.2,0.25])

for d in d_array:
  count=0
  for i in range(10000):
      r=np.random.rand(784)
      x_noise=x_test[i].copy()
      x_noise[np.where(r<d)]=np.random.rand()
      x=x_noise.reshape(784,1)
      t=np.argmax(t_test[i])
    
      affine4_1.forward(x)
      affine4_1.softmax()
      y=np.argmax(affine4_1.z)
      if t==y:
          count+=1

  accuracy=count*100/10000
  print('d:',d,'accu:',accuracy)

d: 0.05 accu: 89.58
d: 0.1 accu: 88.95
d: 0.15 accu: 88.6
d: 0.2 accu: 87.19
d: 0.25 accu: 84.59
