In [1]:
import os
import struct
import numpy as np
import matplotlib.pyplot as plt
import random
import math

In [2]:
def load_mnist(path, kind="train"):
    """Load one split of an IDX-formatted (MNIST-style) image dataset.

    Parameters
    ----------
    path : str
        Directory holding ``<kind>-labels.idx1-ubyte`` and
        ``<kind>-images.idx3-ubyte``.
    kind : str, optional
        File-name prefix of the split to load (default ``"train"``).

    Returns
    -------
    images : numpy.ndarray
        ``uint8`` array with one flattened image per row, shape
        ``(len(labels), rows * cols)`` where ``rows``/``cols`` come from the
        image-file header.
    labels : numpy.ndarray
        ``uint8`` vector holding every byte that follows the label header.

    Raises
    ------
    ValueError
        If the number of pixel bytes is not ``len(labels) * rows * cols``.
    """
    labels_path = os.path.join(path, '%s-labels.idx1-ubyte' % kind)
    images_path = os.path.join(path, '%s-images.idx3-ubyte' % kind)

    with open(labels_path, 'rb') as lbpath:
        # '>II' = two big-endian unsigned 32-bit integers (8 bytes in total);
        # they are consumed only to skip past the header.
        _magic, _count = struct.unpack('>II', lbpath.read(8))
        labels = np.fromfile(lbpath, dtype=np.uint8)

    with open(images_path, 'rb') as imgpath:
        # '>IIII' = four big-endian unsigned 32-bit integers (16 bytes).
        _magic, _num, rows, cols = struct.unpack('>IIII', imgpath.read(16))
        pixels = np.fromfile(imgpath, dtype=np.uint8)

    # Row width is taken from the header instead of a hard-coded 784, so image
    # sizes other than 28x28 load correctly as well.
    images = pixels.reshape(len(labels), rows * cols)

    return images, labels

In [3]:
# Read both splits from the local dataset directory: the "train" files and the "t10k" files.
X_train, y_train = load_mnist("mnist_dataset/", "train")
X_test, y_test = load_mnist("mnist_dataset/", "t10k")

In [4]:
def get_act(x):
    """Apply the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    Parameters
    ----------
    x : array_like
        Pre-activation values (any shape; a scalar is accepted too).

    Returns
    -------
    numpy.ndarray
        ``float64`` array with the same shape as ``x``.

    Notes
    -----
    The input is converted to float before it is negated, so unsigned-integer
    input no longer wraps around, and the whole array is processed in one
    vectorised pass instead of a Python-level loop.
    """
    x = np.asarray(x, dtype=float)
    # exp() only ever sees a non-positive argument, so it cannot overflow:
    #   x >= 0:  1 / (1 + exp(-x))
    #   x <  0:  exp(x) / (1 + exp(x))      (algebraically the same value)
    z = np.exp(-np.abs(x))
    return np.where(x >= 0, 1.0 / (1.0 + z), z / (1.0 + z))

In [6]:
class NN_Mnist(object):
    """Fully connected input -> hidden -> output network with sigmoid activations.

    Training is done one sample at a time (see ``train``); ``error`` reports the
    misclassification rate on a labelled set.
    """

    def __init__(self, sizes):
        """Create the network.

        Parameters
        ----------
        sizes : sequence of int
            ``sizes[0]`` input nodes, ``sizes[1]`` hidden nodes and
            ``sizes[2]`` output nodes.
        """
        self.inp_num = sizes[0]    # number of input nodes
        self.out_num = sizes[2]    # number of output nodes
        self.hid_num = sizes[1]    # number of hidden nodes
        # Weights start uniformly distributed in [-0.1, 0.1); biases start at zero.
        self.w1 = 0.2*np.random.random((self.inp_num, self.hid_num)) - 0.1
        self.w2 = 0.2*np.random.random((self.hid_num, self.out_num)) - 0.1
        self.hid_offset = np.zeros(self.hid_num)
        self.out_offset = np.zeros(self.out_num)

    def _forward(self, sample):
        """Run one forward pass for a single input vector.

        Every intermediate result is stored on the instance (``hid_value``,
        ``hid_act``, ``out_value``, ``out_act``); the output activations are
        also returned.
        """
        # hidden-layer pre-activation and activation
        self.hid_value = np.dot(sample, self.w1) + self.hid_offset
        self.hid_act = get_act(self.hid_value)
        # output-layer pre-activation and activation
        self.out_value = np.dot(self.hid_act, self.w2) + self.out_offset
        self.out_act = get_act(self.out_value)
        return self.out_act

    def train(self, X_train, y_train, hid_lrate, inp_lrate):
        """Make one pass over the data, updating the parameters after every sample.

        Parameters
        ----------
        X_train : array_like
            One sample per row; values are divided by 256 before use.
        y_train : sequence of int
            Class index of each sample, used to build the one-hot target.
        hid_lrate : float
            Step size for the hidden->output weights and the output bias.
        inp_lrate : float
            Step size for the input->hidden weights and the hidden bias.
        """
        # np.array makes a float copy, so the caller's array is never modified.
        X_train = np.array(X_train, dtype='float')
        # Scale the inputs (8-bit pixel values end up in [0, 1)).
        X_train /= 256
        for count in range(len(X_train)):
            sample = X_train[count]
            # one-hot target vector
            t_label = np.zeros(self.out_num)
            t_label[y_train[count]] = 1

            # forward pass
            self._forward(sample)

            # backward pass
            # difference between the target and the network output
            cost = t_label - self.out_act
            # output-layer delta
            out_delta = cost * self.out_act * (1 - self.out_act)
            # hidden-layer delta; must use w2 as it was BEFORE this sample's update
            hid_delta = self.hid_act * (1 - self.hid_act) * np.dot(self.w2, out_delta)

            # hidden -> output weights: rank-1 update, the same arithmetic as
            # adding hid_lrate * out_delta[i] * hid_act to column i for every i
            self.w2 += np.outer(self.hid_act, hid_lrate * out_delta)
            # input -> hidden weights, likewise column by column
            self.w1 += np.outer(sample, inp_lrate * hid_delta)
            # bias updates
            self.out_offset += hid_lrate * out_delta
            self.hid_offset += inp_lrate * hid_delta
        print("Training complete.")

    def error(self, X, y):
        """Print the misclassification rate of the network on ``(X, y)``.

        The true label of every misclassified sample is written to
        ``err_label.txt`` in the current working directory, one per line.
        Nothing is returned. An empty ``X`` raises ``ZeroDivisionError`` when
        the rate is computed.
        """
        X = np.array(X, dtype='float')
        X /= 256
        correct = 0
        err_sample = []
        for count in range(len(X)):
            predicted = np.argmax(self._forward(X[count]))
            if predicted == y[count]:
                correct += 1
            else:
                # each entry is a one-element list so the file lines look like "[label]"
                err_sample.append([y[count]])
        with open("err_label.txt", 'w') as f:
            for item in err_sample:
                f.write(str(item))
                f.write('\n')
        print('Error is: %.2f%%' % ((1 - float(correct)/len(X))*100))

In [7]:
# Pair every training image with its label as one (image, label) row.
# Each row mixes an image vector with a scalar label, i.e. the rows are ragged.
# Recent NumPy releases refuse to build an array from such nested sequences
# implicitly, so the (N, 2) object array is allocated explicitly and filled
# cell by cell.
train_data = np.empty((len(X_train), 2), dtype=object)
for i in range(len(X_train)):
    train_data[i, 0] = X_train[i]   # the image vector, stored as a single object
    train_data[i, 1] = y_train[i]   # its label

In [17]:
# Sort the (image, label) rows into one bucket per digit class:
# buffer_0_9[d] receives every row whose label compares equal to d (d = 0..9).
buffer_0_9 = [[] for _ in range(10)]
for item in train_data:
    for digit, bucket in enumerate(buffer_0_9):
        if item[1] == digit:
            bucket.append(item)

# The per-digit names are bound to the very same list objects held in buffer_0_9.
(buffer_0, buffer_1, buffer_2, buffer_3, buffer_4,
 buffer_5, buffer_6, buffer_7, buffer_8, buffer_9) = buffer_0_9

In [25]:
# Build a class-balanced, interleaved training set: 5400 rounds, and in each
# round the i-th row of every digit bucket is taken in the order 0, 1, ..., 9.
X = []
y = []
per_digit = (buffer_0, buffer_1, buffer_2, buffer_3, buffer_4,
             buffer_5, buffer_6, buffer_7, buffer_8, buffer_9)
for i in range(5400):
    for bucket in per_digit:
        row = bucket[i]
        X.append(row[0])   # image
        y.append(row[1])   # label
    

In [29]:
# Convert the interleaved Python lists of images and labels into NumPy arrays.
X, y = np.array(X), np.array(y)

In [98]:
# 784 input nodes, 128 hidden nodes, 10 output nodes; weights are drawn at random here.
network = NN_Mnist(sizes=[784, 128, 10])

In [99]:
# One pass over the balanced, interleaved set with both learning rates at 0.2.
network.train(X_train=X, y_train=y, hid_lrate=0.2, inp_lrate=0.2)

Training complete.


In [100]:
# Misclassification rate on the "t10k" split; also rewrites err_label.txt.
network.error(X=X_test, y=y_test)

Error is: 5.14%


In [101]:
# Dump the trained network to a plain-text file. The context manager guarantees
# the handle is closed even if one of the writes fails.
with open("MyNetWork_0_9_128node.txt", 'w') as Network:
    # Header, one value per line: the three layer sizes followed by two
    # learning-rate values (the literal 0.2 twice - not read from `network`).
    for value in (network.inp_num, network.hid_num, network.out_num, 0.2, 0.2):
        Network.write(str(value))
        Network.write('\n')

    # w1: one text line per row; every value is followed by a single space.
    for row in network.w1:
        for weight in row:
            Network.write(str(weight))
            Network.write(' ')
        Network.write('\n')

    # w2: unlike w1, all rows go back-to-back on ONE line; the newline is
    # written only once, after the last row. Kept as-is so the file layout
    # does not change.
    for row in network.w2:
        for weight in row:
            Network.write(str(weight))
            Network.write(' ')
    Network.write('\n')

    # hidden-layer biases on one line
    for bias in network.hid_offset:
        Network.write(str(bias))
        Network.write(' ')
    Network.write('\n')

    # output-layer biases on one line
    for bias in network.out_offset:
        Network.write(str(bias))
        Network.write(' ')
    Network.write('\n')

In [35]:
# Single-digit subsets taken from the per-digit buckets: images and labels of digit 2 ...
X_2 = np.array([row[0] for row in buffer_2])
y_2 = np.array([row[1] for row in buffer_2])

# ... and of digit 5.
X_5 = np.array([row[0] for row in buffer_5])
y_5 = np.array([row[1] for row in buffer_5])

In [86]:
# Continue training the existing network on the digit-5 samples only, with small learning rates.
# NOTE(review): this cell's execution count (86) is lower than that of the cell that creates
# `network` (98), so what it operated on depends on kernel state - confirm under Restart & Run All.
network.train(X_train=X_5, y_train=y_5, hid_lrate=0.002, inp_lrate=0.002)

Training complete.


In [87]:
# Re-measure the misclassification rate on the "t10k" split after the extra training pass.
network.error(X=X_test, y=y_test)

Error is: 6.63%


In [108]:
# NOTE(review): this fits the network on the "t10k" split itself; any error measured on
# X_test after this cell is no longer a held-out estimate - confirm this is intended.
network.train(X_train=X_test, y_train=y_test, hid_lrate=0.05, inp_lrate=0.05)

Training complete.


In [111]:
# Misclassification rate on the digit-2 subset of the training data.
network.error(X=X_2, y=y_2)

Error is: 3.84%
