In [1]:
import os
import pdb

import numpy as np

from activations import Relu
from convolution import Conv2d, Maxpool, BatchNorm2d, Flatten
from datasets import Mnist
from linear import Linear, Dropout
from lossfunctions import CrossEntropyLoss
from optim import SGD
from utils import label_encoder, shuffle_data

# 定义网络

In [2]:
class MyNet():
    """Convolutional classifier: three conv stages followed by two linear layers.

    Each conv stage runs convolution -> ReLU -> batch norm; the first two stages
    finish with a max-pooling layer.  The flattened result goes through
    fc1 -> dropout -> fc2, and the output of fc2 is returned as-is.

    ``parameters`` concatenates the ``params`` of every conv, batch-norm and
    linear layer; ``backward`` returns gradients concatenated in that same order.
    """

    def __init__(self):
        # --- stage 1 (author's note: on MNIST the feature map is 14x14 after pooling)
        self.conv1 = Conv2d(in_channels=1, n_filter=30, filter_size=(3, 3), padding=1, stride=1)
        self.relu1 = Relu()
        self.bn1 = BatchNorm2d(n_channel=30, momentum=0.1)
        self.maxpool1 = Maxpool(size=2, stride=2)

        # --- stage 2 (author's note: on MNIST the feature map is 7x7 after pooling)
        self.conv2 = Conv2d(in_channels=30, n_filter=20, filter_size=(3, 3), padding=1, stride=1)
        self.relu2 = Relu()
        self.bn2 = BatchNorm2d(n_channel=20, momentum=0.1)
        # Same configuration as maxpool1; the original author notes maxpool1 could be
        # reused instead.  NOTE(review): only safe if Maxpool keeps no per-call state
        # that backward() relies on -- confirm before merging the two.
        self.maxpool2 = Maxpool(size=2, stride=2)

        # --- stage 3, no padding and no pooling (author's note: 5x5 on MNIST)
        self.conv3 = Conv2d(in_channels=20, n_filter=10, filter_size=(3, 3), padding=0, stride=1)
        self.relu3 = Relu()
        self.bn3 = BatchNorm2d(n_channel=10, momentum=0.1)

        self.flatten = Flatten()

        # --- fully connected head; 250 inputs = 10 channels * 5 * 5 per the notes above
        self.fc1 = Linear(dim_in=250, dim_out=100)
        self.dropout1 = Dropout(p=0.5)
        self.fc2 = Linear(dim_in=100, dim_out=10)

        # Layers that own parameters, in the order backward() reports their gradients.
        with_params = (self.conv1, self.bn1, self.conv2, self.bn2,
                       self.conv3, self.bn3, self.fc1, self.fc2)
        collected = with_params[0].params
        for layer in with_params[1:]:
            # `a = a + b` (not `+=`) so no layer's own `params` object is mutated.
            collected = collected + layer.params
        self.parameters = collected

    def __call__(self, X, mode='train'):
        """Run the forward pass.

        mode: 'train' or 'test' -- which phase the network is running in.
        """
        return self.forward(X, mode)

    def forward(self, X, mode):
        """Push X through every layer and return the output of fc2.

        ``mode`` is handed only to the batch-norm and dropout layers.
        """
        h = self.conv1(X)
        h = self.relu1(h)
        h = self.bn1(h, mode)
        h = self.maxpool1(h)

        h = self.conv2(h)
        h = self.relu2(h)
        h = self.bn2(h, mode)
        h = self.maxpool2(h)

        h = self.conv3(h)
        h = self.relu3(h)
        h = self.bn3(h, mode)

        h = self.flatten(h)

        h = self.fc1(h)
        h = self.dropout1(h, mode)
        return self.fc2(h)

    def backward(self, d_out):
        """Back-propagate d_out (gradient w.r.t. the network output) through all layers.

        Layers are visited in the reverse of the forward order.  Returns the
        parameter gradients concatenated as conv1, bn1, conv2, bn2, conv3, bn3,
        fc1, fc2 -- the same order as ``self.parameters``.
        """
        # Fully connected head.
        flow, g_fc2 = self.fc2.backward(d_out)
        flow = self.dropout1.backward(flow)
        flow, g_fc1 = self.fc1.backward(flow)

        flow = self.flatten.backward(flow)

        # Stage 3.
        flow, g_bn3 = self.bn3.backward(flow)
        flow = self.relu3.backward(flow)
        flow, g_conv3 = self.conv3.backward(flow)

        # Stage 2.
        flow = self.maxpool2.backward(flow)
        flow, g_bn2 = self.bn2.backward(flow)
        flow = self.relu2.backward(flow)
        flow, g_conv2 = self.conv2.backward(flow)

        # Stage 1; the gradient w.r.t. the network input is not needed.
        flow = self.maxpool1.backward(flow)
        flow, g_bn1 = self.bn1.backward(flow)
        flow = self.relu1.backward(flow)
        _, g_conv1 = self.conv1.backward(flow)

        return (g_conv1 + g_bn1 + g_conv2 + g_bn2 +
                g_conv3 + g_bn3 + g_fc1 + g_fc2)

# 定义训练方式

In [7]:
def train(train_datas, train_labels, test_datas, test_labels, network,
          loss_fc, optim, epochs, batch_size, log_interval=100):
    """Train ``network`` with mini-batch updates and print accuracy after each epoch.

    For every epoch the training set is passed through ``shuffle_data`` and then
    consumed in consecutive slices of ``batch_size`` samples.  Each slice goes
    through: forward pass in 'train' mode -> loss -> ``loss_fc.backward()`` ->
    ``network.backward`` -> ``optim.update_parameters``.  Training accuracy is
    accumulated from the same forward outputs, i.e. it is measured before the
    parameter update of the corresponding batch.  After the last batch the
    network is evaluated with ``test`` and one summary line is printed.

    Args:
        train_datas: training inputs; must expose ``.shape[0]`` and support
            slicing along the first axis.
        train_labels: training targets aligned with ``train_datas``; the class
            index is taken as ``argmax`` over axis 1 (one-hot style rows).
        test_datas: evaluation inputs, forwarded to ``test``.
        test_labels: evaluation targets, forwarded to ``test``.
        network: callable as ``network(batch, mode='train')`` and providing
            ``backward(grad)`` that returns the parameter gradients.
        loss_fc: callable as ``loss_fc(net_out, labels)`` returning the batch
            loss; ``loss_fc.backward()`` returns the gradient w.r.t. ``net_out``.
        optim: optimizer exposing ``update_parameters(grads)``.
        epochs: number of passes over the training set.
        batch_size: samples per batch; the final batch of an epoch may be smaller.
        log_interval: print the running mean loss of the current epoch every
            this many batches (default 100, the previously hard-coded value).
            ``None`` or a value below 1 turns progress printing off.

    Returns:
        None.  All results are reported through ``print``.
    """
    # Progress printing is optional; checking here also keeps log_interval=0
    # from triggering a modulo-by-zero inside the batch loop.
    show_progress = log_interval is not None and log_interval >= 1

    for epoch in range(1, epochs + 1):
        print("**" * 20, "epoch: %d" % epoch, "**" * 20)

        # Fresh sample ordering for this epoch (shuffle_data is imported from utils).
        shuffled_datas, shuffled_labels = shuffle_data(train_datas, train_labels)

        n_data = train_datas.shape[0]  # total number of training samples
        n_correct = 0                  # correctly classified samples this epoch
        loss_sum = 0.0                 # sum of batch losses this epoch

        # num_trained_batchs counts the batches processed so far in this epoch (1-based).
        for num_trained_batchs, start in enumerate(range(0, n_data, batch_size), start=1):
            stop = start + batch_size
            batch_datas = shuffled_datas[start:stop]
            batch_labels = shuffled_labels[start:stop]

            net_out = network(batch_datas, mode='train')  # model output

            batch_loss = loss_fc(net_out, batch_labels)
            loss_sum += batch_loss

            # Gradient of the loss w.r.t. the model output, then w.r.t. every
            # parameter, then the optimizer step.
            grad_out = loss_fc.backward()
            grads = network.backward(grad_out)
            optim.update_parameters(grads)

            # Count correct predictions for the running training accuracy.
            predict = np.argmax(net_out, axis=1)
            n_correct += np.sum(predict == np.argmax(batch_labels, axis=1))

            if show_progress and num_trained_batchs % log_interval == 0:
                ave_loss = loss_sum / num_trained_batchs
                print("*" * 20, "%d th batch, loss: %f" % (num_trained_batchs, ave_loss), "*" * 20)

        train_acc = n_correct / n_data

        test_acc = test(test_datas, test_labels, network, batch_size)

        print("epoch %d, train accuracy: %f  test accuracy: %f" % (epoch, train_acc, test_acc))
            

def test(test_datas, test_labels, network, batch_size):
    """
    测试时使用批量化喂数据，否则内存不够
    test_labels: one hot
    return: 测试准确率
    """
    n_data = test_labels.shape[0]
    n_correct = 0
    for i in range(0, n_data, batch_size):
        batch_datas = test_datas[i : i + batch_size]
        batch_labels = test_labels[i : i + batch_size]
        net_out = network(batch_datas, mode='test')
        predict = np.argmax(net_out, axis=1)
        n_correct += np.sum(predict == np.argmax(batch_labels, axis=1))
        
    test_acc = n_correct / n_data
    return test_acc

# 训练

## 加载mnist数据

In [4]:
# Directory that holds the four MNIST idx files.  It can be overridden through
# the MNIST_DIR environment variable; the default is the location this notebook
# originally hard-coded, so existing setups keep working unchanged.
MNIST_DIR = os.environ.get('MNIST_DIR', r'D:\datas\mnist')

train_image_path = os.path.join(MNIST_DIR, 'train-images.idx3-ubyte')
train_label_path = os.path.join(MNIST_DIR, 'train-labels.idx1-ubyte')
test_image_path = os.path.join(MNIST_DIR, 't10k-images.idx3-ubyte')
test_label_path = os.path.join(MNIST_DIR, 't10k-labels.idx1-ubyte')
# Training samples: 60000 in total
# Test samples: 10000 in total


def _to_network_format(images, labels, n_classes=10):
    """Shape loaded images/labels the way the network and the loss consume them.

    images: per the original notes the loader yields 3-D data [N, H, W]; a
        channel axis is inserted because the conv net takes 4-D input [N, C, H, W].
    labels: cast to int32 first (per the original notes they are loaded as
        floats and the one-hot conversion fails otherwise), then passed to
        ``label_encoder`` with ``n_classes``.
    """
    images = images[:, np.newaxis, :, :]
    labels = label_encoder(np.array(labels, np.int32), n_classes)
    return images, labels


# MNIST reader bound to the four files above
mnist = Mnist(train_image_path, train_label_path, test_image_path, test_label_path)

# Training data
train_images = mnist.load_train_images()
train_labels = mnist.load_train_labels()
train_images, train_labels = _to_network_format(train_images, train_labels)

# Test data
test_images = mnist.load_test_images()
test_labels = mnist.load_test_labels()
test_images, test_labels = _to_network_format(test_images, test_labels)

魔数:2051, 图片数量: 60000张, 图片大小: 28*28
16
>784B 16 784
已解析 10000张
7839232
已解析 20000张
15679232
已解析 30000张
23519232
已解析 40000张
31359232
已解析 50000张
39199232
已解析 60000张
47039232
魔数:2049, 图片数量: 60000张
已解析 10000张
已解析 20000张
已解析 30000张
已解析 40000张
已解析 50000张
已解析 60000张
魔数:2051, 图片数量: 10000张, 图片大小: 28*28
16
>784B 16 784
已解析 10000张
7839232
魔数:2049, 图片数量: 10000张
已解析 10000张


## 开始训练

In [8]:
# Build the network, the loss function and the optimizer, then start training.
mynet = MyNet()
cross_entropy = CrossEntropyLoss()
sgd = SGD(mynet.parameters, lr=0.01)  # optimizer is handed the network's parameter list

train(
    train_datas=train_images,
    train_labels=train_labels,
    test_datas=test_images,
    test_labels=test_labels,
    network=mynet,
    loss_fc=cross_entropy,
    optim=sgd,
    epochs=3,
    batch_size=30,
)

**************************************** epoch: 1 ****************************************
******************** 100 th batch, loss: 2.294114 ********************
******************** 200 th batch, loss: 1.674324 ********************
******************** 300 th batch, loss: 1.360098 ********************
******************** 400 th batch, loss: 1.172868 ********************
******************** 500 th batch, loss: 1.037376 ********************
******************** 600 th batch, loss: 0.939488 ********************
******************** 700 th batch, loss: 0.866106 ********************
******************** 800 th batch, loss: 0.806912 ********************
******************** 900 th batch, loss: 0.756997 ********************
******************** 1000 th batch, loss: 0.714366 ********************
******************** 1100 th batch, loss: 0.675957 ********************
******************** 1200 th batch, loss: 0.647047 ********************
******************** 1300 th batch, loss: 0.616233 ***