In [1]:
import torch # 导入torch框架
import torchvision.datasets as dset # 使用torchvision方便导入数据集
import torchvision.transforms as transforms # 使用torchvision方便数据转换为tensor

In [2]:
SEED = 42  # fixed seed so weight initialisation and batch shuffling are repeatable across runs
torch.manual_seed(SEED)

Train_batch_size = 64  # number of training samples consumed per optimisation step
Test_batch_size = 10000  # size of the whole test split, so the test data is fed in one batch

### 下载数据集

In [3]:
# Training split of MNIST: stored under ./mnist (downloaded when missing),
# with every image converted to a tensor by ToTensor().
train_data = dset.MNIST(
    root="./mnist",
    train=True,
    transform=transforms.ToTensor(),
    target_transform=None,
    download=True,
)

In [4]:
# Test split of MNIST, read from the same ./mnist directory.
# download=False: the files are expected to be on disk already.
test_data = dset.MNIST(
    root="./mnist",
    train=False,
    transform=transforms.ToTensor(),
    target_transform=None,
    download=False,
)

In [5]:
train_data.data.shape  # inspect the shape of the raw training images

torch.Size([60000, 28, 28])

### 生成器

In [6]:
# Wrap the training set in a DataLoader so the training loop receives
# reshuffled mini-batches of Train_batch_size samples.
train_loader = torch.utils.data.DataLoader(
    dataset=train_data,
    batch_size=Train_batch_size,
    shuffle=True,
)

In [7]:
# Evaluation loader. Accuracy does not depend on sample order, so shuffling is
# switched off: it would only cost a random permutation per evaluation pass and
# consume global RNG state, making training runs harder to reproduce.
test_loader = torch.utils.data.DataLoader(
    dataset=test_data,
    batch_size=Test_batch_size,
    shuffle=False,
)

### 搭建网络

In [8]:
import torch.nn.functional as F

In [9]:
class RNN(torch.nn.Module):
    """LSTM classifier that reads a sequence and predicts a class from its last step.

    With the defaults, each 28x28 image is treated as a sequence of 28 time
    steps (rows), each carrying 28 features (the pixels of that row).

    Args:
        input_size: Number of features per time step (pixels per image row).
        hidden_size: Number of hidden units in every LSTM layer.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of output logits produced by the final linear layer.
    """

    def __init__(self, input_size=28, hidden_size=100, num_layers=2, num_classes=10):
        super(RNN, self).__init__()
        self.rnn = torch.nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,  # tensors are laid out as (batch, time_step, feature)
        )
        # The classifier consumes one hidden vector, so its input width must
        # track hidden_size rather than repeat a literal.
        self.out = torch.nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return class logits for a batch of sequences.

        Args:
            x: Tensor of shape (batch, time_step, input_size).

        Returns:
            Tensor of shape (batch, num_classes) holding unnormalised logits.
        """
        # Passing None as the state makes the LSTM start from all-zero hidden
        # and cell states. r_out has shape (batch, time_step, hidden_size); the
        # returned (h_n, c_n) pair is not needed here.
        r_out, _ = self.rnn(x, None)
        # Use the top layer's output at the final time step; this is the same
        # vector as h_n[-1].
        return self.out(r_out[:, -1, :])

In [10]:
rnn = RNN() # instantiate the RNN model
# A bare expression on the last line of the cell makes the notebook display the module summary.
rnn

RNN(
  (rnn): LSTM(28, 100, num_layers=2, batch_first=True)
  (out): Linear(in_features=100, out_features=10, bias=True)
)

### 优化器+损失

In [11]:
# Adam updates every parameter of rnn with a learning rate of 0.01.
optimizer = torch.optim.Adam(params=rnn.parameters(), lr=1e-2)
# Cross-entropy between the predicted logits and the integer class labels.
loss_func = torch.nn.CrossEntropyLoss()

In [12]:
epochs = 5  # number of full passes over the training set
step = 0  # global optimisation-step counter, used to schedule the periodic evaluation
for e in range(epochs):
    for (x, y) in train_loader:
        # Reshape the batch to (n, 28, 28) so every image row is one time step.
        out = rnn(x.view(-1, 28, 28))
        loss = loss_func(out, y)
        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss.backward()
        optimizer.step()

        # argmax over the raw logits: softmax is monotonic, so it cannot change
        # which class is largest (and calling it without `dim` is deprecated).
        pred = torch.argmax(out, 1)
        correct = pred.eq(y.view_as(pred)).type(torch.float)
        # Average over the samples actually in this batch; the last batch of an
        # epoch is smaller whenever the dataset size is not a multiple of
        # Train_batch_size, so dividing by that constant would under-report.
        acc = correct.mean()

        if step % 500 == 0:  # every 500 steps: report and evaluate on the test set
            print('step:', step)
            print('Train Accuracy=%.2f' % acc)
            # Evaluate without autograd bookkeeping (the test batch is large)
            # and with the module in eval mode; counts are accumulated so the
            # result is correct for any test batch size.
            rnn.eval()
            te_hits = 0.0
            te_seen = 0
            with torch.no_grad():
                for x_, y_ in test_loader:
                    test_out = rnn(x_.view(-1, 28, 28))
                    test_pred = torch.argmax(test_out, 1)
                    te_correct = test_pred.eq(y_.view_as(test_pred))
                    te_hits += te_correct.sum().item()
                    te_seen += y_.size(0)
            rnn.train()  # back to training mode for the following steps
            te_acc = te_hits / max(te_seen, 1)  # guard against an empty loader
            print('Test Accuracy=%.2f' % te_acc)
            print('------------------')
        step += 1
            

  # This is added back by InteractiveShellApp.init_path()


step: 0
Train Accuracy=0.11
Test Accuracy=0.14
------------------
step: 500
Train Accuracy=0.98
Test Accuracy=0.96
------------------
step: 1000
Train Accuracy=0.97
Test Accuracy=0.96
------------------
step: 1500
Train Accuracy=0.98
Test Accuracy=0.97
------------------
step: 2000
Train Accuracy=0.95
Test Accuracy=0.97
------------------
step: 2500
Train Accuracy=0.97
Test Accuracy=0.98
------------------
step: 3000
Train Accuracy=0.97
Test Accuracy=0.97
------------------
step: 3500
Train Accuracy=0.98
Test Accuracy=0.97
------------------
step: 4000
Train Accuracy=1.00
Test Accuracy=0.97
------------------
step: 4500
Train Accuracy=1.00
Test Accuracy=0.98
------------------
