In [27]:
%matplotlib inline
import torch
import torchvision
from torch.utils import data
from torchvision import transforms
from matplotlib import pyplot as plt

In [28]:
# Convert each PIL image to a float tensor when a sample is fetched.
trans = transforms.ToTensor()

# Build the training split first and the test split second; both are stored
# under ../data and downloaded when not already present.
train_data, test_data = (
    torchvision.datasets.FashionMNIST(
        root="../data", train=is_train, transform=trans, download=True)
    for is_train in (True, False)
)

In [29]:
# Mini-batch loaders over the two dataset splits (100 samples per batch,
# 4 worker processes each).
# Training batches are reshuffled on every pass over the data.
train_iter = data.DataLoader(train_data, batch_size=100, shuffle=True,
                             num_workers=4)
# The accuracy computed over the whole test split does not depend on sample
# order, so the test loader reads sequentially; this keeps evaluation batches
# reproducible between runs.
test_iter = data.DataLoader(test_data, batch_size=100, shuffle=False,
                            num_workers=4)

In [30]:
from torch import nn

In [31]:
# Hyperparameters shared by the cells below.
# Presumably the number of recurrent steps per image (TODO confirm); the
# visible cells never read this name, the model takes the step count from
# its input instead.
sequence_length = 28
# Number of features fed to the network at each step (in-features of my_rnn.U).
input_size = 28
# Width of the recurrent hidden state.
hidden_size = 128
# Number of scores the network outputs per sample.
num_classes = 10
# Passed to the my_rnn constructor. NOTE(review): the DataLoader cell uses the
# literal 100 rather than this name; keep the two in sync.
batch_size = 100
# Number of full passes over the training loader, passed to train().
num_epochs = 10
# Step size intended for the optimizer built in a later cell.
learning_rate = 0.01

In [32]:
def softmax(X):
    """Row-wise softmax over dimension 1.

    Args:
        X: tensor with at least two dimensions; dimension 1 holds the scores
            that are normalized against each other.

    Returns:
        Tensor of the same shape as ``X`` whose entries along dimension 1 are
        non-negative and sum to 1.
    """
    if X.size(1) == 0:
        # Nothing to normalize over; return an empty result as before.
        return torch.exp(X)
    # exp() overflows to inf for large scores, which turns the ratio below
    # into nan. Softmax is invariant to subtracting a per-row constant, so
    # shift every row so that its largest score becomes 0 first.
    shifted = X - X.max(dim=1, keepdim=True).values
    X_exp = torch.exp(shifted)
    partition = X_exp.sum(1, keepdim=True)
    return X_exp / partition

In [33]:
class my_rnn(nn.Module):
    """Simple recurrent classifier built from three linear layers.

    At every step the hidden state is updated as
    ``A = relu(U(x_t) + W(A))``; after the last step ``V(A)`` is returned as
    the per-sample output scores (no softmax is applied).

    Args:
        input_size: number of features fed to the network at each step.
        hidden_size: width of the hidden state.
        batch_size: unused; kept so existing constructor calls keep working.
            The batch size is read from the input inside ``forward``.
        num_outputs: number of scores produced per sample.
    """

    def __init__(self, input_size, hidden_size, batch_size, num_outputs):
        super().__init__()
        self.hidden_size = hidden_size
        self.U = nn.Linear(input_size, hidden_size)    # input  -> hidden
        self.W = nn.Linear(hidden_size, hidden_size)   # hidden -> hidden
        self.V = nn.Linear(hidden_size, num_outputs)   # hidden -> scores

    def forward(self, X):
        """Run the recurrence over dimension 2 of a 4-D input.

        Args:
            X: 4-D tensor; dimensions 0 and 1 are merged into the batch axis,
                dimension 2 is iterated as the step axis and dimension 3 must
                match ``input_size``.

        Returns:
            Tensor of shape ``(X.size(0) * X.size(1), num_outputs)``.
        """
        X = X.reshape(-1, X.size(2), X.size(3))
        # Start from an all-zero hidden state created on the same device and
        # with the same dtype as the input. A freshly sampled random state
        # would make repeated forward passes on identical input disagree and
        # would not follow the model to another device or dtype.
        A = X.new_zeros(X.size(0), self.hidden_size)
        for step in range(X.size(1)):
            A = torch.relu(self.U(X[:, step, :]) + self.W(A))
        return self.V(A)

In [34]:
# Instantiate the recurrent classifier from the hyperparameters defined above
# and print its layer summary.
model = my_rnn(
    input_size=input_size,
    hidden_size=hidden_size,
    batch_size=batch_size,
    num_outputs=num_classes,
)
print(model)

my_rnn(
  (U): Linear(in_features=28, out_features=128, bias=True)
  (W): Linear(in_features=128, out_features=128, bias=True)
  (V): Linear(in_features=128, out_features=10, bias=True)
)


In [35]:
def cross_entropy(y_hat, y):
    """Negative log of the entry selected by each label.

    Row ``i`` of ``y_hat`` is indexed at column ``y[i]``; the negative natural
    logarithm of the selected entries is returned, one value per row.
    """
    row_ids = range(len(y_hat))
    picked = y_hat[row_ids, y]
    return torch.log(picked).neg()

In [36]:
# Loss used by train() in the cell below. nn.CrossEntropyLoss is fed the raw
# output of the model's final linear layer, so no separate softmax step is
# applied before it.
loss_func = nn.CrossEntropyLoss()
# Bare expression: shows the module's repr as the cell output.
loss_func

CrossEntropyLoss()

In [37]:
from torch import optim
# Adam over all trainable parameters of the model. The step size comes from
# the learning_rate hyperparameter instead of a repeated literal, so changing
# it in the hyperparameter cell actually takes effect here.
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
optimizer

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.01
    maximize: False
    weight_decay: 0
)

In [38]:
def train(num_epochs, model, data_iter=None, criterion=None, opt=None,
          log_every=100):
    """Train ``model`` in place and print the loss at regular intervals.

    Args:
        num_epochs: number of full passes over ``data_iter``.
        model: module that maps a batch of inputs to per-class scores.
        data_iter: sized iterable yielding ``(inputs, labels)`` batches.
            Defaults to the notebook-level ``train_iter``.
        criterion: callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor. Defaults to the notebook-level ``loss_func``.
        opt: optimizer that must have been built over ``model``'s parameters.
            Defaults to the notebook-level ``optimizer``.
        log_every: print one progress line every this many batches.

    Returns:
        None. The model's parameters are updated as a side effect.
    """
    # Fall back to the notebook globals so that existing
    # ``train(num_epochs, model)`` calls keep working unchanged.
    if data_iter is None:
        data_iter = train_iter
    if criterion is None:
        criterion = loss_func
    if opt is None:
        opt = optimizer

    # Take the number of batches per epoch from the loader itself so the
    # progress line stays correct when the batch size or dataset changes.
    total_step = len(data_iter)

    # An earlier evaluation cell may have left the model in eval mode.
    model.train()
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(data_iter):
            # Forward pass
            outputs = model(images)
            loss = criterion(outputs, labels)

            # Backward pass and parameter update
            opt.zero_grad()
            loss.backward()
            opt.step()

            if (i + 1) % log_every == 0:
                print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                      .format(epoch + 1, num_epochs, i + 1, total_step,
                              loss.item()))
# Run the full training schedule on the model built above.
train(num_epochs=num_epochs, model=model)

Epoch [1/10], Step [100/600], Loss: 1.6126
Epoch [1/10], Step [200/600], Loss: 1.2543
Epoch [1/10], Step [300/600], Loss: 1.1088
Epoch [1/10], Step [400/600], Loss: 0.9630
Epoch [1/10], Step [500/600], Loss: 0.9449
Epoch [1/10], Step [600/600], Loss: 0.7450
Epoch [2/10], Step [100/600], Loss: 0.9621
Epoch [2/10], Step [200/600], Loss: 0.8912
Epoch [2/10], Step [300/600], Loss: 0.6317
Epoch [2/10], Step [400/600], Loss: 0.6515
Epoch [2/10], Step [500/600], Loss: 0.8456
Epoch [2/10], Step [600/600], Loss: 0.6492
Epoch [3/10], Step [100/600], Loss: 0.7023
Epoch [3/10], Step [200/600], Loss: 0.7780
Epoch [3/10], Step [300/600], Loss: 0.8708
Epoch [3/10], Step [400/600], Loss: 0.7955
Epoch [3/10], Step [500/600], Loss: 0.5306
Epoch [3/10], Step [600/600], Loss: 0.6266
Epoch [4/10], Step [100/600], Loss: 0.6101
Epoch [4/10], Step [200/600], Loss: 0.7150
Epoch [4/10], Step [300/600], Loss: 0.5641
Epoch [4/10], Step [400/600], Loss: 0.5110
Epoch [4/10], Step [500/600], Loss: 0.6302
Epoch [4/10

# 随着训练轮数增加，损失在0.4到0.9振荡

In [40]:
# Measure classification accuracy over the whole test loader.
model.eval()
correct = 0
total = 0
with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in test_iter:
        outputs = model(images)
        # The predicted class is the index of the highest score in each row.
        predicted = outputs.argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print('Test Accuracy of the model on the 10000 test images: {} %'.format(100 * correct / total))

Test Accuracy of the model on the 10000 test images: 77.22 %
