<a href="https://colab.research.google.com/github/eisbetterthanpi/python/blob/master/RNN2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
# https://github.com/python-engineer/pytorch-examples/blob/master/rnn-lstm-gru/main.py

# FashionMNIST train/test splits stored under ./data; download=True fetches the
# files when they are not already present. Images are converted to tensors.
train_data = torchvision.datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
test_data = torchvision.datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,  # optional once the files have been downloaded
    transform=transforms.ToTensor(),
)

batch_size = 64
# Both loaders iterate in dataset order (shuffle is left at its default, False).
train_loader = torch.utils.data.DataLoader(dataset=train_data, batch_size=batch_size)
test_loader = torch.utils.data.DataLoader(dataset=test_data, batch_size=batch_size)


In [None]:

# Run on the GPU when CUDA is available, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Each image is fed to the network as a sequence of 28 steps with 28 features per step.
sequence_length = 28
input_size = 28

# Recurrent network capacity and number of output classes.
num_layers = 2
hidden_size = 128
num_classes = 10

# LSTM sequence classifier: stacked LSTM layers followed by a linear layer on the last time step
class RNN(nn.Module):
    """Stacked LSTM followed by a linear layer applied to the last time step.

    Args:
        input_size: number of features per time step.
        hidden_size: number of features in the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        num_classes: size of the output (one logit per class).
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        # nn.RNN / nn.GRU take the same constructor arguments and can be swapped in here;
        # they expect only h0 (no cell state) in forward().
        # batch_first=True -> x needs to be: (batch_size, seq, input_size)
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Map x of shape (batch_size, seq, input_size) to logits of shape (batch_size, num_classes)."""
        # Create the zero initial states on the input's own device and dtype so the
        # module does not depend on a module-level `device` variable.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        c0 = torch.zeros(state_shape, device=x.device, dtype=x.dtype)
        # out: (batch_size, seq, hidden_size)
        out, _ = self.lstm(x, (h0, c0))
        # Keep only the last time step: (batch_size, hidden_size)
        out = out[:, -1, :]
        # Project to class logits: (batch_size, num_classes)
        return self.fc(out)

# Build the network from the notebook's hyper-parameters and move it to `device`.
model = RNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
).to(device)
print(model)

RNN(
  (lstm): LSTM(28, 128, num_layers=2, batch_first=True)
  (fc): Linear(in_features=128, out_features=10, bias=True)
)


In [None]:
# Cross-entropy loss on the class logits, optimised with Adam.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
# Alternative optimiser:
# optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

def train(train_loader, model, loss_fn, optimizer):
    """Run one pass over ``train_loader``, updating ``model`` with ``optimizer``.

    Each batch is reshaped to (N, sequence_length, input_size) and moved to the
    module-level ``device`` before the forward pass. The current loss is printed
    every 100 batches.

    Args:
        train_loader: iterable of (images, labels) batches; must expose ``.dataset``.
        model: network to train.
        loss_fn: callable taking (outputs, labels) and returning a scalar loss tensor.
        optimizer: optimizer holding ``model``'s parameters.
    """
    size = len(train_loader.dataset)
    # Switch to training mode explicitly; the model may have been left in eval mode
    # by an earlier model.eval() call.
    model.train()
    for batch, (images, labels) in enumerate(train_loader):
        # origin shape: [N, 1, 28, 28] -> resized: [N, 28, 28]
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = loss_fn(outputs, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            # Approximate number of samples seen so far in this pass.
            current = batch * len(images)
            print(f"loss: {loss.item():>7f}  [{current:>5d}/{size:>5d}]")

def test(test_loader, model, loss_fn):
    with torch.no_grad():
        n_correct = 0
        n_samples = 0
        for X, y in test_loader:
            X = X.reshape(-1, sequence_length, input_size).to(device)
            y = y.to(device)
            outputs = model(X)
            _, predicted = torch.max(outputs.data, 1)
            n_samples += y.size(0)
            n_correct += (predicted == y).sum().item()
        acc = 100.0 * n_correct / n_samples
        print(f'Accuracy of the network on the 10000 test images: {acc} %')

# def test(test_loader, model, loss_fn):
#     size = len(test_loader.dataset)
#     num_batches = len(test_loader)
#     model.eval()
#     test_loss, correct = 0, 0
#     with torch.no_grad():
#         for X, y in test_loader:
#             X, y = X.to(device), y.to(device)
#             X = torch.squeeze(X)
#             pred = model(X)
#             test_loss += loss_fn(pred, y).item()
#             correct += (pred.argmax(1) == y).type(torch.float).sum().item()
#     test_loss /= num_batches
#     correct /= size
#     print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

# One pass over the training data, then report accuracy on the test split.
train(train_loader=train_loader, model=model, loss_fn=loss_fn, optimizer=optimizer)
test(test_loader=test_loader, model=model, loss_fn=loss_fn)

loss: 2.306781  [    0/60000]
loss: 0.856984  [ 6400/60000]
loss: 0.544334  [12800/60000]
loss: 0.769984  [19200/60000]
loss: 0.589071  [25600/60000]
loss: 0.513783  [32000/60000]
loss: 0.562093  [38400/60000]
loss: 0.585068  [44800/60000]
loss: 0.598474  [51200/60000]
loss: 0.541361  [57600/60000]
Accuracy of the network on the 10000 test images: 81.45 %


In [None]:

# Train for a few more epochs, reporting accuracy after each one.
epochs = 2
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_loader, model, loss_fn, optimizer)
    # Evaluate on the held-out test split, not on the data the model was just trained on.
    test(test_loader, model, loss_fn)
print("Done!")
# To persist and later restore the trained weights:
# torch.save(model.state_dict(), "model.pth")
# print("Saved PyTorch Model State to model.pth")
# model = RNN(input_size, hidden_size, num_layers, num_classes).to(device)
# model.load_state_dict(torch.load("model.pth"))


Epoch 1
-------------------------------
loss: 0.432146  [    0/60000]
loss: 0.532497  [ 6400/60000]
loss: 0.407758  [12800/60000]
loss: 0.570695  [19200/60000]
loss: 0.409937  [25600/60000]
loss: 0.390221  [32000/60000]
loss: 0.404769  [38400/60000]
loss: 0.577200  [44800/60000]
loss: 0.557909  [51200/60000]
loss: 0.506684  [57600/60000]
Accuracy of the network on the 10000 test images: 84.75333333333333 %
Epoch 2
-------------------------------
loss: 0.304823  [    0/60000]
loss: 0.433561  [ 6400/60000]
loss: 0.346738  [12800/60000]
loss: 0.465120  [19200/60000]
loss: 0.391968  [25600/60000]
loss: 0.290906  [32000/60000]
loss: 0.358383  [38400/60000]
loss: 0.559784  [44800/60000]
loss: 0.425927  [51200/60000]
loss: 0.518589  [57600/60000]
Accuracy of the network on the 10000 test images: 86.515 %
Done!


In [None]:
import random

# Human-readable names for the 10 class indices.
classes = ["T-shirt/top","Trouser","Pullover","Dress","Coat","Sandal","Shirt","Sneaker","Bag","Ankle boot",]

model.eval()

# Pick a random sample from the whole test set (not only the first 1001 entries).
n = random.randrange(len(test_data))
print(n)
x, y = test_data[n]
with torch.no_grad():
    # Reshape the single image to a batch of one, (1, sequence_length, input_size),
    # and move it to the same device as the model; a CPU tensor fails on a CUDA model.
    pred = model(x.reshape(-1, sequence_length, input_size).to(device))
    # .item() turns the predicted index tensor into a plain int for list indexing.
    predicted, actual = classes[pred[0].argmax(0).item()], classes[y]
    print(f'Predicted: "{predicted}", Actual: "{actual}"')


331
Predicted: "Sneaker", Actual: "Sneaker"
