In [1]:
# working with data
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose
import matplotlib.pyplot as plt

In [4]:
# use data from torchvision datasets
training_data = datasets.FashionMNIST(
    root = 'data',
    train = True,
    download = True,
    transform = ToTensor(),
)

test_data = datasets.FashionMNIST(
    root = 'data',
    train = False,
    download = True,
    transform = ToTensor(),
)

In [5]:
# define a batch size of 64
# means each element in dataloader iterable will return batch of 64 features and labels
batch_size = 64

# create a dataloader
train_dataloader = DataLoader(training_data, batch_size = batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

for X, y in test_dataloader:
  print('Shape of X [H, C, H, W]: ', X.shape)
  print('Shape of y: ', y.shape, y.dtype)
  break

Shape of X [H, C, H, W]:  torch.Size([64, 1, 28, 28])
Shape of y:  torch.Size([64]) torch.int64


In [7]:
# creating models
# get cpu or gpu for training
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Using {device} device')

# define the model
# create a class that inherit from nn.Module
class NeuralNetwork(nn.Module):
  def __init__(self):
    super(NeuralNetwork, self).__init__()
    self.flatten = nn.Flatten()
    self.linear_relu_stack = nn.Sequential(
        nn.Linear(28 * 28, 512),
        nn.ReLU(),
        nn.Linear(512, 512),
        nn.ReLU(),
        nn.Linear(512, 10)
    )
  
  def forward(self, x):
    x = self.flatten(x)
    logits = self.linear_relu_stack(x)
    return logits

# create model
model = NeuralNetwork().to(device)
print(model)

Using cpu device
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [11]:
# optimize and train model
# loss function and optimizer needed to train the model
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 1e-3)

def train(dataloader, model, loss_fn, optimizer):
  size = len(dataloader.dataset)
  model.train()
  for batch, (X, y) in enumerate(dataloader):
    X, y = X.to(device), y.to(device)

    # compute prediction error
    pred = model(X)
    loss = loss_fn(pred, y)

    # backpropagation
    # to adjust the model params
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if batch % 100 == 0:
      loss, current = loss.item(), batch * len(X)
      print(f'loss: {loss:>7f} [{current:>5d}/{size:>5d}]')

# test model performance and to enrsume it is learning
def test(dataloader, model, loss_fn):
  size = len(dataloader.dataset)
  num_batches = len(dataloader)
  model.eval()
  test_loss, correct = 0, 0
  with torch.no_grad():
    for X, y in dataloader:
      X, y = X.to(device), y.to(device)
      pred = model(X)
      test_loss += loss_fn(pred, y).item()
      correct += (pred.argmax(1) == y).type(torch.float).sum().item()
  test_loss /= num_batches
  correct /= size
  print(f'Test error: \n Accuracy: {(100 * correct):>0.1f}%, Avg loss: {test_loss:>8f} \n')

In [12]:
# train
# process over several iterations (epochs)
epochs = 5
for t in range(epochs):
  print(f'Epoch {t + 1}\n----------------------------')
  train(train_dataloader, model, loss_fn, optimizer)
  test(test_dataloader, model, loss_fn)
print('Done!')

Epoch 1
----------------------------
loss: 2.175184 [    0/60000]
loss: 2.160882 [ 6400/60000]
loss: 2.105793 [12800/60000]
loss: 2.113875 [19200/60000]
loss: 2.071410 [25600/60000]
loss: 2.013943 [32000/60000]
loss: 2.045913 [38400/60000]
loss: 1.968857 [44800/60000]
loss: 1.975269 [51200/60000]
loss: 1.906171 [57600/60000]
Test error: 
 Accuracy: 57.3%, Avg loss: 1.897527 

Epoch 2
----------------------------
loss: 1.936010 [    0/60000]
loss: 1.904527 [ 6400/60000]
loss: 1.788033 [12800/60000]
loss: 1.822152 [19200/60000]
loss: 1.724393 [25600/60000]
loss: 1.670588 [32000/60000]
loss: 1.702010 [38400/60000]
loss: 1.599713 [44800/60000]
loss: 1.624885 [51200/60000]
loss: 1.524536 [57600/60000]
Test error: 
 Accuracy: 60.3%, Avg loss: 1.533361 

Epoch 3
----------------------------
loss: 1.605091 [    0/60000]
loss: 1.572338 [ 6400/60000]
loss: 1.419446 [12800/60000]
loss: 1.487039 [19200/60000]
loss: 1.371698 [25600/60000]
loss: 1.359959 [32000/60000]
loss: 1.385903 [38400/60000]
lo

In [13]:
# saving model
torch.save(model.state_dict(), 'model.pth')
print('Saved PyTorch model state to model.pth')

Saved PyTorch model state to model.pth


In [14]:
# load model
model = NeuralNetwork()
model.load_state_dict(torch.load('model.pth'))

<All keys matched successfully>

In [17]:
# the model now can be used to make a predictions
classes = [
  "T-shirt/top",
  "Trouser",
  "Pullover",
  "Dress",
  "Coat",
  "Sandal",
  "Shirt",
  "Sneaker",
  "Bag",
  "Ankle boot",
]

model.eval()
x, y = test_data[0][0], test_data[0][1]
with torch.no_grad():
  pred = model(x)
  predicted, actual = classes[pred[0].argmax(0)], classes[y]
  print(f'Predicted: {predicted}, Actual: {actual}')

Predicted: Ankle boot, Actual: Ankle boot
