In [42]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose
from collections import OrderedDict

In [7]:
# Training split of FashionMNIST, stored under ./data (download=True fetches
# the files when needed). ToTensor() turns each image into a torch tensor.
training_data = datasets.FashionMNIST(
    'data',
    train=True,
    transform=ToTensor(),
    download=True,
)

In [8]:
# Held-out split of FashionMNIST (train=False), loaded with the same root
# directory and the same ToTensor() transform as the training split.
test_data = datasets.FashionMNIST(
    'data',
    train=False,
    transform=ToTensor(),
    download=True,
)

In [12]:
# Number of samples per mini-batch, shared by both loaders.
BATCH_SIZE = 64

# Wrap each split in a DataLoader with identical settings.
train_dataloader, test_dataloader = (
    DataLoader(split, batch_size=BATCH_SIZE)
    for split in (training_data, test_data)
)

In [30]:
# Peek at the first mini-batch only: image tensor shape, then the labels
# together with their shape and dtype. Nothing is printed for an empty loader.
first_batch = next(iter(train_dataloader), None)
if first_batch is not None:
    X, y = first_batch
    print(X.shape)
    print(y, y.shape, y.dtype)

torch.Size([64, 1, 28, 28])
tensor([9, 0, 0, 3, 0, 2, 7, 2, 5, 5, 0, 9, 5, 5, 7, 9, 1, 0, 6, 4, 3, 1, 4, 8,
        4, 3, 0, 2, 4, 4, 5, 3, 6, 6, 0, 8, 5, 2, 1, 6, 6, 7, 9, 5, 9, 2, 7, 3,
        0, 3, 3, 3, 7, 2, 2, 6, 6, 8, 3, 3, 5, 0, 5, 5]) torch.Size([64]) torch.int64


In [17]:
# Number of mini-batches in one pass over the training loader
# (60000 samples in batches of 64, last batch partial -> 938).
len(train_dataloader)

938

In [21]:
# Number of mini-batches in one pass over the test loader.
len(test_dataloader)

157

In [52]:
# Inspect the dataset object wrapped by the training loader
# (its repr reports sample count, root, split and transform).
train_dataloader.dataset

Dataset FashionMNIST
    Number of datapoints: 60000
    Root location: data
    Split: Train
    StandardTransform
Transform: ToTensor()

In [26]:
# Prefer the GPU when CUDA is usable; otherwise run everything on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print('Device: {}'.format(device))

Device: cpu


In [44]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten -> (Linear + ReLU) x 2 -> Linear.

    The defaults reproduce the original fixed architecture
    (784 -> 512 -> 512 -> 10); the sizes are now configurable so the same
    class can be reused for other input resolutions or label counts.

    Args:
        in_features: Number of values per sample after flattening
            (28 * 28 by default).
        hidden_features: Width of both hidden layers (512 by default).
        num_classes: Number of output logits per sample (10 by default).
    """

    def __init__(self, in_features=28 * 28, hidden_features=512, num_classes=10):
        super().__init__()
        # Collapses every dimension after the batch dimension into one.
        self.flatten = nn.Flatten()
        # Layer names are kept stable because they appear in
        # named_parameters() / state_dict() keys.
        self.linear_relu_stack = nn.Sequential(
            OrderedDict([
                ('fc1', nn.Linear(in_features, hidden_features)),
                ('relu1', nn.ReLU()),
                ('fc2', nn.Linear(hidden_features, hidden_features)),
                ('relu2', nn.ReLU()),
                ('fc3', nn.Linear(hidden_features, num_classes)),
            ])
        )

    def forward(self, x):
        """Return unnormalised class scores (logits) for a batch ``x``.

        ``x`` is flattened per sample before entering the linear stack, so
        both image-shaped and already-flat batches are accepted as long as
        each sample holds ``in_features`` values.
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

# Build the network, place its parameters on the selected device, and
# print the module tree for a quick visual check of the architecture.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (fc1): Linear(in_features=784, out_features=512, bias=True)
    (relu1): ReLU()
    (fc2): Linear(in_features=512, out_features=512, bias=True)
    (relu2): ReLU()
    (fc3): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [49]:
# List every learnable tensor by its qualified name together with its shape.
for name, weight in model.named_parameters():
    print(name, ': ', weight.shape)

linear_relu_stack.fc1.weight :  torch.Size([512, 784])
linear_relu_stack.fc1.bias :  torch.Size([512])
linear_relu_stack.fc2.weight :  torch.Size([512, 512])
linear_relu_stack.fc2.bias :  torch.Size([512])
linear_relu_stack.fc3.weight :  torch.Size([10, 512])
linear_relu_stack.fc3.bias :  torch.Size([10])


In [50]:
# Materialise all (name, parameter) pairs so the notebook displays them.
# NOTE(review): this dumps every weight tensor into the cell output; the
# name/shape loop in the previous cell is usually enough for inspection.
list(model.named_parameters())

[('linear_relu_stack.fc1.weight',
  Parameter containing:
  tensor([[ 0.0305, -0.0352,  0.0105,  ..., -0.0027, -0.0287, -0.0143],
          [ 0.0117, -0.0267, -0.0348,  ..., -0.0075,  0.0190, -0.0100],
          [-0.0247,  0.0282,  0.0133,  ...,  0.0104, -0.0223,  0.0199],
          ...,
          [ 0.0205,  0.0046, -0.0286,  ...,  0.0322, -0.0049, -0.0305],
          [ 0.0019,  0.0037,  0.0115,  ..., -0.0068, -0.0037,  0.0300],
          [-0.0002, -0.0036, -0.0349,  ...,  0.0347, -0.0303, -0.0347]],
         requires_grad=True)),
 ('linear_relu_stack.fc1.bias',
  Parameter containing:
  tensor([-0.0106,  0.0294,  0.0227, -0.0094, -0.0031,  0.0194, -0.0161,  0.0257,
          -0.0192, -0.0057,  0.0116,  0.0353,  0.0334,  0.0236,  0.0057,  0.0044,
           0.0108,  0.0052, -0.0134, -0.0078,  0.0065,  0.0245,  0.0310,  0.0139,
           0.0076, -0.0220, -0.0251, -0.0086,  0.0013,  0.0236,  0.0290,  0.0017,
           0.0010,  0.0058,  0.0198,  0.0013, -0.0225,  0.0072, -0.0061, -0.014

In [23]:
# Dummy batch for a shape check: 32 already-flattened 28x28 samples of zeros.
inputs = torch.zeros(32, 28 * 28)

In [29]:
# Sanity check: a forward pass on the dummy batch should give one row of
# 10 logits per input sample.
model(inputs).shape

torch.Size([32, 10])

In [51]:
# Classification objective, fed the model's logits and the integer labels.
loss_fn = nn.CrossEntropyLoss()
# Plain stochastic gradient descent over all of the model's parameters.
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.001)

In [2]:
def train(dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over every batch in ``dataloader``.

    For each ``(X, y)`` batch the function computes ``loss_fn(model(X), y)``,
    back-propagates it and applies one ``optimizer`` step. Progress is
    printed every 100 batches.

    Args:
        dataloader: Iterable of ``(X, y)`` batches exposing a sized
            ``dataset`` attribute (used only for the progress display).
        model: Module to optimise; updated in place and left in training mode.
        loss_fn: Callable mapping ``(predictions, targets)`` to a scalar loss.
        optimizer: Optimiser already bound to ``model``'s parameters.

    Returns:
        None.
    """
    # The attribute is `dataset`; the previous `data_set` raised AttributeError.
    size = len(dataloader.dataset)
    # Send batches to wherever the model's weights live instead of relying on
    # a notebook-global `device` that may not be defined yet.
    target_device = next(model.parameters()).device
    # Make sure layers with train/eval-specific behaviour are in training mode.
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X = X.to(target_device)
        y = y.to(target_device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Back propagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            # .item() extracts a plain Python float for formatting.
            current = batch * len(X)
            print(f'Loss: {loss.item():>7f}   [{current:>5d}/{size:>5d}]')