In [14]:
import torch
import torchvision
from torch import nn
from torch.utils import data
from torchvision import transforms
from matplotlib import pyplot as plt

## Data Loading

In [15]:
# Mini-batch size passed to load_data_fashion_mnist for both the training and test loaders.
batch_size = 256

def load_data_fashion_mnist(batch_size, resize=None, root="../datasets", num_workers=4):  #@save
    """Download Fashion-MNIST (if not already present) and build its data loaders.

    Args:
        batch_size: Number of examples per mini-batch, used for both loaders.
        resize: Optional size forwarded to ``transforms.Resize``. When falsy,
            no resize step is added to the transform pipeline.
        root: Directory in which the dataset is stored / downloaded.
        num_workers: Number of worker processes used by each ``DataLoader``.

    Returns:
        A ``(train_loader, test_loader)`` tuple. The training loader shuffles
        its examples; the test loader does not.
    """
    # The optional resize step runs before the conversion to a tensor.
    steps = [transforms.Resize(resize)] if resize else []
    steps.append(transforms.ToTensor())
    trans = transforms.Compose(steps)

    mnist_train = torchvision.datasets.FashionMNIST(
        root=root, train=True, transform=trans, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(
        root=root, train=False, transform=trans, download=True)

    train_loader = data.DataLoader(
        mnist_train, batch_size, shuffle=True, num_workers=num_workers)
    test_loader = data.DataLoader(
        mnist_test, batch_size, shuffle=False, num_workers=num_workers)
    return (train_loader, test_loader)

In [16]:
# Build both loaders with the default resize=None, i.e. without a resize step.
train_iter, test_iter = load_data_fashion_mnist(batch_size)

## Model definition

In [25]:
def net(X):
    """Softmax-regression forward pass written with raw tensors.

    Reshapes ``X`` into rows of length ``W.shape[0]``, applies the affine map
    ``X @ W + b`` and passes the result through ``softmax``.

    NOTE(review): ``softmax``, ``W`` and ``b`` are read as globals and are not
    defined in the visible cells; a later cell also rebinds the name ``net``
    to an ``nn.Sequential``. Confirm whether this cell is still needed.
    """
    flat_inputs = X.reshape((-1, W.shape[0]))
    scores = torch.matmul(flat_inputs, W) + b
    return softmax(scores)

The output layer of softmax regression is a fully-connected layer. Therefore, to implement our model, we just need to add one fully-connected layer with 10 outputs to our `Sequential`, preceded by an `nn.Flatten` layer that reshapes each input image into a 784-element vector. Here the `Sequential` is not strictly necessary, but we might as well form the habit since it will be ubiquitous when implementing deep models. As before, we initialize the weights at random with zero mean and standard deviation 0.01.

In [18]:
# Flatten turns each image into a 784-element row (28 x 28 Fashion-MNIST
# pixels); the single Linear layer maps those features to 10 class scores.
net = nn.Sequential(nn.Flatten(), nn.Linear(784, 10))

def init_weights(m):
    """Draw the weights of a linear layer from a normal distribution.

    Meant to be passed to ``nn.Module.apply``, which calls it once for every
    submodule. Weights are sampled with mean 0 and standard deviation 0.01.
    Modules that are not linear layers are left untouched, and the bias is not
    modified here.

    Args:
        m: The module currently being visited.
    """
    # isinstance (instead of an exact type comparison) also covers subclasses
    # of nn.Linear.
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, std=0.01)

net.apply(init_weights);

## Loss Function

In [19]:
# nn.CrossEntropyLoss expects unnormalised class scores (logits) and, with its
# default reduction, returns the mean loss over the batch.
loss = nn.CrossEntropyLoss()

## Optimization

In [23]:
# Iterating over an nn.Sequential yields its layers (submodules) in order,
# not the individual parameter tensors.
for layer in net:
    print(layer)

Flatten(start_dim=1, end_dim=-1)
Linear(in_features=784, out_features=10, bias=True)


In [24]:
# Mini-batch SGD over every parameter of `net`, with learning rate 0.1.
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)

## Model training
* randomly initialize the linear model parameters $(\mathbf{w}, b)$
* repeat for each epoch (until done)
    * load a batch of data
    * compute the linear model's predictions with the current parameter values
    * calculate the loss
    * compute the gradients
    * update the parameter values

In [None]:
# Number of training epochs. NOTE(review): not used in the visible cells;
# presumably consumed by an outer training loop — confirm.
num_epochs = 10

def train_epoch_ch3(net, train_iter, loss, updater):  #@save
    """Train ``net`` for one pass over ``train_iter`` (Chapter 3 training loop).

    Args:
        net: Callable model. It is switched to training mode when it is a
            ``torch.nn.Module``.
        train_iter: Iterable yielding ``(X, y)`` mini-batches.
        loss: Callable ``loss(y_hat, y)``. With a PyTorch optimizer it may
            return either a scalar (mean-reduced) loss or per-example losses;
            with a custom updater the result is reduced with ``.sum()``.
        updater: Either a ``torch.optim.Optimizer`` or a callable that takes
            the batch size and applies the parameter update.

    Returns:
        A ``(training loss, training accuracy)`` tuple, each obtained by
        dividing the accumulated value by the accumulated number of examples.
    """
    # Set the model to training mode
    if isinstance(net, torch.nn.Module):
        net.train()
    # Sum of training loss, sum of training accuracy, no. of examples
    metric = Accumulator(3)
    for X, y in train_iter:
        # Compute gradients and update parameters
        y_hat = net(X)
        l = loss(y_hat, y)
        if isinstance(updater, torch.optim.Optimizer):
            # Using PyTorch in-built optimizer & loss criterion.
            # mean() leaves an already mean-reduced scalar unchanged and also
            # accepts per-example losses (reduction='none'), which a bare
            # l.backward() / float(l) would reject.
            batch_loss = l.mean()
            updater.zero_grad()
            batch_loss.backward()
            updater.step()
            # Scale the mean back to a per-batch sum so the epoch average is
            # weighted by batch size.
            metric.add(float(batch_loss) * len(y), accuracy(y_hat, y), y.numel())
        else:
            # Using custom built optimizer & loss criterion
            l.sum().backward()
            updater(X.shape[0])
            metric.add(float(l.sum()), accuracy(y_hat, y), y.numel())
    # Return training loss and training accuracy
    return metric[0] / metric[2], metric[1] / metric[2]
