In [None]:
%matplotlib inline
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision import transforms
from d2l import torch as d2l
# Ask d2l to render figures as SVG in the notebook (judging by the helper's
# name -- TODO confirm against the d2l documentation).
d2l.use_svg_display()

In [None]:
# class FashionMNIST(d2l.DataModule):  #@save
#     """The Fashion-MNIST dataset."""
#     def __init__(self, batch_size=64, resize=(28, 28)):
#         super().__init__()
#         self.save_hyperparameters()
#         trans = transforms.Compose([transforms.Resize(resize),
#                                     transforms.ToTensor()])
#         self.train = torchvision.datasets.FashionMNIST(
#             root=self.root, train=True, transform=trans, download=True)
#         self.val = torchvision.datasets.FashionMNIST(
#             root=self.root, train=False, transform=trans, download=True)
#     def get_dataloader(self, train):
#         data = self.train if train else self.val
#         return torch.utils.data.DataLoader(data, self.batch_size, shuffle=train,
#                                         num_workers=self.num_workers)

The `FashionMNIST` data module above is commented out and is not used in this notebook. Instead, we use the ready-made `d2l.FashionMNIST` class directly, which keeps the notebook shorter.

In [None]:
def softmax(X):
    """Compute the row-wise softmax of a 2-D tensor.

    Every row of the result is non-negative and sums to 1:
    ``softmax(X)[i, j] = exp(X[i, j]) / sum_k exp(X[i, k])``.

    Args:
        X: 2-D tensor; the normalization runs over dimension 1 (each row).

    Returns:
        Tensor with the same shape as ``X`` holding the normalized values.
    """
    # Softmax is unchanged when a constant is subtracted from a whole row, so
    # shift each row by its maximum. The largest exponent then becomes
    # exp(0) = 1, and torch.exp can no longer overflow to inf (which would
    # otherwise yield inf / inf = nan for large scores).
    # The shift is detached: by the same invariance it contributes nothing to
    # the gradient, so there is no need to track it in the autograd graph.
    row_max = X.max(dim=1, keepdim=True).values.detach()
    X_exp = torch.exp(X - row_max)
    partition = X_exp.sum(1, keepdim=True)
    return X_exp / partition  # The broadcasting mechanism is applied here

The softmax function defined above normalizes each row of $\mathbf{X}$ so that its entries are non-negative and sum to 1, according to the formula:
$$\mathrm{softmax}(\mathbf{X})_{ij} = \frac{\exp(\mathbf{X}_{ij})}{\sum_k \exp(\mathbf{X}_{ik})}.$$

In [None]:
class SoftmaxRegressionScratch(d2l.Classifier):
    """Softmax regression whose parameters are plain tensors.

    The model holds a weight matrix ``W`` and a bias vector ``b``; other
    methods (e.g. the forward pass and the loss) are attached to the class
    separately.
    """

    def __init__(self, num_inputs: int, num_outputs: int, lr: float, sigma: float = 0.01):
        """Create the weight matrix and bias vector.

        Args:
            num_inputs: Number of input features (rows of ``W``).
            num_outputs: Number of output classes (columns of ``W``, length of ``b``).
            lr: Learning rate; not used directly in this method.
            sigma: Standard deviation of the normal distribution used to
                initialize ``W``.
        """
        super().__init__()
        # d2l helper; presumably stores the constructor arguments (num_inputs,
        # num_outputs, lr, sigma) as attributes on self -- TODO confirm.
        # NOTE(review): it may inspect the local variables of this frame, so
        # avoid introducing locals above this call -- confirm against d2l.
        self.save_hyperparameters()
        # Weights drawn from N(0, sigma^2); requires_grad=True lets autograd
        # track gradients for this tensor.
        self.W = torch.normal(0, sigma, size=(num_inputs, num_outputs),
                              requires_grad=True)
        # Bias starts at zero, one entry per output class.
        self.b = torch.zeros(num_outputs, requires_grad=True)

    def parameters(self):
        """Return the tensors to be optimized: ``[W, b]``."""
        # W and b are plain tensors created above, so they are listed
        # explicitly here (presumably for the optimizer -- TODO confirm).
        return [self.W, self.b]

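We have just defined the model class together with its parameters `W` and `b` :) The forward pass and the loss are added to the class in the next cells.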

In [None]:
@d2l.add_to_class(SoftmaxRegressionScratch)
def forward(self, X):
    """Return softmax(X W + b) for a batch of inputs.

    ``X`` is first reshaped so that every example becomes one row with
    ``self.W.shape[0]`` entries; the leading dimension is inferred.
    """
    num_features = self.W.shape[0]
    flat_inputs = X.reshape((-1, num_features))
    # Affine map: one score per output class; b is broadcast across rows.
    scores = torch.matmul(flat_inputs, self.W) + self.b
    return softmax(scores)

`@d2l.add_to_class(SomeClass)` is a decorator that attaches the decorated function as a new method of an already-defined class. Here it adds `forward` to `SoftmaxRegressionScratch`.

`torch.matmul(A, B)` computes the matrix product of `A` and `B`.

In [None]:
def cross_entropy(y_hat, y):
    """Mean negative log of the entries of ``y_hat`` selected by ``y``.

    For each row ``i`` of ``y_hat`` the entry in column ``y[i]`` is picked,
    its logarithm is taken, and the negated average over all rows is returned.
    """
    row_idx = torch.arange(len(y_hat), device=y_hat.device)
    picked = y_hat[row_idx, y]  # one selected entry per row
    mean_log = torch.log(picked).mean()
    return -mean_log
@d2l.add_to_class(SoftmaxRegressionScratch)
def loss(self, y_hat, y):
    """Return the cross-entropy of predictions ``y_hat`` against labels ``y``."""
    batch_loss = cross_entropy(y_hat, y)
    return batch_loss

`y_hat` is the conventional name in machine learning for a model's predicted output, i.e. its estimate of the target value for a given input. In this notebook, `y_hat` holds one row of predicted class probabilities per input example, while `y` holds the corresponding true class labels.

In [None]:
# Seed torch's global random number generator so that the random weight
# initialization (and any shuffling that draws from the global generator) is
# repeatable after "Restart Kernel -> Run All".
SEED = 0
torch.manual_seed(SEED)

# Hyperparameters, named so they can be tuned in one place.
BATCH_SIZE = 256
NUM_INPUTS = 784    # flattened image size; assumes 28 x 28 inputs -- TODO confirm against d2l.FashionMNIST
NUM_OUTPUTS = 10    # Fashion-MNIST has 10 classes
LEARNING_RATE = 0.1
MAX_EPOCHS = 10

data = d2l.FashionMNIST(batch_size=BATCH_SIZE)
model = SoftmaxRegressionScratch(num_inputs=NUM_INPUTS, num_outputs=NUM_OUTPUTS,
                                 lr=LEARNING_RATE)
trainer = d2l.Trainer(max_epochs=MAX_EPOCHS)
trainer.fit(model, data)