In [11]:
import torch
import pandas as pd
import numpy as np
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from torchvision import datasets, transforms
from matplotlib import pyplot as plt
%matplotlib inline

In [12]:
# Fashion-MNIST train/test splits, downloaded on first use and converted to
# tensors by transforms.ToTensor().
# The root uses a forward slash: "..\datasets" contains the invalid escape
# sequence "\d" and is only interpreted as a relative path on Windows, whereas
# "../datasets" resolves to the same sibling directory on every platform.
data_root = "../datasets"
mnist_train = datasets.FashionMNIST(data_root, train=True, download=True, transform=transforms.ToTensor())
mnist_test = datasets.FashionMNIST(data_root, train=False, download=True, transform=transforms.ToTensor())

In [14]:
def dloaders(batch_size):
    """Build the train and test data loaders for a given batch size.

    Args:
        batch_size: number of samples per mini-batch, used for both loaders.

    Returns:
        A ``(train_loader, test_loader)`` pair wrapping ``mnist_train`` and
        ``mnist_test``. Only the training loader shuffles its samples.
    """
    return (
        DataLoader(mnist_train, batch_size=batch_size, shuffle=True),
        DataLoader(mnist_test, batch_size=batch_size, shuffle=False),
    )

- Input: each 28x28 image flattens to 784 pixel values
- Output: 10 classes
- Hidden layer: 256 hidden units

## Model parameters

In [7]:
num_inputs = 784    # 28 x 28 pixels, flattened
num_outputs = 10    # one score per class
num_hiddens = 256   # width of the single hidden layer

# Hidden layer maps num_inputs -> num_hiddens. Weights start as small Gaussian
# noise (std 0.01), biases at zero. nn.Parameter already sets
# requires_grad=True, so it is not repeated on the wrapped tensors.
W1 = nn.Parameter(torch.randn(num_inputs, num_hiddens) * 0.01)
b1 = nn.Parameter(torch.zeros(num_hiddens))

# Output layer maps num_hiddens -> num_outputs. Its weight must be
# (num_hiddens, num_outputs) so that `H @ W2` is defined for a
# (batch, num_hiddens) activation and yields one score per class.
W2 = nn.Parameter(torch.randn(num_hiddens, num_outputs) * 0.01)
b2 = nn.Parameter(torch.zeros(num_outputs))

# Everything the optimizer has to update.
params = [W1, b1, W2, b2]

## Activation function
- ``torch.zeros_like`` returns a tensor filled with the scalar value 0, with the same size as the input.

In [8]:
def relu(X):
    """Rectified linear unit, applied element-wise.

    Each entry of ``X`` is compared against 0 and the larger of the two is
    kept, so the result has the same shape as ``X``.
    """
    return torch.max(X, torch.zeros_like(X))

## Model
Because we are disregarding spatial structure, we reshape each two-dimensional image into a flat vector of length num_inputs.

In [9]:
def net(X):
    """Forward pass of the one-hidden-layer perceptron.

    The input is flattened to rows of length ``num_inputs``, passed through
    the affine map ``W1``/``b1`` followed by ``relu``, then through the affine
    map ``W2``/``b2``. No softmax is applied to the returned scores.
    """
    flat = X.reshape(-1, num_inputs)
    hidden = relu(torch.matmul(flat, W1) + b1)
    return torch.matmul(hidden, W2) + b2

## Loss function

In [10]:
# Cross-entropy criterion; the reduction is spelled out but is the library
# default, so the loss is averaged over the mini-batch.
loss = nn.CrossEntropyLoss(reduction="mean")

## Training

In [None]:
# Optimisation hyperparameters: passes over the training set, and SGD step size.
num_epochs, learning_rate = 10, 0.1

# Stochastic gradient descent over the hand-built parameter list.
updater = torch.optim.SGD(params=params, lr=learning_rate)
