In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

In [2]:
# Download training data from open datasets.
training_data = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor(),
)

# Download test data from open datasets.
test_data = datasets.FashionMNIST(
    root="data",
    train=False,
    download=True,
    transform=ToTensor(),
)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data/FashionMNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/26421880 [00:00<?, ?it/s]

Extracting data/FashionMNIST/raw/train-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/29515 [00:00<?, ?it/s]

Extracting data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/4422102 [00:00<?, ?it/s]

Extracting data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/5148 [00:00<?, ?it/s]

Extracting data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw



In [3]:
batch_size = 64

# Create data loaders.
train_dataloader = DataLoader(training_data, batch_size=batch_size)
test_dataloader = DataLoader(test_data, batch_size=batch_size)

for X, y in test_dataloader:
    print(f"Shape of X [N, C, H, W]: {X.shape}")
    print(f"Shape of y: {y.shape} {y.dtype}")
    break

Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64]) torch.int64


In [4]:
# Get cpu or gpu device for training.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using {device} device")

# Define model
class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28*28, 512),
            nn.ReLU(),
            nn.Linear(512, 512),
            nn.ReLU(),
            nn.Linear(512, 10)
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

model = NeuralNetwork().to(device)
print(model)

Using cpu device
NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [6]:
import numpy as np
from numpy.random import randn

N, D_in, H,D_out = 64,1000,100,10
x,y = randn(N,D_in),randn(N,D_out)
w1,w2 = randn(D_in,H), randn(H,D_out)

for t in range(2000):
  h=1/(1+np.exp(-x.dot(w1)))
  y_pred=h.dot(w2)
  loss=np.square(y_pred-y).sum()
  print(t,loss)

  grad_y_pred = 2.0*(y_pred-y)
  grad_w2=h.T.dot(grad_y_pred)
  grad_h=grad_y_pred.dot(w2.T)
  grad_w1=x.T.dot(grad_h*h*(1-h))

  w1-=1e-4*grad_w1
  w2-=1e-4*grad_w2

0 25788.508214937196
1 16739.29475619875
2 12991.08643890038
3 11155.659976881909
4 10101.679041211446
5 9421.004458012163
6 8944.871849027766
7 8573.491679365143
8 8256.109674827709
9 7962.00999396531
10 7690.565818877203
11 7456.822977331665
12 7249.455216925177
13 7054.666762413681
14 6874.461981995095
15 6709.443181023763
16 6556.5885216252045
17 6409.253721512443
18 6264.050396534523
19 6119.853111754678
20 5990.985073002064
21 5876.228550707258
22 5769.683756206388
23 5669.587866790152
24 5574.918031757588
25 5484.698517035811
26 5398.226068387174
27 5315.324764374781
28 5235.743084589751
29 5159.076139336005
30 5084.841138857076
31 5012.354416485363
32 4940.521050487996
33 4867.37090034246
34 4789.594740448447
35 4705.770368984029
36 4627.784883348242
37 4559.566103909974
38 4495.026320943538
39 4432.686759418959
40 4372.131074412724
41 4313.0969490080215
42 4255.299036390182
43 4198.347012538073
44 4141.686831683863
45 4084.809536895856
46 4028.1747280251957
47 3973.12292479524