In [1]:
import matplotlib.pyplot as plt 
import torch 
import numpy as np 

### Loading Data 

In [4]:
import torchvision as tv 

In [5]:
import time 

In [6]:
# Number of samples per mini-batch; used by both the train and test DataLoaders below.
BATCH_SIZE = 256 

In [7]:
# Fetch MNIST into the current directory (downloaded on first run) and convert
# every image to a tensor when it is read.
train_dataset = tv.datasets.MNIST('.', train=True, transform=tv.transforms.ToTensor(), download=True)
test_dataset = tv.datasets.MNIST('.', train=False, transform=tv.transforms.ToTensor(), download=True)

# Reshuffle the training samples at the start of every epoch so the optimizer
# does not see the exact same mini-batches in the exact same order each time.
# The test loader keeps its fixed order: evaluation does not need shuffling.
train = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test = torch.utils.data.DataLoader(test_dataset, batch_size=BATCH_SIZE)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ./MNIST/raw/train-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ./MNIST/raw/train-labels-idx1-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ./MNIST/raw/t10k-images-idx3-ubyte.gz to ./MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ./MNIST/raw/t10k-labels-idx1-ubyte.gz to ./MNIST/raw



  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [18]:
# Pull a single mini-batch from the training loader to confirm the tensor
# layout that will be fed to the model; nothing is printed if the loader is empty.
first_batch = next(iter(train), None)
if first_batch is not None:
    X, y = first_batch
    print(X.shape)

torch.Size([256, 1, 28, 28])


In [13]:
# Inspect the training labels: a tensor of integer digit labels
# (presumably one entry per training image).
train_dataset.targets

tensor([5, 0, 4,  ..., 5, 6, 8])

In [14]:
# List the human-readable class names that ship with the dataset.
train_dataset.classes

['0 - zero',
 '1 - one',
 '2 - two',
 '3 - three',
 '4 - four',
 '5 - five',
 '6 - six',
 '7 - seven',
 '8 - eight',
 '9 - nine']

In [19]:
# Two-layer perceptron: each 1x28x28 image is flattened into 784 features,
# projected to 256 hidden units through a ReLU, then mapped to 10 class scores.
n_inputs, n_hidden, n_outputs = 28 * 28, 256, 10
model = torch.nn.Sequential(
    torch.nn.Flatten(),
    torch.nn.Linear(in_features=n_inputs, out_features=n_hidden),
    torch.nn.ReLU(),
    torch.nn.Linear(in_features=n_hidden, out_features=n_outputs),
)

In [20]:
# Display the module's repr to check the layer stack and its dimensions.
model

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=256, bias=True)
  (2): ReLU()
  (3): Linear(in_features=256, out_features=10, bias=True)
)

In [24]:
# Shape of one image as stored on the dataset object: 28x28, with no channel
# axis (unlike the [N, 1, 28, 28] batches yielded by the DataLoader).
train_dataset.data[0].shape

torch.Size([28, 28])

In [28]:
# Training configuration: 10 passes over the data, cross-entropy computed on
# the model's outputs, and plain SGD with a learning rate of 0.01.
num_epochs = 10
loss = torch.nn.CrossEntropyLoss()
trainer = torch.optim.SGD(params=model.parameters(), lr=0.01)

In [34]:
def train_model():
    """Train the notebook-level ``model`` and print metrics after every epoch.

    The function takes no arguments and reads these names from the enclosing
    notebook namespace at call time:

    * ``model``      - the network being optimised,
    * ``trainer``    - the optimizer stepping ``model``'s parameters,
    * ``loss``       - the criterion applied to ``(y_pred, y)``,
    * ``train``      - iterable of ``(X, y)`` training batches,
    * ``test``       - iterable of ``(X, y)`` evaluation batches,
    * ``num_epochs`` - number of passes over ``train``.

    For each epoch it performs one optimisation pass over ``train``, one
    evaluation pass over ``test``, and prints a single line containing the
    elapsed wall-clock time (training plus evaluation), the mean per-batch loss
    and the per-sample accuracy for both splits.

    Returns:
        None. Metrics are only printed; ``model`` is updated in place.
    """
    for ep in range(num_epochs):
        start = time.time()

        # ---- optimisation pass -------------------------------------------
        train_iters, train_passed = 0, 0
        train_loss, train_acc = 0., 0.
        model.train()
        for X, y in train:
            trainer.zero_grad()
            y_pred = model(X)
            l = loss(y_pred, y)
            l.backward()
            trainer.step()
            train_loss += l.item()
            # Count correct predictions; divided by the sample count when printing.
            train_acc += (y_pred.argmax(axis=1) == y).sum().item()
            train_iters += 1
            train_passed += len(X)

        # ---- evaluation pass ---------------------------------------------
        test_iters, test_passed = 0, 0
        test_loss, test_acc = 0., 0.
        model.eval()
        # No gradients are needed for evaluation: disabling autograd avoids
        # building a graph for every test batch, saving memory and time.
        with torch.no_grad():
            for X, y in test:
                y_pred = model(X)
                l = loss(y_pred, y)
                test_loss += l.item()
                test_acc += (y_pred.argmax(axis=1) == y).sum().item()
                test_iters += 1
                test_passed += len(X)

        # Loss is averaged over batches, accuracy over individual samples.
        print('ep: {}, taked: {:.3f}, train_loss: {}, train_acc: {}, test_loss: {}, test_acc: {}'.format(
            ep, time.time() - start, train_loss / train_iters, train_acc / train_passed,
            test_loss / test_iters, test_acc / test_passed)
        )
            

In [35]:
# Train and evaluate using the currently bound `model` and `trainer`
# (here: the SGD optimizer with lr=0.01 configured above).
train_model()

ep: 0, taked: 5.709, train_loss: 0.8527874864162283, train_acc: 0.8293333333333334, test_loss: 0.7048545561730861, test_acc: 0.8508
ep: 1, taked: 5.953, train_loss: 0.657043568631436, train_acc: 0.8520166666666666, test_loss: 0.5738147836178541, test_acc: 0.8663
ep: 2, taked: 6.160, train_loss: 0.5585123116031606, train_acc: 0.8648333333333333, test_loss: 0.5005700714886189, test_acc: 0.8769
ep: 3, taked: 6.116, train_loss: 0.49961259764559723, train_acc: 0.8744, test_loss: 0.4539928209036589, test_acc: 0.8833
ep: 4, taked: 5.812, train_loss: 0.4604251074030044, train_acc: 0.8817166666666667, test_loss: 0.4218445587903261, test_acc: 0.8884
ep: 5, taked: 5.559, train_loss: 0.43241352591108767, train_acc: 0.8857833333333334, test_loss: 0.39832677617669104, test_acc: 0.8928
ep: 6, taked: 6.060, train_loss: 0.4112980327073564, train_acc: 0.8897166666666667, test_loss: 0.3803146531805396, test_acc: 0.897
ep: 7, taked: 6.092, train_loss: 0.3946975698813479, train_acc: 0.8926, test_loss: 0.36

### Adam optimizer

In [36]:
# Rebuild the same architecture from scratch so the next run starts from
# freshly initialised weights instead of the ones trained earlier.
model = torch.nn.Sequential()
for position, layer in enumerate((
    torch.nn.Flatten(),
    torch.nn.Linear(784, 256),
    torch.nn.ReLU(),
    torch.nn.Linear(256, 10),
)):
    # Sequential names its children "0", "1", ... by default; keep that naming.
    model.add_module(str(position), layer)

In [37]:
# Rebind the optimizer to the new model's parameters, this time using Adam;
# the learning rate (0.01) matches the earlier SGD run so the two can be compared.
trainer = torch.optim.Adam(model.parameters(), lr=0.01)

In [38]:
# Re-run the same training loop; it picks up the rebound global `model` and `trainer`.
train_model()

ep: 0, taked: 6.118, train_loss: 0.24656554883385592, train_acc: 0.9240166666666667, test_loss: 0.1384344590594992, test_acc: 0.957
ep: 1, taked: 5.841, train_loss: 0.10779166680146406, train_acc: 0.9662333333333334, test_loss: 0.10728151538642124, test_acc: 0.9674
ep: 2, taked: 5.893, train_loss: 0.06978990582828509, train_acc: 0.9775666666666667, test_loss: 0.10998912630457199, test_acc: 0.9684
ep: 3, taked: 5.862, train_loss: 0.05870887036534383, train_acc: 0.9806333333333334, test_loss: 0.11428872810211033, test_acc: 0.9698
ep: 4, taked: 6.428, train_loss: 0.05114722129433079, train_acc: 0.98365, test_loss: 0.15515935775929393, test_acc: 0.9631
ep: 5, taked: 6.164, train_loss: 0.04780895299615061, train_acc: 0.9845666666666667, test_loss: 0.11918640876756399, test_acc: 0.9726
ep: 6, taked: 6.305, train_loss: 0.05184222991687266, train_acc: 0.98365, test_loss: 0.13471648911757939, test_acc: 0.971
ep: 7, taked: 6.070, train_loss: 0.04298653220386304, train_acc: 0.9860666666666666, te