In [1]:
import torch
import torchvision
from torchvision.datasets import MNIST

In [2]:
# Training split of MNIST, stored under data/. download=True asks torchvision
# to fetch the raw files (see the download log printed below this cell).
dataset = MNIST(
    root='data/',
    download=True,
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


100.1%

Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


113.5%

Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


100.4%

Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


180.4%

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw
Processing...
Done!


In [3]:
len(dataset)

60000

In [4]:
# Held-out MNIST split (train=False), read from the same data/ directory.
# NOTE(review): no transform is passed, so items are presumably PIL images
# like the untransformed dataset[0] shown below — confirm before batching.
test_dataset = MNIST(root='data/', train=False)

In [5]:
len(test_dataset)

10000

In [6]:
dataset[0]

(<PIL.Image.Image image mode=L size=28x28 at 0x7F0FD8AF8150>, 5)

In [7]:
import torchvision.transforms as transforms

In [8]:
# Rebuild the training split with a ToTensor transform attached, so that
# indexing the dataset goes through transforms.ToTensor().
dataset = MNIST(
    root='data/',
    train=True,
    transform=transforms.ToTensor(),
)

In [11]:
import matplotlib.pyplot as plt
%matplotlib inline

In [12]:
import numpy as np

In [13]:
def split_indices(n, val_pct, seed=None):
    """Randomly partition ``range(n)`` into training and validation indices.

    Args:
        n: Number of samples to split.
        val_pct: Fraction of the samples reserved for validation. Must lie in
            ``[0, 1]``; the validation size is ``int(val_pct * n)``.
        seed: Optional seed. When given, the shuffle is drawn from a dedicated
            ``np.random.default_rng(seed)`` generator so the split is
            reproducible. When ``None`` (the default) NumPy's global random
            state is used, exactly as before.

    Returns:
        ``(train_indices, val_indices)``: two disjoint NumPy index arrays that
        together contain every value in ``range(n)`` once.

    Raises:
        ValueError: If ``val_pct`` is outside ``[0, 1]``.
    """
    # A negative fraction would make n_val negative and silently swap the
    # meaning of the two slices below; a fraction above 1 would leave the
    # training set empty. Reject both up front.
    if not 0 <= val_pct <= 1:
        raise ValueError('val_pct must be between 0 and 1, got {!r}'.format(val_pct))

    n_val = int(val_pct * n)
    if seed is None:
        idxs = np.random.permutation(n)
    else:
        idxs = np.random.default_rng(seed).permutation(n)
    # The first n_val shuffled indices form the validation set, the rest train.
    return idxs[n_val:], idxs[:n_val]

In [14]:
# Hold out 20% of the dataset's indices for validation.
train_indices, val_indices = split_indices(n=len(dataset), val_pct=0.2)

In [15]:
# Show the size of each split, then the first few validation indices.
print(*map(len, (train_indices, val_indices)))
print('Sample val indices: ', val_indices[:20])

48000 12000
Sample val indices:  [57087 33515 50092 29457  2417 47383 22368 40968 59615  1199 19752 53970
 15039 44402 37818 53731 29470  1527 44933 19423]


In [16]:
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data.dataloader import DataLoader

In [17]:
# Mini-batch size shared by both loaders.
batch_size = 100

# Both loaders read from the same dataset object; each sampler restricts its
# loader to one of the two index subsets produced by the split.
train_sampler = SubsetRandomSampler(train_indices)
val_sampler = SubsetRandomSampler(val_indices)

train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
val_loader = DataLoader(dataset, batch_size=batch_size, sampler=val_sampler)

In [18]:
import torch.nn as nn

In [19]:
# Length of a flattened 28x28 image, and the number of output classes.
input_size, num_classes = 28 * 28, 10

In [20]:
# A bare linear layer mapping a flattened image to one score per class.
model = nn.Linear(in_features=input_size, out_features=num_classes)

In [21]:
class MnistModel(nn.Module):
    """A single fully connected layer applied to flattened inputs.

    The forward pass returns the raw outputs of the linear layer; no softmax
    is applied here.

    Args:
        in_features: Number of values per flattened sample. Defaults to the
            notebook-level ``input_size``.
        out_classes: Number of output scores per sample. Defaults to the
            notebook-level ``num_classes``.
    """

    def __init__(self, in_features=None, out_classes=None):
        super().__init__()
        # Resolve the defaults lazily so the notebook globals are only needed
        # when the caller does not supply explicit sizes.
        if in_features is None:
            in_features = input_size
        if out_classes is None:
            out_classes = num_classes
        self.linear = nn.Linear(in_features, out_classes)

    def forward(self, xb):
        """Flatten ``xb`` to ``(-1, in_features)`` and apply the linear layer."""
        # Reshape with the layer's own width instead of a hard-coded 784 so
        # the flatten step can never disagree with the layer it feeds.
        xb = xb.reshape(-1, self.linear.in_features)
        return self.linear(xb)

In [22]:
# Rebind `model` from the bare nn.Linear layer to the MnistModel wrapper,
# whose forward() reshapes its input before applying the linear layer.
model = MnistModel()

In [23]:
import torch.nn.functional as F

In [24]:
def accuracy(l1, l2):
    """Return the fraction of entries in ``l1`` that match the labels ``l2``.

    Args:
        l1: Tensor of predicted class indices with the same shape as ``l2``,
            or a tensor of per-class scores with one extra trailing dimension
            (in which case the highest-scoring class is taken per sample).
        l2: Tensor of true class indices.

    Returns:
        A Python float: number of matches divided by ``len(l1)``.
    """
    # Comparing raw scores against labels would broadcast (or fail) rather
    # than measure accuracy, so reduce scores to class indices first. This
    # keeps the function usable with model outputs, like the later
    # accuracy(outputs, labels) definition in this notebook.
    if l1.dim() == l2.dim() + 1:
        l1 = torch.argmax(l1, dim=-1)
    return torch.sum(l1 == l2).item() / len(l1)

In [25]:
# Alias for the loss function that is passed to evaluate() further down.
loss_fn = F.cross_entropy

In [26]:
# Plain SGD over every parameter of the current model.
learning_rate = 1e-3
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

In [27]:
def loss_batch(model, loss_func, xb, yb, opt=None, metric=None):
    """Run one batch through ``model`` and optionally take an optimizer step.

    Args:
        model: Callable applied to ``xb`` to produce predictions.
        loss_func: Callable ``(predictions, yb)`` returning a loss whose
            ``.backward()`` and ``.item()`` are used.
        xb: Batch of inputs.
        yb: Batch of targets.
        opt: Optional optimizer. When given, the loss is back-propagated, the
            optimizer steps, and its gradients are then reset.
        metric: Optional callable ``(predictions, yb)`` evaluated on the batch.

    Returns:
        ``(loss_value, batch_len, metric_value)`` where ``loss_value`` is
        ``loss.item()``, ``batch_len`` is ``len(xb)`` and ``metric_value`` is
        ``None`` when no metric was supplied.
    """
    outputs = model(xb)
    batch_loss = loss_func(outputs, yb)

    training = opt is not None
    if training:
        # Backward pass, parameter update, then clear gradients for the
        # next batch.
        batch_loss.backward()
        opt.step()
        opt.zero_grad()

    score = metric(outputs, yb) if metric is not None else None
    return batch_loss.item(), len(xb), score

In [31]:
def evaluate(model, loss_fn, valid_dl, metric=None):
    """Compute the size-weighted average loss (and metric) over ``valid_dl``.

    Args:
        model: Model passed through to ``loss_batch`` for every batch.
        loss_fn: Loss function passed through to ``loss_batch``.
        valid_dl: Iterable yielding ``(xb, yb)`` batches.
        metric: Optional metric callable passed through to ``loss_batch``.

    Returns:
        ``(avg_loss, total, avg_metric)`` where ``total`` is the summed batch
        sizes, the averages are weighted by batch size, and ``avg_metric`` is
        ``None`` when no metric was supplied.

    Raises:
        ValueError: If ``valid_dl`` yields no batches.
    """
    # torch.no_grad() only disables gradient tracking; it does not switch
    # layers to inference behaviour. Put the model in eval mode for the
    # duration of the pass and restore the previous mode afterwards so a
    # surrounding training loop is unaffected.
    was_training = getattr(model, 'training', False)
    if was_training:
        model.eval()
    try:
        with torch.no_grad():
            results = [loss_batch(model, loss_fn, xb, yb, metric=metric)
                       for xb, yb in valid_dl]
    finally:
        if was_training:
            model.train()

    # zip(*[]) would fail with an opaque unpacking error; say what happened.
    if not results:
        raise ValueError('valid_dl produced no batches; nothing to evaluate')

    losses, nums, metrics = zip(*results)
    total = np.sum(nums)
    # Weight each batch by its size so a smaller final batch is not
    # over-represented in the average.
    avg_loss = np.sum(np.multiply(losses, nums)) / total
    avg_metric = None
    if metric is not None:
        avg_metric = np.sum(np.multiply(metrics, nums)) / total
    return avg_loss, total, avg_metric

In [29]:
def accuracy(outputs, labels):
    """Fraction of rows in ``outputs`` whose highest-scoring column equals the label.

    Args:
        outputs: Tensor of per-class scores; the maximum is taken along dim 1.
        labels: Tensor of true class indices, compared element-wise with the
            predicted indices.

    Returns:
        A Python float: number of correct predictions divided by the number
        of predictions.
    """
    # torch.max along a dimension returns (values, indices); only the
    # indices (the predicted classes) are needed.
    preds = torch.max(outputs, dim=1)[1]
    n_correct = (preds == labels).sum().item()
    return n_correct / len(preds)

In [32]:
# Baseline: score the freshly constructed (not yet trained) model on the
# validation loader, using accuracy as the metric.
val_loss, total, val_acc = evaluate(model, loss_fn, val_loader, metric=accuracy)

In [33]:
print(val_loss)

2.316989223162333


In [34]:
print(val_acc)

0.1125


In [35]:
def fit(epochs, model, loss_fn, opt, train_dl, valid_dl, metric=None):
    """Train ``model`` for ``epochs`` passes and print validation results.

    Each epoch runs ``loss_batch`` with the optimizer on every batch of
    ``train_dl``, then calls ``evaluate`` on ``valid_dl`` and prints the
    validation loss (plus the metric, when one is supplied).

    Args:
        epochs: Number of passes over ``train_dl``.
        model: Model to train.
        loss_fn: Loss function forwarded to ``loss_batch`` and ``evaluate``.
        opt: Optimizer forwarded to ``loss_batch``.
        train_dl: Iterable yielding ``(xb, yb)`` training batches.
        valid_dl: Iterable of validation batches forwarded to ``evaluate``.
        metric: Optional metric callable forwarded to ``evaluate``.

    Returns:
        None. Progress is reported through ``print`` only.
    """
    # Resolve the label once. Callables such as functools.partial objects or
    # callable class instances have no __name__, so fall back to the type
    # name instead of raising AttributeError after the first epoch.
    metric_name = None
    if metric is not None:
        metric_name = getattr(metric, '__name__', type(metric).__name__)

    for epoch in range(epochs):
        # Training pass: the per-batch return values are not needed here.
        for xb, yb in train_dl:
            loss_batch(model, loss_fn, xb, yb, opt)

        val_loss, _, val_metric = evaluate(model, loss_fn, valid_dl, metric)

        if metric is None:
            print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, epochs, val_loss))
        else:
            print('Epoch [{}/{}], Loss: {:.4f}, {}: {:.4f}'.format(epoch+1, epochs, val_loss, metric_name, val_metric))

In [36]:
# Start training from a newly constructed model instance.
model = MnistModel()

In [37]:
# Recreate the optimizer so it is bound to the parameters of the model
# instance created in the previous cell.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [38]:
# Train for 5 epochs; fit() prints the validation loss and accuracy after
# each epoch.
fit(5, model, F.cross_entropy, optimizer, train_loader, val_loader, accuracy)

Epoch [1/5], Loss: 1.8617, accuracy: 0.6927
Epoch [2/5], Loss: 1.5655, accuracy: 0.7626
Epoch [3/5], Loss: 1.3573, accuracy: 0.7885
Epoch [4/5], Loss: 1.2079, accuracy: 0.8026
Epoch [5/5], Loss: 1.0971, accuracy: 0.8122


In [39]:
torch.cuda.is_available()

True

In [40]:
torch.cuda.get_device_name(0)

'GeForce GTX 1070'

In [41]:
torch.cuda.device_count()

1

In [42]:
# NOTE(review): this only constructs a torch.cuda.device object (see the repr
# below); presumably it has to be used as a context manager to change the
# active device — confirm against the PyTorch docs.
torch.cuda.device(0)

<torch.cuda.device at 0x7f0fbe511c10>

In [43]:
torch.cuda.current_device()

0