In [1]:
import torch
import torchvision as vision
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
import sys
torch.cuda.set_device(0)
print('Python Version: {}; CUDA Version: {}; pytorch version: {}'.format(
    sys.version,torch.version.cuda,torch.__version__))

Python Version: 3.6.2 (default, Jul 29 2017, 00:00:00) 
[GCC 4.8.4]; CUDA Version: 8.0.61; pytorch version: 0.4.0


In [2]:
# change the PIL to tensor
from torchvision.transforms import ToTensor, Normalize as Norm, Compose
transform = Compose([ToTensor(),Norm((0.5,), (0.5,))]) # mean, range (change to 0 - 1)

train_dataset = MNIST(root='../dataset/MNIST',train=True,download=True,transform=transform)
test_dataset = MNIST(root='../dataset/MNIST',train=False,download=True,transform=transform)
train_feeder = DataLoader(train_dataset, batch_size=128,shuffle=True, num_workers=2)
test_feeder = DataLoader(test_dataset,batch_size=1024,shuffle=False,num_workers=2)

In [3]:
from torch.nn import Conv2d as Conv, MaxPool2d as Pool, Linear as FC
from torch.nn.functional import relu, dropout
import torch.nn as nn

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = Conv(1, 6, kernel_size=5)
        self.pool = Pool(2, 2)
        self.conv2 = Conv(6, 16, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = FC(16 * 4 * 4, 120)
        self.fc2 = FC(120, 84)
        self.fc3 = FC(84, 10)

    def forward(self, x):
        x = self.pool(relu(self.conv1(x)))
        x = self.pool(relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4) # in pytorch: need to view to reshape
        x = relu(self.fc1(x))
        x = relu(self.fc2(x))
        x = dropout(x,training=self.training)
        x = self.fc3(x)
        return x
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = Net().to(device)

In [4]:
from torch.optim import SGD
from torch.nn import CrossEntropyLoss

criterion = CrossEntropyLoss()
optimizer = SGD(net.parameters(), lr=0.001, momentum=0.9)

In [5]:
net.train()
length = len(train_feeder.dataset)
for epoch in range(100):
    running_loss = 0.0
    running_correct = 0
    for inputs, labels in train_feeder:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad() # refresh the buffer
        outputs = net(inputs) # forward
        loss = criterion(outputs, labels) # calculate the loss
        loss.backward() # BP
        optimizer.step() # p' = p - lr * grad
        running_loss += loss.item()
        pred = outputs.max(1, keepdim=True)[1] # get the index of the max log-probability
        running_correct += pred.eq(labels.view_as(pred)).sum().item()
    running_loss /= length
    
    print('Train set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)'.format(
        running_loss, running_correct, length,100. * running_correct / length)) 

Train set: Average loss: 0.0179, Accuracy: 8229/60000 (13.71%)
Train set: Average loss: 0.0169, Accuracy: 20193/60000 (33.66%)
Train set: Average loss: 0.0070, Accuracy: 43502/60000 (72.50%)
Train set: Average loss: 0.0033, Accuracy: 52524/60000 (87.54%)
Train set: Average loss: 0.0023, Accuracy: 54880/60000 (91.47%)
Train set: Average loss: 0.0018, Accuracy: 56042/60000 (93.40%)
Train set: Average loss: 0.0015, Accuracy: 56631/60000 (94.39%)
Train set: Average loss: 0.0013, Accuracy: 57036/60000 (95.06%)
Train set: Average loss: 0.0012, Accuracy: 57315/60000 (95.53%)
Train set: Average loss: 0.0011, Accuracy: 57585/60000 (95.97%)
Train set: Average loss: 0.0010, Accuracy: 57782/60000 (96.30%)
Train set: Average loss: 0.0009, Accuracy: 57876/60000 (96.46%)
Train set: Average loss: 0.0009, Accuracy: 58006/60000 (96.68%)
Train set: Average loss: 0.0009, Accuracy: 58090/60000 (96.82%)
Train set: Average loss: 0.0008, Accuracy: 58238/60000 (97.06%)
Train set: Average loss: 0.0008, Accuracy

In [6]:
net.eval()
test_loss = 0
test_correct = 0
length = len(test_feeder.dataset)
with torch.no_grad():
    for data, target in test_feeder:
        data, target = data.to(device), target.to(device)
        output = net(data)
        test_loss += criterion(output, target)
        pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
        test_correct += pred.eq(target.view_as(pred)).sum().item()

test_loss /= length
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
    test_loss, test_correct, length, 100. * test_correct / length))


Test set: Average loss: 0.0000, Accuracy: 9912/10000 (99.12%)

