In [1]:
import torch
import torchvision as vision
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
import sys
torch.cuda.set_device(0)
print('Python Version: {}; CUDA Version: {}; pytorch version: {}'.format(
    sys.version,torch.version.cuda,torch.__version__))

Python Version: 3.6.2 (default, Jul 29 2017, 00:00:00) 
[GCC 4.8.4]; CUDA Version: 8.0.61; pytorch version: 0.4.0


In [2]:
# change the PIL to tensor
from torchvision.transforms import ToTensor, Normalize as Norm, Compose
transform = Compose([ToTensor(),Norm((0.5,), (0.5,))]) # mean, range (change to 0 - 1)

train_dataset = MNIST(root='../dataset/MNIST',train=True,download=True,transform=transform)
test_dataset = MNIST(root='../dataset/MNIST',train=False,download=True,transform=transform)

In [3]:
# Build up the dataset from numpy array
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split as split
from sklearn.preprocessing import Normalizer
from torch.utils.data import TensorDataset as TData
import numpy as np
mnist = fetch_mldata('MNIST original')
X_train, X_test, y_train, y_test = split(mnist['data'],mnist['target'],train_size=60000,test_size=10000)
norm = Normalizer()
X_train = norm.fit_transform(X_train).astype(np.float32)
X_test = norm.transform(X_test).astype(np.float32)
train_dataset = TData(torch.from_numpy(X_train.reshape((-1,1,28,28)),), 
                      torch.from_numpy(y_train.astype(np.long)).view(-1))
test_dataset =  TData(torch.from_numpy(X_test.reshape((-1,1,28,28))), 
                      torch.from_numpy(y_test.astype(np.long)).view(-1))

In [4]:
print (len(train_dataset)) # length method
print (train_dataset[0][0].shape) # Data field, channel * width * length for conv2
print (train_dataset[0][1].shape) # Label field, .view(-1)
train_feeder = DataLoader(train_dataset, batch_size=128,shuffle=True, num_workers=2)
test_feeder = DataLoader(test_dataset,batch_size=1024,shuffle=False,num_workers=2)

60000
torch.Size([1, 28, 28])
torch.Size([])


In [5]:
from torch.nn import Conv2d as Conv, MaxPool2d as Pool, Linear as FC
from torch.nn.functional import relu, dropout
import torch.nn as nn

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = Conv(1, 6, kernel_size=5)
        self.pool = Pool(2, 2)
        self.conv2 = Conv(6, 16, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = FC(16 * 4 * 4, 120)
        self.fc2 = FC(120, 84)
        self.fc3 = FC(84, 10)

    def forward(self, x):
        x = self.pool(relu(self.conv1(x)))
        x = self.pool(relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4) # in pytorch: need to view to reshape
        x = relu(self.fc1(x))
        x = relu(self.fc2(x))
        x = dropout(x,training=self.training)
        x = self.fc3(x)
        return x
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = Net().to(device)

In [6]:
from torch.optim import SGD
from torch.nn import CrossEntropyLoss

criterion = CrossEntropyLoss()
optimizer = SGD(net.parameters(), lr=0.01, momentum=0.9)

In [7]:
net.train()
length = len(train_feeder.dataset)
for epoch in range(100):
    running_loss = 0.0
    running_correct = 0
    for inputs, labels in train_feeder:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad() # refresh the buffer
        outputs = net(inputs) # forward
        loss = criterion(outputs, labels) # calculate the loss
        loss.backward() # BP
        optimizer.step() # p' = p - lr * grad
        running_loss += loss.item()
        pred = outputs.max(1, keepdim=True)[1] # get the index of the max log-probability
        running_correct += pred.eq(labels.view_as(pred)).sum().item()
    running_loss /= length
    
    print('Epoch = {}: Train set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)'.format(
        epoch, running_loss, running_correct, length,100. * running_correct / length)) 

Epoch = 0: Train set: Average loss: 0.0180, Accuracy: 6601/60000 (11.00%)
Epoch = 1: Train set: Average loss: 0.0180, Accuracy: 6937/60000 (11.56%)
Epoch = 2: Train set: Average loss: 0.0117, Accuracy: 28412/60000 (47.35%)
Epoch = 3: Train set: Average loss: 0.0037, Accuracy: 51292/60000 (85.49%)
Epoch = 4: Train set: Average loss: 0.0021, Accuracy: 55155/60000 (91.92%)
Epoch = 5: Train set: Average loss: 0.0015, Accuracy: 56560/60000 (94.27%)
Epoch = 6: Train set: Average loss: 0.0012, Accuracy: 57316/60000 (95.53%)
Epoch = 7: Train set: Average loss: 0.0010, Accuracy: 57785/60000 (96.31%)
Epoch = 8: Train set: Average loss: 0.0009, Accuracy: 58041/60000 (96.73%)
Epoch = 9: Train set: Average loss: 0.0008, Accuracy: 58223/60000 (97.04%)
Epoch = 10: Train set: Average loss: 0.0007, Accuracy: 58447/60000 (97.41%)
Epoch = 11: Train set: Average loss: 0.0007, Accuracy: 58515/60000 (97.53%)
Epoch = 12: Train set: Average loss: 0.0006, Accuracy: 58675/60000 (97.79%)
Epoch = 13: Train set: A

In [8]:
net.eval()
test_loss = 0
test_correct = 0
length = len(test_feeder.dataset)
with torch.no_grad():
    for data, target in test_feeder:
        data, target = data.to(device), target.to(device)
        output = net(data)
        test_loss += criterion(output, target)
        pred = output.max(1, keepdim=True)[1] # get the index of the max log-probability
        test_correct += pred.eq(target.view_as(pred)).sum().item()

test_loss /= length
print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
    test_loss, test_correct, length, 100. * test_correct / length))


Test set: Average loss: 0.0001, Accuracy: 9875/10000 (98.75%)

