In [1]:
import torch

In [2]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable

In [3]:
torch.cuda.is_available()

True

## Import Data

In [4]:
import random
from math import floor

def train_valid_split(dataset, test_size = 0.25, shuffle = False, random_seed = 0):
    """ Return a list of splitted indices from a DataSet.
    Indices can be used with DataLoader to build a train and validation set.
    
    Arguments:
        A Dataset
        A test_size, as a float between 0 and 1 (percentage split) or as an int (fixed number split)
        Shuffling True or False
        Random seed
    """
    length = dataset.__len__()
    indices = list(range(1,length))
    
    if shuffle == True:
        random.seed(random_seed)
        random.shuffle(indices)
    
    if type(test_size) is float:
        split = floor(test_size * length)
    elif type(test_size) is int:
        split = test_size
    else:
        raise ValueError('%s should be an int or a float' % str)
    return indices[split:], indices[:split]

In [5]:
dtype = torch.cuda.FloatTensor
bs = 64

transformations = transforms.Compose([
                      transforms.ToTensor(),
                      transforms.Normalize((0.1307), (0.3081),)])

train_transforms = transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ])

train_data = datasets.MNIST('../data/mnist', train=True, download=True,
                  transform=train_transforms)

valid_data = datasets.MNIST('../data/mnist', train=True, download=True,
                  transform=train_transforms)

test_data = datasets.MNIST('../data/mnist', train=False, download=True,
                  transform=train_transforms)

In [6]:
from torch.utils.data.sampler import SubsetRandomSampler

train_idx, valid_idx = train_valid_split(train_data, shuffle=True)
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

In [7]:
kwargs = {'num_workers': 1, 'pin_memory': True}
# train_loader = torch.utils.data.DataLoader(train_dataset, 
#                                         batch_size=bs, sampler=train_sampler, **kwargs)
train_loader = torch.utils.data.DataLoader(train_data,
        batch_size=bs, sampler=train_sampler, **kwargs)

valid_loader = torch.utils.data.DataLoader(valid_data,
        batch_size=bs, sampler=valid_sampler, **kwargs)

test_loader = torch.utils.data.DataLoader(train_data,
        batch_size=bs, **kwargs)

## Set up Model

In [8]:
class Flatten(nn.Module):
    def forward(self, x):
        N, C, H, W = x.size()
        return x.view(-1, 320)

class SimpleConv(nn.Module):
    def __init__(self):
        super(SimpleConv, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3)
        self.conv1_bn = nn.BatchNorm2d(32)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3)
        self.conv2_bn = nn.BatchNorm2d(32)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3)
        self.conv3_bn = nn.BatchNorm2d(64)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3)
        self.conv4_bn = nn.BatchNorm2d(64)
        self.conv2_drop = nn.Dropout2d()
        self.dense1 = nn.Linear(4096, 128)
        self.dense1_bn = nn.BatchNorm1d(128)
        self.dense2 = nn.Linear(128, 10)
    
    def forward(self, x):
        x = F.relu(self.conv1_bn(self.conv1(x)))
        x = F.relu(F.max_pool2d(self.conv2_bn(self.conv2(x)), 2))
        x = F.relu(self.conv3_bn(self.conv3(x)))
        x = F.relu(self.conv4_bn(self.conv4(x)))
        x = x.view(x.size(0), -1)
        x = F.relu(self.dense1_bn(self.dense1(x)))
        x = self.dense2(x)
#         print(x.size())
        return F.log_softmax(x, dim=1)
    
#   def train(self, x, y, optimizer, epochs)

In [9]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)



In [10]:
# model = simpleConv()
model = SimpleConv()
model.cuda()

SimpleConv(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv1_bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True)
  (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1))
  (conv2_bn): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True)
  (conv3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv3_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
  (conv4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1))
  (conv4_bn): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True)
  (conv2_drop): Dropout2d(p=0.5)
  (dense1): Linear(in_features=4096, out_features=128, bias=True)
  (dense1_bn): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True)
  (dense2): Linear(in_features=128, out_features=10, bias=True)
)

In [24]:
optimizer = optim.Adam(model.parameters(), lr=2e-4)

def train(epochs=1):
    for epoch in range(epochs):
        model.train(True)
        for batch_idx, (data, target) in enumerate(train_loader):
            data = Variable(data.cuda())
            target = Variable(target.cuda())
            optimizer.zero_grad()
            pred = model(data)
            loss = F.nll_loss(pred, target)
            loss.backward()
            optimizer.step()

            log_interval = 100
            if batch_idx % log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch+1, batch_idx * len(data), len(train_loader.dataset),
                    100. * batch_idx / len(train_loader), loss.data[0]))
        model.train(False)
        running_loss = 0
        running_correct = 0
        for batch_idx, (data, target) in enumerate(valid_loader):
            data = Variable(data.cuda())
            target = Variable(target.cuda())
            pred = model(data)
            _, pred_max = torch.max(pred.data, 1)

            loss = F.nll_loss(pred, target)
            running_loss += loss.data[0]
            running_correct += torch.sum(pred_max == target.data)
    
        print("\nEpoch", epoch+1, "results:")
        print("Val log loss:", running_loss / len(valid_idx))
        print("Val Accuracy:", running_correct / len(valid_idx), "\n")
        model.train(True)

In [25]:
train(2)


Epoch 1 results:
Val log loss: 0.0004989427044987679
Val Accuracy: 0.9916666666666667 


Epoch 2 results:
Val log loss: 0.0004500547024227368
Val Accuracy: 0.9922666666666666 

