In [1]:
import torch 
import torch.nn as nn
import torchvision.datasets as vision_dsets
from torch.utils import data
import torchvision.transforms as T # Transformation functions to manipulate images


def get_mnist_dataloader(root='untracked', train=True, transforms=T.ToTensor(),
                         download=True, batch_size=32, num_worker=1):
    """Build the MNIST training and test data loaders.

    Args:
        root: Directory where the dataset files are stored.
        train: Unused. Both the training and the test split are always
            built; the parameter is kept only for backward compatibility.
        transforms: Transform passed to both datasets (applied to the images).
        download: Forwarded to ``torchvision.datasets.MNIST(download=...)``.
        batch_size: Batch size used by both loaders.
        num_worker: Number of worker processes used by both loaders.

    Returns:
        A ``(trainDataLoader, testDataLoader)`` tuple.
    """
    print ("[+] Get the MNIST DATA")

    # We use the MNIST data for this tutorial.
    mnist_train = vision_dsets.MNIST(root=root,  # root is the place to store your data
                                     train=True,
                                     transform=transforms,
                                     download=download)
    mnist_test = vision_dsets.MNIST(root=root,
                                    train=False,
                                    transform=transforms,
                                    download=download)

    # A DataLoader is an iterator that fetches the data in batches of the
    # requested size.
    # Practical guide: what is the optimal batch size?
    #   - Usually, the higher the better.
    #   - We recommend using a multiple of 2 to utilize the GPU memory
    #     efficiently (related to bit size).
    trainDataLoader = data.DataLoader(dataset=mnist_train,
                                      batch_size=batch_size,
                                      shuffle=True,  # reshuffle every epoch (very important for training performance)
                                      num_workers=num_worker)  # honor the caller's setting instead of a hard-coded 1

    testDataLoader = data.DataLoader(dataset=mnist_test,
                                     batch_size=batch_size,
                                     shuffle=False,  # no need to shuffle the test data
                                     num_workers=num_worker)
    print ("[+] Finished loading data & Preprocessing")
    return trainDataLoader, testDataLoader

# Build both loaders; images are only converted to tensors (no augmentation).
trainDataLoader, testDataLoader = get_mnist_dataloader(
    transforms=T.Compose([T.ToTensor()]),
)

[+] Get the MNIST DATA
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to untracked/MNIST/raw/train-images-idx3-ubyte.gz


9913344it [00:01, 5978787.37it/s]                             


Extracting untracked/MNIST/raw/train-images-idx3-ubyte.gz to untracked/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to untracked/MNIST/raw/train-labels-idx1-ubyte.gz


29696it [00:00, 176271.86it/s]                          


Extracting untracked/MNIST/raw/train-labels-idx1-ubyte.gz to untracked/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to untracked/MNIST/raw/t10k-images-idx3-ubyte.gz


1649664it [00:01, 1169016.88it/s]                             


Extracting untracked/MNIST/raw/t10k-images-idx3-ubyte.gz to untracked/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to untracked/MNIST/raw/t10k-labels-idx1-ubyte.gz


5120it [00:00, 21410604.67it/s]         

Extracting untracked/MNIST/raw/t10k-labels-idx1-ubyte.gz to untracked/MNIST/raw

[+] Finished loading data & Preprocessing





In [4]:
import torch 
import torch.nn as nn

class Model(nn.Module):
    """MNIST classifier with a selectable architecture.

    Args:
        model: ``"linear"`` builds a two-layer perceptron (784 -> 128 -> 10);
            ``"cnn"`` builds two convolutions followed by two fully
            connected layers (-> 512 -> 10).

    Raises:
        ValueError: If ``model`` is not one of ``SUPPORTED_MODELS``.
    """

    SUPPORTED_MODELS = ("linear", "cnn")

    def __init__(self, model="linear"):
        super().__init__()
        # Fail early: an unknown type would otherwise create a module without
        # layers that only breaks later, inside forward(), with an
        # AttributeError.
        if model not in self.SUPPORTED_MODELS:
            raise ValueError(
                "Unsupported model type %r; expected one of %s"
                % (model, ", ".join(self.SUPPORTED_MODELS))
            )
        self.model_type = model
        if model == "linear":
            self.linear1 = nn.Linear(784, 128)
            self.linear2 = nn.Linear(128, 10)
        else:  # "cnn"
            self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1)
            self.conv2 = nn.Conv2d(in_channels=32, out_channels=28, kernel_size=3, stride=2)
            # For 1x28x28 inputs: 28 -> 26 after conv1, 26 -> 12 after conv2,
            # so the flattened feature size is 28 channels * 12 * 12 = 4032.
            self.fc1 = nn.Linear(in_features=4032, out_features=512)
            self.fc2 = nn.Linear(in_features=512, out_features=10)

    def forward(self, x):
        """Return the 10 class scores (logits) for every sample in ``x``.

        The first dimension of ``x`` is treated as the batch dimension.
        """
        batch_size = x.size(0)

        if self.model_type == "linear":
            # Flatten each sample into a single feature vector.
            x = x.view(batch_size, -1)
            x = self.linear1(x)
            x = nn.functional.relu(x)
            x = self.linear2(x)
        else:
            x = nn.functional.relu(self.conv1(x))
            x = nn.functional.relu(self.conv2(x))
            # Flatten the feature maps before the fully connected layers.
            x = x.contiguous().view(batch_size, -1)
            x = self.fc1(x)
            x = nn.functional.relu(x)
            x = self.fc2(x)
        return x



In [9]:
# One model per architecture, created in the same order as before
# (linear first, then cnn).
linear_model, cnn_model = (Model(model=kind) for kind in ("linear", "cnn"))

# TODO: move the models (and the input batches) to a CUDA device when one is available.

In [11]:
import torch.optim as optim
from torch.autograd import Variable


def train(net, train_loader, optimizer, criterion, epoch=2, log_interval=500):
    """Train ``net`` for ``epoch`` passes over ``train_loader``.

    Args:
        net: Model to optimize; it is switched to training mode.
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        optimizer: Optimizer that updates the parameters of ``net``.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        epoch: Number of passes over ``train_loader``.
        log_interval: Print the mean loss every ``log_interval`` mini-batches.

    Raises:
        ValueError: If ``log_interval`` is not positive.
    """
    if log_interval <= 0:
        raise ValueError("log_interval must be a positive integer")

    net.train()
    for e in range(epoch):
        running_loss = 0.0
        # Each batch is a tuple of (input_data, labels). Tensors are used
        # directly: wrapping them in the deprecated autograd.Variable is a no-op.
        for i, (inputs, labels) in enumerate(train_loader):
            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)  # get output after passing through the network
            loss = criterion(outputs, labels)  # compute model's score using the loss function
            loss.backward()  # perform back-propagation from the loss
            optimizer.step()  # perform gradient descent with given optimizer

            # print statistics
            running_loss += loss.item()
            if (i + 1) % log_interval == 0:  # print every `log_interval` mini-batches
                print('[%d, %5d] loss: %.3f' % (e + 1, i + 1, running_loss / log_interval))
                running_loss = 0.0

# Shared loss function plus one SGD optimizer per model (same hyper-parameters).
criterion = nn.CrossEntropyLoss()
models = [linear_model, cnn_model]
optimizers = [
    torch.optim.SGD(candidate.parameters(), lr=0.001, momentum=0.9)
    for candidate in models
]


# Train each model with its own optimizer for two epochs.
for model, opti in zip(models, optimizers):
    print("--- Training Started with model ---")
    print(model)
    train(model, trainDataLoader, opti, criterion, 2)


[1,   500] loss: 1.748
[1,  1000] loss: 0.763
[1,  1500] loss: 0.527
[2,   500] loss: 0.415
[2,  1000] loss: 0.385
[2,  1500] loss: 0.354
[1,   500] loss: 1.403
[1,  1000] loss: 0.344
