In [14]:
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.autograd import Variable # to demonstrate autograd
from tqdm import tqdm

In [2]:
input_size = 784 # 28x28 image = 784 pixels
hidden_size = 400 # Number of hidden neurons (i/p + o/p)/2 is a good estimate of # of hidden neurons
out_size = 10 # Number of classes (0-9)
epochs = 10 # Number of times we want to go through the entire dataset
batch_size = 100 # Number of input examples considered per iteration
learning_rate = 0.001 # Speed of learning

In [3]:
train_dataset = datasets.MNIST(root="./mnist_data",
                               train = True,
                               transform=transforms.ToTensor(), # Originally they are just images, here we are transforming all the images to tensors.
                               download=True)

test_dataset = datasets.MNIST(root="./mnist_data",
                              train=False,
                              transform=transforms.ToTensor())

In [4]:
# Data loader class to load the data in batches and also shuffle to not learn any spurious patterns.

train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                           batch_size=batch_size,
                                           shuffle=True)

test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
                                          batch_size=batch_size,
                                          shuffle=True)

In [6]:
#Define the network
class Net(nn.Module):
    def __init__(self, input_size, hidden_size, out_size):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)   # First layer
        self.relu = nn.ReLU()                           # First layer activation
        self.fc2 = nn.Linear(hidden_size,hidden_size)   # Second layer
        self.fc3 = nn.Linear(hidden_size,out_size)      # 3rd layer
        # 784 400; 400 400; 400 10 - 3 layers
        
    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.relu(out)
        out = self.fc3(out)
        return out

In [8]:
net = Net(input_size=input_size, hidden_size=hidden_size, out_size=out_size)
CUDA = torch.cuda.is_available()

if CUDA: 
    net.cuda()

criterion = nn.CrossEntropyLoss() # Cross entropy comes along with softmax. So no need to specify the softmax layer in the net.
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

In [12]:
# Visualize train loader

for i, (images, labels) in enumerate(train_loader):
    print(images.size()) # original size of the images in batch. Only one channel since grayscale image. 
    images = images.view(-1,784)
    print(images.size()) # flattened size of the images in batch


torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([100, 1, 28, 28])
torch.Size([100, 784])
torch.Size([

In [21]:
# Train the network
total_train = 0 

for epoch in range(epochs):
    running_loss = 0
    correct_train = 0 # number of correct classifications
    for i, (images, labels) in enumerate(train_loader):
        # flatten the images
        images = images.view(-1, 784)
        if CUDA:
            images = images.cuda()
            labels = labels.cuda()

        optimizer.zero_grad() # Clear previous gradients so the gradients are not accumulated. (wt = wt - lr * param_grad) clear the param_grad here. 
        outputs = net(images)
        #print(outputs.size())
        
        _, predicted = torch.max(outputs.data, 1) # take max across columns per row. Returns 2 arguments: 1st arg is the max value and the 2nd arg is the index which is the predicted label in our case.

        total_train += labels.size(0)

        if CUDA:
            correct_train += (predicted.cpu() == labels.cpu()).sum() # sum function is not available on GPU so we need to move the data to cpu before running it. 
        else:
            correct_train += (predicted == labels).sum()
        
        loss = criterion(outputs, labels)
        running_loss += loss.item()
        loss.backward()
        optimizer.step()

        if (i+1) % 100 == 0:
            print('Epoch [{}/{}], Training Loss: {:.3f}, Training Accuracy: {:.3f}%'.format(epoch+1, epochs, running_loss/len(train_loader), (100*correct_train.double()/len(train_dataset))))
        

Epoch [1/10], Training Loss: 0.009, Training Accuracy: 16.373%
Epoch [1/10], Training Loss: 0.018, Training Accuracy: 32.768%
Epoch [1/10], Training Loss: 0.026, Training Accuracy: 49.143%
Epoch [1/10], Training Loss: 0.037, Training Accuracy: 65.492%
Epoch [1/10], Training Loss: 0.048, Training Accuracy: 81.827%
Epoch [1/10], Training Loss: 0.056, Training Accuracy: 98.235%
Epoch [2/10], Training Loss: 0.007, Training Accuracy: 16.468%
Epoch [2/10], Training Loss: 0.013, Training Accuracy: 32.952%
Epoch [2/10], Training Loss: 0.019, Training Accuracy: 49.437%
Epoch [2/10], Training Loss: 0.026, Training Accuracy: 65.853%
Epoch [2/10], Training Loss: 0.033, Training Accuracy: 82.280%
Epoch [2/10], Training Loss: 0.041, Training Accuracy: 98.673%
Epoch [3/10], Training Loss: 0.004, Training Accuracy: 16.528%
Epoch [3/10], Training Loss: 0.009, Training Accuracy: 33.035%
Epoch [3/10], Training Loss: 0.014, Training Accuracy: 49.552%
Epoch [3/10], Training Loss: 0.019, Training Accuracy: 

In [23]:
# Test the network

correct = 0
total = 0

for images,labels in test_loader:
    images = images.view(-1,784)

    if CUDA:
        images.cuda()
    
    outputs = net(images)

    _, predicted = torch.max(outputs.data, 1)
    total += labels.size(0)

    if CUDA:
        correct += (predicted.cpu() == labels.cpu()).sum()
    else:
        correct += (predicted == labels).sum()

print(f" Final accuracy: {100*correct/total}")

 Final accuracy: 98.12999725341797


In [1]:
# Train w/ intel optimized model.
import intel_extension_for_pytorch as ipex