In [33]:
#!pip install torchvision
import torch
import torch.nn as nn
import torchvision.datasets as datasets 
import torchvision.transforms as transforms

In [34]:
# Hyper-parameters and run configuration. Everything a reader might tune lives here.
seed = 42               # Fixed RNG seed so weight init and batch shuffling repeat across runs
torch.manual_seed(seed) # Seed torch's global generator before any model or loader is built

input_size = 784        # Number of input neurons (28*28 image pixels, flattened)
hidden_size = 400       # Number of neurons in each hidden layer
out_size = 10           # Number of classes (0-9)
epochs = 10             # How many times we pass our entire dataset into our network
batch_size = 100        # Number of samples fed to the network in one iteration
learning_rate = 0.001   # Step size handed to the optimizer

In [35]:
# MNIST training split; fetched into ./data when it is not already on disk.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

# MNIST test split, read from the same root directory (no download requested here).
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
)

In [36]:
# Mini-batch iterators over the two splits. Only the training batches are
# reshuffled each epoch; the test order stays fixed.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

test_loader = torch.utils.data.DataLoader(
    test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [37]:
class Model(nn.Module):
    """Fully connected classifier: two ReLU-activated hidden layers, then a linear output layer.

    Args:
        input_neurons: Number of features in each input row.
        hidden_neurons: Width shared by both hidden layers.
        output_neurons: Number of scores produced per input row (default 10).
    """

    def __init__(self, input_neurons, hidden_neurons, output_neurons=10):
        super().__init__()

        # Layers are created in forward-pass order.
        self.fc1 = nn.Linear(in_features=input_neurons, out_features=hidden_neurons)
        self.fc2 = nn.Linear(in_features=hidden_neurons, out_features=hidden_neurons)
        self.output_layer = nn.Linear(in_features=hidden_neurons, out_features=output_neurons)

        # A single activation module is reused after each hidden layer.
        self.ReLU = nn.ReLU()
        self.init_weights()

    def init_weights(self):
        """Re-draw the weights of both hidden layers with Kaiming-normal initialisation.

        Biases and the output layer are left as nn.Linear created them.
        """
        for hidden_layer in (self.fc1, self.fc2):
            nn.init.kaiming_normal_(hidden_layer.weight)

    def forward(self, X):
        """Return the raw output-layer scores for the batch ``X`` (no softmax is applied)."""
        first_hidden = self.ReLU(self.fc1(X))
        second_hidden = self.ReLU(self.fc2(first_hidden))
        return self.output_layer(second_hidden)

In [38]:
# Build the classifier from the sizes chosen in the configuration cell.
model = Model(input_neurons=input_size, hidden_neurons=hidden_size, output_neurons=out_size)

# True when a CUDA GPU is available; the training and evaluation cells read
# this flag to decide whether batches are moved to the GPU as well.
CUDA = torch.cuda.is_available()
if CUDA:
    model = model.cuda()

# Adam updates the model parameters; cross-entropy is the training loss.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)
loss_function = nn.CrossEntropyLoss()

In [39]:
# Train the network
# Each epoch makes one pass over train_loader, updating the weights after every
# mini-batch, then prints the mean batch loss and the running training accuracy.
model.train()  # be explicit about the mode in case the model was left in eval mode
for epoch in range(epochs):
    correct_train = 0   # plain Python int: number of correctly classified training samples
    running_loss = 0.0  # sum of the per-batch losses for this epoch
    for images, labels in train_loader:
        # Flatten each image into one row of input_size features (28*28 -> 784).
        images = images.view(-1, input_size)

        if CUDA:  # move the batch to the GPU the model lives on
            images = images.cuda()
            labels = labels.cuda()

        optimizer.zero_grad()                   # clear the residual gradients
        outputs = model(images)                 # forward pass
        loss = loss_function(outputs, labels)   # calculate the loss
        loss.backward()                         # backpropagate and compute the gradients
        optimizer.step()                        # update the model parameters

        running_loss += loss.item()
        # Predictions come from the scores computed before this step's update.
        # detach() keeps the bookkeeping out of the autograd graph, and .item()
        # keeps the counter a Python int instead of a (possibly GPU) tensor.
        predicted = outputs.detach().argmax(dim=1)
        correct_train += (predicted == labels).sum().item()

    print('Epoch [{}/{}], Training Loss: {:.3f}, Training Accuracy: {:.3f}%'.format(
        epoch + 1,
        epochs,
        running_loss / len(train_loader),
        100 * correct_train / len(train_dataset)))
            
        
        
        

Epoch [1/10], Training Loss: 0.238, Training Accuracy: 93.038%
Epoch [2/10], Training Loss: 0.085, Training Accuracy: 97.340%
Epoch [3/10], Training Loss: 0.053, Training Accuracy: 98.355%
Epoch [4/10], Training Loss: 0.038, Training Accuracy: 98.795%
Epoch [5/10], Training Loss: 0.029, Training Accuracy: 99.045%
Epoch [6/10], Training Loss: 0.024, Training Accuracy: 99.205%
Epoch [7/10], Training Loss: 0.018, Training Accuracy: 99.382%
Epoch [8/10], Training Loss: 0.017, Training Accuracy: 99.423%
Epoch [9/10], Training Loss: 0.012, Training Accuracy: 99.582%
Epoch [10/10], Training Loss: 0.017, Training Accuracy: 99.448%


In [41]:
# Evaluate on the test split and print the share of correctly classified images.
model.eval()  # switch to inference mode so mode-dependent layers, if any are added, behave correctly
correct = 0   # plain Python int: number of correctly classified test samples
with torch.no_grad():  # no gradients are needed for evaluation
    for images, labels in test_loader:
        # Flatten each image into one row of input_size features (28*28 -> 784).
        images = images.view(-1, input_size)

        if CUDA:  # move the batch to the GPU the model lives on
            images = images.cuda()
            labels = labels.cuda()

        outputs = model(images)
        predicted = outputs.argmax(dim=1)  # index of the highest score = predicted class
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: {} %'.format(100 * correct / len(test_dataset)))

Accuracy of the network on the 10000 test images: 97.87 %
