In [1]:
import torch
import torchvision.datasets as datasets
import torchvision.transforms

In [3]:
# Declare the class for our neural network
class NeuralNet(torch.nn.Module):
    """Fully connected network: two ReLU hidden layers followed by a linear output layer.

    Args:
        input_size: Number of features in each input vector.
        hidden_size: Width shared by both hidden layers.
        output_size: Number of values produced by the final layer.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        # Layers are declared in the order forward() applies them.
        self.l1 = torch.nn.Linear(input_size, hidden_size)   # input  -> hidden
        self.l2 = torch.nn.Linear(hidden_size, hidden_size)  # hidden -> hidden
        self.l3 = torch.nn.Linear(hidden_size, output_size)  # hidden -> output
        # One ReLU module is reused after both hidden layers. The original author
        # notes that another activation such as torch.nn.Tanh() may suit other
        # problems better.
        self.activation = torch.nn.ReLU()
        self.init_weights()

    def init_weights(self):
        """Re-draw the weights of ``l1`` and ``l2`` with Kaiming-normal initialisation.

        Biases and the output layer ``l3`` keep whatever ``torch.nn.Linear`` assigned.
        """
        for hidden_layer in (self.l1, self.l2):
            torch.nn.init.kaiming_normal_(hidden_layer.weight)

    def forward(self, X):
        """Return the raw outputs of the final linear layer for input ``X``."""
        hidden = self.activation(self.l1(X))
        hidden = self.activation(self.l2(hidden))
        return self.l3(hidden)
        

In [4]:
# MNIST training split stored under ./data (downloaded when requested via
# download=True); each image is passed through ToTensor() on access.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)

In [5]:
# MNIST held-out split read from ./data (train=False); each image is passed
# through ToTensor() on access.
test_dataset = datasets.MNIST(
    root='./data',
    train=False,
    transform=torchvision.transforms.ToTensor(),
)

In [6]:
batch_size = 100

# DataLoader makes each dataset iterable in mini-batches of `batch_size`.
# Training batches are reshuffled every epoch so the gradient updates do not
# depend on the stored order of the samples.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

# Evaluation only counts correct predictions over the whole test set, which does
# not depend on sample order, so the test set is read sequentially. This keeps
# the evaluation pass deterministic.
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

In [7]:
# Step size handed to the Adam optimizer below.
learning_rate = 0.001

# Loss function: cross-entropy between the network outputs and the integer labels.
cel = torch.nn.CrossEntropyLoss()

# Instantiate the model declared earlier: 784 inputs, 392 hidden units, 10 outputs.
# Move it to the GPU when CUDA is available, otherwise leave it on the CPU.
net = NeuralNet(784, 392, 10)
net = net.cuda() if torch.cuda.is_available() else net

# Adam optimizer over all parameters of the model.
optimizer = torch.optim.Adam(params=net.parameters(), lr=learning_rate)

In [8]:
# Train `net` for a fixed number of epochs, reporting the mean batch loss and
# the training accuracy after each epoch.
epochs = 10

# Explicitly select training mode so this cell behaves the same even if the
# model was switched to eval mode by an earlier run of another cell.
net.train()

for epoch_counter in range(epochs):
    total_loss = 0.0   # sum of per-batch losses for this epoch
    hit_counter = 0    # number of correctly classified training samples (Python int)

    for images, labels in train_loader:
        # Flatten each 28x28 image into a 784-element vector for the first linear layer.
        images = images.view(-1, 784)
        if torch.cuda.is_available():
            images = images.cuda()
            labels = labels.cuda()

        outputs = net(images)
        loss = cel(outputs, labels)  # network outputs vs. actual class labels

        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        # Predicted class = index of the largest output. detach() replaces the
        # discouraged `.data` attribute, and .item() keeps the running count a
        # plain int instead of a tensor that may live on the GPU.
        _, predicted = torch.max(outputs.detach(), 1)
        hit_counter += (predicted == labels).sum().item()

    print('Results for epoch {}/{} : '.format(epoch_counter+1, epochs))
    accuracy = 100 * hit_counter / len(train_dataset)
    training_loss = total_loss / len(train_loader)  # mean of the per-batch losses
    print('Training Loss: {:.3f}, Training Accuracy: {:.3f}%'.format(training_loss, accuracy))

Results for epoch 1/10 : 
Training Loss: 0.238, Training Accuracy: 93.080%
Results for epoch 2/10 : 
Training Loss: 0.087, Training Accuracy: 97.322%
Results for epoch 3/10 : 
Training Loss: 0.054, Training Accuracy: 98.245%
Results for epoch 4/10 : 
Training Loss: 0.039, Training Accuracy: 98.738%
Results for epoch 5/10 : 
Training Loss: 0.028, Training Accuracy: 99.125%
Results for epoch 6/10 : 
Training Loss: 0.023, Training Accuracy: 99.247%
Results for epoch 7/10 : 
Training Loss: 0.019, Training Accuracy: 99.393%
Results for epoch 8/10 : 
Training Loss: 0.016, Training Accuracy: 99.468%
Results for epoch 9/10 : 
Training Loss: 0.015, Training Accuracy: 99.475%
Results for epoch 10/10 : 
Training Loss: 0.013, Training Accuracy: 99.528%


In [9]:
# Measure classification accuracy on the test set.
# Switch to inference mode first. NeuralNet has no dropout or batch-norm layers,
# so this does not change its outputs, but it keeps the cell correct if such
# layers are added later.
net.eval()

with torch.no_grad():  # no gradients are needed for evaluation
    hit_counter = 0    # number of correctly classified test samples
    for images, labels in test_loader:
        if torch.cuda.is_available():
            images = images.cuda()
            labels = labels.cuda()
        # Flatten each 28x28 image into a 784-element vector.
        images = images.view(-1, 28*28)
        outputs = net(images)
        # Predicted class = index of the largest output. `.data` is unnecessary
        # here because autograd is already disabled by torch.no_grad().
        _, predicted = torch.max(outputs, 1)
        hit_counter += (predicted == labels).sum().item()

    # Percentage of correct predictions over the whole test set.
    accuracy = 100 * hit_counter / len(test_dataset)
    print('Accuracy to test_set: {} %'.format(accuracy))

Accuracy to test_set: 97.97 %
