In [1]:
# import the required libraries
import torch
import torchvision
import torch.nn as nn
from torch.utils.data import DataLoader

#=======================================================#
#           Download and Load the Dataset               #
#=======================================================#
train_data = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=torchvision.transforms.ToTensor())
test_data = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=torchvision.transforms.ToTensor())

#=======================================================#
#           Parameters to use while training            #
#=======================================================#
input_size = 28 * 28
num_classes = 10
batch_size = 64
num_epochs = 5
learning_rate = 0.001


#=====================================================================================#
#          Load into DataLoader Which will make easire to access and train            #
#=====================================================================================#

# dataloaders will shuffle and split into batches and create iterators
train_data_loader =DataLoader(dataset=train_data, batch_size=batch_size, shuffle=True)
test_data_loader =DataLoader(dataset=test_data, batch_size=batch_size, shuffle=True)

# create the model
model = nn.Linear(input_size, num_classes)
# loss function
criterion = nn.CrossEntropyLoss()
# model optimizer
optimizer = torch.optim.Adam(model.parameters(), learning_rate)

# iterating the throught the network
for i in range(num_epochs):
  for batch, (train_images, train_labels) in enumerate(train_data_loader):

    # flattening the image from 28 * 28 to 784
    train_images = train_images.reshape(-1, input_size)
    # passing the flatten image to model
    output = model(train_images)
    # calculating the loss between predicted and actual ground truths
    loss = criterion(output, train_labels)

    optimizer.zero_grad()
    # backward propagation
    loss.backward()
    optimizer.step()
  print(f"epoch: {i}, batch: {batch}, loss: {loss}")


epoch: 0, batch: 937, loss: 0.3805159032344818
epoch: 1, batch: 937, loss: 0.0816948264837265
epoch: 2, batch: 937, loss: 0.3752652406692505
epoch: 3, batch: 937, loss: 0.2628442645072937
epoch: 4, batch: 937, loss: 0.4250142574310303


In [2]:
dtotal_labels = 0
correct_detected = 0

# testing the model
for test_images, test_labels in test_data_loader:
  test_images = test_images.reshape(-1, input_size)
  model_outputs = model(test_images)
  _, predicted = torch.max(model_outputs.data, -1)
  total_labels = predicted.size(0)
  correct_detected = (test_labels == predicted).sum()
print(f"model accuracy, {(correct_detected/total_labels)*100}%")

model accuracy, 93.75%


In [3]:
# saving the model
torch.save(model.state_dict(), "model.ckpt")

In [4]:
model.state_dict()

OrderedDict([('weight',
              tensor([[ 0.0345,  0.0191,  0.0031,  ..., -0.0050,  0.0101,  0.0163],
                      [ 0.0284, -0.0345,  0.0088,  ..., -0.0078,  0.0103, -0.0354],
                      [ 0.0221,  0.0078,  0.0123,  ..., -0.0309,  0.0095,  0.0126],
                      ...,
                      [-0.0123, -0.0065,  0.0026,  ...,  0.0280,  0.0203, -0.0198],
                      [-0.0233, -0.0168,  0.0167,  ...,  0.0263,  0.0324,  0.0061],
                      [ 0.0323, -0.0209,  0.0338,  ...,  0.0306,  0.0342, -0.0202]])),
             ('bias',
              tensor([-0.2920,  0.3931,  0.0362, -0.2331,  0.1063,  0.6721, -0.0801,  0.3640,
                      -0.8345, -0.1194]))])