In [20]:
import numpy as np
import math
import torch
import pandas as pd
import matplotlib.pyplot as plt
from torchvision import datasets, transforms

In [21]:
# Preprocessing applied to every MNIST image: convert it to a tensor, then
# normalise with mean 0.1307 and standard deviation 0.3081.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

In [22]:
# Directory the MNIST files are downloaded to / read from.
data_path = "./MNIST"

# NOTE(review): the split names are crossed relative to torchvision's flag --
# `train=True` (60,000 images) is bound to `data_test` and `train=False`
# (10,000 images) to `data_train`. The size check later in this notebook
# expects exactly these counts, so this looks intentional -- confirm.
data_test = datasets.MNIST(
    root=data_path,
    train=True,
    download=True,
    transform=transform,
)
data_train = datasets.MNIST(
    root=data_path,
    train=False,
    download=True,
    transform=transform,
)


In [23]:
from torch import nn
from torch.nn import functional as F
from torch import optim

In [24]:
# Sanity check on the split sizes; use the built-in len() rather than calling
# the __len__ dunder directly.
print("the number of your training data (must be 10,000) = ", len(data_train))
print("hte number of your testing data (must be 60,000) = ", len(data_test))

the number of your training data (must be 10,000) =  10000
hte number of your testing data (must be 60,000) =  60000


In [25]:
# Device the model and every batch are moved to; this notebook runs on CPU.
device = torch.device("cpu")

In [26]:
class classification(nn.Module):
    """Fully connected classifier mapping 28*28 inputs to 10 class scores.

    Layer widths are 784 -> 400 -> 100 -> 10. The two hidden stages use a
    sigmoid activation; the last stage applies a log-softmax over dim 1, so
    ``forward`` returns log-probabilities.
    """

    def __init__(self):
        super().__init__()

        # Stage 1: flattened image (28*28) -> 20*20 hidden units.
        self.classifier1 = nn.Sequential(
            nn.Linear(28 * 28, 20 * 20),
            nn.Sigmoid(),
        )
        # Stage 2: 20*20 -> 10*10 hidden units.
        self.classifier2 = nn.Sequential(
            nn.Linear(20 * 20, 10 * 10),
            nn.Sigmoid(),
        )
        # Stage 3: 10*10 -> 10 outputs, normalised as log-probabilities.
        self.classifier3 = nn.Sequential(
            nn.Linear(10 * 10, 10),
            nn.LogSoftmax(dim=1),
        )

    def forward(self, inputs):
        """Run a batch through the three stages.

        Args:
            inputs: tensor whose first dimension is the batch; all remaining
                dimensions are flattened and must total 28*28 elements.

        Returns:
            Tensor of shape [batchSize, 10] holding log-probabilities.
        """
        flat = inputs.view(inputs.shape[0], -1)              # [batchSize, 28*28]
        hidden = self.classifier2(self.classifier1(flat))    # [batchSize, 10*10]
        return self.classifier3(hidden)                      # [batchSize, 10]


In [31]:
# Model, loss, optimiser and learning-rate schedule used for training.
learning_rate_value = 1e-3
classifier = classification().to(device)

# NOTE(review): CrossEntropyLoss applies log-softmax itself, while the model
# already ends in LogSoftmax; re-normalising log-probabilities leaves them
# unchanged, so the result is the same but one of the two is redundant.
criterion = nn.CrossEntropyLoss()

# NOTE(review): weight_decay=0.38 is a very strong L2 penalty compared with
# commonly used values -- confirm this is intended.
optimizer = optim.Adam(
    classifier.parameters(),
    lr=learning_rate_value,
    weight_decay=0.38,
)
# Multiplies the learning rate by 0.99 on every scheduler.step() call.
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99)


In [32]:
batch_size = 16   # samples per mini-batch handed to the DataLoaders
epochs = 100      # number of passes the training loop makes over the training loader

In [33]:
# Mini-batch loaders over the two MNIST splits. Both reshuffle on every pass
# and drop a trailing batch that is smaller than `batch_size`.
training_set = torch.utils.data.DataLoader(
    dataset=data_train,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)
testing_set = torch.utils.data.DataLoader(
    dataset=data_test,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)

# Per-epoch histories filled by the training loop:
# mean loss (L) and mean accuracy (A) on the training (tr) / testing (te) loader.
tr_Liters, tr_Aters = [], []
te_Liters, te_Aters = [], []

In [None]:
# Train for `epochs` passes over `training_set`; after each pass evaluate on
# `testing_set` and record the per-epoch mean loss / accuracy of both.
n_train_batches = len(training_set)
n_test_batches = len(testing_set)

for i in range(epochs):
    tr_avg_loss = 0.0
    tr_avg_acc = 0.0
    te_avg_loss = 0.0
    te_avg_acc = 0.0

    # ---- optimisation pass ----
    classifier.train()
    for x, y in training_set:
        x = x.view(-1, 28*28).to(device)
        y = y.to(device)

        optimizer.zero_grad()
        y_pred = classifier(x)
        tr_loss = criterion(y_pred, y)
        tr_loss.backward()
        optimizer.step()

        # .item() yields a plain float: accumulating the tensor itself would
        # keep every batch's autograd graph alive and store tensors that
        # require grad in the history lists.
        tr_avg_loss += tr_loss.item() / n_train_batches
        tr_acc = (torch.argmax(y_pred, 1) == y).float().mean()
        tr_avg_acc += tr_acc.item() / n_train_batches

    # Decay the learning rate once per epoch. Stepping after every batch
    # multiplied it by 0.99 len(training_set) times per epoch, shrinking it to
    # practically zero within the first epoch.
    scheduler.step()

    # ---- evaluation pass (no gradients needed) ----
    classifier.eval()
    with torch.no_grad():
        for x, y in testing_set:
            x = x.view(-1, 28*28).to(device)
            y = y.to(device)

            y_pred = classifier(x)
            te_loss = criterion(y_pred, y)

            te_avg_loss += te_loss.item() / n_test_batches
            te_acc = (torch.argmax(y_pred, 1) == y).float().mean()
            te_avg_acc += te_acc.item() / n_test_batches

    # One history entry per epoch, all stored as Python floats.
    tr_Liters.append(tr_avg_loss)
    tr_Aters.append(tr_avg_acc)
    te_Liters.append(te_avg_loss)
    te_Aters.append(te_avg_acc)
    print(i)


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47


In [None]:
# Plot the per-epoch mean loss recorded in tr_Liters / te_Liters.
# float() turns every entry into a plain number (entries may be Python floats
# or 0-dim tensors), and the x-axis follows the number of epochs actually
# recorded, so the plot also works when training was stopped early.
train_losses = [float(value) for value in tr_Liters]
test_losses = [float(value) for value in te_Liters]

plt.figure(1, figsize=(8, 8))
plt.plot(np.arange(len(train_losses)), train_losses, c='r', label='train loss')
plt.plot(np.arange(len(test_losses)), test_losses, c='b', label='test loss')
plt.title('loss')
plt.xlabel('epoch')
plt.ylabel('loss')
plt.xticks(range(0, max(len(train_losses), len(test_losses)), 10))
plt.legend()
plt.show()

In [None]:
# Plot the per-epoch mean accuracy recorded in tr_Aters / te_Aters.
# The x-axis follows the number of epochs actually recorded (the previously
# used name `training_epochs` is not defined anywhere in this notebook), and
# the second curve is labelled as the test accuracy it shows.
train_accuracies = [float(value) for value in tr_Aters]
test_accuracies = [float(value) for value in te_Aters]

plt.figure(1, figsize=(8, 8))
plt.plot(np.arange(len(train_accuracies)), train_accuracies, c='r', label='train accuracy')
plt.plot(np.arange(len(test_accuracies)), test_accuracies, c='b', label='test accuracy')
plt.title('accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.xticks(range(0, max(len(train_accuracies), len(test_accuracies)), 10))
plt.legend()
plt.show()