In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from torchsummary import summary
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from keras.datasets import mnist

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cpu


In [3]:
# 使用 keras 直接載入 MNIST dataset
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()

# Normalization
train_images = train_images.astype('float32') / 255
test_images = test_images.astype('float32') / 255

# 利用 sklearn，將每一類都以 8:2 的比例分成訓練資料和測試資料
features_train, features_test, targets_train, targets_test = train_test_split(train_images, train_labels, test_size = 0.2, random_state = 42)

In [4]:
features_train[0]

array([[0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.        , 0.        , 0.        , 0.        ,
        0.        , 0.    

In [5]:
targets_train[0]

5

In [6]:
# 將切好的 data 轉成 tensor 形式
# Training Datasets
featuresTrain = torch.from_numpy(features_train)
targetsTrain = torch.from_numpy(targets_train).type(torch.LongTensor)     # data type is long

In [7]:
type(featuresTrain)

torch.Tensor

In [8]:
featuresTrain.dtype

torch.float32

In [9]:
featuresTrain.shape

torch.Size([48000, 28, 28])

In [10]:
targetsTrain

tensor([5, 0, 1,  ..., 0, 0, 4])

In [11]:
# Testing Datasets
featuresTest = torch.from_numpy(features_test)
targetsTest = torch.from_numpy(targets_test).type(torch.LongTensor)       # data type is long

In [12]:
# 使用 torch.utils.data.TensorDataset 將 train 和 test datasets 存成 tensor 形式
# Pytorch train and test TensorDataset
train = torch.utils.data.TensorDataset(featuresTrain, targetsTrain)
test = torch.utils.data.TensorDataset(featuresTest, targetsTest)

In [13]:
type(train)

torch.utils.data.dataset.TensorDataset

In [14]:
LR = 0.01                                                        # Learning Rate
batch_size = 100                                                 # Batch size
n_iters = 10000                                                  # Iterations each epoch
num_epochs = n_iters / (len(features_train) / batch_size)        
num_epochs = int(num_epochs)                                     # Epochs

In [15]:
num_epochs

20

In [16]:
# torch.utils.data.TensorDatasetDataLoader(dataset, batch_size=1, shuffle=False,...) 為數據加載器
# 組合數據集和採樣器，並在數據集上提供單進程或多進程迭代器
# Pytorch DataLoader
train_loader = torch.utils.data.DataLoader(train, batch_size = batch_size, shuffle = True)
test_loader = torch.utils.data.DataLoader(test, batch_size = batch_size, shuffle = True)

In [17]:
import matplotlib.pyplot as plt

In [85]:
# Create CNN Model
class CNN_Model(nn.Module):
    def __init__(self):
        super(CNN_Model, self).__init__()
        # Convolution 1 , input_shape=(1,28,28), output_shape=(1,26,26)
        self.cnn1 = nn.Conv2d(in_channels=1, out_channels=4, kernel_size=3, stride=1, padding=0)
        # activation
        self.relu1 = nn.ReLU() 
        # Max pool 1, output_shape=(1,13,13)
        self.maxpool1 = nn.MaxPool2d(kernel_size=2) 
        # Convolution 2, output_shape=(1,11,11)
        self.cnn2 = nn.Conv2d(in_channels=4, out_channels=10, kernel_size=3, stride=1, padding=0)
        # activation
        self.relu2 = nn.ReLU() 
        # Max pool 2, output_shape=(10,5,5)
        self.maxpool2 = nn.MaxPool2d(kernel_size=2)
        # Average pool, output_shape=(10,1,1)
        self.avgpool = nn.AvgPool2d(kernel_size=5)
        # Fully connected 1, input_shape=(1*5*5)
        # self.fc1 = nn.Linear(10 * 5 * 5, 10) 
        self.fc1 = nn.Linear(10 * 1 * 1, 10) 
    
    def forward(self, x):
        # Convolution 1
        out = self.cnn1(x)
        out = self.relu1(out)
        # Max pool 1
        out = self.maxpool1(out)
        # Convolution 2 
        out = self.cnn2(out)
        out = self.relu2(out)
        # Max pool 2 
        out = self.maxpool2(out)
        # Average pool
        out = self.avgpool(out)
        out = out.view(out.size(0), -1)
        # Linear function (readout)
        out = self.fc1(out)
        return out

model = CNN_Model().to(device)                            # Create the CNN Model             
optimizer = torch.optim.Adam(model.parameters(), lr = LR) # 選擇你想用的 optimizer(Adam)
summary(model, (1, 28, 28))                               # 利用 torchsummary 的 summary package 印出模型資訊，input size: (1 * 28 * 28) 
loss_func = nn.CrossEntropyLoss()                         # 選擇想用的 loss function(CrossEntropy)
input_shape = (-1, 1, 28, 28)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 4, 26, 26]              40
              ReLU-2            [-1, 4, 26, 26]               0
         MaxPool2d-3            [-1, 4, 13, 13]               0
            Conv2d-4           [-1, 10, 11, 11]             370
              ReLU-5           [-1, 10, 11, 11]               0
         MaxPool2d-6             [-1, 10, 5, 5]               0
         AvgPool2d-7             [-1, 10, 1, 1]               0
            Linear-8                   [-1, 10]             110
Total params: 520
Trainable params: 520
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.07
Params size (MB): 0.00
Estimated Total Size (MB): 0.07
----------------------------------------------------------------


In [66]:
# 訓練 function
def fit_model(model, loss_func, optimizer, input_shape, num_epochs, train_loader, test_loader):
    # Traning the Model
    # 儲存訓練資訊的 List
    training_loss, training_accuracy = [], []
    validation_loss, validation_accuracy = [], []
    for epoch in range(num_epochs):
        # ---------------------------
        # Training Stage
        # ---------------------------
        correct_train, total_train = 0, 0
        for i, (images, labels) in enumerate(train_loader):
            train, labels = images.view(input_shape).to(device), labels.to(device)  # extract training data and label
            optimizer.zero_grad()                                                   # reset gradient
            outputs = model(train)                                                  # 將訓練資料輸入至模型進行訓練 (Forward propagation)
            train_loss = loss_func(outputs, labels)                                 # 計算 loss
            train_loss.backward()                                                   # 將 loss 反向傳播
            optimizer.step()                                                        # 更新權重
            
            # 計算訓練資料的準確度 (correct_train / total_train)
            predicted = torch.max(outputs.data, 1)[1]                               # 取出預測的 maximum
            total_train += len(labels)                                              # 全部的 label 數 (Total number of labels)
            correct_train += (predicted == labels).float().sum()                    # 全部猜中的個數 (Total correct predictions)
        
        # 將 accuracy 和 loss 存入 list
        train_accuracy = 100 * correct_train / float(total_train)                   # training accuracy (To cpu())
        training_accuracy.append(train_accuracy.cpu())
        training_loss.append(train_loss.data.cpu())                                 # training loss (To cpu())

        # --------------------------
        # Testing Stage
        # --------------------------
        correct_test, total_test = 0, 0
        for images, labels in test_loader:
            test, labels = images.view(input_shape).to(device), labels.to(device)   # 取出 testing data 以及 labels(轉 device 的型態)
            outputs = model(test)                                                   # 將測試資料輸入至模型進行測試 (Forward propagation)
            val_loss = loss_func(outputs, labels)                                   # 計算 loss
            
            # 計算測試資料的準確度 (correct_test / total_test)
            predicted = torch.max(outputs.data, 1)[1]                               # 取出預測的 maximum
            total_test += len(labels)                                               # 全部的 label 數 (Total number of labels)
            correct_test += (predicted == labels).float().sum()                     # 全部猜中的個數 (Total correct predictions)
            
        # 將 accuracy 和 loss 存入 list
        val_accuracy = 100 * correct_test / float(total_test)                       # testing accuracy (To cpu())
        validation_accuracy.append(val_accuracy.cpu())
        validation_loss.append(val_loss.data.cpu())                                 # testing loss (To cpu())
        
        # 顯現當前 Epoch 訓練情況
        print('Train Epoch: {}/{} Traing_Loss: {} Traing_acc: {:.6f}% Val_Loss: {} Val_accuracy: {:.6f}%'.format(epoch+1, num_epochs, train_loss.data, train_accuracy, val_loss.data, val_accuracy))
    return training_loss, training_accuracy, validation_loss, validation_accuracy

In [86]:
training_loss, training_accuracy, validation_loss, validation_accuracy = fit_model(model, loss_func, optimizer, input_shape, num_epochs, train_loader, test_loader)

Train Epoch: 1/20 Traing_Loss: 0.8146040439605713 Traing_acc: 56.091667% Val_Loss: 0.5743509531021118 Val_accuracy: 75.366669%
Train Epoch: 2/20 Traing_Loss: 0.4250917434692383 Traing_acc: 82.420830% Val_Loss: 0.264478862285614 Val_accuracy: 84.933334%
Train Epoch: 3/20 Traing_Loss: 0.36569440364837646 Traing_acc: 86.204170% Val_Loss: 0.47232264280319214 Val_accuracy: 86.541664%
Train Epoch: 4/20 Traing_Loss: 0.24268700182437897 Traing_acc: 87.870834% Val_Loss: 0.4188303053379059 Val_accuracy: 86.083336%
Train Epoch: 5/20 Traing_Loss: 0.3627376854419708 Traing_acc: 88.985420% Val_Loss: 0.3797226846218109 Val_accuracy: 87.775002%
Train Epoch: 6/20 Traing_Loss: 0.18427540361881256 Traing_acc: 89.541664% Val_Loss: 0.35305169224739075 Val_accuracy: 88.158333%
Train Epoch: 7/20 Traing_Loss: 0.22373411059379578 Traing_acc: 89.947914% Val_Loss: 0.22612027823925018 Val_accuracy: 89.908333%
Train Epoch: 8/20 Traing_Loss: 0.3222634196281433 Traing_acc: 90.245834% Val_Loss: 0.26324474811553955 Va

In [62]:
# test with test data
testImages = torch.from_numpy(test_images)
testLabels = torch.from_numpy(test_labels).type(torch.LongTensor)   
verify = torch.utils.data.TensorDataset(testImages, testLabels)
verify_loader = torch.utils.data.DataLoader(verify, batch_size = batch_size, shuffle = False)

In [71]:
def verify_model(model, input_shape, loader):
    correct_test, total_test = 0, 0
    for images, labels in loader:
        test, labels = images.view(input_shape).to(device), labels.to(device)   # 取出 testing data 以及 labels(轉 device 的型態)
        outputs = model(test)                                                   # 將測試資料輸入至模型進行測試 (Forward propagation)                                 # 計算 loss
        
        # 計算測試資料的準確度 (correct_test / total_test)
        # print(torch.max(outputs.data, 1)[1])
        # print(labels)
        # print("--------------")
        predicted = torch.max(outputs.data, 1)[1]                               # 取出預測的 maximum
        total_test += len(labels)                                               # 全部的 label 數 (Total number of labels)
        correct_test += (predicted == labels).float().sum()   
    val_accuracy = 100 * correct_test / float(total_test)
    print("Verified Accuracy: %f (%d/%d)" % (val_accuracy, correct_test, total_test))
    return

In [87]:
verify_model(model, input_shape, verify_loader)

Verified Accuracy: 93.040001 (9304/10000)


In [89]:
for name, param in model.named_parameters():
    print(name)

cnn1.weight
cnn1.bias
cnn2.weight
cnn2.bias
fc1.weight
fc1.bias


In [90]:
for name, param in model.named_parameters():
    print(param.data)

tensor([[[[ 0.9733,  2.2065,  2.0408],
          [ 1.1901,  0.1466, -1.2374],
          [ 0.5957, -0.9144,  0.0440]]],


        [[[ 0.5374,  2.4182,  2.8426],
          [-0.9732,  1.3385,  0.0107],
          [-4.6548, -3.6290, -5.7798]]],


        [[[ 0.1510,  1.6150, -5.0594],
          [-2.6212, -7.0269, -5.0732],
          [-2.8966,  3.0128, -2.7448]]],


        [[[-1.2517, -2.5902, -1.7196],
          [ 0.9144, -0.2910,  0.5186],
          [ 2.1329,  2.0890,  1.5766]]]])
tensor([-0.0410, -0.0021,  1.4504, -0.0150])
tensor([[[[-4.6837e-01, -5.5493e-01, -1.2152e-01],
          [-1.6839e-01,  4.3158e-01,  1.9400e-01],
          [ 6.8899e-01,  1.8456e-01,  8.3717e-03]],

         [[-8.0337e-01,  6.1371e-01, -2.1128e+00],
          [-1.6299e+00, -4.1353e-01,  1.4319e+00],
          [ 1.1272e+00,  1.3637e+00,  3.3574e+00]],

         [[-9.2033e-01, -3.5494e-01, -4.0578e-01],
          [-2.4812e-01,  1.0597e-01, -1.3861e-01],
          [ 1.7561e-01,  1.1363e+00,  2.5586e-01]],

       