* Memorize (4)-2 and implement the same algorithm. There should be two layers, with a ReLU activation function in the hidden layer, which has 32 nodes. Use cross entropy as the loss function. Split the training dataset into training and validation datasets, and then apply the trained model to the test dataset. Select cuda if available.

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt 
import torch.nn as nn
import torch.nn.functional as F
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
from torchvision.utils import make_grid
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import random_split

# Seed torch's random number generator so repeated runs on the same device give the same result.
torch.manual_seed(0)

* Select whether to use cuda or cpu. Compare the time and check if they return the same accuracy in the test data, or equivalently if the model is the same.
* Given the same seed and the same device (cuda or cpu), our result stays consistent. However, depending on whether we choose cuda or cpu, we get different results. Maybe there is some difference in randomness between the two devices, so the accuracy of the final model differs.
* cuda was not always faster than cpu. Maybe it is because the sample size is not big enough.
* Comparison with num_workers=0 — (4)-1: cuda 103 seconds vs. cpu 113 seconds; (4)-2: cuda 57 seconds vs. cpu 54 seconds. (4)-2 was a lot faster because we do not process the validation dataset as multiple batches, but all at once.

In [None]:
print(torch.cuda.is_available()) # check if cuda is available.

# option 1
# Choose cuda if available, otherwise fall back to cpu so that `device`
# is always defined (the later cells all rely on it).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# # option 2
# device = torch.device("cuda") # Choose cuda

# # option 3
# device = torch.device("cpu") # Choose cpu

In [None]:
# Load MNIST from the local data/ folder (no download), converting images to tensors.
dataset = MNIST(root='data/', download=False, transform=ToTensor())

# Hold out a fixed number of samples for validation; the rest is used for training.
val_size = 10000
train_size = len(dataset) - val_size
train_ds, val_ds = random_split(dataset, lengths=[train_size, val_size])

# Only the training split is batched; it is reshuffled on every pass.
batch_size = 128
train_loader = DataLoader(
    dataset=train_ds,
    batch_size=batch_size,
    shuffle=True,
    num_workers=4,
    pin_memory=True,
)

In [None]:
class MnistModel(nn.Module):
    """Two-layer fully connected network with a ReLU between the layers."""

    def __init__(self, in_size, hid_size, out_size):
        """Create the two linear layers.

        in_size: number of features per flattened input sample.
        hid_size: number of nodes in the hidden layer.
        out_size: number of output scores per sample.
        """
        super().__init__()
        self.linear1 = nn.Linear(in_features=in_size, out_features=hid_size)
        self.linear2 = nn.Linear(in_features=hid_size, out_features=out_size)

    def forward(self, xb):
        """Return the raw (un-normalised) output scores for the batch `xb`."""
        # Keep the batch dimension and collapse everything else into one axis.
        n_samples = xb.shape[0]
        flat = xb.view(n_samples, -1)
        hidden = F.relu(self.linear1(flat))
        return self.linear2(hidden)

In [None]:
# Layer widths: flattened input features, hidden nodes, output classes.
input_size = 28 * 28  # 784
hidden_size = 32
num_classes = 10
model = MnistModel(input_size, hidden_size, num_classes)

In [None]:
def to_device(data, device):
    """Move `data` to `device`.

    A list or tuple is handled element by element (recursively) and comes
    back as a list; anything else is moved with its own `.to(device)`.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device)
    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

In [None]:
class DeviceDataLoader():
    """Wrap a data loader so that every batch it yields is moved to a device."""

    def __init__(self, dl, device):
        """Store the wrapped loader `dl` and the target `device`."""
        self.dl = dl
        self.device = device

    def __iter__(self):
        """Yield the batches of the wrapped loader after moving each to the device."""
        for batch in self.dl:
            yield to_device(batch, self.device)

    def __len__(self):
        """Return the number of batches, as reported by the wrapped loader."""
        # Without this, len() stops working once a loader has been wrapped.
        return len(self.dl)

In [None]:
# Build a fresh network and move its parameters to the selected device.
model = MnistModel(input_size, hidden_size, num_classes)
model.to(device)

In [None]:
# From here on, every training batch is moved to `device` as it is drawn.
train_loader = DeviceDataLoader(train_loader, device)

# Collect the whole validation split into one image tensor and one label
# tensor on `device`, so each epoch can validate in a single forward pass.
size = val_ds[0][0].shape
channels, height, width = size[0], size[1], size[2]
val_image_tuple, val_label_tuple = zip(*val_ds)
images_val = torch.cat(val_image_tuple).reshape(len(val_ds), channels, height, width).to(device)
labels_val = torch.tensor(val_label_tuple, device=device)

# One {'val_loss', 'val_acc'} record is appended per epoch.
history = list()

In [None]:
import time
# Alternative clock, kept for reference: process time instead of wall-clock time.
# t_begin = time.process_time() 
# Wall-clock start; a later cell subtracts this from its own time.time() reading.
t_begin = time.time() 

* 5 epochs with learning rate = 0.5

In [None]:
### Set the learning rate and number of iterations

# case 1
learning_rate = 0.5
epochs = 5

# # case 2
# learning_rate = 0.1
# epochs = 5

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(epochs):

    ### training step: one SGD update per mini-batch

    for images, labels in train_loader:
        out = model(images)
        loss = F.cross_entropy(out, labels)
        loss.backward()
        optimizer.step()
        # Clear the gradients so they do not accumulate into the next batch.
        optimizer.zero_grad()

    ### validation step: the whole validation set in a single forward pass

    # No gradients are needed for evaluation, so do not build the autograd
    # graph over the full validation tensor.
    with torch.no_grad():

        ## loss
        out_val = model(images_val) # Always read the MESSAGE carefully. Run on cuda.
        loss_val = F.cross_entropy(out_val, labels_val).item()

        ## accuracy
        _, preds_val = torch.max(out_val, dim=1)
        acc_val = (torch.sum(preds_val == labels_val) / len(val_ds)).item()

    ## record history
    result_val = {'val_loss': loss_val, 'val_acc': acc_val}
    history.append(result_val)
    print('Epoch [{}], val_loss: {:.4f}, val_acc: {:.4f}'.format(epoch+1, loss_val, acc_val))

* 5 epochs with learning rate = 0.1

In [None]:
### Set the learning rate and number of iterations

# # case 1
# learning_rate = 0.5
# epochs = 5

# case 2
learning_rate = 0.1
epochs = 5

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(epochs):

    ### training step: one SGD update per mini-batch

    for images, labels in train_loader:
        out = model(images)
        loss = F.cross_entropy(out, labels)
        loss.backward()
        optimizer.step()
        # Clear the gradients so they do not accumulate into the next batch.
        optimizer.zero_grad()

    ### validation step: the whole validation set in a single forward pass

    # No gradients are needed for evaluation, so do not build the autograd
    # graph over the full validation tensor.
    with torch.no_grad():

        ## loss
        out_val = model(images_val) # Always read the MESSAGE carefully. Run on cuda.
        loss_val = F.cross_entropy(out_val, labels_val).item()

        ## accuracy
        _, preds_val = torch.max(out_val, dim=1)
        acc_val = (torch.sum(preds_val == labels_val) / len(val_ds)).item()

    ## record history
    result_val = {'val_loss': loss_val, 'val_acc': acc_val}
    history.append(result_val)
    print('Epoch [{}], val_loss: {:.4f}, val_acc: {:.4f}'.format(epoch+1, loss_val, acc_val))

In [None]:
# Alternative clock, kept for reference: process time instead of wall-clock time.
# t_end = time.process_time()
# Wall-clock stop; report the seconds elapsed since t_begin was taken.
t_end = time.time()
elapsed = t_end - t_begin
print('Took {} seconds'.format(elapsed))

In [None]:
# Pull the per-epoch validation metrics out of the recorded history.
losses = [record['val_loss'] for record in history]
accuracies = [record['val_acc'] for record in history]

# Epochs are numbered from 1 on the x-axis.
indices = list(range(1, len(history) + 1))

# Left panel: validation loss.
plt.subplot(1, 2, 1)
plt.plot(indices, losses, '-x')
plt.title('Loss vs No. of epochs')
plt.xlabel('epoch')
plt.ylabel('loss')

# Right panel: validation accuracy.
plt.subplot(1, 2, 2)
plt.plot(indices, accuracies, '-x')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.title('Accuracy vs No. of epochs')

In [None]:
# Load the test split (train=False) from the local data/ folder as tensors.
test_dataset = MNIST(root='data/', train=False, transform=ToTensor())

# Collect the whole test set into one image tensor and one label tensor on
# `device`, reusing the per-image shape `size` recorded for the validation data.
test_image_tuple, test_label_tuple = zip(*test_dataset)
n_test = len(test_dataset)
images_test = torch.cat(test_image_tuple).reshape(n_test, size[0], size[1], size[2]).to(device)
labels_test = torch.tensor(test_label_tuple, device=device)

In [None]:
# Evaluate on the whole test set in one forward pass. Gradients are not
# needed for inference, so the autograd graph is not built.
with torch.no_grad():
    out_test = model(images_test)
# The predicted class is the index of the largest output score.
_, pred_test = torch.max(out_test, dim=1)
# Fraction of test samples predicted correctly (shown as the cell output).
torch.sum(labels_test == pred_test) / len(test_dataset)