# 5-4 Exercise

Building on Lab sessions `5-1` to `5-3`, we can now work through some exercises.


## Exercise 1

First, change the linear function from Session 5.1 to a nonlinear function such as x^2 or x^3. Then train with (mini-)Batch Gradient Descent or Stochastic Gradient Descent (SGD) and inspect the loss curve.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch

# produce the data points from the nonlinear target function y = x^2
X = torch.arange(-5, 5, 0.05).view(-1, 1)
func = X**2
# Gaussian noise (scaled by 0.2) is added to the target to create the variable Y
Y = func + 0.2 * torch.randn(X.size())

# one figure with two side-by-side panels; ax2 is created here but not drawn on in this section
fig = plt.figure(figsize=(20, 10))

ax1 = fig.add_subplot(121)
ax2 = fig.add_subplot(122)

# left panel: noisy samples (blue stars) against the noise-free function (red line)
ax1.plot(X, Y, 'b*', label='Y')
ax1.plot(X, func, 'r', label='function')
ax1.set_xlabel('x')
ax1.set_ylabel('y')
ax1.legend()
# the grid switch is documented as a bool, so pass True rather than the string 'True'
ax1.grid(True, color='y')

# forward pass of the model: scale the input by w, then shift by b
def forward(x):
    """Return ``w * x + b`` using the module-level parameters ``w`` and ``b``."""
    scaled = w * x
    return scaled + b

# loss function: Mean Squared Error (MSE) between predictions and targets
def criterion(y_pred, y):
    """Return the mean of the element-wise squared differences ``y_pred - y``."""
    residual = y_pred - y
    squared_error = residual ** 2
    return torch.mean(squared_error)

# initial values of the learnable parameters w and b
w = torch.tensor(-10.0, requires_grad=True)
b = torch.tensor(-20.0, requires_grad=True)

# optimisation settings
step_size = 0.1   # learning rate of the gradient step
loss_BGD = []     # loss value recorded at every iteration
n_iter = 100      # number of full-batch updates

# prediction for x=4 before any training
print('Predict before training with BGD: x=' + str(4) + ' y=' + str(4**2) + ' prediction=' + str(forward(4.0)))

for i in range(n_iter):
    # forward pass over the whole data set (batch gradient descent)
    Y_pred = forward(X)
    # loss between the noisy targets and the predictions
    loss = criterion(Y_pred, Y)
    loss_BGD.append(loss.item())
    # backward pass: fills w.grad and b.grad
    loss.backward()
    # gradient step, done in place under no_grad so the update itself is not
    # recorded by autograd (replaces assignment through `.data`)
    with torch.no_grad():
        w -= step_size * w.grad
        b -= step_size * b.grad
    # reset the gradients so the next backward() call starts from zero
    w.grad.zero_()
    b.grad.zero_()
    # progress report every 5 iterations: the loss is the value computed before
    # this iteration's update, the weight and bias are the updated values
    if i % 5 == 0:
        print('iteration: {}, \t loss: {}, \t weight: {}, \t bias: {}'.format(i, loss.item(), w.item(), b.item()))

# prediction for x=4 after updating w and b
print('Predict after training with BGD: x=' + str(4) + ' y=' + str(4**2) + ' prediction=' + str(forward(4.0)))

# plot the loss curve (loss_BGD); plt.* draws on matplotlib's current axes
plt.plot(loss_BGD, label="Batch Gradient Descent")
plt.xlabel('Epoch')
plt.ylabel('Cost/Total loss')
plt.legend()
plt.show()

## Exercise 2

Second, search over learning rates in the interval (0, 1) and find the best learning rate for the model from Session 5.2.

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.autograd import Variable
import matplotlib.pyplot as plt

# ---- Hyperparameters ----
# every image is 28 x 28 pixels, flattened into a vector of 784 values
input_size = 28 * 28
# number of units in the hidden layer
hidden_size = 500
# number of output classes, the discrete range [0, 9]
num_classes = 10

# number of samples processed in one iteration
batch_size = 1000

# number of full passes over the training data
num_epochs = 30

# classification loss
loss_function = nn.CrossEntropyLoss()

In [None]:
# MNIST train / test splits, downloaded into ./data; each image is converted to a tensor
train_data = datasets.MNIST(
    root='./data',
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

test_data = datasets.MNIST(
    root='./data',
    train=False,
    transform=transforms.ToTensor(),
    download=True,
)

# Mini-batch iterators: the training data is reshuffled, the test data keeps its order
train_gen = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)

test_gen = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=False)

# Prefer the first GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Fetch one training batch and show the shape of its image tensor
x_train, y_train = next(iter(train_gen))
print(x_train.size())

In [None]:
# Neural network model: two fully connected layers with a ReLU in between
class Net(nn.Module):
    """Fully connected classifier: Linear -> ReLU -> Linear."""

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        # ReLU activation; other activations such as Tanh or Sigmoid can be used instead
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return one raw score per class for each row of ``x``."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

# Instantiate the network with the configured layer sizes
net = Net(input_size, hidden_size, num_classes)
# move its parameters to the selected device (Module.to returns the module itself)
net = net.to(device)

# show the layer structure
print(net)

This defines the learning rate values to be tested.

In [None]:
# candidate learning rates: start at 1e-4 and advance in steps of 1e-3 while below 1e-2
lr_list = np.arange(1e-4, 1e-2, 1e-3)
print(lr_list)

In [None]:
# number of epochs trained for each candidate learning rate
num_epochs = 3

# One entry per learning rate (plain floats): last training loss and test accuracy
loss_list = []
accuracy_list = []

# Per-batch test predictions and labels (CPU tensors) for class-wise accuracy
predicted_list = []
labels_list = []

# Print progress every `log_every` mini-batches. An epoch has
# len(train_gen) batches, so the old interval of 100 never fired with 60 batches.
log_every = 20

# train and evaluate once per candidate learning rate
for lr in lr_list:
    # Start every candidate from freshly initialised weights; reusing one model
    # would let later learning rates benefit from earlier training.
    net = Net(input_size, hidden_size, num_classes).to(device)

    # Adam optimizer, you can also use AdaGrad or RMSProp, etc.
    optimizer = torch.optim.Adam(net.parameters(), lr=float(lr))

    # train the model
    net.train()
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(train_gen):
            # flatten each image and move the batch to the model's device
            images = images.view(-1, input_size).to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = net(images)
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizer.step()

            if (i + 1) % log_every == 0:
                print('Step [%d/%d], Loss: %.4f'
                      % (i + 1, len(train_gen), loss.item()))
    # loss of the last training batch, stored as a Python float
    final_loss = loss.item()
    loss_list.append(final_loss)

    # Evaluate the accuracy of the model on the test set
    correct = 0
    total = 0
    net.eval()
    # no gradients are needed for evaluation
    with torch.no_grad():
        for images, labels in test_gen:
            images = images.view(-1, input_size).to(device)
            labels = labels.to(device)

            output = net(images)

            # index of the largest score is the predicted class
            _, predicted = torch.max(output, 1)
            predicted_list.append(predicted.cpu())
            labels_list.append(labels.cpu())

            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    # accuracy in percent, as a Python float
    accuracy = 100 * correct / total
    accuracy_list.append(accuracy)

    print("Learning Rate: {}, Loss: {}, Accuracy: {}%".format(lr, final_loss, accuracy))

In [None]:
# integer positions 0 .. len(lr_list)-1, one per tested learning rate
np.arange(0, len(lr_list))

In [None]:
# Bar chart of test accuracy, one bar per tested learning rate
positions = np.arange(len(lr_list))
# entries may be 0-dim tensors or plain numbers; convert them to Python floats
accuracies = [float(acc) for acc in accuracy_list]

plt.bar(positions, accuracies)
plt.xlabel("LR value")
# fixed-precision tick labels avoid long floating-point round-off tails
plt.xticks(positions, ['{:.4f}'.format(lr) for lr in lr_list], rotation=65)
plt.ylabel("Accuracy")
# Zoom on the observed accuracies (with a 1-point margin, clipped to [0, 100]);
# a hard-coded (97, 99) window hides every bar outside that range.
if accuracies:
    plt.ylim(max(0.0, min(accuracies) - 1.0), min(100.0, max(accuracies) + 1.0))
plt.title("LR vs Accuracy")
plt.show()

## Exercise 3

Lastly, using the Fashion MNIST dataset, how can you improve the accuracy?
Please explore any method that achieves a higher accuracy. For example, you can change the network model by adding more layers, tune the hyperparameters, or try any other idea to obtain better results.


In [None]:
import torchvision
import matplotlib.pyplot as plt
from torch import nn, optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import numpy as np
from torch.utils.data import Dataset, DataLoader

In [None]:
# Fashion-MNIST train / test splits, downloaded into ./data/FashionMNIST;
# each image is converted to a tensor
train_set = torchvision.datasets.FashionMNIST(
    root='./data/FashionMNIST',
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)
test_set = torchvision.datasets.FashionMNIST(
    root='./data/FashionMNIST',
    train=False,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

# mini-batch loaders: training batches of 100 in shuffled order,
# test batches of 50 in fixed order
train_loader = DataLoader(train_set, batch_size=100, shuffle=True)
test_loader = DataLoader(test_set, batch_size=50, shuffle=False)

In [None]:
# Convolutional model: two convolutions followed by two fully connected layers
class CNN(nn.Module):
    """Small CNN producing 10 raw class scores per input image."""

    def __init__(self):
        super().__init__()

        # convolutional layers: 1 -> 10 channels (5x5 kernel), 10 -> 20 channels (3x3 kernel)
        self.conv1 = nn.Conv2d(1, 10, 5)
        self.conv2 = nn.Conv2d(10, 20, 3)

        # fully connected layers: 20*10*10 = 2000 features -> 500 -> 10
        self.fc1 = nn.Linear(20 * 10 * 10, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        """Run the network on ``x``; shape comments assume a batch*1*28*28 input."""
        batch = x.size(0)

        # batch*1*28*28 -> batch*10*24*24 (28-5+1); ReLU keeps the shape
        x = F.relu(self.conv1(x))
        # 2x2 max pooling with stride 2: -> batch*10*12*12
        x = F.max_pool2d(x, 2, 2)

        # -> batch*20*10*10 (12-3+1)
        x = F.relu(self.conv2(x))

        # flatten every sample: -> batch*2000
        x = x.view(batch, -1)

        # -> batch*500
        x = F.relu(self.fc1(x))

        # -> batch*10
        return self.fc2(x)

# instantiate the CNN model and print its layer structure
net = CNN()
print(net)

In [None]:
# training configuration
epochs = 10
learning_rate = 1e-3
# NOTE(review): the data loaders built earlier in this file pass their own
# literal batch sizes, so this value is not what they use.
batch_size = 256

# Adam optimiser over all parameters of `net`, paired with a cross-entropy loss
optimizer = optim.Adam(net.parameters(), lr=learning_rate)
loss_function = nn.CrossEntropyLoss()

In [None]:
# train the model
# loss_list holds the mean training loss of each epoch as a Python float;
# recording only the last mini-batch gives a noisier curve.
loss_list = []
for epoch in range(1, epochs + 1):
    net.train()
    running_loss = 0.0
    n_batches = 0
    for data, target in train_loader:
        optimizer.zero_grad()
        output = net(data)

        loss = loss_function(output, target)
        loss.backward()
        optimizer.step()

        # accumulate the per-batch loss to average over the epoch
        running_loss += loss.item()
        n_batches += 1

    # guard against an empty loader when averaging
    epoch_loss = running_loss / max(n_batches, 1)
    loss_list.append(epoch_loss)
    print('Epoch: {}, \t loss: {}'.format(epoch, epoch_loss))

In [None]:
# loss curve: one point per entry of loss_list
fig = plt.figure(figsize=(10, 10))

plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.plot(loss_list)

plt.show()

In [None]:
# count correct predictions over the whole test set
correct = 0

# switch to evaluation mode and disable gradient tracking for inference
net.eval()
with torch.no_grad():
    for data, target in test_loader:
        outputs = net(data)
        # index of the largest score is the predicted class
        _, predicted = torch.max(outputs, 1)

        correct += (predicted == target).sum().item()

n_test = len(test_loader.dataset)
print('Test accuracy: {}/{} ({:.2f}%)\n'.format(correct, n_test,
                                                100. * correct / n_test))