# Multi Layer Perceptron and MNIST

In [1]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F 
import torch.optim as optim 
import random

batch_size = 128
training_epochs = 100

mnist_train = dsets.MNIST(root="MNIST_data/",
                          train=True,
                          transform=transforms.ToTensor(),
                          download=True)
mnist_test = dsets.MNIST(root="MNIST_data/",
                          train=False,
                          transform=transforms.ToTensor(),
                          download=True)

# 불러온 값을 batch로 맞추기 위해 dataloader를 사용한다.
data_loader=torch.utils.data.DataLoader(dataset=mnist_train,
                                   batch_size=batch_size,
                                   shuffle=True,
                                   drop_last=True) # batch_size에 안맞고 남는 data 버린다.

In [2]:
learning_rate = 0.001
training_epochs = 15
batch_size = 100

linear = nn.Linear(784,10,bias=True)
nn.init.normal_(linear.weight) # normal distribution으로 weight initialization

optimizer = optim.Adam(linear.parameters(), lr=learning_rate)

In [3]:
# single Layer

total_batch = len(data_loader)
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = len(data_loader)
    
    for X, Y in data_loader:
        X = X.view(-1,28*28) # reshape image to 784, but Label should be onehotecoded
        # 참고로 X는 (batch size, 1, 28, 28)이었는데 그 이후 (batch size, 784)가 된다.    
        predict = linear(X)
        cost = F.cross_entropy(predict, Y)
        
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        avg_cost += cost / total_batch
    
    print("Epoch: ", "%04d" % (epoch+1), "cost = ", "{:.9f}".format(avg_cost))


Epoch:  0001 cost =  6.000259876
Epoch:  0002 cost =  1.971456528
Epoch:  0003 cost =  1.233181715
Epoch:  0004 cost =  0.952599049
Epoch:  0005 cost =  0.802862823
Epoch:  0006 cost =  0.708308518
Epoch:  0007 cost =  0.641792655
Epoch:  0008 cost =  0.592713892
Epoch:  0009 cost =  0.554369390
Epoch:  0010 cost =  0.523402810
Epoch:  0011 cost =  0.498058051
Epoch:  0012 cost =  0.475769252
Epoch:  0013 cost =  0.457886189
Epoch:  0014 cost =  0.441623956
Epoch:  0015 cost =  0.428177416


In [4]:
# Multi Layer

# Linear에 가중치 초기화 method가 따로 없어서, 이렇게 선언해야한다.
linear1 = nn.Linear(784,256,bias=True)
linear2 = nn.Linear(256,256,bias=True)
linear3 = nn.Linear(256,10,bias=True)
relu = torch.nn.ReLU()

torch.nn.init.normal_(linear1.weight)
torch.nn.init.normal_(linear2.weight)
torch.nn.init.normal_(linear3.weight)

model = nn.Sequential(linear, relu,
                      linear2, relu,
                      linear3)

optimizer = optim.Adam(linear.parameters(), lr=learning_rate)

total_batch = len(data_loader)
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = len(data_loader)
    
    for X, Y in data_loader:
        X = X.view(-1,28*28) 
        predict = linear(X)
        cost = F.cross_entropy(predict, Y) # 이미 softmax가 있다.
        
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        avg_cost += cost / total_batch
    
    print("Epoch: ", "%04d" % (epoch+1), "cost = ", "{:.9f}".format(avg_cost))



Epoch:  0001 cost =  0.415787548
Epoch:  0002 cost =  0.404293984
Epoch:  0003 cost =  0.393926442
Epoch:  0004 cost =  0.384406596
Epoch:  0005 cost =  0.376875848
Epoch:  0006 cost =  0.369347125
Epoch:  0007 cost =  0.362255245
Epoch:  0008 cost =  0.355386287
Epoch:  0009 cost =  0.349586934
Epoch:  0010 cost =  0.344415039
Epoch:  0011 cost =  0.339549154
Epoch:  0012 cost =  0.334672987
Epoch:  0013 cost =  0.330124974
Epoch:  0014 cost =  0.326059937
Epoch:  0015 cost =  0.322006494


In [5]:
# Test the model using test sets
with torch.no_grad():
    X_test = mnist_test.test_data.view(-1, 28 * 28).float() 
    Y_test = mnist_test.test_labels 

    prediction = linear(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

    # Get one and predict
    r = random.randint(0, len(mnist_test) - 1)
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 * 28).float() 
    Y_single_data = mnist_test.test_labels[r:r + 1] 

    print('Label: ', Y_single_data.item())
    single_prediction = linear(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy: 0.8888999819755554
Label:  9
Prediction:  9




# Weight initialization

In [6]:
# Xavier_Uniform distribution

# def xavier_uniform_(tensor, gain=1):
#     fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor)
#     std = gain * math.sqrt(2. / (fan_in + fan_out))
#     a = math.sqrt(3.) * std
#     with torch.no_grad():
#         return tensor.uniform_(-a,a)

In [7]:
linear1 = nn.Linear(784,256,bias=True)
linear2 = nn.Linear(256,256,bias=True)
linear3 = nn.Linear(256,10,bias=True)
relu = torch.nn.ReLU()

torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)

model = nn.Sequential(linear, relu,
                      linear2, relu,
                      linear3)

optimizer = optim.Adam(linear.parameters(), lr=learning_rate)

total_batch = len(data_loader)
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = len(data_loader)
    
    for X, Y in data_loader:
        X = X.view(-1,28*28) 
        predict = linear(X)
        cost = F.cross_entropy(predict, Y) # 이미 softmax가 있다.
        
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        avg_cost += cost / total_batch
    
    print("Epoch: ", "%04d" % (epoch+1), "cost = ", "{:.9f}".format(avg_cost))

Epoch:  0001 cost =  0.318682969
Epoch:  0002 cost =  0.315017045
Epoch:  0003 cost =  0.311255425
Epoch:  0004 cost =  0.308744848
Epoch:  0005 cost =  0.305688769
Epoch:  0006 cost =  0.303019404
Epoch:  0007 cost =  0.299978286
Epoch:  0008 cost =  0.297845423
Epoch:  0009 cost =  0.294370979
Epoch:  0010 cost =  0.293242365
Epoch:  0011 cost =  0.290827185
Epoch:  0012 cost =  0.288658142
Epoch:  0013 cost =  0.286851197
Epoch:  0014 cost =  0.284332752
Epoch:  0015 cost =  0.282683849


In [8]:
# 더 deep한 Network를 형성한다.

linear1 = nn.Linear(784,512,bias=True)
linear2 = nn.Linear(512,512,bias=True)
linear3 = nn.Linear(512,512,bias=True)
linear4 = nn.Linear(512,512,bias=True)
linear5 = nn.Linear(512,10,bias=True)
relu = torch.nn.ReLU()

torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)
torch.nn.init.xavier_uniform_(linear4.weight)
torch.nn.init.xavier_uniform_(linear5.weight)

model = nn.Sequential(linear, relu,
                      linear2, relu,
                      linear3, relu,
                      linear4, relu,
                      linear5)

optimizer = optim.Adam(linear.parameters(), lr=learning_rate)

total_batch = len(data_loader)
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = len(data_loader)
    
    for X, Y in data_loader:
        X = X.view(-1,28*28) 
        predict = linear(X)
        cost = F.cross_entropy(predict, Y) # 이미 softmax가 있다.
        
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        avg_cost += cost / total_batch
    
    print("Epoch: ", "%04d" % (epoch+1), "cost = ", "{:.9f}".format(avg_cost))

Epoch:  0001 cost =  0.281634301
Epoch:  0002 cost =  0.279691398
Epoch:  0003 cost =  0.277927905
Epoch:  0004 cost =  0.276562065
Epoch:  0005 cost =  0.274940699
Epoch:  0006 cost =  0.273482412
Epoch:  0007 cost =  0.272297025
Epoch:  0008 cost =  0.270939738
Epoch:  0009 cost =  0.269048482
Epoch:  0010 cost =  0.267948955
Epoch:  0011 cost =  0.267306060
Epoch:  0012 cost =  0.265866101
Epoch:  0013 cost =  0.264946967
Epoch:  0014 cost =  0.263636112
Epoch:  0015 cost =  0.262503535


# Dropout

In [9]:
# dropout을 추가한다.

linear1 = nn.Linear(784,512,bias=True)
linear2 = nn.Linear(512,512,bias=True)
linear3 = nn.Linear(512,512,bias=True)
linear4 = nn.Linear(512,512,bias=True)
linear5 = nn.Linear(512,10,bias=True)
relu = torch.nn.ReLU()
dropout = torch.nn.Dropout(p=0.5)

torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)
torch.nn.init.xavier_uniform_(linear4.weight)
torch.nn.init.xavier_uniform_(linear5.weight)

model = nn.Sequential(linear, relu, dropout,
                      linear2, relu, dropout,
                      linear3, relu, dropout,
                      linear4, relu, dropout,
                      linear5)

optimizer = optim.Adam(linear.parameters(), lr=learning_rate)

model.train() # dropout 적용
total_batch = len(data_loader)
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = len(data_loader)
    
    for X, Y in data_loader:
        X = X.view(-1,28*28) 
        predict = linear(X)
        cost = F.cross_entropy(predict, Y) # 이미 softmax가 있다.
        
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()
        avg_cost += cost / total_batch
    
    print("Epoch: ", "%04d" % (epoch+1), "cost = ", "{:.9f}".format(avg_cost))
    
# model.train()과 model.eval()을 적극 사용해야한다.

Epoch:  0001 cost =  0.261538982
Epoch:  0002 cost =  0.261063308
Epoch:  0003 cost =  0.259867907
Epoch:  0004 cost =  0.258937359
Epoch:  0005 cost =  0.257843316
Epoch:  0006 cost =  0.257619917
Epoch:  0007 cost =  0.256563157
Epoch:  0008 cost =  0.255656570
Epoch:  0009 cost =  0.254655898
Epoch:  0010 cost =  0.253812224
Epoch:  0011 cost =  0.253492236
Epoch:  0012 cost =  0.252786100
Epoch:  0013 cost =  0.252092630
Epoch:  0014 cost =  0.251044303
Epoch:  0015 cost =  0.250247121


In [10]:
# Test the model using test sets
with torch.no_grad():
    model.eval()
    X_test = mnist_test.test_data.view(-1, 28 * 28).float()
    Y_test = mnist_test.test_labels

    prediction = linear(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy:', accuracy.item())

    # Get one and predict
    r = random.randint(0, len(mnist_test) - 1)
    X_single_data = mnist_test.test_data[r:r + 1].view(-1, 28 * 28).float() 
    Y_single_data = mnist_test.test_labels[r:r + 1] 

    print('Label: ', Y_single_data.item())
    single_prediction = linear(X_single_data)
    print('Prediction: ', torch.argmax(single_prediction, 1).item())

Accuracy: 0.873199999332428
Label:  6
Prediction:  6


# Batch Normalization

In [None]:
import torch
import random
import torchvision

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# for reproducibility
random.seed(777)
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

mnist_train = torchvision.datasets.MNIST(root="MNIST_data/",
                                         train=True,
                                         transform=torchvision.transforms.ToTensor(),
                                         download=True)

mnist_test = torchvision.datasets.MNIST(root="MNIST_data/",
                                         train=False,
                                         transform=torchvision.transforms.ToTensor(),
                                         download=True)

batch_size = 100
training_epochs = 10
learning_rate = 0.01
train_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                          batch_size=batch_size,
                                          shuffle=True,
                                          drop_last=True)


class Bn_Model(torch.nn.Module):
      def __init__(self):
    super().__init__()

    self.linear1 = torch.nn.Linear(784,32,bias=True)
    self.linear2 = torch.nn.Linear(32,32,bias=True)
    self.linear3 = torch.nn.Linear(32,10,bias=True)
    self.relu = torch.nn.ReLU()
    self.bn1 = torch.nn.BatchNorm1d(32)
    self.bn2 = torch.nn.BatchNorm1d(32)
    self.model = torch.nn.Sequential(
        self.linear1, self.bn1, self.relu,
        self.linear2, self.bn2, self.relu,
        self.linear3
    ).to(device)
  
  def forward(self, X):
    return self.model(X)


bn_model = Bn_Model()
bn_criterion = torch.nn.CrossEntropyLoss()
bn_optimizer = torch.optim.SGD(bn_model.parameters(), lr=0.1)

bn_model

In [None]:
# train

bn_model.train()
for epoch in range(training_epochs):
  for X, Y in train_loader:
    X = X.view(-1, 28 * 28).to(device)
    Y = Y.to(device)

    bn_optimizer.zero_grad()
    bn_prediction = bn_model(X)
    bn_loss = bn_criterion(bn_prediction, Y)
    bn_loss.backward()
    bn_optimizer.step()

  print("Epoch : {}, loss : {}".format(epoch, bn_loss))
  
# test
bn_model.eval()
with torch.no_grad():
  X_test = mnist_test.test_data.view(-1,28*28).float().to(device)
  Y_test = mnist_test.test_labels.to(device)

  prediction = bn_model(X_test)
  accuracy = (torch.argmax(prediction, 1) == Y_test).float().mean()
  print(accuracy)

In [None]:
class Nn_Model(torch.nn.Module):
      def __init__(self):
    super().__init__()

    self.linear1 = torch.nn.Linear(784,32,bias=True)
    self.linear2 = torch.nn.Linear(32,32,bias=True)
    self.linear3 = torch.nn.Linear(32,10,bias=True)
    self.relu = torch.nn.ReLU()
    self.model = torch.nn.Sequential(
        self.linear1, self.relu,
        self.linear2, self.relu,
        self.linear3
    ).to(device)
  
  def forward(self, X):
    return self.model(X)

nn_model = Nn_Model()
nn_criterion = torch.nn.CrossEntropyLoss()
nn_optimizer = torch.optim.SGD(nn_model.parameters(), lr=0.1)
nn_model


# train

nn_model.train()
for epoch in range(training_epochs):
  for X, Y in train_loader:
    X = X.view(-1, 28 * 28).to(device)
    Y = Y.to(device)

    nn_optimizer.zero_grad()
    nn_prediction = nn_model(X)
    nn_loss = nn_criterion(nn_prediction, Y)
    nn_loss.backward()
    nn_optimizer.step()
  print("Epoch : {}, loss : {}".format(epoch, nn_loss))
  
# test
nn_model.eval()
with torch.no_grad():
  X_test = mnist_test.test_data.view(-1,28*28).float().to(device)
  Y_test = mnist_test.test_labels.to(device)

  prediction = nn_model(X_test)
  accuracy = (torch.argmax(prediction, 1) == Y_test).float().mean()
  print(accuracy)