In [6]:
import torch
import torch.nn as nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import random

# Fully Connected with ReLU, Xavier_Init, Dropout

In [20]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# for reproducibility
random.seed(777)
torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

In [32]:
mnist_train = dsets.MNIST(root = "MNIST_data/", train = True, transform = transforms.ToTensor(), download=True)
mnist_test = dsets.MNIST(root = "MNIST_data/", train = False, transform = transforms.ToTensor(), download=True)
batch_size = 100
data_loader = torch.utils.data.DataLoader(mnist_train, batch_size = batch_size, shuffle = True, drop_last =True)

linear1 = nn.Linear(784,256,bias= True).to(device)
linear2 = nn.Linear(256,256,bias= True).to(device)
linear3 = nn.Linear(256,10,bias= True).to(device)
#ReLU
relu = nn.ReLU()
#Dropout
dropout = torch.nn.Dropout(p = 0.5)
#Xavier Initialization
torch.nn.init.xavier_uniform_(linear1.weight)
torch.nn.init.xavier_uniform_(linear2.weight)
torch.nn.init.xavier_uniform_(linear3.weight)

epochs = 10
learning_rate = 0.001

model = torch.nn.Sequential(linear1,relu,dropout, linear2,relu,dropout, linear3).to(device)
#! 여기에 소프트맥스를 막단에 넣어줄 필요가 없는게, 함수 실행시 torch.nn.CrossEntropyLoss()에서 저절로 softmax까지 계산한 loss를 반환한다.
optimizer = torch.optim.Adam(model.parameters(),lr = learning_rate)
criterion = torch.nn.CrossEntropyLoss().to(device)

model.train() #드롭아웃, BN등이 train에만 적용되어야함
for epoch in range(epochs):
    total_batch = len(data_loader)
    avg_cost = 0
    for X,Y in data_loader:
        X = X.view(-1,28*28).to(device)
        Y = Y.to(device)
        hypothesis = model(X)
        optimizer.zero_grad()
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        avg_cost+= cost/total_batch
    
    print("epoch: %d, avg_cost : %.5f"%((epoch+1),avg_cost))
        
        
        

epoch: 1, avg_cost : 0.43828
epoch: 2, avg_cost : 0.20776
epoch: 3, avg_cost : 0.16953
epoch: 4, avg_cost : 0.14298
epoch: 5, avg_cost : 0.13300
epoch: 6, avg_cost : 0.12202
epoch: 7, avg_cost : 0.10891
epoch: 8, avg_cost : 0.10568
epoch: 9, avg_cost : 0.10153
epoch: 10, avg_cost : 0.09643


In [33]:
with torch.no_grad():
    model.eval() #드롭아웃, BN등이 train에만 적용되어야함
    X_test = mnist_test.test_data.view(-1,28*28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)
    prediction= model(X_test)
    correct_prediction = Y_test == torch.argmax(prediction,1)
    accuracy = correct_prediction.float().mean()
    print("Accuracy: %.5f"%accuracy.item())

Accuracy: 0.97730


# BatchNormalization

In [36]:
bn1 = torch.nn.BatchNorm1d(256)
bn2 = torch.nn.BatchNorm1d(256)
linear1n = nn.Linear(784,256,bias= True).to(device)
linear2n = nn.Linear(256,256,bias= True).to(device)
linear3n = nn.Linear(256,10,bias= True).to(device)
#ReLU
relu = nn.ReLU()
#Xavier Initialization
torch.nn.init.xavier_uniform_(linear1n.weight)
torch.nn.init.xavier_uniform_(linear2n.weight)
torch.nn.init.xavier_uniform_(linear3n.weight)

bn_model = torch.nn.Sequential(linear1n, bn1, relu, linear2n, bn2,relu,linear3n).to(device)

bn_optimizer = torch.optim.Adam(model.parameters(),lr = learning_rate)
criterion = torch.nn.CrossEntropyLoss().to(device)

bn_model.train() #드롭아웃, BN등이 train에만 적용되어야함
for epoch in range(epochs):
    total_batch = len(data_loader)
    avg_cost = 0
    for X,Y in data_loader:
        X = X.view(-1,28*28).to(device)
        Y = Y.to(device)
        hypothesis = bn_model(X)
        bn_optimizer.zero_grad()
        bn_cost = criterion(hypothesis, Y)
        bn_cost.backward()
        bn_optimizer.step()
        avg_cost+= cost/total_batch
    
    print("epoch: %d, avg_cost : %.5f"%((epoch+1),avg_cost))

epoch: 1, avg_cost : 0.10159
epoch: 2, avg_cost : 0.10159
epoch: 3, avg_cost : 0.10159
epoch: 4, avg_cost : 0.10159
epoch: 5, avg_cost : 0.10159
epoch: 6, avg_cost : 0.10159
epoch: 7, avg_cost : 0.10159
epoch: 8, avg_cost : 0.10159
epoch: 9, avg_cost : 0.10159
epoch: 10, avg_cost : 0.10159


In [37]:
with torch.no_grad():
    bn_model.eval() #드롭아웃, BN등이 train에만 적용되어야함
    X_test = mnist_test.test_data.view(-1,28*28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)
    prediction= bn_model(X_test)
    correct_prediction = Y_test == torch.argmax(prediction,1)
    accuracy = correct_prediction.float().mean()
    print("Accuracy: %.5f"%accuracy.item())

Accuracy: 0.10900
