# Pytorch turorial 5

## MNIST 1
softmax를 활용

In [1]:
import torch, random
from torch.autograd import Variable
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from time import time
torch.manual_seed(777)  # reproducibility

<torch._C.Generator at 0x7fa047a330c0>

In [2]:
# parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100
t0 = time()
# MNIST dataset
mnist_train = dsets.MNIST(root='MNIST_data/', train=True, transform=transforms.ToTensor(), download=True)
mnist_test = dsets.MNIST(root='MNIST_data/', train=False, transform=transforms.ToTensor(), download=True)

# dataset loader
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,batch_size=batch_size, shuffle=True)

In [3]:
# model
model = torch.nn.Linear(784, 10, bias=True)
# define cost/loss & optimizer
criterion = torch.nn.CrossEntropyLoss()    # Softmax is internally computed.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [4]:
# 학습모델 만들기
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = len(mnist_train) // batch_size
    
    for i, (batch_xs, batch_ys) in enumerate(data_loader):
        # reshape input image into [batch_size by 784]
        X = Variable(batch_xs.view(-1, 28 * 28))
        Y = Variable(batch_ys)    # label 은 one-hot encoded 이 아니다 (convolution 알고리즘)
        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        avg_cost += cost / total_batch
    print("[Epoch: {:>4}] cost = {:>.9}".format(epoch + 1, avg_cost.data[0]))
print('Learning Finished!')

[Epoch:    1] cost = 0.615321696
[Epoch:    2] cost = 0.343970388
[Epoch:    3] cost = 0.308309406
[Epoch:    4] cost = 0.291389644
[Epoch:    5] cost = 0.281003147
[Epoch:    6] cost = 0.274250835
[Epoch:    7] cost = 0.268962383
[Epoch:    8] cost = 0.264928371
[Epoch:    9] cost = 0.261347979
[Epoch:   10] cost = 0.258764029
[Epoch:   11] cost = 0.25649184
[Epoch:   12] cost = 0.254417121
[Epoch:   13] cost = 0.252568364
[Epoch:   14] cost = 0.251057833
[Epoch:   15] cost = 0.24962458
Learning Finished!


In [5]:
# 학습모델 테스트 및 정확도 측정
X_test = Variable(mnist_test.test_data.view(-1, 28 * 28).float())
Y_test = Variable(mnist_test.test_labels)

prediction = model(X_test)
correct_prediction = (torch.max(prediction.data, 1)[1] == Y_test.data)
accuracy = correct_prediction.float().mean()
print('Accuracy:', accuracy)

Accuracy: 0.9039


In [6]:
# 임의 데이터 시뮬레이션
r = random.randint(0, len(mnist_test) - 1)

# 학습모형으로 시뮬레이션
X_single_data = Variable(mnist_test.test_data[r:r + 1].view(-1, 28 * 28).float())
Y_single_data = Variable(mnist_test.test_labels[r:r + 1])
print("Label: ", Y_single_data.data)

# 실제 데이터의 값과 비교
single_prediction = model(X_single_data)
print("Prediction: ", torch.max(single_prediction.data, 1)[1])
print(int(time()-t0)//60, 'min', int(time()-t0)%60, 'sec')

Label:  
 3
[torch.LongTensor of size 1]

Prediction:  
 3
[torch.LongTensor of size 1x1]

2 min 15 sec


## MNIST 2
Nural Network 를 활용

In [7]:
# parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100
t0 = time()

In [8]:
# nn layers
linear1 = torch.nn.Linear(784, 256, bias=True)
linear2 = torch.nn.Linear(256, 256, bias=True)
linear3 = torch.nn.Linear(256, 10, bias=True)
relu = torch.nn.ReLU()
# model
model = torch.nn.Sequential(linear1, relu, linear2, relu, linear3)
# define cost/loss & optimizer
criterion = torch.nn.CrossEntropyLoss()    # Softmax is internally computed.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [9]:
# 학습모델 만들기
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = len(mnist_train) // batch_size

    for i, (batch_xs, batch_ys) in enumerate(data_loader):
        # reshape input image into [batch_size by 784]
        X = Variable(batch_xs.view(-1, 28 * 28))
        Y = Variable(batch_ys)    # label is not one-hot encoded
        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        avg_cost += cost / total_batch
    print("[Epoch: {:>4}] cost = {:>.9}".format(epoch + 1, avg_cost.data[0]))
print('Learning Finished!')

[Epoch:    1] cost = 0.301067621
[Epoch:    2] cost = 0.11021772
[Epoch:    3] cost = 0.0715760663
[Epoch:    4] cost = 0.0528145395
[Epoch:    5] cost = 0.0378407687
[Epoch:    6] cost = 0.0306602456
[Epoch:    7] cost = 0.0241072178
[Epoch:    8] cost = 0.0203977656
[Epoch:    9] cost = 0.0149436677
[Epoch:   10] cost = 0.0167632103
[Epoch:   11] cost = 0.0102889761
[Epoch:   12] cost = 0.0124249188
[Epoch:   13] cost = 0.0116662057
[Epoch:   14] cost = 0.0102858394
[Epoch:   15] cost = 0.0112185385
Learning Finished!


In [10]:
# 학습모델 테스트 및 정확도 측정
X_test = Variable(mnist_test.test_data.view(-1, 28 * 28).float())
Y_test = Variable(mnist_test.test_labels)
prediction = model(X_test)
correct_prediction = (torch.max(prediction.data, 1)[1] == Y_test.data)
accuracy = correct_prediction.float().mean()
print('Accuracy:', accuracy)

Accuracy: 0.9786


In [11]:
# 임의 데이터 시뮬레이션
r = random.randint(0, len(mnist_test) - 1)
X_single_data = Variable(mnist_test.test_data[r:r + 1].view(-1, 28 * 28).float())
Y_single_data = Variable(mnist_test.test_labels[r:r + 1])
print("Label: ", Y_single_data.data)
single_prediction = model(X_single_data)
print("Prediction: ", torch.max(single_prediction.data, 1)[1])
print(int(time()-t0)//60, 'min', int(time()-t0)%60, 'sec')

Label:  
 0
[torch.LongTensor of size 1]

Prediction:  
 0
[torch.LongTensor of size 1x1]

5 min 7 sec


## MNIST 3
and Xavier

    홍콩 중문대 박사과정에 있던 he가 2015년에 이 방식을 사용해서 ImageNet에서 3% 에러를 달성했다.
    xavier - 입력값과 출력값 사이의 난수를 선택해서 입력값의 제곱근으로 나눈다.
    he - 입력값을 반으로 나눈 제곱근 사용. 분모가 작아지기 때문에 xavier보다 넓은 범위의 난수 생성.

In [19]:
import torch.nn.init

# parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100
t0 = time()

In [20]:
# xavier initializer
torch.nn.init.xavier_uniform(linear1.weight)
torch.nn.init.xavier_uniform(linear2.weight)
torch.nn.init.xavier_uniform(linear3.weight)
# model
model = torch.nn.Sequential(linear1, relu, linear2, relu, linear3)
# define cost/loss & optimizer
criterion = torch.nn.CrossEntropyLoss()    # Softmax is internally computed.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [21]:
# 학습모델 만들기
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = len(mnist_train) // batch_size

    for i, (batch_xs, batch_ys) in enumerate(data_loader):
        # reshape input image into [batch_size by 784]
        X = Variable(batch_xs.view(-1, 28 * 28))
        Y = Variable(batch_ys)    # label is not one-hot encoded
        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        avg_cost += cost / total_batch
    print("[Epoch: {:>4}] cost = {:>.9}".format(epoch + 1, avg_cost.data[0]))
print('Learning Finished!')

[Epoch:    1] cost = 0.245956078
[Epoch:    2] cost = 0.0926144123
[Epoch:    3] cost = 0.0621828288
[Epoch:    4] cost = 0.0444740318
[Epoch:    5] cost = 0.0336061083
[Epoch:    6] cost = 0.0265867803
[Epoch:    7] cost = 0.0188468993
[Epoch:    8] cost = 0.0207099058
[Epoch:    9] cost = 0.0147647941
[Epoch:   10] cost = 0.0157889128
[Epoch:   11] cost = 0.0123811411
[Epoch:   12] cost = 0.0109416591
[Epoch:   13] cost = 0.00986913685
[Epoch:   14] cost = 0.0111952536
[Epoch:   15] cost = 0.0102181891
Learning Finished!


In [22]:
# 학습모델 테스트 및 정확도 측정
X_test = Variable(mnist_test.test_data.view(-1, 28 * 28).float())
Y_test = Variable(mnist_test.test_labels)

prediction = model(X_test)
correct_prediction = (torch.max(prediction.data, 1)[1] == Y_test.data)
accuracy = correct_prediction.float().mean()
print('Accuracy:', accuracy)

Accuracy: 0.9805


In [23]:
# 임의 데이터 시뮬레이션
r = random.randint(0, len(mnist_test) - 1)
X_single_data = Variable(mnist_test.test_data[r:r + 1].view(-1, 28 * 28).float())
Y_single_data = Variable(mnist_test.test_labels[r:r + 1])

print("Label: ", Y_single_data.data)
single_prediction = model(X_single_data)
print("Prediction: ", torch.max(single_prediction.data, 1)[1])
print(int(time()-t0)//60, 'min', int(time()-t0)%60, 'sec')

Label:  
 4
[torch.LongTensor of size 1]

Prediction:  
 2
[torch.LongTensor of size 1x1]

4 min 54 sec


## MNIST 4
and Deep Learning

In [24]:
# parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100
t0 = time()

In [25]:
# nn layers
linear1 = torch.nn.Linear(784, 512, bias=True)
linear2 = torch.nn.Linear(512, 512, bias=True)
linear3 = torch.nn.Linear(512, 512, bias=True)
linear4 = torch.nn.Linear(512, 512, bias=True)
linear5 = torch.nn.Linear(512, 10, bias=True)
relu = torch.nn.ReLU()

In [26]:
# xavier initializer
torch.nn.init.xavier_uniform(linear1.weight)
torch.nn.init.xavier_uniform(linear2.weight)
torch.nn.init.xavier_uniform(linear3.weight)
torch.nn.init.xavier_uniform(linear4.weight)
torch.nn.init.xavier_uniform(linear5.weight)

Parameter containing:
 0.0832  0.0549  0.0611  ...  -0.0484  0.0227 -0.0112
-0.0071 -0.0764  0.0878  ...  -0.0620  0.0192 -0.0361
 0.0199 -0.0953  0.0343  ...   0.0230 -0.0630 -0.0081
          ...             ⋱             ...          
-0.0297  0.0418 -0.0346  ...   0.0208  0.1000 -0.0837
 0.0246  0.0823 -0.0464  ...  -0.0421 -0.0644  0.0678
-0.0124 -0.0222  0.0353  ...  -0.0845 -0.0053  0.0457
[torch.FloatTensor of size 10x512]

In [27]:
# model
model = torch.nn.Sequential(linear1, relu,
                            linear2, relu,
                            linear3, relu,
                            linear4, relu,
                            linear5)
# define cost/loss & optimizer
criterion = torch.nn.CrossEntropyLoss()    # Softmax is internally computed.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [28]:
# 학습모델 만들기
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = len(mnist_train) // batch_size

    for i, (batch_xs, batch_ys) in enumerate(data_loader):
        # reshape input image into [batch_size by 784]
        X = Variable(batch_xs.view(-1, 28 * 28))
        Y = Variable(batch_ys)    # label is not one-hot encoded
        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        avg_cost += cost / total_batch
    print("[Epoch: {:>4}] cost = {:>.9}".format(epoch + 1, avg_cost.data[0]))
print('Learning Finished!')

[Epoch:    1] cost = 0.210798308
[Epoch:    2] cost = 0.0904840976
[Epoch:    3] cost = 0.0658415332
[Epoch:    4] cost = 0.0507572927
[Epoch:    5] cost = 0.0417291224
[Epoch:    6] cost = 0.032476861
[Epoch:    7] cost = 0.0308252443
[Epoch:    8] cost = 0.0255429968
[Epoch:    9] cost = 0.0240809005
[Epoch:   10] cost = 0.0192096625
[Epoch:   11] cost = 0.0210121498
[Epoch:   12] cost = 0.0213301592
[Epoch:   13] cost = 0.0156960711
[Epoch:   14] cost = 0.0140191251
[Epoch:   15] cost = 0.0152498316
Learning Finished!


In [29]:
# 학습모델 테스트 및 정확도 측정
X_test = Variable(mnist_test.test_data.view(-1, 28 * 28).float())
Y_test = Variable(mnist_test.test_labels)

prediction = model(X_test)
correct_prediction = (torch.max(prediction.data, 1)[1] == Y_test.data)
accuracy = correct_prediction.float().mean()
print('Accuracy:', accuracy)

Accuracy: 0.9795


In [30]:
# 임의 데이터 시뮬레이션
r = random.randint(0, len(mnist_test) - 1)
X_single_data = Variable(mnist_test.test_data[r:r + 1].view(-1, 28 * 28).float())
Y_single_data = Variable(mnist_test.test_labels[r:r + 1])

print("Label: ", Y_single_data.data)
single_prediction = model(X_single_data)
print("Prediction: ", torch.max(single_prediction.data, 1)[1])
print(int(time()-t0)//60, 'min', int(time()-t0)%60, 'sec')

Label:  
 1
[torch.LongTensor of size 1]

Prediction:  
 1
[torch.LongTensor of size 1x1]

17 min 30 sec


## MNIST 5
and Deep Learning with Drop Out

In [31]:
# parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100
keep_prob = 0.7
t0 = time()

In [32]:
# nn layers
linear1 = torch.nn.Linear(784, 512, bias=True)
linear2 = torch.nn.Linear(512, 512, bias=True)
linear3 = torch.nn.Linear(512, 512, bias=True)
linear4 = torch.nn.Linear(512, 512, bias=True)
linear5 = torch.nn.Linear(512, 10, bias=True)
relu = torch.nn.ReLU()
# p is the probability of being dropped in PyTorch ( 전체의 70% 만큼만 활용 )
dropout = torch.nn.Dropout(p=1 - keep_prob)

In [33]:
# xavier initializer
torch.nn.init.xavier_uniform(linear1.weight)
torch.nn.init.xavier_uniform(linear2.weight)
torch.nn.init.xavier_uniform(linear3.weight)
torch.nn.init.xavier_uniform(linear4.weight)
torch.nn.init.xavier_uniform(linear5.weight)

Parameter containing:
 5.3671e-02 -9.6433e-02 -8.8157e-02  ...   2.5232e-02 -3.6547e-02 -3.3956e-03
-2.3887e-02 -1.0675e-01  5.1505e-02  ...  -2.3071e-02  5.4573e-02 -5.9830e-02
-6.4813e-02  4.7453e-02  2.1834e-02  ...   9.0064e-02 -7.8786e-02  1.0311e-01
                ...                   ⋱                   ...                
-2.8091e-02  9.7533e-02 -1.0102e-02  ...   3.2850e-02  5.1721e-02  8.7104e-02
-4.4692e-02  6.7316e-02 -7.3007e-02  ...   9.0272e-02 -1.7859e-02 -6.6067e-02
 7.0924e-02 -2.7705e-02 -8.6818e-02  ...   2.1309e-02  3.6259e-02 -5.2669e-02
[torch.FloatTensor of size 10x512]

In [34]:
# 학습모델 정의하기
model = torch.nn.Sequential(linear1, relu, dropout,
                            linear2, relu, dropout,
                            linear3, relu, dropout,
                            linear4, relu, dropout,
                            linear5)

In [35]:
# define cost/loss & optimizer
criterion = torch.nn.CrossEntropyLoss()    # Softmax is internally computed.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [36]:
# 학습모델 만들기
for epoch in range(training_epochs):
    avg_cost = 0
    total_batch = len(mnist_train) // batch_size

    for i, (batch_xs, batch_ys) in enumerate(data_loader):
        # reshape input image into [batch_size by 784]
        X = Variable(batch_xs.view(-1, 28 * 28))
        Y = Variable(batch_ys)    # label is not one-hot encoded
        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        avg_cost += cost / total_batch
    print("[Epoch: {:>4}] cost = {:>.9}".format(epoch + 1, avg_cost.data[0]))
print('Learning Finished!')

[Epoch:    1] cost = 0.314977467
[Epoch:    2] cost = 0.145359248
[Epoch:    3] cost = 0.112980329
[Epoch:    4] cost = 0.0965414718
[Epoch:    5] cost = 0.082470648
[Epoch:    6] cost = 0.0748037025
[Epoch:    7] cost = 0.0692059323
[Epoch:    8] cost = 0.0625941902
[Epoch:    9] cost = 0.0597230643
[Epoch:   10] cost = 0.0563570373
[Epoch:   11] cost = 0.0526821129
[Epoch:   12] cost = 0.0504432209
[Epoch:   13] cost = 0.0460488014
[Epoch:   14] cost = 0.0466552302
[Epoch:   15] cost = 0.0416866988
Learning Finished!


In [37]:
# 학습모델 테스트 및 정확도 측정
model.eval()    # 측정모드로 전환 (dropout = False)
X_test = Variable(mnist_test.test_data.view(-1, 28 * 28).float())
Y_test = Variable(mnist_test.test_labels)

prediction = model(X_test)
correct_prediction = (torch.max(prediction.data, 1)[1] == Y_test.data)
accuracy = correct_prediction.float().mean()
print('Accuracy:', accuracy)

Accuracy: 0.9812


In [38]:
# 임의 데이터 시뮬레이션
r = random.randint(0, len(mnist_test) - 1)
X_single_data = Variable(mnist_test.test_data[r:r + 1].view(-1, 28 * 28).float())
Y_single_data = Variable(mnist_test.test_labels[r:r + 1])

print("Label: ", Y_single_data.data)
single_prediction = model(X_single_data)
print("Prediction: ", torch.max(single_prediction.data, 1)[1])
print(int(time()-t0)//60, 'min', int(time()-t0)%60, 'sec')

Label:  
 5
[torch.LongTensor of size 1]

Prediction:  
 5
[torch.LongTensor of size 1x1]

17 min 25 sec
