Convolution의 output 크기

$$ Output size = \frac{input size - fileter size + (2 * padding)}{stride} + 1 $$

ex1)  
intput image size : 227 * 227  
filter size = 11 * 11  
stride = 4  
padding = 0  
output image size = ?    
->  
$$ \frac{227 - 11 + (2*0)}{4}+1 = 55 $$

ex2)  
input image size : 32 * 64  
filter size = 5 * 5  
stride = 1  
padding = 0  
output image size = ?  
->  
$$ \frac{(32, 64) - 5 + (2*0)}{1}+1 = (27, 59)+1 = (28, 60) $$

In [1]:
import torch
import torch.nn as nn

In [2]:
inputs = torch.Tensor(1, 1, 28, 28)
inputs.shape

torch.Size([1, 1, 28, 28])

In [3]:
conv1 = nn.Conv2d(1, 32, 3, padding=1)
pool = nn.MaxPool2d(2)

In [4]:
pool

MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)

In [5]:
conv2 = nn.Conv2d(32, 64, 3, padding=1)

In [6]:
out = conv1(inputs)
out.shape

torch.Size([1, 32, 28, 28])

In [7]:
out = pool(out)
out.shape

torch.Size([1, 32, 14, 14])

In [8]:
out = conv2(out)
out.shape

torch.Size([1, 64, 14, 14])

In [9]:
out = pool(out)
out.shape

torch.Size([1, 64, 7, 7])

In [10]:
out = out.view(out.size(0), -1)
out.shape

torch.Size([1, 3136])

In [11]:
fc = nn.Linear(3136, 10)
out = fc(out)
out

tensor([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan]],
       grad_fn=<AddmmBackward>)

In [12]:
out.shape

torch.Size([1, 10])

In [13]:
import torch
import torch.nn
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn.init

In [14]:
device = 'cpu'
torch.manual_seed(777)

<torch._C.Generator at 0x7f94302c8d30>

In [15]:
# parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100

In [16]:
# MNIST dataset
mnist_train = dsets.MNIST(root='MNIST_data/', train=True, 
                          transform=transforms.ToTensor(), download=False)
mnist_test = dsets.MNIST(root='MNIST_data/', train=False,
                        transform=transforms.ToTensor(), download=False)

In [17]:
# dataset loader
data_loader = torch.utils.data.DataLoader(dataset=mnist_train, batch_size=batch_size,
                                         shuffle=True, drop_last=True)

In [18]:
# CNN Model (2 conv layers)
class CNN(torch.nn.Module):
    
    def __init__(self):
        super(CNN, self).__init__()
        # L1 image in shape = (?, 28, 28, 1)
        # conv1            -> (?, 28, 28, 32)
        # pool             -> (?, 14, 14, 32)
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))
        
        # L2 image in shape = (?, 14, 14, 32)
        # conv2            -> (?, 14, 14, 64)
        # pool             -> (?, 7, 7, 64)
        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))
        
        # Final FC 7X7X64 inputs -> 10 outputs
        self.fc = torch.nn.Linear(7 * 7 * 64, 10, bias=True)
        torch.nn.init.xavier_uniform_(self.fc.weight)
    
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0), -1) # Flatten them for FC
        out = self.fc(out)
        return out

In [19]:
# instantiate CNN model
model = CNN().to(device)

In [20]:
# define cost / loss & optimizer
criterion = torch.nn.CrossEntropyLoss().to(device) # Softmax is internally computed.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [21]:
# train my model
total_batch = len(data_loader)
print('Learning started. It takes sometime.')
for epoch in range(training_epochs):
    avg_cost = 0
    
    for X, Y in data_loader:
        # image is already size of (28X28), no reshape
        # label is not one-hot encoded
        X = X.to(device)
        Y = Y.to(device)
        
        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / total_batch
        
    print('[Epoch: {:>4f}] cost = {:>.9}'.format(epoch+1, avg_cost))

print('Learning Finished')

Learning started. It takes sometime.
[Epoch: 1.000000] cost = 0.223939255
[Epoch: 2.000000] cost = 0.0621613935
[Epoch: 3.000000] cost = 0.0449009687
[Epoch: 4.000000] cost = 0.0355566293
[Epoch: 5.000000] cost = 0.0288751405
[Epoch: 6.000000] cost = 0.0247465279
[Epoch: 7.000000] cost = 0.0208460782
[Epoch: 8.000000] cost = 0.0183143727
[Epoch: 9.000000] cost = 0.0150467549
[Epoch: 10.000000] cost = 0.0125005851
[Epoch: 11.000000] cost = 0.0103304209
[Epoch: 12.000000] cost = 0.0102607198
[Epoch: 13.000000] cost = 0.00809532497
[Epoch: 14.000000] cost = 0.00646170136
[Epoch: 15.000000] cost = 0.0072748703
Learning Finished


In [22]:
# Test model and check accuracy
with torch.no_grad():
    X_test = mnist_test.test_data.view(len(mnist_test), 1, 28, 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)
    
    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy: ', accuracy.item())



Accuracy:  0.9873999953269958


In [23]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.init

In [24]:
device = 'cpu'

# for reproducibility
torch.manual_seed(777)

<torch._C.Generator at 0x7f94302c8d30>

In [25]:
# parameters
learning_rate = 0.001
training_epochs = 15
batch_size = 100

In [26]:
# MNIST dataset
mnist_train = dsets.MNIST(root='MNIST_data/', train=True,
                         transform=transforms.ToTensor(), download=False)
mnist_test = dsets.MNIST(root='MNIST_data/', train=False,
                        transform=transforms.ToTensor(), download=False)

In [27]:
# dataset loader
data_loader = torch.utils.data.DataLoader(dataset=mnist_train,
                                         batch_size=batch_size,
                                         shuffle=True,
                                         drop_last=True)

In [28]:
# CNN Model
class CNN(torch.nn.Module):
    
    def __init__(self):
        super(CNN, self).__init__()
        self.keep_prob = 0.5
        # L1 image shape = (?, 28, 28, 1)
        # Conv          -> (?, 28, 28, 32)
        # Pool          -> (?, 14, 14, 32)
        self.layer1 = torch.nn.Sequential(
            torch.nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))
        
        # L2 image shape = (?, 14, 14, 32)
        # Conv          -> (?, 14, 14, 64)
        # Pool          -> (?, 7,  7,  64)
        self.layer2 = torch.nn.Sequential(
            torch.nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2))
        
        # L3 image shape = (?, 7, 7, 64)
        # Conv          -> (?, 7, 7, 128)
        # Pool          -> (?, 4, 4, 128)
        self.layer3 = torch.nn.Sequential(
            torch.nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(kernel_size=2, stride=2, padding=1))
        
        # L4 FC 4x4x128 inputs => 625 outpus
        self.fc1 = torch.nn.Linear(4 * 4 * 128, 625, bias=True)
        torch.nn.init.xavier_uniform_(self.fc1.weight)
        self.layer4 = torch.nn.Sequential(
            self.fc1, 
            torch.nn.ReLU(),
            torch.nn.Dropout(p=1 - self.keep_prob))
        
        # L5 Final FC 625 inputs -> 10 outputs
        self.fc2 = torch.nn.Linear(625, 10, bias=True)
        torch.nn.init.xavier_uniform_(self.fc2.weight)
        
    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = self.layer3(out)
        out = out.view(out.size(0), -1) # Flatten them for FC
        out = self.layer4(out)
        out = self.fc2(out)
        return out

In [34]:
# instantiate CNN Model
model = CNN().to(device)
print(model)

CNN(
  (layer1): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer3): Sequential(
    (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (fc1): Linear(in_features=2048, out_features=625, bias=True)
  (layer4): Sequential(
    (0): Linear(in_features=2048, out_features=625, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
  )
  (fc2): Linear(in_features=625, out_features=10, bias=True)
)


In [35]:
value = torch.Tensor(1, 1, 28, 28).to(device)
print(model(value).shape)

torch.Size([1, 10])


In [31]:
# define cost/loss & optimizer
criterion = torch.nn.CrossEntropyLoss().to(device) # Softmax is internally computed.
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [32]:
# train my model
total_batch = len(data_loader)
model.train()
print('Learning started.')
for epoch in range(training_epochs):
    avg_cost = 0
    
    for X, Y in data_loader:
        # image is already size of (28x28), no reshape
        # label is not one-hot encoded
        X = X.to(device)
        Y = Y.to(device)
        
        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()
        
        avg_cost += cost / total_batch
    
    print('[Epoch: {:>4}] cost = {:>.9}'.format(epoch + 1, avg_cost))

Learning started.
[Epoch:    1] cost = 0.189059123
[Epoch:    2] cost = 0.0532271266
[Epoch:    3] cost = 0.037352208
[Epoch:    4] cost = 0.0289540682
[Epoch:    5] cost = 0.0249762945
[Epoch:    6] cost = 0.0197217893
[Epoch:    7] cost = 0.0173077974
[Epoch:    8] cost = 0.0143984593
[Epoch:    9] cost = 0.012650175
[Epoch:   10] cost = 0.0110715851
[Epoch:   11] cost = 0.0112678763
[Epoch:   12] cost = 0.00972414296
[Epoch:   13] cost = 0.00784225855
[Epoch:   14] cost = 0.0079669999
[Epoch:   15] cost = 0.00723450771


In [33]:
# Test model and check accuracy
with torch.no_grad():
    model.eval() # set the model to evaluation mode (dropout=False)
    
    X_test = mnist_test.test_data.view(len(mnist_test), 1, 28, 28).float().to(device)
    Y_test = mnist_test.test_labels.to(device)
    
    prediction = model(X_test)
    correct_prediction = torch.argmax(prediction, 1) == Y_test
    accuracy = correct_prediction.float().mean()
    print('Accuracy: ', accuracy.item())

Accuracy:  0.9889000058174133
