In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torch.utils.data import sampler

import torchvision.datasets as dset
import torchvision.transforms as transforms

import numpy as np

import timeit

In [2]:
class ChunkSampler(sampler.Sampler):
    """Samples elements sequentially from some offset. 
    Arguments:
        num_samples: # of desired datapoints
        start: offset where we should start selecting from
    """

    def __init__(self,num_samples,start = 0):
      self.num_samples = num_samples
      self.start = start

    def __iter__(self):
      return iter(range(self.start,self.start+self.num_samples))

    def __len__(self):
      return self.num_samples

num_train = 49000
num_val = 1000

train_data = dset.CIFAR10('./CIFAR10',train=True,download=True,transform=transforms.ToTensor())
train_loader = DataLoader(train_data,batch_size=64,sampler=ChunkSampler(num_train,0))

val_data = dset.CIFAR10('./CIFAR10',train=True,download=True,transform=transforms.ToTensor())
val_loader = DataLoader(val_data,batch_size=64,sampler=ChunkSampler(num_val,num_train))

test_data = dset.CIFAR10('./CIFAR10',train=False,download=True,transform=transforms.ToTensor())
test_loader = DataLoader(test_data,batch_size=64)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./CIFAR10/cifar-10-python.tar.gz


HBox(children=(FloatProgress(value=0.0, max=170498071.0), HTML(value='')))


Extracting ./CIFAR10/cifar-10-python.tar.gz to ./CIFAR10
Files already downloaded and verified
Files already downloaded and verified


In [3]:
dtype = torch.FloatTensor

print_every = 100

def reset(m):
  if hasattr(m,'reset_parameters'):
    m.reset_parameters()

In [4]:
class Flatten(nn.Module):
  def forward(self,x):
    return x.view(x.shape[0],-1)

In [5]:
simple_model = nn.Sequential(
    nn.Conv2d(3,32,kernel_size=7,stride=2),
    nn.ReLU(inplace=True), #inplace True면 input을 수정, 메모리 usage가 좋아진다
    Flatten(),
    nn.Linear(5408,10)
)

simple_model.type(dtype)

loss_fn = nn.CrossEntropyLoss().type(dtype)
optimizer = optim.Adam(simple_model.parameters(),lr=1e-2)

In [6]:
fixed_model_base = nn.Sequential(
    nn.Conv2d(3,32,kernel_size=7,stride=1),
    nn.ReLU(inplace=True),
    nn.BatchNorm2d(32),
    nn.MaxPool2d(2,2),
    Flatten(),
    nn.Linear(5408,1024),
    nn.ReLU(inplace=True),
    nn.Linear(1024,10)
)
fixed_model = fixed_model_base.type(dtype)

In [7]:
## Now we're going to feed a random batch into the model you defined and make sure the output is the right size
x = torch.randn(64, 3, 32, 32).type(dtype)
x_var = Variable(x.type(dtype)) # Construct a PyTorch Variable out of your input data
ans = fixed_model(x_var)        # Feed it through the model! 

# Check to make sure what comes out of your model
# is the right dimensionality... this should be True
# if you've done everything correctly
np.array_equal(np.array(ans.size()), np.array([64, 10]))   

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


True

In [8]:
torch.cuda.is_available()

True

In [9]:
import copy
gpu_dtype = torch.cuda.FloatTensor

fixed_model_gpu = copy.deepcopy(fixed_model_base).type(gpu_dtype)

x_gpu = torch.randn(64, 3, 32, 32).type(gpu_dtype)
x_var_gpu = Variable(x.type(gpu_dtype)) # Construct a PyTorch Variable out of your input data
ans = fixed_model_gpu(x_var_gpu)        # Feed it through the model! 

# Check to make sure what comes out of your model
# is the right dimensionality... this should be True
# if you've done everything correctly
np.array_equal(np.array(ans.size()), np.array([64, 10]))

True

In [10]:
%%timeit 
ans = fixed_model(x_var)

10 loops, best of 5: 33 ms per loop


In [11]:
%%timeit 
torch.cuda.synchronize() # Make sure there are no pending GPU computations
ans = fixed_model_gpu(x_var_gpu)        # Feed it through the model! 
torch.cuda.synchronize() # Make sure there are no pending GPU computations

The slowest run took 7.03 times longer than the fastest. This could mean that an intermediate result is being cached.
1000 loops, best of 5: 709 µs per loop


In [31]:
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.RMSprop(fixed_model.parameters(),lr=1e-3)

In [17]:
# This sets the model in "training" mode. This is relevant for some layers that may have different behavior
# in training mode vs testing mode, such as Dropout and BatchNorm. 
fixed_model_gpu.train()

# Load one batch at a time.
for t, (x, y) in enumerate(train_loader):
    x_var = Variable(x.type(gpu_dtype))
    y_var = Variable(y.type(gpu_dtype).long())

    # This is the forward pass: predict the scores for each class, for each x in the batch.
    scores = fixed_model_gpu(x_var)
    
    # Use the correct y values and the predicted y values to compute the loss.
    loss = loss_fn(scores, y_var)
    if (t + 1) % print_every == 0:
        print('t = %d, loss = %.4f' % (t + 1, loss.data))

    # Zero out all of the gradients for the variables which the optimizer will update.
    optimizer.zero_grad()
    
    # This is the backwards pass: compute the gradient of the loss with respect to each 
    # parameter of the model.
    loss.backward()
    
    # Actually update the parameters of the model using the gradients computed by the backwards pass.
    optimizer.step()

t = 100, loss = 2.3543
t = 200, loss = 2.3505
t = 300, loss = 2.3490
t = 400, loss = 2.3334
t = 500, loss = 2.3196
t = 600, loss = 2.3490
t = 700, loss = 2.3255


In [24]:
def train(model, loss_fn, optimizer, num_epochs = 1):
    for epoch in range(num_epochs):
        print('Starting epoch %d / %d' % (epoch + 1, num_epochs))
        model.train()
        for t, (x, y) in enumerate(train_loader):
            x_var = Variable(x.type(gpu_dtype))
            y_var = Variable(y.type(gpu_dtype).long())

            scores = model(x_var)
            
            loss = loss_fn(scores, y_var)
            if (t + 1) % print_every == 0:
                print('t = %d, loss = %.4f' % (t + 1, loss.data))

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

def check_accuracy(model, loader):
    if loader.dataset.train:
        print('Checking accuracy on validation set')
    else:
        print('Checking accuracy on test set')   
    num_correct = 0
    num_samples = 0
    model.eval() # Put the model in test mode (the opposite of model.train(), essentially)
    with torch.no_grad():
      for x, y in loader:
          x_var = Variable(x.type(gpu_dtype))

          scores = model(x_var)
          _, preds = scores.data.cpu().max(1)
          num_correct += (preds == y).sum()
          num_samples += preds.size(0)
    acc = float(num_correct) / num_samples
    print('Got %d / %d correct (%.2f)' % (num_correct, num_samples, 100 * acc))

In [32]:
torch.cuda.random.manual_seed(12345)
fixed_model_gpu.apply(reset)
train(fixed_model_gpu, loss_fn, optimizer, num_epochs=1)
check_accuracy(fixed_model_gpu, val_loader)

Starting epoch 1 / 1
t = 100, loss = 2.3415
t = 200, loss = 2.2957
t = 300, loss = 2.3805
t = 400, loss = 2.3142
t = 500, loss = 2.3195
t = 600, loss = 2.3283
t = 700, loss = 2.3121
Checking accuracy on validation set
Got 103 / 1000 correct (10.30)
