In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import os
import datetime

In [4]:
# Benchmark configuration: which GPU(s) to train on and the global batch size.
#
# CUDA_VISIBLE_DEVICES restricts which physical GPUs this process can see; the
# indices in DEVICE_IDS are relative to that visible set, so with only
# physical GPU 1 exposed it is addressed as device 0 below.
#
# Alternative DEVICE_IDS settings kept for reference (presumably for the
# multi-GPU runs further down; CUDA_VISIBLE_DEVICES would have to expose that
# many devices as well):
#   [0, 1, 2]
#   [0, 1, 2, 3]
#   [0, 1, 2, 3, 4]
#   [0, 1, 2, 3, 4, 5, 6, 7]
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

DEVICE_IDS = [0]

# 1000 samples per participating device, so the global batch grows with the
# number of GPUs.
BATCHSIZE = len(DEVICE_IDS) * 1000

In [5]:

# Input pipeline for the CIFAR-10 training split.
# Each image is converted to a tensor and then normalised per channel with
# mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,) * 3, (0.5,) * 3),
])

# The dataset is fetched into ./data on first use (download=True).
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)

# shuffle=False: batches are served in the same dataset order every epoch.
trainloader = torch.utils.data.DataLoader(
    trainset,
    batch_size=BATCHSIZE,
    shuffle=False,
)

class VGG(nn.Module):
    """VGG-16 style CNN with batch normalisation and a 10-way linear head.

    Thirteen 3x3 conv + BatchNorm + ReLU stages are interleaved with five
    2x2 max-pools. The flattened feature map feeds ``nn.Linear(512, 10)``,
    so the input must shrink to 1x1 spatially after the five poolings
    (e.g. 32x32 images).

    Implementation Ref: https://github.com/kuangliu/pytorch-cifar
    """

    def __init__(self):
        super(VGG, self).__init__()
        # Integers are conv output channel counts; 'M' marks a max-pool.
        layer_spec = [
            64, 64, 'M',
            128, 128, 'M',
            256, 256, 256, 'M',
            512, 512, 512, 'M',
            512, 512, 512, 'M',
        ]
        self.features = self._make_layers(layer_spec)
        self.classifier = nn.Linear(512, 10)

    def forward(self, x):
        """Return class scores of shape (batch, 10) for an image batch ``x``."""
        feature_map = self.features(x)
        # Collapse everything except the batch dimension before the linear head.
        flat = feature_map.view(feature_map.size(0), -1)
        return self.classifier(flat)

    def _make_layers(self, cfg):
        """Build the feature extractor described by ``cfg``.

        Args:
            cfg: sequence whose items are either an int (output channels of a
                3x3 conv followed by BatchNorm and in-place ReLU) or the
                string 'M' (a 2x2, stride-2 max-pool).

        Returns:
            An ``nn.Sequential`` of the layers in ``cfg`` order, terminated by
            a 1x1 average pool. The first conv takes 3 input channels.
        """
        modules = []
        channels_in = 3
        for spec in cfg:
            if spec != 'M':
                modules.extend([
                    nn.Conv2d(channels_in, spec, kernel_size=3, padding=1),
                    nn.BatchNorm2d(spec),
                    nn.ReLU(inplace=True),
                ])
                # The next conv consumes what this one produced.
                channels_in = spec
                continue
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
        modules.append(nn.AvgPool2d(kernel_size=1, stride=1))
        return nn.Sequential(*modules)

# Model, loss and optimiser.
net = VGG()

criterion = nn.CrossEntropyLoss()
# The optimiser holds references to the model's parameters; it is created
# before the model is moved to the GPU and wrapped.
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

# Make GPU 0 the current CUDA device and place the model there.
device = "cuda"
torch.cuda.set_device(0)

# Wrap the GPU-resident model in DataParallel over the devices in DEVICE_IDS.
net = nn.DataParallel(net.to(device), device_ids=DEVICE_IDS)




Files already downloaded and verified


## 1 GPU

In [None]:
# Timed training run: 5 full passes over `trainloader`. After each epoch the
# loss of that epoch's final mini-batch is printed (not an epoch average), and
# the total wall-clock duration is printed at the end.
print('Training starts...')
start = datetime.datetime.now()
for epoch in range(5):  # loop over the dataset multiple times
    for inputs, labels in trainloader:
        # Move the batch to the GPU. `non_blocking=True` replaces the old
        # `async=True` keyword, which is a SyntaxError on Python >= 3.7
        # because `async` became a reserved word. Tensors are used directly;
        # wrapping them in torch.autograd.Variable is no longer needed.
        inputs = inputs.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # `loss` here is the loss of the last mini-batch processed in this epoch.
    print('[{:d}, {:5f}]'.format(epoch+1, loss.item()))
finish = datetime.datetime.now()
print('Done.. Duration: ',finish - start)

Training starts...


## 3 GPU

In [None]:
# Timed training run: 5 full passes over `trainloader`. After each epoch the
# loss of that epoch's final mini-batch is printed (not an epoch average), and
# the total wall-clock duration is printed at the end.
print('Training starts...')
start = datetime.datetime.now()
for epoch in range(5):  # loop over the dataset multiple times
    for inputs, labels in trainloader:
        # Move the batch to the GPU. `non_blocking=True` replaces the old
        # `async=True` keyword, which is a SyntaxError on Python >= 3.7
        # because `async` became a reserved word. Tensors are used directly;
        # wrapping them in torch.autograd.Variable is no longer needed.
        inputs = inputs.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # `loss` here is the loss of the last mini-batch processed in this epoch.
    print('[{:d}, {:5f}]'.format(epoch+1, loss.item()))
finish = datetime.datetime.now()
print('Done.. Duration: ',finish - start)

Training starts...
[1, 1.966122]
[2, 1.615373]
[3, 1.402906]
[4, 1.255466]


## 4 GPU

In [4]:
# Timed training run: 5 full passes over `trainloader`. After each epoch the
# loss of that epoch's final mini-batch is printed (not an epoch average), and
# the total wall-clock duration is printed at the end.
print('Training starts...')
start = datetime.datetime.now()
for epoch in range(5):  # loop over the dataset multiple times
    for inputs, labels in trainloader:
        # Move the batch to the GPU. `non_blocking=True` replaces the old
        # `async=True` keyword, which is a SyntaxError on Python >= 3.7
        # because `async` became a reserved word. Tensors are used directly;
        # wrapping them in torch.autograd.Variable is no longer needed.
        inputs = inputs.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # `loss` here is the loss of the last mini-batch processed in this epoch.
    print('[{:d}, {:5f}]'.format(epoch+1, loss.item()))
finish = datetime.datetime.now()
print('Done.. Duration: ',finish - start)

Training starts...
[1, 2.045465]
[2, 1.753967]
[3, 1.571599]
[4, 1.416739]
[5, 1.287294]
Done.. Duration:  0:02:38.597538


## 5 GPU

In [None]:
# Timed training run: 5 full passes over `trainloader`. After each epoch the
# loss of that epoch's final mini-batch is printed (not an epoch average), and
# the total wall-clock duration is printed at the end.
print('Training starts...')
start = datetime.datetime.now()
for epoch in range(5):  # loop over the dataset multiple times
    for inputs, labels in trainloader:
        # Move the batch to the GPU. `non_blocking=True` replaces the old
        # `async=True` keyword, which is a SyntaxError on Python >= 3.7
        # because `async` became a reserved word. Tensors are used directly;
        # wrapping them in torch.autograd.Variable is no longer needed.
        inputs = inputs.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # `loss` here is the loss of the last mini-batch processed in this epoch.
    print('[{:d}, {:5f}]'.format(epoch+1, loss.item()))
finish = datetime.datetime.now()
print('Done.. Duration: ',finish - start)

## 8 GPU

In [10]:
# Timed training run: 5 full passes over `trainloader`. After each epoch the
# loss of that epoch's final mini-batch is printed (not an epoch average), and
# the total wall-clock duration is printed at the end.
print('Training starts...')
start = datetime.datetime.now()
for epoch in range(5):  # loop over the dataset multiple times
    for inputs, labels in trainloader:
        # Move the batch to the GPU. `non_blocking=True` replaces the old
        # `async=True` keyword, which is a SyntaxError on Python >= 3.7
        # because `async` became a reserved word. Tensors are used directly;
        # wrapping them in torch.autograd.Variable is no longer needed.
        inputs = inputs.to(device, non_blocking=True)
        labels = labels.to(device, non_blocking=True)

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

    # `loss` here is the loss of the last mini-batch processed in this epoch.
    print('[{:d}, {:5f}]'.format(epoch+1, loss.item()))
finish = datetime.datetime.now()
print('Done.. Duration: ',finish - start)

Training starts...
[1, 2.238004]
[2, 2.004407]
[3, 1.821976]
[4, 1.673215]
[5, 1.554240]
Done.. Duration:  0:01:38.230195
