# Using GPUs

In [15]:
import os
import time

import torch
import torch.nn as nn
from torch.autograd import Variable

PyTorch lets you control where a tensor lives: calling `.cuda()` on a tensor returns a copy of it on the GPU.

In [3]:
# `a` stays wherever torch.rand creates it; `b` is additionally sent to the GPU via `.cuda()`
a, b = torch.rand(5, 5), torch.rand(5, 1).cuda()

In [6]:
# Tensors must be on the same device (CPU or GPU) to be combined in one operation.
# `a` was left on the CPU while `b` was moved to the GPU, so the multiplication
# below is expected to fail. The error is caught and printed (rather than left
# uncaught) so that the notebook still runs from top to bottom.
print(type(a), type(b))
try:
    a * b
except (TypeError, RuntimeError) as err:
    # Older PyTorch releases raise TypeError here; newer ones raise RuntimeError.
    print('{}: {}'.format(type(err).__name__, err))

<class 'torch.FloatTensor'> <class 'torch.cuda.FloatTensor'>


TypeError: mul received an invalid combination of arguments - got (torch.cuda.FloatTensor), but expected one of:
 * (float value)
      didn't match because some of the arguments have invalid types: ([31;1mtorch.cuda.FloatTensor[0m)
 * (torch.FloatTensor other)
      didn't match because some of the arguments have invalid types: ([31;1mtorch.cuda.FloatTensor[0m)


In [7]:
# Copying `a` to the GPU with `.cuda()` puts both operands on the same device,
# so the same multiplication now succeeds and the result is a GPU tensor.
a.cuda() * b


 0.0131  0.2374  0.0321  0.2809  0.0493
 0.0774  0.1295  0.2297  0.2132  0.0293
 0.3600  0.1510  0.0834  0.0089  0.1778
 0.3057  0.1884  0.1690  0.0606  0.3616
 0.1667  0.1570  0.1096  0.0525  0.1271
[torch.cuda.FloatTensor of size 5x5 (GPU 0)]

In [8]:
from utils import SimpleClassifier, FashionMNIST

In [64]:
# Location of the Fashion-MNIST files. Set the FASHION_DIR environment variable
# to point at your own copy; the original author's path is kept as the default
# so existing setups keep working unchanged.
FASHION_DIR = os.environ.get('FASHION_DIR', '/home/erikreppel/data/fashion-mnist/')

# Train and test splits (FashionMNIST comes from the local `utils` module).
train = FashionMNIST(FASHION_DIR)
test = FashionMNIST(FASHION_DIR, kind='test')

batch_size = 64

# Both loaders yield shuffled mini-batches of `batch_size` examples.
train_loader = torch.utils.data.DataLoader(dataset=train,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test,
                                          batch_size=batch_size,
                                          shuffle=True)

In [65]:
# Model / training configuration
n_classes = 10
n_features = 28*28   # presumably one flattened 28x28 image per example -- confirm against utils.FashionMNIST
# NOTE: len() of a DataLoader is the number of *batches* per epoch, not the
# number of examples. The value is kept as-is in case later cells rely on it.
n_examples = len(train_loader)
hidden_size = 512

n_epoch = 5
# Everything below falls back to the CPU when no CUDA device is available.
USE_GPU = torch.cuda.is_available()


model = SimpleClassifier(n_features, hidden_size, n_classes)

criterion = nn.CrossEntropyLoss()
if USE_GPU:
    # Move the model to the GPU *before* building the optimizer, as the
    # torch.optim documentation recommends, so the optimizer is constructed
    # from the parameters that will actually be trained.
    model = model.cuda()
    criterion = criterion.cuda()
optimizer = torch.optim.Adam(model.parameters())

## Recall that 5 epochs took ~71s on my 8-core / 16-thread CPU

Note the very minimal changes needed to train using a GPU

In [67]:
# Train for `n_epoch` epochs, printing the average per-batch loss after each
# epoch and the total wall-clock time at the end.
model.train()
if USE_GPU:
    model.cuda()

start = time.time()

for epoch in range(1, n_epoch+1):
    total_loss = 0.0
    for X, y in train_loader:
        # Cast the inputs to float; the model expects batches of 1D inputs
        # (presumably the dataset already yields flattened images -- confirm).
        X = Variable(X.float())
        y = Variable(y)

        # The only GPU-specific step in the loop: put the batch on the same
        # device as the model.
        if USE_GPU:
            X = X.cuda()
            y = y.cuda()

        # Clear old gradients first, so gradients left behind by an earlier
        # (e.g. interrupted) run of this cell cannot leak into this step.
        optimizer.zero_grad()

        y_hat = model(X)
        loss = criterion(y_hat, y)
        # `.item()` exists from PyTorch 0.4 on, where `loss.data[0]` no longer
        # works on a 0-dim loss; fall back to the old spelling on older releases.
        total_loss += loss.item() if hasattr(loss, 'item') else loss.data[0]

        # backprop error, update weights
        loss.backward()
        optimizer.step()

    print('Avg loss: {}'.format(total_loss / len(train_loader)))

# CUDA kernels run asynchronously: wait for outstanding GPU work to finish
# before reading the clock so the reported time covers all of it.
if USE_GPU:
    torch.cuda.synchronize()
print('Took {:.3f}s'.format(time.time() - start))

Avg loss: 1.7505646972005555
Avg loss: 1.729608011525323
Avg loss: 1.723290304131091
Avg loss: 1.715811915489148
Avg loss: 1.71175047850558
Took 9.988s


### The larger the model, the more it pays off to run it on a GPU

A few stats:

|Batch Size|Hidden Size| Time taken CPU (s)| Time taken GPU (s)|
|---|--- |---    |---    |
|32 |256 |45.149 |16.819 |
|32 |512 |80.357 |17.196 |
|64 |512 |41.201 |9.639  |
|128|1024|49.248 |5.827  |
|256|2048|66.144 |4.066  |
|max|2048|28.104 |2.389  |