### 5.6.1 计算设备

In [28]:
import torch
from torch import nn
import time

In [3]:
torch.device('cpu'), torch.device('cuda'), torch.device('cuda:1')

(device(type='cpu'), device(type='cuda'), device(type='cuda', index=1))

In [4]:
# Query the number of available GPUs
torch.cuda.device_count()

1

In [6]:
def try_gpu(i=0):  #@save
    """Return the i-th CUDA device if it exists, otherwise the CPU device.

    Args:
        i: Zero-based index of the requested GPU.

    Returns:
        ``torch.device(f'cuda:{i}')`` when ``0 <= i < torch.cuda.device_count()``,
        else ``torch.device('cpu')``.
    """
    # The lower bound matters: without it a negative index always passes the
    # count check and produces an invalid device string such as 'cuda:-1'.
    if 0 <= i < torch.cuda.device_count():
        return torch.device(f'cuda:{i}')
    return torch.device('cpu')


def try_all_gpus():  #@save
    """List every visible CUDA device, falling back to ``[cpu]`` when there is none."""
    gpu_count = torch.cuda.device_count()
    if gpu_count == 0:
        return [torch.device('cpu')]
    return [torch.device(f'cuda:{idx}') for idx in range(gpu_count)]

In [7]:
try_gpu(), try_gpu(10), try_all_gpus()

(device(type='cuda', index=0),
 device(type='cpu'),
 [device(type='cuda', index=0)])

### 5.6.2 张量与GPU

In [8]:
x = torch.tensor([1, 2, 3])
x.device

device(type='cpu')

In [10]:
X = torch.ones([2, 3], device=try_gpu())
X, X.device

(tensor([[1., 1., 1.],
         [1., 1., 1.]], device='cuda:0'),
 device(type='cuda', index=0))

In [11]:
# Tensor.to() accepts any device, so this also works when try_gpu() falls back
# to the CPU (Tensor.cuda() rejects a non-CUDA device). When X already lives on
# the target device, .to() hands back X itself rather than a copy.
Z = X.to(try_gpu())

In [12]:
Z is X

True

In [14]:
id(Z), id(X)

(2209615099920, 2209615099920)

### 5.6.3 神经网络与GPU

In [15]:
# Build the one-layer linear model and place its parameters on the device
# chosen by try_gpu(); Module.to() returns the same module object.
net = nn.Sequential(nn.Linear(3, 1)).to(try_gpu())

In [18]:
net[0].weight

Parameter containing:
tensor([[-0.4092, -0.0080,  0.0500]], device='cuda:0', requires_grad=True)

In [20]:
net(X),

(tensor([[-0.6457],
         [-0.6457]], device='cuda:0', grad_fn=<AddmmBackward0>),)

In [21]:
net[0].weight.data.device

device(type='cuda', index=0)

### 练习1: 大矩阵乘法: CPU VS GPU

In [22]:
M1 = torch.rand([10000, 10000], device='cpu')
N1 = torch.rand([10000, 10000], device='cpu')

In [23]:
M2 = torch.rand([10000, 10000], device=try_gpu())
N2 = torch.rand([10000, 10000], device=try_gpu())

In [32]:
# time.perf_counter() is a monotonic, high-resolution clock intended for
# measuring intervals; time.time() is wall-clock time and can jump if the
# system clock is adjusted mid-measurement.
startT1 = time.perf_counter()
O1 = M1 @ N1
endT1 = time.perf_counter()
print(endT1 - startT1)

1.716261863708496


In [33]:
# CUDA kernels are launched asynchronously: without synchronizing, the timer
# would stop as soon as the matmul is queued, not when it has finished.
if M2.is_cuda:
    torch.cuda.synchronize(M2.device)  # drain pending work before starting the clock
startT2 = time.perf_counter()
O2 = M2 @ N2
if O2.is_cuda:
    torch.cuda.synchronize(O2.device)  # wait for the matmul to actually complete
endT2 = time.perf_counter()
print(endT2 - startT2)

0.07734274864196777


In [34]:
(endT1 - startT1) / (endT2 - startT2)

22.190339674289994

In [35]:
O2.device

device(type='cuda', index=0)

### 练习2: 如何在GPU上读写模型参数?

In [36]:
K = torch.tensor([1, 2, 3, 4, 5], device=try_gpu())

In [38]:
K,

(tensor([1, 2, 3, 4, 5], device='cuda:0'),)

In [39]:
torch.save(K, 'k-file-gpu.pt')

In [40]:
# map_location re-targets the stored tensor to whatever device is available now,
# so the file still loads on a machine without the GPU it was saved from.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
L = torch.load('k-file-gpu.pt', map_location=try_gpu())
L,

(tensor([1, 2, 3, 4, 5], device='cuda:0'),)