In [1]:
import torch
from torch import nn
from d2l import torch as d2l

In [2]:
## Computing devices

In [3]:
def cpu():
    """Return the torch device object that represents the CPU."""
    cpu_device = torch.device('cpu')
    return cpu_device

def gpu(i=0):
    """Return the torch device object for the CUDA device with index ``i``.

    Only the device handle is built here; no check is made that such a
    GPU is actually present.
    """
    device_spec = f'cuda:{i}'
    return torch.device(device_spec)

# Display one CPU handle and the handles for CUDA indices 0 and 1.
cpu(), gpu(), gpu(1)

(device(type='cpu'),
 device(type='cuda', index=0),
 device(type='cuda', index=1))

In [4]:
def num_gpus():  #@save
    """Report how many GPUs torch.cuda can see."""
    gpu_count = torch.cuda.device_count()
    return gpu_count

# Show the GPU count that torch.cuda.device_count() reports for this machine.
num_gpus()


1

In [5]:
def try_gpu(i=0):  #@save
    """Return gpu(i) if exists, otherwise return cpu().

    Args:
        i: Zero-based index of the requested GPU.

    Returns:
        ``gpu(i)`` when ``0 <= i < num_gpus()``, otherwise ``cpu()``.
    """
    # Bound the index on both sides: a one-sided `num_gpus() >= i + 1` check
    # lets negative indices through and would build an invalid 'cuda:-1' spec
    # instead of falling back to the CPU.
    if 0 <= i < num_gpus():
        return gpu(i)
    return cpu()

def try_all_gpus():  #@save
    """Return all available GPUs, or [cpu(),] if no GPU exists.

    Returns:
        A non-empty list of torch devices: one entry per GPU index in
        ``range(num_gpus())``, or the single-element list ``[cpu()]`` when
        ``num_gpus()`` is 0.
    """
    devices = [gpu(i) for i in range(num_gpus())]
    # Fall back to the CPU so the result is never empty, as the docstring promises.
    return devices if devices else [cpu()]

# Probe the default GPU, a deliberately high index (10), and the full device list.
try_gpu(), try_gpu(10), try_all_gpus()


(device(type='cuda', index=0),
 device(type='cpu'),
 [device(type='cuda', index=0)])

## Tensors and GPUs

In [6]:
# Create a tensor without a device argument, then inspect where it was placed.
x = torch.tensor([1, 2, 3])
x.device


device(type='cpu')

In [14]:
# Allocate a 2x3 tensor of ones directly on the device returned by try_gpu().
X = torch.ones(2, 3, device=try_gpu())
X


tensor([[1., 1., 1.],
        [1., 1., 1.]], device='cuda:0')

In [15]:
# Allocate a 2x3 random tensor explicitly on the CPU device.
Y = torch.rand(2, 3, device=cpu())
Y


tensor([[0.7641, 0.7605, 0.1925],
        [0.8807, 0.1042, 0.7989]])

In [20]:
# Check which device X is stored on.
X.device


device(type='cuda', index=0)

In [19]:
# Check which device Y is stored on.
Y.device

device(type='cpu')

## Neural Networks and GPUs

In [21]:
# Build a one-layer model and move it to the device chosen by try_gpu()
# in a single chained expression.
net = nn.Sequential(nn.LazyLinear(1)).to(device=try_gpu())




In [22]:
# Run the model on X; both net and X were placed via try_gpu().
net(X)


tensor([[-0.3333],
        [-0.3333]], device='cuda:0', grad_fn=<AddmmBackward0>)

# Inspect which device the first layer's weight tensor is stored on.
net[0].weight.data.device


In [27]:
@d2l.add_to_class(d2l.Trainer)
def __init__(self, max_epochs, num_gpus=0, gradient_clip_val=0):
    """Initialise the trainer and select the GPUs it may use.

    Args:
        max_epochs: Handed to ``save_hyperparameters()`` along with the other
            arguments (presumably stored as ``self.max_epochs`` -- verify
            against d2l).
        num_gpus: Upper bound on the number of GPUs to use; the effective
            count is capped at ``d2l.num_gpus()``.
        gradient_clip_val: Not used directly in this method.
    """
    # Keep this call first, with no extra locals defined before it.
    # NOTE(review): save_hyperparameters presumably captures the constructor
    # arguments from this frame -- confirm before reordering.
    self.save_hyperparameters()
    gpu_count = min(num_gpus, d2l.num_gpus())
    self.gpus = [d2l.gpu(idx) for idx in range(gpu_count)]

@d2l.add_to_class(d2l.Trainer)
def prepare_batch(self, batch):
    """Move every element of ``batch`` to the first selected GPU, if any.

    Returns ``batch`` unchanged when ``self.gpus`` is empty; otherwise returns
    a new list whose elements are the results of ``.to(self.gpus[0])``.
    """
    if not self.gpus:
        return batch
    target = self.gpus[0]
    return [item.to(target) for item in batch]

@d2l.add_to_class(d2l.Trainer)
def prepare_model(self, model):
    """Attach ``model`` to this trainer and move it to the first GPU, if any."""
    # Give the model a back-reference to the trainer driving it.
    model.trainer = self
    # The board's x-range runs from 0 to the configured number of epochs.
    model.board.xlim = [0, self.max_epochs]
    use_gpu = bool(self.gpus)
    if use_gpu:
        model.to(self.gpus[0])
    self.model = model

## Questions and Answers

Q1. Try a large computation task (e.g., large matrix multiplication). What’s the difference in speed between CPU and GPU? What about a task with a small number of calculations?

Answer:

Large tasks: GPUs are much faster than CPUs for highly parallelizable operations (like multiplying large matrices), because they have thousands of cores optimized for matrix/vector operations.

Small tasks: Overhead of transferring data to/from GPU can dominate. In such cases, CPU may actually be faster, since GPU setup time outweighs computation gains.

Q2. How should we read and write model parameters on the GPU?

Answer:

Use the framework’s device management:

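PyTorch:

```python
# Move the model (and therefore its parameters) to the GPU
model = model.to("cuda")

# Write: save the parameters (the state dict) to disk
torch.save(model.state_dict(), "model.pth")

# Read: load the parameters back, mapping the stored tensors onto the GPU
model.load_state_dict(torch.load("model.pth", map_location="cuda"))
```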
