In [19]:
# Shell escape: print the NVIDIA driver version and the current state of each GPU.
!nvidia-smi

Wed Jul 19 15:21:19 2023       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.54.03              Driver Version: 535.54.03    CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  NVIDIA GeForce RTX 3090        Off | 00000000:65:00.0  On |                  N/A |
| 30%   45C    P8              27W / 350W |   1823MiB / 24576MiB |     12%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                                    

In [20]:
import torch
from torch import nn



# Three device handles: the CPU, CUDA with no explicit index, and CUDA GPU 0.
# As the output shows, 'cuda' carries no index while 'cuda:0' has index=0.
torch.device("cpu"), torch.device('cuda'), torch.device('cuda:0')

(device(type='cpu'), device(type='cuda'), device(type='cuda', index=0))

In [21]:
# Number of CUDA GPUs visible to PyTorch.
torch.cuda.device_count()

1

In [22]:

from typing import List, Union

def try_gpu(index: int = 0) -> torch.device:
    """Return the CUDA device with ordinal ``index`` if it exists, else the CPU.

    Args:
        index: Zero-based GPU ordinal to request. Defaults to 0.

    Returns:
        ``torch.device(f'cuda:{index}')`` when ``index`` is non-negative and
        smaller than ``torch.cuda.device_count()``; otherwise
        ``torch.device('cpu')``.
    """
    # The lower bound matters: a negative ordinal would otherwise pass the
    # count check and torch.device() would reject the string 'cuda:-1'.
    if 0 <= index < torch.cuda.device_count():
        return torch.device(f'cuda:{index}')  # the ordinal is part of the device string
    return torch.device('cpu')
        


def try_all_gpus() -> Union[List[torch.device], torch.device]:
    """Return every visible CUDA device, or the CPU device when there is none.

    Returns:
        A list with one ``torch.device('cuda:i')`` per GPU reported by
        ``torch.cuda.device_count()``. When that count is zero, a single
        ``torch.device('cpu')`` is returned instead (not wrapped in a list),
        so callers have to handle both shapes.
    """
    devices = [torch.device(f"cuda:{i}") for i in range(torch.cuda.device_count())]
    # An empty list is falsy, so this falls back to the bare CPU device.
    return devices if devices else torch.device("cpu")


        
# On this single-GPU machine: GPU 0, the CPU fallback for the missing index 10,
# and the list of all available GPUs.
try_gpu(), try_gpu(10), try_all_gpus()


(device(type='cuda', index=0),
 device(type='cpu'),
 [device(type='cuda', index=0)])

In [23]:
# Create the tensor directly on the GPU (try_gpu() returns the CPU device when no GPU exists).
X = torch.ones(size=(2, 3), device=try_gpu())

X

tensor([[1., 1., 1.],
        [1., 1., 1.]], device='cuda:0')

In [24]:
# An operation on two tensors runs on the device that holds them, so both
# operands must be on the same device: both on the CPU, or both on the *same*
# GPU, because each device has its own memory.
# X.cuda(i) places a tensor in the memory of GPU i; for example X.cuda(1) would
# copy X from cuda:0 to cuda:1 on a multi-GPU machine.
# Here X already lives on cuda:0 (see the output above), so per the
# torch.Tensor.cuda documentation no copy is made and the original object is returned.
Z = X.cuda(0)
print(X)
print(Z)

tensor([[1., 1., 1.],
        [1., 1., 1.]], device='cuda:0')
tensor([[1., 1., 1.],
        [1., 1., 1.]], device='cuda:0')


In [25]:
X + Z # computed on the device that holds X and Z; the result is stored on that same device


tensor([[2., 2., 2.],
        [2., 2., 2.]], device='cuda:0')

In [29]:
# Neural networks and the GPU
# Build a small Linear -> ReLU model, then move its parameters to the chosen device.
net = nn.Sequential(nn.Linear(3, 1), nn.ReLU())
net.to(device=try_gpu())

# Inspect the first parameter to confirm where the weights live.
print(next(net.parameters()).device)


cuda:0


In [36]:
# The input batch must live on the same device as the model's parameters.
# Move it with try_gpu() -- the device `net` was sent to -- instead of a
# hard-coded GPU 0, which raises on a machine without CUDA.
X = torch.randn(size=(20, 3), dtype=torch.float32)
X = X.to(try_gpu())

net(X)

tensor([[0.0000],
        [1.3625],
        [0.4900],
        [1.5682],
        [1.1068],
        [0.6400],
        [0.0000],
        [0.3807],
        [0.0000],
        [0.4515],
        [0.0000],
        [1.6553],
        [0.0379],
        [0.9451],
        [0.0000],
        [0.0000],
        [0.7531],
        [0.6430],
        [0.3887],
        [0.3877]], device='cuda:0', grad_fn=<ReluBackward0>)