# pytorch Tutorial

In [1]:
import torch
import numpy as np


In [2]:
torch.__version__

'2.1.0+cu121'

### 確認裝置類型 GPU or CPU

In [3]:
if torch.cuda.is_available():
    # Get the number of available GPUs
    num_gpus = torch.cuda.device_count()
    
    # Get the name of the current GPU
    current_gpu_name = torch.cuda.get_device_name(0)  # Assuming you have at least one GPU
    
    print(f"Number of available GPUs: {num_gpus}")
    print(f"Current GPU name: {current_gpu_name}")
       
    # Now, operations will be performed on the GPU by default if available
    cuda0 = torch.device('cuda:0')
    tsr = torch.tensor([[1,2],[3,4],[5,6]], dtype=torch.float64, device=cuda0)
else:
    print("No GPU available. Using CPU.")

cuda0 = torch.device('cuda', 0)
cuda1 = torch.device('cuda', 1)
cpu = torch.device('cpu')


Number of available GPUs: 1
Current GPU name: NVIDIA GeForce RTX 3090


### tensor 創建

In [4]:
tsr = torch.tensor([[1,2],[3,4],[5,6]]); tsr

tensor([[1, 2],
        [3, 4],
        [5, 6]])

In [5]:
# 創建一個 2x3 的零矩陣
tensor1 = torch.zeros(2, 3)
print("零矩陣:")
print(tensor1)

# 創建一個 2x3 的隨機矩陣
tensor2 = torch.rand(2, 3)
print("\n隨機矩陣:")
print(tensor2)

# 創建一個直接由數據構造的 Tensor
data = [[1, 2, 5], [3, 4, 7]]
tensor3 = torch.tensor(data)
print("\n由數據構造的 Tensor:")
print(tensor3)
print(tensor3.dtype)


零矩陣:
tensor([[0., 0., 0.],
        [0., 0., 0.]])

隨機矩陣:
tensor([[0.5956, 0.8338, 0.2295],
        [0.4152, 0.4992, 0.4502]])

由數據構造的 Tensor:
tensor([[1, 2, 5],
        [3, 4, 7]])
torch.int64


In [6]:
tsr = torch.tensor([[1,2],[3,4]], dtype=torch.float64); tsr

tensor([[1., 2.],
        [3., 4.]], dtype=torch.float64)

### 邏輯運算

In [7]:
tsr = torch.randn(4,5)
tsr

tensor([[ 1.1881, -0.2218,  0.6883,  1.6405, -0.1076],
        [-0.4914, -1.2082,  1.0859,  0.3926, -0.7073],
        [-0.1189,  1.0979,  0.3442, -0.3480, -0.7166],
        [-0.1212, -0.3576,  0.0276,  0.9781,  0.4469]])

In [8]:
# 1. max of entire tensor (torch.max(input) → Tensor)
m = torch.max(tsr)
print(m)

tensor(1.6405)


In [9]:
tsr.shape

torch.Size([4, 5])

In [10]:
# 2. max along a dimension (torch.max(input, dim, keepdim=False, *, out=None) → (Tensor, LongTensor))
m, idx = torch.max(tsr,0)
print(m)
print(idx)

tensor([1.1881, 1.0979, 1.0859, 1.6405, 0.4469])
tensor([0, 2, 1, 0, 3])


In [11]:
# 2-2
m, idx = torch.max(input=tsr,dim=0)
print(m)
print(idx)

tensor([1.1881, 1.0979, 1.0859, 1.6405, 0.4469])
tensor([0, 2, 1, 0, 3])


In [12]:
# 2-3
m, idx = torch.max(tsr,0,False)
print(m)
print(idx)

tensor([1.1881, 1.0979, 1.0859, 1.6405, 0.4469])
tensor([0, 2, 1, 0, 3])


In [13]:
# 2-4
m, idx = torch.max(tsr,dim=0,keepdim=True)
print(m)
print(idx)

tensor([[1.1881, 1.0979, 1.0859, 1.6405, 0.4469]])
tensor([[0, 2, 1, 0, 3]])


In [14]:
# 2-5
p = (m,idx)
torch.max(tsr,0,False,out=p)
print(p[0])
print(p[1])
print(p)

tensor([1.1881, 1.0979, 1.0859, 1.6405, 0.4469])
tensor([0, 2, 1, 0, 3])
(tensor([1.1881, 1.0979, 1.0859, 1.6405, 0.4469]), tensor([0, 2, 1, 0, 3]))


  torch.max(tsr,0,False,out=p)


### **tensor data type**
![**tensor data type**](https://miro.medium.com/v2/resize:fit:1100/format:webp/1*CHitOyDsG5fXhR80cAT3Ag.png)

### tensor <-----> numpy 

In [15]:
# tensor transfer to numpy
tsr2numpy = tsr.numpy()
print('numpy', tsr2numpy)
print('type:', type(tsr2numpy))


numpy [[ 1.1881301  -0.22177304  0.688282    1.6404712  -0.10755035]
 [-0.49139878 -1.2082385   1.0859488   0.39258292 -0.7073044 ]
 [-0.11885507  1.0979258   0.344225   -0.34797606 -0.7166301 ]
 [-0.12120967 -0.3576219   0.02757552  0.97814554  0.4468925 ]]
type: <class 'numpy.ndarray'>


In [16]:
# numpy transfer to tensor

'''
torch.Tensor
torch.tensor
torch.as_tensor
torch.from_numpy

'''
arr = np.array([[1,2,3],[4,5,6]])

#1 sol
arr2tsr_1 = torch.tensor(arr)
print("tensor:", arr2tsr_1)
print("dtype:", arr2tsr_1.dtype)

#2 sol
arr2tsr_2 = torch.Tensor(arr)
print("tensor:", arr2tsr_2)
print("dtype:", arr2tsr_2.dtype)

#3 sol
arr2tsr_3 = torch.as_tensor(arr)
print("tensor:", arr2tsr_3)
print("dtype:", arr2tsr_3.dtype)

#4 sol
arr2tsr_4 = torch.from_numpy(arr)
print("tensor:", arr2tsr_4)
print("dtype:", arr2tsr_4.dtype)




tensor: tensor([[1, 2, 3],
        [4, 5, 6]], dtype=torch.int32)
dtype: torch.int32
tensor: tensor([[1., 2., 3.],
        [4., 5., 6.]])
dtype: torch.float32
tensor: tensor([[1, 2, 3],
        [4, 5, 6]], dtype=torch.int32)
dtype: torch.int32
tensor: tensor([[1, 2, 3],
        [4, 5, 6]], dtype=torch.int32)
dtype: torch.int32


### tensor 運算操作

In [17]:
tensor2

tensor([[0.5956, 0.8338, 0.2295],
        [0.4152, 0.4992, 0.4502]])

In [18]:
tensor3

tensor([[1, 2, 5],
        [3, 4, 7]])

In [19]:
# Tensor 加法
result_add = tensor2 + tensor3
print("\nTensor 加法結果:")
print(result_add)


# Tensor 加法 (apple 2 apple)
result_muiltiple = tensor2 * tensor3
print("\nTensor 乘法結果:")
print(result_muiltiple)


# Tensor 改變形狀
reshaped = tensor3.view(1, 6)
print("\n重塑後的 Tensor:")
print(reshaped)


Tensor 加法結果:
tensor([[1.5956, 2.8338, 5.2295],
        [3.4152, 4.4992, 7.4502]])

Tensor 乘法結果:
tensor([[0.5956, 1.6675, 1.1477],
        [1.2457, 1.9968, 3.1511]])

重塑後的 Tensor:
tensor([[1, 2, 5, 3, 4, 7]])


### 參數追蹤

In [20]:
# auto_grad
# 設定 requires_grad 指定參數是否追蹤

# 指定tensor
tsr = torch.tensor([[1,2],[3,4],[5,6]], dtype=torch.float64, requires_grad=True)

# 隨機tensor
tsr_randn = torch.randn(3, 2, requires_grad=True)

print(tsr)
print(tsr_randn)




tensor([[1., 2.],
        [3., 4.],
        [5., 6.]], dtype=torch.float64, requires_grad=True)
tensor([[-0.2016, -1.6679],
        [ 0.2950, -1.2705],
        [ 0.7401, -3.1303]], requires_grad=True)


### 梯度計算

In [21]:

# 建立隨機數值的 Tensor 並設定 requires_grad=True
x = torch.rand(2,3, requires_grad=True)
y = torch.rand(2,3, requires_grad=True)
z = torch.rand(2,3, requires_grad=True)


print("tensor_x", x)
print("tensor_y", y)
print("tensor_z", z)


print("--" * 20)

# 計算式子
# b = x*y + z
a = x * y
b = a + z
c = torch.sum(b)

# 計算梯度
c.backward()

# 查看 x 的梯度值
print(x.grad)


tensor_x tensor([[0.2396, 0.1117, 0.6634],
        [0.1873, 0.3473, 0.3479]], requires_grad=True)
tensor_y tensor([[0.4829, 0.7932, 0.1140],
        [0.5022, 0.6296, 0.9223]], requires_grad=True)
tensor_z tensor([[0.9059, 0.1766, 0.2995],
        [0.5391, 0.3627, 0.2754]], requires_grad=True)
----------------------------------------
tensor([[0.4829, 0.7932, 0.1140],
        [0.5022, 0.6296, 0.9223]])


### 停止梯度計算 
5 大方法, 用在模型推論過程

* detach()
* with wotch.no_grad()
* @torch.no_grad()
* with torch.inference_mode()
* @torch.inference_mode()

### detach()

In [22]:

x = torch.randn(3, requires_grad=True)
print(x)
print("requires_grad", x.requires_grad)

# No gradient will be computed for y
y = x.detach()
print(y)
print("requires_grad", y.requires_grad)

tensor([-0.6077,  0.3964,  1.4620], requires_grad=True)
requires_grad True
tensor([-0.6077,  0.3964,  1.4620])
requires_grad False


### with torch.no_grad()

In [78]:

x = torch.randn(3, 3, requires_grad=True)
print(x)
print("requires_grad", x.requires_grad)


with torch.no_grad():
    y = x * 2
    print(y)

print("requires_grad", y.requires_grad)

tensor([[-1.5487,  0.8920,  0.2638],
        [-0.2611,  1.7792, -0.3887],
        [ 1.5519,  1.4101,  0.1652]], requires_grad=True)
requires_grad True
tensor([[-3.0973,  1.7840,  0.5275],
        [-0.5223,  3.5584, -0.7773],
        [ 3.1038,  2.8201,  0.3304]])
requires_grad False


### @torch.no_grad()

In [85]:

@torch.no_grad()
def double(x):
    return x * 2

x = torch.randn(3, 3, requires_grad=True)
print(x)
print("requires_grad", x.requires_grad)

y = double(x)
print(y)

print("requires_grad", y.requires_grad)

tensor([[ 1.6129,  0.5429, -0.4001],
        [ 0.2064,  0.3922, -0.1294],
        [-0.5708,  1.1313,  0.2226]], requires_grad=True)
requires_grad True
tensor([[ 3.2258,  1.0857, -0.8003],
        [ 0.4128,  0.7845, -0.2587],
        [-1.1415,  2.2626,  0.4451]])
requires_grad False


### with torch.inference_mode()

In [82]:
x = torch.randn(3, 3, requires_grad=True)
print(x)
print("requires_grad", x.requires_grad)


with torch.inference_mode():
    y = x * 2
    print(y)
print("requires_grad", y.requires_grad)

tensor([[ 0.5451,  0.6024, -0.4503],
        [ 1.7358, -1.6545,  1.5861],
        [ 0.3672,  0.7445,  0.3024]], requires_grad=True)
requires_grad True
tensor([[ 1.0903,  1.2047, -0.9006],
        [ 3.4717, -3.3089,  3.1721],
        [ 0.7345,  1.4891,  0.6048]])
requires_grad False


### @torch.inference_mode()

In [84]:
@torch.inference_mode()
def double(x):
    return x * 2

x = torch.randn(3, 3, requires_grad=True)
print(x)
print("requires_grad", x.requires_grad)


y = double(x)
print(y)
print("requires_grad", y.requires_grad)

tensor([[ 1.9562,  2.0713,  1.2839],
        [-1.6120, -0.6295, -0.6977],
        [-0.2354, -1.2650,  0.1786]], requires_grad=True)
requires_grad True
tensor([[ 3.9124,  4.1426,  2.5678],
        [-3.2240, -1.2590, -1.3955],
        [-0.4708, -2.5300,  0.3571]])
requires_grad False


**Common errors**



The following code blocks show some common errors while using the torch library. First, execute the code with error, and then execute the next code block to fix the error. You need to change the runtime to GPU.


In [108]:
# 1. different device error
# 5個神經元 hidden layer, 1個神經元 output
model = torch.nn.Linear(5,1).to("cuda:0")
x = torch.Tensor([1,2,3,4,5]).to("cpu")
y = model(x)

AssertionError: Torch not compiled with CUDA enabled

In [109]:
# 1. different device error (fixed)
x = torch.Tensor([1,2,3,4,5]).to("cuda:0")
y = model(x)
print(y.shape)

AssertionError: Torch not compiled with CUDA enabled

In [110]:
# 2. mismatched dimensions error
x = torch.randn(4,5)
y= torch.randn(5,4)
z = x + y

RuntimeError: The size of tensor a (5) must match the size of tensor b (4) at non-singleton dimension 1

In [111]:
# 2. mismatched dimensions error (fixed)
y= y.transpose(0,1)
z = x + y
print(z.shape)

torch.Size([4, 5])


In [114]:
# 3. cuda out of memory error
import torch
import torchvision.models as models
resnet18 = models.resnet18().to("cuda:0") # Neural Networks for Image Recognition
data = torch.randn(2048,3,244,244) # Create fake data (512 images)
out = resnet18(data.to("cuda:0")) # Use Data as Input and Feed to Model
print(out.shape)


AssertionError: Torch not compiled with CUDA enabled

In [116]:
# 3. cuda out of memory error (fixed)
resnet18 = models.resnet18().to("cuda:0")
for d in data:
  out = resnet18(d.to("cuda:0").unsqueeze(0))
print(out.shape)

AssertionError: Torch not compiled with CUDA enabled

In [118]:
# 4. mismatched tensor type
import torch.nn as nn
L = nn.CrossEntropyLoss()
outs = torch.randn(5,5)
labels = torch.Tensor([1,2,3,4,0])
lossval = L(outs,labels) # Calculate CrossEntropyLoss between outs and labels

RuntimeError: expected scalar type Long but found Float

In [119]:
# 4. mismatched tensor type (fixed)
labels = labels.long()
lossval = L(outs,labels)
print(lossval)

tensor(2.3373)
