## 1 基本数据操作

In [1]:
# Import PyTorch and report which version this notebook was run with.
import torch
print("PyTorch version:", torch.__version__)

PyTorch version: 1.3.1


### 1.1 创建PyTorch Tensor

In [2]:
# Allocate a 5x3 tensor without initializing it: the printed values are
# whatever happened to be in that memory, not zeros.
x = torch.empty((5, 3))
print(x)

tensor([[1.1210e-44, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00],
        [0.0000e+00, 0.0000e+00, 0.0000e+00]])


In [3]:
# Draw a 5x3 tensor of uniform random numbers from [0, 1).
x = torch.rand((5, 3))
print(x)

tensor([[0.7753, 0.5968, 0.3043],
        [0.8182, 0.9059, 0.0084],
        [0.0928, 0.0080, 0.9260],
        [0.4166, 0.8482, 0.8696],
        [0.0504, 0.7147, 0.5023]])


指定数据类型：

In [4]:
# All-zero 5x3 tensor with an explicit integer dtype
# (torch.int64 is the same dtype as torch.long).
x = torch.zeros((5, 3), dtype=torch.int64)
print(x)
print(x.dtype)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])
torch.int64


从现有array创建：

In [5]:
# Build tensors directly from (nested) Python lists. Both lists contain
# floats, so the resulting tensors are floating point.
x = torch.tensor([5.5, 3.0])
y = torch.tensor([[5.2, 2.0], [1.2, 9.0]])
print(x)
print(y)

tensor([5.5000, 3.0000])
tensor([[5.2000, 2.0000],
        [1.2000, 9.0000]])


从现有的tensor创建：

In [6]:
# new_ones creates a 5x3 tensor of ones based on the existing tensor x
# (the name x is rebound to the new tensor).
x = x.new_ones(5, 3)
print(x)
# randn_like takes its shape from x; the dtype is given explicitly here.
x = torch.randn_like(x, dtype=torch.float)
print(x)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])
tensor([[-0.3641,  0.3783,  0.4136],
        [ 0.0720, -1.4639,  1.2516],
        [ 1.1459, -0.7147, -0.0098],
        [ 0.7281, -0.3607, -0.6848],
        [-0.8404,  1.4801,  0.6986]])


获取tensor形状：

In [7]:
# The shape attribute and the size() method report the same torch.Size.
print(x.shape)
print(x.size())

torch.Size([5, 3])
torch.Size([5, 3])


### 1.2 PyTorch Tensor操作

加法：

In [8]:
# Draw a second 5x3 operand and add it to x in two equivalent ways.
y = torch.rand(5, 3)
print(x+y)
print(torch.add(x, y))

# Write the sum into a pre-allocated tensor through the out= argument,
# then show the plain assignment form for comparison.
z = torch.empty(5, 3)
print(z)
torch.add(x, y, out=z)
print(z)
z = x + y
print(z)

# In-place add: y becomes y + x.
y.add_(x)
print(y)

tensor([[ 0.3062,  1.0174,  0.9678],
        [ 0.9446, -0.9590,  1.5893],
        [ 1.9307,  0.0167,  0.5829],
        [ 1.6073,  0.3414, -0.1228],
        [-0.1569,  1.5402,  1.2369]])
tensor([[ 0.3062,  1.0174,  0.9678],
        [ 0.9446, -0.9590,  1.5893],
        [ 1.9307,  0.0167,  0.5829],
        [ 1.6073,  0.3414, -0.1228],
        [-0.1569,  1.5402,  1.2369]])
tensor([[ 0.3062,  1.0174,  0.9678],
        [ 0.9446, -0.9590,  1.5893],
        [ 1.9307,  0.0167,  0.5829],
        [ 1.6073,  0.3414, -0.1228],
        [-0.1569,  1.5402,  1.2369]])
tensor([[ 0.3062,  1.0174,  0.9678],
        [ 0.9446, -0.9590,  1.5893],
        [ 1.9307,  0.0167,  0.5829],
        [ 1.6073,  0.3414, -0.1228],
        [-0.1569,  1.5402,  1.2369]])
tensor([[ 0.3062,  1.0174,  0.9678],
        [ 0.9446, -0.9590,  1.5893],
        [ 1.9307,  0.0167,  0.5829],
        [ 1.6073,  0.3414, -0.1228],
        [-0.1569,  1.5402,  1.2369]])
tensor([[ 0.3062,  1.0174,  0.9678],
        [ 0.9446, -0.9590,  1.5893],
        [ 1.9307,  0.0167,  0.5829],
        [ 1.6073,  0.3414, -0.1228],
        [-0.1569,  1.5402,  1.2369]])

索引：

In [9]:
# Basic indexing: the whole tensor, the row at index 4, and column 0.
print(x)
print(x[4])
print(x[:, 0])

tensor([[-0.3641,  0.3783,  0.4136],
        [ 0.0720, -1.4639,  1.2516],
        [ 1.1459, -0.7147, -0.0098],
        [ 0.7281, -0.3607, -0.6848],
        [-0.8404,  1.4801,  0.6986]])
tensor([-0.8404,  1.4801,  0.6986])
tensor([-0.3641,  0.0720,  1.1459,  0.7281, -0.8404])


改变形状（内存共享）：

In [10]:
# view presents the same 15 elements under a new shape.
y = x.view(15)
print(y)
# -1 lets PyTorch infer that dimension from the element count (15 / 5 = 3 rows).
y = x.view(-1, 5)
print(y)

tensor([-0.3641,  0.3783,  0.4136,  0.0720, -1.4639,  1.2516,  1.1459, -0.7147,
        -0.0098,  0.7281, -0.3607, -0.6848, -0.8404,  1.4801,  0.6986])
tensor([[-0.3641,  0.3783,  0.4136,  0.0720, -1.4639],
        [ 1.2516,  1.1459, -0.7147, -0.0098,  0.7281],
        [-0.3607, -0.6848, -0.8404,  1.4801,  0.6986]])


In [11]:
# y is a view of x, so the in-place update of x is visible through y.
x += 1
print(y)

tensor([[ 0.6359,  1.3783,  1.4136,  1.0720, -0.4639],
        [ 2.2516,  2.1459,  0.2853,  0.9902,  1.7281],
        [ 0.6393,  0.3152,  0.1596,  2.4801,  1.6986]])


改变形状并分配给拥有独立内存的变量:

In [12]:
# clone() copies the data before reshaping, so y keeps the old values
# when x is updated in place afterwards.
y = x.clone().view(15)
x += 1
print(x)
print(y)

tensor([[1.6359, 2.3783, 2.4136],
        [2.0720, 0.5361, 3.2516],
        [3.1459, 1.2853, 1.9902],
        [2.7281, 1.6393, 1.3152],
        [1.1596, 3.4801, 2.6986]])
tensor([ 0.6359,  1.3783,  1.4136,  1.0720, -0.4639,  2.2516,  2.1459,  0.2853,
         0.9902,  1.7281,  0.6393,  0.3152,  0.1596,  2.4801,  1.6986])


将一个标量tensor转换为一个number：

In [13]:
# item() extracts the value of a one-element tensor as a plain Python number.
z = torch.randn((1,))
print(z)
print(z.item())

tensor([-1.1103])
-1.1102626323699951


当两个tensor维度不同时，element-wise operate会触发广播机制：

In [14]:
# A 1x2 row vector and a 3x1 column vector: the shapes differ, so the
# element-wise sum below relies on broadcasting.
x = torch.arange(1, 3).reshape(1, 2)
y = torch.arange(1, 4).reshape(3, 1)
print(x)
print(y)

# Both operands are expanded to 3x2 before the addition:
#   x --> [[1,2], [1,2], [1,2]]
#   y --> [[1,1], [2,2], [3,3]]
print(x + y)

tensor([[1, 2]])
tensor([[1],
        [2],
        [3]])
tensor([[2, 3],
        [3, 4],
        [4, 5]])


关于是否共享内存的探讨（+=和inplace操作是共享内存的，一般的赋值运算则不共享内存）：

In [15]:
# Out-of-place addition: `y + x` produces a new tensor and the name y is
# rebound to it, so the object behind y differs before and after.
x, y = torch.tensor([1, 2]), torch.tensor([3, 4])
id_before = id(y)
y = y + x
id_after = id(y)
print(id_before, id_after, id_before == id_after)

140585563645104 140585563643808 False


In [16]:
# Augmented assignment: `y += x` updates y in place, so y is still the
# same object afterwards.
x, y = torch.tensor([1, 2]), torch.tensor([3, 4])
id_before = id(y)
y += x
id_after = id(y)
print(id_before == id_after)

True


In [17]:
# Slice assignment: the sum is written back into the existing y, so the
# name y is never rebound.
x, y = torch.tensor([1, 2]), torch.tensor([3, 4])
id_before = id(y)
y[:] = y + x
id_after = id(y)
print(id_before == id_after)

True


In [18]:
# Explicit in-place method: add_ modifies y itself, so y is the same
# object before and after.
x, y = torch.tensor([1, 2]), torch.tensor([3, 4])
id_before = id(y)
y.add_(x)
id_after = id(y)
print(id_before == id_after)

True


Tensor与numpy array之间的转换（`numpy()`与`from_numpy()`：转换前后的二者共享内存）：

In [19]:
# Tensor -> ndarray with .numpy(): a and b share the same memory, so an
# in-place change to either one shows up in the other.
a = torch.ones((5, 3))
b = a.numpy()
print(a, b, '\n')

a.add_(1)
print(a, b, '\n')
b += 1
print(a, b)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]]) [[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]] 

tensor([[2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.]]) [[2. 2. 2.]
 [2. 2. 2.]
 [2. 2. 2.]
 [2. 2. 2.]
 [2. 2. 2.]] 

tensor([[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]]) [[3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]]


In [20]:
import numpy as np

# ndarray -> Tensor with torch.from_numpy(): b shares memory with a, so
# in-place changes on either side are visible on the other. The tensor keeps
# the array's float64 dtype.
a = np.ones(shape=(5, 3))
b = torch.from_numpy(a)
print(a, '\n', b, '\n')

a += 1
print(a, '\n', b, '\n')
b.add_(1)
print(a, '\n', b)

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]] 
 tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64) 

[[2. 2. 2.]
 [2. 2. 2.]
 [2. 2. 2.]
 [2. 2. 2.]
 [2. 2. 2.]] 
 tensor([[2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.]], dtype=torch.float64) 

[[3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]] 
 tensor([[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]], dtype=torch.float64)


使用torch.tensor()不会共享内存：

In [21]:
# torch.tensor(a) copies the array's data, so a and c change independently below.
c = torch.tensor(a)
print(a, '\n', c, '\n')

a += 1
print(a, '\n', c, '\n')
c += 1
print(a, '\n', c)

[[3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]
 [3. 3. 3.]] 
 tensor([[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]], dtype=torch.float64) 

[[4. 4. 4.]
 [4. 4. 4.]
 [4. 4. 4.]
 [4. 4. 4.]
 [4. 4. 4.]] 
 tensor([[3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.],
        [3., 3., 3.]], dtype=torch.float64) 

[[4. 4. 4.]
 [4. 4. 4.]
 [4. 4. 4.]
 [4. 4. 4.]
 [4. 4. 4.]] 
 tensor([[4., 4., 4.],
        [4., 4., 4.],
        [4., 4., 4.],
        [4., 4., 4.],
        [4., 4., 4.]], dtype=torch.float64)


## 2 自动求梯度

### 2.1 相关概念
``tensor.requires_grad``若被设置为true，则该变量上的所有操作都将被追踪。通过调用``.backward``完成所有梯度计算。计算结果被存放到``.grad``属性中。

Tensor和Function共同构建了一个记录整个计算过程的有向无环图（DAG）。

In [22]:
# x is created directly from data with gradient tracking switched on.
x = torch.tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True)
print(x)
# x is not the result of any operation, so it has no grad_fn (prints None).
print(x.grad_fn)

tensor([[1., 2.],
        [3., 4.]], requires_grad=True)
None


In [23]:
y = x + 2
print(y)
# y is a function of x, so it carries the grad_fn of the addition.
print(y.grad_fn)

tensor([[3., 4.],
        [5., 6.]], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x7fdca0955950>


In [24]:
# is_leaf tells whether a tensor is a leaf node of the computation graph
# (an independent variable).
print(x.is_leaf, y.is_leaf)

True False


In [25]:
# z is 3 * y^2 element-wise; o is the mean of z, a scalar.
z = 3 * y**2
o = z.mean()
print(z)
print(o)

tensor([[ 27.,  48.],
        [ 75., 108.]], grad_fn=<MulBackward0>)
tensor(64.5000, grad_fn=<MeanBackward0>)


用inplace的方式改变变量的requires_grad属性：

In [26]:
def _ratio(t):
    """Return (t * 3) / (t - 1), the expression evaluated before and after tracking is enabled."""
    return (t * 3) / (t - 1)


# Without requires_grad the result is not tracked: b has no grad_fn.
a = torch.randn(2, 2)
b = _ratio(a)
print(a.requires_grad)
print(b.grad_fn)

# requires_grad_ switches tracking on in place; the same expression now
# produces a result with a grad_fn.
a.requires_grad_(True)
b = _ratio(a)
print(b.grad_fn)

False
None
<DivBackward0 object at 0x7fdca09554d0>


### 2.2 梯度计算（采用分母记法）
因为$o = \frac{1}{4} \sum_{i=1}^4 z_i = \frac{1}{4} \sum_{i=1}^4 3(x_i + 2)^2$，
所以$\frac{\partial o}{\partial x} \in \mathbb{R}^{(2 \times 2) \times 1} = \mathbb{R}^{2 \times 2}$，
梯度与自变量同形。且$\frac{\partial o}{\partial x_i} = \frac{3}{2} (x_i + 2)$。

因为$x_1 = 1$，所以$\frac{\partial o}{\partial x_1} = 4.5$；
因为$x_2 = 2$，所以$\frac{\partial o}{\partial x_2} = 6$。

In [27]:
# o is a scalar, so backward() needs no argument. For a non-scalar result,
# a weight tensor of the same shape has to be passed to backward() instead.
o.backward()
print(x.grad)

tensor([[4.5000, 6.0000],
        [7.5000, 9.0000]])


梯度在反向传播的过程中是累加的。每运行一次反向传播，梯度都会累加之前的梯度。如果不想要这样，可以在执行反向传播之前将梯度清零。
$o_2 = \sum_{i=1}^4 x_i$，$\frac{\partial o_2}{\partial x_i} = 1$，即这一轮反向传播得到的梯度是一个全1的$2 \times 2$矩阵。
这个结果累加上上一轮的[[4.5, 6.0], [7.5, 9.0]]，才是真正的输出。

In [28]:
# No zeroing here: this backward pass adds its gradient to what x.grad already holds.
o2 = x.sum()
o2.backward()
print(x.grad)

tensor([[ 5.5000,  7.0000],
        [ 8.5000, 10.0000]])


In [29]:
# Zero the accumulated gradient before this backward pass so that x.grad ends
# up holding only the gradient of o3. x.grad is zeroed in place directly;
# going through the `.data` attribute is unnecessary.
o3 = x.sum()
x.grad.zero_()
o3.backward()
print(x.grad)

tensor([[1., 1.],
        [1., 1.]])


为了简化反向传播，Torch仅允许**标量对张量**求导，不允许张量对张量求导。也就是说，因变量必须是一个标量。可是很多情况下因变量并不是标量，怎么办？

设$y = f(x)$是个张量，$w$是个和$y$同形的张量，则``y.backward(w)``的含义是$\frac{\partial l}{\partial x}$，
其中$l = \sum_{i} w_i y_i$。因此结果仍然是个与$x$同形的张量。

In [30]:
# Build a non-scalar result: z is 2 * x, viewed as a 2x2 matrix.
x = torch.tensor([1.0, 2.0, 3.0, 4.0], requires_grad=True)
y = x * 2
z = y.view(2, -1)
print(z)

tensor([[2., 4.],
        [6., 8.]], grad_fn=<ViewBackward>)


In [31]:
# z is not a scalar, so backward() is given a weight tensor w with z's shape;
# the result stored in x.grad is the gradient of sum(w * z) with respect to x.
w = torch.tensor([[1., 0.1], [0.01, 0.001]])
z.backward(w)

print(x.grad)

tensor([2.0000, 0.2000, 0.0200, 0.0020])


如果中断梯度的跟踪，那么在后续的梯度计算中，被中断的变量不会参与进来。

In [33]:
# y1 is tracked; y2 is computed inside no_grad and therefore is not;
# y3 combines the two and is tracked through y1.
x = torch.tensor(1., requires_grad=True)
y1 = x ** 2
with torch.no_grad():
    y2 = x ** 3
y3 = y1 + y2

print(x.requires_grad)
for t in (y1, y2, y3):
    print(t, t.requires_grad)

True
tensor(1., grad_fn=<PowBackward0>) True
tensor(1.) False
tensor(2., grad_fn=<AddBackward0>) True


下面的结果之所以是2是因为：$y_2$是在``torch.no_grad()``下计算的，不参与梯度计算，在$y_3 = y_1 + y_2$中相当于一个常数，
所以$\frac{\partial y_3}{\partial x} = \frac{\partial y_1}{\partial x} = 2 x$。

In [34]:
# Only y1 contributes a gradient here; y2 was computed under no_grad.
y3.backward()
print(x.grad)

tensor(2.)


在不影响后续梯度的计算的前提下，如何修改自变量的数值？直接对``tensor.data``属性进行操作。

In [35]:
# Change the value of a tracked tensor through .data without affecting the
# gradient that is computed afterwards.
x = torch.ones(1,requires_grad=True)

print(x.data) # still a tensor
print(x.data.requires_grad) # but not tracked by the computation graph

y = 2 * x
x.data *= 100 # changes the value only; not recorded in the graph, so gradient propagation is unaffected

y.backward()
print(x) # modifying .data also changes the value of x itself
print(x.grad)

tensor([1.])
False
tensor([100.], requires_grad=True)
tensor([2.])
