## pytorch的张量
PyTorch的张量和NumPy的ndarray对象相比，多了GPU计算支持和自动微分支持。两者在具体使用语法上非常相似，所以下面不会给出过多的解释，只给出一些代码片段，看看输出是否符合你的预期。


In [1]:
import torch

In [2]:
# Create a 1-D tensor holding the integers 0..59 (3*4*5 = 60 elements).
t_1 = torch.arange(3 * 4 * 5)
# Print the values, the shape and the number of dimensions, one per line.
print(t_1, t_1.shape, t_1.ndim, sep="\n")

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
        36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
        54, 55, 56, 57, 58, 59])
torch.Size([60])
1


In [3]:
# View the 60 elements as a 3 x 4 x 5 tensor; -1 lets reshape infer the last size.
t_1 = t_1.reshape((3, 4, -1))
# Print the values, the shape and the number of dimensions, one per line.
print(t_1, t_1.shape, t_1.ndim, sep="\n")

tensor([[[ 0,  1,  2,  3,  4],
         [ 5,  6,  7,  8,  9],
         [10, 11, 12, 13, 14],
         [15, 16, 17, 18, 19]],

        [[20, 21, 22, 23, 24],
         [25, 26, 27, 28, 29],
         [30, 31, 32, 33, 34],
         [35, 36, 37, 38, 39]],

        [[40, 41, 42, 43, 44],
         [45, 46, 47, 48, 49],
         [50, 51, 52, 53, 54],
         [55, 56, 57, 58, 59]]])
torch.Size([3, 4, 5])
3


In [4]:
# zeros: a floating-point tensor of shape (3, 4, 5) filled with 0.
t_2 = torch.zeros((3, 4, 5))
print(t_2)

tensor([[[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]]])


In [5]:
# ones: a floating-point tensor of shape (3, 4, 5) filled with 1.
t_3 = torch.ones((3, 4, 5))
print(t_3)

tensor([[[1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.]],

        [[1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.]],

        [[1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1.]]])


In [6]:
# rand: samples drawn uniformly from the half-open interval [0, 1).
t_4 = torch.rand((3, 4, 5))
t_4

tensor([[[0.6197, 0.4289, 0.7787, 0.1606, 0.1643],
         [0.8005, 0.9559, 0.6820, 0.7387, 0.5123],
         [0.9465, 0.6000, 0.5535, 0.8892, 0.7596],
         [0.0502, 0.7657, 0.0827, 0.5335, 0.4434]],

        [[0.5814, 0.0769, 0.6520, 0.7216, 0.2113],
         [0.5906, 0.8641, 0.2657, 0.0104, 0.1978],
         [0.4860, 0.5115, 0.7121, 0.1078, 0.9044],
         [0.4777, 0.6741, 0.1351, 0.3047, 0.3914]],

        [[0.9972, 0.1498, 0.5555, 0.7830, 0.8576],
         [0.7460, 0.6982, 0.5741, 0.0640, 0.3117],
         [0.8744, 0.1787, 0.3502, 0.5414, 0.4150],
         [0.8294, 0.8559, 0.4008, 0.5101, 0.5790]]])

In [7]:
# randn: samples drawn from the standard normal distribution (mean 0, std 1).
t_5 = torch.randn((3, 4, 5))
t_5

tensor([[[ 1.3690, -1.2125,  0.9713,  0.4438,  0.2695],
         [-1.2590, -0.1723,  0.2416,  1.1429,  1.5999],
         [-0.1632, -0.7154,  0.2331, -1.0099, -0.4292],
         [-0.6885,  0.1371,  1.6853, -0.6001,  1.1662]],

        [[-0.9101, -0.9169,  1.0831,  0.0043,  0.1835],
         [ 1.5337,  0.6460, -0.8395, -0.1553,  0.4278],
         [ 1.5331,  2.2837, -0.7637, -1.0780, -0.0724],
         [-0.8201, -1.0202, -0.4884,  0.6331,  0.5497]],

        [[ 0.4832,  0.2854, -0.0367,  1.6824, -1.0917],
         [ 1.6993,  1.5096, -0.4693, -0.5930,  0.9404],
         [-1.6893,  0.1939,  0.4922,  0.5521,  2.0050],
         [ 0.6059,  0.9814,  1.0248, -0.1422,  0.0547]]])

## 单元素张量
单元素张量可以用 `item()` 将该元素取出来，返回的是Python的标量对象（例如int、float）。

In [8]:
# A tensor with exactly one element; item() unwraps it into a plain Python value.
t_6 = torch.tensor([5])
print(t_6.shape, t_6.item(), sep="\n")

torch.Size([1])
5


In [9]:
# Summing every element of t_3 gives a 0-dim tensor; item() turns it into a Python float.
t_7 = t_3.sum()
print(t_7, t_7.item(), type(t_7.item()), sep="\n")

tensor(60.)
60.0
<class 'float'>


## 按元素运算

In [10]:
torch.ones(3,4,5) * 2

tensor([[[2., 2., 2., 2., 2.],
         [2., 2., 2., 2., 2.],
         [2., 2., 2., 2., 2.],
         [2., 2., 2., 2., 2.]],

        [[2., 2., 2., 2., 2.],
         [2., 2., 2., 2., 2.],
         [2., 2., 2., 2., 2.],
         [2., 2., 2., 2., 2.]],

        [[2., 2., 2., 2., 2.],
         [2., 2., 2., 2., 2.],
         [2., 2., 2., 2., 2.],
         [2., 2., 2., 2., 2.]]])

In [11]:
(torch.ones(3,4,5) * 2) + torch.ones(3,4,5)

tensor([[[3., 3., 3., 3., 3.],
         [3., 3., 3., 3., 3.],
         [3., 3., 3., 3., 3.],
         [3., 3., 3., 3., 3.]],

        [[3., 3., 3., 3., 3.],
         [3., 3., 3., 3., 3.],
         [3., 3., 3., 3., 3.],
         [3., 3., 3., 3., 3.]],

        [[3., 3., 3., 3., 3.],
         [3., 3., 3., 3., 3.],
         [3., 3., 3., 3., 3.],
         [3., 3., 3., 3., 3.]]])

In [12]:
t_5 > 0

tensor([[[ True, False,  True,  True,  True],
         [False, False,  True,  True,  True],
         [False, False,  True, False, False],
         [False,  True,  True, False,  True]],

        [[False, False,  True,  True,  True],
         [ True,  True, False, False,  True],
         [ True,  True, False, False, False],
         [False, False, False,  True,  True]],

        [[ True,  True, False,  True, False],
         [ True,  True, False, False,  True],
         [False,  True,  True,  True,  True],
         [ True,  True,  True, False,  True]]])

## 索引和切片

In [13]:
t_1

tensor([[[ 0,  1,  2,  3,  4],
         [ 5,  6,  7,  8,  9],
         [10, 11, 12, 13, 14],
         [15, 16, 17, 18, 19]],

        [[20, 21, 22, 23, 24],
         [25, 26, 27, 28, 29],
         [30, 31, 32, 33, 34],
         [35, 36, 37, 38, 39]],

        [[40, 41, 42, 43, 44],
         [45, 46, 47, 48, 49],
         [50, 51, 52, 53, 54],
         [55, 56, 57, 58, 59]]])

In [14]:
t_1[0,0,0]

tensor(0)

In [15]:
t_1[0,0,:]

tensor([0, 1, 2, 3, 4])

In [16]:
t_1[0,:,:]

tensor([[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19]])

In [17]:
# Show t_2 before the change.
print(t_2)
# Slice assignment writes into t_2 itself: index 1 of the last dimension becomes 1 everywhere.
t_2[:,:,1] = 1
# Show t_2 after the change.
print(t_2)

tensor([[[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]],

        [[0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0.]]])
tensor([[[0., 1., 0., 0., 0.],
         [0., 1., 0., 0., 0.],
         [0., 1., 0., 0., 0.],
         [0., 1., 0., 0., 0.]],

        [[0., 1., 0., 0., 0.],
         [0., 1., 0., 0., 0.],
         [0., 1., 0., 0., 0.],
         [0., 1., 0., 0., 0.]],

        [[0., 1., 0., 0., 0.],
         [0., 1., 0., 0., 0.],
         [0., 1., 0., 0., 0.],
         [0., 1., 0., 0., 0.]]])


## 对于维度的再思考
numpy和pytorch里面都有一个维度的概念，某些函数的运作可以指定一个dim参数，一维二维的情况还好，再多的维度试图从图形来理解并不是个好办法。下面提供了另外一种思路。

一个基本的思路是：将张量按照指定维度拆分，然后对这些拆分出来的张量应用目标函数。

### sum

In [18]:
t_1

tensor([[[ 0,  1,  2,  3,  4],
         [ 5,  6,  7,  8,  9],
         [10, 11, 12, 13, 14],
         [15, 16, 17, 18, 19]],

        [[20, 21, 22, 23, 24],
         [25, 26, 27, 28, 29],
         [30, 31, 32, 33, 34],
         [35, 36, 37, 38, 39]],

        [[40, 41, 42, 43, 44],
         [45, 46, 47, 48, 49],
         [50, 51, 52, 53, 54],
         [55, 56, 57, 58, 59]]])

In [19]:
t_1[0]

tensor([[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19]])

In [20]:
t_1[1]

tensor([[20, 21, 22, 23, 24],
        [25, 26, 27, 28, 29],
        [30, 31, 32, 33, 34],
        [35, 36, 37, 38, 39]])

In [21]:
t_1[2]

tensor([[40, 41, 42, 43, 44],
        [45, 46, 47, 48, 49],
        [50, 51, 52, 53, 54],
        [55, 56, 57, 58, 59]])

In [22]:
# Summing over dim 0 adds the three slices elementwise: t_1[0] + t_1[1] + t_1[2].
t_1.sum(dim=0)

tensor([[ 60,  63,  66,  69,  72],
        [ 75,  78,  81,  84,  87],
        [ 90,  93,  96,  99, 102],
        [105, 108, 111, 114, 117]])

### argmax

In [23]:
t_4

tensor([[[0.6197, 0.4289, 0.7787, 0.1606, 0.1643],
         [0.8005, 0.9559, 0.6820, 0.7387, 0.5123],
         [0.9465, 0.6000, 0.5535, 0.8892, 0.7596],
         [0.0502, 0.7657, 0.0827, 0.5335, 0.4434]],

        [[0.5814, 0.0769, 0.6520, 0.7216, 0.2113],
         [0.5906, 0.8641, 0.2657, 0.0104, 0.1978],
         [0.4860, 0.5115, 0.7121, 0.1078, 0.9044],
         [0.4777, 0.6741, 0.1351, 0.3047, 0.3914]],

        [[0.9972, 0.1498, 0.5555, 0.7830, 0.8576],
         [0.7460, 0.6982, 0.5741, 0.0640, 0.3117],
         [0.8744, 0.1787, 0.3502, 0.5414, 0.4150],
         [0.8294, 0.8559, 0.4008, 0.5101, 0.5790]]])

In [24]:
t_4[:,0,:]

tensor([[0.6197, 0.4289, 0.7787, 0.1606, 0.1643],
        [0.5814, 0.0769, 0.6520, 0.7216, 0.2113],
        [0.9972, 0.1498, 0.5555, 0.7830, 0.8576]])

In [25]:
t_4[:,1,:]

tensor([[0.8005, 0.9559, 0.6820, 0.7387, 0.5123],
        [0.5906, 0.8641, 0.2657, 0.0104, 0.1978],
        [0.7460, 0.6982, 0.5741, 0.0640, 0.3117]])

In [26]:
t_4[:,2,:]

tensor([[0.9465, 0.6000, 0.5535, 0.8892, 0.7596],
        [0.4860, 0.5115, 0.7121, 0.1078, 0.9044],
        [0.8744, 0.1787, 0.3502, 0.5414, 0.4150]])

In [27]:
t_4[:,3,:]

tensor([[0.0502, 0.7657, 0.0827, 0.5335, 0.4434],
        [0.4777, 0.6741, 0.1351, 0.3047, 0.3914],
        [0.8294, 0.8559, 0.4008, 0.5101, 0.5790]])

In [28]:
# Compares t_4[:,0,:], t_4[:,1,:], t_4[:,2,:] and t_4[:,3,:] position by position
# and returns, for each position, the index along dim 1 of the largest value.
t_4.argmax(dim=1)

tensor([[2, 1, 0, 2, 2],
        [1, 1, 2, 0, 2],
        [0, 3, 1, 0, 0]])

即使是最简单的情况，因为批量处理的存在，神经网络的一个输出可能是这样的：

In [29]:
# The first dimension indexes the samples (a batch of 32); each sample has 10 values.
# Random data stands in for a real network output here.
t_8 = torch.rand(32,10)

In [30]:
t_8.argmax(dim=1)

tensor([4, 3, 2, 4, 5, 4, 6, 9, 2, 1, 2, 6, 8, 6, 9, 0, 2, 5, 1, 2, 7, 0, 1, 7,
        4, 9, 6, 8, 1, 4, 1, 0])

In [31]:
t_8[:,0]

tensor([0.4131, 0.8180, 0.3917, 0.3345, 0.8670, 0.5456, 0.3986, 0.9495, 0.2092,
        0.1439, 0.4612, 0.6389, 0.4393, 0.6853, 0.1444, 0.9543, 0.0125, 0.7579,
        0.4564, 0.6202, 0.4166, 0.9375, 0.5368, 0.4997, 0.9461, 0.4795, 0.0059,
        0.7026, 0.6019, 0.1864, 0.8529, 0.9390])

In [32]:
t_8[:,1]

tensor([0.4892, 0.4910, 0.5342, 0.2114, 0.4533, 0.8597, 0.3824, 0.1042, 0.5499,
        0.9526, 0.0244, 0.4279, 0.0041, 0.2420, 0.7186, 0.8301, 0.1459, 0.7017,
        0.8894, 0.8296, 0.7008, 0.2696, 0.7596, 0.1588, 0.0218, 0.8417, 0.0926,
        0.3920, 0.9952, 0.3645, 0.9781, 0.6934])

按照上面的分析，就能理解 `t_8.argmax(dim=1)` 的含义：它给出每个样本的输出中最大值所在的索引。

### cat
cat用于张量连接，一般需要指定按照哪个维度来连接。

In [33]:
# A 3 x 4 float32 tensor holding 0..11 in row-major order.
t_9 = torch.arange(12, dtype=torch.float32).reshape(3, 4)
print(t_9)

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.]])


In [34]:
# A second 3 x 4 floating-point tensor, with its rows written out explicitly.
t_10 = torch.tensor(
    [
        [2.0, 1.0, 4.0, 3.0],
        [1.0, 2.0, 3.0, 4.0],
        [4.0, 3.0, 2.0, 1.0],
    ]
)
print(t_10)

tensor([[2., 1., 4., 3.],
        [1., 2., 3., 4.],
        [4., 3., 2., 1.]])


In [35]:
torch.cat((t_9, t_10), dim=0)

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [ 2.,  1.,  4.,  3.],
        [ 1.,  2.,  3.,  4.],
        [ 4.,  3.,  2.,  1.]])

In [36]:
t_9[0]

tensor([0., 1., 2., 3.])

In [37]:
t_9[1]

tensor([4., 5., 6., 7.])

In [38]:
t_10[0]

tensor([2., 1., 4., 3.])

In [39]:
t_10[1]

tensor([1., 2., 3., 4.])

可以理解为上面的输出按照维度=0拼接。

In [40]:
torch.cat((t_9,t_10), dim=1)

tensor([[ 0.,  1.,  2.,  3.,  2.,  1.,  4.,  3.],
        [ 4.,  5.,  6.,  7.,  1.,  2.,  3.,  4.],
        [ 8.,  9., 10., 11.,  4.,  3.,  2.,  1.]])

In [41]:
t_9[:,0]

tensor([0., 4., 8.])

In [42]:
t_9[:,1]

tensor([1., 5., 9.])

In [43]:
t_10[:,0]

tensor([2., 1., 4.])

In [44]:
t_10[:,1]

tensor([1., 2., 3.])

拼接的情况太复杂了，参考上面的运行代码大概会有个概念。

## 和numpy进行数据交换
```
import numpy as np

# Example data; any array-like that np.array accepts works here.
data = [[1, 2], [3, 4]]
np_array = np.array(data)
# from_numpy wraps the ndarray without copying: the tensor and the array share memory,
# so a change made through one is visible through the other.
x_np = torch.from_numpy(np_array)
```

```
t = torch.ones(5)
n = t.numpy()
```

## 节省内存的原地修改
用索引和切片对张量进行赋值是原地修改的，pytorch里面有些方法也是原地修改的。这里要说的主要是python的赋值语句 `x = x + 1`：它会先计算 `x + 1` 得到一个新的张量对象，再让变量名 `x` 指向这个新对象，而不是复用原来的内存，这在大型张量的场景下可能会带来不小的内存开销。

In [45]:
print(t_2)
print(id(t_2))

tensor([[[0., 1., 0., 0., 0.],
         [0., 1., 0., 0., 0.],
         [0., 1., 0., 0., 0.],
         [0., 1., 0., 0., 0.]],

        [[0., 1., 0., 0., 0.],
         [0., 1., 0., 0., 0.],
         [0., 1., 0., 0., 0.],
         [0., 1., 0., 0., 0.]],

        [[0., 1., 0., 0., 0.],
         [0., 1., 0., 0., 0.],
         [0., 1., 0., 0., 0.],
         [0., 1., 0., 0., 0.]]])
2075447541616


In [46]:
# `t_2 + 1` produces a new tensor and the name t_2 is rebound to it,
# so the id printed below differs from the one printed before this statement.
t_2 = t_2 + 1
print(id(t_2))

2075987627536


可以通过切片语法来实现原地修改，节省内存开销。

In [47]:
print(t_2)
print(id(t_2))

tensor([[[1., 2., 1., 1., 1.],
         [1., 2., 1., 1., 1.],
         [1., 2., 1., 1., 1.],
         [1., 2., 1., 1., 1.]],

        [[1., 2., 1., 1., 1.],
         [1., 2., 1., 1., 1.],
         [1., 2., 1., 1., 1.],
         [1., 2., 1., 1., 1.]],

        [[1., 2., 1., 1., 1.],
         [1., 2., 1., 1., 1.],
         [1., 2., 1., 1., 1.],
         [1., 2., 1., 1., 1.]]])
2075987627536


In [48]:
# Assigning through t_2[:] stores the result in the existing tensor object,
# so id(t_2) stays the same as before the assignment.
t_2[:] = t_2 + 1
print(id(t_2))

2075987627536


## 广播机制
类似numpy，pytorch这边也有广播机制，这里也不做过多介绍了，并不鼓励这样的编码风格。

In [50]:
# A 3 x 1 column and a 1 x 2 row: two tensors whose shapes differ.
t_11 = torch.arange(3).reshape(3, 1)
t_12 = torch.arange(2).reshape(1, 2)
print(t_11, t_12, sep="\n")

tensor([[0],
        [1],
        [2]])
tensor([[0, 1]])


In [51]:
torch.broadcast_tensors(t_11, t_12)

(tensor([[0, 0],
         [1, 1],
         [2, 2]]),
 tensor([[0, 1],
         [0, 1],
         [0, 1]]))

In [52]:
t_11 + t_12

tensor([[0, 1],
        [1, 2],
        [2, 3]])