# Tensors

Tensors类似于NumPy的ndarray，区别在于Tensor还可以在GPU上加速运算，
可以用来定义一些深度学习模型

In [4]:
import torch

In [22]:
# Build a 5x3 tensor: memory is only allocated, the values are not initialized
x = torch.Tensor(5,3)
x

tensor([[8.7245e-39, 9.2755e-39, 8.9082e-39],
        [9.9184e-39, 8.4490e-39, 9.6429e-39],
        [1.0653e-38, 1.0469e-38, 4.2246e-39],
        [1.0378e-38, 9.6429e-39, 9.2755e-39],
        [9.7346e-39, 1.0745e-38, 1.0102e-38]])

构造一个未初始化的5*3矩阵

In [7]:
# torch.empty only allocates storage; the values displayed are whatever was in memory
x = torch.empty(5, 3)
x

tensor([[1.0653e-38, 1.0194e-38, 8.4490e-39],
        [1.0469e-38, 9.3674e-39, 9.9184e-39],
        [8.7245e-39, 9.2755e-39, 8.9082e-39],
        [9.9184e-39, 8.4490e-39, 9.6429e-39],
        [1.0653e-38, 1.0469e-38, 4.2246e-39]])

In [13]:
print(x.size())  # show the shape of x

torch.Size([5, 3])


In [12]:
x.size()[0]    # number of rows

5

In [11]:
x.size(1)   # number of columns; size(i) and size()[i] are equivalent spellings

3

构建一个随机初始化的矩阵

In [8]:
torch.rand(5, 3)

tensor([[0.0013, 0.9803, 0.3387],
        [0.6857, 0.5501, 0.0112],
        [0.0553, 0.9426, 0.7204],
        [0.3887, 0.6482, 0.3172],
        [0.8924, 0.6909, 0.8008]])

构建一个全为0的矩阵（不指定dtype时默认是浮点类型），随后再构建类型为long的全0矩阵

In [10]:
# No dtype is passed here, so the zeros are floating point (note the `0.` in the output), not long
x = torch.zeros(5, 3)
x

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])

In [14]:
x.dtype  # show the element type

torch.float32

In [19]:
x = torch.zeros(5, 3, dtype=torch.long)   # request dtype long (int64) at creation time
print(x)
print(x.dtype)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])
torch.int64


In [20]:
x = torch.zeros(5, 3).long()   # create as float first, then convert to long (int64)
print(x)
print(x.dtype)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])
torch.int64


从数据直接构建tensor.

In [22]:
x = torch.tensor([5.5, 3])
x

tensor([5.5000, 3.0000])

可以从已有的tensor构建另一个tensor。
这些方法会重用原来的tensor的特征，例如：数据类型，除非提供新的数据

In [25]:
x = x.new_ones(5, 3)
print(x,x.dtype) # floating point, same dtype as the original x
# To get a different dtype, pass it explicitly
y = x.new_ones(5, 3, dtype = torch.double)
y

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]]) torch.float32


tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)

随机产生的数字   like表示形状得一样 

In [26]:
x = torch.randn_like(x, dtype=torch.float)    # random values with the same shape as x
x

tensor([[-0.5696, -0.1003, -0.5218],
        [ 0.0816,  1.2711, -0.8568],
        [-0.7871, -0.4898,  0.7647],
        [-1.5623, -0.0914,  0.5044],
        [ 1.2804, -0.8605,  0.3387]])

得到tensor的形状：

In [30]:
x.shape    # same as x.size()

torch.Size([5, 3])

## Operation

### 加法运算

In [16]:
y = torch.rand(5,3)
y

tensor([[0.6923, 0.9679, 0.8331],
        [0.1266, 0.0457, 0.8368],
        [0.3858, 0.8025, 0.6611],
        [0.4196, 0.2747, 0.7842],
        [0.5247, 0.3939, 0.6175]])

In [17]:
x

tensor([[1.0653e-38, 1.0194e-38, 8.4490e-39],
        [1.0469e-38, 9.3674e-39, 9.9184e-39],
        [8.7245e-39, 9.2755e-39, 8.9082e-39],
        [9.9184e-39, 8.4490e-39, 9.6429e-39],
        [1.0653e-38, 1.0469e-38, 4.2246e-39]])

加法的第一种写法

In [19]:
x + y   

tensor([[0.6923, 0.9679, 0.8331],
        [0.1266, 0.0457, 0.8368],
        [0.3858, 0.8025, 0.6611],
        [0.4196, 0.2747, 0.7842],
        [0.5247, 0.3939, 0.6175]])

加法的第二种写法

In [20]:
torch.add(x, y)         

tensor([[0.6923, 0.9679, 0.8331],
        [0.1266, 0.0457, 0.8368],
        [0.3858, 0.8025, 0.6611],
        [0.4196, 0.2747, 0.7842],
        [0.5247, 0.3939, 0.6175]])

加法的第三种写法:指定加法结果的输出目标为result

In [23]:
result = torch.empty(5, 3)      # pre-allocate the output buffer (contents uninitialized)
torch.add(x, y, out=result)     # write x + y into result instead of returning a new tensor
result

tensor([[0.6923, 0.9679, 0.8331],
        [0.1266, 0.0457, 0.8368],
        [0.3858, 0.8025, 0.6611],
        [0.4196, 0.2747, 0.7842],
        [0.5247, 0.3939, 0.6175]])

加法：把输出作为一个变量

In [38]:
# Bind the sum to a variable: `x + y` returns a new tensor, so no pre-allocation is needed
result = x + y
result

tensor([[-0.0993,  0.6963, -0.1581],
        [ 0.5298,  1.5560, -0.6586],
        [-0.4301, -0.2539,  0.8213],
        [-1.2690,  0.5434,  1.4106],
        [ 1.6871, -0.4403,  1.3381]])

in-place加法

In [39]:
y.add_(x)   # the trailing underscore marks an in-place op: x is added into y
y   # y itself has changed

tensor([[-0.0993,  0.6963, -0.1581],
        [ 0.5298,  1.5560, -0.6586],
        [-0.4301, -0.2539,  0.8213],
        [-1.2690,  0.5434,  1.4106],
        [ 1.6871, -0.4403,  1.3381]])

<mark style=background-color:red>
注：</mark>
任何in-place的运算都会以_结尾。eg:x.copy_(y)，x.t_()，都会改变x

各种类似的NumPy的indexing都可以在PyTorch tensor上面使用

In [41]:
x

tensor([[-0.5696, -0.1003, -0.5218],
        [ 0.0816,  1.2711, -0.8568],
        [-0.7871, -0.4898,  0.7647],
        [-1.5623, -0.0914,  0.5044],
        [ 1.2804, -0.8605,  0.3387]])

In [42]:
x[:, 1: ]   # keep every row; take the columns from index 1 (the second column) onward

tensor([[-0.1003, -0.5218],
        [ 1.2711, -0.8568],
        [-0.4898,  0.7647],
        [-0.0914,  0.5044],
        [-0.8605,  0.3387]])

In [44]:
x[1:, 1: ] 

tensor([[ 1.2711, -0.8568],
        [-0.4898,  0.7647],
        [-0.0914,  0.5044],
        [-0.8605,  0.3387]])

Resizing:如果你希望resize/reshape一个tensor，可以使用Tensor的view方法（例如x.view(...)）:

In [51]:
# Reshape one 4x4 tensor three ways with view()
x = torch.randn(4, 4)
y = x.view(16)       # flatten to 1-D
z = x.view(2, 8)     # 2 rows x 8 columns
b = x.view(2, -1)    # -1 is inferred automatically (8 here); x.view(-1, 8) would infer 2
print(x, y, z, sep="\n")
b

tensor([[-1.2689,  1.0911,  0.0690, -0.1765],
        [ 1.5710,  2.4072,  0.4937, -0.5687],
        [-3.5520,  1.3367,  0.9791, -0.9546],
        [-0.7522, -1.3900,  0.9307,  0.3008]])
tensor([-1.2689,  1.0911,  0.0690, -0.1765,  1.5710,  2.4072,  0.4937, -0.5687,
        -3.5520,  1.3367,  0.9791, -0.9546, -0.7522, -1.3900,  0.9307,  0.3008])
tensor([[-1.2689,  1.0911,  0.0690, -0.1765,  1.5710,  2.4072,  0.4937, -0.5687],
        [-3.5520,  1.3367,  0.9791, -0.9546, -0.7522, -1.3900,  0.9307,  0.3008]])


tensor([[-1.2689,  1.0911,  0.0690, -0.1765,  1.5710,  2.4072,  0.4937, -0.5687],
        [-3.5520,  1.3367,  0.9791, -0.9546, -0.7522, -1.3900,  0.9307,  0.3008]])

如果只有一个元素的tensor,使用.item()方法可以把里面的value变成Python数值

In [13]:
x = torch.randn(1)
x

tensor([-0.4930])

In [15]:
dir(x)

['T',
 '__abs__',
 '__add__',
 '__and__',
 '__array__',
 '__array_priority__',
 '__array_wrap__',
 '__bool__',
 '__class__',
 '__contains__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__div__',
 '__doc__',
 '__eq__',
 '__float__',
 '__floordiv__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__getitem__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__idiv__',
 '__ilshift__',
 '__imul__',
 '__index__',
 '__init__',
 '__init_subclass__',
 '__int__',
 '__invert__',
 '__ior__',
 '__ipow__',
 '__irshift__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__ixor__',
 '__le__',
 '__len__',
 '__long__',
 '__lshift__',
 '__lt__',
 '__matmul__',
 '__mod__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__nonzero__',
 '__or__',
 '__pow__',
 '__radd__',
 '__rdiv__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__reversed__',
 '__rfloordiv__',
 '__rmul__',
 '__rpow__',
 '__rshift__',
 '__rsub__',
 '__rtruediv__',
 '__setattr__',
 '__se

In [16]:
x.item()

-0.49303799867630005

# Numpy和Tensor之间的转化

CPU上的Torch Tensor和Numpy array会共享内存，所以改变其中一项也会改变另一项

把Torch Tensor转变成Numpy Array

In [19]:
a = torch.ones(5)
a

tensor([1., 1., 1., 1., 1.])

In [20]:
b = a.numpy()     # Tensor -> Numpy
b

array([1., 1., 1., 1., 1.], dtype=float32)

改变numpy array里面的值

In [22]:
b[1] = 2
b   # changing b also changes a, because the two share memory

array([1., 2., 1., 1., 1.], dtype=float32)

In [23]:
a

tensor([1., 2., 1., 1., 1.])

把numpy array转变成torch tensor

In [26]:
import numpy as np   # 后续使用numpy时可以用np代替numpy

In [27]:
a = np.ones(5)
a

array([1., 1., 1., 1., 1.])

In [28]:
import torch

In [29]:
b = torch.from_numpy(a)   # Numpy -> Tensor
b

tensor([1., 1., 1., 1., 1.], dtype=torch.float64)

In [21]:
np.add(a, 1, out=a)      # modify the values in place, in the original memory
a

array([2., 2., 2., 2., 2.])

In [22]:
b   # a and b share memory

tensor([2., 2., 2., 2., 2.], dtype=torch.float64)

In [28]:
a = a + 1    # a + 1 is not written back into the original a; it creates a new array in new memory
print(a)    
b    # so a and b now differ

[4. 4. 4. 4. 4.]


tensor([2., 2., 2., 2., 2.], dtype=torch.float64)

所有CPU上的Tensor都支持转成numpy，或者从numpy转成Tensor

# CUDA Tensors

使用.to方法，Tensor可以被移动到别的device上

In [30]:
torch.cuda.is_available()   # False here: this was run on a laptop without a GPU

False

In [27]:
if torch.cuda.is_available():      # True when a GPU is available on this machine
    device = torch.device("cuda")   # CUDA is the GPU computing library
    y = torch.ones_like(x, device=device)  # create a tensor directly on the GPU
    x = x.to(device)    # alternatively, move an existing tensor to the GPU
    # computation can now run on the GPU, which can be much faster
    z = x + y
    print(z)   # z lives on the GPU
    print(z.to("cpu", torch.double))         # move z back to the CPU and convert it to torch.double

In [None]:
 # A GPU tensor cannot be converted to numpy directly: move it to the CPU first,
 # because numpy only works on CPU memory. detach() yields a tensor that does not
 # track gradients, which numpy() requires.
y.detach().cpu().numpy()
y.to("cpu").detach().numpy()

In [None]:
# Running on the GPU can greatly speed up computation, especially with large amounts of data
# A very large model may be impractical to train on the CPU and needs to be moved to the GPU
# To move a whole model to CUDA, write: model = model.cuda()

# 热身：用Numpy实现两层神经网络

一个全连接ReLU神经网络，一个隐藏层，没有bias，用来从x预测y，使用L2 Loss

- $h = XW_1$

- $a = \max(0, h)$

- $\hat{y} = aW_2$

这一实现完全使用numpy来计算前向神经网络，loss，和反向传播。
- forward pass
- loss
- backward pass

使用numpy来计算前向神经网络，loss，和反向传播

numpy ndarray是一个普通的n维array。它不知道任何关于深度学习或者梯度（gradient）的知识，也不知道计算图
（computation graph），只是一种用来进行数学运算的数据结构

In [34]:
# N: batch size; D_in: input dimension (1000); H: hidden-layer dimension (100); D_out: output dimension (10)
N, D_in, H, D_out = 64, 1000, 100, 10
# Random training data
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)
# Random initial weights
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)
learning_rate = 1e-6
for it in range(500):
    # Forward pass: linear -> ReLU -> linear
    h = np.dot(x, w1)                 # N x H
    h_relu = np.maximum(h, 0)         # N x H
    y_pred = np.dot(h_relu, w2)       # N x D_out

    # Loss: sum of squared errors
    diff = y_pred - y
    loss = (diff ** 2).sum()
    print(it, loss)

    # Backward pass: apply the chain rule by hand, layer by layer.
    # For y = ax + b: dy/dx = a and dy/da = x.
    grad_y_pred = 2.0 * diff
    grad_w2 = np.dot(h_relu.T, grad_y_pred)    # .T is the transpose
    grad_h_relu = np.dot(grad_y_pred, w2.T)
    negative = h < 0
    grad_h = grad_h_relu.copy()
    grad_h[negative] = 0                       # no gradient flows where the ReLU input was negative
    grad_w1 = np.dot(x.T, grad_h)

    # Gradient-descent update of w1 and w2
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

0 32508816.86736097
1 26696958.238504488
2 21857093.11924529
3 16556837.119172925
4 11541332.714072587
5 7552407.28961717
6 4854781.2900699945
7 3173703.5329361157
8 2167509.452189046
9 1559002.058216245
10 1177910.3028039997
11 926792.8701247531
12 752069.2986668046
13 624162.7887723373
14 526512.0861935796
15 449497.2417425005
16 387222.5179967876
17 335885.18218275555
18 293113.8847723546
19 257032.4623036594
20 226328.811718776
21 200044.67902338947
22 177425.804693802
23 157850.90247764206
24 140826.61906261812
25 125970.71589296494
26 112962.04426345383
27 101522.45369528969
28 91434.15226335326
29 82510.41466953642
30 74593.21697868255
31 67551.83605983132
32 61275.9301353778
33 55679.47479572585
34 50672.21407206453
35 46182.389094529346
36 42145.67200724143
37 38510.47424723723
38 35232.06002107379
39 32270.519848330205
40 29592.76290642543
41 27166.13663923723
42 24963.268618096175
43 22961.36632218388
44 21138.484411171503
45 19478.282710409003
46 17963.877843632563
47 16580

360 0.0018264260294313314
361 0.0017458863025315366
362 0.0016689403238171327
363 0.0015953722306289203
364 0.0015250915586276603
365 0.001457947223046451
366 0.0013936823037084144
367 0.001332230265464688
368 0.0012735353632518792
369 0.001217484701464103
370 0.001163856518029702
371 0.0011125711203577463
372 0.0010635772557229823
373 0.0010167548533815429
374 0.0009719768651424984
375 0.0009292027583159484
376 0.0008883262479355354
377 0.0008492247729534874
378 0.0008118445219392386
379 0.0007761135546344185
380 0.0007419557679709099
381 0.0007093382209330145
382 0.000678191369097881
383 0.0006483835099120267
384 0.0006198611616513122
385 0.0005926068961858845
386 0.0005665446747824658
387 0.0005416352772521304
388 0.0005178285930669675
389 0.0004950926988630306
390 0.0004733388421731916
391 0.00045253643000126324
392 0.0004326464757891611
393 0.0004136371401701377
394 0.00039546637979945883
395 0.00037809648901805003
396 0.00036149648601320624
397 0.0003456327933365429
398 0.0003304

### PyTorch:Tensors

使用tensors来创建前向神经网络，计算损失，以及反向传播

一个PyTorch Tensor很像一个numpy的ndarray，但是它和numpy ndarray最大的区别是：PyTorch Tensor可以在CPU或者GPU上运算。如果想要在GPU上运算，就需要把Tensor换成cuda类型

In [37]:
# Two-layer ReLU network written with torch tensors and hand-derived gradients
N, D_in, H, D_out = 64, 1000, 100, 10
# Random training data
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)
# Random initial weights
w1 = torch.randn(D_in, H)
w2 = torch.randn(H, D_out)
learning_rate = 1e-6
for it in range(500):
    # Forward pass: linear -> ReLU -> linear
    h = torch.mm(x, w1)                  # N x H
    h_relu = torch.clamp(h, min=0)       # N x H
    y_pred = torch.mm(h_relu, w2)        # N x D_out

    # Loss: sum of squared errors, converted to a Python float
    diff = y_pred - y
    loss = diff.pow(2).sum().item()
    print(it, loss)

    # Backward pass: apply the chain rule by hand, layer by layer.
    # For y = ax + b: dy/dx = a and dy/da = x.
    grad_y_pred = 2.0 * diff
    grad_w2 = torch.mm(h_relu.t(), grad_y_pred)    # t() is the transpose
    grad_h_relu = torch.mm(grad_y_pred, w2.t())
    grad_h = grad_h_relu.masked_fill(h < 0, 0.0)   # no gradient flows where the ReLU input was negative
    grad_w1 = torch.mm(x.t(), grad_h)

    # Gradient-descent update of w1 and w2
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2

0 29951818.0
1 23308764.0
2 19889714.0
3 16807886.0
4 13469244.0
5 10057719.0
6 7101619.0
7 4825993.0
8 3252558.5
9 2219624.0
10 1560765.75
11 1139141.625
12 864525.6875
13 679481.625
14 550044.75
15 455717.28125
16 384443.0
17 328665.25
18 283723.84375
19 246797.6875
20 216064.234375
21 190067.75
22 167883.9375
23 148788.390625
24 132268.734375
25 117903.5625
26 105357.1171875
27 94352.078125
28 84672.6171875
29 76142.2734375
30 68603.4765625
31 61917.7578125
32 55972.8671875
33 50676.7578125
34 45950.48828125
35 41727.25390625
36 37942.17578125
37 34545.68359375
38 31491.33984375
39 28744.22265625
40 26266.853515625
41 24027.1640625
42 22002.03515625
43 20168.5078125
44 18506.25
45 16996.375
46 15622.6494140625
47 14372.2197265625
48 13232.912109375
49 12193.91796875
50 11244.728515625
51 10377.12890625
52 9583.197265625
53 8856.0625
54 8189.61865234375
55 7578.0751953125
56 7016.3603515625
57 6499.94873046875
58 6024.93798828125
59 5587.9765625
60 5185.39111328125
61 4814.2578125
62

444 0.00011123338481411338
445 0.00010918248881353065
446 0.0001075129330274649
447 0.00010549947182880715
448 0.00010350241063861176
449 0.00010177301737712696
450 9.977327135857195e-05
451 9.797479287954047e-05
452 9.625185339245945e-05
453 9.4316725153476e-05
454 9.250565926777199e-05
455 9.101707837544382e-05
456 8.938113751355559e-05
457 8.770262502366677e-05
458 8.63562454469502e-05
459 8.473225170746446e-05
460 8.343956142198294e-05
461 8.21103822090663e-05
462 8.071990305325016e-05
463 7.93677318142727e-05
464 7.798353908583522e-05
465 7.68247336964123e-05
466 7.57333473302424e-05
467 7.458837353624403e-05
468 7.31859399820678e-05
469 7.189045572886243e-05
470 7.056746107991785e-05
471 6.967142689973116e-05
472 6.852616934338585e-05
473 6.726032006554306e-05
474 6.65633415337652e-05
475 6.531918188557029e-05
476 6.440009747166187e-05
477 6.362851854646578e-05
478 6.269618461374193e-05
479 6.168567051645368e-05
480 6.0801332438131794e-05
481 5.991800207993947e-05
482 5.906978185

tensor可以自动实现grad

简单的autograd

In [39]:
# Minimal autograd example: y = w*x + b on scalar tensors that track gradients
x = torch.tensor(1.0, requires_grad=True)
w = torch.tensor(2.0, requires_grad=True)
b = torch.tensor(3.0, requires_grad=True)
y = torch.add(torch.mul(w, x), b)
y.backward()   # populates .grad on x, w and b
# dy/dw = x = 1, dy/dx = w = 2, dy/db = 1
print(w.grad, x.grad, b.grad, sep="\n")

tensor(1.)
tensor(2.)
tensor(1.)
