# Torch

In [1]:
import numpy as np
import torch

In [2]:
# Shell escape: ask the NVIDIA driver for the GPU model, driver/CUDA versions and current memory use.
!nvidia-smi

Tue Oct 13 14:27:22 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.100      Driver Version: 440.100      CUDA Version: 10.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  GeForce RTX 208...  Off  | 00000000:65:00.0  On |                  N/A |
| 18%   48C    P2    53W / 250W |   1591MiB / 10985MiB |      3%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage    

In [3]:
# Installed PyTorch build, and whether a CUDA device is usable from it.
(
    torch.__version__,
    torch.cuda.is_available(),
)

('1.6.0', True)

# Tensor

In [4]:
# A 1-D integer tensor built from a Python list.
v = torch.tensor(data=[1, 2, 3, 4])

# Value, element type, shape (attribute and method spellings), rank and element count.
(
    v,
    v.dtype,
    v.shape,
    v.size(),
    v.dim(),
    v.numel(),
)

(tensor([1, 2, 3, 4]), torch.int64, torch.Size([4]), torch.Size([4]), 1, 4)

In [5]:
# Second 1-D integer tensor, with zeros in two of its four positions.
w = torch.tensor(data=[1, 0, 2, 0])
w

tensor([1, 0, 2, 0])

In [6]:
(
    v * w,            # element-wise product
    v @ w,            # inner product via the matmul operator
    torch.dot(v, w),  # inner product via the explicit function
)

(tensor([1, 0, 6, 0]), tensor(7), tensor(7))

In [7]:
# Allocate five float32 slots with torch.empty (the documented factory; the
# legacy torch.Tensor(5) constructor is easy to confuse with torch.tensor(5),
# which builds a scalar) and fill them in place with random integers in [0, 10).
x = torch.empty(5).random_(10)

# Whole tensor, first and last element, then two slices: 1:2 holds one element,
# 1:2+1 holds two because the stop index is exclusive.
x, x[0], x[-1], x[1:2], x[1:2+1]

(tensor([1., 0., 8., 9., 8.]),
 tensor(1.),
 tensor(8.),
 tensor([0.]),
 tensor([0., 8.]))

In [8]:
# Integers 1..4; the end bound is exclusive, hence 4 + 1.
v = torch.arange(start=1, end=4 + 1)

(
    v,
    v ** 2,  # element-wise square
)

(tensor([1, 2, 3, 4]), tensor([ 1,  4,  9, 16]))

In [9]:
# 2 x 4 integer matrix used throughout the indexing and matmul examples.
m = torch.tensor(
    [
        [2, 5, 3, 7],
        [4, 2, 1, 9],
    ]
)
m

tensor([[2, 5, 3, 7],
        [4, 2, 1, 9]])

In [10]:
# Element type, shape (attribute and method spellings), rank and element count of m.
(
    m.dtype,
    m.shape,
    m.size(),
    m.dim(),
    m.numel(),
)

(torch.int64, torch.Size([2, 4]), torch.Size([2, 4]), 2, 8)

In [11]:
(
    m[0][2],  # chained indexing: row 0, then element 2
    m[0, 2],  # one multi-axis index addressing the same element
)

(tensor(3), tensor(3))

In [12]:
# An integer column index drops that axis: the result is 1-D.
(
    m[:, 1],
    m[:, 1].shape,
)

(tensor([5, 2]), torch.Size([2]))

In [13]:
# A list column index keeps the axis: the result is a 2 x 1 column.
(
    m[:, [1]],
    m[:, [1]].shape,
)

(tensor([[5],
         [2]]),
 torch.Size([2, 1]))

In [14]:
# An integer row index drops that axis: the result is 1-D with four elements.
(
    m[0, :],
    m[0, :].shape,
)

(tensor([2, 5, 3, 7]), torch.Size([4]))

In [15]:
# A list row index keeps the axis: the result is a 1 x 4 row.
(
    m[[0], :],
    m[[0], :].shape,
)

(tensor([[2, 5, 3, 7]]), torch.Size([1, 4]))

In [16]:
# Integers 1..4 (the end bound 5 is exclusive).
v = torch.arange(start=1, end=5)
v

tensor([1, 2, 3, 4])

In [17]:
(
    m * v,  # element-wise: v is broadcast across each row of m
    m @ v,  # matrix-vector product: one value per row of m
)

(tensor([[ 2, 10,  9, 28],
         [ 4,  4,  3, 36]]),
 tensor([49, 47]))

In [18]:
(
    m[[0], :] @ v,  # 1 x 4 row times vector: one-element 1-D result
    m[0, :] @ v,    # 1-D row times vector: 0-D (scalar) result
)

(tensor([49]), tensor(49))

In [19]:
# Integer m combined with fresh uniform float noise; the results are floating point.
(
    m + torch.rand(2, 4),
    m - torch.rand(2, 4),
)

(tensor([[2.6713, 5.8160, 3.8891, 7.6753],
         [4.5583, 2.0758, 1.2766, 9.9474]]),
 tensor([[1.5004, 4.6900, 2.4438, 6.6373],
         [3.0566, 1.8937, 0.4054, 8.7432]]))

In [20]:
# Element-wise product and quotient of m with fresh uniform float noise.
(
    m * torch.rand(2, 4),
    m / torch.rand(2, 4),
)

(tensor([[0.9999, 3.7475, 2.7610, 6.2455],
         [3.1917, 0.1958, 0.4395, 8.8613]]),
 tensor([[ 9.4761, 36.5896,  4.1445, 10.0223],
         [46.9610, 12.7203,  7.1309, 43.3253]]))

In [21]:
# Transpose of m followed by its shape, rank and element count.
(
    m.t(),
    m.t().shape,
    m.t().size(),
    m.t().dim(),
    m.t().numel(),
)

(tensor([[2, 4],
         [5, 2],
         [3, 1],
         [7, 9]]),
 torch.Size([4, 2]),
 torch.Size([4, 2]),
 2,
 8)

In [22]:
# Swap axes 0 and 1 explicitly; for this 2-D tensor the result matches m.t().
m.transpose(0, 1)

tensor([[2, 4],
        [5, 2],
        [3, 1],
        [7, 9]])

In [23]:
# 2 x 4 selector matrix: a row of ones above a row of zeros.
n = torch.tensor(
    [
        [1, 1, 1, 1],
        [0, 0, 0, 0],
    ]
)

(
    n,
    n.shape,
)

(tensor([[1, 1, 1, 1],
         [0, 0, 0, 0]]),
 torch.Size([2, 4]))

In [24]:
(
    m.t().shape,
    n.shape,
    torch.matmul(m.t(), n),  # (4, 2) @ (2, 4) -> (4, 4)
    m.t().mm(n),             # method spelling of the same product
)

(torch.Size([4, 2]),
 torch.Size([2, 4]),
 tensor([[2, 2, 2, 2],
         [5, 5, 5, 5],
         [3, 3, 3, 3],
         [7, 7, 7, 7]]),
 tensor([[2, 2, 2, 2],
         [5, 5, 5, 5],
         [3, 3, 3, 3],
         [7, 7, 7, 7]]))

In [25]:
(
    n.shape,
    m.t().shape,
    torch.matmul(n, m.t()),  # (2, 4) @ (4, 2) -> (2, 2)
    n.mm(m.t()),             # method spelling of the same product
)

(torch.Size([2, 4]),
 torch.Size([4, 2]),
 tensor([[17, 16],
         [ 0,  0]]),
 tensor([[17, 16],
         [ 0,  0]]))

# Constructors

In [26]:
# Allocate a 5 x 3 tensor without initialising its contents.
x = torch.empty((5, 3))

(
    x,
    x.shape,
    x.numel(),
    x.dim(),
)

(tensor([[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]]),
 torch.Size([5, 3]),
 15,
 2)

In [27]:
# 5 x 3 tensor of uniform random floats.
x = torch.rand((5, 3))

(
    x,
    x.shape,
    x.numel(),
    x.dim(),
)

(tensor([[0.7969, 0.6324, 0.6476],
         [0.2170, 0.3210, 0.0404],
         [0.2209, 0.1960, 0.0468],
         [0.9313, 0.3142, 0.4371],
         [0.5908, 0.7782, 0.7441]]),
 torch.Size([5, 3]),
 15,
 2)

In [28]:
# 5 x 3 zeros stored as 64-bit integers (torch.long is the same dtype as torch.int64).
x = torch.zeros((5, 3), dtype=torch.int64)

(
    x,
    x.dtype,
)

(tensor([[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]]),
 torch.int64)

In [29]:
# 5 x 3 zeros stored as 32-bit floats (torch.float is the same dtype as torch.float32).
x = torch.zeros((5, 3), dtype=torch.float32)

(
    x,
    x.dtype,
)

(tensor([[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]]),
 torch.float32)

In [30]:
# 5 x 3 ones; with no dtype given the result is float32.
x = torch.ones((5, 3))

(
    x,
    x.dtype,
)

(tensor([[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]]),
 torch.float32)

In [31]:
# The Python builtin int is accepted as a dtype; the resulting tensor is torch.int64.
x = torch.ones((5, 3), dtype=int)

(
    x,
    x.dtype,
)

(tensor([[1, 1, 1],
         [1, 1, 1],
         [1, 1, 1],
         [1, 1, 1],
         [1, 1, 1]]),
 torch.int64)

In [32]:
# Mixing a float and an int in the source list yields a float32 tensor.
x = torch.tensor(data=[5.5, 3])

(
    x,
    x.dtype,
)

(tensor([5.5000, 3.0000]), torch.float32)

In [33]:
# new_ones builds a fresh 5 x 3 tensor of ones that takes its dtype from x.
y = x.new_ones((5, 3))

(
    y,
    y.dtype,
)

(tensor([[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]]),
 torch.float32)

In [34]:
# Standard-normal samples with the same shape as x, explicitly requested as float32.
z = torch.randn_like(x, dtype=torch.float32)

(
    z,
    z.dtype,
)

(tensor([ 0.8728, -1.0019]), torch.float32)

# Operations

In [35]:
# Operands for the addition examples: all ones, and uniform noise.
x = torch.ones((5, 3))
y = torch.rand((5, 3))

(
    x,
    y,
    x + y,            # operator form
    torch.add(x, y),  # function form, same result
)

(tensor([[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]]),
 tensor([[0.3339, 0.9244, 0.3020],
         [0.8745, 0.9309, 0.8830],
         [0.4871, 0.8341, 0.9936],
         [0.2091, 0.5095, 0.9730],
         [0.2136, 0.4608, 0.0913]]),
 tensor([[1.3339, 1.9244, 1.3020],
         [1.8745, 1.9309, 1.8830],
         [1.4871, 1.8341, 1.9936],
         [1.2091, 1.5095, 1.9730],
         [1.2136, 1.4608, 1.0913]]),
 tensor([[1.3339, 1.9244, 1.3020],
         [1.8745, 1.9309, 1.8830],
         [1.4871, 1.8341, 1.9936],
         [1.2091, 1.5095, 1.9730],
         [1.2136, 1.4608, 1.0913]]))

In [36]:
# Pre-allocate the destination; torch.empty leaves its contents uninitialised,
# so the values printed here are arbitrary.
r = torch.empty((5, 3))
print(r)

# Write x + y straight into r via the out= argument, then show r.
torch.add(x, y, out=r)
r

tensor([[-4.3340e+23,  4.5849e-41, -4.3340e+23],
        [ 4.5849e-41,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  1.8788e+31,  1.7220e+22],
        [ 3.7771e-08,  2.0429e+20,  1.2859e-11]])


tensor([[1.3339, 1.9244, 1.3020],
        [1.8745, 1.9309, 1.8830],
        [1.4871, 1.8341, 1.9936],
        [1.2091, 1.5095, 1.9730],
        [1.2136, 1.4608, 1.0913]])

In [37]:
# Show y before the update.
print(y)
# add_ (trailing underscore) adds x into y in place; the cell displays the mutated y.
# Because y itself changes, re-running this cell adds x again.
y.add_(x)

tensor([[0.3339, 0.9244, 0.3020],
        [0.8745, 0.9309, 0.8830],
        [0.4871, 0.8341, 0.9936],
        [0.2091, 0.5095, 0.9730],
        [0.2136, 0.4608, 0.0913]])


tensor([[1.3339, 1.9244, 1.3020],
        [1.8745, 1.9309, 1.8830],
        [1.4871, 1.8341, 1.9936],
        [1.2091, 1.5095, 1.9730],
        [1.2136, 1.4608, 1.0913]])

In [38]:
# 4 x 4 tensor of uniform random floats, reshaped by the view examples that follow.
x = torch.rand((4, 4))

(
    x.shape,
    x.numel(),
    x,
)

(torch.Size([4, 4]),
 16,
 tensor([[0.5284, 0.2707, 0.1703, 0.8539],
         [0.1463, 0.4246, 0.2217, 0.9443],
         [0.8398, 0.3720, 0.2578, 0.4710],
         [0.6297, 0.0862, 0.7246, 0.2244]]))

In [39]:
# Reinterpret the 4 x 4 tensor as a flat vector of 16 elements.
y = x.view(16)

(
    y.shape,
    y.numel(),
    y,
)

(torch.Size([16]),
 16,
 tensor([0.5284, 0.2707, 0.1703, 0.8539, 0.1463, 0.4246, 0.2217, 0.9443, 0.8398,
         0.3720, 0.2578, 0.4710, 0.6297, 0.0862, 0.7246, 0.2244]))

In [40]:
# -1 asks view to infer that dimension from the element count (2 here).
z = x.view(-1, 8)

(
    z.shape,
    z.numel(),
    z,
)

(torch.Size([2, 8]),
 16,
 tensor([[0.5284, 0.2707, 0.1703, 0.8539, 0.1463, 0.4246, 0.2217, 0.9443],
         [0.8398, 0.3720, 0.2578, 0.4710, 0.6297, 0.0862, 0.7246, 0.2244]]))

In [41]:
# One-element tensor; item() extracts its value as a plain Python number.
x = torch.randn((1,))

(
    x,
    x.item(),
)

(tensor([-0.6652]), -0.6651809215545654)

In [42]:
# Broadcasting: y's shape (3, 1, 1) is aligned with the trailing axes of x's
# shape (5, 3, 4, 1), so the sum has x's shape.
x = torch.randn(5, 3, 4, 1)
y = torch.randn(3, 1, 1)
z = torch.add(x, y)

(
    x.shape,
    y.shape,
    z.shape,
)

(torch.Size([5, 3, 4, 1]), torch.Size([3, 1, 1]), torch.Size([5, 3, 4, 1]))

In [43]:
# A 0-D (scalar) tensor broadcasts against every element of a 1-D tensor.
x = torch.arange(end=3)
y = torch.tensor(data=5)
z = torch.add(x, y)

(
    x,
    x.shape,
    y,
    y.shape,
    z,
    z.shape,
)

(tensor([0, 1, 2]),
 torch.Size([3]),
 tensor(5),
 torch.Size([]),
 tensor([5, 6, 7]),
 torch.Size([3]))

# NumPy Bridge

In [44]:
# Tensor -> NumPy: numpy() exposes the tensor's data as an ndarray.
a = torch.ones((5,))
b = a.numpy()

(
    a,
    b,
)

(tensor([1., 1., 1., 1., 1.]), array([1., 1., 1., 1., 1.], dtype=float32))

In [45]:
# add_ mutates a in place; b, obtained from a.numpy(), shows the same new values.
a.add_(1), b

(tensor([2., 2., 2., 2., 2.]), array([2., 2., 2., 2., 2.], dtype=float32))

In [46]:
# NumPy -> tensor: from_numpy wraps the array without copying, so the in-place
# NumPy update below is visible through b as well. The tensor keeps NumPy's
# float64 element type. (numpy is imported in the notebook's first cell.)
a = np.ones(5)
b = torch.from_numpy(a)
np.add(a, 1, out=a)  # in-place update of the array that b shares
a, b

(array([2., 2., 2., 2., 2.]),
 tensor([2., 2., 2., 2., 2.], dtype=torch.float64))

# CUDA Tensors

In [47]:
# Tensors are created on the CPU unless a device is specified.
x = torch.zeros((2, 2))

(
    x,
    x.device,
)

(tensor([[0., 0.],
         [0., 0.]]),
 device(type='cpu'))

In [48]:
# Use the GPU when one is present and fall back to the CPU otherwise, so this
# cell always defines y and z from the current x instead of silently displaying
# stale values left over from earlier cells when CUDA is unavailable.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
y = torch.ones_like(x, device=device)  # created directly on the target device
x = x.to(device)                       # move x to the same device as y
z = x + y                              # both operands now share a device
x, y, z, z.to("cpu")

(tensor([[0., 0.],
         [0., 0.]], device='cuda:0'),
 tensor([[1., 1.],
         [1., 1.]], device='cuda:0'),
 tensor([[1., 1.],
         [1., 1.]], device='cuda:0'),
 tensor([[1., 1.],
         [1., 1.]]))

# Autograd

In [49]:
# Ask autograd to record operations on x so gradients can be computed later.
x = torch.ones((2, 2), requires_grad=True)
x

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)

In [50]:
# A result computed from x carries a grad_fn naming the operation that produced it.
y = x + 2

(
    y,
    y.grad_fn,
)

(tensor([[3., 3.],
         [3., 3.]], grad_fn=<AddBackward0>),
 <AddBackward0 at 0x7fcf5850bed0>)

In [51]:
# z = 3 * y^2 element-wise; o reduces z to a single scalar via the mean.
z = y * y * 3
o = z.mean()

(
    z,
    o,
)

(tensor([[27., 27.],
         [27., 27.]], grad_fn=<MulBackward0>),
 tensor(27., grad_fn=<MeanBackward0>))

In [52]:
# Tensors do not track gradients by default, and neither do results computed from them.
a = torch.randn((2, 2))
a = (a * 3) / (a - 1)
a.requires_grad

False

In [53]:
# requires_grad_ (trailing underscore) switches gradient tracking on in place.
a.requires_grad_(requires_grad=True)
a.requires_grad

True

In [54]:
# Sum of squares of a; now that a tracks gradients, b records its grad_fn.
b = (a * a).sum()

(
    a,
    b,
    b.grad_fn,
)

(tensor([[ 1.2726,  1.7089],
         [-4.7136,  0.7944]], requires_grad=True),
 tensor(27.3883, grad_fn=<SumBackward0>),
 <SumBackward0 at 0x7fcf5852a110>)

In [55]:
# Show the scalar that is about to be differentiated.
print(o)
# Back-propagate from o; this populates x.grad (displayed in the next cell).
o.backward()

tensor(27., grad_fn=<MeanBackward0>)


In [56]:
# d(o)/dx: with o = mean(3 * (x + 2)^2) over four elements, each entry is 6 * (x + 2) / 4 = 4.5 at x = 1.
x.grad

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])

In [57]:
# x tracks gradients ...
print(x.requires_grad)
# ... and so does a tensor computed from it ...
print(x.pow(2).requires_grad)
# ... unless the computation runs inside a no_grad block.
with torch.no_grad():
    print(x.pow(2).requires_grad)

True
True
False


In [58]:
# x still tracks gradients and still holds the gradient from the backward pass.
print(x.requires_grad, x.grad, sep="\n")

True
tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])


In [59]:
# detach() yields a tensor that no longer tracks gradients and has no .grad.
y = x.detach()
print(y.requires_grad, y.grad, sep="\n")

False
None


In [60]:
# The detached tensor holds the same values as x.
(
    x,
    y,
    torch.eq(x, y).all(),
)

(tensor([[1., 1.],
         [1., 1.]], requires_grad=True),
 tensor([[1., 1.],
         [1., 1.]]),
 tensor(True))