# Torch

In [1]:
import torch

In [2]:
# Shell escape: list the NVIDIA GPUs on this machine with their driver, memory and utilisation.
!nvidia-smi

Tue Oct 20 18:26:45 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.95.01    Driver Version: 440.95.01    CUDA Version: 10.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  TITAN RTX           Off  | 00000000:3B:00.0 Off |                  N/A |
| 40%   36C    P0    59W / 280W |      0MiB / 24220MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
|   1  TITAN RTX           Off  | 00000000:D9:00.0 Off |                  N/A |
| 37%   36C    P0    23W / 280W |      0MiB / 24220MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                            

In [3]:
# Record the installed PyTorch version and whether a CUDA device is usable.
torch.__version__, torch.cuda.is_available()

('1.6.0', True)

# Tensor

In [4]:
# A 1-D tensor built from a Python list of ints; the output shows it is stored as int64.
v = torch.tensor([1, 2, 3, 4])
# .shape and .size() report the same torch.Size; .dim() is the rank and .numel() the element count.
v, v.dtype, v.shape, v.size(), v.dim(), v.numel()

(tensor([1, 2, 3, 4]), torch.int64, torch.Size([4]), torch.Size([4]), 1, 4)

In [5]:
# A second vector of the same length as v, used for the products in the next cell.
w = torch.tensor([1, 0, 2, 0])
w

tensor([1, 0, 2, 0])

In [6]:
# `*` multiplies element by element; `@` and torch.dot both give the inner product of the two vectors.
v * w, v @ w, torch.dot(v, w)

(tensor([1, 0, 6, 0]), tensor(7), tensor(7))

In [7]:
# Five floats, filled in place with random integers drawn from 0..9.
# torch.empty(5) allocates the buffer explicitly; the legacy torch.Tensor(5)
# constructor is easily confused with torch.tensor(5), which would instead
# build a 0-d tensor holding the value 5.
x = torch.empty(5).random_(10)
# Integer indices return 0-d tensors, slices keep a dimension.
# The slice stop is exclusive: 1:2 selects one element, 1:2+1 selects two.
x, x[0], x[-1], x[1:2], x[1:2+1]

(tensor([9., 5., 5., 3., 4.]),
 tensor(9.),
 tensor(4.),
 tensor([5.]),
 tensor([5., 5.]))

In [8]:
# arange excludes its stop value, so 4+1 is needed to get 1..4.
v = torch.arange(1,4+1)
# .pow(2) squares each element.
v, v.pow(2)

(tensor([1, 2, 3, 4]), tensor([ 1,  4,  9, 16]))

In [9]:
# A 2x4 integer matrix built from nested lists (one inner list per row).
m = torch.tensor([[2, 5, 3, 7],
                  [4, 2, 1, 9]])
m

tensor([[2, 5, 3, 7],
        [4, 2, 1, 9]])

In [10]:
# Same metadata as for the vector: two dimensions, eight elements in total.
m.dtype, m.shape, m.size(), m.dim(), m.numel()

(torch.int64, torch.Size([2, 4]), torch.Size([2, 4]), 2, 8)

In [11]:
# Chained indexing and tuple indexing both select row 0, column 2.
m[0][2], m[0,2]

(tensor(3), tensor(3))

In [12]:
# An integer column index drops that dimension: the result is 1-D with shape [2].
m[:,1], m[:,1].shape

(tensor([5, 2]), torch.Size([2]))

In [13]:
# Indexing with a list keeps the dimension: the same column comes back with shape [2, 1].
m[:,[1]], m[:,[1]].shape

(tensor([[5],
         [2]]),
 torch.Size([2, 1]))

In [14]:
# An integer row index likewise drops the row dimension: shape [4].
m[0,:], m[0,:].shape

(tensor([2, 5, 3, 7]), torch.Size([4]))

In [15]:
# A list row index keeps the row dimension: shape [1, 4].
m[[0],:], m[[0],:].shape

(tensor([[2, 5, 3, 7]]), torch.Size([1, 4]))

In [16]:
# Rebuild v as 1..4 (the stop value 5 is excluded) for the matrix products below.
v = torch.arange(1,5)
v

tensor([1, 2, 3, 4])

In [17]:
# `m * v` broadcasts v across each row of m; `m @ v` is the matrix-vector product (one value per row).
m * v, m @ v

(tensor([[ 2, 10,  9, 28],
         [ 4,  4,  3, 36]]),
 tensor([49, 47]))

In [18]:
# A [1, 4] row times v keeps one dimension (shape [1]); a 1-D row times v collapses to a 0-d scalar.
m[[0],:] @ v, (m[0,:] @ v)

(tensor([49]), tensor(49))

In [19]:
# Element-wise add/subtract with a fresh random 2x4 tensor each time; the integer matrix
# combines with the float operand into a floating-point result (see output).
m + torch.rand(2,4), m - torch.rand(2,4)

(tensor([[2.1456, 5.9475, 3.2575, 7.5551],
         [4.3291, 2.3504, 1.2157, 9.9149]]),
 tensor([[1.0880, 4.2221, 2.6737, 6.5120],
         [3.6080, 1.4514, 0.4887, 8.6299]]))

In [20]:
# Element-wise multiply/divide by random tensors; dividing by a small random value gives a large quotient.
m * torch.rand(2,4), m / torch.rand(2,4)

(tensor([[1.5921, 2.2838, 2.2324, 2.5355],
         [2.4632, 1.7707, 0.9084, 5.9700]]),
 tensor([[  5.8693,   5.2311,  10.2894,  14.7569],
         [ 18.7023, 138.5184,   1.4952,  16.5680]]))

In [21]:
# .t() transposes the 2x4 matrix to 4x2; rank and element count are unchanged.
m.t(), m.t().shape, m.t().size(), m.t().dim(), m.t().numel()

(tensor([[2, 4],
         [5, 2],
         [3, 1],
         [7, 9]]),
 torch.Size([4, 2]),
 torch.Size([4, 2]),
 2,
 8)

In [22]:
# Swapping dimensions 0 and 1 explicitly gives the same result as m.t() above.
m.transpose(0,1)

tensor([[2, 4],
        [5, 2],
        [3, 1],
        [7, 9]])

In [23]:
# A 2x4 matrix with a row of ones and a row of zeros, used as the other matmul operand below.
n = torch.tensor([[1, 1, 1, 1],
                  [0, 0, 0, 0]])
n, n.shape

(tensor([[1, 1, 1, 1],
         [0, 0, 0, 0]]),
 torch.Size([2, 4]))

In [24]:
# (4x2) @ (2x4) -> 4x4; torch.matmul and the .mm method give the same product.
m.t().shape, n.shape, torch.matmul(m.t(), n), m.t().mm(n)

(torch.Size([4, 2]),
 torch.Size([2, 4]),
 tensor([[2, 2, 2, 2],
         [5, 5, 5, 5],
         [3, 3, 3, 3],
         [7, 7, 7, 7]]),
 tensor([[2, 2, 2, 2],
         [5, 5, 5, 5],
         [3, 3, 3, 3],
         [7, 7, 7, 7]]))

In [25]:
# Reversed order: (2x4) @ (4x2) -> 2x2. The ones row sums each row of m, the zeros row yields zeros.
n.shape, m.t().shape, torch.matmul(n, m.t()), n.mm(m.t())

(torch.Size([2, 4]),
 torch.Size([4, 2]),
 tensor([[17, 16],
         [ 0,  0]]),
 tensor([[17, 16],
         [ 0,  0]]))

# Constructor

In [26]:
# torch.empty only allocates; the zeros shown here are incidental (the later
# torch.empty(5,3) cell prints arbitrary values).
x = torch.empty(5,3)
x, x.shape, x.numel(), x.dim()

(tensor([[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]]),
 torch.Size([5, 3]),
 15,
 2)

In [27]:
# A 5x3 tensor of random values (all between 0 and 1 in the output below).
x = torch.rand(5,3)
x, x.shape, x.numel(), x.dim()

(tensor([[0.4983, 0.5930, 0.6834],
         [0.2336, 0.2621, 0.5814],
         [0.3482, 0.6548, 0.8395],
         [0.5166, 0.1494, 0.9822],
         [0.0233, 0.1559, 0.7612]]),
 torch.Size([5, 3]),
 15,
 2)

In [28]:
# Zeros with an explicit integer dtype; torch.long is reported as torch.int64.
x = torch.zeros(5,3, dtype=torch.long)
x, x.dtype

(tensor([[0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0],
         [0, 0, 0]]),
 torch.int64)

In [29]:
# Zeros with an explicit floating dtype; torch.float is reported as torch.float32.
x = torch.zeros(5,3, dtype=torch.float)
x, x.dtype

(tensor([[0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.],
         [0., 0., 0.]]),
 torch.float32)

In [30]:
# Without a dtype argument the ones tensor comes out as float32 (see output).
x = torch.ones(5,3)
x, x.dtype

(tensor([[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]]),
 torch.float32)

In [31]:
# Integer ones: request the 64-bit integer dtype by its torch name.
x = torch.ones(5, 3, dtype=torch.int64)
x, x.dtype

(tensor([[1, 1, 1],
         [1, 1, 1],
         [1, 1, 1],
         [1, 1, 1],
         [1, 1, 1]]),
 torch.int64)

In [32]:
# A list mixing a float and an int yields a float32 tensor (see output).
x = torch.tensor([5.5, 3])
x, x.dtype

(tensor([5.5000, 3.0000]), torch.float32)

In [33]:
# new_ones builds a fresh 5x3 tensor of ones from x; the output shows float32, the same dtype as x.
y = x.new_ones(5,3)
y, y.dtype

(tensor([[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]]),
 torch.float32)

In [34]:
# Random values with the same shape as x (two elements); dtype is passed explicitly.
z = torch.randn_like(x, dtype=torch.float)
z, z.dtype

(tensor([-0.1705, -0.4688]), torch.float32)

# Operations

In [35]:
# Two 5x3 operands for the addition examples that follow.
x = torch.ones(5,3)
y = torch.rand(5,3)
# The `+` operator and torch.add produce the same sum.
x, y, x+y, torch.add(x,y)

(tensor([[1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.],
         [1., 1., 1.]]),
 tensor([[0.9596, 0.4888, 0.3264],
         [0.3503, 0.4785, 0.4543],
         [0.7787, 0.2219, 0.3825],
         [0.7200, 0.4972, 0.4999],
         [0.3937, 0.1775, 0.9737]]),
 tensor([[1.9596, 1.4888, 1.3264],
         [1.3503, 1.4785, 1.4543],
         [1.7787, 1.2219, 1.3825],
         [1.7200, 1.4972, 1.4999],
         [1.3937, 1.1775, 1.9737]]),
 tensor([[1.9596, 1.4888, 1.3264],
         [1.3503, 1.4785, 1.4543],
         [1.7787, 1.2219, 1.3825],
         [1.7200, 1.4972, 1.4999],
         [1.3937, 1.1775, 1.9737]]))

In [36]:
# Allocate an uninitialised result buffer; printing it shows arbitrary leftover values.
r = torch.empty(5,3)
print(r)
# out=r writes the sum into the existing buffer, as the displayed r confirms.
torch.add(x,y, out=r)
r

tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00, -2.7485e+17,  3.0908e-41],
        [ 0.0000e+00,  0.0000e+00, -2.6293e+17],
        [ 3.0908e-41, -2.6293e+17,  3.0908e-41],
        [-2.6293e+17,  3.0908e-41,  0.0000e+00]])


tensor([[1.9596, 1.4888, 1.3264],
        [1.3503, 1.4785, 1.4543],
        [1.7787, 1.2219, 1.3825],
        [1.7200, 1.4972, 1.4999],
        [1.3937, 1.1775, 1.9737]])

In [37]:
# Show y before the update, then add x into y in place (methods ending in `_` modify their tensor).
# Not idempotent: running this cell again adds x a second time.
print(y)
y.add_(x)

tensor([[0.9596, 0.4888, 0.3264],
        [0.3503, 0.4785, 0.4543],
        [0.7787, 0.2219, 0.3825],
        [0.7200, 0.4972, 0.4999],
        [0.3937, 0.1775, 0.9737]])


tensor([[1.9596, 1.4888, 1.3264],
        [1.3503, 1.4785, 1.4543],
        [1.7787, 1.2219, 1.3825],
        [1.7200, 1.4972, 1.4999],
        [1.3937, 1.1775, 1.9737]])

In [38]:
# A 4x4 random matrix (16 elements) to reshape in the next cells.
x = torch.rand(4,4)
x.shape, x.numel(), x

(torch.Size([4, 4]),
 16,
 tensor([[0.8295, 0.9194, 0.2694, 0.4045],
         [0.3548, 0.6812, 0.3000, 0.4250],
         [0.5231, 0.9878, 0.7323, 0.2693],
         [0.1566, 0.9932, 0.0129, 0.1997]]))

In [39]:
# Flatten the 4x4 matrix to 16 elements; the output lists them row by row.
y = x.view(16)
y.shape, y.numel(), y

(torch.Size([16]),
 16,
 tensor([0.8295, 0.9194, 0.2694, 0.4045, 0.3548, 0.6812, 0.3000, 0.4250, 0.5231,
         0.9878, 0.7323, 0.2693, 0.1566, 0.9932, 0.0129, 0.1997]))

In [40]:
# -1 lets view infer that dimension: 16 elements with 8 columns gives 2 rows.
z = x.view(-1,8)
z.shape, z.numel(), z

(torch.Size([2, 8]),
 16,
 tensor([[0.8295, 0.9194, 0.2694, 0.4045, 0.3548, 0.6812, 0.3000, 0.4250],
         [0.5231, 0.9878, 0.7323, 0.2693, 0.1566, 0.9932, 0.0129, 0.1997]]))

In [41]:
# .item() extracts the single element as a plain Python number (shown at full precision).
x = torch.randn(1)
x, x.item()

(tensor([-1.3678]), -1.3677515983581543)

In [42]:
# Broadcasting: shapes are aligned from the trailing dimension, so (3,1,1) lines up
# with the last three dimensions of (5,3,4,1) and the sum has shape (5,3,4,1).
x = torch.randn(5,3,4,1)
y = torch.randn(  3,1,1)
z = x + y
x.shape, y.shape, z.shape

(torch.Size([5, 3, 4, 1]), torch.Size([3, 1, 1]), torch.Size([5, 3, 4, 1]))

In [43]:
# A 0-d tensor (shape []) broadcasts against a vector like a scalar.
x = torch.arange(3)
y = torch.tensor(5)
z = x + y
x, x.shape, y, y.shape, z, z.shape

(tensor([0, 1, 2]),
 torch.Size([3]),
 tensor(5),
 torch.Size([]),
 tensor([5, 6, 7]),
 torch.Size([3]))

# NumPy Bridge

In [44]:
# .numpy() exposes the tensor as a NumPy float32 array; the next cell shows the two stay in sync.
a = torch.ones(5)
b = a.numpy()
a, b

(tensor([1., 1., 1., 1., 1.]), array([1., 1., 1., 1., 1.], dtype=float32))

In [45]:
# Updating the tensor in place also changes b: the array reflects the tensor's new values.
a.add_(1), b

(tensor([2., 2., 2., 2., 2.]), array([2., 2., 2., 2., 2.], dtype=float32))

In [46]:
import numpy as np

# The other direction: wrap a NumPy array as a tensor, then bump the array in
# place. The tensor displays the updated values too, and keeps NumPy's float64.
a = np.ones(5)
b = torch.from_numpy(a)
a += 1
a, b

(array([2., 2., 2., 2., 2.]),
 tensor([2., 2., 2., 2., 2.], dtype=torch.float64))

# CUDA Tensors

In [47]:
# Tensors are created on the CPU unless a device is given (see the device in the output).
x = torch.zeros(2,2)
x, x.device

(tensor([[0., 0.],
         [0., 0.]]),
 device(type='cpu'))

In [48]:
# Use the GPU when one is available and fall back to the CPU otherwise, so that
# y and z are always defined by this cell instead of leaking in from earlier
# cells (or raising NameError on a fresh kernel) when CUDA is missing.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
y = torch.ones_like(x, device=device)  # created directly on the target device
x = x.to(device)                       # move x to the same device
z = x + y                              # both operands now share a device
# .to("cpu") brings the result back to host memory.
x, y, z, z.to("cpu")

(tensor([[0., 0.],
         [0., 0.]], device='cuda:0'),
 tensor([[1., 1.],
         [1., 1.]], device='cuda:0'),
 tensor([[1., 1.],
         [1., 1.]], device='cuda:0'),
 tensor([[1., 1.],
         [1., 1.]]))

# AutoGrad

In [49]:
# requires_grad=True marks x as a tensor whose operations should be tracked for differentiation.
x = torch.ones(2,2, requires_grad=True)
x

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)

In [50]:
# y results from an operation on x, so it carries a grad_fn (AddBackward0) recording that operation.
y = x + 2
y, y.grad_fn

(tensor([[3., 3.],
         [3., 3.]], grad_fn=<AddBackward0>),
 <AddBackward0 at 0x7f27d1d442b0>)

In [51]:
# z = 3 * y**2 element-wise (27 everywhere since y is 3); o is the mean of z, a scalar.
z = y * y * 3
o = z.mean()
z, o

(tensor([[27., 27.],
         [27., 27.]], grad_fn=<MulBackward0>),
 tensor(27., grad_fn=<MeanBackward0>))

In [52]:
# A tensor created without requires_grad, and anything computed from it, is not tracked (output: False).
a = torch.randn(2,2)
a = ((a * 3) / (a - 1))
a.requires_grad

False

In [53]:
# requires_grad_ switches the flag on the existing tensor in place.
a.requires_grad_(True)
a.requires_grad

True

In [54]:
# Now that a is tracked, the sum of squares b carries a grad_fn (SumBackward0).
b = (a * a).sum()
a, b, b.grad_fn

(tensor([[-25.1171,  -8.6672],
         [  1.0968,   0.5776]], requires_grad=True),
 tensor(707.5282, grad_fn=<SumBackward0>),
 <SumBackward0 at 0x7f27d1d4b8e0>)

In [55]:
# Show the scalar, then backpropagate from it; the resulting x.grad is displayed in the next cell.
# With o = mean(3 * (x + 2)**2) over four elements, do/dx = 1.5 * (x + 2) = 4.5 at x = 1.
print(o)
o.backward()

tensor(27., grad_fn=<MeanBackward0>)


In [56]:
# Gradient of o with respect to each element of x, filled in by the backward call above.
x.grad

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])

In [57]:
# Results computed from x are tracked by default...
print(x.requires_grad)
print((x**2).requires_grad)
# ...but inside torch.no_grad() the same expression is not tracked (prints False).
with torch.no_grad():
    print((x**2).requires_grad)

True
True
False


In [58]:
# Leaving the no_grad block changes nothing on x itself: it is still tracked and keeps its gradient.
print(x.requires_grad)
print(x.grad)

True
tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])


In [59]:
# detach() gives a tensor that is not tracked: requires_grad is False and it has no gradient.
y = x.detach()
print(y.requires_grad)
print(y.grad)

False
None


In [60]:
# The detached tensor holds the same values as x: element-wise equality is True everywhere.
x, y, x.eq(y).all()

(tensor([[1., 1.],
         [1., 1.]], requires_grad=True),
 tensor([[1., 1.],
         [1., 1.]]),
 tensor(True))