In [1]:
import torch
from scipy import linalg

# 2.3.1. Scalars

In [2]:
# Scalars are tensors holding a single element; the usual arithmetic
# operators (sum, product, quotient, power) apply to them directly.
x, y = torch.tensor(3.0), torch.tensor(2.0)
x + y, x * y, x / y, x**y

(tensor(5.), tensor(6.), tensor(1.5000), tensor(9.))

# 2.3.2. Vectors

In [3]:
# A vector is a tensor with a single axis; this one holds the integers 0, 1, 2.
x = torch.arange(0, 3)
x

tensor([0, 1, 2])

In [4]:
# Read the element at index 2 (the third entry, since indexing starts at 0).
x[2]

tensor(2)

In [5]:
# len() of a tensor is the size of its first axis; for a vector that is the element count.
len(x)

3

In [6]:
# shape lists the length along every axis; a vector has exactly one axis.
x.shape

torch.Size([3])

# 2.3.3. Matrices

In [7]:
# Six consecutive integers; the following cells reshape them into matrices.
A = torch.arange(0, 6)
A

tensor([0, 1, 2, 3, 4, 5])

In [8]:
# Show the six values as a 6x1 column; A is not reassigned here.
A.reshape(6, 1)

tensor([[0],
        [1],
        [2],
        [3],
        [4],
        [5]])

In [9]:
# Show the same six values as a single 1x6 row; A is still not reassigned.
A.reshape(1, 6)

tensor([[0, 1, 2, 3, 4, 5]])

In [10]:
# Rebind A to a 3x2 matrix holding the same six values, filled row by row.
A = A.reshape(3, 2)
A

tensor([[0, 1],
        [2, 3],
        [4, 5]])

In [11]:
# Transpose: rows and columns swap, turning the 3x2 matrix into a 2x3 one.
A.T

tensor([[0, 2, 4],
        [1, 3, 5]])

In [12]:
# A symmetric matrix equals its own transpose, so every entry of the
# elementwise comparison is True.
A = torch.tensor([
    [1, 2, 3],
    [2, 0, 4],
    [3, 4, 5],
])
A == A.T

tensor([[True, True, True],
        [True, True, True],
        [True, True, True]])

# 2.3.4. Tensors

In [13]:
# A tensor with three axes: the integers 0..23 arranged as 2 blocks of 3x4.
torch.arange(24).reshape(2, 3, 4)

tensor([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]],

        [[12, 13, 14, 15],
         [16, 17, 18, 19],
         [20, 21, 22, 23]]])

# 2.3.5. Basic Properties of Tensor Arithmetic

In [14]:
# Build a 2x3 float matrix and an independent copy of it, then add the two
# elementwise.
A = torch.arange(6, dtype=torch.float32).reshape((2, 3))
B = torch.clone(A)  # clone allocates new memory, so B does not alias A
A, A + B

(tensor([[0., 1., 2.],
         [3., 4., 5.]]),
 tensor([[ 0.,  2.,  4.],
         [ 6.,  8., 10.]]))

In [15]:
# Elementwise (Hadamard) product of two same-shaped matrices; this is not a matrix multiplication.
A * B

tensor([[ 0.,  1.,  4.],
        [ 9., 16., 25.]])

In [16]:
# Adding or multiplying a tensor by a scalar acts on every element, so the
# result keeps the tensor's shape.
a = 2
X = torch.arange(2 * 3 * 4).reshape((2, 3, 4))
a + X, (a * X).shape

(tensor([[[ 2,  3,  4,  5],
          [ 6,  7,  8,  9],
          [10, 11, 12, 13]],
 
         [[14, 15, 16, 17],
          [18, 19, 20, 21],
          [22, 23, 24, 25]]]),
 torch.Size([2, 3, 4]))

# 2.3.6. Reduction

In [17]:
# Summing a vector reduces it to a single scalar tensor.
x = torch.arange(0, 3, dtype=torch.float32)
x, x.sum()

(tensor([0., 1., 2.]), tensor(3.))

In [19]:
# With no axis given, sum() adds up every element of the 2x3 matrix into one scalar.
A.shape, A.sum()

(torch.Size([2, 3]), tensor(15.))

In [22]:
# Summing along axis 0 collapses the rows, leaving one value per column.
A.shape, A.sum(axis=0).shape

(torch.Size([2, 3]), torch.Size([3]))

In [None]:
# Summing along axis 1 collapses the columns, leaving one value per row.
A.shape, A.sum(axis=1).shape

(torch.Size([2, 3]), torch.Size([2]))

In [23]:
# Reducing over both axes at once gives the same scalar as an unqualified sum.
A.sum(axis=[0, 1]) == A.sum()  # Same as A.sum()

tensor(True)

In [24]:
# The mean is the total sum divided by the number of elements.
A.mean(), A.sum() / A.numel()

(tensor(2.5000), tensor(2.5000))

In [25]:
# Mean along axis 0: each column's sum divided by the number of rows (A.shape[0]).
A.mean(axis=0), A.sum(axis=0) / A.shape[0]

(tensor([1.5000, 2.5000, 3.5000]), tensor([1.5000, 2.5000, 3.5000]))

# 2.3.7. Non-Reduction Sum

In [26]:
# keepdims=True retains the reduced axis with length 1, so the row sums come
# back as a 2x1 matrix instead of a length-2 vector.
sum_A = A.sum(axis=1, keepdims=True)
sum_A, sum_A.shape

(tensor([[ 3.],
         [12.]]),
 torch.Size([2, 1]))

In [27]:
# sum_A (2x1) broadcasts across the columns of A (2x3): every row is divided by its own sum.
A / sum_A

tensor([[0.0000, 0.3333, 0.6667],
        [0.2500, 0.3333, 0.4167]])

In [28]:
# Cumulative sum along axis 0: each row is the running total of the rows up to it; the shape is unchanged.
A, A.cumsum(axis=0)

(tensor([[0., 1., 2.],
         [3., 4., 5.]]),
 tensor([[0., 1., 2.],
         [3., 5., 7.]]))

# 2.3.8. Dot Products

In [29]:
# Dot product of two length-3 vectors: the sum of their elementwise products.
y = torch.ones(3, dtype=torch.float32)
x, y, torch.dot(x, y)

(tensor([0., 1., 2.]), tensor([1., 1., 1.]), tensor(3.))

In [30]:
# The same dot product written out: multiply elementwise, then sum.
torch.sum(x * y)

tensor(3.)

# 2.3.9. Matrix-Vector Products

In [31]:
# Matrix-vector product: A (2x3) times x (length 3) gives a length-2 vector; torch.mv and @ agree.
A.shape, x.shape, torch.mv(A, x), A@x

(torch.Size([2, 3]), torch.Size([3]), tensor([ 5., 14.]), tensor([ 5., 14.]))

# 2.3.10. Matrix-Matrix Multiplication

In [32]:
# Matrix-matrix product: A (2x3) times B (3x4) gives a 2x4 matrix; torch.mm and @ agree.
# Note that B is rebound here to a 3x4 matrix of ones.
B = torch.ones(3, 4)
torch.mm(A, B), A@B

(tensor([[ 3.,  3.,  3.,  3.],
         [12., 12., 12., 12.]]),
 tensor([[ 3.,  3.,  3.,  3.],
         [12., 12., 12., 12.]]))

# 2.3.11. Norms

In [33]:
# L2 (Euclidean) norm of a vector: sqrt(3^2 + (-4)^2) = 5.
# torch.norm is deprecated in PyTorch's documentation; torch.linalg.vector_norm
# is the dedicated replacement and defaults to the 2-norm.
u = torch.tensor([3.0, -4.0])
torch.linalg.vector_norm(u)

tensor(5.)

In [34]:
# L1 norm: the sum of the absolute values of the entries (|3| + |-4| = 7).
torch.abs(u).sum()

tensor(7.)

In [35]:
# Frobenius norm of a 4x9 matrix of ones: sqrt(36 * 1^2) = 6.
# torch.norm is deprecated in PyTorch's documentation; torch.linalg.matrix_norm
# is the dedicated replacement and defaults to the Frobenius norm.
torch.linalg.matrix_norm(torch.ones((4, 9)))

tensor(6.)

# 2.3.13. Exercises

In [None]:
# len() reports only the size of the first axis (2 here), not the total
# number of elements in the tensor.
X = torch.arange(2 * 3 * 4).reshape(2, 3, 4)
len(X)

2

In [41]:
# 9
# A 2x3x4 tensor of the integers 0..23; the following cells take its norm
# along each axis in turn.
X = torch.arange(24).reshape(2, 3, 4)
X


tensor([[[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]],

        [[12, 13, 14, 15],
         [16, 17, 18, 19],
         [20, 21, 22, 23]]])

In [42]:
# Euclidean norm taken over axis 0 (length 2), leaving a 3x4 result.
# SciPy returns a NumPy array rather than a tensor.
linalg.norm(X, axis=0)  # Compute the norm along the first axis

array([[12.        , 13.03840481, 14.14213562, 15.29705854],
       [16.4924225 , 17.72004515, 18.97366596, 20.24845673],
       [21.54065923, 22.84731932, 24.16609195, 25.49509757]])

In [43]:
# Euclidean norm taken over axis 1 (length 3), leaving a 2x4 result.
linalg.norm(X, axis=1)  # Compute the norm along the second axis

array([[ 8.94427191, 10.34408043, 11.83215957, 13.37908816],
       [28.28427125, 29.9833287 , 31.68595904, 33.39161571]])

In [44]:
# Euclidean norm taken over axis 2 (length 4), leaving a 2x3 result.
linalg.norm(X, axis=2)  # Compute the norm along the third axis

array([[ 3.74165739, 11.22497216, 19.13112647],
       [27.09243437, 35.07135583, 43.05810028]])

In [46]:
# 10
# Shapes: A is 2^10 x 2^16, B is 2^16 x 2^5, C is 2^5 x 2^14, so both
# (AB)C and A(BC) are well defined.
# No seed is set in the cells shown, so the printed values differ between runs.
A = torch.randn((2**10, 2**16))
B = torch.randn((2**16, 2**5))
C = torch.randn((2**5, 2**14))

In [49]:
# (AB)C: the inner product AB is a small 2^10 x 2^5 intermediate.
torch.mm(torch.mm(A, B), C)

tensor([[ 1998.8823,  -337.1237,  -812.2938,  ..., -1251.2198,  1627.0067,
         -1508.1763],
        [-1052.0015,  -286.5263,  1418.5066,  ...,  1864.1572,  2282.9702,
          -222.4520],
        [  864.7156,   811.2568,  -674.6117,  ..., -1961.9290, -1579.4138,
           -65.4369],
        ...,
        [ -797.4426, -2814.7729,    84.2910,  ...,  -323.3568, -1006.3843,
         -1078.2936],
        [ -711.3341,  1191.8881,  -770.9439,  ..., -1542.2427,  2037.1470,
         -1648.8812],
        [-1423.3683, -3053.0442,   927.0954,  ..., -1068.8671,   562.8793,
          -286.9958]])

In [50]:
# A(BC): the inner product BC is a large 2^16 x 2^14 intermediate.
# The result matches (AB)C only up to floating-point rounding in the last digits.
torch.mm(A, torch.mm(B, C))

tensor([[ 1998.8813,  -337.1234,  -812.2936,  ..., -1251.2208,  1627.0077,
         -1508.1749],
        [-1052.0012,  -286.5269,  1418.5071,  ...,  1864.1575,  2282.9712,
          -222.4508],
        [  864.7160,   811.2565,  -674.6111,  ..., -1961.9288, -1579.4143,
           -65.4363],
        ...,
        [ -797.4427, -2814.7725,    84.2907,  ...,  -323.3565, -1006.3837,
         -1078.2932],
        [ -711.3337,  1191.8887,  -770.9433,  ..., -1542.2432,  2037.1465,
         -1648.8818],
        [-1423.3674, -3053.0437,   927.0951,  ..., -1068.8662,   562.8802,
          -286.9976]])

In [None]:
# difference in memory footprint?
# (AB)C materializes a 2^10 x 2^5 intermediate (2^15 elements), whereas
# A(BC) materializes a 2^16 x 2^14 one (2^30 elements, about 4 GiB assuming
# the default float32 dtype).

In [51]:
# 11
# A and B have the same shapes as in exercise 10; C is now 2^5 x 2^16,
# i.e. the shape of B transposed.
A = torch.randn((2**10, 2**16))
B = torch.randn((2**16, 2**5))
C = torch.randn((2**5, 2**16))

In [52]:
# A (2^10 x 2^16) times B (2^16 x 2^5) gives a 2^10 x 2^5 result.
torch.mm(A, B)

tensor([[-271.7728, -112.8768,   40.8153,  ...,   43.4248, -184.4579,
         -314.0730],
        [ 278.2987, -115.7160,  220.3426,  ...,  175.8753,  188.2533,
          -61.2932],
        [ -53.4759,  272.7431,  -59.5937,  ...,   44.6255,  151.5520,
          259.3879],
        ...,
        [ 124.0121, -381.5967,   21.1493,  ...,  478.5606, -186.3619,
         -310.1042],
        [-152.6959,  148.3256, -155.4439,  ...,  356.5699,   21.2725,
         -114.9664],
        [ -68.9394,   12.1427, -142.6776,  ...,  314.1106,  -35.8469,
         -190.9957]])

In [54]:
# C is 2^5 x 2^16, so C.T has the same 2^16 x 2^5 shape as B and the
# product is again 2^10 x 2^5.
torch.mm(A, C.T)

tensor([[-1.2473e+02,  6.7610e+01,  1.0417e+02,  ...,  3.2096e+02,
         -1.9951e+00,  6.7785e+02],
        [ 4.1192e+02,  3.1947e+02, -3.4726e+02,  ...,  2.0738e+02,
          1.1938e+02, -1.4925e+02],
        [-2.0653e+02,  1.6729e+02,  2.0572e+01,  ...,  3.0411e+02,
          4.4812e+02,  8.5356e+01],
        ...,
        [ 5.2164e-01, -1.1115e+02,  4.0622e+02,  ..., -4.3674e+01,
          2.5022e+02, -7.2693e+01],
        [ 1.5856e+02,  4.7398e+02, -2.6726e+02,  ..., -7.6510e+01,
         -9.5807e+00, -2.1205e+02],
        [-1.1633e+02,  3.8278e+01, -2.4426e+02,  ...,  3.7907e+02,
          1.5720e+02,  2.4894e+02]])

In [55]:
# Rebind C to B's transpose without cloning. id() only identifies the Python
# object that the name C refers to, so a changed id shows that C is a new
# object but says nothing about whether the underlying memory was copied.
before = id(C)
C = B.T
after = id(C)
print('before:', before)
print('after:', after)
# Check memory sharing directly: the transpose starts at the same storage
# address as B when no copy was made.
assert C.data_ptr() == B.data_ptr(), "B.T does not share B's storage"

before: 2873855855808
after: 2874524534160


In [64]:
# 12
# Creating a tensor with extremely large dimensions will cause an error.
# NOTE(review): presumably this is why the matrices here are only
# 2^10 x 2^12 -- confirm.
A = torch.randn(2**10, 2**12)
B, C = A.clone(), A.clone()  # two independent copies of A

In [67]:
# Concatenating along the default first axis stacks the rows: 3 * 1024 = 3072.
D = torch.concat((A, B, C))
D.shape

torch.Size([3072, 4096])

In [69]:
# Concatenating along axis 1 places the matrices side by side: 3 * 4096 = 12288 columns.
D = torch.concat((A, B, C), axis=1)
D.shape

torch.Size([1024, 12288])

In [70]:
# stack adds a new leading axis of length 3 instead of extending an existing one.
D = torch.stack((A, B, C))
D.shape

torch.Size([3, 1024, 4096])

In [71]:
# Index 1 along the stacked (first) axis should recover B. The displayed
# comparison elides most entries, so also assert equality over every element.
assert torch.equal(D[1], B)
D[1,:,:] == B

tensor([[True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        ...,
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True],
        [True, True, True,  ..., True, True, True]])