In [2]:
import torch

x = torch.tensor(2) # scalar
y = torch.tensor(3)

x/y, x*y, x**y, x+y

(tensor(0.6667), tensor(6), tensor(8), tensor(5))

In [3]:
x = torch.arange(4) # vector
x

tensor([0, 1, 2, 3])

In [4]:
x.shape

torch.Size([4])

In [8]:
A = torch.arange(12).reshape(3,4) # matrices
A

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])

In [10]:
# transpose 
A.T

tensor([[ 0,  4,  8],
        [ 1,  5,  9],
        [ 2,  6, 10],
        [ 3,  7, 11]])

In [12]:
# symmetric matrice is same as its transpose

B = torch.ones(16).reshape(4,4)

In [14]:
B == B.T

tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])

In [17]:
C = torch.zeros(24).reshape(3,4,2)
C

tensor([[[0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.]]])

# Basic properties


In [20]:
# elementwise operation

A = torch.arange(20, dtype=torch.float32).reshape(5,4)
B = A.clone()
C = A

B, C, B + C

(tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.],
         [12., 13., 14., 15.],
         [16., 17., 18., 19.]]),
 tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.],
         [12., 13., 14., 15.],
         [16., 17., 18., 19.]]),
 tensor([[ 0.,  2.,  4.,  6.],
         [ 8., 10., 12., 14.],
         [16., 18., 20., 22.],
         [24., 26., 28., 30.],
         [32., 34., 36., 38.]]))

In [21]:
# Elmentwise multiplication is called Hamadard operation

A* B

tensor([[  0.,   1.,   4.,   9.],
        [ 16.,  25.,  36.,  49.],
        [ 64.,  81., 100., 121.],
        [144., 169., 196., 225.],
        [256., 289., 324., 361.]])

In [22]:
# scalar operations are all elementwise

A + 2

tensor([[ 2.,  3.,  4.,  5.],
        [ 6.,  7.,  8.,  9.],
        [10., 11., 12., 13.],
        [14., 15., 16., 17.],
        [18., 19., 20., 21.]])

### Reduction sum - does not keep dimension

In [23]:
# summation along different axis
A.sum(), 

tensor(190.)

In [24]:
A.sum(axis=1) #columns

tensor([ 6., 22., 38., 54., 70.])

In [25]:
A.sum(axis=0) # rows

tensor([40., 45., 50., 55.])

In [26]:
# mean
A.mean()

tensor(9.5000)

In [27]:
# mean is sameas

A.mean()/A.numel()

tensor(0.4750)

In [28]:
A.mean(axis=0)/A.shape[0]

tensor([1.6000, 1.8000, 2.0000, 2.2000])

### non reduction sum - keeping axis even after sum/mean

In [30]:
sum_A = A.sum(axis=1, keepdims=True)
sum_A

tensor([[ 6.],
        [22.],
        [38.],
        [54.],
        [70.]])

In [31]:
sum_reduction_A = A.sum(axis=1)
sum_reduction_A

tensor([ 6., 22., 38., 54., 70.])

In [33]:
print(A/ sum_A, "this works") # works

try:
    A/sum_reduction_A
except Exception as e:
    print(e)
    print("This doesnt work!")

tensor([[0.0000, 0.1667, 0.3333, 0.5000],
        [0.1818, 0.2273, 0.2727, 0.3182],
        [0.2105, 0.2368, 0.2632, 0.2895],
        [0.2222, 0.2407, 0.2593, 0.2778],
        [0.2286, 0.2429, 0.2571, 0.2714]]) this works
The size of tensor a (4) must match the size of tensor b (5) at non-singleton dimension 1
This doesnt work!


In [34]:
# also cumulative sum

A.cumsum(axis=0)

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  6.,  8., 10.],
        [12., 15., 18., 21.],
        [24., 28., 32., 36.],
        [40., 45., 50., 55.]])

In [35]:
A.sum(axis=0, keepdims=True)

tensor([[40., 45., 50., 55.]])

In [39]:
# dot product
x = torch.ones(4, dtype=torch.float32)
y = torch.arange(4, dtype=torch.float32)

torch.dot(x,y)

tensor(6.)

In [41]:
# same as

torch.sum(x * y)

tensor(6.)

In [42]:
A.shape, x.shape

(torch.Size([5, 4]), torch.Size([4]))

In [45]:
A, x

(tensor([[ 0.,  1.,  2.,  3.],
         [ 4.,  5.,  6.,  7.],
         [ 8.,  9., 10., 11.],
         [12., 13., 14., 15.],
         [16., 17., 18., 19.]]),
 tensor([1., 1., 1., 1.]))

In [44]:
# matrix vector
torch.mv(A, x)

tensor([ 6., 22., 38., 54., 70.])

In [47]:
# matrix multiplication

B = torch.zeros(12).reshape(4,3)

In [48]:
torch.mm(A,B)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])

### norms

In linear algebra, a vector norm is a function f that maps a vector to a scalar, satisfying a handful
of properties. Given any vector x, the first property says that if we scale all the elements of a vector
by a constant factor α, its norm also scales by the absolute value of the same constant factor:
f(αx) = |α|f(x). (2.3.10)
The second property is the familiar triangle inequality:
f(x + y) ≤ f(x) + f(y). (2.3.11)
The third property simply says that the norm must be non-negative:
f(x) ≥ 0. (2.3.12)
That makes sense, as in most contexts the smallest size for anything is 0. The final property requires that the smallest norm is achieved and only achieved by a vector consisting of all zeros.
∀i, [x]i = 0 ⇔ f(x) = 0. (2.3.13)
You might notice that norms sound a lot like measures of distance. And if you remember Euclidean
distances (think Pythagorasʼ theorem) from grade school, then the concepts of non-negativity and
the triangle inequality might ring a bell. In fact, the Euclidean distance is a norm: specifically it
is the L2 norm. Suppose that the elements in the n-dimensional vector x are x1, . . . , xn.
The L2 norm of x is the square root of the sum of the squares of the vector elements.

In [50]:
# l2 norms

u = torch.tensor([3,-4], dtype=torch.float32)


torch.norm(u)

tensor(5.)

In [51]:
# L1 norm

abs(u).sum()

tensor(7.)

### Exercises

transpose of transpose of a matrix is same

In [52]:
A

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [12., 13., 14., 15.],
        [16., 17., 18., 19.]])

In [53]:
A.T

tensor([[ 0.,  4.,  8., 12., 16.],
        [ 1.,  5.,  9., 13., 17.],
        [ 2.,  6., 10., 14., 18.],
        [ 3.,  7., 11., 15., 19.]])

In [54]:
A.T.T

tensor([[ 0.,  1.,  2.,  3.],
        [ 4.,  5.,  6.,  7.],
        [ 8.,  9., 10., 11.],
        [12., 13., 14., 15.],
        [16., 17., 18., 19.]])

Given two matrices A and B, show that the sum of transposes is equal to the transpose of a
sum: A
⊤ + B
⊤ = (A + B)
⊤.

In [62]:
B = torch.arange(0,20).reshape(5,4)

In [63]:
(A + B).T, A.T + B.T

(tensor([[ 0.,  8., 16., 24., 32.],
         [ 2., 10., 18., 26., 34.],
         [ 4., 12., 20., 28., 36.],
         [ 6., 14., 22., 30., 38.]]),
 tensor([[ 0.,  8., 16., 24., 32.],
         [ 2., 10., 18., 26., 34.],
         [ 4., 12., 20., 28., 36.],
         [ 6., 14., 22., 30., 38.]]))

Given any square matrix A, is A + A
⊤ always symmetric? Why?

In [64]:
C = torch.ones(16).reshape(4,4)
C + C.T

tensor([[2., 2., 2., 2.],
        [2., 2., 2., 2.],
        [2., 2., 2., 2.],
        [2., 2., 2., 2.]])

We defined the tensor X of shape (2, 3, 4) in this section. What is the output of len(X)?


In [65]:
D = torch.arange(24).reshape(2,3,4)

In [66]:
len(D)

2

For a tensor X of arbitrary shape, does len(X) always correspond to the length of a certain
axis of X? What is that axis?

In [67]:
#axis 0

Run A / A.sum(axis=1) and see what happens. Can you analyze the reason?

In [68]:
A.sum(axis=1)

tensor([ 6., 22., 38., 54., 70.])

In [71]:
try:
    A/A.sum(axis=1)
except Exception as e:
    print(e)

The size of tensor a (4) must match the size of tensor b (5) at non-singleton dimension 1


When traveling between two points in Manhattan, what is the distance that you need to cover
in terms of the coordinates, i.e., in terms of avenues and streets? Can you travel diagonally?

In [None]:
# it is through the roads

Consider a tensor with shape (2, 3, 4). What are the shapes of the summation outputs along
axis 0, 1, and 2?

In [73]:
D, D.sum(axis=0), D.sum(axis=1), D.sum(axis=2)

(tensor([[[ 0,  1,  2,  3],
          [ 4,  5,  6,  7],
          [ 8,  9, 10, 11]],
 
         [[12, 13, 14, 15],
          [16, 17, 18, 19],
          [20, 21, 22, 23]]]),
 tensor([[12, 14, 16, 18],
         [20, 22, 24, 26],
         [28, 30, 32, 34]]),
 tensor([[12, 15, 18, 21],
         [48, 51, 54, 57]]),
 tensor([[ 6, 22, 38],
         [54, 70, 86]]))

Feed a tensor with 3 or more axes to the linalg.norm function and observe its output. What
does this function compute for tensors of arbitrary shape?

In [75]:
torch.norm(A), A.shape

(tensor(49.6991), torch.Size([5, 4]))

In [77]:
E = torch.arange(24, dtype=torch.float32).reshape(2,3,4)

In [78]:
torch.norm(E)

tensor(65.7571)