In [2]:
import torch

In [3]:
# Build a 3x4 matrix holding 0..11, then print it followed by its transpose.
A = torch.arange(12).view(3, 4)
print(A, A.T, sep="\n")

tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])
tensor([[ 0,  4,  8],
        [ 1,  5,  9],
        [ 2,  6, 10],
        [ 3,  7, 11]])


### Basic Properties of Tensor Arithmetic

Elementwise operations between tensors of the same shape, regardless of their order (number of axes), produce an output of that same shape.

In [4]:
# Rank-3 tensor (2 x 3 x 4) holding 0..23, plus an independent copy of it.
A = torch.arange(2 * 3 * 4).reshape(2, 3, 4)
B = A.clone()
# Elementwise product, elementwise sum, and the untouched original.
torch.mul(A, B), torch.add(A, B), A

(tensor([[[  0,   1,   4,   9],
          [ 16,  25,  36,  49],
          [ 64,  81, 100, 121]],
 
         [[144, 169, 196, 225],
          [256, 289, 324, 361],
          [400, 441, 484, 529]]]),
 tensor([[[ 0,  2,  4,  6],
          [ 8, 10, 12, 14],
          [16, 18, 20, 22]],
 
         [[24, 26, 28, 30],
          [32, 34, 36, 38],
          [40, 42, 44, 46]]]),
 tensor([[[ 0,  1,  2,  3],
          [ 4,  5,  6,  7],
          [ 8,  9, 10, 11]],
 
         [[12, 13, 14, 15],
          [16, 17, 18, 19],
          [20, 21, 22, 23]]]))

`A * B` is the elementwise product of two matrices (or tensors), called the **Hadamard product**.

In [5]:
# The Hadamard product keeps the shape of its operands.
A.size(), torch.mul(A, A).size()

(torch.Size([2, 3, 4]), torch.Size([2, 3, 4]))

### Reduction

In [40]:
# 3x2 double-precision tensor holding 0..5, shown together with its grand total.
x = torch.arange(6, dtype=torch.float64).view(3, 2)
x, torch.sum(x)

(tensor([[0., 1.],
         [2., 3.],
         [4., 5.]], dtype=torch.float64),
 tensor(15., dtype=torch.float64))

Invoking `sum` with no arguments reduces a tensor along all of its axes, producing a scalar.

To sum along a single axis, pass `axis` to `sum`: `axis=0` adds the elements down the rows (collapsing axis 0 and leaving one total per column), while `axis=1` adds across the columns (leaving one total per row).

In [7]:
# Column totals (axis 0 collapsed) and row totals (axis 1 collapsed).
torch.sum(x, dim=0), torch.sum(x, dim=1)

(tensor([6, 9]), tensor([1, 5, 9]))

**Mean** works like `sum`: with no arguments it reduces the tensor to a scalar, and with `axis=0` or `axis=1` it reduces only along that axis, returning one mean per column or one mean per row respectively.

### Non-Reduction Sum

In [8]:
# Row totals that keep the summed axis as a length-1 dimension.
non_reduced_sum = torch.sum(x, dim=1, keepdim=True)

In [9]:
# Display the row sums that were computed with keepdims=True.
non_reduced_sum

tensor([[1],
        [5],
        [9]])

In [10]:
# Compare the shape of the input with the shape of its keep-dims row sums.
x.size(), non_reduced_sum.size()

(torch.Size([3, 2]), torch.Size([3, 1]))

In [11]:
# Divide every row of x by that row's sum (the sums broadcast across columns).
torch.div(x, non_reduced_sum)

tensor([[0.0000, 1.0000],
        [0.4000, 0.6000],
        [0.4444, 0.5556]])

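There is also `cumsum`, which calculates the cumulative sum of the elements of a tensor along a given axis. Unlike `sum`, it does not reduce the tensor: the output has the same shape as the input.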

In [12]:
# Print the input first, then show its running total down axis 0.
print(x)
torch.cumsum(x, dim=0)

tensor([[0, 1],
        [2, 3],
        [4, 5]])


tensor([[0, 1],
        [2, 4],
        [6, 9]])

### Dot Products and Matrix-Vector Products

In [13]:
# Second 3x2 double-precision operand holding 0..5.
y = torch.arange(6, dtype=torch.float64).view(3, 2)
y

tensor([[0., 1.],
        [2., 3.],
        [4., 5.]], dtype=torch.float64)

In [14]:
# Elementwise (Hadamard) product of x and y.
torch.mul(x, y)

tensor([[ 0.,  1.],
        [ 4.,  9.],
        [16., 25.]], dtype=torch.float64)

We can calculate the dot product of two tensors of the same shape by taking their elementwise product and summing all of its entries.

In [15]:
# Sum of the elementwise product of x and y.
(x * y).sum()

tensor(55., dtype=torch.float64)

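As for matrix-vector products: let A be a matrix and x be a vector; then Ax is computed exactly like a matrix-matrix multiplication in which x is treated as a single-column matrix. In the cells below, `x` is actually a 3×2 matrix and `A` is redefined as a batch of six 2×3 matrices, so `A @ x` multiplies each 2×3 matrix in the batch by `x`.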

In [16]:
# Inspect the shapes of both operands before multiplying.
A.size(), x.size()

(torch.Size([2, 3, 4]), torch.Size([3, 2]))

In [43]:
# Batch of six 2x3 double-precision matrices with entries drawn uniformly from [0, 1).
# A locally seeded generator makes the draw identical on every run (Restart & Run All
# reproduces the same A) without touching the global RNG state.
A = torch.rand(6, 2, 3, dtype=torch.float64, generator=torch.Generator().manual_seed(0))

In [44]:
# Matrix product of A with x.
torch.matmul(A, x)

tensor([[[2.0457, 2.7468],
         [4.3119, 5.9719]],

        [[1.1162, 1.9337],
         [3.3819, 5.5209]],

        [[1.8268, 3.4163],
         [2.6845, 4.0483]],

        [[0.7419, 1.3831],
         [3.6297, 5.1229]],

        [[3.4498, 5.5752],
         [1.7135, 3.3291]],

        [[1.9082, 3.5913],
         [3.3868, 4.8114]]], dtype=torch.float64)

In [45]:
# Display both operands of the `A @ x` product computed in the previous cell.
A, x

(tensor([[[0.0947, 0.1902, 0.4163],
          [0.2758, 0.6124, 0.7718]],
 
         [[0.5211, 0.0349, 0.2616],
          [0.9971, 0.5928, 0.5491]],
 
         [[0.6935, 0.8785, 0.0175],
          [0.5987, 0.1881, 0.5771]],
 
         [[0.3691, 0.1732, 0.0989],
          [0.4628, 0.2457, 0.7846]],
 
         [[0.9954, 0.5350, 0.5949],
          [0.9001, 0.5743, 0.1412]],
 
         [[0.8548, 0.7023, 0.1259],
          [0.1651, 0.8256, 0.4339]]], dtype=torch.float64),
 tensor([[0., 1.],
         [2., 3.],
         [4., 5.]], dtype=torch.float64))

### Matrix Multiplication

In [57]:
# Operands for a (2x3) @ (3x2) product: a 0..5 ramp and an all-ones matrix.
A = torch.arange(6, dtype=torch.float64).view(2, 3)
B = torch.ones((3, 2), dtype=torch.float64)
A, B


(tensor([[0., 1., 2.],
         [3., 4., 5.]], dtype=torch.float64),
 tensor([[1., 1.],
         [1., 1.],
         [1., 1.]], dtype=torch.float64))

In [58]:
# Two equivalent spellings of the 2-D matrix product, shown side by side.
torch.mm(A, B), torch.matmul(A, B)


(tensor([[ 3.,  3.],
         [12., 12.]], dtype=torch.float64),
 tensor([[ 3.,  3.],
         [12., 12.]], dtype=torch.float64))

### Norms

Informally, the norm of a vector tells us how big it is.

A norm is a function $\|\cdot\|$ that maps a vector to a non-negative scalar.

To calculate the l2 norm we call the following method:

In [61]:
u = torch.tensor([2.0, -2.0])
# torch.norm is deprecated; torch.linalg.vector_norm is its replacement for vectors
# and computes the Euclidean (l2) norm by default (ord=2).
l2_norm = torch.linalg.vector_norm(u)
l2_norm

tensor(2.8284)

As for the $\ell_1$ norm, we compose the absolute value with the sum operator:

In [63]:
# l1 norm: the sum of the absolute values of the entries of u.
torch.sum(u.abs())

tensor(4.)