# Tensors in PyTorch

In [1]:
import numpy as np
import torch
from IPython import display
from d2l import torch as d2l

## Initialize Tensor

In [2]:
# Sample a 2x3 tensor from a standard normal distribution.
# No random seed is set, so the values differ on every fresh run.
a = torch.randn(2, 3, requires_grad=False)
a

tensor([[-0.8097, -0.0086,  0.4703],
        [ 0.4996,  1.4457,  0.3959]])

In [3]:
b = torch.zeros_like(a)
b

tensor([[0., 0., 0.],
        [0., 0., 0.]])

In [4]:
torch.ones_like(a)

tensor([[1., 1., 1.],
        [1., 1., 1.]])

In [5]:
c = torch.tensor(np.array([[1,2],[3,4]]))
c

tensor([[1, 2],
        [3, 4]])

## Maximum

In [6]:
def relu(X):
    '''
    Element-wise maximum of X and 0.

    Returns a tensor with the same shape as X in which each entry is the
    larger of the corresponding entry of X and zero.
    '''
    return torch.max(X, torch.zeros_like(X))

In [7]:
a, relu(a)

(tensor([[-0.8097, -0.0086,  0.4703],
         [ 0.4996,  1.4457,  0.3959]]),
 tensor([[0.0000, 0.0000, 0.4703],
         [0.4996, 1.4457, 0.3959]]))

## Sum

In [8]:
a, a.sum(1, keepdim= True) # sum over dim 1 (one total per row); keepdim=True keeps that dim with size 1, so the result has shape (2, 1)

(tensor([[-0.8097, -0.0086,  0.4703],
         [ 0.4996,  1.4457,  0.3959]]),
 tensor([[-0.3479],
         [ 2.3411]]))

In [9]:
a.sum(1, keepdim= False)

tensor([-0.3479,  2.3411])

## Broadcasting Rule
A general introduction is here:
https://numpy.org/doc/stable/user/basics.broadcasting.html#general-broadcasting-rules

In [10]:
def softmax(X):
    '''
    Compute a numerically stable row-wise softmax of a matrix.

    Args:
        X: tensor with at least 2 dimensions; the softmax is taken along
           dimension 1 (i.e. across each row of a matrix).

    Returns:
        Tensor with the same shape as X. For finite inputs every row
        sums to 1.

    The largest entry of each row is subtracted before exponentiating.
    This does not change the mathematical result, but it keeps torch.exp
    from overflowing to inf (and the division from producing nan) when
    X contains large values.
    '''
    if X.shape[1] == 0:
        # No columns: nothing to normalize, and max over an empty dim would raise.
        return torch.exp(X)
    row_max = X.max(dim=1, keepdim=True).values
    X_exp = torch.exp(X - row_max)  # (n, m) - (n, 1): broadcast across each row
    partition = X_exp.sum(1, keepdim=True)
    return X_exp / partition  # The broadcasting mechanism is applied here

In [11]:
c, c.sum(1, keepdim=True)

(tensor([[1, 2],
         [3, 4]]),
 tensor([[3],
         [7]]))

(2,2) / (2,1) = (2,2)

Since the size-1 dimension of the (2,1) tensor is stretched or “copied” to match the other operand, which is (2,2),
```
c.sum(1, keepdim=True) 
```
becomes
```
tensor([[3,3], [7,7]])
```
In other words, each row sum is repeated across the columns of its row, so every element is divided by the sum of its own row.

In [12]:
c / c.sum(1, keepdim=True) # (2,2) / (2,1): every row is divided by its own row sum

tensor([[0.3333, 0.6667],
        [0.4286, 0.5714]])

#### If we do not keep the dimension after summation

In [13]:
c.sum(1, keepdim=False)

tensor([3, 7])

In [14]:
c.sum(1, keepdim=False).shape

torch.Size([2])

In [15]:
c / c.sum(1, keepdim=False) # (2,2) / (2,): column j is divided by the sum of row j, so the rows are NOT normalized

tensor([[0.3333, 0.2857],
        [1.0000, 0.5714]])

(2,2) / (2,) = (2,2)

Since the (2,) tensor is first treated as (1,2) and its size-1 dimension is then stretched or “copied” to match the other operand, which is (2,2),

```
c.sum(1, keepdim=False)
```
becomes
```
tensor([[3, 7], [3, 7]])
```
In other words, the 1-D tensor is copied down the rows, so column j of `c` is divided by the sum of row j.

## Multiplication
Check shape and data type

In [16]:
a.shape, b.shape, c.shape

(torch.Size([2, 3]), torch.Size([2, 3]), torch.Size([2, 2]))

In [17]:
a.dtype

torch.float32

Convert data type

In [18]:
# c holds integers while a is float32; cast c to a's dtype before multiplying them.
# Note: this rebinds c, so earlier cells that display c will show floats if re-run after this one.
c = c.to(a.dtype)
c

tensor([[1., 2.],
        [3., 4.]])

In [19]:
a.T @ c 

tensor([[0.6890, 0.3789],
        [4.3284, 5.7654],
        [1.6579, 2.5241]])

In [20]:
d = torch.tensor([1,1])
d.shape

torch.Size([2])

In [21]:
a.T @ c + d # broadcasting: d has shape (2,), so it is added to each of the 3 rows of the (3, 2) product

tensor([[1.6890, 1.3789],
        [5.3284, 6.7654],
        [2.6579, 3.5241]])