In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import torch
import numpy as np

# Tensor

- Three attributes: `data`, `grad` and `grad_fn`
- `requires_grad=True` tells autograd to track all operations on the tensor; calling `.backward()` then computes the gradients automatically. The attribute `.grad` stores the gradients of the tensor.
- `.detach()` detaches the tensor from its computation history. When **evaluating the model**, a `with torch.no_grad():` block is useful to stop tracking history (and save memory) for tensors with `requires_grad=True`.

In [2]:
# A freshly created tensor: gradient tracking is off until requested.
x = torch.ones(2, 2)

print('x: ', x)
print('x dtype: ', x.dtype)
print('x device: ', x.device)
print('x reshape shape: ', x.reshape(1, 4).shape)
print('x requires_grad: ', x.requires_grad)

# Switch gradient tracking on in place (note the trailing underscore).
x.requires_grad_(True)

# x was not produced by an operation, so it has no grad_fn.
print('x grad_fn: ', x.grad_fn)

print('=' * 10)

# y comes out of an operation on x, so it carries a grad_fn.
y = x + 2
print('y: ', y)
print('y requires_grad: ', y.requires_grad)
print('y grad_fn: ', y.grad_fn)

# Reduce to a scalar and backpropagate; each of the four inputs
# contributes 1/4 to the mean, which is what x.grad shows.
out = y.mean()
out.backward()
print(x.grad)

x:  tensor([[1., 1.],
        [1., 1.]])
x dtype:  torch.float32
x device:  cpu
x reshape shape:  torch.Size([1, 4])
x requires_grad:  False
x grad_fn:  None
y:  tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward>)
y requires_grad:  True
y grad_fn:  <AddBackward object at 0x10635b978>
tensor([[0.2500, 0.2500],
        [0.2500, 0.2500]])


## Sharing memory for performance: copy vs share

In [3]:
a = np.array([1, 2, 3])

# Copying constructors: the tensor gets its own storage.
t1 = torch.Tensor(a)      # legacy constructor; yields float values
t2 = torch.tensor(a)      # preferred way to copy; keeps the integer values

# Sharing constructors: the tensor is backed by the NumPy buffer.
t3 = torch.as_tensor(a)   # preferred way to share
t4 = torch.from_numpy(a)

# copy
print(t1, t2, sep='\n')

# share
print(t3, t4, sep='\n')

tensor([1., 2., 3.])
tensor([1, 2, 3])
tensor([1, 2, 3])
tensor([1, 2, 3])


In [4]:
# Zero the NumPy array in place; tensors sharing its memory will follow.
a[:] = 0

# copy
print(t1, t2, sep='\n')

# share
print(t3, t4, sep='\n')

tensor([1., 2., 3.])
tensor([1, 2, 3])
tensor([0, 0, 0])
tensor([0, 0, 0])


# Tensor operations
Four different operation types:
- Reshape
- Element-wise
- Reduction
- Access

From http://deeplizard.com/learn/video/fCVuiW9AFzY

## 1. Reshape

In [5]:
# 2x3 double-precision tensor: a row of ones above a row of twos.
t = torch.tensor([[1] * 3, [2] * 3], dtype=torch.float64)

### Number of elements

In [6]:
# Total number of elements held by the tensor.
torch.numel(t)

6

### 1.1 Reshape

In [7]:
# Same six elements, laid out as 3 rows by 2 columns.
torch.reshape(t, (3, 2))

tensor([[1., 1.],
        [1., 2.],
        [2., 2.]], dtype=torch.float64)

### 1.2 Squeeze and unsqueeze
- Squeezing a tensor **removes the axes that have a length of one**
- Unsqueezing a tensor **adds a dimension with a length of one**

In [8]:
# Pad the shape with length-1 axes, then let squeeze drop them all again.
torch.squeeze(t.reshape(1, 2, 1, 3, 1, 1)).shape

torch.Size([2, 3])

In [9]:
# Append a new length-1 axis at position 3.
torch.unsqueeze(t.reshape(2, 1, 3), dim=3).shape

torch.Size([2, 1, 3, 1])

### 1.3 Flatten
- Flattening a tensor means to **remove all of the dimensions except for one**
- **Flatten = reshape(1, -1) + squeeze**
- Useful when feeding **convolution layer results to a fully-connected layer**

In [10]:
# Collapse every dimension into a single axis.
torch.flatten(t).shape

torch.Size([6])

#### 1.3.1 Flatten specific dimension
- For example, the convolutional result tensor has a shape of `[batch_size, num_channels, height, width]`, and we want to reshape it to feed a fully-connected layer.

In [11]:
# Three 3x3 "images"; image k is filled entirely with the value k.
img1, img2, img3 = (
    torch.tensor([[value] * 3 for _ in range(3)])
    for value in (1, 2, 3)
)

In [12]:
# Stack the three images along a new leading (batch) dimension.
batch_imgs = torch.stack([img1, img2, img3], dim=0)
batch_imgs.shape

torch.Size([3, 3, 3])

In [13]:
# Insert the num_channels axis (length one) right after the batch axis.
cnn_block = torch.unsqueeze(batch_imgs, dim=1)
cnn_block.shape

torch.Size([3, 1, 3, 3])

In [14]:
# Keep dim 0 (the batch) and merge every dimension from dim 1 onwards.
torch.flatten(cnn_block, start_dim=1)

tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1],
        [2, 2, 2, 2, 2, 2, 2, 2, 2],
        [3, 3, 3, 3, 3, 3, 3, 3, 3]])

### 1.4 Concat

In [15]:
# Two 2x3 tensors to join: all ones and all zeros.
t1 = torch.ones((2, 3))
t2 = torch.zeros((2, 3))
print(t1, t2, sep='\n')

tensor([[1., 1., 1.],
        [1., 1., 1.]])
tensor([[0., 0., 0.],
        [0., 0., 0.]])


In [16]:
# Concatenate along the existing row axis: no new dimension is created.
torch.cat([t1, t2], dim=0)

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [0., 0., 0.],
        [0., 0., 0.]])

In [17]:
# Stacking, unlike concatenation, adds a new leading dimension.
torch.stack([t1, t2], dim=0).shape

torch.Size([2, 2, 3])

## 2. Element-wise
- An **element-wise** operation **operates on corresponding elements between tensors**.
- Two tensors must have the **same shape** (or shapes that can be broadcast together, see 2.5) in order to perform element-wise operations on them.

### 2.1 Addition
- Addition is an element-wise operation.
- Application
    - **Word embedding + positional embedding**

In [18]:
# Two 2x2 integer tensors: all ones and all twos.
t1 = torch.tensor([[1] * 2] * 2)
t2 = torch.tensor([[2] * 2] * 2)

In [19]:
# Operator form of element-wise addition.
t1 + t2

tensor([[3, 3],
        [3, 3]])

In [20]:
# Method form of element-wise addition.
t1.add(t2)

tensor([[3, 3],
        [3, 3]])

### 2.2 Arithmetic operations
- All the arithmetic operations, **add, subtract, multiply, and divide** are element-wise operations.

In [21]:
# Operator form of element-wise multiplication (not a matrix product).
t1 * t2

tensor([[2, 2],
        [2, 2]])

In [22]:
# Method form of element-wise multiplication.
t1.mul(t2)

tensor([[2, 2],
        [2, 2]])

In [23]:
# Element-wise subtraction: t1 minus t2.
torch.sub(t1, t2)

tensor([[-1, -1],
        [-1, -1]])

In [24]:
# Element-wise division of t1 by t2; both hold integer values here.
# NOTE(review): how integer tensors are divided (truncated integers vs.
# floating-point quotients) differs between PyTorch releases - confirm the
# result against the installed version before relying on it.
t1.div(t2)

tensor([[0, 0],
        [0, 0]])

### 2.3 Comparison operations
- Comparison operations are element-wise
- Comparison results
    - `0` if the comparison between corresponding elements is `False`.
    - `1` if the comparison between corresponding elements is `True`.

In [25]:
# element-wise "equal to 1"
torch.eq(t1, 1)

tensor([[1, 1],
        [1, 1]], dtype=torch.uint8)

In [26]:
# element-wise "greater than or equal to 1"
torch.ge(t1, 1)

tensor([[1, 1],
        [1, 1]], dtype=torch.uint8)

In [27]:
# element-wise "strictly greater than 1"
torch.gt(t1, 1)

tensor([[0, 0],
        [0, 0]], dtype=torch.uint8)

In [28]:
# element-wise "less than or equal to 1"
torch.le(t1, 1)

tensor([[1, 1],
        [1, 1]], dtype=torch.uint8)

In [29]:
# element-wise "strictly less than 1"
torch.lt(t1, 1)

tensor([[0, 0],
        [0, 0]], dtype=torch.uint8)

### 2.4 Element-wise operations using functions

In [30]:
# Absolute value of every element.
torch.abs(t1)

tensor([[1, 1],
        [1, 1]])

In [31]:
# Sign flip of every element.
torch.neg(t1)

tensor([[-1, -1],
        [-1, -1]])

### 2.5 Broadcasting
- Broadcasting allows us to **add scalars to higher dimensional tensors**.
- Application
    - Preprocessing: **subtract the mean**

In [32]:
# The scalar 2 is broadcast to every element of t1.
torch.add(t1, 2)

tensor([[3, 3],
        [3, 3]])

In [33]:
# A length-2 vector is broadcast across the rows of the 2x2 tensor.
t3 = torch.tensor([2] * 2)
torch.add(t1, t3)

tensor([[3, 3],
        [3, 3]])

## 3. Reduction
- A reduction operation **reduces the number of elements** contained within the tensor.

### 3.1 Common tensor reduction operations

In [34]:
# 3x3 tensor of ones used by the reduction examples.
t = torch.ones((3, 3))

In [35]:
# Sum over all elements, reduced to a single value.
torch.sum(t)

tensor(9.)

In [36]:
# Product over all elements.
torch.prod(t)

tensor(1.)

In [37]:
# Mean over all elements.
torch.mean(t)

tensor(1.)

In [38]:
# Standard deviation over all elements.
torch.std(t)

tensor(0.)

### 3.2 Reducing tensors by dims

In [39]:
# 3x4 float tensor whose row r (1-based) is filled with the value r.
t = torch.tensor(
    [[row] * 4 for row in (1, 2, 3)],
    dtype=torch.float32,
)

In [40]:
# Reduce along dim 0: add the rows together, one total per column.
t.sum(dim=0)

tensor([6., 6., 6., 6.])

### 3.3 `Argmax`
- `Argmax` returns the **index of the maximum value** inside a tensor.
- In practice, `argmax` is used to determine the **index of the highest-scoring category** in the network’s output prediction tensor.

In [41]:
# 3x4 float tensor with a distinct maximum position in each column.
t = torch.tensor(
    [[1, 0, 0, 2],
     [0, 3, 3, 0],
     [4, 0, 0, 5]],
    dtype=torch.float,
)

In [42]:
# Per-column maximum: yields both the values and the row indices.
t.max(dim=0)

(tensor([4., 3., 3., 5.]), tensor([2, 1, 1, 2]))

In [43]:
# Only the row indices of the per-column maxima.
t.argmax(dim=0)

tensor([2, 1, 1, 2])

## 4. Access

In [44]:
# 3x3 float tensor holding 1..9 in row-major order.
t = torch.arange(1, 10, dtype=torch.float32).reshape(3, 3)

In [45]:
# .item() works only on single-element tensors; it returns a Python scalar.
t.mean().item()

5.0

In [46]:
# Column means, handed back as a NumPy array.
t.mean(dim=0).numpy()

array([4., 5., 6.], dtype=float32)

In [47]:
# Column means, handed back as a plain Python list.
t.mean(dim=0).tolist()

[4.0, 5.0, 6.0]