# Preliminaries before diving

- basic mathematics tools and functions
- basic data processing
- basic probability theory

In [1]:
import os
import sys

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import torch
from d2l import torch as d2l
# Record the PyTorch version so the outputs below can be tied to a specific release.
print(f'Import PyTorch V{torch.__version__}')

Import PyTorch V1.12.1


## Creation of tensors

Major creations:

1. arange: generate an ascending sequence
1. zeros: generate a tensor whose elements are initialized as 0
1. ones: generate a tensor whose elements are initialized as 1
1. rand: generate a tensor whose elements follow a uniform distribution on [0, 1)
1. randn: generate a tensor whose elements follow N(0, 1)
1. tensor: convert a numerical array into a tensor

In [2]:
# Build the 1-D float64 sequence 0..7 (start given explicitly, stop is exclusive).
t0 = torch.arange(0, 8, dtype=torch.float64)
# Show the tensor itself, then its shape and total element count.
print(f'Create a tensor: {t0}')
print(f'Its shape is {t0.shape}, and it contains {t0.numel()} elements')

Create a tensor: tensor([0., 1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64)
Its shape is torch.Size([8]), and it contains 8 elements


In [3]:
# Passing -1 lets reshape infer the second dimension from the element count of t0.
print(f'Reshape tensor to a matrix with two rows:\n{t0.reshape(2, -1)}')

Reshape tensor to a matrix with two rows:
tensor([[0., 1., 2., 3.],
        [4., 5., 6., 7.]], dtype=torch.float64)


In [4]:
# Seed the global RNG so the rand/randn samples printed below are the same on
# every Restart & Run All; zeros/ones do not consume random numbers.
torch.manual_seed(0)
creations = [torch.zeros, torch.ones, torch.rand, torch.randn]
shape = (2, 3, 2)
print(f'Create tensor with shape {shape} by several common creations')
print()
for func in creations:
    # Each factory is called with the shape tuple as its only argument.
    print(func.__name__)
    print(func(shape))

Create tensor with shape (2, 3, 2) by several common creations

zeros
tensor([[[0., 0.],
         [0., 0.],
         [0., 0.]],

        [[0., 0.],
         [0., 0.],
         [0., 0.]]])
ones
tensor([[[1., 1.],
         [1., 1.],
         [1., 1.]],

        [[1., 1.],
         [1., 1.],
         [1., 1.]]])
rand
tensor([[[0.9676, 0.8664],
         [0.5957, 0.9204],
         [0.4323, 0.0160]],

        [[0.6488, 0.9836],
         [0.0734, 0.4740],
         [0.2651, 0.0745]]])
randn
tensor([[[ 0.1965, -0.2668],
         [ 0.4189, -1.6777],
         [-0.0469, -0.2384]],

        [[-0.8146, -0.2606],
         [ 0.6501, -2.2808],
         [-0.3288,  0.1152]]])


In [5]:
# A nested Python list of ints becomes a 2-D integer tensor.
nested_rows = [
    [2, 1, 3],
    [1, 2, 1],
]
t2 = torch.tensor(nested_rows)
print(f'Create an integer tensor from an array:\n{t2}')
# Basic metadata: dimensions, element type and element count.
print(f'Shape: {t2.shape}')
print(f'DType: {t2.dtype}')
print(f'Size: {t2.numel()}')

Create an integer tensor from an array:
tensor([[2, 1, 3],
        [1, 2, 1]])
Shape: torch.Size([2, 3])
DType: torch.int64
Size: 6


In [6]:
print(f'There are {torch.cuda.device_count()} CUDA devices')
# The GPU demo is skipped entirely when CUDA is not available.
if torch.cuda.is_available():
    print('Try create a tensor in GPU')
    gpu = torch.device('cuda')
    # Uniform random tensor shaped like t0, but float32 and placed on the CUDA device.
    t3 = torch.rand_like(t0, device=gpu, dtype=torch.float32)
    print(t3)

There are 1 CUDA devices
Try create a tensor in GPU
tensor([0.5274, 0.1961, 0.6530, 0.7673, 0.2769, 0.0662, 0.9333, 0.4722],
       device='cuda:0')


## Load data from csv files

Use pandas to load the data and convert it into tensors.

In [7]:
print('Prepare csv data file')
os.makedirs('data', exist_ok=True)
data_file = os.path.join('data', 'house_tiny.csv')
# Header row followed by four records; 'NA' marks a missing value.
csv_lines = [
    'room_count, alley, price\n',
    'NA, Pave, 127500\n',
    '2, NA, 106000\n',
    '4, NA, 178100\n',
    'NA, NA, 140000\n',
]
# Leaving the `with` block closes the file, which also flushes it.
with open(data_file, 'w') as f:
    f.writelines(csv_lines)

Prepare csv data file


In [8]:
print(f'Load data by using Pandas V{pd.__version__}')
# skipinitialspace drops the blank that follows each comma in the file.
read_options = {'skipinitialspace': True}
houses = pd.read_csv(data_file, **read_options)
print(houses)

Load data by using Pandas V1.4.3
   room_count alley   price
0         NaN  Pave  127500
1         2.0   NaN  106000
2         4.0   NaN  178100
3         NaN   NaN  140000


In [9]:
print('Consider price as function of room count and alley')
# Features are selected with an explicit list of column labels; the target is one column.
feature_columns = ['room_count', 'alley']
inputs = houses.loc[:, feature_columns]
outputs = houses.loc[:, 'price']
print(f'inputs: \n{inputs}')
print(f'outputs: \n{outputs}')

Consider price as function of room count and alley
inputs: 
   room_count alley
0         NaN  Pave
1         2.0   NaN
2         4.0   NaN
3         NaN   NaN
outputs: 
0    127500
1    106000
2    178100
3    140000
Name: price, dtype: int64


In [10]:
print('Fill inputs by using mean values')
# Only numeric columns have a mean. Asking for numeric_only explicitly avoids the
# FutureWarning that pandas 1.x emits for the string column `alley`, and the
# TypeError raised by pandas >= 2.0. Missing `alley` values are left untouched here.
inputs = inputs.fillna(inputs.mean(numeric_only=True))
print(inputs)

Fill inputs by using mean values
   room_count alley
0         3.0  Pave
1         2.0   NaN
2         4.0   NaN
3         3.0   NaN


  inputs = inputs.fillna(inputs.mean())


In [11]:
print('Use integers to represent category-like attributes')
# dummy_na=True adds an indicator column for missing values.
# The indicator dtype is pinned to an integer type: pandas >= 2.0 defaults to bool,
# and mixing bool with the float column would make `inputs.values` an object array
# that torch.tensor() cannot convert.
inputs = pd.get_dummies(inputs, dummy_na=True, dtype='uint8')
print(inputs)

Use integers to represent category-like attributes
   room_count  alley_Pave  alley_nan
0         3.0           1          0
1         2.0           0          1
2         4.0           0          1
3         3.0           0          1


In [12]:
print('Convert Pandas DataFrame into PyTorch tensor')
# Each pandas object exposes its data as a NumPy array via `.values`;
# torch.tensor copies that array into a new tensor.
inputs = torch.tensor(inputs.values)
outputs = torch.tensor(outputs.values)
(inputs, outputs)

Convert Pandas DataFrame into PyTorch tensor


(tensor([[3., 1., 0.],
         [2., 0., 1.],
         [4., 0., 1.],
         [3., 0., 1.]], dtype=torch.float64),
 tensor([127500, 106000, 178100, 140000]))

## Linear Algebra

In [13]:
print('prepare scalars')
# Zero-dimensional tensors built from Python floats.
s0, s1 = (torch.tensor(value) for value in (42.0, 0.7))
(s0, s1)

prepare scalars


(tensor(42.), tensor(0.7000))

In [14]:
print('prepare vectors')
# v0: integer sequence 0..5; v1: six ones in the default floating-point dtype.
v0 = torch.arange(0, 6)
v1 = torch.ones(6)
(v0, v1)

prepare vectors


(tensor([0, 1, 2, 3, 4, 5]), tensor([1., 1., 1., 1., 1., 1.]))

In [15]:
print('prepare matrices')
# m0: the integers 0..11 laid out row by row in 3 rows of 4; m1: a 3x4 matrix of ones.
m0 = torch.arange(12).reshape(3, 4)
m1 = torch.ones(3, 4)
(m0, m1)

prepare matrices


(tensor([[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]]),
 tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]))

In [16]:
print('prepare tensors')
# Rank-3 examples. Note that this rebinds t0, which earlier held a 1-D float64 vector.
t0 = torch.arange(18).reshape(2, 3, 3)
t1 = torch.ones(2, 3, 3)
(t0, t1)

prepare tensors


(tensor([[[ 0,  1,  2],
          [ 3,  4,  5],
          [ 6,  7,  8]],
 
         [[ 9, 10, 11],
          [12, 13, 14],
          [15, 16, 17]]]),
 tensor([[[1., 1., 1.],
          [1., 1., 1.],
          [1., 1., 1.]],
 
         [[1., 1., 1.],
          [1., 1., 1.],
          [1., 1., 1.]]]))

In [17]:
# Collect the examples in order of increasing rank: scalars, vectors, matrices, rank-3.
tensors = [
    s0, s1,
    v0, v1,
    m0, m1,
    t0, t1,
]

In [18]:
# One report line per collected tensor, emitted with a single print call.
print(*(f'dtype: {t.dtype}, shape: {t.shape}' for t in tensors), sep='\n')

dtype: torch.float32, shape: torch.Size([])
dtype: torch.float32, shape: torch.Size([])
dtype: torch.int64, shape: torch.Size([6])
dtype: torch.float32, shape: torch.Size([6])
dtype: torch.int64, shape: torch.Size([3, 4])
dtype: torch.float32, shape: torch.Size([3, 4])
dtype: torch.int64, shape: torch.Size([2, 3, 3])
dtype: torch.float32, shape: torch.Size([2, 3, 3])


In [19]:
# (tensor, axis) pairs to reduce; an axis of None means "sum over every element".
cases = [
    (s0, None), (v0, None), (v1, 0), (m0, None), (m0, 1), (t0, 2), (t1, 0),
]
for t, axis in cases:
    print(f'Shape {t.shape}, sum of axis {axis}: ', end='')
    total = t.sum() if axis is None else t.sum(axis=axis)
    print(total)

Shape torch.Size([]), sum of axis None: tensor(42.)
Shape torch.Size([6]), sum of axis None: tensor(15)
Shape torch.Size([6]), sum of axis 0: tensor(6.)
Shape torch.Size([3, 4]), sum of axis None: tensor(66)
Shape torch.Size([3, 4]), sum of axis 1: tensor([ 6, 22, 38])
Shape torch.Size([2, 3, 3]), sum of axis 2: tensor([[ 3, 12, 21],
        [30, 39, 48]])
Shape torch.Size([2, 3, 3]), sum of axis 0: tensor([[2., 2., 2.],
        [2., 2., 2.],
        [2., 2., 2.]])


In [20]:
# The matrix, its column sums (reduce dim 0) and its row sums (reduce dim 1).
(m0, m0.sum(dim=0), m0.sum(dim=1))

(tensor([[ 0,  1,  2,  3],
         [ 4,  5,  6,  7],
         [ 8,  9, 10, 11]]),
 tensor([12, 15, 18, 21]),
 tensor([ 6, 22, 38]))

In [21]:
# The mean is taken on a float32 copy of the integer matrix m0.
m0_float = m0.to(dtype=torch.float32)
(
    m0_float.mean(),                     # mean over all elements
    m0_float.numel(),                    # number of elements
    m0_float.mean(dim=0),                # per-column mean
    m0_float.mean(dim=1, keepdim=True),  # per-row mean, kept as a column
)

(tensor(5.5000),
 12,
 tensor([4., 5., 6., 7.]),
 tensor([[1.5000],
         [5.5000],
         [9.5000]]))

In [22]:
# The integer operands are cast to float32 so both operands of each product share a dtype.
(
    # vector . vector
    torch.dot(v0.to(dtype=torch.float32), v1),
    # (2x6 matrix) @ vector
    torch.mv(m1.reshape(2, 6), v1),
    # (3x4 matrix) @ (4x3 matrix)
    torch.mm(m0.to(dtype=torch.float32), m1.t()),
)

(tensor(15.),
 tensor([6., 6.]),
 tensor([[ 6.,  6.,  6.],
         [22., 22., 22.],
         [38., 38., 38.]]))

In [23]:
for t in tensors:
    # int64 tensors are cast to float32 before taking the norm; others are used as-is.
    as_float = t.to(dtype=torch.float32) if t.dtype == torch.int64 else t
    print((as_float.shape, as_float.norm()))

(torch.Size([]), tensor(42.))
(torch.Size([]), tensor(0.7000))
(torch.Size([6]), tensor(7.4162))
(torch.Size([6]), tensor(2.4495))
(torch.Size([3, 4]), tensor(22.4944))
(torch.Size([3, 4]), tensor(3.4641))
(torch.Size([2, 3, 3]), tensor(42.2493))
(torch.Size([2, 3, 3]), tensor(4.2426))


## Automatic differentiation

- First, enable gradient tracking on the independent variables. There are two ways:
    - call the tensor's `requires_grad_(True)` method;
    - pass the `requires_grad=True` argument when creating the tensor.
- Then, compute a scalar dependent variable from them.
- Call the dependent variable's `backward()` method to populate each independent variable's `grad` attribute.

In [24]:
# Way 1: switch on gradient tracking for an existing tensor, in place
# (the flag defaults to True when omitted).
v1.requires_grad_()
# Way 2: ask for gradient tracking when the tensor is created.
v2 = torch.arange(6.0, requires_grad=True)

(v1, v2)

(tensor([1., 1., 1., 1., 1., 1.], requires_grad=True),
 tensor([0., 1., 2., 3., 4., 5.], requires_grad=True))

In [25]:
# Forward pass: y is the scalar dot product of the two tracked vectors.
y = torch.dot(v1, v2)
y

tensor(15., grad_fn=<DotBackward0>)

In [26]:
# Inspect the gradients before backward() has been called on y.
v1.grad, v2.grad

(None, None)

In [27]:
# Backpropagate from the scalar y = dot(v1, v2), then read the populated gradients:
# dy/dv1 equals v2 and dy/dv2 equals v1.
y.backward()
v1.grad, v2.grad

(tensor([0., 1., 2., 3., 4., 5.]), tensor([1., 1., 1., 1., 1., 1.]))

Two useful methods:

- `zero_()` on the `grad` attribute clears the previously accumulated gradient in place;
- `detach()` on a tensor returns a new tensor that is excluded from gradient tracking.

In [28]:
# Clear v1's gradient in place so the next backward pass does not add to the old values.
v1.grad.zero_()
# tmp is a detached version of v1.
tmp = v1.detach()
# Dot product of the detached tensor with the tracked one.
y2 = torch.dot(tmp, v1)
y2

tensor(6., grad_fn=<DotBackward0>)

In [29]:
# Inspect both gradients after the reset and before backward() is called on y2.
tmp.grad, v1.grad

(None, tensor([0., 0., 0., 0., 0., 0.]))

In [30]:
# Backpropagate from y2, then compare the gradient of the detached tensor with
# the gradient of the tracked one.
y2.backward()
tmp.grad, v1.grad

(None, tensor([1., 1., 1., 1., 1., 1.]))