In [None]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt

from copy import deepcopy

In [None]:
# Report whether PyTorch's MPS backend is available in this environment.
torch.backends.mps.is_available()

# Mathematical Objects
## Scalars, Vectors, Matrices and Tensors

* A scalar ( $a$ ): just a single number, (italic, lowercase)
* A vector ( $\boldsymbol{a}$ ): an array of numbers, (bold typeface, lowercase)
* A matrix ( $\boldsymbol{A}$ ): 2-D array of numbers, (bold typeface, uppercase)
* A tensor ( $\boldsymbol{\mathsf{A}}$ ): n-D array of numbers

In [None]:
np.random.seed(219)
np.set_printoptions(precision=4)

# One NumPy example per kind of object, in order of increasing rank.
a = 1                                      # rank 0: scalar
b = np.array([2, 3, 4])                    # rank 1: vector
c = np.array([[2, 3, 4], [5, 6, 7]])       # rank 2: matrix
d = np.random.uniform(size=(2, 2, 3))      # rank 3 tensor
e = np.random.uniform(size=(2, 2, 3, 4))   # rank 4 tensor

# Emit the five labelled values in one write; the text is unchanged.
print('\n'.join([
    f'scalar: {a}',
    f'vector: {b}',
    f'matrix:\n{c}',
    f'3-rank tensor:\n{d}',
    f'4-rank tensor:\n{e}',
]))

In [None]:
torch.manual_seed(219)

# The same five objects as above, built with PyTorch.
a = 1                                         # rank 0: scalar
b = torch.tensor([2, 3, 4])                   # rank 1: vector
c = torch.tensor([[2, 3, 4], [5, 6, 7]])      # rank 2: matrix
d = torch.rand(2, 2, 3)                       # rank 3 tensor
e = torch.rand(2, 2, 3, 4)                    # rank 4 tensor

# Emit the five labelled values in one write; the text is unchanged.
print('\n'.join([
    f'scalar: {a}',
    f'vector: {b}',
    f'matrix:\n{c}',
    f'3-rank tensor:\n{d}',
    f'4-rank tensor:\n{e}',
]))

### Tensor shape

<img width="600" src="https://user-images.githubusercontent.com/11681225/183875482-8fbdeeb2-09fc-4e54-b8c0-7554b81a64db.png">

In [None]:
# 30 consecutive integers laid out as a (3, 2, 5) array.
v = np.arange(3 * 2 * 5).reshape(3, 2, 5)
print(v)

In [None]:
# 30 consecutive integers laid out as a (3, 2, 5) tensor.
w = torch.arange(3 * 2 * 5).reshape(3, 2, 5)
print(w)

## Rank, Dimension and Shape of Tensor

<img width="300" alt="image" src="https://user-images.githubusercontent.com/11681225/183872452-e9648e4c-e96c-4f2f-ad74-a7136ecea0a9.png">

In [None]:
np.random.seed(219)
np.set_printoptions(precision=4)
# Random alternative: v = np.random.normal(size=(3, 2, 4, 5))
# 120 consecutive integers laid out as a rank-4 array.
v = np.arange(3 * 2 * 4 * 5).reshape(3, 2, 4, 5)
print(v.shape)

In [None]:
torch.manual_seed(219)
# Random alternative: w = torch.randn(size=(3, 2, 4, 5))
# 120 consecutive integers laid out as a rank-4 tensor.
w = torch.arange(3 * 2 * 4 * 5).reshape(3, 2, 4, 5)
print(w.shape)

### Slice of tensor

<img width="300" alt="image" src="https://user-images.githubusercontent.com/11681225/183872452-e9648e4c-e96c-4f2f-ad74-a7136ecea0a9.png">

In [None]:
# Separate only the green block: index 2 along axis-0.
v1, w1 = v[2], w[2]
print(f'v1 shape: {v1.shape}, w1 shape: {w1.shape}')

In [None]:
# Separate only the rightmost blocks: last index along axis-2, everything else kept.
v2, w2 = v[:, :, -1], w[:, :, -1]
print(f'v2 shape: {v2.shape}, w2 shape: {w2.shape}')

In [None]:
# Separate only the top face of the blue block: index 1 on axis-0, index 0 on axis-1.
v3, w3 = v[1, 0], w[1, 0]
print(f'v3 shape: {v3.shape}, w3 shape: {w3.shape}')

# Basic Operations of Matrices
## Basic Operations of Matrices I
### Addition of two vectors

* Two vectors must have the same shape ( $\boldsymbol{u}, \boldsymbol{v} \in \mathbb{R}^{n}$ )
$$\boldsymbol{w} = \boldsymbol{u} + \boldsymbol{v}\\
w_{i} = u_{i} + v_{i}$$

In [None]:
def draw_vector_addition(a=np.array([2, 1]), b=np.array([1, 4])):
  """Plot two 2-D vectors and their sum as arrows from the origin.

  `a` and `b` are drawn as gray arrows, `a + b` as a black arrow, and dashed
  lines from the tips of `a` and `b` to the tip of the sum complete the
  parallelogram.

  Args:
    a: array-like of shape (2,), the first vector.
    b: array-like of shape (2,), the second vector.

  Raises:
    AssertionError: if `a` or `b` does not have shape (2,).
  """
  a = np.array(a)
  b = np.array(b)
  assert a.shape == (2,) and b.shape == (2,)
  c = a + b
  # draw figure code ref.:http://datascienceschool.net/02%20mathematics/03.01%20선형대수와%20해석기하의%20기초.html
  # modified by Il Gu Yi
  fig = plt.figure()
  gray = {'facecolor': 'gray'}
  black = {'facecolor': 'black'}
  ax = fig.add_subplot(111)
  ax.set_aspect('equal', adjustable='box')
  plt.annotate('', xy=a, xytext=(0, 0), arrowprops=gray)
  plt.annotate('', xy=b, xytext=(0, 0), arrowprops=gray)
  plt.annotate('', xy=c, xytext=(0, 0), arrowprops=black)
  plt.plot(0, 0, 'kP', ms=10)
  plt.plot(a[0], a[1], 'ro', ms=10)
  plt.plot(b[0], b[1], 'ro', ms=10)
  plt.plot(c[0], c[1], 'ro', ms=10)
  plt.plot([a[0], c[0]], [a[1], c[1]], 'k--')
  plt.plot([b[0], c[0]], [b[1], c[1]], 'k--')

  # Bounding box of everything drawn (the origin is always included).
  right = np.max([a[0], b[0], c[0], 0])
  left = np.min([a[0], b[0], c[0], 0])
  up = np.max([a[1], b[1], c[1], 0])
  down = np.min([a[1], b[1], c[1], 0])
  # left = down = np.min([left, down])
  # right = up = np.max([right, up])
  x_scale = (right - left) / 5.
  y_scale = (up - down) / 5.
  # A zero extent (e.g. both vectors lie on one axis) would give a tick step
  # of 0, which makes np.arange raise; fall back to a unit margin instead.
  if x_scale == 0:
    x_scale = 1.
  if y_scale == 0:
    y_scale = 1.
  plt.xticks(np.arange(left, right + x_scale, x_scale * 2))
  plt.yticks(np.arange(down, up + y_scale, y_scale * 2))
  plt.xlim(left - x_scale, right + x_scale)
  plt.ylim(down - y_scale, up + y_scale)
  plt.show()

In [None]:
# Example: visualise (20, 10) + (-20, 40).
draw_vector_addition([20, 10], [-20, 40])

In [None]:
# Element-wise sum of two vectors of equal length
u = torch.tensor([1, 2, 3])
v = torch.tensor([4, 5, 6])
w = torch.add(u, v)
print(f'{u.numpy()} + {v.numpy()} = {w.numpy()}')

In [None]:
# Addition of two vectors with different size
# Error!! Shapes (3,) and (2,) cannot be broadcast together, so `u + v` raises
# a RuntimeError. The error is caught and displayed so that this demonstration
# does not halt "Restart Kernel -> Run All".
u = torch.tensor([1, 2, 3])
v = torch.tensor([4, 5])
try:
  w = u + v
except RuntimeError as err:
  print(f'{type(err).__name__}: {err}')

### Scalar multiplication

* $\boldsymbol{u} \in \mathbb{R}^{n}$, $c \in \mathbb{R}$
$$\boldsymbol{v} = c \cdot \boldsymbol{u}\\
v_{i} = c \cdot u_{i}$$

In [None]:
# Scalar multiplication
def draw_scalar_multiplication(a=np.array([2, 1]), c=2):
  """Plot a 2-D vector together with its scalar multiple and its negation.

  Three arrows start at the origin: `c * a` (red), `a` (gray) and `-a`
  (blue). Both axes share the same range so the directions are not distorted.

  Args:
    a: array-like of shape (2,), the vector to scale.
    c: scalar multiplier.

  Raises:
    AssertionError: if `a` does not have shape (2,).
  """
  vec = np.array(a)
  assert vec.shape == (2,)
  scaled = c * vec
  negated = -vec

  fig = plt.figure()
  ax = fig.add_subplot(111)
  ax.set_aspect('equal', adjustable='box')
  # Drawing order: scaled vector, original vector, negated vector.
  for tip, colour in ((scaled, 'red'), (vec, 'gray'), (negated, 'blue')):
    ax.annotate('', xy=tip, xytext=(0, 0), arrowprops={'facecolor': colour})
  ax.plot(0, 0, 'ro', ms=15)

  # One common [low, high] range for both axes, always containing the origin.
  xs = [vec[0], scaled[0], negated[0], 0]
  ys = [vec[1], scaled[1], negated[1], 0]
  low = np.min([np.min(xs), np.min(ys)])
  high = np.max([np.max(xs), np.max(ys)])
  step = (high - low) / 10.
  ticks = np.arange(low, high + step, step * 2)
  ax.set_xticks(ticks)
  ax.set_yticks(ticks)
  ax.set_xlim(low - step, high + step)
  ax.set_ylim(low - step, high + step)
  plt.show()

In [None]:
# Example: visualise 2 * (2, 1) together with (2, 1) and -(2, 1).
draw_scalar_multiplication([2, 1], 2)

In [None]:
# Multiply every component of a vector by a scalar
a = 2
u = torch.tensor([1, 2, 3])
v = torch.mul(u, a)
print(f'{a} x {u.numpy()} = {v.numpy()}')

### Addition of two matrices

* Two matrices must have the same shape ( $\boldsymbol{A}, \boldsymbol{B} \in \mathbb{R}^{m \times n}$ )
$$\boldsymbol{C} = \boldsymbol{A} + \boldsymbol{B}\\
c_{ij} = a_{ij} + b_{ij}$$

In [None]:
# Element-wise sum of two matrices of equal shape
row = 2
col = 2
u = torch.arange(row * col).reshape(row, col)                   # entries 0..3
v = torch.arange(row * col, 2 * row * col).reshape(row, col)    # entries 4..7
w = torch.add(u, v)
print(f'u:\n{u.numpy()}')
print(f'v:\n{v.numpy()}')
print(f'w:\n{w.numpy()}')

In [None]:
# Addition of two matrices with different size
# Error!! Shapes (2, 2) and (2, 3) differ in the last axis and neither size is
# 1, so `u + v` raises a RuntimeError. The error is caught and displayed so
# that this demonstration does not halt "Restart Kernel -> Run All".
row = 2
col = 2
col1 = 3
u = torch.arange(row * col).reshape(row, col)  # shape of u: (2, 2)
v = (torch.arange(row * col1) + row * col).reshape(row, col1)  # shape of v: (2, 3)
try:
  w = u + v
except RuntimeError as err:
  print(f'{type(err).__name__}: {err}')

In [None]:
# Addition of two matrices with different size
# Error!! Shapes (2, 2) and (3, 2) differ in the first axis and neither size is
# 1, so `u + v` raises a RuntimeError. The error is caught and displayed so
# that this demonstration does not halt "Restart Kernel -> Run All".
row = 2
col = 2
row1 = 3
u = torch.arange(row * col).reshape(row, col)  # shape of u: (2, 2)
v = (torch.arange(row1 * col) + row * col).reshape(row1, col)  # shape of v: (3, 2)
try:
  w = u + v
except RuntimeError as err:
  print(f'{type(err).__name__}: {err}')

### Broadcasting I

$$
\left[ \begin{array}{cc}
0 & 1 \\
2 & 3 \\
\end{array} \right]
+ \left[ \begin{array}{cc}
4 & 5
\end{array} \right]
= \left[ \begin{array}{cc}
4 & 6 \\
6 & 8 \\
\end{array} \right]
\implies
\left[ \begin{array}{cc}
0 & 1 \\
2 & 3 \\
\end{array} \right]
+ \left[ \begin{array}{cc}
4 & 5 \\
4 & 5 \\
\end{array} \right]
= \left[ \begin{array}{cc}
4 & 6 \\
6 & 8 \\
\end{array} \right]
$$

In [None]:
# Addition of a (2, 2) matrix and a (1, 2) matrix
# No error: the single row of v is broadcast across both rows of u.
row = 2
col = 2
row1 = 1
u = torch.arange(row * col).reshape(row, col)  # shape of u: (2, 2)
v = torch.arange(row * col, row * col + row1 * col).reshape(row1, col)  # shape of v: (1, 2)
w = torch.add(u, v)
print(f'u:\n{u.numpy()}')
print(f'v:\n{v.numpy()}')
print(f'w:\n{w.numpy()}')

In [None]:
# v still has two axes here, i.e. it is a one-row matrix rather than a vector.
print(v.shape)  # matrix

In [None]:
# Addition of a (2, 2) matrix and a length-2 vector
# No error: the vector is treated as a row and broadcast across the rows of u.
row = 2
col = 2
row1 = 1
u = torch.arange(row * col).reshape(row, col)  # shape of u: (2, 2)
v = torch.arange(row * col, row * col + row1 * col).reshape(col)  # shape of v: (2)
w = torch.add(u, v)
print(f'u:\n{u.numpy()}')
print(f'v:\n{v.numpy()}')
print(f'w:\n{w.numpy()}')

In [None]:
# This time v has a single axis, i.e. it is a plain vector.
print(v.shape)  # vector

### Broadcasting II

$$
\left[ \begin{array}{c}
\left[ \begin{array}{ccc}
0 & 1 & 2 \\
3 & 4 & 5 \\
\end{array} \right]\\
\left[ \begin{array}{ccc}
6 & 7 & 8 \\
9 & 10 & 11 \\
\end{array} \right]
\end{array} \right]
+ 
\left[ \begin{array}{ccc}
12 & 13 & 14 \\
15 & 16 & 17 \\
\end{array} \right]
\implies
\left[ \begin{array}{c}
\left[ \begin{array}{ccc}
0 & 1 & 2 \\
3 & 4 & 5 \\
\end{array} \right]\\
\left[ \begin{array}{ccc}
6 & 7 & 8 \\
9 & 10 & 11 \\
\end{array} \right]
\end{array} \right]
+ 
\left[ \begin{array}{c}
\left[ \begin{array}{ccc}
12 & 13 & 14 \\
15 & 16 & 17 \\
\end{array} \right]\\
\left[ \begin{array}{ccc}
12 & 13 & 14 \\
15 & 16 & 17 \\
\end{array} \right]
\end{array} \right]
=
\left[ \begin{array}{c}
\left[ \begin{array}{ccc}
12 & 14 & 16 \\
18 & 20 & 22 \\
\end{array} \right]\\
\left[ \begin{array}{ccc}
18 & 20 & 22 \\
24 & 26 & 28 \\
\end{array} \right]
\end{array} \right]
$$

In [None]:
# Addition of a 3-rank tensor and a 2-rank tensor
# No error: v is broadcast along axis-0 of u.
a0, a1, a2 = 2, 2, 3  # sizes of axis-0, axis-1, axis-2
b = 1
u = torch.arange(a0 * a1 * a2).reshape(a0, a1, a2)  # shape of u: (2, 2, 3)
v = torch.arange(a0 * a1 * a2, a0 * a1 * a2 + a1 * a2).reshape(a1, a2)  # shape of v: (2, 3)
# Alternatives for v:
# v = (torch.arange(b * a1 * a2) + a0 * a1 * a2).reshape(b, a1, a2)  # shape of v: (1, 2, 3)
# v = (torch.arange(a0 * a1) + a0 * a1 * a2).reshape(a0, a1)  # shape of v: (2, 2)  ## Error!!
w = torch.add(u, v)
print(f'u shape: {u.shape}\n{u.numpy()}\n')
print(f'v shape: {v.shape}\n{v.numpy()}\n')
print(f'w shape: {w.shape}\n{w.numpy()}\n')

### Broadcasting III

$$
\left[ \begin{array}{c}
\left[ \begin{array}{ccc}
0 & 1 & 2 \\
3 & 4 & 5 \\
\end{array} \right]\\
\left[ \begin{array}{ccc}
6 & 7 & 8 \\
9 & 10 & 11 \\
\end{array} \right]
\end{array} \right]
+ 
\left[ \begin{array}{c}
\left[ \begin{array}{ccc}
12 & 13 & 14 \\
\end{array} \right]\\
\left[ \begin{array}{ccc}
15 & 16 & 17 \\
\end{array} \right]
\end{array} \right]
\implies
\left[ \begin{array}{c}
\left[ \begin{array}{ccc}
0 & 1 & 2 \\
3 & 4 & 5 \\
\end{array} \right]\\
\left[ \begin{array}{ccc}
6 & 7 & 8 \\
9 & 10 & 11 \\
\end{array} \right]
\end{array} \right]
+ 
\left[ \begin{array}{c}
\left[ \begin{array}{ccc}
12 & 13 & 14 \\
12 & 13 & 14 \\
\end{array} \right]\\
\left[ \begin{array}{ccc}
15 & 16 & 17 \\
15 & 16 & 17 \\
\end{array} \right]
\end{array} \right]
=
\left[ \begin{array}{c}
\left[ \begin{array}{ccc}
12 & 14 & 16 \\
15 & 17 & 19 \\
\end{array} \right]\\
\left[ \begin{array}{ccc}
21 & 23 & 25 \\
24 & 26 & 28 \\
\end{array} \right]
\end{array} \right]
$$

In [None]:
# Addition of two 3-rank tensors with different size
# No error: v has shape (2, 1, 3), and its size-1 axis-1 is broadcast across
# axis-1 of u. (Using shape (2, 3) here would merely repeat Broadcasting II.)
a0 = 2  # axis-0
a1 = 2  # axis-1
a2 = 3  # axis-2
b = 1  # size of the singleton (broadcast) axis of v
u = torch.arange(a0 * a1 * a2).reshape(a0, a1, a2)  # shape of u: (2, 2, 3)
v = (torch.arange(a0 * b * a2) + a0 * a1 * a2).reshape(a0, b, a2)  # shape of v: (2, 1, 3)
w = u + v
print(f'u shape: {u.size()}\n{u.numpy()}\n')
print(f'v shape: {v.shape}\n{v.numpy()}\n')
print(f'w shape: {w.shape}\n{w.numpy()}\n')

## Basic Operations of Matrices II
### Hadamard product

* Two matrices must have the same shape ( $\boldsymbol{A}, \boldsymbol{B} \in \mathbb{R}^{m \times n}$ )
* Element-wise product
$$\boldsymbol{C} = \boldsymbol{A} \odot \boldsymbol{B}\\
c_{ij} = a_{ij} \cdot b_{ij}$$

In [None]:
# Hadamard (element-wise) product of two matrices
a0, a1 = 2, 3  # sizes of axis-0, axis-1
u = torch.arange(a0 * a1).reshape(a0, a1)  # shape of u: (2, 3)
v = torch.arange(a0 * a1, 2 * a0 * a1).reshape(a0, a1)  # shape of v: (2, 3)
w = torch.mul(u, v)
# equivalent: w = u * v
print(f'u shape: {u.size()}\n{u.numpy()}\n')
print(f'v shape: {v.shape}\n{v.numpy()}\n')
print(f'w shape: {w.shape}\n{w.numpy()}\n')

In [None]:
# Hadamard (element-wise) product of two 3-rank tensors
a0, a1, a2 = 2, 2, 3  # sizes of axis-0, axis-1, axis-2
u = torch.arange(a0 * a1 * a2).reshape(a0, a1, a2)  # shape of u: (2, 2, 3)
v = torch.arange(a0 * a1 * a2, 2 * a0 * a1 * a2).reshape(a0, a1, a2)  # shape of v: (2, 2, 3)
w = torch.mul(u, v)
# equivalent: w = u * v
print(f'u shape: {u.size()}\n{u.numpy()}\n')
print(f'v shape: {v.shape}\n{v.numpy()}\n')
print(f'w shape: {w.shape}\n{w.numpy()}\n')

## Basic Operations of Matrices III
### Multiplication of two matrices


* $\boldsymbol{A} \in \mathbb{R}^{m \times n}$, $\boldsymbol{B} \in \mathbb{R}^{n \times p}$

$$
\boldsymbol{A} \boldsymbol{B} := 
\left[ \begin{array}{cccc}
a_{11} & a_{12} & \cdots & a_{1n} \\
\vdots & \vdots & \cdots & \vdots \\
a_{i1} & a_{i2} & \cdots & a_{in} \\
\vdots & \vdots & \cdots & \vdots \\
a_{m1} & a_{m2} & \cdots & a_{mn} \\
\end{array} \right]
\left[ \begin{array}{ccccc}
b_{11} &\cdots & b_{1j} & \cdots & b_{1p} \\
b_{21} &\cdots & b_{2j} & \cdots & b_{2p} \\
\vdots & & \vdots & & \vdots \\
b_{n1} & \cdots & b_{nj} & \cdots & b_{np} \\
\end{array} \right]
=
\left[ \begin{array}{cccc}
c_{11} & c_{12} & \cdots & c_{1p} \\
\vdots & \vdots & c_{ij} & \vdots \\
c_{m1} & c_{m2} & \cdots & c_{mp} \\
\end{array} \right]
\in \mathbb{R}^{m \times p}
$$

$$\boldsymbol{C} = \boldsymbol{A} \boldsymbol{B}$$
$$c_{ij} = \sum_{k=1}^{n} a_{ik}b_{kj}$$

In [None]:
# Matrix product of a (3, 2) and a (2, 4) matrix, computed in two equivalent ways
m, n, p = 3, 2, 4
A = torch.arange(m * n).reshape(m, n)  # shape of A: (3, 2)
B = torch.arange(m * n, m * n + n * p).reshape(n, p)  # shape of B: (2, 4)
C = A.matmul(B)  # shape of C: (3, 4)
D = A @ B  # operator form of the same product
# equivalent: C = torch.matmul(A, B)
print(f'A shape: {A.size()}\n{A.numpy()}\n')
print(f'B shape: {B.shape}\n{B.numpy()}\n')
print(f'C shape: {C.shape}\n{C.numpy()}\n')
print(f'D shape: {D.shape}\n{D.numpy()}\n')

### Properties of matrix multiplication

* Distributivity: $\boldsymbol{A} (\boldsymbol{B} + \boldsymbol{C}) = \boldsymbol{A} \boldsymbol{B} + \boldsymbol{A} \boldsymbol{C}$
* Associativity: $\boldsymbol{A} (\boldsymbol{B} \boldsymbol{C}) = ( \boldsymbol{A} \boldsymbol{B} ) \boldsymbol{C}$
* Non-commutativity: $\boldsymbol{A} \boldsymbol{B} \neq \boldsymbol{B} \boldsymbol{A}$

In [None]:
# Distributivity: A (B + C) == A B + A C
m, n, p = 3, 2, 4
A = torch.arange(m * n).reshape(m, n)  # shape of A: (3, 2)
B = torch.arange(m * n, m * n + n * p).reshape(n, p)  # shape of B: (2, 4)
C = torch.arange(m * n + n * p, m * n + 2 * n * p).reshape(n, p)  # shape of C: (2, 4)

D1 = torch.matmul(A, B + C)  # shape of D1: (3, 4)
D2 = torch.matmul(A, B) + torch.matmul(A, C)  # shape of D2: (3, 4)
print(D1)
print(D2)

In [None]:
# Associativity: A (B C) == (A B) C
m, n, p = 3, 2, 4
A = torch.arange(m * n).reshape(m, n)  # shape of A: (3, 2)
B = torch.arange(m * n, m * n + n * n).reshape(n, n)  # shape of B: (2, 2)
C = torch.arange(m * n + n * n, m * n + n * n + n * p).reshape(n, p)  # shape of C: (2, 4)

D1 = torch.matmul(A, torch.matmul(B, C))  # shape of D1: (3, 4)
D2 = torch.matmul(torch.matmul(A, B), C)  # shape of D2: (3, 4)
print(D1)
print(D2)

In [None]:
# Non-commutativity
# Error!! A B is defined ((3, 2) x (2, 4)), but B A is not ((2, 4) x (3, 2)):
# the inner dimensions 4 and 3 differ, so torch.matmul raises a RuntimeError.
# The error is caught and displayed so that this demonstration does not halt
# "Restart Kernel -> Run All".
m, n, p = 3, 2, 4
A = torch.arange(m * n).reshape(m, n)  # shape of A: (3, 2)
B = (torch.arange(n * p) + m * n).reshape(n, p)  # shape of B: (2, 4)
D1 = torch.matmul(A, B)
try:
  D2 = torch.matmul(B, A)
except RuntimeError as err:
  print(f'{type(err).__name__}: {err}')

### Inner product between two vectors

$$
\begin{array}{rl}
\boldsymbol{x}^{\top} \boldsymbol{y}
= \boldsymbol{y}^{\top} \boldsymbol{x} 
&= \sum_{k=1}^{d} x_{k} y_{k}\\
&= x_{1} y_{1} + x_{2} y_{2} + \cdots + x_{d} y_{d} \in \mathbb{R}
\end{array}
$$

In [None]:
# Inner product computed two ways: the built-in dot and an explicit sum of products.
d = 3
x = torch.arange(d)  # shape of x: (3,)
y = torch.arange(d, 2 * d)  # shape of y: (3,)
z1 = x.dot(y)
z2 = torch.sum(x * y)
print(f'x: {x.numpy()}')
print(f'y: {y.numpy()}')
print(f'z1: {z1.numpy()} / z2: {z2.numpy()}')

### Outer product between two vectors

* $\boldsymbol{x} \in \mathbb{R}^{m}$, $\boldsymbol{y} \in \mathbb{R}^{n}$

$$
\begin{array}{rl}
\boldsymbol{x} \boldsymbol{y}^{\top} \in \mathbb{R}^{m \times n}
= \left[ \begin{array}{c}
x_{1} \\
x_{2} \\
\vdots \\
x_{m} \\
\end{array} \right]
\left[ \begin{array}{cccc}
y_{1} & y_{2} & \cdots & y_{n} \\
\end{array} \right]
= \left[ \begin{array}{cccc}
x_{1} y_{1} & x_{1} y_{2} & \cdots & x_{1} y_{n} \\
x_{2} y_{1} & x_{2} y_{2} & \cdots & x_{2} y_{n} \\
\vdots & \vdots & \ddots & \vdots \\
x_{m} y_{1} & x_{m} y_{2} & \cdots & x_{m} y_{n} \\
\end{array} \right]
\end{array}
$$

In [None]:
# Outer product of a length-3 and a length-2 vector.
m, n = 3, 2
x = torch.arange(m)  # shape of x: (3,)
y = torch.arange(n, 2 * n)  # shape of y: (2,)
z = torch.outer(x, y)  # shape of z: (3, 2)
print(f'x: {x.numpy()}')
print(f'y: {y.numpy()}')
print(f'z shape: {z.shape}\n{z.numpy()}')

In [None]:
# The same outer product as a (3, 1) x (1, 2) matrix product.
# Indexing with None inserts a size-1 axis, like `torch.unsqueeze()`.
X = x[:, None]  # shape of X: (3, 1)
Y = y[None, :]  # shape of Y: (1, 2)
Z = X @ Y  # shape of Z: (3, 2)
print(f'X shape: {X.shape}\n{X.numpy()}')
print(f'Y shape: {Y.shape}\n{Y.numpy()}')
print(f'Z shape: {Z.shape}\n{Z.numpy()}')

In [None]:
# NumPy version: `np.outer` versus a column-times-row product built with `numpy.expand_dims`
x = np.random.normal(size=(3,))
y = np.random.normal(size=(2,))
z = np.outer(x, y)
X, Y = np.expand_dims(x, axis=1), np.expand_dims(y, axis=0)  # shapes (3, 1) and (1, 2)
Z = np.matmul(X, Y)
print(f'{z}\n\n{Z}')

### Matrix-Vector product

* Given a matrix $\boldsymbol{A} \in \mathbb{R}^{m \times n}$ and a vector $\boldsymbol{x} \in \mathbb{R}^{n}$
* Matrix-vector product: $\boldsymbol{y} = \boldsymbol{A} \boldsymbol{x} \in \mathbb{R}^{m}$

In [None]:
# Matrix-vector product: (3, 2) matrix times a length-2 vector gives a length-3 vector.
m, n = 3, 2
A = torch.arange(m * n).reshape(m, n)  # shape of A: (3, 2)
x = torch.arange(m * n, m * n + n)  # shape of x: (2,)
y = torch.matmul(A, x)
# equivalent: y = A @ x
print(f'A shape: {A.shape}\n{A.numpy()}')
print(f'x shape: {x.shape}\n{x.numpy()}')
print(f'y shape: {y.shape}\n{y.numpy()}')

### `torch.matmul()`

$$c_{ij} = \sum_{k=1}^{n} a_{ik}b_{kj}$$

In [None]:
# Batched product of two 3-rank tensors: axis-0 is the batch,
# and each (n, p) slice is multiplied by the matching (p, q) slice.
torch.manual_seed(219)
m, n, p, q = 2, 3, 4, 5
x = torch.randn(m, n, p)
y = torch.randn(m, p, q)
z = x @ y
print(f'{z.shape}')

In [None]:
# Recompute one entry of the batched product by hand and compare it with z.
d = 1  # index into the batch axis
i, j = 0, 3  # row / column of the entry to check
c = 0  # running sum over the shared axis
for k in range(p):
  c = c + x[d, i, k] * y[d, k, j]

print(f'({i}, {j}) element of {d}th data: {c.numpy():.4f}')
print(f'z[{d}, {i}, {j}]: {z[d, i, j]:.4f}')

In [None]:
# Batched product of two 4-rank tensors: the two leading axes are batch axes,
# and the trailing (p, q) and (q, r) matrices are multiplied.
torch.manual_seed(219)
m, n, p, q, r = 3, 2, 4, 5, 6
x = torch.randn(m, n, p, q)
y = torch.randn(m, n, q, r)
z = x @ y
print(f'{z.shape}')

## Linear Combination and Linear Transformation

### Linear Combination

* Consider a vector space $V$ and a finite number of vectors $\boldsymbol{x}_{1}, \cdots, \boldsymbol{x}_{k} \in V$
* Every $\boldsymbol{x} \in V$ of the following form is called a linear combination of the vectors $\boldsymbol{x}_{1}, \cdots, \boldsymbol{x}_{k}$
$$\boldsymbol{x} = c_{1} \boldsymbol{x}_{1}
+ \cdots +
c_{k} \boldsymbol{x}_{k}
= \sum_{i=1}^{k} c_{i} \boldsymbol{x}_{i} \in V$$
with $c_{1}, \cdots, c_{k} \in \mathbb{R}$

In [None]:
def draw_linear_combination(a=np.array([1, 3]), b=np.array([3, 1]), w1=1, w2=1):
  """Plot two 2-D vectors and their linear combination `w1 * a + w2 * b`.

  `a` and `b` are drawn as gray arrows and the combination as a black arrow.
  Dashed lines from the tips of the scaled vectors `w1 * a` and `w2 * b` to
  the tip of the combination complete the parallelogram. With the default
  weights (1, 1) the picture is plain vector addition.

  Args:
    a: array-like of shape (2,), the first vector.
    b: array-like of shape (2,), the second vector.
    w1: scalar weight applied to `a`.
    w2: scalar weight applied to `b`.

  Raises:
    AssertionError: if `a` or `b` does not have shape (2,).
  """
  a = np.array(a)
  b = np.array(b)
  assert a.shape == (2,) and b.shape == (2,)
  sa = w1 * a  # scaled components: the actual sides of the parallelogram
  sb = w2 * b
  c = sa + sb
  # draw figure code ref.:http://datascienceschool.net/02%20mathematics/03.01%20선형대수와%20해석기하의%20기초.html
  # modified by Il Gu Yi
  fig = plt.figure()
  gray = {'facecolor': 'gray'}
  black = {'facecolor': 'black'}
  ax = fig.add_subplot(111)
  ax.set_aspect('equal', adjustable='box')
  plt.annotate('', xy=a, xytext=(0, 0), arrowprops=gray)
  plt.annotate('', xy=b, xytext=(0, 0), arrowprops=gray)
  plt.annotate('', xy=c, xytext=(0, 0), arrowprops=black)
  plt.plot(0, 0, 'kP', ms=10)
  plt.plot(a[0], a[1], 'ro', ms=10)
  plt.plot(b[0], b[1], 'ro', ms=10)
  plt.plot(c[0], c[1], 'ro', ms=10)
  # When a weight is not 1 the scaled tip differs from the arrow tip, so show
  # the scaled vector as a dashed segment from the origin.
  if w1 != 1:
    plt.plot([0, sa[0]], [0, sa[1]], 'k--')
  if w2 != 1:
    plt.plot([0, sb[0]], [0, sb[1]], 'k--')
  # The parallelogram closes from the scaled tips, not the unscaled ones;
  # the two coincide only when both weights are 1.
  plt.plot([sa[0], c[0]], [sa[1], c[1]], 'k--')
  plt.plot([sb[0], c[0]], [sb[1], c[1]], 'k--')

  # Bounding box of everything drawn (the origin is always included).
  right = np.max([a[0], b[0], sa[0], sb[0], c[0], 0])
  left = np.min([a[0], b[0], sa[0], sb[0], c[0], 0])
  up = np.max([a[1], b[1], sa[1], sb[1], c[1], 0])
  down = np.min([a[1], b[1], sa[1], sb[1], c[1], 0])
  x_scale = (right - left) / 5.
  y_scale = (up - down) / 5.
  # A zero extent would give a tick step of 0, which makes np.arange raise;
  # fall back to a unit margin instead.
  if x_scale == 0:
    x_scale = 1.
  if y_scale == 0:
    y_scale = 1.
  plt.xticks(np.arange(left, right + x_scale, x_scale * 2))
  plt.yticks(np.arange(down, up + y_scale, y_scale * 2))
  plt.xlim(left - x_scale, right + x_scale)
  plt.ylim(down - y_scale, up + y_scale)
  plt.show()

In [None]:
# Default vectors with unit weights: equivalent to plain vector addition.
draw_linear_combination(w1=1, w2=1)

### Linear Transformation in Machine Learning

<img width="200" alt="Colored_neural_network" src="https://user-images.githubusercontent.com/11681225/183967601-ab747c72-8710-4ad5-977e-4fd1ba2f95c8.png">

* input vector: $\boldsymbol{x} \in \mathbb{R}^{3}$
* hidden vector: $\boldsymbol{h} \in \mathbb{R}^{4}$
* output vector: $\boldsymbol{y} \in \mathbb{R}^{2}$
* input-hidden weight: $\boldsymbol{w}_{1} \in \mathbb{R}^{4 \times 3}$
* hidden-output weight: $\boldsymbol{w}_{2} \in \mathbb{R}^{2 \times 4}$
* neural net: $\boldsymbol{y} = \boldsymbol{w}_{2} (\boldsymbol{w}_{1} \boldsymbol{x}) = (\boldsymbol{w}_{2} \boldsymbol{w}_{1}) \boldsymbol{x} = \boldsymbol{w}' \boldsymbol{x}$

In [None]:
# Two stacked linear maps: x (3,) -> h (4,) -> y (2,), with no non-linearity.
torch.manual_seed(219)
x = torch.randn(3)
w1 = torch.randn(4, 3)
w2 = torch.randn(2, 4)

h = w1 @ x
y = w2 @ h
print(f'h shape: {h.shape}')
print(f'h: {h}')
print(f'y shape: {y.shape}')
print(f'y: {y}')

In [None]:
# Collapse the two weight matrices into one and apply it to x in a single step.
w3 = w2 @ w1
y = w3 @ x
print(f'y shape: {y.shape}')
print(f'y: {y}')

* neural net added non-linearity: $\boldsymbol{y} = \sigma (\boldsymbol{w}_{2} \, \text{ReLU} (\boldsymbol{w}_{1} \boldsymbol{x}) )$

In [None]:
# Same weights, now with ReLU on the hidden layer and a sigmoid on the output.
h = torch.matmul(w1, x).relu()
y = torch.matmul(w2, h).sigmoid()
print(f'h: {h}')
print(f'y: {y}')

### feed-forward neural net in practice

In [None]:
# The same architecture built from nn.Linear layers (randomly initialised, with bias).
torch.manual_seed(219)
x_ = torch.randn(1, 3)  # a batch holding one 3-dimensional input
fc1 = nn.Linear(3, 4)
fc2 = nn.Linear(4, 2)

h = fc1(x_).relu()
y = fc2(h).sigmoid()
print(f'h: {h}')
print(f'y: {y}')

In [None]:
# Bias-free nn.Linear layers whose weights are replaced by the existing w1 and
# w2 tensors instead of keeping nn.Linear's random initialisation.
torch.manual_seed(219)
x_ = torch.randn(1, 3)
fc1 = nn.Linear(3, 4, bias=False)
fc2 = nn.Linear(4, 2, bias=False)
fc1.weight.data = w1  # NOTE(review): presumably the (4, 3) w1 from the earlier cell - confirm
fc2.weight.data = w2  # NOTE(review): presumably the (2, 4) w2 from the earlier cell - confirm

h = torch.relu(fc1(x_))
y = torch.sigmoid(fc2(h))
print(f'h: {h}')
print(f'y: {y}')

In [None]:
# Manual forward pass with the raw weight matrices, for comparison with the nn.Linear output.
h = torch.matmul(w1, x).relu()
y = torch.matmul(w2, h).sigmoid()
print(f'h: {h}')
print(f'y: {y}')

### When batch size is more than 1

$\boldsymbol{y} = \boldsymbol{w}_{2} (\boldsymbol{w}_{1} \boldsymbol{x}) = (\boldsymbol{w}_{2} \boldsymbol{w}_{1}) \boldsymbol{x} = \boldsymbol{w}' \boldsymbol{x}$

* batch size 1, shape: $(2) = (2 \times 4) \times ( 4 \times 3 ) \times (3)$
* batch size 5, shape: $(5, 2) \neq (2 \times 4) \times ( 4 \times 3 ) \times (5 \times 3)$

In [None]:
# Error: multiplying from the left does not work for a batch. w1 is (4, 3) but
# x is (5, 3), so the inner dimensions (3 vs 5) do not match and torch.matmul
# raises a RuntimeError. The error is caught and displayed so that this
# demonstration does not halt "Restart Kernel -> Run All".
torch.manual_seed(219)
x = torch.randn(5, 3)
w1 = torch.randn(4, 3)
w2 = torch.randn(2, 4)

try:
  h = torch.matmul(w1, x)
  y = torch.matmul(w2, h)
  print(f'h shape: {h.shape}')
  print(f'h: {h}')
  print(f'y shape: {y.shape}')
  print(f'y: {y}')
except RuntimeError as err:
  print(f'{type(err).__name__}: {err}')

$\boldsymbol{y} = (\boldsymbol{x} \boldsymbol{w}_{1}) \boldsymbol{w}_{2}$

* batch size 1, shape: $(2) = (3) \times (3 \times 4) \times ( 4 \times 2 )$
* batch size 5, shape: $(5, 2) = (5 \times 3) \times ( 3 \times 4 ) \times (4 \times 2)$

In [None]:
# Batch-friendly layout: inputs are rows and the weights multiply from the right.
torch.manual_seed(219)
x = torch.randn(5, 3)
w1 = torch.randn(3, 4)
w2 = torch.randn(4, 2)

h = (x @ w1).relu()
y = h @ w2
print(f'h shape: {h.shape}')
print(f'h: {h}')
print(f'y shape: {y.shape}')
print(f'y: {y}')

In [None]:
# Batched forward pass through bias-free nn.Linear layers that reuse the
# existing w1 and w2 tensors.
torch.manual_seed(219)
x = torch.randn(5, 3)
fc1 = nn.Linear(3, 4, bias=False)
fc2 = nn.Linear(4, 2, bias=False)
# nn.Linear stores its weight as (out_features, in_features), hence the transposes.
# NOTE(review): presumably w1 is (3, 4) and w2 is (4, 2) from the previous cell - confirm
fc1.weight.data = w1.T
fc2.weight.data = w2.T

h = torch.relu(fc1(x))
y = fc2(h)
print(f'h: {h}')
print(f'y: {y}')

### Rank and nullity

* Dimension ( $\dim V$ ): the number of vectors in a basis of the vector space $V$
* Rank ( $\mathrm{rank} \, \boldsymbol{A}$ ): dimension of the column space of $\boldsymbol{A}$
* Nullity ( $\mathrm{nullity} \, \boldsymbol{A}$ ): dimension of the null space of $\boldsymbol{A}$
* Rank-nullity theorem: $\boldsymbol{A} \in \mathbb{R}^{m \times n}$
  * $\mathrm{rank} \, \boldsymbol{A} + \mathrm{nullity} \, \boldsymbol{A} = n$

In [None]:
# A rank-deficient 4 x 4 example: the first two columns are identical.
A = np.array(
    [
        [1, 1, 0, 2],
        [-2, -2, 1, -5],
        [1, 1, -1, 3],
        [4, 4, -1, 9],
    ],
    dtype=np.float32,
)

In [None]:
# Alternative input: a random 15 x 15 Gaussian matrix.
# Running this cell overwrites the 4 x 4 example A defined in the previous cell.
A = np.random.normal(size=(15, 15))

In [None]:
# Numerical rank of whichever A was assigned last.
rank = np.linalg.matrix_rank(A)
print(f'the rank of a matrix A: {rank}')

## Basic Operations of Matrices IV
### Transpose

properties of matrix transpose
* $(\boldsymbol{A}^{\top})^{\top} = \boldsymbol{A}$
* $(\boldsymbol{A} \boldsymbol{B})^{\top} = \boldsymbol{B}^{\top} \boldsymbol{A}^{\top}$
* For any scalar $c$, $(c \boldsymbol{A})^{\top} = c \boldsymbol{A}^{\top}$
* $(\boldsymbol{A} + \boldsymbol{B})^{\top} = \boldsymbol{A}^{\top} + \boldsymbol{B}^{\top}$

In [None]:
# Transposing a (3, 2) matrix swaps its two axes.
A = torch.randn(3, 2)
B = torch.randn(2, 4)
A_t = A.t()
print(f'shape of transpose of A: {A_t.shape}')

In [None]:
# property 1: transposing twice gives back the original matrix
torch.allclose(A, A.T.T)

In [None]:
# property 2: the transpose of a product is the product of the transposes in reverse order
torch.allclose(torch.matmul(A, B).T, torch.matmul(B.T, A.T))

In [None]:
# property 3: a scalar factor can be moved outside the transpose
c = 2.1
torch.allclose((c * A).T, c * A.T)

In [None]:
# property 4: the transpose of a sum is the sum of the transposes
# (A and B are redrawn here so that they share the same shape)
A = torch.randn(3, 2)
B = torch.randn(3, 2)
torch.allclose(torch.add(A, B).T, A.T + B.T)

## Basic Operations of Matrices IV
### Identity matrix

* Notation: $\boldsymbol{I}_{n} \in \mathbb{R}^{n \times n}$
$$I_{ij} = \left\{ \begin{array}{cl}
1 & i = j \\
0 & i \neq j
\end{array} \right.$$
* Property: $\forall \boldsymbol{A} \in \mathbb{R}^{m \times n}$,
$$\boldsymbol{A} \boldsymbol{I}_{n} = \boldsymbol{A} = \boldsymbol{I}_{m} \boldsymbol{A}$$
* Element notation
$$(\boldsymbol{I}_{n})_{ij} = \delta_{ij}$$

In [None]:
# 3 x 3 identity matrix
I = torch.eye(3, 3)
print(I)

### Diagonal matrix

* Notation: $\boldsymbol{D} = \mathrm{diag}(d_{1}, d_{2}, \ldots, d_{n})$
$$D_{ij} = \left\{ \begin{array}{cl}
d_{i} & i = j \\
0 & i \neq j
\end{array} \right.$$
* Identity matrix: $\boldsymbol{I}_{n} = \mathrm{diag}(1, 1, \ldots, 1)$

In [None]:
# Build a diagonal matrix from the vector of its diagonal entries.
d = torch.tensor([2, 4, 5])
D = d.diag()
print(D)

## Basic Operations of Matrices V
### Inverse matrix

* Inverse of $2 \times 2$ matrix, if $ad - bc \neq 0$
$$
\boldsymbol{A} =
\left[ \begin{array}{cc}
a & b \\
c & d
\end{array} \right]
\quad \quad
\boldsymbol{A}^{-1} =
\frac{1}{ad - bc}
\left[ \begin{array}{cc}
d & -b \\
-c & a
\end{array} \right]
$$

In [None]:
# Random 2 x 2 matrix (no seed is set in this cell, so the values vary between runs).
A = torch.randn(2, 2)
print(A)

In [None]:
# inverse matrix from the closed-form 2 x 2 formula:
# swap the diagonal entries, negate the off-diagonal ones, divide by ad - bc
ad_bc = A[0, 0] * A[1, 1] - A[0, 1] * A[1, 0]
A_inv = torch.stack([
    torch.stack([A[1, 1], -A[0, 1]]),
    torch.stack([-A[1, 0], A[0, 0]]),
])
A_inv = A_inv / ad_bc
print(A_inv)

In [None]:
# Library inverse of the same A, for comparison with the formula-based result.
A_inv2 = torch.inverse(A)
print(A_inv2)

In [None]:
# A times its formula-based inverse; expected to be close to the 2 x 2 identity.
torch.matmul(A, A_inv)

In [None]:
# The formula-based inverse times A; expected to be close to the 2 x 2 identity as well.
torch.matmul(A_inv, A)

### Properties of inverse matrix

* $(\boldsymbol{A}^{-1})^{-1} = \boldsymbol{A}$
* $(\boldsymbol{A} \boldsymbol{B})^{-1} = \boldsymbol{B}^{-1} \boldsymbol{A}^{-1}$
* $(\boldsymbol{A}^{-1})^{\top} = (\boldsymbol{A}^{\top})^{-1}$
* $\det \boldsymbol{A}^{-1} = (\det \boldsymbol{A})^{-1}$

In [None]:
# property 1: inverting twice returns the original matrix (up to rounding)
A = torch.randn(3, 3)
A_inv_inv = torch.inverse(torch.inverse(A))
# Exact element-wise equality is shown first; rounding can make entries differ.
print(torch.eq(A_inv_inv, A))
print(A_inv_inv)
print(A)

In [None]:
# Tolerance-based comparison, the appropriate test for floating-point results.
torch.allclose(A_inv_inv, A)

In [None]:
# property 2: the inverse of a product is the product of the inverses in reverse order
A = torch.randn(3, 3)
B = torch.randn(3, 3)
C = torch.matmul(A, B).inverse()
D = torch.matmul(torch.inverse(B), torch.inverse(A))
print(torch.allclose(C, D))

In [None]:
# property 3: inversion and transposition commute
A = torch.randn(3, 3)
C = A.inverse().T
D = A.T.inverse()
print(torch.allclose(C, D))

In [None]:
# property 4: the determinant of the inverse is the reciprocal of the determinant
a = A.inverse().det()
b = 1 / A.det()
print(torch.allclose(a, b))

### The determinant

* The determinant of a square matrix $\boldsymbol{A} \in \mathbb{R}^{n \times n}$ is a function that maps matrices to real scalars
$$\det : \mathbb{R}^{n \times n} \rightarrow \mathbb{R}$$

Definition
$$\det (\boldsymbol{A}) := \sum_{\sigma \in S_{n}}
\left( \mathrm{sgn}(\sigma) \prod_{i=1}^{n} a_{i, \sigma_{i}} \right)$$

Properties of the determinant
* $|\boldsymbol{I}_{n}| = 1$
* $| a_{1}, \ldots, c a_{i}, \ldots, a_{n} | = c| a_{1}, \ldots, a_{i}, \ldots, a_{n} |$
* $| c \boldsymbol{A} | = c^{n} | \boldsymbol{A}|$
* $| a_{1}, \ldots, a_{j}, \ldots, a_{i}, \ldots, a_{n} |
= -| a_{1}, \ldots, a_{i}, \ldots, a_{j}, \ldots, a_{n} |$
* $|\boldsymbol{A}^{\top}| = |\boldsymbol{A}|$
* $|\boldsymbol{A} \boldsymbol{B}| = |\boldsymbol{A}| |\boldsymbol{B}|$
* $|\boldsymbol{A}^{-1}| = 1/|\boldsymbol{A}|$

In [None]:
# the determinant of the identity matrix is 1
n = 5
I = torch.eye(n)
print(f'the deteminant of identity matrix: {I.det()}')

In [None]:
# the determinant after multiplying a single column by a scalar c:
# det(B) is expected to equal c * det(A)
torch.manual_seed(219)
n = 2
specific_column = 1  # zero-based column index; must be less than n
c = 3
# Fail early with a clear message instead of an IndexError further down.
assert 0 <= specific_column < n, 'specific_column must be a valid column index of A'

A = torch.randn(n, n)
det_A = torch.det(A)
B = deepcopy(A)  # independent copy, so A itself is left untouched
B[:, specific_column] = B[:, specific_column] * c
# B = A * c
det_AA = torch.det(B)

print(f'det A: {det_A:.4f}')
print(f'det (A after multipy c in a one column): {det_AA:.4f}')
print(f'c * det A: {c * det_A:.4f}')
# print(f'c^{n} * det A: {np.power(c, n) * det_A:.4f}')

In [None]:
# the determinant after interchanging a pair of columns: the sign flips
torch.manual_seed(219)
n = 5
i, j = 1, 4  # the two columns to swap

A = torch.randn(n, n)
det_A = A.det()
B = deepcopy(A)
B[:, [i, j]] = A[:, [j, i]]  # swap both columns in one assignment
det_AA = B.det()

print(f'det A: {det_A:.4f}')
print(f'det (A after interchanging any pari of columns): {det_AA:.4f}')

In [None]:
# the determinant is unchanged by transposition
torch.manual_seed(219)
n = 5
A = torch.randn(n, n)
det_A = A.det()
det_A_T = A.T.det()
print(f'det A: {det_A:.4f}')
print(f'det A.T: {det_A_T:.4f}')

In [None]:
# the determinant of a product equals the product of the determinants
torch.manual_seed(219)
n = 5
A = torch.randn(n, n)
B = torch.randn(n, n)
det_A, det_B = A.det(), B.det()
det_AB = (A @ B).det()
print(f'det AB: {det_AB:.4f}')
print(f'det A x det B: {det_A * det_B:.4f}')

In [None]:
# the determinant of an inverse matrix is the reciprocal of the determinant
torch.manual_seed(219)
n = 5
A = torch.randn(n, n)
det_A = A.det()
det_A_inv = A.inverse().det()
print(f'det A: {det_A:.4f}')
print(f'det A_inv: {det_A_inv:.4f}')
print(f'1 / (det A): {1 / det_A:.4f}')

## Solving Systems of Linear Equations

* $\boldsymbol{A} \boldsymbol{x} = \boldsymbol{b}$
* if $\boldsymbol{A}^{-1}$ exists
  * solution: $\boldsymbol{x} = \boldsymbol{A}^{-1} \boldsymbol{b}$
  
proof)
$$\boldsymbol{A} \boldsymbol{x} = \boldsymbol{b} \\
\boldsymbol{A}^{-1} \boldsymbol{A} \boldsymbol{x} = \boldsymbol{A}^{-1} \boldsymbol{b} \\
\boldsymbol{I}_{n} \boldsymbol{x} = \boldsymbol{A}^{-1} \boldsymbol{b} \\
\boldsymbol{x} = \boldsymbol{A}^{-1} \boldsymbol{b}$$

example1)
$$
\begin{array}{l}
4 x_{1} + 4 x_{2} = 5 \\
2 x_{1} - 4 x_{2} = 1
\end{array}
\implies
\boldsymbol{A} =
\left[ \begin{array}{cc}
4 & 4 \\
2 & -4
\end{array} \right],
\boldsymbol{b} =
\left[ \begin{array}{c}
5 \\
1
\end{array} \right]
$$
$$
\boldsymbol{x} = \boldsymbol{A}^{-1} \boldsymbol{b} =
\left[ \begin{array}{c}
1 \\
1/4
\end{array} \right]
$$

In [None]:
# Coefficient matrix and right-hand side of example 1 (float32).
A = torch.tensor([[4., 4.],
                  [2., -4.]])
b = torch.tensor([5., 1.])

In [None]:
# Solve A x = b by applying the inverse of A to b.
x = torch.inverse(A) @ b
print(f'solution x: {x}')

example2)
$$
\begin{array}{l}
2 x_{1} + 4 x_{2} = 1 \\
- x_{1} + 5 x_{2} = 3
\end{array}
\implies
\boldsymbol{A} =
\left[ \begin{array}{cc}
2 & 4 \\
-1 & 5
\end{array} \right],
\boldsymbol{b} =
\left[ \begin{array}{c}
1 \\
3
\end{array} \right]
$$
$$
\boldsymbol{x} = \boldsymbol{A}^{-1} \boldsymbol{b} =
\left[ \begin{array}{c}
-1/2 \\
1/2
\end{array} \right]
$$

In [None]:
# Coefficient matrix and right-hand side of example 2 (float32).
A = torch.tensor([[2., 4.],
                  [-1., 5.]])
b = torch.tensor([1., 3.])

In [None]:
# Solve A x = b by applying the inverse of A to b.
x = A.inverse().matmul(b)
print(f'solution x: {x}')

example3)
$$
\begin{array}{l}
6 x_{1} + 2 x_{2} = 2 \\
3 x_{1} + x_{2} = 3
\end{array}
\implies
\boldsymbol{A} =
\left[ \begin{array}{cc}
6 & 2 \\
3 & 1
\end{array} \right],
\boldsymbol{b} =
\left[ \begin{array}{c}
2 \\
3
\end{array} \right]
$$
$$
\boldsymbol{x} = \boldsymbol{A}^{-1} \boldsymbol{b} ?
$$

In [None]:
# Coefficient matrix and right-hand side of example 3 (float32).
# The first row of A is twice the second, so ad - bc = 6 * 1 - 2 * 3 = 0.
A = torch.tensor([[6., 2.],
                  [3., 1.]])
b = torch.tensor([2., 3.])

In [None]:
# Attempt x = A^{-1} b. For a singular A (as in example 3, where ad - bc = 0)
# there is no inverse and A.inverse() is expected to raise a RuntimeError.
# The error is caught and displayed so that this demonstration does not halt
# "Restart Kernel -> Run All".
try:
  x = torch.matmul(A.inverse(), b)
  print(f'solution x: {x}')
except RuntimeError as err:
  print(f'{type(err).__name__}: {err}')

example4)
$$
\begin{array}{l}
6 x_{1} + 2 x_{2} = 2 \\
3 x_{1} + x_{2} = 1
\end{array}
\implies
\boldsymbol{A} =
\left[ \begin{array}{cc}
6 & 2 \\
3 & 1
\end{array} \right],
\boldsymbol{b} =
\left[ \begin{array}{c}
2 \\
1
\end{array} \right]
$$
$$
\boldsymbol{x} = \boldsymbol{A}^{-1} \boldsymbol{b} ?
$$

## Basic Operations of Matrices VII
### Trace

The trace of a square matrix $\boldsymbol{A} \in \mathbb{R}^{n \times n}$ is defined as
$$\mathrm{tr}(\boldsymbol{A}) := \sum_{i=1}^{n} a_{ii}
= a_{11} + a_{22} + \cdots + a_{nn}$$

Properties
* $\mathrm{tr} ( \boldsymbol{A} ) = \mathrm{tr} ( \boldsymbol{A}^{\top} )$
* $\mathrm{tr} ( \boldsymbol{A} + \boldsymbol{B} ) = \mathrm{tr} ( \boldsymbol{A} ) + \mathrm{tr} ( \boldsymbol{B} )$
* $\mathrm{tr} ( c \boldsymbol{A} ) = c \, \mathrm{tr} ( \boldsymbol{A} )$, where $c \in \mathbb{R}$
* For $\boldsymbol{A} \in \mathbb{R}^{m \times n}, \ \boldsymbol{B} \in \mathbb{R}^{n \times m}$, 
$$\mathrm{tr} ( \underbrace{\boldsymbol{A} \boldsymbol{B}}_{\in \mathbb{R}^{m \times m}} )
= \mathrm{tr} ( \underbrace{\boldsymbol{B} \boldsymbol{A}}_{\in \mathbb{R}^{n \times n}} )
= \sum_{i=1}^{m} \sum_{j=1}^{n} a_{ij} b_{ji}$$

In [None]:
# trace = sum of the main-diagonal entries (2 + 6 here)
A = torch.tensor([[2, 3], [4, 6]])
tr = A.trace()
print(f'trace of A: {tr}')

In [None]:
# tr(A) == tr(A^T): transposition leaves the main diagonal untouched
torch.manual_seed(219)
n = 4
A = torch.randn(n, n)
tr, tr_T = A.trace(), A.T.trace()
print(f'trace of A: {tr:.4f}')
print(f'trace of A.T: {tr_T:.4f}')

In [None]:
# tr(A + B) == tr(A) + tr(B)
torch.manual_seed(219)
n = 4
A, B = torch.randn(n, n), torch.randn(n, n)  # drawn in this order: A first, then B
tr_A, tr_B = A.trace(), B.trace()
tr_AB = (A + B).trace()  # trace of the sum A + B
print(f'trace of A + B: {tr_AB:.4f}')
print(f'trace of A + trace of B: {tr_A + tr_B:.4f}')

In [None]:
# tr(cA) == c * tr(A)
torch.manual_seed(219)
n, c = 4, 3
A = torch.randn(n, n)
tr_A = A.trace()
tr_cA = (c * A).trace()
print(f'trace of cA: {tr_cA:.4f}')
print(f'trace of A x scalar c: {tr_A * c:.4f}')

In [None]:
# tr(AB) == tr(BA), even though AB and BA generally differ
torch.manual_seed(219)
n = 4
A, B = torch.randn(n, n), torch.randn(n, n)  # drawn in this order: A first, then B
tr_AB = (A @ B).trace()
tr_BA = (B @ A).trace()
print(f'trace of AB: {tr_AB:.4f}')
print(f'trace of BA: {tr_BA:.4f}')

## Basic Operations of Matrices VIII
### Orthogonal matrix

* A square matrix $\boldsymbol{A} \in \mathbb{R}^{n \times n}$ whose rows (and columns) are mutually orthonormal
$$\boldsymbol{A}^{\top} \boldsymbol{A} = \boldsymbol{A} \boldsymbol{A}^{\top} = \boldsymbol{I}_{n}$$

Properties
* $\boldsymbol{A}^{-1} = \boldsymbol{A}^{\top}$

Examples of orthogonal matrix

<img width="600" alt="git fig4" src="https://user-images.githubusercontent.com/11681225/184408353-e86a7950-26b2-4855-a7d6-921588ec6531.png">

In [None]:
# reflection across the x-axis: (x, y) -> (x, -y)
A = torch.tensor([[1, 0], [0, -1]])
A_T_A = A.T @ A  # equals the identity for an orthogonal matrix
print(A_T_A)

In [None]:
# rotation about the origin by the angle theta
theta = np.pi / 3  # 60 degrees
A = torch.tensor(
  [[np.cos(theta), -np.sin(theta)],
   [np.sin(theta), np.cos(theta)]],
  dtype=torch.float32,
)
A_T_A = A.T @ A  # identity up to float32 rounding
print(A_T_A)

In [None]:
# permutation of the coordinate axes: every row and column holds a single 1
A = torch.tensor([
  [0, 0, 1, 0],
  [0, 0, 0, 1],
  [1, 0, 0, 0],
  [0, 1, 0, 0],
])
A_T_A = A.T @ A  # equals the 4x4 identity
print(A_T_A)

# Matrix Decompositions
## Eigenvalues and Eigenvectors

* Given a square matrix $\boldsymbol{A} \in \mathbb{R}^{n \times n}$
$$\boldsymbol{A} \boldsymbol{x} = \lambda \boldsymbol{x}, \quad \boldsymbol{x} \neq \boldsymbol{0}$$
* eigenvalues: $\lambda \in \mathbb{C}$
* corresponding eigenvectors: $\boldsymbol{x} \in \mathbb{C}^{n} \setminus \{ \boldsymbol{0} \}$

Example
$$\boldsymbol{A} = \left[ \begin{array}{cc}
5 & -6 \\
2 & -2
\end{array} \right]$$
* eigenvalues: $\lambda_{1} = 2, \ \lambda_{2} = 1$
* eigenvectors:
$$\boldsymbol{x}_{1} = \frac{1}{\sqrt{5}} \left[ \begin{array}{c}
2 \\
1
\end{array} \right]
\quad
\boldsymbol{x}_{2} = \frac{1}{\sqrt{13}} \left[ \begin{array}{c}
3 \\
2
\end{array} \right]$$

In [None]:
np.set_printoptions(precision=4)
np.random.seed(219)

# Random 3x3 matrix with standard-normal entries.
A = np.random.normal(loc=0.0, scale=1.0, size=(3, 3))
# Alternative inputs (the 2x2 worked example, as NumPy array or torch tensor):
# A = np.array([[5, -6], [2, -2]], dtype=np.float32)
# A = torch.tensor(A, dtype=torch.float32)
# A = torch.tensor([[5, -6], [2, -2]], dtype=torch.float64)

In [None]:
# eigendecomposition: L receives the eigenvalues and V the eigenvectors
# (np.linalg.eig stores the eigenvector for L[i] in column V[:, i])
L, V = np.linalg.eig(A)
# torch alternative, for use with the commented-out tensor inputs:
# L, V = torch.linalg.eig(A)

In [None]:
# display the eigenvalues computed above
L

In [None]:
# display the eigenvectors computed above (one per column)
V

### Graphical intuition in two dimensions

In [None]:
def mesh_plot(X, Y, xlim=2, ylim=2):
  """Draw the points (X, Y) as dots on equal-aspect axes.

  Args:
    X: x-coordinates of the points, passed straight to ``Axes.plot``.
    Y: y-coordinates of the points, same layout as ``X``.
    xlim: the x-axis is limited to ``[-xlim, xlim]``.
    ylim: the y-axis is limited to ``[-ylim, ylim]``.

  The figure is shown immediately; nothing is returned.
  """
  fig, ax = plt.subplots()
  # markers only: no lines connecting neighbouring points
  ax.plot(X, Y, linestyle='None', marker='.')
  ax.set_xlim(-xlim, xlim)
  ax.set_ylim(-ylim, ylim)
  # coordinate axes through the origin
  ax.axvline(x=0, color='black')
  ax.axhline(y=0, color='black')
  ax.set_aspect('equal', adjustable='box')
  plt.show()

In [None]:
n = 21  # number of grid points along each axis
x = np.linspace(-1, 1, num=n)
y = np.linspace(-1, 1, num=n)
# full coordinate arrays: X[i, j], Y[i, j] are the coordinates of grid point (i, j)
X, Y = np.meshgrid(x, y)
mesh_plot(X, Y)

In [None]:
def linear_transformation(A, X, Y):
  """Apply the 2x2 linear map ``A`` to every point of a coordinate grid.

  Each point ``(X[...], Y[...])`` is mapped to ``A @ [x, y]^T``.

  Args:
    A: array-like of shape (2, 2), the transformation matrix.
    X: array-like holding the x-coordinates of the points (any shape).
    Y: array-like holding the y-coordinates, same shape as ``X``.

  Returns:
    A tuple ``(XX, YY)`` of newly allocated arrays with the transformed
    x- and y-coordinates. ``X`` and ``Y`` are not modified.

  Raises:
    ValueError: if ``A`` is not 2x2 or if ``X`` and ``Y`` differ in shape.
  """
  A = np.asarray(A)
  X, Y = np.asarray(X), np.asarray(Y)
  if A.shape != (2, 2):
    raise ValueError(f'A must have shape (2, 2), got {A.shape}')
  if X.shape != Y.shape:
    raise ValueError(f'X and Y must have the same shape, got {X.shape} and {Y.shape}')
  # Row-by-row expansion of A @ [x, y]^T, evaluated for all points at once.
  # The grid size is taken from X and Y themselves, not from a global `n`,
  # so grids of any shape work.
  XX = A[0, 0] * X + A[0, 1] * Y
  YY = A[1, 0] * X + A[1, 1] * Y
  return XX, YY

Example1

<img width="600" alt="git fig1" src="https://user-images.githubusercontent.com/11681225/184524938-01fa0126-2cce-4846-aec6-889af0edbff9.png">

In [None]:
# Axis-aligned scaling: x is multiplied by 0.5 and y by 2.
A = np.diag([0.5, 2.])
L, V = np.linalg.eig(A)
print(f'The eigenvalues of a matrix A: {L}')
print(f'The eigenvectors of a matrix A:\n{V}')
XX, YY = linear_transformation(A, X, Y)
mesh_plot(XX, YY, xlim=3, ylim=3)

Example2

<img width="600" alt="git fig2" src="https://user-images.githubusercontent.com/11681225/184524922-cd9d123e-d974-4fd5-b6e4-c991a9ee63b7.png">

In [None]:
# Horizontal shear: (x, y) -> (x + 0.5 * y, y).
A = np.array([[1.0, 0.5], [0.0, 1.0]])
L, V = np.linalg.eig(A)
print(f'The eigenvalues of a matrix A: {L}')
print(f'The eigenvectors of a matrix A:\n{V}')
XX, YY = linear_transformation(A, X, Y)
mesh_plot(XX, YY, xlim=3, ylim=3)

Example3

<img width="600" alt="git fig3" src="https://user-images.githubusercontent.com/11681225/184784993-d689071e-88fd-41ae-b1ab-c2ad4e88a4aa.png">

In [None]:
# Rotation about the origin by pi/6 (30 degrees).
A = np.array([
  [np.cos(np.pi / 6), -np.sin(np.pi / 6)],
  [np.sin(np.pi / 6), np.cos(np.pi / 6)],
])
L, V = np.linalg.eig(A)
print(f'The eigenvalues of a matrix A: {L}')
print(f'The eigenvectors of a matrix A:\n{V}')
XX, YY = linear_transformation(A, X, Y)
mesh_plot(XX, YY)

Example4

<img width="600" alt="git fig4" src="https://user-images.githubusercontent.com/11681225/184524925-41e7ec7b-74ab-4652-b5e7-b49350c4c6cb.png">

In [None]:
# Singular matrix (det = 0): (x, y) -> (x - y) * (1, -1), so every point
# is mapped onto the line y = -x.
A = np.array([[1, -1], [-1, 1]])
L, V = np.linalg.eig(A)
print(f'The eigenvalues of a matrix A: {L}')
print(f'The eigenvectors of a matrix A:\n{V}')
XX, YY = linear_transformation(A, X, Y)
mesh_plot(XX, YY)

Example5

<img width="600" alt="git fig5" src="https://user-images.githubusercontent.com/11681225/184152969-70500d15-21c4-4f08-9a5c-79eb2fe457c0.png">

In [None]:
# Symmetric matrix.
A = np.array([[1.0, 0.5], [0.5, 1.0]])
L, V = np.linalg.eig(A)
print(f'The eigenvalues of a matrix A: {L}')
print(f'The eigenvectors of a matrix A:\n{V}')
XX, YY = linear_transformation(A, X, Y)
mesh_plot(XX, YY)

In [None]:
# Random 2x2 matrix with standard-normal entries; no seed is set in this
# cell, so the draw continues the current NumPy random stream.
# Uniform alternative: A = np.random.uniform(size=(2, 2))
A = np.random.normal(loc=0.0, scale=1.0, size=(2, 2))
L, V = np.linalg.eig(A)
print(f'The eigenvalues of a matrix A: {L}')
print(f'The eigenvectors of a matrix A:\n{V}')
XX, YY = linear_transformation(A, X, Y)
mesh_plot(XX, YY, xlim=3, ylim=3)

### Properties of Eigenvalues and Eigenvectors

* The eigenvalues of a triangular matrix are the entries on its main diagonal
* A matrix $\boldsymbol{A}$ and its transpose $\boldsymbol{A}^{\top}$ possess the same eigenvalues, but not necessarily the same eigenvectors
* The eigenvectors $\boldsymbol{x}_{1}, \ldots, \boldsymbol{x}_{n}$ of a matrix $\boldsymbol{A} \in \mathbb{R}^{n \times n}$ with $n$ distinct eigenvalues $\lambda_{1}, \ldots, \lambda_{n}$ are linearly independent
* The **trace** of a matrix $\boldsymbol{A} \in \mathbb{R}^{n \times n}$ is the *sum* of its eigenvalues
$$\mathrm{tr}(\boldsymbol{A}) = \sum_{i=1}^{n} \lambda_{i}$$
* The **determinant** of a matrix $\boldsymbol{A} \in \mathbb{R}^{n \times n}$ is the *product* of its eigenvalues
$$\det(\boldsymbol{A}) = \prod_{i=1}^{n} \lambda_{i}$$

In [None]:
torch.manual_seed(219)
A = torch.rand(3, 3)
# Symmetrize A. Note that A.T then equals A, so the two decompositions
# below operate on the same matrix.
A = 0.5 * (A + A.T)

L, V = torch.linalg.eig(A)      # eigenpairs of A
L1, V1 = torch.linalg.eig(A.T)  # eigenpairs of A^T

In [None]:
# eigenvalues of A == eigenvalues of A.T
# torch.linalg.eig returns complex eigenvalues, so the real and imaginary
# parts are compared separately
print(f'Are eigenvalus real part the same: {torch.allclose(L1.real, L.real)}')
print(f'Are eigenvalus imaginary part the same: {torch.allclose(L1.imag, L.imag)}')

In [None]:
# tr(A) == sum of the eigenvalues (the real part of the complex sum is compared)
torch.allclose(torch.sum(L).real, A.trace())

In [None]:
# det(A) == product of the eigenvalues (the real part of the complex product is compared)
torch.allclose(torch.prod(L).real, A.det())

### Eigendecomposition

* Suppose a matrix $\boldsymbol{A} \in \mathbb{R}^{n \times n}$ has $n$ linearly independent eigenvectors $\boldsymbol{v}_{1}, \ldots, \boldsymbol{v}_{n}$ with corresponding eigenvalues $\lambda_{1}, \ldots, \lambda_{n}$
* Let $\boldsymbol{V} = [\boldsymbol{v}_{1}, \ldots, \boldsymbol{v}_{n}] \in \mathbb{R}^{n \times n}$ be the matrix whose columns are these eigenvectors
* Let a diagonal matrix $\boldsymbol{\Lambda} = \mathrm{diag} (\lambda_{1}, \ldots, \lambda_{n})$

$$\boldsymbol{A} = \boldsymbol{V} \boldsymbol{\Lambda} \boldsymbol{V}^{-1}$$
* Note that only diagonalizable matrices can be factorized in this way

In [None]:
np.set_printoptions(precision=4)
np.random.seed(219)
# Random 3x3 matrix and its eigenpairs (the eigenvector for L[i] is column V[:, i]).
A = np.random.normal(loc=0.0, scale=1.0, size=(3, 3))
L, V = np.linalg.eig(A)
print(f'The eigenvalues of a matrix A: {L}')
print(f'The eigenvectors of a matrix A:\n{V}')

In [None]:
# eigendecomposition check: A == V Lambda V^{-1}
Lambda = np.diag(L)
np.allclose(np.matmul(np.matmul(V, Lambda), np.linalg.inv(V)), A)

In [None]:
# degenerate eigenvalues case: this shear matrix has the eigenvalue 1 twice
A = np.array([[1.0, 0.5], [0.0, 1.0]])
L, V = np.linalg.eig(A)
Lambda = np.diag(L)
# does V Lambda V^{-1} reproduce A?
np.allclose(np.matmul(np.matmul(V, Lambda), np.linalg.inv(V)), A)

### Diagonalization of Symmetric Matrices

* A square matrix $\boldsymbol{A} \in \mathbb{R}^{n \times n}$ is said to be orthogonally diagonalizable if there are an orthogonal matrix $\boldsymbol{Q}$ (with $\boldsymbol{Q}^{-1} = \boldsymbol{Q}^{\top}$) and a diagonal matrix $\boldsymbol{\Lambda}$ such that
$$\boldsymbol{A} = \boldsymbol{Q} \boldsymbol{\Lambda} \boldsymbol{Q}^{\top} = \boldsymbol{Q} \boldsymbol{\Lambda} \boldsymbol{Q}^{-1}$$

In [None]:
# The seed is not reset in this cell (the call is left commented out), so A
# continues the current NumPy random stream.
# np.random.seed(219)
np.set_printoptions(precision=4)
A = np.random.normal(size=(3, 3))
A = (A + A.T) / 2  # symmetric matrix
# np.linalg.eigh is the solver for symmetric (Hermitian) input: it returns
# real eigenvalues in ascending order and orthonormal eigenvectors as the
# columns of V. That orthogonality (V^{-1} == V^T) is what this section
# demonstrates; the general-purpose np.linalg.eig does not promise it.
L, V = np.linalg.eigh(A)
print(f'The eigenvalues of a matrix A: {L}')
print(f'The eigenvectors of a matrix A:\n{V}')

In [None]:
# eigendecomposition check: A == V Lambda V^{-1}
Lambda = np.diag(L)
np.allclose(np.matmul(np.matmul(V, Lambda), np.linalg.inv(V)), A)

In [None]:
# the eigenvector matrix V plays the role of Q: compare its inverse with its transpose
print(f'the inverse of Q and the transpose of Q are the same: {np.allclose(np.linalg.inv(V), V.T)}')

## Singular Value Decomposition


* Let $\boldsymbol{A} \in \mathbb{R}^{m \times n}$ be a matrix of rank $r \in [0, \min (m, n)]$
$$\boldsymbol{A} = \boldsymbol{U} \boldsymbol{\Sigma} \boldsymbol{V}^{\top}$$
* Orthogonal matrix $\boldsymbol{U} \in \mathbb{R}^{m \times m}$; left singular vectors of $\boldsymbol{A}$: columns of $\boldsymbol{U}$
* Orthogonal matrix $\boldsymbol{V} \in \mathbb{R}^{n \times n}$; right singular vectors of $\boldsymbol{A}$: columns of $\boldsymbol{V}$
* A singular value matrix $\boldsymbol{\Sigma} \in \mathbb{R}^{m \times n}$ whose only nonzero entries are the diagonal entries of a diagonal matrix $\boldsymbol{D} \in \mathbb{R}^{r \times r}$: $\sigma_{i} = \boldsymbol{D}_{ii}$
* Singular values: $\sigma_{1} \geq \sigma_{2} \geq \ldots \geq \sigma_{r} > 0$

<img width="300" alt="svd" src="https://user-images.githubusercontent.com/11681225/184527038-7144131d-d80d-470b-b294-54de3da90f39.png">

Example

$$\boldsymbol{A} = \left[ \begin{array}{ccc}
1 & 0 & 1 \\
-2 & 1 & 0
\end{array} \right]
= \underbrace{\left[ \begin{array}{cc}
1/\sqrt{5} & 2/\sqrt{5} \\
-2/\sqrt{5} & 1/\sqrt{5}
\end{array} \right]}_{\boldsymbol{U}}
\underbrace{\left[ \begin{array}{ccc}
\sqrt{6} & 0 & 0 \\
0 & 1 & 0
\end{array} \right]}_{\boldsymbol{\Sigma}}
\underbrace{\left[ \begin{array}{ccc}
5/\sqrt{30} & -2/\sqrt{30} & 1/\sqrt{30} \\
0 & 1/\sqrt{5} & 2/\sqrt{5} \\
-1/\sqrt{6} & -2/\sqrt{6} & 1/\sqrt{6}
\end{array} \right]}_{\boldsymbol{V}^{\top}}
$$

In [None]:
# singular value decomposition of the 2x3 example matrix
# (S is returned as a 1-D array of singular values, VT is V transposed)
A = np.array([[1, 0, 1], [-2, 1, 0]])
U, S, VT = np.linalg.svd(A, full_matrices=True)

In [None]:
# np.linalg.svd returns the singular values as a 1-D array; embed them on the
# diagonal of an m x n matrix so that A == U @ Sigma @ V^T can be checked.
# Sigma is sized from A.shape instead of a hard-coded zero column, and the
# values are read back through np.diag when S is already 2-D, so the cell can
# be re-run without failing. As before, S ends up holding the m x n matrix.
singular_values = np.diag(S) if S.ndim == 2 else S
S = np.zeros(A.shape)
np.fill_diagonal(S, singular_values)
np.allclose(U @ S @ VT, A)