In [1]:
import torch
import tensorflow as tf


def pr(*sth):
    """Print all positional arguments together as one tuple.

    The arguments are not unpacked for ``print``: a single call such as
    ``pr(a)`` therefore shows ``(a,)`` and ``pr(a, b)`` shows ``(a, b)``.
    """
    packed = tuple(sth)
    print(packed)

## Scalars

In [2]:
# x1 is built from a bare number, x2 from a one-element list.
x1, x2 = torch.tensor(3), torch.tensor([3])

In [3]:
# Show both tensors side by side (pr prints its arguments as one tuple).
pr(x1, x2)

(tensor(3), tensor([3]))


In [4]:
# Compare the shapes of the tensor built from a number and the one built from a list.
pr(x1.shape, x2.shape)

(torch.Size([]), torch.Size([1]))


In [5]:
# Sum, product and power of the two tensors.
x1 + x2, x1 * x2, x1**x2

(tensor([6]), tensor([9]), tensor([27]))

In [6]:
# TensorFlow version: one constant built from a number, one from a one-element list.
x1, x2 = tf.constant(3), tf.constant([4])

In [7]:
# Sum, difference and product of the two TensorFlow constants.
x1 + x2, x1 - x2, x1 * x2

(<tf.Tensor: shape=(1,), dtype=int32, numpy=array([7])>,
 <tf.Tensor: shape=(1,), dtype=int32, numpy=array([-1])>,
 <tf.Tensor: shape=(1,), dtype=int32, numpy=array([12])>)

## Vectors

A vector is simply a list of scalar values. We call these values the elements (also called entries or components) of the vector.

In [8]:
# A vector holding the integers 0..3.
x = torch.arange(4)
x

tensor([0, 1, 2, 3])

In [9]:
# TensorFlow version of the same 0..3 vector.
x = tf.range(4)
x

<tf.Tensor: shape=(4,), dtype=int32, numpy=array([0, 1, 2, 3])>

### Length, Dimensionality, Shape 

In [10]:
# Integers from 4 up to (but not including) 10, then the length and shape.
x = torch.arange(start=4, end=10)
(len(x), x.shape)

(6, torch.Size([6]))

In [11]:
# Three ways to ask for the size: len() gives a Python int, tf.shape() a
# tensor, and .shape a TensorShape.
x = tf.range(4,10)
len(x), tf.shape(x), x.shape

(6, <tf.Tensor: shape=(1,), dtype=int32, numpy=array([6])>, TensorShape([6]))

## Matrices

In [12]:
# Lay 0..24 out as rows of five, then display the transpose.
A = torch.arange(25).reshape(-1, 5)
A.t()

tensor([[ 0,  5, 10, 15, 20],
        [ 1,  6, 11, 16, 21],
        [ 2,  7, 12, 17, 22],
        [ 3,  8, 13, 18, 23],
        [ 4,  9, 14, 19, 24]])

In [13]:
# Lay 0..35 out as rows of six, then display the transpose.
A = tf.reshape(tf.range(36),(-1,6))
tf.transpose(A) # TensorFlow counterpart of the PyTorch `A.T` shown earlier

<tf.Tensor: shape=(6, 6), dtype=int32, numpy=
array([[ 0,  6, 12, 18, 24, 30],
       [ 1,  7, 13, 19, 25, 31],
       [ 2,  8, 14, 20, 26, 32],
       [ 3,  9, 15, 21, 27, 33],
       [ 4, 10, 16, 22, 28, 34],
       [ 5, 11, 17, 23, 29, 35]])>

## Tensors

In [14]:
# A 3-axis tensor: 2 blocks, each with 2 rows of 5 values.
X = torch.reshape(torch.arange(20), (2, 2, 5))
X

tensor([[[ 0,  1,  2,  3,  4],
         [ 5,  6,  7,  8,  9]],

        [[10, 11, 12, 13, 14],
         [15, 16, 17, 18, 19]]])

In [15]:
# A 3-axis tensor: 2 blocks, each with 3 rows of 4 values.
X = tf.reshape(tf.range(24), (2,3,4))
X

<tf.Tensor: shape=(2, 3, 4), dtype=int32, numpy=
array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]])>

## Reduction

In [16]:
# Float vector 0..3 and the sum of its elements.
x = torch.arange(4, dtype=torch.float32)
x, torch.sum(x)

(tensor([0., 1., 2., 3.]), tensor(6.))

In [19]:
# 4-row float matrix of 0..19 (column count inferred), its shape, and the sum of all entries.
A = torch.reshape(torch.arange(20, dtype=torch.float32), (4, -1))
A, A.size(), torch.sum(A)

(tensor([[ 0.,  1.,  2.,  3.,  4.],
         [ 5.,  6.,  7.,  8.,  9.],
         [10., 11., 12., 13., 14.],
         [15., 16., 17., 18., 19.]]), torch.Size([4, 5]), tensor(190.))

In [20]:
# Summing over axis 0 collapses the rows, leaving one total per column.
A_sum_axis0 = A.sum(axis=0)
A_sum_axis0, A_sum_axis0.shape

(tensor([30., 34., 38., 42., 46.]), torch.Size([5]))

In [21]:
# Summing over axis 1 collapses the columns, leaving one total per row.
A_sum_axis1 = A.sum(axis=1)
A_sum_axis1, A_sum_axis1.shape

(tensor([10., 35., 60., 85.]), torch.Size([4]))

In [22]:
# The mean is the total divided by the number of elements.
A.mean(), A.sum() / A.numel()

(tensor(9.5000), tensor(9.5000))

In [23]:
# Column means: the axis-0 sum divided by the number of rows.
A.mean(axis=0), A.sum(axis=0)/A.shape[0]

(tensor([ 7.5000,  8.5000,  9.5000, 10.5000, 11.5000]),
 tensor([ 7.5000,  8.5000,  9.5000, 10.5000, 11.5000]))

In [24]:
# Row means: the axis-1 sum divided by the number of columns.
A.mean(axis=1), A.sum(axis=1)/A.shape[1]

(tensor([ 2.,  7., 12., 17.]), tensor([ 2.,  7., 12., 17.]))

In [25]:
# Float vector 0..3 and the sum of its elements, TensorFlow version.
x = tf.range(4, dtype=tf.float32)
x, tf.reduce_sum(x)

(<tf.Tensor: shape=(4,), dtype=float32, numpy=array([0., 1., 2., 3.], dtype=float32)>,
 <tf.Tensor: shape=(), dtype=float32, numpy=6.0>)

In [26]:
# Note: this matrix has 5 rows (shape (5, 4)), whereas the PyTorch matrix in
# the earlier cells was reshaped to 4 rows.
A = tf.reshape(tf.range(20, dtype=tf.float32), (5,-1))
A.shape, tf.reduce_sum(A)

(TensorShape([5, 4]), <tf.Tensor: shape=(), dtype=float32, numpy=190.0>)

In [27]:
# Summing over axis 0 collapses the rows, leaving one total per column.
A_sum_axis0 = tf.reduce_sum(A, axis=0)
A_sum_axis0, A_sum_axis0.shape

(<tf.Tensor: shape=(4,), dtype=float32, numpy=array([40., 45., 50., 55.], dtype=float32)>,
 TensorShape([4]))

In [28]:
# Summing over axis 1 collapses the columns, leaving one total per row.
A_sum_axis1 = tf.reduce_sum(A, axis=1)
A_sum_axis1, A_sum_axis1.shape

(<tf.Tensor: shape=(5,), dtype=float32, numpy=array([ 6., 22., 38., 54., 70.], dtype=float32)>,
 TensorShape([5]))

In [30]:
# Mean computed two ways. The element count is cast to A's dtype so the
# division stays inside TensorFlow rather than going through `.numpy()`,
# which is only available in eager execution.
tf.reduce_mean(A), tf.reduce_sum(A) / tf.cast(tf.size(A), A.dtype)

(<tf.Tensor: shape=(), dtype=float32, numpy=9.5>,
 <tf.Tensor: shape=(), dtype=float32, numpy=9.5>)

### Non-Reduction Sum

In [34]:
# Row sums with and without keeping the reduced axis; compare the shapes.
A = torch.reshape(torch.arange(20, dtype=torch.float32), (4, -1))
sum_A = torch.sum(A, dim=1)
sum_A_kd = torch.sum(A, dim=1, keepdim=True)
A.shape, sum_A.shape, sum_A_kd.shape

(torch.Size([4, 5]), torch.Size([4]), torch.Size([4, 1]))

In [35]:
# Same comparison along axis 0: column sums with and without keeping the axis.
sum_A = torch.sum(A, dim=0)
sum_A_kd = torch.sum(A, dim=0, keepdim=True)
A.shape, sum_A.shape, sum_A_kd.shape

(torch.Size([4, 5]), torch.Size([5]), torch.Size([1, 5]))

In [36]:
# sum_A holds one total per column; broadcasting divides every row of A by it.
A / sum_A

tensor([[0.0000, 0.0294, 0.0526, 0.0714, 0.0870],
        [0.1667, 0.1765, 0.1842, 0.1905, 0.1957],
        [0.3333, 0.3235, 0.3158, 0.3095, 0.3043],
        [0.5000, 0.4706, 0.4474, 0.4286, 0.4130]])

In [37]:
# Same division using the keepdims result, which still has two axes.
A / sum_A_kd

tensor([[0.0000, 0.0294, 0.0526, 0.0714, 0.0870],
        [0.1667, 0.1765, 0.1842, 0.1905, 0.1957],
        [0.3333, 0.3235, 0.3158, 0.3095, 0.3043],
        [0.5000, 0.4706, 0.4474, 0.4286, 0.4130]])

In [41]:
# Print the matrix, then its running totals accumulated down the rows (axis 0).
pr(A)
pr(A.cumsum(axis=0))

(tensor([[ 0.,  1.,  2.,  3.,  4.],
        [ 5.,  6.,  7.,  8.,  9.],
        [10., 11., 12., 13., 14.],
        [15., 16., 17., 18., 19.]]),)
(tensor([[ 0.,  1.,  2.,  3.,  4.],
        [ 5.,  7.,  9., 11., 13.],
        [15., 18., 21., 24., 27.],
        [30., 34., 38., 42., 46.]]),)


In [42]:
# Row sums of a 5-row matrix, keeping the reduced axis so the result stays 2-D.
A = tf.reshape(tf.range(20, dtype=tf.float32), (5,-1))
tf.reduce_sum(A, axis=1, keepdims=True)

<tf.Tensor: shape=(5, 1), dtype=float32, numpy=
array([[ 6.],
       [22.],
       [38.],
       [54.],
       [70.]], dtype=float32)>

In [43]:
# Running totals along each row (axis 1); the shape of A is preserved.
tf.cumsum(A, axis=1)

<tf.Tensor: shape=(5, 4), dtype=float32, numpy=
array([[ 0.,  1.,  3.,  6.],
       [ 4.,  9., 15., 22.],
       [ 8., 17., 27., 38.],
       [12., 25., 39., 54.],
       [16., 33., 51., 70.]], dtype=float32)>

## Dot Products 

In [46]:
# The dot product equals the sum of the element-wise products.
x = torch.tensor([1., 2., 3.])
y = torch.tensor([3., 4., 5.])
x, y, x.dot(y), (x * y).sum()

(tensor([1., 2., 3.]), tensor([3., 4., 5.]), tensor(26.), tensor(26.))

In [47]:
A = torch.randn((2,3))
B = torch.randn((2,3))
# torch.dot rejects 2-D inputs. Run the call and print the resulting error
# instead of leaving it commented out, so the message shown below is
# reproducible and the notebook still runs to completion.
try:
    torch.dot(A, B)
except RuntimeError as err:
    print(f"RuntimeError: {err}")

RuntimeError: 1D tensors expected, but got 2D and 2D tensors

In [53]:
# Dot product via tensordot (contracting one axis) and via the sum of
# element-wise products.
x = tf.constant([1,2,3])
y = tf.constant([4,5,6])
x, y, tf.tensordot(x, y, axes=1), tf.reduce_sum(x*y)

(<tf.Tensor: shape=(3,), dtype=int32, numpy=array([1, 2, 3])>,
 <tf.Tensor: shape=(3,), dtype=int32, numpy=array([4, 5, 6])>,
 <tf.Tensor: shape=(), dtype=int32, numpy=32>,
 <tf.Tensor: shape=(), dtype=int32, numpy=32>)

## Matrix-Vector Products 

In [56]:
# Matrix-vector product: each output entry is the dot product of a row of A with x.
A = torch.arange(15).reshape(3, 5)
x = torch.arange(5)
A, x, A.mv(x)

(tensor([[ 0,  1,  2,  3,  4],
         [ 5,  6,  7,  8,  9],
         [10, 11, 12, 13, 14]]),
 tensor([0, 1, 2, 3, 4]),
 tensor([ 30,  80, 130]))

In [59]:
# Matrix-vector product of a (3, 5) matrix with a length-5 vector.
A = tf.reshape(tf.range(15),(3,5))
x = tf.range(5)
A, x, tf.linalg.matvec(A, x)

(<tf.Tensor: shape=(3, 5), dtype=int32, numpy=
 array([[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14]])>,
 <tf.Tensor: shape=(5,), dtype=int32, numpy=array([0, 1, 2, 3, 4])>,
 <tf.Tensor: shape=(3,), dtype=int32, numpy=array([ 30,  80, 130])>)

## Matrix-Matrix Multiplication 

In [63]:
A = torch.arange(15, dtype=torch.float32).reshape((3,5))
B = torch.ones((5,3))
# Compute the (3, 5) x (5, 3) product once and reuse it for both printouts
# instead of calling torch.mm twice.
AB = torch.mm(A, B)
pr(A)
pr(B)
pr(AB)
pr(AB.shape)

(tensor([[ 0.,  1.,  2.,  3.,  4.],
        [ 5.,  6.,  7.,  8.,  9.],
        [10., 11., 12., 13., 14.]]),)
(tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]]),)
(tensor([[10., 10., 10.],
        [35., 35., 35.],
        [60., 60., 60.]]),)
(torch.Size([3, 3]),)


In [64]:
# Matrix-matrix product of a 5-column matrix with a (5, 3) matrix of ones.
A = tf.reshape(tf.range(15, dtype=tf.float32), (-1,5))
B = tf.ones((5,3))
tf.matmul(A,B)

<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[10., 10., 10.],
       [35., 35., 35.],
       [60., 60., 60.]], dtype=float32)>

## Norms
Informally, the norm of a vector tells us how big a vector is. 

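- L2 norm $\|x\|_2$ (often written simply $\|x\|$): $\|x\|_2 = \sqrt{\sum_i x_i^2}$
- L1 norm $\|x\|_1 = \sum_i |x_i|$
- Frobenius norm of a matrix $X \in \mathbb{R}^{m \times n}$: $\|X\|_F = \sqrt{\sum_{i=1}^{m} \sum_{j=1}^{n} x_{ij}^2}$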

In [65]:
# L2 norm of (3, 4): sqrt(3**2 + 4**2) = 5.
u = torch.tensor([3.0, 4.0])
u.norm()

tensor(5.)

In [66]:
# L1 norm: the sum of the absolute values of the entries.
torch.abs(u).sum()

tensor(7.)

In [67]:
# Norm of a 4x9 all-ones matrix: sqrt(36 * 1**2) = 6 (the Frobenius norm).
torch.norm(torch.ones((4,9)))

tensor(6.)

In [69]:
# L2 norm of (3, 4), TensorFlow version.
u = tf.constant([3.,4.])
tf.norm(u)

<tf.Tensor: shape=(), dtype=float32, numpy=5.0>

In [70]:
# L1 norm: the sum of the absolute values of the entries.
tf.reduce_sum(tf.abs(u))

<tf.Tensor: shape=(), dtype=float32, numpy=7.0>

In [71]:
# Norm of a 4x9 all-ones matrix, TensorFlow version.
tf.norm(tf.ones((4,9)))

<tf.Tensor: shape=(), dtype=float32, numpy=6.0>

### Norms and Objectives

In deep learning, we are often trying to solve optimization problems:
- maximize the probability assigned to observed data 
- minimize the distance between predictions and the ground-truth observations
- assign vector representations to items (words, products, etc.) such that the distance between similar items is minimized and the distance between dissimilar items is maximized.

In [72]:
# len() of a tensor reports the size of its first axis.
X = torch.ones(2, 3, 4)
len(X)

2