# Initialization and Casting


In [1]:
import tensorflow as tf

In [2]:
tensor_zero_d = tf.constant(4)
print(tensor_zero_d)

tf.Tensor(4, shape=(), dtype=int32)


In [3]:
tensor_one_d = tf.constant([2, 0, -3])
print(tensor_one_d)

tf.Tensor([ 2  0 -3], shape=(3,), dtype=int32)


In [4]:
tensor_two_d = tf.constant(
    [
        [1, 2, 0],
        [3, 5, -1],
        [1, 5, 6],
        [2, 3, 8],
    ]
)
print(tensor_two_d)

tf.Tensor(
[[ 1  2  0]
 [ 3  5 -1]
 [ 1  5  6]
 [ 2  3  8]], shape=(4, 3), dtype=int32)


In [5]:
tensor_three_d = tf.constant(
    [
        [[1, 2, 0], [3, 5, -1]],
        [[10, 2, 0], [1, 0, 2]],
        [[5, 8, 0], [2, 7, 0]],
        [[2, 1, 9], [4, -3, 32]],
    ]
)
print(tensor_three_d)

tf.Tensor(
[[[ 1  2  0]
  [ 3  5 -1]]

 [[10  2  0]
  [ 1  0  2]]

 [[ 5  8  0]
  [ 2  7  0]]

 [[ 2  1  9]
  [ 4 -3 32]]], shape=(4, 2, 3), dtype=int32)


In [6]:
print(tensor_three_d.shape)

(4, 2, 3)


In [7]:
print(tensor_three_d.ndim)

3


In [8]:
tensor_four_d = tf.constant(
    [
        [
            [[1, 2, 0], [3, 5, -1]],
            [[10, 2, 0], [1, 0, 2]],
            [[5, 8, 0], [2, 7, 0]],
            [[2, 1, 9], [4, -3, 32]],
        ],
        [
            [[1, 2, 0], [3, 5, -1]],
            [[10, 2, 0], [1, 0, 2]],
            [[5, 8, 0], [2, 7, 0]],
            [[2, 1, 9], [4, -3, 32]],
        ],
        [
            [[1, 2, 0], [3, 5, -1]],
            [[10, 2, 0], [1, 0, 2]],
            [[5, 8, 0], [2, 7, 0]],
            [[2, 1, 9], [4, -3, 32]],
        ],
    ]
)
print(tensor_four_d)

tf.Tensor(
[[[[ 1  2  0]
   [ 3  5 -1]]

  [[10  2  0]
   [ 1  0  2]]

  [[ 5  8  0]
   [ 2  7  0]]

  [[ 2  1  9]
   [ 4 -3 32]]]


 [[[ 1  2  0]
   [ 3  5 -1]]

  [[10  2  0]
   [ 1  0  2]]

  [[ 5  8  0]
   [ 2  7  0]]

  [[ 2  1  9]
   [ 4 -3 32]]]


 [[[ 1  2  0]
   [ 3  5 -1]]

  [[10  2  0]
   [ 1  0  2]]

  [[ 5  8  0]
   [ 2  7  0]]

  [[ 2  1  9]
   [ 4 -3 32]]]], shape=(3, 4, 2, 3), dtype=int32)


In [9]:
# dtype
tensor_one_d = tf.constant([2, 0, -3], dtype=tf.float32)
print(tensor_one_d)

tf.Tensor([ 2.  0. -3.], shape=(3,), dtype=float32)


In [10]:
# tf.cast(x, dtype=)
casted_tensor_one_d = tf.cast(tensor_one_d, dtype=tf.int64)
print(tensor_one_d)
print(casted_tensor_one_d)

tf.Tensor([ 2.  0. -3.], shape=(3,), dtype=float32)
tf.Tensor([ 2  0 -3], shape=(3,), dtype=int64)


In [11]:
# tf.cast(x, dtype=tf.bool)
# All positives and negatives become True. 0 becomes False
casted_tensor_one_d = tf.cast(tensor_one_d, dtype=tf.bool)
print(tensor_one_d)
print(casted_tensor_one_d)

tf.Tensor([ 2.  0. -3.], shape=(3,), dtype=float32)
tf.Tensor([ True False  True], shape=(3,), dtype=bool)


In [12]:
import numpy as np

In [13]:
np_array = np.array([1, 2, 4])
print(np_array)

[1 2 4]


In [14]:
# tf.convert_to_tensor(np_array)
# convert numpy array to tensor
converted_tensor = tf.convert_to_tensor(np_array)

In [15]:
# tf.eye()
# construct identity matrix
eye_tensor = tf.eye(
    num_rows=3, num_columns=None, batch_shape=None, dtype=tf.dtypes.float32, name=None
)
print(eye_tensor)

tf.Tensor(
[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]], shape=(3, 3), dtype=float32)


In [16]:
print(4 * eye_tensor)

tf.Tensor(
[[4. 0. 0.]
 [0. 4. 0.]
 [0. 0. 4.]], shape=(3, 3), dtype=float32)


In [17]:
# Identity matrix is truncated based on rows and columns
eye_tensor = tf.eye(
    num_rows=5, num_columns=3, batch_shape=None, dtype=tf.dtypes.bool, name=None
)
print(eye_tensor)

tf.Tensor(
[[ True False False]
 [False  True False]
 [False False  True]
 [False False False]
 [False False False]], shape=(5, 3), dtype=bool)


In [18]:
# Identity matrix in 1D batch shape is multiples of matrix
eye_tensor = tf.eye(
    num_rows=4,
    num_columns=6,
    batch_shape=[
        2,
    ],
    dtype=tf.dtypes.int32,
    name=None,
)
print(eye_tensor)

tf.Tensor(
[[[1 0 0 0 0 0]
  [0 1 0 0 0 0]
  [0 0 1 0 0 0]
  [0 0 0 1 0 0]]

 [[1 0 0 0 0 0]
  [0 1 0 0 0 0]
  [0 0 1 0 0 0]
  [0 0 0 1 0 0]]], shape=(2, 4, 6), dtype=int32)


In [19]:
# Identity matrix in nD batch shape of arbitrary dimension
eye_tensor = tf.eye(
    num_rows=4, num_columns=6, batch_shape=[2, 3], dtype=tf.dtypes.int32, name=None
)
print(eye_tensor)

tf.Tensor(
[[[[1 0 0 0 0 0]
   [0 1 0 0 0 0]
   [0 0 1 0 0 0]
   [0 0 0 1 0 0]]

  [[1 0 0 0 0 0]
   [0 1 0 0 0 0]
   [0 0 1 0 0 0]
   [0 0 0 1 0 0]]

  [[1 0 0 0 0 0]
   [0 1 0 0 0 0]
   [0 0 1 0 0 0]
   [0 0 0 1 0 0]]]


 [[[1 0 0 0 0 0]
   [0 1 0 0 0 0]
   [0 0 1 0 0 0]
   [0 0 0 1 0 0]]

  [[1 0 0 0 0 0]
   [0 1 0 0 0 0]
   [0 0 1 0 0 0]
   [0 0 0 1 0 0]]

  [[1 0 0 0 0 0]
   [0 1 0 0 0 0]
   [0 0 1 0 0 0]
   [0 0 0 1 0 0]]]], shape=(2, 3, 4, 6), dtype=int32)


In [20]:
# To get a matrix which is filled with an number
fill_tensor = tf.fill([3, 4], 5, name=None)
print(fill_tensor)

tf.Tensor(
[[5 5 5 5]
 [5 5 5 5]
 [5 5 5 5]], shape=(3, 4), dtype=int32)


In [21]:
# To get a matrix which is filled with an 1s
ones_tensor = tf.ones([5, 3], dtype=tf.dtypes.float32, name=None)
print(ones_tensor)

tf.Tensor(
[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]], shape=(5, 3), dtype=float32)


In [22]:
# Pass a tensor, get a tensor of ones of similar shape
ones_like_tensor = tf.ones_like(fill_tensor, dtype=None, name=None)
print(ones_like_tensor)

tf.Tensor(
[[1 1 1 1]
 [1 1 1 1]
 [1 1 1 1]], shape=(3, 4), dtype=int32)


In [23]:
# To get a matrix which is filled with an 0s. Henceforth I will not use 'matrix', I will use 'tensor'
zeros_tensor = tf.zeros(shape=[3, 2], dtype=tf.dtypes.float32, name=None)
zeros_tensor

<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[0., 0.],
       [0., 0.],
       [0., 0.]], dtype=float32)>

In [24]:
# To get shape of tensor
print(tf.shape(tensor_three_d))

tf.Tensor([4 2 3], shape=(3,), dtype=int32)


In [25]:
# To get rank of tensor
print(tf.rank(tensor_four_d))

tf.Tensor(4, shape=(), dtype=int32)


In [26]:
# To get size of tensor (0D Tensor which tells total number of elements in the tensor)
print(tf.size(tensor_four_d))

tf.Tensor(72, shape=(), dtype=int32)


In [27]:
# Outputs random values from a normal distribution, Imagine a normal distribution bell curve. you will get random values around the mean, and number of standard deviations away, std dev increas/decrease makes the curve wider/thinner. Modify the out-dtype tp get floats or ints random values
random_normal_tensor = tf.random.normal(
    shape=[3, 2], mean=10.0, stddev=1.0, dtype=tf.dtypes.float32, seed=None, name=None
)
random_normal_tensor

<tf.Tensor: shape=(3, 2), dtype=float32, numpy=
array([[ 9.496061,  8.944515],
       [11.633282, 11.377333],
       [10.822213, 12.032447]], dtype=float32)>

In [28]:
# Outputs random values from a uniform distribution, with values between a minval (default 0) and a maxval (default1). Modify the out-dtype tp get floats or ints random values

random_uniform_tensor = tf.random.uniform(
    shape=[7, 5], minval=0, maxval=1000, dtype=tf.dtypes.int32, seed=None, name=None
)
random_uniform_tensor

<tf.Tensor: shape=(7, 5), dtype=int32, numpy=
array([[405, 570, 985, 150, 265],
       [707, 827, 184, 524, 155],
       [ 95, 391, 645, 461, 395],
       [218, 257, 447, 436, 974],
       [591, 616, 794, 165, 420],
       [947, 632, 562, 863, 813],
       [878, 503,  77, 548, 722]])>

In [29]:
# seed argument: A Python integer. Used in combination with tf.random.set_seed to create a REPRODUCIBLE sequence of tensors across multiple calls.
tf.random.set_seed(5)
print(
    tf.random.uniform(
        shape=[
            3,
        ],
        maxval=5,
        dtype=tf.int32,
        seed=10,
    )
)
print(
    tf.random.uniform(
        shape=[
            3,
        ],
        maxval=5,
        dtype=tf.int32,
        seed=10,
    )
)
print(
    tf.random.uniform(
        shape=[
            3,
        ],
        maxval=5,
        dtype=tf.int32,
        seed=10,
    )
)
print(
    tf.random.uniform(
        shape=[
            3,
        ],
        maxval=5,
        dtype=tf.int32,
        seed=10,
    )
)

tf.Tensor([4 3 1], shape=(3,), dtype=int32)
tf.Tensor([4 3 2], shape=(3,), dtype=int32)
tf.Tensor([1 1 1], shape=(3,), dtype=int32)
tf.Tensor([1 3 3], shape=(3,), dtype=int32)


In [30]:
# Comparing seed outputs
tf.random.set_seed(5)
print(
    tf.random.uniform(
        shape=[
            3,
        ],
        maxval=5,
        dtype=tf.int32,
        seed=10,
    )
)
print(
    tf.random.uniform(
        shape=[
            3,
        ],
        maxval=5,
        dtype=tf.int32,
        seed=10,
    )
)
print(
    tf.random.uniform(
        shape=[
            3,
        ],
        maxval=5,
        dtype=tf.int32,
        seed=10,
    )
)
print(
    tf.random.uniform(
        shape=[
            3,
        ],
        maxval=5,
        dtype=tf.int32,
        seed=10,
    )
)

tf.Tensor([4 3 1], shape=(3,), dtype=int32)
tf.Tensor([4 3 2], shape=(3,), dtype=int32)
tf.Tensor([1 1 1], shape=(3,), dtype=int32)
tf.Tensor([1 3 3], shape=(3,), dtype=int32)


# Indexing


In [31]:
# Indexing, slicing, skipping a tensor
tensor_indexed = tf.constant([3, 6, 2, 4, 6, 66, 7])
print(tensor_indexed)
print(tensor_indexed[:4])  # min index is by default 0, so it looks like [0:4]
print(tensor_indexed[1:5])
print(
    tensor_indexed[5:]
)  # max index is by default the last value, so it looks like [5:7]
print(
    tensor_indexed[:]
)  # no min or max index is the whole range, so it looks like [0:7]
print(
    tensor_indexed[0:7:2]
)  # the skip parameter gives the i+skipth position, default is i+1
print(tensor_indexed[3:-1])  # negative max parameter gives the last-negative position

tf.Tensor([ 3  6  2  4  6 66  7], shape=(7,), dtype=int32)
tf.Tensor([3 6 2 4], shape=(4,), dtype=int32)
tf.Tensor([6 2 4 6], shape=(4,), dtype=int32)
tf.Tensor([66  7], shape=(2,), dtype=int32)
tf.Tensor([ 3  6  2  4  6 66  7], shape=(7,), dtype=int32)
tf.Tensor([3 2 6 7], shape=(4,), dtype=int32)
tf.Tensor([ 4  6 66], shape=(3,), dtype=int32)


In [32]:
tf.range(2, 5)

<tf.Tensor: shape=(3,), dtype=int32, numpy=array([2, 3, 4])>

In [33]:
# indeexing a 2-D Tensor
"""
where
#tf.Tensor(
[[ 1  2  0]
 [ 3  5 -1]
 [ 1  5  6]
 [ 2  3  8]], shape=(4, 3), dtype=int32)"""

print(
    tensor_two_d[0:3, 0:2]
)  # default is [rowrange, columnrange], i.e., [0:lastrow,0:lastcol]

tf.Tensor(
[[1 2]
 [3 5]
 [1 5]], shape=(3, 2), dtype=int32)


In [34]:
print(tensor_two_d[1, :])  # 2nd row (as indexing is 0,1,2...), all colums

tf.Tensor([ 3  5 -1], shape=(3,), dtype=int32)


In [35]:
print(tensor_two_d[1, 0])  # 2nd row, 2nd column

tf.Tensor(3, shape=(), dtype=int32)


In [36]:
print(tensor_two_d[1, 1:])  # 2nd row, all colums after 1st colum

tf.Tensor([ 5 -1], shape=(2,), dtype=int32)


In [37]:
print(tensor_two_d[:, 0])  # all rows, 0th column

tf.Tensor([1 3 1 2], shape=(4,), dtype=int32)


In [38]:
# anothher short hand for [:] is [...]
print(tensor_two_d[..., 0])  # all rows, 0th column

tf.Tensor([1 3 1 2], shape=(4,), dtype=int32)


In [39]:
"""
3D Tensor indexing
tf.Tensor(
[[[ 1  2  0]
  [ 3  5 -1]]

 [[10  2  0]
  [ 1  0  2]]

 [[ 5  8  0]
  [ 2  7  0]]

 [[ 2  1  9]
  [ 4 -3 32]]], shape=(4, 2, 3), dtype=int32)
"""

print(tensor_three_d[0, :, :])  # 0th element, all rows, all columns

tf.Tensor(
[[ 1  2  0]
 [ 3  5 -1]], shape=(2, 3), dtype=int32)


In [40]:
print(tensor_three_d[0, 0, :])  # 0th element, 0th row, all columns

tf.Tensor([1 2 0], shape=(3,), dtype=int32)


In [41]:
print(tensor_three_d[0, :, 2])  # 0th element, all rows, last column

tf.Tensor([ 0 -1], shape=(2,), dtype=int32)


In [42]:
print(tensor_three_d[0:2, :, 2])  # 0th and 1st element, all rows, last colum

tf.Tensor(
[[ 0 -1]
 [ 0  2]], shape=(2, 2), dtype=int32)


In [43]:
print(tensor_three_d[..., :, 2])  # all elements, all rows, 1st column
# [... , ... , 2] does not work surprisingly

tf.Tensor(
[[ 0 -1]
 [ 0  2]
 [ 0  0]
 [ 9 32]], shape=(4, 2), dtype=int32)


# Math Operations


In [44]:
# Computes the absolute value of a tensor.
# i.e., turns negatives into positives

# for a real number
x_abs = tf.constant([-2.25, 3.25])
tf.abs(x_abs)

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([2.25, 3.25], dtype=float32)>

In [45]:
# complex number, For a complex number a+bj, its absolute value is computed as sqrt(a^2 + b^2)
x_abs_complex = tf.constant([[-2.25 + 4.75j], [-3.25 + 5.75j]])
tf.abs(x_abs)

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([2.25, 3.25], dtype=float32)>

In [46]:
# checking above. Note: for squaring negative numbers, remember to put then in brackets, then square, else you will get wrong value.
print(tf.math.sqrt((-2.25) ** 2 + 4.75**2))
print(tf.math.sqrt((-3.25) ** 2 + 5.75**2))

tf.Tensor(5.255949, shape=(), dtype=float32)
tf.Tensor(6.6049223, shape=(), dtype=float32)


In [47]:
# Add two tensors

# Scalar and List

x = [1, 2, 3, 4, 5]
y = 1
tf.add(x, y)

<tf.Tensor: shape=(5,), dtype=int32, numpy=array([2, 3, 4, 5, 6])>

In [48]:
# usage of + operator
x = tf.convert_to_tensor([1, 2, 3, 4, 5])
y = tf.convert_to_tensor(1)
x + y

<tf.Tensor: shape=(5,), dtype=int32, numpy=array([2, 3, 4, 5, 6])>

In [49]:
# Add Tensor and List of same shape
x = [1, 2, 3, 4, 5]
y = tf.constant([1, 2, 3, 4, 5])
tf.add(x, y)

<tf.Tensor: shape=(5,), dtype=int32, numpy=array([ 2,  4,  6,  8, 10])>

In [50]:
# Warning: If one of the inputs (x or y) is a tensor and the other is a non-tensor, the non-tensor input will adopt (or get casted to) the data type of the tensor input. This can potentially cause unwanted overflow or underflow conversion. here it becomes -126, -124 even on adding

x = tf.constant([1, 2], dtype=tf.int8)
y = [2**7 + 1, 2**7 + 2]
tf.add(x, y)

<tf.Tensor: shape=(2,), dtype=int8, numpy=array([-126, -124], dtype=int8)>

In [51]:
# Add Tensor and Tensor of same shape
x = tf.constant([1, 2, 3, 4, 5])
y = tf.constant([1, 2, 3, 4, 5])
tf.add(x, y)

<tf.Tensor: shape=(5,), dtype=int32, numpy=array([ 2,  4,  6,  8, 10])>

In [52]:
# Multiply Tensor and Tensor of same shape
x = tf.constant([1, 2, 3, 4, 5])
y = tf.constant([1, 2, 3, 4, 5])
tf.multiply(x, y)

<tf.Tensor: shape=(5,), dtype=int32, numpy=array([ 1,  4,  9, 16, 25])>

In [53]:
# Divide Tensor and Tensor of same shape
x = tf.constant([1, 2, 3, 4, 5])
y = tf.constant([1, 2, 3, 4, 5])
tf.divide(x, y)

<tf.Tensor: shape=(5,), dtype=float64, numpy=array([1., 1., 1., 1., 1.])>

In [54]:
# Divide_No_NaN Tensor and Tensor of same shape. Tensor dtype conversion requested. int not allowed, only float
# instad of infinity inf, it shows 0
x = tf.constant([1, 2, 3, 4, 5], dtype=tf.float32)
y = tf.zeros_like(x, tf.float32)
tf.math.divide_no_nan(x, y)

<tf.Tensor: shape=(5,), dtype=float32, numpy=array([0., 0., 0., 0., 0.], dtype=float32)>

# Broadcasting

When element wise operations are done on non-similar shape arrays, the smaller tensor is STRETCHED OUT to match the larger tensor


In [55]:
# Divide_No_NaN Tensor and Tensor of same shape. Tensor dtype conversion requested. int not allowed, only float
# instad of infinity inf, it shows 0
x = tf.constant([1, 2, 3, 4, 5], dtype=tf.float32)
y = tf.constant([7], dtype=tf.float32)
y_stretched = tf.constant([7, 7, 7, 7, 7], dtype=tf.float32)

print(tf.math.add(x, y))
print(tf.math.add(x, y_stretched))
# the result will be the same

tf.Tensor([ 8.  9. 10. 11. 12.], shape=(5,), dtype=float32)
tf.Tensor([ 8.  9. 10. 11. 12.], shape=(5,), dtype=float32)


In [56]:
# Multiply. It is elementwise Multiplication, NOT matrix multiplicaion
x = tf.constant([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]], dtype=tf.float32)
y = tf.constant([7], dtype=tf.float32)
y_stretched = tf.constant([7, 7, 7, 7, 7], dtype=tf.float32)

print(tf.math.multiply(x, y))
print(tf.math.multiply(x, y_stretched))

tf.Tensor(
[[ 7. 14. 21. 28. 35.]
 [42. 49. 56. 63. 70.]], shape=(2, 5), dtype=float32)
tf.Tensor(
[[ 7. 14. 21. 28. 35.]
 [42. 49. 56. 63. 70.]], shape=(2, 5), dtype=float32)


In [57]:
# Broadcasted elementwise Multiplication
x = tf.constant([[1, 2, 3, 4, 5]], dtype=tf.float32) # only one element [1,2,3,4,5]
y = tf.constant([[7], [5], [3]], dtype=tf.float32)   # three elements [7], [5], [3]

print(x.shape)
print(y.shape)
print(tf.math.multiply(x, y))
print(tf.math.multiply(y, x))  # Same output, as it is elementwise multiplication, NOT matrix multiplication

(1, 5)
(3, 1)
tf.Tensor(
[[ 7. 14. 21. 28. 35.]
 [ 5. 10. 15. 20. 25.]
 [ 3.  6.  9. 12. 15.]], shape=(3, 5), dtype=float32)
tf.Tensor(
[[ 7. 14. 21. 28. 35.]
 [ 5. 10. 15. 20. 25.]
 [ 3.  6.  9. 12. 15.]], shape=(3, 5), dtype=float32)


In [58]:
# Comaring with Non-broadcasted elementwise Multiplication
x_stretched = tf.constant(
    [[1, 2, 3, 4, 5], [1, 2, 3, 4, 5], [1, 2, 3, 4, 5]],
    dtype=tf.float32,  # x stretched out to 3 elements, to multiply with 3 elements/rows of y
)
y_stretched = tf.constant(
    [[7, 7, 7, 7, 7], [5, 5, 5, 5, 5], [3, 3, 3, 3, 3]],
    dtype=tf.float32,  # y stretched out to multiply elementwise with all 5 columns of x
)
print(tf.math.multiply(x_stretched, y_stretched))
print(tf.math.multiply(y_stretched, x_stretched))  # Same output, as it is elementwise multiplication, NOT matrix multiplication

tf.Tensor(
[[ 7. 14. 21. 28. 35.]
 [ 5. 10. 15. 20. 25.]
 [ 3.  6.  9. 12. 15.]], shape=(3, 5), dtype=float32)
tf.Tensor(
[[ 7. 14. 21. 28. 35.]
 [ 5. 10. 15. 20. 25.]
 [ 3.  6.  9. 12. 15.]], shape=(3, 5), dtype=float32)


Broadcasting(Stretching the Tensors) rule of thumb:

Both tensors should have one of their respective dimensions as 1. The other dimension is stretched corresponding to the other tensor.

In [59]:
# Element wise Maximum of 2 tensors when comparing both

x = tf.constant([0., 0., 0., 0.])
y = tf.constant([-2., 0., 2., 5.])
tf.math.maximum(x, y)


<tf.Tensor: shape=(4,), dtype=float32, numpy=array([0., 0., 2., 5.], dtype=float32)>

In [60]:
# Note that maximum supports broadcast semantics for x and y.

x = tf.constant([-5., 0., 0., 0.])
y = tf.constant([-3.])
tf.math.maximum(x, y)


<tf.Tensor: shape=(4,), dtype=float32, numpy=array([-3.,  0.,  0.,  0.], dtype=float32)>

In [61]:
# Element wise Minimum of 2 tensors when comparing both

x = tf.constant([0., 0., 0., 0.])
y = tf.constant([-5., -2., 0., 3.])
tf.math.minimum(x, y)


<tf.Tensor: shape=(4,), dtype=float32, numpy=array([-5., -2.,  0.,  0.], dtype=float32)>

In [62]:
# Returns the index with the largest value across axes of a tensor.
A = tf.constant([2, 20, 30, 3, 6])
tf.math.argmax(A)  # A[2] is maximum in tensor A

<tf.Tensor: shape=(), dtype=int64, numpy=2>

In [63]:
'''tf.math.argmax(
    input,
    axis=None,              # axis =0 means row is fixed, comarison for arg max is done on the columns. Vice versa for axis = 1
    output_type=tf.dtypes.int64,
    name=None
)'''
B = tf.constant([[2, 20, 30, 3, 6], 
                 [3, 11, 16, 1, 8],
                 [14, 45, 23, 5, 27]])
tf.math.argmax(B, 0)



<tf.Tensor: shape=(5,), dtype=int64, numpy=array([2, 2, 0, 2, 2], dtype=int64)>

In [64]:
# ArgMIN
tf.math.argmin(B, 1)

<tf.Tensor: shape=(3,), dtype=int64, numpy=array([0, 3, 3], dtype=int64)>

In [65]:
C = tf.constant([0, 0, 0, 0])
tf.math.argmax(C) # Returns smallest index in case of ties

<tf.Tensor: shape=(), dtype=int64, numpy=0>

In [66]:
# Returns the truth value of (x == y) element-wise.
x = tf.constant([2, 4])
y = tf.constant(2)  # broadcasting happens for comparing non-similar shaped tensors, y=[2] becpmes [2,2] to commpare with x= [2,4]
tf.math.equal(x, y)


<tf.Tensor: shape=(2,), dtype=bool, numpy=array([ True, False])>

In [67]:
x = tf.constant([2, 4])
y = tf.constant([2, 4])
tf.math.equal(x, y)


<tf.Tensor: shape=(2,), dtype=bool, numpy=array([ True,  True])>

In [68]:
# Computes the power of one value to another.
x = tf.constant([[2, 2], [3, 3]])
y = tf.constant([[3, 0], [1, 4]])
tf.pow(x, y)

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[ 8,  1],
       [ 3, 81]])>

In [69]:
# Computes the sum of elements across dimensions of a tensor.
# tf.math.reduce_sum(
#     input_tensor, axis=None, keepdims=False, name=None
# )

x = tf.constant([[1, 1, 1], [1, 1, 1]])


In [70]:
# sum all the elements: 1 + 1 + 1 + 1 + 1+ 1 = 6
print(tf.reduce_sum(x))

tf.Tensor(6, shape=(), dtype=int32)


In [71]:
# reduce along the first dimension/axis
# the result is [1, 1, 1] + [1, 1, 1] = [2, 2, 2]
print(tf.reduce_sum(x,0))

tf.Tensor([2 2 2], shape=(3,), dtype=int32)


In [72]:
# reduce along the first dimension/axis
# the result is [1, 1, 1] + [1, 1, 1] = [2, 2, 2]
print(tf.reduce_sum(x,1))

tf.Tensor([3 3], shape=(2,), dtype=int32)


In [73]:
tf.reduce_sum(x, 1, keepdims=True)

<tf.Tensor: shape=(2, 1), dtype=int32, numpy=
array([[3],
       [3]])>

In [74]:
# reduce along both dimensions, the result is 1 + 1 + 1 + 1 + 1 + 1 = 6
# or, equivalently, reduce along rows [1, 1, 1] + [1, 1, 1] = [2, 2, 2], then reduce the resultant array 2 + 2 + 2 = 6
tf.reduce_sum(x, [0, 1]).numpy()

6

In [75]:
tensor_two_d

<tf.Tensor: shape=(4, 3), dtype=int32, numpy=
array([[ 1,  2,  0],
       [ 3,  5, -1],
       [ 1,  5,  6],
       [ 2,  3,  8]])>

In [76]:
# Computes tf.math.maximum of elements across dimensions of a tensor.
tf.reduce_max(tensor_two_d).numpy()

8

In [77]:
# Computes tf.math.minimum of elements across dimensions of a tensor.
tf.reduce_min(tensor_two_d).numpy()


-1

In [78]:
print(tensor_two_d.shape)
print(tf.math.reduce_sum(tensor_two_d, axis= 0, keepdims=False, name=None))

(4, 3)
tf.Tensor([ 7 15 13], shape=(3,), dtype=int32)


In [79]:
print(tf.math.reduce_sum(tensor_two_d, axis= 1, keepdims=False, name=None))

tf.Tensor([ 3  7 12 13], shape=(4,), dtype=int32)


In [80]:
# reduce_max
print(tf.math.reduce_max(tensor_two_d, axis= 1, keepdims=False, name=None))

tf.Tensor([2 5 6 8], shape=(4,), dtype=int32)


In [81]:
# reduce_mean, axis = 1, so (1+3+1+2)/4. ALso if tensor dtype is int, you will only get int values in mean, not float. So Cast it
print(tf.math.reduce_mean(tensor_two_d, axis= 1, keepdims=False, name=None))

tf.Tensor([1 2 4 4], shape=(4,), dtype=int32)


In [82]:
casted_tensor_two_d = tf.cast(tensor_two_d, dtype=tf.float32)

In [83]:
print(tf.math.reduce_mean(casted_tensor_two_d, axis= 1, keepdims=False, name=None))

tf.Tensor([1.        2.3333333 4.        4.3333335], shape=(4,), dtype=float32)


In [84]:
# reduce_std, axis = 1, DOES NOT work with int dtype tensor, so use float instead
print(tf.math.reduce_std(casted_tensor_two_d, axis= 0, keepdims=False, name=None))

tf.Tensor([0.8291562 1.299038  3.8324275], shape=(3,), dtype=float32)


In [85]:
#KeepDims, If true, retains reduced dimensions with length 1.
print(tf.math.reduce_std(casted_tensor_two_d, axis= 0, keepdims=True, name=None))

tf.Tensor([[0.8291562 1.299038  3.8324275]], shape=(1, 3), dtype=float32)


In [86]:
# Computes sigmoid of x element-wise.
x = tf.constant([0.0, 1.0, 50.0, 100.0])
tf.math.sigmoid(x)

<tf.Tensor: shape=(4,), dtype=float32, numpy=array([0.5       , 0.73105854, 1.        , 1.        ], dtype=float32)>

In [87]:
# top_k finds VALUES and INDICES of the K largest entries for the last dimension. By default K=1 and output top_k values are sorted (for K>1)
# here the input is a vector (rank=1). Thus values[j] is the j-th largest entry in input, and its index is indices[j].
result = tf.math.top_k([1, 2, 98, 1, 1, 99, 3, 1, 3, 96, 4, 1],
                        k=3,
                        sorted=True)
result.values.numpy()
# .numpy() cast tensor to numpy array

array([99, 98, 96])

In [88]:
result.indices.numpy()

array([5, 2, 9])

In [89]:
# If you just print result you get both VALUES and INDICES
result

TopKV2(values=<tf.Tensor: shape=(3,), dtype=int32, numpy=array([99, 98, 96])>, indices=<tf.Tensor: shape=(3,), dtype=int32, numpy=array([5, 2, 9])>)

In [90]:
# for 2D tensor topk will scan each row for the top_k elements, default k =1
tf.math.top_k(tensor_two_d)

TopKV2(values=<tf.Tensor: shape=(4, 1), dtype=int32, numpy=
array([[2],
       [5],
       [6],
       [8]])>, indices=<tf.Tensor: shape=(4, 1), dtype=int32, numpy=
array([[1],
       [1],
       [2],
       [2]])>)

In [91]:
tf.math.top_k(tensor_two_d, k=2)

TopKV2(values=<tf.Tensor: shape=(4, 2), dtype=int32, numpy=
array([[2, 1],
       [5, 3],
       [6, 5],
       [8, 3]])>, indices=<tf.Tensor: shape=(4, 2), dtype=int32, numpy=
array([[1, 0],
       [1, 0],
       [2, 1],
       [2, 1]])>)

# Linear Algebra Operations

In [92]:
# Multiplies matrix a by matrix b, producing a * b.
# tf.linalg.matmul(
#     a,
#     b,
#     transpose_a=False,
#     transpose_b=False,
#     adjoint_a=False,
#     adjoint_b=False,
#     a_is_sparse=False,
#     b_is_sparse=False,
#     output_type=None,
#     name=None
# )

In [93]:
a = tf.constant([1, 2, 3, 4, 5, 6], shape=[2, 3])
a  # 2-D tensor

<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
array([[1, 2, 3],
       [4, 5, 6]])>

In [94]:
b = tf.constant([7, 8, 9, 10, 11, 12], shape=[3, 2])
b  # 2-D tensor

<tf.Tensor: shape=(3, 2), dtype=int32, numpy=
array([[ 7,  8],
       [ 9, 10],
       [11, 12]])>

In [95]:
c = tf.matmul(a, b)
c  # `a` * `b`

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[ 58,  64],
       [139, 154]])>

In [96]:
# Matrix multiplication shorthand x@y
a@b

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[ 58,  64],
       [139, 154]])>

In [97]:
# to get transpose
tf.transpose(a)

<tf.Tensor: shape=(3, 2), dtype=int32, numpy=
array([[1, 4],
       [2, 5],
       [3, 6]])>

In [98]:
# A batch matrix multiplication with batch shape [2]:

m = tf.constant(np.arange(1, 13, dtype=np.int32), shape=[2, 2, 3])
m  # 3-D tensor

<tf.Tensor: shape=(2, 2, 3), dtype=int32, numpy=
array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])>

In [99]:
n = tf.constant(np.arange(13, 25, dtype=np.int32), shape=[2, 3, 2])
n  # 3-D tensor

<tf.Tensor: shape=(2, 3, 2), dtype=int32, numpy=
array([[[13, 14],
        [15, 16],
        [17, 18]],

       [[19, 20],
        [21, 22],
        [23, 24]]])>

In [100]:
o = tf.matmul(m, n)
o  # `m@n`

<tf.Tensor: shape=(2, 2, 2), dtype=int32, numpy=
array([[[ 94, 100],
        [229, 244]],

       [[508, 532],
        [697, 730]]])>

In [101]:
m@n

<tf.Tensor: shape=(2, 2, 2), dtype=int32, numpy=
array([[[ 94, 100],
        [229, 244]],

       [[508, 532],
        [697, 730]]])>

### Sidenote: 
1. For sparse matrices, multiplication operations for such tensors can be optimized by setting a_is_sparse or b_is_sparse to True, in tf.matmul() flags
2. For multiplying bu transpose or adjoint, just set their flags to true as well, no need to explicitly define them

In [102]:
# Adjoint method: Transposes the last two dimensions of FLOAT/COMPLEX tensor (it's MUST) and conjugates tensor matrix.
x = tf.constant([[1 + 1j, 2 + 2j, 3 + 3j],
                 [4 + 4j, 5 + 5j, 6 + 6j]])
tf.linalg.adjoint(x)  # [[1 - 1j, 4 - 4j],
                      #  [2 - 2j, 5 - 5j],
                      #  [3 - 3j, 6 - 6j]]

<tf.Tensor: shape=(3, 2), dtype=complex128, numpy=
array([[1.-1.j, 4.-4.j],
       [2.-2.j, 5.-5.j],
       [3.-3.j, 6.-6.j]])>

Band_Part: Copy a tensor setting everything outside a central band in each innermost matrix to zero.

tf.linalg.band_part(input, num_lower, num_upper, name=None)

Assume input has k dimensions [I, J, K, ..., M, N], then the output is a tensor with the same shape where

The band part is computed as follows:

band[i, j, k, ..., m, n] = in_band(m, n) * input[i, j, k, ..., m, n].

The indicator function:

in_band(m, n) = (num_lower < 0 || (m-n) <= num_lower)) && (num_upper < 0 || (n-m) <= num_upper).


In [103]:
# (num_lower<0 or m-n<=num_upper) and (num_upper>0 or n-m<=num_upper)
# where m is for rows, and n is or columns
input = tf.constant([[0, 1, 2, 3], 
                     [-1, 0, 1, 2], 
                     [-2, -1, 0, 1], 
                     [-3, -2, -1, 0]], dtype = tf.float32)

tf.linalg.band_part(input, num_lower = 1, num_upper = -1)

<tf.Tensor: shape=(4, 4), dtype=float32, numpy=
array([[ 0.,  1.,  2.,  3.],
       [-1.,  0.,  1.,  2.],
       [ 0., -1.,  0.,  1.],
       [ 0.,  0., -1.,  0.]], dtype=float32)>

In [104]:
tf.linalg.band_part(input, num_lower = 2, num_upper = 1)

<tf.Tensor: shape=(4, 4), dtype=float32, numpy=
array([[ 0.,  1.,  0.,  0.],
       [-1.,  0.,  1.,  0.],
       [-2., -1.,  0.,  1.],
       [ 0., -2., -1.,  0.]], dtype=float32)>

In [105]:
tensor_two_d_m_minus_n = tf.constant([[0,-1,-2],
                                      [1,0,1],
                                      [2,1,0],
                                      [3,2,1]], dtype = tf.float16)

In [106]:
tensor_two_d_n_minus_m = tf.constant([[0, 1, 2],
                                      [-1, 0, 1],
                                      [-2,-1, 0],
                                      [-3,-2,-1]], dtype = tf.float16)

### Useful special cases for Band_part:

 
 tf.linalg.band_part(input, 0, -1) ==> Upper triangular part.

 tf.linalg.band_part(input, -1, 0) ==> Lower triangular part.
 
 tf.linalg.band_part(input, 0, 0) ==> Diagonal.

In [107]:
# tf.linalg.cholesky : Computes the Cholesky decomposition of one or more SQUARE matrices. Int dtype not allowed

tf.linalg.cholesky(
    input, name=None
)



<tf.Tensor: shape=(4, 4), dtype=float32, numpy=
array([[nan,  0.,  0.,  0.],
       [nan, nan,  0.,  0.],
       [nan, nan, nan,  0.],
       [nan, nan, nan, nan]], dtype=float32)>

In [108]:
# tf.linalg.cross : Compute the pairwise CROSS product.
# tf.linalg.cross(a, b, name=None)

In [109]:
# tf.linalg.det : Computes the determinant of one or more SQUARE matrices. Int dtype not allowed
# casted_tensor_two_d[:3] to select first 3 rows and make matrix square
tf.linalg.det(casted_tensor_two_d[:3], name=None).numpy()

-3.0

In [110]:
# tf.linalg.inv : Computes the inverse of one or more SQUARE invertible matrices or their adjoints (conjugate transposes). Int dtype not allowed

tf.linalg.inv(
    casted_tensor_two_d[:3], adjoint=False, name=None
)

<tf.Tensor: shape=(3, 3), dtype=float32, numpy=
array([[-11.666667  ,   4.        ,   0.6666667 ],
       [  6.3333335 ,  -2.        ,  -0.33333334],
       [ -3.3333335 ,   1.        ,   0.33333334]], dtype=float32)>

In [111]:
# tf.linalg.matrix_transpose : Transposes last two dimensions of tensor a. Output same as tf.transpose but more prefereble to use transpose flags in tf.matmul() for matrix multiplication

tf.linalg.matrix_transpose(
    a, name='matrix_transpose', conjugate=False
)

<tf.Tensor: shape=(3, 2), dtype=int32, numpy=
array([[1, 4],
       [2, 5],
       [3, 6]])>

In [112]:
# tf.linalg.svd : Computes the singular value decompositions of one or more matrices. u and v decompositions

s,u,v = tf.linalg.svd(casted_tensor_two_d)
print(s)
print(u)
print(v)

tf.Tensor([11.934144   5.8839946  1.3981452], shape=(3,), dtype=float32)
tf.Tensor(
[[-0.11580264  0.29874796 -0.00687289]
 [-0.23386282  0.8843067   0.26446015]
 [-0.6538247   0.01995025 -0.75080764]
 [-0.71021914 -0.3582642   0.6052285 ]], shape=(4, 3), dtype=float32)
tf.Tensor(
[[-0.24230093  0.38325816  0.8912931 ]
 [-0.56985235  0.68728614 -0.45045093]
 [-0.78521246 -0.6170502   0.05187052]], shape=(3, 3), dtype=float32)


### Einsum: Another way to operations on matrices (e.g, multiply, transpose, reduce_sum etc)

np.einsum('ij, jk -> ik', A,B) # for multiplication

np.einsum('ij -> ji', A)       # for transpose

np.einsum('ij -> ')            # for reduce_sum

for batch size b, or nD operations

np.einsum('bij, bjk -> bik', A,B)    # for multiplication

np.einsum('bij -> ')                 # for reduce_sum

In [113]:
# Using Einsum is easy the following way, when we have High dimensional tensors

# From Attention is all you need paper, we need to make a complex calculation with following
# Q = batchsize, s_q, modelsize
# K = batchsize, s_k, modelsize

Q = np.random.randn(32,64,512)      # bqm
K = np.random.randn(32,128,512)     # bkm

In [114]:
np.einsum('bqm, bkm -> bqk', Q,K)    # without any transposes or anything for km to mk, and then calculating of qm*mk = qk

array([[[-4.58664970e+01,  5.41515732e+00,  1.45904850e+01, ...,
          4.09111697e+01,  2.30418229e+01,  1.06121906e+01],
        [ 5.25117922e+01,  2.83288572e+01,  5.58805145e+01, ...,
         -1.45563475e+01, -1.40000834e+01, -3.42022221e+01],
        [-1.66694404e+01, -1.91370688e+00,  3.18057071e+01, ...,
          1.63361631e+01,  1.71349062e+00,  8.66298155e+00],
        ...,
        [-1.93308747e+00,  4.13189449e+01,  4.57436846e+00, ...,
         -2.70718034e+00, -1.72272713e+00, -6.98273324e+00],
        [-1.37619795e+01,  1.88654694e+00, -8.94101835e+00, ...,
          4.12050018e+01,  2.75115873e+00, -7.99143728e+00],
        [ 2.49097154e+00,  2.11912573e+01,  4.43317687e+01, ...,
         -4.40345896e+00, -2.57117682e+00, -2.73240613e+01]],

       [[-5.27301023e+00, -2.28280673e+01, -1.98725913e+01, ...,
         -8.76269818e+00,  9.71297819e+00, -5.29531976e+01],
        [-3.27377143e+00,  3.37347804e+00,  1.37741404e+01, ...,
         -8.57642909e+00,  3.30370699e

In [115]:
np.einsum('bqm, bkm -> bqk', Q,K).shape

(32, 64, 128)

In [116]:
# Another example, from the Reformer: THe efficient transformer paper, another calculation
A = np.random.randn(2,4,4,2)    # bcij
B = np.random.randn(2,4,4,1)    # bcik
np.einsum('bcik, bcij -> bckj', B,A).shape          # no need for transposing ik to ki, then doing ki*ij = kj

(2, 4, 1, 2)

# Common TensorFlow Operations