Matrix Multiplication

In Machine Learning, matrix multiliplication is one of the most common tensor operations.
1. The inner dimensions must match.
2. The resulting matrix has the shape of the outer dimensions.

In [2]:
# Matrix multiplication in tensorflow
import tensorflow as tf

a = tf.constant([[1,2,7],[7,2,1],[3,3,3,]])
b = tf.constant([[2,5],[6,7],[1,8]])
tf.matmul(a, b)

<tf.Tensor: shape=(3, 2), dtype=int32, numpy=
array([[21, 75],
       [27, 57],
       [27, 60]], dtype=int32)>

In [3]:
# Matrix multiplication with Python Operator "@"

c = a @ b
print(c)

tf.Tensor(
[[21 75]
 [27 57]
 [27 60]], shape=(3, 2), dtype=int32)


In [4]:
# Matrix multiplication of same shape

X = tf.constant([[1,2],[3,4],[5,6]])
Y = tf.constant([[7,8],[9,10],[11,12]])
X @ Y

InvalidArgumentError: ignored

In [5]:
# Let's change the shape of Y
tf.reshape(Y,shape=(2, 3))

<tf.Tensor: shape=(2, 3), dtype=int32, numpy=
array([[ 7,  8,  9],
       [10, 11, 12]], dtype=int32)>

In [6]:
# Try to matrix multiply X by reshaped Y
X @ tf.reshape(Y,shape=(2, 3))

<tf.Tensor: shape=(3, 3), dtype=int32, numpy=
array([[ 27,  30,  33],
       [ 61,  68,  75],
       [ 95, 106, 117]], dtype=int32)>

In [7]:
tf.matmul(X, tf.reshape(Y, shape=(2,3)))

<tf.Tensor: shape=(3, 3), dtype=int32, numpy=
array([[ 27,  30,  33],
       [ 61,  68,  75],
       [ 95, 106, 117]], dtype=int32)>

In [8]:
# TRy Change the shape of X instead of Y

tf.matmul(tf.reshape(X, shape=(2, 3)), Y)

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[ 58,  64],
       [139, 154]], dtype=int32)>

In [9]:
# Try matrix multiplication with transpose rather than reshape

tf.matmul(tf.transpose(X), Y)

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[ 89,  98],
       [116, 128]], dtype=int32)>

** The Dot Product **

Matrix multiplication is also referred to as the dot product.

You can perform matrix multiplication using:

* `tf.matmul()`
* `tf.tensordot()`

In [10]:
# Perform the dot product on X and Y (requires X or Y to be transposed)

tf.tensordot(tf.transpose(X), Y, axes=1)

<tf.Tensor: shape=(2, 2), dtype=int32, numpy=
array([[ 89,  98],
       [116, 128]], dtype=int32)>

In [11]:
# Perform matrix multiplication between X and Y (transposed)

tf.matmul(X, tf.transpose(Y))

<tf.Tensor: shape=(3, 3), dtype=int32, numpy=
array([[ 23,  29,  35],
       [ 53,  67,  81],
       [ 83, 105, 127]], dtype=int32)>

In [12]:
# Perform matrix multiplication between X and Y (reshaped)

tf.matmul(X, tf.reshape(Y, shape=(2, 3)))

<tf.Tensor: shape=(3, 3), dtype=int32, numpy=
array([[ 27,  30,  33],
       [ 61,  68,  75],
       [ 95, 106, 117]], dtype=int32)>

In [13]:
# Check the values of Y, reshape Y and transposed Y

print("Normal Y:")
print(Y, "\n")  # "\n" is for new line

print("Y reshaoed to (2,3):")
print(tf.reshape(Y, shape=(2,3)), "\n")

print("Y transposed:")
print(tf.transpose(Y))

Normal Y:
tf.Tensor(
[[ 7  8]
 [ 9 10]
 [11 12]], shape=(3, 2), dtype=int32) 

Y reshaoed to (2,3):
tf.Tensor(
[[ 7  8  9]
 [10 11 12]], shape=(2, 3), dtype=int32) 

Y transposed:
tf.Tensor(
[[ 7  9 11]
 [ 8 10 12]], shape=(2, 3), dtype=int32)


In [14]:
tf.matmul(X, tf.transpose(Y))

<tf.Tensor: shape=(3, 3), dtype=int32, numpy=
array([[ 23,  29,  35],
       [ 53,  67,  81],
       [ 83, 105, 127]], dtype=int32)>

Generally do transpose instead of reshape for matrix multiplication

In [15]:
# Changing the datatype of tensor


# Create a new tensor with default datatype (float 32)

B = tf.constant([1.7, 7.4])
B.dtype

tf.float32

In [16]:
# Change from float32 to float16 (reduced precision)

D = tf.cast(B, dtype=tf.float16)
D.dtype

tf.float16

In [17]:
# Change from int32 to float32

E = tf.cast(B, dtype=tf.float32)
E

<tf.Tensor: shape=(2,), dtype=float32, numpy=array([1.7, 7.4], dtype=float32)>

Aggregating tensors

Aggregating tensors = condensing them from multiple values down to a smaller amount of values.

In [18]:
# Get the absolute values

D = tf.constant([-7, -10])  # absolute values takes out negative number
tf.abs(D)

<tf.Tensor: shape=(2,), dtype=int32, numpy=array([ 7, 10], dtype=int32)>

Let's go through the following forms of aggregation:
* Get the minimumn
* Get the maximum
* Get the mean of a tensor
* Get the sum of a tensor

In [19]:
# Create a random tensor with values between 0 and 100 of size 50
import numpy as np
E = tf.constant(np.random.randint(0,100, size=50))
E

<tf.Tensor: shape=(50,), dtype=int64, numpy=
array([ 4, 63, 31, 51, 85, 12, 17, 95, 77, 59, 34, 91, 89, 85, 65, 26, 65,
       56, 87, 31, 71, 32, 12, 48, 55, 74, 83, 76, 60, 50, 20, 45, 66, 90,
       44, 90, 64, 68, 33, 37, 48,  0, 27, 65, 76, 90, 48, 71, 11, 47])>

In [20]:
# Find the minimum

tf.reduce_min(E)

<tf.Tensor: shape=(), dtype=int64, numpy=0>

In [21]:
# Find the maximum

tf.reduce_max(E)

<tf.Tensor: shape=(), dtype=int64, numpy=95>

In [22]:
# Find the mean

tf.reduce_mean(E)

<tf.Tensor: shape=(), dtype=int64, numpy=54>

In [23]:
# Find the sum

tf.reduce_sum(E)

<tf.Tensor: shape=(), dtype=int64, numpy=2724>

⏰ With what we have just learned, find the variance and standard deviation of our `E` tensor using TensorFlow methods.

In [24]:
# Find the variance of tensor

import tensorflow_probability as tfp

tfp.stats.variance(E)

<tf.Tensor: shape=(), dtype=int64, numpy=666>

In [25]:
# Find the variance of E tensor

tf.math.reduce_variance(tf.cast(E, dtype=tf.float32))

<tf.Tensor: shape=(), dtype=float32, numpy=665.8896>

In [26]:
# Find standard deviation of tensorflow

tf.math.reduce_std(tf.cast(E, dtype=tf.float32))  # standard deviation only accepts real or complex number that's why dtype is changed to float32

<tf.Tensor: shape=(), dtype=float32, numpy=25.804836>

In [27]:
# Find the positional maximum and minimum

# Create a new tensor for finding positional minimum and maximum
tf.random.set_seed(42)
F = tf.random.uniform(shape=[50])
F

<tf.Tensor: shape=(50,), dtype=float32, numpy=
array([0.6645621 , 0.44100678, 0.3528825 , 0.46448255, 0.03366041,
       0.68467236, 0.74011743, 0.8724445 , 0.22632635, 0.22319686,
       0.3103881 , 0.7223358 , 0.13318717, 0.5480639 , 0.5746088 ,
       0.8996835 , 0.00946367, 0.5212307 , 0.6345445 , 0.1993283 ,
       0.72942245, 0.54583454, 0.10756552, 0.6767061 , 0.6602763 ,
       0.33695042, 0.60141766, 0.21062577, 0.8527372 , 0.44062173,
       0.9485276 , 0.23752594, 0.81179297, 0.5263394 , 0.494308  ,
       0.21612847, 0.8457197 , 0.8718841 , 0.3083862 , 0.6868038 ,
       0.23764038, 0.7817228 , 0.9671384 , 0.06870162, 0.79873943,
       0.66028714, 0.5871513 , 0.16461694, 0.7381023 , 0.32054043],
      dtype=float32)>

In [28]:
# Find the positional maximum

tf.argmax(F)

<tf.Tensor: shape=(), dtype=int64, numpy=42>

In [29]:
# Index on our largest value position

F[tf.argmax(F)]

<tf.Tensor: shape=(), dtype=float32, numpy=0.9671384>

In [30]:
# Find the max value of F

tf.reduce_max(F)

<tf.Tensor: shape=(), dtype=float32, numpy=0.9671384>

In [31]:
# Check for equality

F[tf.argmax(F)] == tf.reduce_max(F)

<tf.Tensor: shape=(), dtype=bool, numpy=True>

In [32]:
# Find tye positional minimum

tf.argmin(F)

<tf.Tensor: shape=(), dtype=int64, numpy=16>

In [33]:
# Find the minimum using the positional minimum index

F[tf.argmin(F)]

<tf.Tensor: shape=(), dtype=float32, numpy=0.009463668>

Squeezing a tensor (removing all single dimensions)


In [35]:
# Create a tensor to get started

tf.random.set_seed(42)
G = tf.constant(tf.random.uniform(shape=[50]), shape=(1,1,1,1,50))
G

<tf.Tensor: shape=(1, 1, 1, 1, 50), dtype=float32, numpy=
array([[[[[0.6645621 , 0.44100678, 0.3528825 , 0.46448255, 0.03366041,
           0.68467236, 0.74011743, 0.8724445 , 0.22632635, 0.22319686,
           0.3103881 , 0.7223358 , 0.13318717, 0.5480639 , 0.5746088 ,
           0.8996835 , 0.00946367, 0.5212307 , 0.6345445 , 0.1993283 ,
           0.72942245, 0.54583454, 0.10756552, 0.6767061 , 0.6602763 ,
           0.33695042, 0.60141766, 0.21062577, 0.8527372 , 0.44062173,
           0.9485276 , 0.23752594, 0.81179297, 0.5263394 , 0.494308  ,
           0.21612847, 0.8457197 , 0.8718841 , 0.3083862 , 0.6868038 ,
           0.23764038, 0.7817228 , 0.9671384 , 0.06870162, 0.79873943,
           0.66028714, 0.5871513 , 0.16461694, 0.7381023 , 0.32054043]]]]],
      dtype=float32)>

In [36]:
G.shape

TensorShape([1, 1, 1, 1, 50])

In [37]:
G_squeezed = tf.squeeze(G)
G_squeezed, G

(<tf.Tensor: shape=(50,), dtype=float32, numpy=
 array([0.6645621 , 0.44100678, 0.3528825 , 0.46448255, 0.03366041,
        0.68467236, 0.74011743, 0.8724445 , 0.22632635, 0.22319686,
        0.3103881 , 0.7223358 , 0.13318717, 0.5480639 , 0.5746088 ,
        0.8996835 , 0.00946367, 0.5212307 , 0.6345445 , 0.1993283 ,
        0.72942245, 0.54583454, 0.10756552, 0.6767061 , 0.6602763 ,
        0.33695042, 0.60141766, 0.21062577, 0.8527372 , 0.44062173,
        0.9485276 , 0.23752594, 0.81179297, 0.5263394 , 0.494308  ,
        0.21612847, 0.8457197 , 0.8718841 , 0.3083862 , 0.6868038 ,
        0.23764038, 0.7817228 , 0.9671384 , 0.06870162, 0.79873943,
        0.66028714, 0.5871513 , 0.16461694, 0.7381023 , 0.32054043],
       dtype=float32)>,
 <tf.Tensor: shape=(1, 1, 1, 1, 50), dtype=float32, numpy=
 array([[[[[0.6645621 , 0.44100678, 0.3528825 , 0.46448255, 0.03366041,
            0.68467236, 0.74011743, 0.8724445 , 0.22632635, 0.22319686,
            0.3103881 , 0.7223358 , 0.133187