In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import time
import numpy as np

## Model With less Bit first approach 

Today, most models use the float32 dtype, which takes 32 bits of memory. However, there are two lower-precision dtypes, float16 and bfloat16, each which take 16 bits of memory instead. Modern accelerators can run operations faster in the 16-bit dtypes, as they have specialized hardware to run 16-bit computations and 16-bit dtypes can be read from memory faster.In case of CPU float 32 is the faster approach :) 


In [2]:
def TestInferenceOfBits_Dense(bits_number):
    model = tf.keras.models.Sequential([
      tf.keras.layers.Flatten(input_shape=(28,28)),
      tf.keras.layers.Dense(64,activation='relu', dtype=bits_number),
      tf.keras.layers.Dense(64,activation='relu', dtype=bits_number),
      tf.keras.layers.Dense(10,activation='softmax', dtype=bits_number)
    ])
    model.compile(loss='sparse_categorical_crossentropy',
              optimizer=tf.keras.optimizers.RMSprop(),
              metrics=['accuracy'])

    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
    # changing data type and making the pixel value normalize (1-0)
    x_train = x_train.astype(bits_number) /255.
    x_test = x_test.astype(bits_number) /255.
    
    history = model.fit(x_train, y_train,
                    batch_size=1000,
                    epochs=5,
                    validation_split=0.2,
                   verbose=0)
    start=time.time()
    model.predict(x_test)
    stop=time.time()
    return stop-start

In [3]:
print(TestInferenceOfBits_Dense(None))
print(TestInferenceOfBits_Dense('float64'))
print(TestInferenceOfBits_Dense('float32'))
print(TestInferenceOfBits_Dense('float16'))

0.2786731719970703
0.35553526878356934
0.2551863193511963
3.496459722518921


In [4]:
default_value=tf.keras.backend.floatx()

def TestInferenceOfBits(bits_number):
    if bits_number==None:
        tf.keras.backend.set_floatx(default_value)
    else :
        tf.keras.backend.set_floatx(bits_number)
    model = tf.keras.models.Sequential(
      [
        keras.Input(shape=(28, 28, 1)),
        layers.Conv2D(64, (3, 3), strides=(2, 2), padding="same", dtype=bits_number),
        layers.LeakyReLU(alpha=0.2),
        layers.MaxPooling2D(pool_size=(2, 2), strides=(1, 1), padding="same", dtype=bits_number),
        layers.Conv2D(128, (3, 3), strides=(2, 2), padding="same", dtype=bits_number),
        layers.Flatten(),
        layers.Dense(10, dtype=bits_number),
    ]
    )

    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
    # changing data type and making the pixel value normalize (1-0)
    x_train = np.reshape(x_train, (-1, 28, 28, 1))
    x_train = x_train.astype(bits_number) /255.
    x_test = np.reshape(x_test, (-1, 28, 28, 1))
    x_test = x_test.astype(bits_number) /255.
    
    
    model.compile(
        optimizer=keras.optimizers.Adam(),
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[keras.metrics.SparseCategoricalAccuracy()],
        )

    # Train and evaluate teacher on data.
    model.fit(x_train, y_train, epochs=1,batch_size=10)

    start=time.time()
    model.predict(x_test)
    stop=time.time()
    return stop-start

In [5]:
print(TestInferenceOfBits(None))
print(TestInferenceOfBits('float64'))
print(TestInferenceOfBits('float32'))
print(TestInferenceOfBits('float16'))

0.5577178001403809
1.1191236972808838
0.941617488861084
3.0806760787963867


Mixed precission : Mixed precision is the use of both 16-bit and 32-bit floating-point types in a model during training to make it run faster and use less memory. By keeping certain parts of the model in the 32-bit types for numeric stability, the model will have a lower step time and train equally as well in terms of the evaluation metrics such as accuracy. This guide describes how to use the Keras mixed precision API to speed up your models. Using this API can improve performance by more than 3 times on modern GPUs and 60% on TPUs.

[Here](https://www.tensorflow.org/guide/mixed_precision) ypu will find all other details for mixed precision 