# Train an MLP on MNIST using FP32

In [1]:
import numpy as np
import tensorflow as tf
from dataset import load_mnist
from tensorflow import keras
from keras import layers


# Load MNIST through the project-local `dataset.load_mnist` helper, asking for
# flattened images. The shapes printed further down in this notebook are
# (60000, 784) / (10000, 784) for the images and (60000, 10) / (10000, 10) for
# the labels (presumably one-hot encoded -- confirm in `dataset.py`).
x_train, y_train, x_test, y_test = load_mnist(flatten=True)

In [2]:
# Build the FP32 baseline MLP: 784 -> 256 -> ReLU -> 256 -> ReLU -> 10.
# `shape` has to be a tuple: `(784)` is just the int 784, so spell it `(784,)`.
x = x_in = keras.layers.Input(shape=(784,))
for hidden_units in (256, 256):
    x = keras.layers.Dense(hidden_units, kernel_initializer="glorot_normal")(x)
    x = keras.layers.ReLU()(x)
# The last layer emits raw logits (no softmax); the loss is told so via from_logits=True.
x = keras.layers.Dense(10, kernel_initializer="glorot_normal")(x)

mlp = keras.Model(inputs=[x_in], outputs=[x])
mlp.summary()
mlp.compile(
    optimizer=keras.optimizers.Adam(0.001),
    loss=keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)
# batch_size and epochs are passed by keyword so they cannot be silently swapped.
# Note: the test split doubles as the validation set here.
hist = mlp.fit(
    x_train,
    y_train,
    batch_size=256,
    epochs=10,
    validation_data=(x_test, y_test),
)

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 784)]             0         
                                                                 
 dense (Dense)               (None, 256)               200960    
                                                                 
 re_lu (ReLU)                (None, 256)               0         
                                                                 
 dense_1 (Dense)             (None, 256)               65792     
                                                                 
 re_lu_1 (ReLU)              (None, 256)               0         
                                                                 
 dense_2 (Dense)             (None, 10)                2570      
                                                                 
Total params: 269,322
Trainable params: 269,322
Non-trainable

In [3]:
from NeuralNetwork import QNeuralNetworkWithScale
import Activations
import FullyConnectedLayer 


# Network input and output widths, read from the second axis of the data arrays.
input_size, output_size = x_train.shape[1], y_train.shape[1]

print(*(split.shape for split in (x_train, x_test, y_train, y_test)))

# Instantiate the project-local quantized network with those widths; its layers
# are loaded from the Keras model / trained in the cells that follow.
neural_network_with_scale = QNeuralNetworkWithScale(input_size, output_size)


(60000, 784) (10000, 784) (60000, 10) (10000, 10)
784 256
256 256
256 10


# deep nibble direct quantization

In [4]:
# Evaluate direct quantization of the trained FP32 model: repeatedly load the
# Keras weights into the quantized network and measure test accuracy.
# (Presumably the loading/quantization step is stochastic, which is why the
# result is averaged over several runs -- TODO confirm in NeuralNetwork.py.)
n_runs = 100  # single source of truth for the loop bound and the average
direct_quant_accs = []  # per-run accuracies, kept for later inspection
for i in range(n_runs):
    print(f"iteration {i} ... \n\n")

    # Rebuild the quantized layers from the current Keras model.
    neural_network_with_scale.load_layers_from_model(mlp)

    y_pred = neural_network_with_scale.predict(x_test, 256)

    # Fraction of test samples whose prediction equals the argmax of the label row.
    accuracy = tf.reduce_mean(tf.cast(y_pred == tf.argmax(y_test, axis=1), tf.float32))
    direct_quant_accs.append(accuracy)
    print(f"Accuracy: {accuracy * 100}%")

# Averaging over len(...) keeps the divisor in sync with the number of runs.
mean_acc = sum(direct_quant_accs) / len(direct_quant_accs)
print(f"Accuracy: {mean_acc * 100}%")

iteration 0 ... 


instantiating weights from  dense
instantiating relu
instantiating weights from  dense_1
instantiating relu
instantiating weights from  dense_2
loaded layers [<FullyConnectedLayer.QFullyConnectedLayerWithScale object at 0x000001FBDD007640>, <Activations.QReLU object at 0x000001FBB5A6AEF0>, <FullyConnectedLayer.QFullyConnectedLayerWithScale object at 0x000001FBB5A6AFB0>, <Activations.QReLU object at 0x000001FB8BAFE9E0>, <FullyConnectedLayer.QFullyConnectedLayerWithScale object at 0x000001FB8BAFF9D0>]
Accuracy: 96.38999938964844%
iteration 1 ... 


instantiating weights from  dense
instantiating relu
instantiating weights from  dense_1
instantiating relu
instantiating weights from  dense_2
loaded layers [<FullyConnectedLayer.QFullyConnectedLayerWithScale object at 0x000001FBB5A6AEF0>, <Activations.QReLU object at 0x000001FBB5A6AFB0>, <FullyConnectedLayer.QFullyConnectedLayerWithScale object at 0x000001FB8BAFE9E0>, <Activations.QReLU object at 0x000001FB8BAFF9D0>, <Full

# deep nibble finetuning

In [5]:
# Evaluate quantization followed by a short fine-tuning pass: each run reloads
# the Keras weights into the quantized network, fine-tunes for one epoch and
# then measures test accuracy.
n_runs = 10  # single source of truth for the loop bound and the average
finetune_accs = []  # per-run accuracies, kept for later inspection
for i in range(n_runs):
    print(f"iteration {i} ... \n\n")

    # Start every run from the same FP32 weights.
    neural_network_with_scale.load_layers_from_model(mlp)

    # Fine-tune the quantized network (the test split is passed as validation data).
    neural_network_with_scale.train(
        x_train,
        y_train,
        learning_rate=0.00010,
        num_epochs=1,
        batch_size=256,
        x_val=x_test,
        y_val=y_test,
    )

    # Predict with the fine-tuned network.
    y_pred = neural_network_with_scale.predict(x_test, 256)

    # Fraction of test samples whose prediction equals the argmax of the label row.
    accuracy = tf.reduce_mean(tf.cast(y_pred == tf.argmax(y_test, axis=1), tf.float32))
    finetune_accs.append(accuracy)
    print(f"Accuracy: {accuracy * 100}%")

# Averaging over len(...) keeps the divisor in sync with the number of runs.
mean_acc = sum(finetune_accs) / len(finetune_accs)
print(f"Accuracy: {mean_acc * 100}%")

iteration 0 ... 


instantiating weights from  dense
instantiating relu
instantiating weights from  dense_1
instantiating relu
instantiating weights from  dense_2
loaded layers [<FullyConnectedLayer.QFullyConnectedLayerWithScale object at 0x000001FBB5A98B50>, <Activations.QReLU object at 0x000001FBB5A98A60>, <FullyConnectedLayer.QFullyConnectedLayerWithScale object at 0x000001FB8BC1B970>, <Activations.QReLU object at 0x000001FB8BC1A230>, <FullyConnectedLayer.QFullyConnectedLayerWithScale object at 0x000001FB8BC18B20>]
Epoch 1/1, Loss: 0.00015819660620763898 Accuracy: 97.38999938964844%
Accuracy: 97.22999572753906%
iteration 1 ... 


instantiating weights from  dense
instantiating relu
instantiating weights from  dense_1
instantiating relu
instantiating weights from  dense_2
loaded layers [<FullyConnectedLayer.QFullyConnectedLayerWithScale object at 0x000001FBB5A98A60>, <Activations.QReLU object at 0x000001FB8BC18B20>, <FullyConnectedLayer.QFullyConnectedLayerWithScale object at 0x00000

# PO2 direct quantization

In [6]:
# Add one ReLU layer directly after the input, reusing the trained layers of `mlp`.
# The cell rebinds `mlp` to a wrapped version of itself, so without the guard a
# second execution would stack yet another ReLU in front of the model.
if not isinstance(mlp.layers[1], keras.layers.ReLU):
    # `shape` has to be a tuple: `(784)` is just the int 784.
    x = x_in = keras.layers.Input(shape=(784,))
    x = keras.layers.ReLU()(x)
    # Skip the old InputLayer (index 0) and re-apply every remaining layer object,
    # so the new model shares the already-trained weights.
    for layer in mlp.layers[1:]:
        x = layer(x)

    mlp = keras.Model(inputs=[x_in], outputs=[x])

mlp.summary()
mlp.compile(
    optimizer=keras.optimizers.SGD(0.01),
    loss=keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)


Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 784)]             0         
                                                                 
 re_lu_2 (ReLU)              (None, 784)               0         
                                                                 
 dense (Dense)               (None, 256)               200960    
                                                                 
 re_lu (ReLU)                (None, 256)               0         
                                                                 
 dense_1 (Dense)             (None, 256)               65792     
                                                                 
 re_lu_1 (ReLU)              (None, 256)               0         
                                                                 
 dense_2 (Dense)             (None, 10)                2570

# quantize

In [7]:
from qkeras.utils import model_quantize, model_save_quantized_weights
from qkeras import *


# QKeras quantizer specification handed to `model_quantize`: the same
# power-of-two quantizer string is used for Dense kernels and biases, and a
# power-of-two ReLU quantizer for the "relu" activations.
po2_quantizer = "quantized_po2(4,1,use_stochastic_rounding=True)"
quantizer_config = {
    "QDense": {
        "kernel_quantizer": po2_quantizer,
        "bias_quantizer": po2_quantizer,
    },
    "QActivation": {"relu": "quantized_relu_po2(4,1,use_stochastic_rounding=True)"},
}

# Quantize once up front only to display the structure of the converted model.
qmodel2 = model_quantize(mlp, quantizer_config, activation_bits=4, transfer_weights=True)
qmodel2.summary()


n_runs = 10  # single source of truth for the loop bound and the average
po2_direct_accs = []  # per-run test accuracies, kept for later inspection
for i in range(n_runs):
    # Quantize the mlp model (weights are transferred from the FP32 model).
    qmodel2 = model_quantize(mlp, quantizer_config, activation_bits=4, transfer_weights=True)

    # Compile so that `evaluate` can report loss and accuracy; no training happens here.
    qmodel2.compile(
        optimizer=keras.optimizers.SGD(0.001),
        loss=keras.losses.CategoricalCrossentropy(from_logits=True),
        metrics=["accuracy"],
    )

    # Evaluate on the test split.
    loss, acc = qmodel2.evaluate(x_test, y_test)

    po2_direct_accs.append(acc)


# Averaging over len(...) keeps the divisor in sync with the number of runs.
mean_acc = sum(po2_direct_accs) / len(po2_direct_accs)
print(f"Accuracy: {mean_acc * 100}%")



Model: "model_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 784)]             0         
                                                                 
 re_lu_2 (QActivation)       (None, 784)               0         
                                                                 
 dense (QDense)              (None, 256)               200960    
                                                                 
 re_lu (QActivation)         (None, 256)               0         
                                                                 
 dense_1 (QDense)            (None, 256)               65792     
                                                                 
 re_lu_1 (QActivation)       (None, 256)               0         
                                                                 
 dense_2 (QDense)            (None, 10)                2570

# PO2 finetuning

In [8]:

BATCH_SIZE = 256


# Quantize the FP32 model with QKeras, fine-tune it for one epoch and evaluate
# on the test split; repeat and average the resulting accuracies.
n_runs = 10  # single source of truth for the loop bound and the average
po2_finetune_accs = []  # per-run test accuracies, kept for later inspection
for i in range(n_runs):
    # Quantize the mlp model (each run starts again from the FP32 weights).
    qmodel2 = model_quantize(mlp, quantizer_config, activation_bits=4, transfer_weights=True)

    # Compile with plain SGD for the fine-tuning epoch.
    qmodel2.compile(
        optimizer=keras.optimizers.SGD(0.001),
        loss=keras.losses.CategoricalCrossentropy(from_logits=True),
        metrics=["accuracy"],
    )

    # Train for one epoch (the test split is used as validation data).
    history = qmodel2.fit(
        x_train,
        y_train,
        batch_size=BATCH_SIZE,
        epochs=1,
        validation_data=(x_test, y_test),
        validation_freq=1,
    )

    # Evaluate the fine-tuned model.
    loss, acc = qmodel2.evaluate(x_test, y_test)

    po2_finetune_accs.append(acc)


# Averaging over len(...) keeps the divisor in sync with the number of runs.
mean_acc = sum(po2_finetune_accs) / len(po2_finetune_accs)
print(f"Accuracy: {mean_acc * 100}%")

Accuracy: 96.4079999923706%


# deep nibble training from scratch

In [None]:
import numpy as np
import tensorflow as tf
from dataset import load_mnist
from NeuralNetwork import QNeuralNetworkWithScale
import Activations
import FullyConnectedLayer


# Fetch the flattened MNIST splits.
x_train, y_train, x_test, y_test = load_mnist(flatten=True)

# Network input and output widths, read from the second axis of the data arrays.
input_size, output_size = x_train.shape[1], y_train.shape[1]

print(*(split.shape for split in (x_train, x_test, y_train, y_test)))

# Training phases as (learning_rate, num_epochs): one epoch at the low rate,
# ten epochs at the higher rate, then one more epoch at the low rate.
training_phases = (
    (0.000010, 1),
    (0.000100, 10),
    (0.000010, 1),
)

acc_hist = []
for i in range(100):
    print(f"iteration {i} ... \n")

    # Every repetition starts from a freshly constructed network.
    neural_network_with_scale = QNeuralNetworkWithScale(input_size, output_size)

    # Run the phases in order; the test split is passed as validation data.
    for phase_lr, phase_epochs in training_phases:
        neural_network_with_scale.train(
            x_train,
            y_train,
            learning_rate=phase_lr,
            num_epochs=phase_epochs,
            batch_size=256,
            x_val=x_test,
            y_val=y_test,
        )

    # Record the last entry of the network's own accuracy history for this run.
    acc_hist.append(neural_network_with_scale.acc_hist[-1])

# PO2 training from scratch

In [None]:
import qkeras
from qkeras import *

def make_po2_quantizer():
    """Return a fresh 4-bit power-of-two quantizer with stochastic rounding."""
    return qkeras.quantized_po2(4, 1, use_stochastic_rounding=True)


# Build the quantized MLP: every QDense layer is preceded by a power-of-two
# quantized ReLU (including one directly on the input).
# `shape` has to be a tuple: `(784)` is just the int 784.
x = x_in = keras.layers.Input(shape=(784,))
for units in (256, 256, 10):
    x = qkeras.QActivation("quantized_relu_po2(4,1,use_stochastic_rounding=True)")(x)
    x = qkeras.QDense(
        units,
        kernel_quantizer=make_po2_quantizer(),
        bias_quantizer=make_po2_quantizer(),
    )(x)

mlp = keras.Model(inputs=[x_in], outputs=[x])
mlp.summary()

# Training phases as (SGD learning rate, epochs): 1 epoch at 0.001, 10 epochs
# at 0.01, then 1 epoch at 0.001. The model is re-compiled for each phase to
# switch the learning rate; the weights carry over between phases.
for phase_lr, phase_epochs in ((0.001, 1), (0.01, 10), (0.001, 1)):
    mlp.compile(
        optimizer=keras.optimizers.SGD(phase_lr),
        loss=keras.losses.CategoricalCrossentropy(from_logits=True),
        metrics=["accuracy"],
    )
    # batch_size and epochs are passed by keyword so they cannot be silently swapped.
    # `hist` ends up holding the history of the final phase.
    hist = mlp.fit(
        x_train,
        y_train,
        batch_size=256,
        epochs=phase_epochs,
        validation_data=(x_test, y_test),
    )

# FP32 training from scratch (same number of epochs)

In [None]:
# Build the FP32 reference MLP (784 -> 256 -> ReLU -> 256 -> ReLU -> 10) with
# default layer initializers.
# `shape` has to be a tuple: `(784)` is just the int 784.
x = x_in = keras.layers.Input(shape=(784,))
for hidden_units in (256, 256):
    x = keras.layers.Dense(hidden_units)(x)
    x = keras.layers.ReLU()(x)
# The last layer emits raw logits; the loss is told so via from_logits=True.
x = keras.layers.Dense(10)(x)

mlp = keras.Model(inputs=[x_in], outputs=[x])
mlp.summary()

# Training phases as (SGD learning rate, epochs): 1 epoch at 0.001, 10 epochs
# at 0.01, then 1 epoch at 0.001. The model is re-compiled for each phase to
# switch the learning rate; the weights carry over between phases.
for phase_lr, phase_epochs in ((0.001, 1), (0.01, 10), (0.001, 1)):
    mlp.compile(
        optimizer=keras.optimizers.SGD(phase_lr),
        loss=keras.losses.CategoricalCrossentropy(from_logits=True),
        metrics=["accuracy"],
    )
    # batch_size and epochs are passed by keyword so they cannot be silently swapped.
    # `hist` ends up holding the history of the final phase.
    hist = mlp.fit(
        x_train,
        y_train,
        batch_size=256,
        epochs=phase_epochs,
        validation_data=(x_test, y_test),
    )