# Train an MLP on MNIST using FP32

In [None]:
import numpy as np
import tensorflow as tf
from dataset import load_mnist
from tensorflow import keras
from keras import layers


# load dataset
# load_mnist comes from the project-local `dataset` module. flatten=True is
# presumably what yields one flat 784-value vector per image (the models below
# take 784 inputs) — TODO confirm against dataset.py.
# Labels appear to be one-hot encoded: later cells read y_train.shape[1],
# take argmax over axis 1, and train with CategoricalCrossentropy.
x_train, y_train, x_test, y_test = load_mnist(flatten=True)

In [None]:
# Baseline FP32 MLP: 784 -> 256 -> 256 -> 10 with a ReLU after each hidden layer.
# The shape is written as the 1-tuple (784,): `(784)` is just the int 784,
# which only works on Keras versions that coerce a bare int into a shape.
x = x_in = keras.layers.Input((784,))
x = keras.layers.Dense(256)(x)
x = keras.layers.ReLU()(x)
x = keras.layers.Dense(256)(x)
x = keras.layers.ReLU()(x)
# The last layer emits raw logits (no softmax); the loss below is created
# with from_logits=True to match.
x = keras.layers.Dense(10)(x)

mlp = keras.Model(inputs=[x_in], outputs=[x])
mlp.summary()
mlp.compile(
    optimizer=keras.optimizers.SGD(0.01),
    loss=keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)
# batch_size / epochs are passed by keyword rather than by position so the
# two bare integers cannot be mistaken for one another.
hist = mlp.fit(
    x_train,
    y_train,
    batch_size=256,
    epochs=10,
    validation_data=(x_test, y_test),
)

In [None]:
from NeuralNetwork import QNeuralNetworkWithScale
import Activations
import FullyConnectedLayer 


# Network input/output sizes are read off the data: second dimension of the
# features for the input, second dimension of the labels for the output.
input_size, output_size = x_train.shape[1], y_train.shape[1]

print(*(split.shape for split in (x_train, x_test, y_train, y_test)))

# Instantiate the quantized network; weights are loaded or trained in later cells.
neural_network_with_scale = QNeuralNetworkWithScale(input_size, output_size)


# deep nibble direct quantization

In [None]:
# Direct (post-training) quantization: load the FP32 model's layers into the
# quantized network and measure test accuracy, averaged over several runs.
# The load is repeated on every run — presumably because the conversion is
# stochastic; TODO confirm in NeuralNetwork.load_layers_from_model.
N_DIRECT_QUANT_RUNS = 100  # one value drives both the loop bound and the divisor

mean_acc = 0
for i in range(N_DIRECT_QUANT_RUNS):
    print(f"iteration {i} ... \n\n")

    neural_network_with_scale.load_layers_from_model(mlp)

    y_pred = neural_network_with_scale.predict(x_test, 256)

    # Accuracy = fraction of predictions equal to the argmax of the label rows
    accuracy = tf.reduce_mean(tf.cast(y_pred == tf.argmax(y_test, axis=1), tf.float32))
    mean_acc += accuracy
    print(f"Accuracy: {accuracy * 100}%")

# Mean over all runs
mean_acc /= N_DIRECT_QUANT_RUNS
print(f"Accuracy: {mean_acc * 100}%")

# deep nibble finetuning

In [None]:
# Finetuning: start each run from the FP32 model's layers, train the quantized
# network for one epoch at a small learning rate, then measure test accuracy.
N_FINETUNE_RUNS = 10  # one value drives both the loop bound and the divisor

mean_acc = 0
for i in range(N_FINETUNE_RUNS):
    print(f"iteration {i} ... \n\n")

    # Reload so every run starts from the same FP32 model instead of
    # continuing from the previous run's finetuned state.
    neural_network_with_scale.load_layers_from_model(mlp)

    # finetune the dnn (the test split doubles as the validation set here)
    neural_network_with_scale.train(x_train, y_train, learning_rate=0.000010, num_epochs=1, batch_size=256, x_val=x_test, y_val=y_test)

    # predict finetuned
    y_pred = neural_network_with_scale.predict(x_test, 256)

    # Accuracy = fraction of predictions equal to the argmax of the label rows
    accuracy = tf.reduce_mean(tf.cast(y_pred == tf.argmax(y_test, axis=1), tf.float32))
    mean_acc += accuracy
    print(f"Accuracy: {accuracy * 100}%")

# Mean over all runs
mean_acc /= N_FINETUNE_RUNS
print(f"Accuracy: {mean_acc * 100}%")

# PO2 direct quantization

In [None]:
# add one relu layer after input
# This cell rebinds `mlp` to a wrapped version of itself. Without the guard,
# running the cell a second time would stack yet another ReLU in front of the
# already wrapped model. layers[0] is the input layer (the loop below skips it
# for the same reason), so layers[1] tells us whether the wrap already happened.
if not isinstance(mlp.layers[1], keras.layers.ReLU):
    # (784,) is a real 1-tuple; `(784)` is just the int 784.
    x = x_in = keras.layers.Input((784,))
    x = keras.layers.ReLU()(x)
    # Reuse the existing layer objects, so the new model shares the trained weights.
    for layer in mlp.layers[1:]:
        x = layer(x)

    mlp = keras.Model(inputs=[x_in], outputs=[x])

mlp.summary()
mlp.compile(optimizer=keras.optimizers.SGD(0.01), loss=keras.losses.CategoricalCrossentropy(from_logits=True), metrics=["accuracy"])


# quantize

In [None]:
from qkeras.utils import model_quantize, model_save_quantized_weights
from qkeras import *


# QKeras quantization spec handed to model_quantize: power-of-two quantizers
# for dense kernels/biases and for ReLU activations, all with stochastic
# rounding. The positional (4, 1) is presumably (bits, max_value) — TODO
# confirm against the QKeras quantizer signatures.
quantizer_config = {
    "QDense": {
        "kernel_quantizer": "quantized_po2(4,1,use_stochastic_rounding=True)",
        "bias_quantizer": "quantized_po2(4,1,use_stochastic_rounding=True)"
    },
    "QActivation": { "relu": "quantized_relu_po2(4,1,use_stochastic_rounding=True)" },
}

# Quantize once up front just to display the resulting architecture.
qmodel2 = model_quantize(mlp, quantizer_config, activation_bits=4, transfer_weights=True)
qmodel2.summary()

N_PO2_EVAL_RUNS = 10  # one value drives both the loop bound and the divisor

mean_acc = 0
for _ in range(N_PO2_EVAL_RUNS):
    # quantize the mlp model (rebuilt from the FP32 model on every run)
    qmodel2 = model_quantize(mlp, quantizer_config, activation_bits=4, transfer_weights=True)

    # compile; no training happens in this cell, the optimizer is never stepped
    qmodel2.compile(optimizer=keras.optimizers.SGD(0.001), loss=keras.losses.CategoricalCrossentropy(from_logits=True), metrics=["accuracy"])

    # evaluate
    loss, acc = qmodel2.evaluate(x_test, y_test)

    mean_acc += acc

# Mean test accuracy over all runs
mean_acc /= N_PO2_EVAL_RUNS
print(f"Accuracy: {mean_acc * 100}%")

# PO2 finetuning

In [None]:

BATCH_SIZE = 256
N_PO2_FINETUNE_RUNS = 10  # one value drives both the loop bound and the divisor

mean_acc = 0
for _ in range(N_PO2_FINETUNE_RUNS):
    # quantize the mlp model (every run restarts from the FP32 model)
    qmodel2 = model_quantize(mlp, quantizer_config, activation_bits=4, transfer_weights=True)

    # compile
    qmodel2.compile(optimizer=keras.optimizers.SGD(0.001), loss=keras.losses.CategoricalCrossentropy(from_logits=True), metrics=["accuracy"])

    # train for a single epoch; the test split doubles as validation data
    history = qmodel2.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=1, validation_data=(x_test, y_test), validation_freq=1)

    # NOTE(review): every run writes to the same path, so presumably only the
    # last run's weights survive — confirm that is intended.
    model_save_quantized_weights(qmodel2, "qmodels/qmlp_po2/")

    # evaluate
    loss, acc = qmodel2.evaluate(x_test, y_test)

    mean_acc += acc

# Mean test accuracy over all runs
mean_acc /= N_PO2_FINETUNE_RUNS
print(f"Accuracy: {mean_acc * 100}%")

# deep nibble training from scratch

In [None]:
import numpy as np
import tensorflow as tf
from dataset import load_mnist
from NeuralNetwork import QNeuralNetworkWithScale
import Activations
import FullyConnectedLayer


# load dataset
x_train, y_train, x_test, y_test = load_mnist(flatten=True)

# Network input/output sizes, taken from the second dimension of features and labels
input_size, output_size = x_train.shape[1], y_train.shape[1]

print(*(split.shape for split in (x_train, x_test, y_train, y_test)))

# (learning_rate, num_epochs) for each training phase: one epoch at the low
# rate, ten epochs at ten times that rate, then one more epoch at the low rate.
schedule = ((0.000010, 1), (0.000100, 10), (0.000010, 1))

acc_hist = []
for i in range(100):
    print(f"iteration {i} ... \n")

    # A new network is built on every iteration, so each run trains from scratch.
    neural_network_with_scale = QNeuralNetworkWithScale(input_size, output_size)

    # Run the phases back to back on the same network instance.
    for lr, epochs in schedule:
        neural_network_with_scale.train(x_train, y_train, learning_rate=lr, num_epochs=epochs, batch_size=256, x_val=x_test, y_val=y_test)

    # Record the last entry of this run's accuracy history.
    acc_hist.append(neural_network_with_scale.acc_hist[-1])

# PO2 training from scratch

In [None]:
import qkeras
from qkeras import *

# Quantizer spec shared by every activation of the PO2 model.
PO2_RELU = "quantized_relu_po2(4,1,use_stochastic_rounding=True)"


def _po2():
    """Return a new power-of-two quantizer; each kernel/bias gets its own instance."""
    return quantized_po2(4, 1, use_stochastic_rounding=True)


# Quantized MLP: a quantized ReLU directly on the input, then
# 256 -> 256 -> 10 QDense layers with a quantized ReLU after each hidden layer.
# (784,) is a real 1-tuple; `(784)` is just the int 784.
x = x_in = keras.layers.Input((784,))
x = qkeras.QActivation(PO2_RELU)(x)
for units in (256, 256):
    x = qkeras.QDense(units, kernel_quantizer=_po2(), bias_quantizer=_po2())(x)
    x = qkeras.QActivation(PO2_RELU)(x)
# Output layer emits raw logits; the loss below uses from_logits=True.
x = qkeras.QDense(10, kernel_quantizer=_po2(), bias_quantizer=_po2())(x)

mlp = keras.Model(inputs=[x_in], outputs=[x])
mlp.summary()

# Three training phases as (SGD learning rate, epochs). The model is recompiled
# before every phase, which installs a new optimizer with that phase's rate.
# `hist` ends up holding the history of the last phase only.
for lr, epochs in ((0.001, 1), (0.01, 10), (0.001, 1)):
    mlp.compile(
        optimizer=keras.optimizers.SGD(lr),
        loss=keras.losses.CategoricalCrossentropy(from_logits=True),
        metrics=["accuracy"],
    )
    hist = mlp.fit(
        x_train,
        y_train,
        batch_size=256,
        epochs=epochs,
        validation_data=(x_test, y_test),
    )

# FP32 training from scratch (same number of epochs)

In [None]:
# FP32 reference model with the same 784 -> 256 -> 256 -> 10 layout, trained
# with the same three-phase schedule as the PO2 model for a like-for-like comparison.
# (784,) is a real 1-tuple; `(784)` is just the int 784.
x = x_in = keras.layers.Input((784,))
for units in (256, 256):
    x = keras.layers.Dense(units)(x)
    x = keras.layers.ReLU()(x)
# Output layer emits raw logits; the loss below uses from_logits=True.
x = keras.layers.Dense(10)(x)

mlp = keras.Model(inputs=[x_in], outputs=[x])
mlp.summary()

# Three training phases as (SGD learning rate, epochs). The model is recompiled
# before every phase, which installs a new optimizer with that phase's rate.
# `hist` ends up holding the history of the last phase only.
for lr, epochs in ((0.001, 1), (0.01, 10), (0.001, 1)):
    mlp.compile(
        optimizer=keras.optimizers.SGD(lr),
        loss=keras.losses.CategoricalCrossentropy(from_logits=True),
        metrics=["accuracy"],
    )
    hist = mlp.fit(
        x_train,
        y_train,
        batch_size=256,
        epochs=epochs,
        validation_data=(x_test, y_test),
    )