# train lenet-5 on MNIST using fp32

## import lib and load dataset

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras

import Activations
import FullyConnectedLayer
from dataset import load_mnist
from NeuralNetwork import QLeNet


# load dataset
# NOTE(review): load_mnist is a project helper. The training cell feeds these
# arrays to a (28, 28, 1) input with a categorical cross-entropy loss, so the
# images are presumably (N, 28, 28, 1) and the labels one-hot — confirm in dataset.py.
x_train, y_train, x_test, y_test = load_mnist()

## define and train model

In [None]:
# Functional-API model: two Conv2D/ReLU/MaxPool2D stages followed by three
# Dense layers. The last Dense(10) has no activation; the loss below is
# configured with from_logits=True.
x_in = keras.layers.Input((28, 28, 1))

stack = [
    keras.layers.Conv2D(16, kernel_size=3, padding="SAME", kernel_initializer="glorot_normal"),
    keras.layers.ReLU(),
    keras.layers.MaxPool2D(),
    keras.layers.Conv2D(32, kernel_size=3, padding="SAME", kernel_initializer="glorot_normal"),
    keras.layers.ReLU(),
    keras.layers.MaxPool2D(),
    keras.layers.Flatten(),
    keras.layers.Dense(256, kernel_initializer="glorot_normal"),
    keras.layers.ReLU(),
    keras.layers.Dense(256, kernel_initializer="glorot_normal"),
    keras.layers.ReLU(),
    keras.layers.Dense(10, kernel_initializer="glorot_normal"),
]

# Chain the layers in order, starting from the input tensor.
x = x_in
for layer in stack:
    x = layer(x)

lenet = keras.Model(inputs=[x_in], outputs=[x])
lenet.summary()
lenet.compile(
    optimizer=keras.optimizers.Adam(0.001),
    loss=keras.losses.CategoricalCrossentropy(from_logits=True, label_smoothing=0.02),
    metrics=["accuracy"],
)

# Batch size 256, 10 epochs; the test split doubles as validation data.
hist = lenet.fit(x_train, y_train, batch_size=256, epochs=10, validation_data=(x_test, y_test))


# plot weight distribution

In [None]:
import matplotlib.pyplot as plt

# Show one 64-bin histogram of the kernel values for every Conv2D / Dense layer.
for layer in lenet.layers:
    if isinstance(layer, (keras.layers.Conv2D, keras.layers.Dense)):
        # Only weights[0] (the kernel) is plotted. The bias is deliberately not
        # fetched: it was unused, and indexing weights[1] would fail on a layer
        # created without a bias.
        w = layer.weights[0].numpy()

        plt.hist(np.ravel(w), bins=64)
        plt.title(layer.name)  # identify which layer the figure belongs to
        plt.show()


In [None]:
# how do we link one model to the other?
# it is better to build a model from scratch based on the model that comes in...
# the main difficulty is creating each layer
# the network is just an array of layers

import matplotlib.pyplot as plt
import quantizer

from ConvLayer import *

def _load_quantized_params(qlayer, keras_layer):
    """Quantize the kernel and bias of ``keras_layer`` and store them on ``qlayer``.

    Both tensors are divided by the largest absolute kernel value before being
    handed to ``quantizer.quantize``. For the kernel and then for the bias, the
    quantized, scaled and original values are drawn as overlaid histograms.

    Sets ``qlayer.qw``, ``qlayer.weights_scale`` and ``qlayer.qb``.
    """
    fpw = keras_layer.weights[0].numpy()
    fpb = keras_layer.weights[1].numpy()

    # Scale factor: largest kernel magnitude, so the scaled kernel lies in [-1, 1].
    w_scale = np.max(np.abs(fpw))

    fpw_scaled = fpw / w_scale
    qw = quantizer.quantize(fpw_scaled, True, False)

    # assign the quantized weight
    qlayer.qw = qw
    # NOTE(review): this used to store fpw_scaled (the scaled weight tensor)
    # under the name weights_scale; the scale factor is assumed to be what the
    # layer expects — confirm against the layer implementation.
    qlayer.weights_scale = w_scale

    plt.hist(np.ravel(qw), bins=64)
    plt.hist(np.ravel(fpw_scaled), bins=64)
    plt.hist(np.ravel(fpw), bins=64)
    plt.show()

    # The bias is scaled with the kernel's scale factor, not its own.
    fpb_scaled = fpb / w_scale
    qb = quantizer.quantize(fpb_scaled, True, False)
    qlayer.qb = qb

    plt.hist(np.ravel(qb), bins=64)
    plt.hist(np.ravel(fpb_scaled), bins=64)
    plt.hist(np.ravel(fpb), bins=64)
    plt.show()


# Walk the trained Keras model and build the matching list of deep-nibble
# layers, in the same order. Layer types not listed below are skipped.
dn_layers = []
for l in lenet.layers:
    if isinstance(l, keras.layers.Conv2D):
        print("instanciating conv layer...")

        # Kernel shape is read as (kernel_size, _, input_channels, nfilters).
        w_shape = l.weights[0].shape
        kernel_size = w_shape[0]
        input_channels = w_shape[2]
        nfilters = w_shape[3]
        strides = [1, 1, 1, 1]  # TODO: variable strides

        qconv = QConvLayer(nfilters, kernel_size, input_channels, strides, l.padding)
        _load_quantized_params(qconv, l)
        dn_layers.append(qconv)

    elif isinstance(l, keras.layers.MaxPool2D):
        print(l)
        dn_layers.append(CustomMaxPool(l.pool_size, l.strides))

    elif isinstance(l, keras.layers.Flatten):
        dn_layers.append(CustomFlatten(l.input_shape))

    elif isinstance(l, keras.layers.Dense):
        # Dense kernel shape supplies the two constructor arguments.
        w_shape = l.weights[0].shape
        qfc = FullyConnectedLayer.QFullyConnectedLayerWithScale(w_shape[0], w_shape[1])
        _load_quantized_params(qfc, l)
        dn_layers.append(qfc)

    elif isinstance(l, keras.layers.ReLU):
        dn_layers.append(Activations.QReLU())

print(dn_layers)




# deep nibble direct quantization

In [None]:
# Number of load-and-evaluate repetitions. The next cell re-prints the mean
# with a literal 100, so keep the two in sync if this value is changed.
N_DIRECT_QUANT_RUNS = 100

# Per-sample shape: x_train's shape without its first axis.
input_shape = x_train.shape[1:]

# Build the quantized network; no training happens in this cell, the layers
# are only loaded from the fp32 model and evaluated.
qlenet = QLeNet(input_shape=input_shape, output_size=y_train.shape[-1], batch_size=256)

# Running SUM of the per-run accuracies (despite the name); it is only divided
# by the run count when printed.
mean_acc = 0
for i in range(N_DIRECT_QUANT_RUNS):
    # Reload the layers from the fp32 model before every evaluation.
    qlenet.load_layers_from_model(lenet)
    y_pred = qlenet.predict(x_test, 256)

    # Accuracy: fraction of predictions equal to the arg-max of the labels.
    accuracy = tf.reduce_mean(tf.cast(y_pred == tf.argmax(y_test, axis=1), tf.float32))
    print(f"Accuracy: {accuracy * 100}%")

    mean_acc += accuracy

# sum * 100 / runs == mean accuracy in percent
print(f"mean Accuracy: {mean_acc * 100 / N_DIRECT_QUANT_RUNS}%")

In [None]:
# When run right after the direct-quantization cell, mean_acc holds the SUM of
# the accuracies of its 100 runs, so "* 100" (percent) followed by "/ 100"
# (number of runs) prints the mean accuracy in percent.
print(f"mean Accuracy: {mean_acc * 100/100}%")

# deep nibble fine-tuning

In [None]:
# Per-sample shape: x_train's shape without its first axis.
input_shape = x_train.shape[1:]

# Quantized network that is reloaded from the fp32 model before each run.
qlenet = QLeNet(input_shape=input_shape, output_size=y_train.shape[-1], batch_size=256)

run_accuracies = []
for i in range(10):
    print(f"iteration {i} ... \n\n")

    # every run starts again from the pre-trained fp32 model
    qlenet.load_layers_from_model(lenet)

    # fine-tune for a single epoch, validating on the test split
    qlenet.train(
        x_train,
        y_train,
        learning_rate=0.000010,
        num_epochs=1,
        x_val=x_test,
        y_val=y_test,
    )

    # evaluate the fine-tuned network
    y_pred = qlenet.predict(x_test, 256)
    is_correct = y_pred == tf.argmax(y_test, axis=1)
    accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
    run_accuracies.append(accuracy)
    print(f"Accuracy: {accuracy * 100}%")

# mean over the 10 runs
mean_acc = sum(run_accuracies) / 10
print(f"Accuracy: {mean_acc * 100}%")

# PO2 direct quantization

In [None]:
from qkeras.utils import model_quantize, model_save_quantized_weights
from qkeras import *


# Add one ReLU layer right after the input and rebuild the model around the
# already-trained layers (the layer objects are reused, not copied).
# NOTE(review): presumably the extra ReLU is there so that model_quantize maps
# it to the "relu" QActivation entry and the input gets quantized — confirm.
#
# The rebuild is skipped when layer 1 is already a ReLU: this cell rebinds
# `lenet`, so re-running it (or running its copy further down) would otherwise
# stack one more ReLU on every execution.
if not isinstance(lenet.layers[1], keras.layers.ReLU):
    x = x_in = keras.layers.Input((28, 28, 1))
    x = keras.layers.ReLU()(x)
    for l in lenet.layers[1:]:  # layers[0] is the old input layer
        x = l(x)

    lenet = keras.Model(inputs=[x_in], outputs=[x])

lenet.compile(optimizer=keras.optimizers.SGD(0.01), loss=keras.losses.CategoricalCrossentropy(from_logits=True), metrics=["accuracy"])


# Quantizer strings handed to QKeras, keyed by the quantized layer class.
quantizer_config = {
    "QConv2D": {
        "kernel_quantizer": "quantized_po2(4,1,use_stochastic_rounding=True)",
        "bias_quantizer": "quantized_po2(4,1,use_stochastic_rounding=True)"
    },
    "QDense": {
        "kernel_quantizer": "quantized_po2(4,1,use_stochastic_rounding=True)",
        "bias_quantizer": "quantized_po2(4,1,use_stochastic_rounding=True)"
    },
    "QActivation": { "relu": "quantized_relu_po2(4,1,use_stochastic_rounding=True)" },
}

# Build the quantized counterpart of lenet, copying the trained weights over.
qmodel2 = model_quantize(lenet, quantizer_config, activation_bits=4, transfer_weights=True)
qmodel2.summary()



In [None]:

# Evaluate the directly quantized model 10 times; the quantizers are
# configured with use_stochastic_rounding=True, so the model is rebuilt
# from lenet for every run.
accs = []
for i in range(10):
    # fresh quantized copy of lenet with the trained weights transferred
    qmodel2 = model_quantize(
        lenet,
        quantizer_config,
        activation_bits=4,
        transfer_weights=True,
    )

    # compiling is needed before evaluate(); no training is done in this cell
    qmodel2.compile(
        optimizer=keras.optimizers.SGD(0.001),
        loss=keras.losses.CategoricalCrossentropy(from_logits=True),
        metrics=["accuracy"],
    )

    loss, acc = qmodel2.evaluate(x_test, y_test)
    accs.append(acc)

# mean over the 10 runs
mean_acc = sum(accs) / 10
print(f"Accuracy: {mean_acc * 100}%")

# PO2 finetuning

In [None]:

BATCH_SIZE = 256

# Ten independent runs: quantize, freeze the conv layers, train one epoch,
# evaluate, then average the test accuracies.
accs = []
for i in range(10):
    # fresh quantized copy of lenet with the trained weights transferred
    qmodel2 = model_quantize(
        lenet,
        quantizer_config,
        activation_bits=4,
        transfer_weights=True,
    )

    # only the non-convolutional layers stay trainable
    for conv in (l for l in qmodel2.layers if isinstance(l, QConv2D)):
        conv.trainable = False

    qmodel2.compile(
        optimizer=keras.optimizers.SGD(0.001),
        loss=keras.losses.CategoricalCrossentropy(from_logits=True),
        metrics=["accuracy"],
    )

    # a single fine-tuning epoch, validated on the test split
    history = qmodel2.fit(
        x_train,
        y_train,
        batch_size=BATCH_SIZE,
        epochs=1,
        validation_data=(x_test, y_test),
        validation_freq=1,
    )

    loss, acc = qmodel2.evaluate(x_test, y_test)
    accs.append(acc)

# mean over the 10 runs
mean_acc = sum(accs) / 10
print(f"Accuracy: {mean_acc * 100}%")

# training last layers deep nibble from scratch

In [None]:
# Per-sample shape: x_train's shape without its first axis.
input_shape = x_train.shape[1:]

# Quantized network that is reloaded from the fp32 model before each run.
qlenet = QLeNet(input_shape=input_shape, output_size=y_train.shape[-1], batch_size=256)

# (learning_rate, num_epochs) for the three consecutive training phases
train_schedule = (
    (0.000010, 1),
    (0.000100, 10),
    (0.000010, 1),
)

run_accuracies = []
for i in range(10):
    print(f"iteration {i} ... \n\n")

    # load the pre-trained model and mark the conv layers as frozen
    qlenet.load_layers_from_model(lenet)
    qlenet.freeze_conv = True
    # NOTE(review): restart_fc_layers presumably re-initializes the fully
    # connected layers — confirm in NeuralNetwork.py
    qlenet.restart_fc_layers()

    # run the three training phases back to back
    for lr, epochs in train_schedule:
        qlenet.train(x_train, y_train, learning_rate=lr, num_epochs=epochs, x_val=x_test, y_val=y_test)

    # evaluate the trained network
    y_pred = qlenet.predict(x_test, 256)
    is_correct = y_pred == tf.argmax(y_test, axis=1)
    accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))
    run_accuracies.append(accuracy)
    print(f"Accuracy: {accuracy * 100}%")

# mean over the 10 runs
mean_acc = sum(run_accuracies) / 10
print(f"Accuracy: {mean_acc * 100}%")

# training last layer po2 from scratch

In [None]:
from qkeras.utils import model_quantize, model_save_quantized_weights
from qkeras import *


# Add one ReLU layer right after the input and rebuild the model around the
# already-trained layers (the layer objects are reused, not copied).
# NOTE(review): presumably the extra ReLU is there so that model_quantize maps
# it to the "relu" QActivation entry and the input gets quantized — confirm.
#
# The rebuild is skipped when layer 1 is already a ReLU: this cell rebinds
# `lenet`, and an identical cell exists further up, so running both (or
# re-running this one) would otherwise stack one more ReLU on every execution.
if not isinstance(lenet.layers[1], keras.layers.ReLU):
    x = x_in = keras.layers.Input((28, 28, 1))
    x = keras.layers.ReLU()(x)
    for l in lenet.layers[1:]:  # layers[0] is the old input layer
        x = l(x)

    lenet = keras.Model(inputs=[x_in], outputs=[x])

lenet.compile(optimizer=keras.optimizers.SGD(0.01), loss=keras.losses.CategoricalCrossentropy(from_logits=True), metrics=["accuracy"])


# Quantizer strings handed to QKeras, keyed by the quantized layer class.
quantizer_config = {
    "QConv2D": {
        "kernel_quantizer": "quantized_po2(4,1,use_stochastic_rounding=True)",
        "bias_quantizer": "quantized_po2(4,1,use_stochastic_rounding=True)"
    },
    "QDense": {
        "kernel_quantizer": "quantized_po2(4,1,use_stochastic_rounding=True)",
        "bias_quantizer": "quantized_po2(4,1,use_stochastic_rounding=True)"
    },
    "QActivation": { "relu": "quantized_relu_po2(4,1,use_stochastic_rounding=True)" },
}

# Build the quantized counterpart of lenet, copying the trained weights over.
qmodel2 = model_quantize(lenet, quantizer_config, activation_bits=4, transfer_weights=True)
qmodel2.summary()



In [None]:

BATCH_SIZE = 256

# (SGD learning rate, epochs) for the three consecutive compile + fit phases
fit_schedule = (
    (0.001, 1),
    (0.01, 10),
    (0.001, 1),
)

accs = []
for i in range(10):
    print("iteratoin", i, "...\n")

    # fresh quantized copy of lenet with the trained weights transferred
    qmodel2 = model_quantize(
        lenet,
        quantizer_config,
        activation_bits=4,
        transfer_weights=True,
    )

    # freeze the conv layers; re-draw the dense kernels and zero their biases
    for l in qmodel2.layers:
        if isinstance(l, QConv2D):
            l.trainable = False
        elif isinstance(l, QDense):
            w = keras.initializers.GlorotNormal()(l.weights[0].shape)
            b = tf.zeros_like(l.weights[1])
            l.set_weights([w, b])

    # each phase recompiles with its own learning rate, then trains
    for lr, epochs in fit_schedule:
        qmodel2.compile(
            optimizer=keras.optimizers.SGD(lr),
            loss=keras.losses.CategoricalCrossentropy(from_logits=True),
            metrics=["accuracy"],
        )
        history = qmodel2.fit(
            x_train,
            y_train,
            batch_size=BATCH_SIZE,
            epochs=epochs,
            validation_data=(x_test, y_test),
            validation_freq=1,
        )

    loss, acc = qmodel2.evaluate(x_test, y_test)
    accs.append(acc)

# mean over the 10 runs
mean_acc = sum(accs) / 10
print(f"Accuracy: {mean_acc * 100}%")