# train VGG-16 on CIFAR10 using fp32

In [1]:
import numpy as np
import tensorflow as tf
from dataset import load_mnist, load_cifar10
from tensorflow import keras
from keras import layers
from NeuralNetwork import QLeNet, QNeuralNetworkWithScale
import Activations
import FullyConnectedLayer 
import qvgg16
from qkeras import *
from qkeras.utils import model_quantize, load_qmodel


# Load the CIFAR-10 train/test splits through the project-local `dataset` helper.
# NOTE(review): the label encoding is not visible here; later cells call
# tf.argmax(y_test, axis=1), which suggests one-hot labels - confirm in load_cifar10.
x_train, y_train, x_test, y_test = load_cifar10()

# Disabled sanity check of the fp32 baseline checkpoint.
# NOTE(review): `vgg_pretrained` is referenced by a later cell; with the line
# below disabled it has to be defined elsewhere before that cell runs.
# vgg_pretrained = keras.models.load_model("vgg_model_fp32.h5", compile=True)
# metrics = vgg_pretrained.evaluate(x_test, y_test)
# vgg.summary()

# finetuning with deep nibble without stochastic zero

In [None]:
# Load the fp32 baseline in this cell: the only other definition of
# `vgg_pretrained` in the notebook is commented out, so the loop below would
# otherwise fail with a NameError. The model is only used as a source of
# layers here, hence compile=False.
vgg_pretrained = keras.models.load_model("vgg_model_fp32.h5", compile=False)

# Rebuild the network with an extra ReLU directly after the input and reuse
# every pre-trained layer after the first one.
# NOTE(review): assumes vgg_pretrained.layers[0] is the input layer - confirm.
x = x_in = keras.layers.Input((32, 32, 3))
x = keras.layers.ReLU(name="relu_input")(x)
for layer in vgg_pretrained.layers[1:]:
    x = layer(x)

vgg = keras.Model(inputs=[x_in], outputs=[x])
# vgg.summary()

vgg.compile(
    optimizer=keras.optimizers.Adam(0.001),
    loss=keras.losses.CategoricalCrossentropy(from_logits=True, label_smoothing=0.02),
    metrics=["accuracy"],
)

# Quantizer specification handed to qkeras.utils.model_quantize: kernels and
# biases of conv/dense layers and the relu activations all use the po2
# quantizers spelled out below.
quantizer_config = {
    "QConv2D": {
        "kernel_quantizer": "quantized_po2(4,1,use_stochastic_rounding=True)",
        "bias_quantizer": "quantized_po2(4,1,use_stochastic_rounding=True)",
    },
    "QDense": {
        "kernel_quantizer": "quantized_po2(4,1,use_stochastic_rounding=True)",
        "bias_quantizer": "quantized_po2(4,1,use_stochastic_rounding=True)",
    },
    "QActivation": {"relu": "quantized_relu_po2(4,1,use_stochastic_rounding=True)"},
}

# Build the quantized model, carrying the fp32 weights over.
qvgg = model_quantize(vgg, quantizer_config, activation_bits=4, transfer_weights=True)
qvgg.summary()

# Fine-tune the quantized model with a smaller learning rate than the fp32 compile above.
qvgg.compile(
    optimizer=keras.optimizers.Adam(0.0001),
    loss=keras.losses.CategoricalCrossentropy(from_logits=True, label_smoothing=0.02),
    metrics=["accuracy"],
)
# batch_size / epochs are passed by keyword so the two integers cannot be swapped silently.
hist = qvgg.fit(
    x_train,
    y_train,
    batch_size=256,
    epochs=5,
    validation_data=(x_test, y_test),
)


In [None]:
# Reload the quantized VGG from its checkpoint and evaluate it on the test
# split. The matching save call is kept below, disabled.
# qvgg.save("qvgg_model_fp32.h5")
qvgg = load_qmodel("qvgg_model_fp32.h5")
qvgg.evaluate(x_test, y_test)

# plot weight distribution

In [None]:
import matplotlib.pyplot as plt

# For every quantized conv/dense layer, apply the layer's own quantizers to
# its kernel and bias and show the resulting value distributions side by side.
for l in qvgg.layers:
    if not isinstance(l, (QConv2D, QDense)):
        continue

    # weights[0] is read as the kernel and weights[1] as the bias.
    w = l.weights[0].numpy()
    b = l.weights[1].numpy()

    qw = get_quantizer(l.kernel_quantizer)(w)
    # The bias goes through the layer's bias quantizer; the kernel quantizer
    # was previously applied here by mistake.
    qb = get_quantizer(l.bias_quantizer)(b)

    # Smallest magnitude among the quantized kernel values.
    print(np.min(np.abs(qw)))

    fig, ax = plt.subplots(1, 2)
    ax[0].hist(np.ravel(qw), bins=128)
    ax[1].hist(np.ravel(qb), bins=128)
    plt.show()


In [None]:

import matplotlib.pyplot as plt
import quantizer

from ConvLayer import *

def _plot_histograms(*arrays, bins=64):
    """Overlay one histogram per array on a single figure and show it."""
    for values in arrays:
        plt.hist(np.ravel(values), bins=bins)
    plt.show()


def _load_quantized_params(dn_layer, keras_layer):
    """Quantize the kernel and bias of `keras_layer` and store them on `dn_layer`.

    The kernel (weights[0]) and bias (weights[1]) are both divided by the
    largest absolute kernel value before being passed to
    `quantizer.quantize`. The results are written to `dn_layer.qw` and
    `dn_layer.qb`, and histograms of the quantized, scaled and original
    values are displayed for each tensor.
    """
    fpw = keras_layer.weights[0].numpy()
    fpb = keras_layer.weights[1].numpy()

    w_scale = np.max(np.abs(fpw))

    fpw_scaled = fpw / w_scale
    qw = quantizer.quantize(fpw_scaled, True, False)

    # assign the quantized weight
    dn_layer.qw = qw
    # NOTE(review): the scalar scale is stored here. The previous code assigned
    # the scaled weight tensor (fpw_scaled) to `weights_scale`, which looks like
    # a copy/paste slip - confirm against the layer implementation.
    dn_layer.weights_scale = w_scale
    _plot_histograms(qw, fpw_scaled, fpw)

    fpb_scaled = fpb / w_scale
    qb = quantizer.quantize(fpb_scaled, True, False)
    dn_layer.qb = qb
    _plot_histograms(qb, fpb_scaled, fpb)


# Translate the Keras model layer by layer into the project's Deep Nibble
# layer objects. Layer types not listed below are skipped.
dn_layers = []
for l in vgg.layers:
    if isinstance(l, keras.layers.Conv2D):
        print("instanciating conv layer...")

        # The kernel shape is read as (kernel, kernel, in_channels, filters);
        # only the first spatial dimension is used for the kernel size.
        w_shape = l.weights[0].shape
        kernel_size = w_shape[0]
        input_channels = w_shape[2]
        nfilters = w_shape[3]
        # Take the strides from the layer instead of hard-coding [1, 1, 1, 1].
        # NOTE(review): assumes QConvLayer expects [1, stride_h, stride_w, 1],
        # matching the layout of the former constant - confirm.
        strides = [1, *l.strides, 1]
        padding = l.padding

        qfc = QConvLayer(nfilters, kernel_size, input_channels, strides, padding)
        _load_quantized_params(qfc, l)
        dn_layers.append(qfc)

    elif isinstance(l, keras.layers.MaxPool2D):
        print(l)
        dn_maxpool = CustomMaxPool(l.pool_size, l.strides)
        dn_layers.append(dn_maxpool)

    elif isinstance(l, keras.layers.Flatten):
        dn_layers.append(CustomFlatten(l.input_shape))

    elif isinstance(l, keras.layers.Dense):
        qfc = FullyConnectedLayer.QFullyConnectedLayerWithScale(
            l.weights[0].shape[0], l.weights[0].shape[1]
        )
        _load_quantized_params(qfc, l)
        dn_layers.append(qfc)

    elif isinstance(l, keras.layers.ReLU):
        dn_layers.append(Activations.QReLU())

print(dn_layers)




# deep nibble direct quantization

In [None]:
input_shape = x_train.shape[1:]

# Build the Deep Nibble network wrapper. Nothing is trained in this cell; the
# layers are loaded from the quantized Keras model and only evaluated.
qlenet = QLeNet(input_shape=input_shape, output_size=y_train.shape[-1], batch_size=256)

# Named `iters` (not `iter`) so the builtin `iter` is not shadowed for the
# rest of the notebook session.
iters = 20
mean_acc = 0
for i in range(iters):
    # The layers are reloaded on every pass before predicting.
    # NOTE(review): presumably because the conversion is stochastic, so the
    # accuracy is averaged over several draws - confirm in load_layers_from_model.
    qlenet.load_layers_from_model(qvgg, from_layer=0)
    y_pred = qlenet.predict(x_test, 256)

    # Fraction of test samples whose prediction equals the argmax of the label row.
    accuracy = tf.reduce_mean(tf.cast(y_pred == tf.argmax(y_test, axis=1), tf.float32))
    print(f"Accuracy: {accuracy * 100}")

    mean_acc += accuracy
print(f"mean Accuracy: {mean_acc * 100/iters}")

# deep nibble finetuning

In [None]:
# Feature-extraction backbone: a sub-model of qvgg that runs from the output
# of its first layer to the output of the layer at index 19.
x_in, x = qvgg.layers[0].output, qvgg.layers[19].output
back_bone = keras.models.Model(inputs=[x_in], outputs=[x])
back_bone.summary()

# Push the train split and then the test split through the backbone
# (batch size 256) and keep the resulting feature arrays.
x_train_2, x_test_2 = (
    back_bone.predict(split, batch_size=256) for split in (x_train, x_test)
)
print(x_train_2.shape)

# # quantize backbone to deep nibble
# input_shape = x_train.shape[1:]
# qvgg_backbone = QLeNet(input_shape=input_shape, output_size=y_train.shape[-1], batch_size=256)
# qvgg_backbone.load_layers_from_model(back_bone)
# 
# # preprocess the dataset with Deep Nibble quantized VGG
# x_train_2 = qvgg_backbone.predict(x_train, 256, apply_argmax=False)
# x_test_2 = qvgg_backbone.predict(x_test, 256, apply_argmax=False)
# 
# 
# del x_train, x_test, back_bone, qvgg_backbone

In [None]:
# Persist the backbone features; np.save appends the ".npy" suffix to each name.
for target, features in (
    ("x_train_preprocessed", x_train_2),
    ("x_test_preprocessed", x_test_2),
):
    np.save(target, features)

In [2]:
# Replace the image arrays with the features saved by the backbone cell.
x_train, x_test = map(np.load, ("x_train_preprocessed.npy", "x_test_preprocessed.npy"))

input_shape = x_train.shape[1:]

# Fully connected Deep Nibble network operating on the pre-computed features.
qvgg_mlp = QNeuralNetworkWithScale(input_size=4096, output_size=10)

iters = 10
mean_acc = 0
for i in range(iters):
    print(f"iteration {i} ... \n\n")

    # qvgg_mlp.load_layers_from_model(qvgg) ## loads the fully connected layers
    # del qvgg

    # Fine-tune in two single-epoch stages: (learning rate, batch size).
    # The network is not re-created between iterations, so training accumulates.
    for step_lr, step_batch in ((0.000010, 128), (0.000100, 256)):
        qvgg_mlp.train(
            x_train,
            y_train,
            learning_rate=step_lr,
            num_epochs=1,
            batch_size=step_batch,
            x_val=x_test,
            y_val=y_test,
        )

    # Predict with the fine-tuned network.
    y_pred = qvgg_mlp.predict(x_test, 256)

    # Share of test samples whose prediction matches the argmax of the label row.
    hits = tf.cast(y_pred == tf.argmax(y_test, axis=1), tf.float32)
    accuracy = tf.reduce_mean(hits)
    mean_acc += accuracy
    print(f"Accuracy: {accuracy * 100}%")

mean_acc /= iters
print(f"Accuracy: {mean_acc * 100}%")

4096 256
256 256
256 10
iteration 0 ... 


Epoch 1/1, Loss: 0.014873555861413479 Accuracy: 54.38999938964844%
Epoch 1/1, Loss: 0.003267445135861635 Accuracy: 68.91000366210938%
Accuracy: 68.7699966430664%
iteration 1 ... 


Epoch 1/1, Loss: 0.005771460477262735 Accuracy: 68.81999969482422%
Epoch 1/1, Loss: 0.003688649507239461 Accuracy: 67.16999816894531%
Accuracy: 67.37999725341797%
iteration 2 ... 


Epoch 1/1, Loss: 0.008524077944457531 Accuracy: 68.06999969482422%
Epoch 1/1, Loss: 0.0063020470552146435 Accuracy: 66.72999572753906%
Accuracy: 66.04000091552734%
iteration 3 ... 


Epoch 1/1, Loss: 0.01318732462823391 Accuracy: 66.25999450683594%
Epoch 1/1, Loss: 0.006914219353348017 Accuracy: 66.25%
Accuracy: 66.75999450683594%
iteration 4 ... 


Epoch 1/1, Loss: 0.015567622147500515 Accuracy: 66.5%
Epoch 1/1, Loss: 0.01179305650293827 Accuracy: 65.16999816894531%
Accuracy: 64.9000015258789%
iteration 5 ... 


Epoch 1/1, Loss: 0.026211291551589966 Accuracy: 65.52999877929688%
Epoch 1/

In [None]:
import matplotlib.pyplot as plt
from FullyConnectedLayer import *
from ConvLayer import *


# One figure per Deep Nibble dense/conv layer, plotting each recorded history
# attribute clipped to the range [0, 2000000].
for i, layer in enumerate(qvgg_mlp.layers):
    if not isinstance(layer, (QFullyConnectedLayerWithScale, QConvLayer)):
        continue

    # Attribute names double as legend labels, in plotting order.
    leg = ["ws_hist", "bs_hist", "os_hist", "gws_hist", "gbs_hist", "gos_hist"]

    plt.figure(dpi=300)
    for series_name in leg:
        plt.plot(np.clip(getattr(layer, series_name), 0, 2000000))

    plt.legend(leg)
    plt.show()
        
        

# PO2 direct quantization

In [None]:
from qkeras.utils import model_quantize, model_save_quantized_weights
from qkeras import *


# Start from the fp32 checkpoint so this cell is self-contained. Wrapping a
# `vgg` that an earlier cell (or a previous run of this cell) has already
# wrapped would add a second layer named "relu_input", and Keras requires
# layer names within a model to be unique.
vgg = keras.models.load_model("vgg_model_fp32.h5", compile=False)

# add one relu layer after input
# NOTE(review): assumes vgg.layers[0] is the input layer - confirm.
x = x_in = keras.layers.Input((32, 32, 3))
x = keras.layers.ReLU(name="relu_input")(x)
for layer in vgg.layers[1:]:
    x = layer(x)


vgg = keras.Model(inputs=[x_in], outputs=[x])
vgg.compile(
    optimizer=keras.optimizers.SGD(0.01),
    loss=keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)


# Quantizer specification handed to qkeras.utils.model_quantize.
quantizer_config = {
    "QConv2D": {
        "kernel_quantizer": "quantized_po2(4,1,use_stochastic_rounding=True)",
        "bias_quantizer": "quantized_po2(4,1,use_stochastic_rounding=True)",
    },
    "QDense": {
        "kernel_quantizer": "quantized_po2(4,1,use_stochastic_rounding=True)",
        "bias_quantizer": "quantized_po2(4,1,use_stochastic_rounding=True)",
    },
    "QActivation": {"relu": "quantized_relu_po2(4,1,use_stochastic_rounding=True)"},
}

# Quantize the wrapped model, carrying the fp32 weights over, and print its layout.
qmodel2 = model_quantize(vgg, quantizer_config, activation_bits=4, transfer_weights=True)
qmodel2.summary()



In [None]:

# Quantize the fp32 model ten times without any training and average the
# test accuracy reported by evaluate().
run_accuracies = []
for i in range(10):
    # fresh quantized copy of the VGG model
    qmodel2 = model_quantize(
        vgg, quantizer_config, activation_bits=4, transfer_weights=True
    )

    # compile (needed before evaluate; no fitting happens in this cell)
    qmodel2.compile(
        optimizer=keras.optimizers.SGD(0.001),
        loss=keras.losses.CategoricalCrossentropy(from_logits=True),
        metrics=["accuracy"],
    )

    # evaluate
    loss, acc = qmodel2.evaluate(x_test, y_test)
    run_accuracies.append(acc)


mean_acc = sum(run_accuracies) / 10
print(f"Accuracy: {mean_acc * 100}%")

# PO2 finetuning

In [None]:

BATCH_SIZE = 256

# Ten independent runs: quantize the fp32 VGG, freeze its quantized conv
# layers, train for one epoch and average the resulting test accuracy.
mean_acc = 0
for i in range(10):
    # Quantize the VGG model. This previously referenced `lenet`, a name that
    # is never defined in this notebook; every sibling cell quantizes `vgg`.
    qmodel2 = model_quantize(vgg, quantizer_config, activation_bits=4, transfer_weights=True)

    # Freeze the quantized convolutions so that fit() leaves them untouched.
    for l in qmodel2.layers:
        if isinstance(l, QConv2D):
            l.trainable = False

    # compile (after changing `trainable`, so the change takes effect)
    qmodel2.compile(
        optimizer=keras.optimizers.SGD(0.001),
        loss=keras.losses.CategoricalCrossentropy(from_logits=True),
        metrics=["accuracy"],
    )

    # train
    history = qmodel2.fit(
        x_train,
        y_train,
        batch_size=BATCH_SIZE,
        epochs=1,
        validation_data=(x_test, y_test),
        validation_freq=1,
    )

    # evaluate
    loss, acc = qmodel2.evaluate(x_test, y_test)

    mean_acc += acc


mean_acc /= 10
print(f"Accuracy: {mean_acc * 100}%")

# training last layers deep nibble from scratch

In [None]:
input_shape = x_train.shape[1:]

# Create the Deep Nibble network wrapper that is trained below.
qlenet = QLeNet(input_shape=input_shape, output_size=y_train.shape[-1], batch_size=256)


mean_acc = 0
for i in range(10):
    print(f"iteration {i} ... \n\n")

    # Load the pre-trained model. This previously referenced `lenet`, which is
    # never defined in this notebook.
    # NOTE(review): `vgg` is used to mirror the PO2 cells, which start from the
    # same model; if the quantized `qvgg` was intended instead, swap it here.
    qlenet.load_layers_from_model(vgg)
    qlenet.freeze_conv = True
    # Re-initialise the fully connected layers before training.
    qlenet.restart_fc_layers()

    # Three training stages: (learning rate, number of epochs).
    for stage_lr, stage_epochs in ((0.000010, 1), (0.000100, 10), (0.000010, 1)):
        qlenet.train(
            x_train,
            y_train,
            learning_rate=stage_lr,
            num_epochs=stage_epochs,
            x_val=x_test,
            y_val=y_test,
        )

    # Predict with the trained network.
    y_pred = qlenet.predict(x_test, 256)

    # Share of test samples whose prediction matches the argmax of the label row.
    accuracy = tf.reduce_mean(tf.cast(y_pred == tf.argmax(y_test, axis=1), tf.float32))
    mean_acc += accuracy
    print(f"Accuracy: {accuracy * 100}%")

mean_acc /= 10
print(f"Accuracy: {mean_acc * 100}%")

# training last layer po2 from scratch

In [None]:
from qkeras.utils import model_quantize, model_save_quantized_weights
from qkeras import *


# Reload the fp32 checkpoint without its training configuration.
vgg = keras.models.load_model("vgg_model_fp32.h5", compile=False)

# Re-create the graph with a ReLU directly after the input, then chain every
# layer of the loaded model except the first one onto it.
x_in = keras.layers.Input((32, 32, 3))
x = x_in
x = keras.layers.ReLU(name="relu_input")(x)
for layer in vgg.layers[1:]:
    x = layer(x)


vgg = keras.Model(inputs=[x_in], outputs=[x])
vgg.compile(
    optimizer=keras.optimizers.SGD(0.01),
    loss=keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"],
)


# Conv and dense layers share one quantizer string for kernels and biases;
# relu activations get their own quantizer. Each layer type gets its own dict.
po2_spec = "quantized_po2(4,1,use_stochastic_rounding=True)"
quantizer_config = {
    layer_type: {"kernel_quantizer": po2_spec, "bias_quantizer": po2_spec}
    for layer_type in ("QConv2D", "QDense")
}
quantizer_config["QActivation"] = {
    "relu": "quantized_relu_po2(4,1,use_stochastic_rounding=True)"
}

# Quantize the wrapped model, carrying the fp32 weights over, and print its layout.
qmodel2 = model_quantize(
    vgg, quantizer_config, activation_bits=4, transfer_weights=True
)
qmodel2.summary()



In [None]:

BATCH_SIZE = 256

# Ten independent runs: quantize, freeze the conv layers, re-initialise the
# dense layers, train in three stages and average the final test accuracy.
mean_acc = 0
for i in range(10):
    print("iteratoin", i, "...\n")

    # fresh quantized copy of the VGG model
    qmodel2 = model_quantize(
        vgg, quantizer_config, activation_bits=4, transfer_weights=True
    )

    # Quantized convolutions are frozen; quantized dense layers get a new
    # Glorot-normal kernel and an all-zero bias.
    for l in qmodel2.layers:
        if isinstance(l, QConv2D):
            l.trainable = False
        elif isinstance(l, QDense):
            w = keras.initializers.GlorotNormal()(l.weights[0].shape)
            b = tf.zeros_like(l.weights[1])
            l.set_weights([w, b])

    # Each stage recompiles with a new SGD optimizer and then fits:
    # (learning rate, number of epochs).
    for stage_lr, stage_epochs in ((0.001, 1), (0.01, 10), (0.001, 1)):
        qmodel2.compile(
            optimizer=keras.optimizers.SGD(stage_lr),
            loss=keras.losses.CategoricalCrossentropy(from_logits=True),
            metrics=["accuracy"],
        )
        history = qmodel2.fit(
            x_train,
            y_train,
            batch_size=BATCH_SIZE,
            epochs=stage_epochs,
            validation_data=(x_test, y_test),
            validation_freq=1,
        )

    # evaluate
    loss, acc = qmodel2.evaluate(x_test, y_test)

    mean_acc += acc


mean_acc /= 10
print(f"Accuracy: {mean_acc * 100}%")