# Experimento CNN FP32

In [None]:
import numpy as np
import tensorflow as tf
from dataset import load_mnist
import NeuralNetwork
# Short alias for the project's LeNet class; it is called below to build the network.
lenet = NeuralNetwork.LeNet

# Load MNIST. The four returned values are unpacked as
# (train inputs, train labels, test inputs, test labels).
x_train, y_train, x_test, y_test = load_mnist()

In [None]:

# Show the shapes of the four dataset splits before building anything.
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

# Hyper-parameters: the class count is the size of the last label axis.
batch_size = 256
output_size = y_train.shape[-1]
num_classes = output_size

# Build LeNet for inputs of shape [batch_size, *per-sample shape].
neural_network = lenet(
    [batch_size] + x_train.shape[1:],
    num_classes,
)

# Train for 10 epochs, passing the test split as validation data. The learning
# rate is the base rate divided by the batch size.
# Alternative schedule that was tried: learning_rate=0.005/batch_size, num_epochs=5.
neural_network.train(
    x_train,
    y_train,
    learning_rate=0.01 / batch_size,
    num_epochs=10,
    x_val=x_test,
    y_val=y_test,
)


In [None]:


# Keras reference model: two Conv2D -> ReLU -> MaxPooling2D stages, a Flatten,
# then three Dense layers. The last Dense layer emits 10 raw logits (the loss
# below is configured with from_logits=True).
batch_size = 256

model = tf.keras.Sequential([
    # Stage 1: 16 filters, 3x3 kernel, stride 1.
    tf.keras.layers.Conv2D(
        filters=16,
        kernel_size=(3, 3),
        strides=(1, 1),
        padding='SAME',
        kernel_initializer="he_uniform",
    ),
    tf.keras.layers.ReLU(),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    # Stage 2: 32 filters, 3x3 kernel, stride 1.
    tf.keras.layers.Conv2D(
        filters=32,
        kernel_size=(3, 3),
        strides=(1, 1),
        padding='SAME',
        kernel_initializer="he_uniform",
    ),
    tf.keras.layers.ReLU(),
    tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),
    # Classifier head.
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, kernel_initializer="he_uniform"),
    tf.keras.layers.ReLU(),
    tf.keras.layers.Dense(256, kernel_initializer="he_uniform"),
    tf.keras.layers.ReLU(),
    tf.keras.layers.Dense(10, kernel_initializer="he_uniform"),
])

# Compile with plain SGD (learning rate 0.01) and categorical cross-entropy on logits.
optimizer = tf.keras.optimizers.SGD(0.01)
model.compile(
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    optimizer=optimizer,
    metrics=['accuracy'],
)

# Build the weights for inputs of shape [batch_size, *per-sample shape].
model.build([batch_size] + x_train.shape[1:])

# Train for 10 epochs, passing the test split as validation data.
model.fit(
    x_train,
    y_train,
    epochs=10,
    batch_size=batch_size,
    validation_data=(x_test, y_test),
)

# Experimento Quantized MLP

In [None]:
import numpy as np
import cupy as cp
import tensorflow as tf
from dataset import load_mnist
from NeuralNetwork import QNeuralNetworkWithScale
import Activations
import FullyConnectedLayer


# Load dataset. load_mnist() returns (train inputs, train labels, test inputs,
# test labels): the other experiments in this notebook unpack it in that order,
# and the shapes printed by the executed Quantized CNN cell confirm it. Unpacking
# as (x_train, x_test, y_train, y_test) would bind the training labels to x_test
# and the test inputs to y_train.
x_train, y_train, x_test, y_test = load_mnist()

# Define the neural network input and output sizes.
# NOTE(review): the executed CNN cell prints inputs shaped (N, 28, 28, 1), for
# which x_train.shape[1] is 28 -- confirm whether QNeuralNetworkWithScale expects
# flattened inputs here.
input_size = x_train.shape[1]
output_size = y_train.shape[1]

print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

# Create the quantized network.
neural_network_with_scale = QNeuralNetworkWithScale(input_size, output_size)

# Train in three consecutive phases on the same network object:
# 1 epoch at 1e-5, then 20 epochs at 1e-4, then 5 epochs at 1e-5.
neural_network_with_scale.train(
    x_train, y_train,
    learning_rate=0.000010, num_epochs=1, batch_size=256,
    x_val=x_test, y_val=y_test,
)
neural_network_with_scale.train(
    x_train, y_train,
    learning_rate=0.000100, num_epochs=20, batch_size=256,
    x_val=x_test, y_val=y_test,
)
neural_network_with_scale.train(
    x_train, y_train,
    learning_rate=0.000010, num_epochs=5, batch_size=256,
    x_val=x_test, y_val=y_test,
)



# Experimento em Quantized CNN

In [1]:
import numpy as np
import cupy as cp
import tensorflow as tf
from dataset import load_mnist
from NeuralNetwork import QLeNet


# Load the MNIST splits and show the shape of the training labels.
x_train, y_train, x_test, y_test = load_mnist()
print(y_train.shape)

# The network's output width is the size of the last label axis.
output_size = y_train.shape[-1]

print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

batch_size = 256
num_classes = output_size

# Build the quantized LeNet for inputs of shape [batch_size, *per-sample shape].
neural_network = QLeNet([batch_size] + x_train.shape[1:], num_classes)

# Three consecutive training phases on the same network, given as
# (base learning rate, number of epochs). Each base rate is divided by the
# batch size before being passed to train().
for base_lr, epochs in ((0.0010, 1), (0.0100, 20), (0.0010, 5)):
    neural_network.train(
        x_train,
        y_train,
        learning_rate=base_lr / batch_size,
        num_epochs=epochs,
        x_val=x_test,
        y_val=y_test,
    )



(60000, 10)
(60000, 28, 28, 1) (10000, 28, 28, 1) (60000, 10) (10000, 10)
Epoch 1/1, Loss: 0.009044239297509193 Accuracy: 10.109999775886536%
Epoch 1/20, Loss: 0.009006360545754433 Accuracy: 9.830000251531601%


KeyboardInterrupt: 

# plot scales history

In [None]:
import matplotlib.pyplot as plt



# Plot the recorded scale histories of every quantized fully connected layer,
# one figure per layer.
#
# The attribute names below are the single source of truth for both the plotted
# curves and the legend entries, so the two cannot get out of order.
SCALE_HISTORY_NAMES = ("ws_hist", "bs_hist", "os_hist", "gws_hist", "gbs_hist", "gos_hist")
# Values are clipped to [0, SCALE_CLIP_MAX] before plotting.
SCALE_CLIP_MAX = 2000000

for i, layer in enumerate(neural_network_with_scale.layers):
    if not isinstance(layer, FullyConnectedLayer.QFullyConnectedLayerWithScale):
        continue

    plt.figure(dpi=300)
    for name in SCALE_HISTORY_NAMES:
        # Each history is converted to a CuPy array and then copied to the host for matplotlib.
        history = cp.asnumpy(cp.array(getattr(layer, name)))
        plt.plot(np.clip(history, 0, SCALE_CLIP_MAX))

    # Title with the layer index so figures of different layers can be told apart.
    plt.title(f"Layer {i} scale history")
    plt.legend(list(SCALE_HISTORY_NAMES))
    plt.show()
        
        

# plot grad histogram history

In [None]:
# For each quantized fully connected layer: discard gradient-history entries
# whose leading dimension is not 256, then save the remaining history to disk.
for l, layer in enumerate(neural_network_with_scale.layers):
    if not isinstance(layer, FullyConnectedLayer.QFullyConnectedLayerWithScale):
        continue

    # Slice assignment filters in place, so the layer keeps the same list object
    # (later cells read layer.grad_output_hist again).
    layer.grad_output_hist[:] = [
        grad for grad in layer.grad_output_hist if grad.shape[0] == 256
    ]

    np.save(f"layer{l}_grad_hist", np.array(layer.grad_output_hist, np.float32))

In [None]:
import matplotlib.pyplot as plt

# Plot the output-gradient history of each quantized fully connected layer as a
# 3-D scatter: a point's position is its index in the stacked history array and
# its colour is the gradient value stored at that index.
for l, layer in enumerate(neural_network_with_scale.layers):
    if not isinstance(layer, FullyConnectedLayer.QFullyConnectedLayerWithScale):
        continue

    # Stack the history into one float32 array. The indexing below requires the
    # stacked array to have (at least) three axes.
    grad_values = np.array(layer.grad_output_hist, np.float32)

    # Create the figure and a 3-D axis.
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    # Index grids for the three axes. indexing='ij' keeps the grids in the same
    # axis order as grad_values, so x/y/z.ravel() line up element-for-element
    # with grad_values.ravel(). The grids are bound to x, y, z only -- the
    # gradient data keeps its own name so it is not overwritten.
    x, y, z = np.meshgrid(
        np.arange(grad_values.shape[0]),
        np.arange(grad_values.shape[1]),
        np.arange(grad_values.shape[2]),
        indexing='ij',
    )

    # Colour every point by the gradient value at that index.
    ax.scatter(x.ravel(), y.ravel(), z.ravel(), c=grad_values.ravel())

    # Axis labels.
    ax.set_xlabel('Eixo X')
    ax.set_ylabel('Eixo Y')
    ax.set_zlabel('Eixo Z')

    # Show the figure.
    plt.show()




# plot weight and bias (w and b) distributions

In [None]:
import matplotlib.pyplot as plt



# One figure per quantized fully connected layer, overlaying 16-bin histograms
# of the layer's `qw` and `qb` arrays (presumably the quantized weights and
# biases -- confirm against the layer class).
for i, layer in enumerate(neural_network_with_scale.layers):
    if not isinstance(layer, FullyConnectedLayer.QFullyConnectedLayerWithScale):
        continue

    for attr_name in ("qw", "qb"):
        # Copy to the host and flatten before handing the data to matplotlib.
        flat_values = np.ravel(cp.asnumpy(getattr(layer, attr_name)))
        plt.hist(flat_values, bins=16)

    plt.show()
        
        

# plot output dist

In [None]:
import matplotlib.pyplot as plt



# One 16-bin histogram per quantized fully connected layer, built from the
# layer's stored `output` array.
for i, layer in enumerate(neural_network_with_scale.layers):
    if not isinstance(layer, FullyConnectedLayer.QFullyConnectedLayerWithScale):
        continue

    # Copy to the host and flatten before handing the data to matplotlib.
    flat_output = np.ravel(cp.asnumpy(layer.output))
    plt.hist(flat_output, bins=16)

    plt.show()
        

# calculate accuracy

In [None]:
# Predict on the training inputs. The second argument (256) is presumably the
# batch size used for inference -- TODO confirm against predict().
y_pred = neural_network_with_scale.predict(x_train, 256)

# Calculate accuracy: the mean of the element-wise match between the predictions
# and the arg-max index of the training labels along axis 1.
accuracy = cp.mean(y_pred == cp.argmax(y_train, axis=1))
print(f"Accuracy: {accuracy * 100}%")

In [None]:
# Validation: run the network's forward pass on the test inputs in a single call.
z = neural_network_with_scale.forward(x_test)
# Predicted class = index of the largest value along the last axis.
y_pred = cp.argmax(z, axis=-1)

# Calculate accuracy: the mean of the element-wise match between the predictions
# and the arg-max index of the test labels along axis 1.
accuracy = cp.mean(y_pred == cp.argmax(y_test, axis=1))
print(f"Accuracy: {accuracy * 100}%")

In [None]:
import matplotlib.pyplot as plt
from quantizer import quantize
import copy 
## printing weight distribution


# For every quantized fully connected layer, draw three histograms side by side:
# the float weights, the weights after max-abs normalisation and the project's
# quantize() helper, and the layer's stored `output` array.
for layer in neural_network_with_scale.layers:
    if not isinstance(layer, FullyConnectedLayer.QFullyConnectedLayerWithScale):
        continue

    # Work on a copy of the weights: the normalisation below divides in place
    # and must not modify the layer itself.
    w = copy.deepcopy(layer.weights)
    # The output is only read, so no copy is needed.
    o = layer.output

    # One row of three panels.
    figure, axis = plt.subplots(1, 3, figsize=(20, 7))

    # matplotlib needs host (NumPy) data and CuPy refuses implicit conversion,
    # so every array is copied to the host explicitly with cp.asnumpy, as the
    # other plotting cells in this notebook do.
    axis[0].hist(cp.asnumpy(cp.ravel(w)), bins=160)
    axis[0].set_title("Float Weights")

    # Normalise by the largest absolute weight before quantizing.
    scale = cp.max(cp.abs(w))
    print(scale)

    w /= scale
    w = quantize(w, True)

    axis[1].hist(cp.asnumpy(cp.ravel(w)), bins=16)
    axis[1].set_title("Quantized Weights")

    axis[2].hist(cp.asnumpy(cp.ravel(o)), bins=16)
    axis[2].set_title("quantized outputs")

    plt.show()