In [None]:
import os
import sys
from Quantmodel.Quant import FullyConnectedLayer, ReLULayer, SoftmaxLossLayer
from Quantmodel.mnist_mlp_cpu import MNIST_MLP, build_mnist_mlp
import numpy as np
import struct
import time


In [None]:
def evaluate(mlp):
    """Run the model over its whole test set and print the top-1 accuracy.

    Parameters
    ----------
    mlp : object
        Must expose ``test_data`` (2-D array whose last column holds the label
        and whose remaining columns are the features), ``batch_size`` and
        ``forward(batch)`` returning one row of class scores per sample.

    Notes
    -----
    The trailing partial batch is handled by the main loop: ``np.ceil`` yields
    one extra iteration and NumPy clamps slice bounds at the end of the array.
    The former follow-up block that re-processed the "last batch" built its
    slice index with true division (a float), which raises ``TypeError`` on
    Python 3 whenever the number of samples is not a multiple of
    ``batch_size``; it was also redundant, so it has been removed.
    """
    num_samples = mlp.test_data.shape[0]
    batch_size = mlp.batch_size
    pred_results = np.zeros([num_samples])
    num_batches = int(np.ceil(num_samples / batch_size))
    for idx in range(num_batches):
        start, stop = idx * batch_size, (idx + 1) * batch_size
        # Every column except the last one is a feature.
        batch_images = mlp.test_data[start:stop, :-1]
        prob = mlp.forward(batch_images)
        pred_results[start:stop] = np.argmax(prob, axis=1)
    # The last column of test_data is the ground-truth label.
    accuracy = np.mean(pred_results == mlp.test_data[:, -1]) * 100
    print('Accuracy in test set: %f %%' % accuracy)

Run `evaluate()` to get the accuracy on the test set.

In [None]:
# Build the MLP with build_mnist_mlp() (imported from Quantmodel.mnist_mlp_cpu)
# and print its accuracy on the test set.
mlp = build_mnist_mlp()
evaluate(mlp)

In [None]:
# Directory holding the raw MNIST files; load_data() joins it with the file names below.
MNIST_DIR = "./mnist_data"
# Training images and labels.
TRAIN_DATA = "train-images-idx3-ubyte"
TRAIN_LABEL = "train-labels-idx1-ubyte"
# Test images and labels.
TEST_DATA = "t10k-images-idx3-ubyte"
TEST_LABEL = "t10k-labels-idx1-ubyte"

In FullyConnectedLayer, calculate the scale of the input and quantize it to 8 bits.

In [None]:
def quantize_data(param_dir):
    """Quantize the stored weights and biases to 8 bits and save them to disk.

    Loads a pickled dict from ``param_dir``, and for each of the parameters
    w1, b1, w2, b2, w3, b3 reads the raw array together with its scale
    (sw1, sb1, ...). The quantized arrays and the unchanged scales are
    collected in a new dict that is written with ``np.save``.

    Parameters
    ----------
    param_dir : str
        Path of the .npy file holding the parameter dict.

    Notes
    -----
    The quantization expression itself is an exercise placeholder and must be
    filled in before this function can run.
    """
    print('loading data from ' + param_dir)
    # allow_pickle=True unpickles arbitrary objects: only load files you trust.
    data = np.load(param_dir, allow_pickle=True).item()
    quant_num_bits = 8
    # parameter key -> (key of its scale, lowest representable value, highest representable value).
    # With 8 bits the range is the signed interval [-128, 127].
    quantization_params = {
        'w1': ('sw1', -2**(quant_num_bits-1), 2**(quant_num_bits-1)-1),
        'b1': ('sb1', -2**(quant_num_bits-1), 2**(quant_num_bits-1)-1),
        'w2': ('sw2', -2**(quant_num_bits-1), 2**(quant_num_bits-1)-1),
        'b2': ('sb2', -2**(quant_num_bits-1), 2**(quant_num_bits-1)-1),
        'w3': ('sw3', -2**(quant_num_bits-1), 2**(quant_num_bits-1)-1),
        'b3': ('sb3', -2**(quant_num_bits-1), 2**(quant_num_bits-1)-1)
    }

    quantized_data = {}
    for param, (scale_key, min_val, max_val) in quantization_params.items():
        raw_data = data[param]
        scale = data[scale_key]
        # TODO: quantize the weight and bias stored in the .npy file.
        # The blank below is an exercise placeholder; replace it with an
        # expression built from raw_data, scale, min_val and max_val.
        quantized_param = _________________________
        quantized_data[param] = quantized_param
        # The scale is passed through unchanged so later stages can read it.
        quantized_data[scale_key] = data[scale_key]

    # NOTE(review): these literals contain "\D" and "\8", which are not valid
    # escape sequences; Python keeps the backslashes but warns about them.
    # Consider raw strings. The path also uses Windows-style separators.
    np.save('.\Data\8bitdata.npy', quantized_data)
    print('Saving parameters to file :  .\Data\8bitdata.npy' )

In [None]:
class FullyConnectedLayer(object):
    """Fully connected layer (forward pass only) that derives its input scale on the fly.

    NOTE: this definition shadows the FullyConnectedLayer imported from
    Quantmodel.Quant at the top of the notebook.
    """
    def __init__(self, num_input, num_output):  
        # Record the layer dimensions; weights are supplied later via load_param().
        self.num_input = num_input
        self.num_output = num_output
        print('\tFully connected layer with input %d, output %d.' % (self.num_input, self.num_output))
    def forward(self, input ):  
        # TODO: quantize the input to 8 bits and obtain the scale factor.
        # Both blanks are exercise placeholders and must be filled in before
        # forward() can run. self.scale is what save_param() later returns as
        # the fifth value.
        self.scale = _________________________
        self.input = _________________________
        # Affine transform on the (quantized) input using the loaded weight and bias.
        self.output = np.dot(self.input, self.weight) + self.bias 
        return self.output 
    def load_param(self, weight, bias, w_scale, b_scale):  
        # Store the weight, bias and their scales handed in by the caller
        # (Net.load_model reads them from the parameter file).
        self.weight = weight
        self.bias = bias
        self.w_scale = w_scale
        self.b_scale = b_scale
    def save_param(self):
        # Return everything needed for later inference: weight, bias, their
        # scales, and the input scale set by the most recent forward() call.
        return self.weight, self.bias, self.w_scale, self.b_scale, self.scale

class ReLULayer(object):
    """Element-wise rectified linear unit (forward pass only)."""

    def __init__(self):
        print('\tReLU layer.')

    def forward(self, input):
        """Remember the input and return it with negative entries replaced by zero."""
        self.input = input
        return np.maximum(0, input)
class SoftmaxLossLayer(object):
    """Row-wise softmax (forward pass only)."""

    def __init__(self):
        print('\tSoftmax loss layer.')

    def forward(self, input):
        """Return per-row class probabilities and keep them in ``self.prob``.

        The row maximum is subtracted before exponentiation so that large
        scores do not overflow.
        """
        shifted = input - np.max(input, axis=1, keepdims=True)
        exps = np.exp(shifted)
        self.prob = exps / exps.sum(axis=1, keepdims=True)
        return self.prob

In [None]:
class Net(object):  # New Net without FakeQuant layers
    """Three-layer MLP (fc-relu-fc-relu-fc-softmax) used for the calibration pass.

    The layers are FullyConnectedLayer instances that determine the scale of
    their input during ``forward``; ``save_model`` then stores those scales
    next to the weights so a later stage can reuse them.
    """

    def __init__(self, batch_size=100, input_size=784, hidden1=90, hidden2=90, out_classes=10, lr=0.01, max_epoch=10, print_iter=100):
        """Store the hyper-parameters; no data is loaded and no layer is built here."""
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden1 = hidden1
        self.hidden2 = hidden2
        self.out_classes = out_classes
        self.lr = lr
        self.max_epoch = max_epoch
        self.print_iter = print_iter

    def load_mnist(self, file_dir, is_images=True):
        """Parse one MNIST binary file into a 2-D integer array.

        Parameters
        ----------
        file_dir : str
            Path of the file to read.
        is_images : bool, default True
            Truthy for an image file (header: magic, count, rows, cols),
            falsy for a label file (header: magic, count). The default used
            to be the string ``'True'``; it is now a real boolean so that the
            flag reads as what it is. Any truthy value still selects images.

        Returns
        -------
        numpy.ndarray
            Shape ``(count, rows * cols)`` for images and ``(count, 1)`` for labels.
        """
        # The context manager closes the file even if read() raises.
        with open(file_dir, 'rb') as bin_file:
            bin_data = bin_file.read()
        # Header fields are big-endian 32-bit integers.
        if is_images:
            fmt_header = '>iiii'
            magic, num_images, num_rows, num_cols = struct.unpack_from(fmt_header, bin_data, 0)
        else:
            fmt_header = '>ii'
            magic, num_images = struct.unpack_from(fmt_header, bin_data, 0)
            num_rows, num_cols = 1, 1
        data_size = num_images * num_rows * num_cols
        # The payload is one unsigned byte per value, starting right after the header.
        mat_data = struct.unpack_from('>' + str(data_size) + 'B', bin_data, struct.calcsize(fmt_header))
        mat_data = np.reshape(mat_data, [num_images, num_rows * num_cols])
        print('Load images from %s, number: %d, data shape: %s' % (file_dir, num_images, str(mat_data.shape)))
        return mat_data

    def load_data(self):
        """Load the MNIST train/test files and append the labels as the last column."""
        print('Loading MNIST data from files...')
        train_images = self.load_mnist(os.path.join(MNIST_DIR, TRAIN_DATA), True)
        train_labels = self.load_mnist(os.path.join(MNIST_DIR, TRAIN_LABEL), False)
        test_images = self.load_mnist(os.path.join(MNIST_DIR, TEST_DATA), True)
        test_labels = self.load_mnist(os.path.join(MNIST_DIR, TEST_LABEL), False)
        self.train_data = np.append(train_images, train_labels, axis=1)
        self.test_data = np.append(test_images, test_labels, axis=1)

    def build_model(self):
        """Create the layers (no FakeQuant layer is involved)."""
        print('Building multi-layer perception model...')
        self.fc1 = FullyConnectedLayer(self.input_size, self.hidden1)
        self.relu1 = ReLULayer()
        self.fc2 = FullyConnectedLayer(self.hidden1, self.hidden2)
        self.relu2 = ReLULayer()
        self.fc3 = FullyConnectedLayer(self.hidden2, self.out_classes)
        self.softmax = SoftmaxLossLayer()
        self.update_layer_list = [self.fc1, self.fc2, self.fc3]

    def load_model(self, param_dir):
        """Read the parameter dict stored at ``param_dir`` and hand each layer its values."""
        print('Loading parameters from file ' + param_dir)
        # allow_pickle=True unpickles arbitrary objects: only load files you trust.
        params = np.load(param_dir, allow_pickle=True).item()
        self.fc1.load_param(params['w1'], params['b1'], params['sw1'], params['sb1'])
        self.fc2.load_param(params['w2'], params['b2'], params['sw2'], params['sb2'])
        self.fc3.load_param(params['w3'], params['b3'], params['sw3'], params['sb3'])

    def save_model(self, param_dir):
        """Collect the five values returned by each layer's save_param() and save them.

        The left-hand sides below are exercise placeholders and must be
        filled in before this method can run. The file written here is what
        Net2.load_model reads, and it looks up the keys w, b, sw, sb and sa,
        each suffixed with the layer number 1 to 3.
        """
        #TODO: need save 5 parameters
        print('Saving parameters to file ' + param_dir)
        params = {}
        _________________________= self.fc1.save_param()
        _________________________ = self.fc2.save_param()
        _________________________ = self.fc3.save_param()
        np.save(param_dir, params)

    def forward(self, input):
        """Run one forward pass and return the softmax probabilities."""
        h1 = self.fc1.forward(input)
        h1 = self.relu1.forward(h1)
        h2 = self.fc2.forward(h1)
        h2 = self.relu2.forward(h2)
        h3 = self.fc3.forward(h2)
        prob = self.softmax.forward(h3)
        return prob


In this step, we use the quantized weights and biases to obtain the scale of the activations.

In [None]:
def build_Net():
    """Build the calibration network and load the 8-bit parameters into it.

    Returns
    -------
    Net
        A network with MNIST data loaded, layers built and the quantized
        weights and biases read from the file written by quantize_data().
    """
    h1, h2, e = 128, 128, 5  # 10
    mlp = Net(hidden1=h1, hidden2=h2, max_epoch=e)
    mlp.load_data()
    mlp.build_model()
    # Raw string: the previous literal relied on the invalid escape sequences
    # "\D" and "\8". The file name is also spelled exactly as quantize_data()
    # writes it ("8bitdata", not "8bitData") so the load works on
    # case-sensitive filesystems too.
    mlp.load_model(r'.\Data\8bitdata.npy')  # load the quantized data into the model
    return mlp

In [None]:
# Quantize the stored parameters, run the test set once so every layer records
# its input scale, then save weights, biases and scales for the inference stage.
# Raw strings are used because the previous literals relied on the invalid
# escape sequences "\D" and "\m"; the resulting paths are unchanged.
quantize_data(r'.\Data\mlp-128-128-5-epoch.npy')
mlp = build_Net()
evaluate(mlp)
mlp.save_model(r'.\Data\answer.npy')

Now that we have obtained quantized weights, quantized biases, and the scaling factor for activations, we can load this data into the MLP network for inference.

In [None]:
class FullyConnectedLayer2(object):
    """Fully connected layer (forward pass only) that reuses a saved input scale.

    Unlike FullyConnectedLayer, the scale is not computed in forward(); it is
    supplied through load_param() as ``sa``.
    """
    def __init__(self, num_input, num_output): 
        # Record the layer dimensions; weights are supplied later via load_param().
        self.num_input = num_input
        self.num_output = num_output
        print('\tFully connected layer with input %d, output %d.' % (self.num_input, self.num_output))
    def forward(self, input ):  
        # TODO: compared with FullyConnectedLayer, this layer needs to use the saved scale to quantize the input.
        # The blank is an exercise placeholder and must be filled in before forward() can run.
        self.input = _________________________
        # Affine transform on the (quantized) input using the loaded weight and bias.
        self.output = np.dot(self.input, self.weight) + self.bias 
        return self.output
    def load_param(self, weight, bias, w_scale, b_scale, sa): 
        # Store the weight, bias, their scales, and the saved input scale `sa`
        # (Net2.load_model passes params['sa1'], params['sa2'], params['sa3']).
        self.weight = weight
        self.bias = bias
        self.w_scale = w_scale
        self.b_scale = b_scale
        self.sa = sa


In [None]:
class Net2(object):
    """Three-layer MLP (fc-relu-fc-relu-fc-softmax) that runs inference with saved input scales.

    The layers are FullyConnectedLayer2 instances, which receive a fifth
    parameter ``sa`` (the saved input scale) through ``load_model``.
    """

    def __init__(self, batch_size=100, input_size=784, hidden1=90, hidden2=90, out_classes=10, lr=0.01, max_epoch=10, print_iter=100):
        """Store the hyper-parameters; no data is loaded and no layer is built here."""
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden1 = hidden1
        self.hidden2 = hidden2
        self.out_classes = out_classes
        self.lr = lr
        self.max_epoch = max_epoch
        self.print_iter = print_iter

    def load_mnist(self, file_dir, is_images=True):
        """Parse one MNIST binary file into a 2-D integer array.

        Parameters
        ----------
        file_dir : str
            Path of the file to read.
        is_images : bool, default True
            Truthy for an image file (header: magic, count, rows, cols),
            falsy for a label file (header: magic, count). The default used
            to be the string ``'True'``; it is now a real boolean so that the
            flag reads as what it is. Any truthy value still selects images.

        Returns
        -------
        numpy.ndarray
            Shape ``(count, rows * cols)`` for images and ``(count, 1)`` for labels.
        """
        # The context manager closes the file even if read() raises.
        with open(file_dir, 'rb') as bin_file:
            bin_data = bin_file.read()
        # Header fields are big-endian 32-bit integers.
        if is_images:
            fmt_header = '>iiii'
            magic, num_images, num_rows, num_cols = struct.unpack_from(fmt_header, bin_data, 0)
        else:
            fmt_header = '>ii'
            magic, num_images = struct.unpack_from(fmt_header, bin_data, 0)
            num_rows, num_cols = 1, 1
        data_size = num_images * num_rows * num_cols
        # The payload is one unsigned byte per value, starting right after the header.
        mat_data = struct.unpack_from('>' + str(data_size) + 'B', bin_data, struct.calcsize(fmt_header))
        mat_data = np.reshape(mat_data, [num_images, num_rows * num_cols])
        print('Load images from %s, number: %d, data shape: %s' % (file_dir, num_images, str(mat_data.shape)))
        return mat_data

    def load_data(self):
        """Load the MNIST train/test files and append the labels as the last column."""
        print('Loading MNIST data from files...')
        train_images = self.load_mnist(os.path.join(MNIST_DIR, TRAIN_DATA), True)
        train_labels = self.load_mnist(os.path.join(MNIST_DIR, TRAIN_LABEL), False)
        test_images = self.load_mnist(os.path.join(MNIST_DIR, TEST_DATA), True)
        test_labels = self.load_mnist(os.path.join(MNIST_DIR, TEST_LABEL), False)
        self.train_data = np.append(train_images, train_labels, axis=1)
        self.test_data = np.append(test_images, test_labels, axis=1)

    def build_model(self):
        """Create the layers, using FullyConnectedLayer2 for every fully connected layer."""
        print('Building multi-layer perception model...')
        self.fc1 = FullyConnectedLayer2(self.input_size, self.hidden1)
        self.relu1 = ReLULayer()
        self.fc2 = FullyConnectedLayer2(self.hidden1, self.hidden2)
        self.relu2 = ReLULayer()
        self.fc3 = FullyConnectedLayer2(self.hidden2, self.out_classes)
        self.softmax = SoftmaxLossLayer()
        self.update_layer_list = [self.fc1, self.fc2, self.fc3]

    def load_model(self, param_dir):
        """Read the parameter dict at ``param_dir``; each layer also receives its saved input scale ``sa``."""
        print('Loading parameters from file ' + param_dir)
        # allow_pickle=True unpickles arbitrary objects: only load files you trust.
        params = np.load(param_dir, allow_pickle=True).item()
        self.fc1.load_param(params['w1'], params['b1'], params['sw1'], params['sb1'], params['sa1'])
        self.fc2.load_param(params['w2'], params['b2'], params['sw2'], params['sb2'], params['sa2'])
        self.fc3.load_param(params['w3'], params['b3'], params['sw3'], params['sb3'], params['sa3'])

    def forward(self, input):
        """Run one forward pass and return the softmax probabilities."""
        h1 = self.fc1.forward(input)
        h1 = self.relu1.forward(h1)
        h2 = self.fc2.forward(h1)
        h2 = self.relu2.forward(h2)
        h3 = self.fc3.forward(h2)
        prob = self.softmax.forward(h3)
        return prob

In [None]:

def build_Net2():
    """Build the inference network and load weights, biases and saved scales into it.

    Returns
    -------
    Net2
        A network with MNIST data loaded, layers built and parameters read
        from the answer file under the Data directory.
    """
    h1, h2, e = 128, 128, 5  # 10
    mlp = Net2(hidden1=h1, hidden2=h2, max_epoch=e)
    mlp.load_data()
    mlp.build_model()
    # Raw string: the previous literal mixed the invalid escape "\D" with an
    # escaped "\\a"; the resulting path is unchanged.
    mlp.load_model(r'.\Data\answer.npy')  # load the quantized data into the model
    return mlp


In [None]:
# Build the inference network from the saved parameter file and print its
# accuracy on the test set. This rebinds the notebook-level name `mlp`.
mlp = build_Net2()
evaluate(mlp)


# Visualize Weights and Bias
Plot histograms of the weights and biases stored in the .npy file. Record any observations you make about the distribution of the values and include them in the report.

In [None]:
import matplotlib.pyplot as plt
import numpy as np

ADD YOUR CODE HERE to plot distributions of weights of the original NN model. Add them to the report

# Bonus : Lower precision
Try to quantize to a lower bit width. Finish the bonus in a new file.