In [1]:
import numpy as np

In [2]:
def generate_train_header(x, y, name):
    '''
    Quantize ``x`` to signed 8-bit fixed point and write it, together with
    its labels, as a C header of training data for node 0.

    The number of fractional bits is ``7 - int_bits``, where ``int_bits`` is
    the number of integer bits needed for the largest magnitude found in
    ``x``. Scaled values are saturated to [-128, 127] before the int8 cast,
    so a sample sitting exactly on a power-of-two peak (e.g. 1.0 -> 128)
    is written as 127 instead of wrapping around to -128.

    The header contains, per sample ``i``, a ``NODE_0_TRAIN_IMG_<i>`` macro
    holding the flattened data and a ``NODE_0_TRAIN_IMG<i>_LABEL`` macro,
    followed by ``NODE_0_TOTAL_TRAIN_IMAGES`` and the two ``static q7_t``
    arrays ``NODE_0_TRAIN_IMAGES`` and ``NODE_0_TRAIN_LABELS``.

    :param x:  input data, first axis indexes the samples
    :param y:  one integer label per sample (each entry is formatted with %d)
    :param name:  path of the header file to write (overwritten if present)
    :return: nothing
    :raises ValueError: if ``x`` is empty (raised by ``np.min``)
    '''
    x = np.asarray(x)

    # quantize input x
    abs_max = max(abs(np.min(x)), abs(np.max(x)))
    # log2(0) is -inf and cannot be turned into an int; an all-zero input
    # needs no integer bits at all.
    int_bits = int(np.ceil(np.log2(abs_max))) if abs_max > 0 else 0
    dec_bits = 7 - int_bits
    # Saturate before casting: astype(np.int8) alone wraps out-of-range values.
    data = np.clip(np.round(x * 2.0 ** dec_bits), -128, 127).astype(np.int8)

    label = y
    node = 0
    num_of_image = data.shape[0]
    with open(name, 'w') as f:
        for i in range(num_of_image):
            f.write('#define NODE_%d_TRAIN_IMG_%d {' % (node, i))
            # text-mode tofile: the int8 values as comma separated decimals
            data[i].flatten().tofile(f, sep=", ")
            f.write('} \n')
            f.write('#define NODE_%d_TRAIN_IMG%d_LABEL' % (node, i))
            f.write(' %d \n \n' % label[i])
        f.write('#define NODE_%d_TOTAL_TRAIN_IMAGES %d \n \n' % (node, num_of_image))

        # 2-D array initialised from the per-sample macros defined above
        f.write('static q7_t NODE_%d_TRAIN_IMAGES[%d][%d] = {' % (node, num_of_image, data[0].size))
        f.write(','.join('NODE_%d_TRAIN_IMG_%d' % (node, i) for i in range(num_of_image)))
        f.write('};\n\n')

        f.write('static q7_t NODE_%d_TRAIN_LABELS[%d] = {' % (node, num_of_image))
        f.write(','.join('NODE_%d_TRAIN_IMG%d_LABEL' % (node, i) for i in range(num_of_image)))
        f.write('};\n\n')

#     return 000


In [3]:
def generate_test_bin(x, y, file='test_data.h'):
    '''
    Quantize ``x`` to signed 8-bit fixed point and write it, together with
    its labels, as a C header of test data.

    The number of fractional bits is ``7 - int_bits``, where ``int_bits`` is
    the number of integer bits needed for the largest magnitude found in
    ``x``. Scaled values are saturated to [-128, 127] before the int8 cast,
    so a sample sitting exactly on a power-of-two peak (e.g. 1.0 -> 128)
    is written as 127 instead of wrapping around to -128.

    The header contains, per sample ``i``, a ``TEST_IMG<i>`` macro holding
    the flattened data and a ``TEST_IMG<i>_LABEL`` macro, followed by
    ``TOTAL_TEST_IMAGES`` and the two ``static q7_t`` arrays ``TEST_IMAGES``
    and ``TEST_LABELS``. The number of samples is also printed.

    :param x:  input data, first axis indexes the samples
    :param y:  one integer label per sample (each entry is formatted with %d)
    :param file:  path of the header file to write (overwritten if present)
    :return: nothing
    :raises ValueError: if ``x`` is empty (raised by ``np.min``)
    '''
    x = np.asarray(x)

    # quantize input x
    abs_max = max(abs(np.min(x)), abs(np.max(x)))
    # log2(0) is -inf and cannot be turned into an int; an all-zero input
    # needs no integer bits at all.
    int_bits = int(np.ceil(np.log2(abs_max))) if abs_max > 0 else 0
    dec_bits = 7 - int_bits
    # Saturate before casting: astype(np.int8) alone wraps out-of-range values.
    data = np.clip(np.round(x * 2.0 ** dec_bits), -128, 127).astype(np.int8)

    label = y
    num_of_image = data.shape[0]

    with open(file, 'w') as f:
        print("Number of test images:", num_of_image)
        for i in range(num_of_image):
            f.write('#define TEST_IMG%d {' % (i))
            # text-mode tofile: the int8 values as comma separated decimals
            data[i].flatten().tofile(f, sep=", ")
            f.write('} \n')
            f.write('#define TEST_IMG%d_LABEL' % (i))
            f.write(' %d \n \n' % label[i])
        f.write('#define TOTAL_TEST_IMAGES %d \n \n' % (num_of_image))

        # 2-D array initialised from the per-sample macros defined above
        f.write('static q7_t TEST_IMAGES[%d][%d] = {' % (num_of_image, data[0].size))
        f.write(','.join('TEST_IMG%d' % (i) for i in range(num_of_image)))
        f.write('};\n\n')

        f.write('static q7_t TEST_LABELS[%d] = {' % (num_of_image))
        f.write(','.join('TEST_IMG%d_LABEL' % (i) for i in range(num_of_image)))
        f.write('};\n\n')


In [4]:
def federated_train_images(node, data, label, size, file='renameTHIS.h'):
    '''
    Write the first ``size`` samples of ``data``/``label`` as a C header of
    training data for federated node ``node``.

    Each sample is rounded to the nearest integer and written as a
    ``NODE_<node>_TRAIN_IMG_<i>`` macro next to a
    ``NODE_<node>_TRAIN_IMG<i>_LABEL`` macro. The header ends with
    ``NODE_<node>_TOTAL_TRAIN_IMAGES`` and the two ``static q7_t`` arrays
    built from those macros. No scaling or range clipping is applied here.

    :param node:  integer node id used in every emitted identifier
    :param data:  samples, indexed along the first axis
    :param label:  one integer label per sample (formatted with %d)
    :param size:  number of leading samples to write
    :param file:  path of the header file to write (overwritten if present)
    :return: nothing
    '''
    flat_len = data[0].size if False else None  # placeholder, resolved below
    with open(file, 'w') as out:
        for idx in range(size):
            out.write('#define NODE_%d_TRAIN_IMG_%d {'%(node,idx) )
            # rounded values, printed as integers separated by ", "
            rounded = np.round(data[idx])
            rounded.flatten().tofile(out, sep=", ", format="%d")
            out.write('} \n')
            out.write('#define NODE_%d_TRAIN_IMG%d_LABEL'% (node, idx))
            out.write(' %d \n \n' % label[idx])
        out.write('#define NODE_%d_TOTAL_TRAIN_IMAGES %d \n \n'%(node,size))

        # element count of one flattened sample gives the inner array dimension
        flat_len = data[0].flatten().shape[0]
        out.write('static q7_t NODE_%d_TRAIN_IMAGES[%d][%d] = {' % (node,size, flat_len))
        out.write('NODE_%d_TRAIN_IMG_0'%(node))
        out.writelines(',NODE_%d_TRAIN_IMG_%d'%(node, k) for k in range(1, size))
        out.write('};\n\n')

        out.write('static q7_t NODE_%d_TRAIN_LABELS[%d] = {' % (node,size))
        out.write('NODE_%d_TRAIN_IMG0_LABEL'%(node))
        out.writelines(',NODE_%d_TRAIN_IMG%d_LABEL'%(node, k) for k in range(1, size))
        out.write('};\n\n')

In [5]:
import os
import sys

# Make the scripts directory two levels up importable from this notebook.
nnscript = os.path.abspath('../../scripts')
# Guarded so that re-running the cell does not stack duplicate entries, and
# using sys.path directly instead of the undocumented os.sys alias.
if nnscript not in sys.path:
    sys.path.append(nnscript)

In [6]:
import pandas as pd

In [7]:
# load data
# The six .npy files act as a cache: they are read when present, otherwise
# the splits are rebuilt and the cache is written for the next run.
try:
    x_train = np.load('train_data.npy')
    y_train = np.load('train_label.npy')
    x_test = np.load('test_data.npy')
    y_test = np.load('test_label.npy')
    x_val = np.load('val_data.npy')
    y_val = np.load('val_label.npy')
except (OSError, ValueError, EOFError) as load_error:
    # Only cache misses are handled here (missing/unreadable file, malformed
    # or truncated .npy). A bare except would also hide KeyboardInterrupt
    # and genuine programming errors.
    print('Cached .npy files unavailable (%s); rebuilding them.' % (load_error,))
    # NOTE(review): merge_mfcc_file is neither defined nor imported in the
    # visible cells; presumably it lives in the scripts directory added to
    # sys.path above. Confirm and import it before relying on this branch.
    (x_train, y_train), (x_test, y_test), (x_val, y_val) = merge_mfcc_file()
    np.save('train_data.npy', x_train)
    np.save('train_label.npy', y_train)
    np.save('test_data.npy', x_test)
    np.save('test_label.npy', y_test)
    np.save('val_data.npy', x_val)
    np.save('val_label.npy', y_val)

In [8]:
# The 34 keyword names, listed alphabetically.
selected_lable = [
    'backward', 'bed', 'bird', 'cat', 'dog', 'down',
    'eight', 'five', 'follow', 'forward', 'four', 'go',
    'happy', 'house', 'learn', 'left', 'marvin', 'nine',
    'no', 'off', 'on', 'one', 'right', 'seven',
    'sheila', 'six', 'stop', 'three', 'tree', 'two',
    'up', 'visual', 'yes', 'zero',
]

In [9]:
x_train.shape

(85245, 62, 13)

In [10]:
x_test.shape

(11005, 62, 13)

In [52]:
import sys

In [53]:
sys.getsizeof(x_train)

507378384

In [11]:
# Keep coefficients 2~13 only: the first coefficient is destructive, so
# index 0 of the last axis is dropped from every split.
x_train, x_test, x_val = (split[:, :, 1:] for split in (x_train, x_test, x_val))

# Add a trailing channel axis of length 1, since there is a single channel.
x_train, x_test, x_val = (
    split.reshape((split.shape[0], split.shape[1], split.shape[2], 1))
    for split in (x_train, x_test, x_val)
)
print('x_train shape:', x_train.shape, 'max', x_train.max(), 'min', x_train.min())

# Training data augmentation: append a copy of every training sample scaled
# by 0.8, and repeat the labels so they stay aligned with the samples.
x_train = np.vstack([x_train, 0.8 * x_train])
y_train = np.hstack([y_train, y_train])
print(y_train.shape)

def normalize(data, n, quantize=True):
    '''
    Clip ``data`` to [-2**n, 2**n] and scale it into [-1, 1].

    :param data:  array of values to normalize
    :param n:  exponent of the clipping limit (the limit is 2**n)
    :param quantize:  when true, snap the result to multiples of 1/128
    :return: the scaled (and optionally quantized) array
    '''
    bound = 2 ** n
    scaled = np.clip(data, -bound, bound) / bound
    if not quantize:
        return scaled
    # round to the nearest step of 1/128
    return np.round(scaled * 128) / 128.0

# Scale against a fixed limit of 2**3 rather than the data maximum: larger
# values saturate, which keeps more detail in the small values.
x_train, x_test, x_val = (normalize(split, 3) for split in (x_train, x_test, x_val))


# Shuffle every split, applying one permutation to both samples and labels
# so they stay aligned. Draw order is test, train, val.
permutation = np.random.permutation(len(x_test))
x_test, y_test = x_test[permutation], y_test[permutation]

permutation = np.random.permutation(len(x_train))
x_train, y_train = x_train[permutation], y_train[permutation]

permutation = np.random.permutation(len(x_val))
x_val, y_val = x_val[permutation], y_val[permutation]




x_train shape: (85245, 62, 12, 1) max 23.778687 min -37.92257
(170490,)


In [50]:
# Draw 5 distinct class ids out of 34 at random.
labelindices = np.random.choice(34, 5, replace=False)
print(labelindices)

# Keep only the training samples whose label is one of the drawn ids.
sub_x_train = x_train[np.isin(np.ravel(y_train), labelindices)]

[10 23 15 11 26]


In [51]:
sub_x_train.shape


(24086, 62, 12, 1)

In [14]:
# Writes samples 10..14 of the class subset to the keyspot test header.
# NOTE(review): sub_x_test / sub_y_test are first assigned in a later cell,
# so on a fresh kernel this call fails with the NameError recorded below.
generate_test_bin(sub_x_test[10:15], sub_y_test[10:15], file='fed_prunningNNOM-keyspot/src/keyspot_test.h')

NameError: name 'sub_x_test' is not defined

In [None]:
!open .



In [15]:
# Encode the labels of all three splits as integer class ids taken from ONE
# shared, sorted vocabulary. Factorizing each split on its own numbers the
# classes in order of first appearance, so after the independent shuffles the
# same label could receive a different id in train, test and val.
label_classes = np.unique(np.concatenate((np.ravel(y_train), np.ravel(y_test), np.ravel(y_val))))
# Every label is present in label_classes, so searchsorted returns its exact
# index. Re-running the cell on already encoded ids leaves them unchanged.
y_train = np.searchsorted(label_classes, y_train)
y_test = np.searchsorted(label_classes, y_test)
y_val = np.searchsorted(label_classes, y_val)

In [None]:

sumples = 10


for i in range (1):
    labelindices = np.random.choice(34, 5, replace=False)
    print(labelindices)
    sub_x_test = x_test[np.isin(y_test.flatten(), labelindices).flatten()]
    sub_y_test = y_test[np.isin(y_test.flatten(),labelindices).flatten()] 

    sub_y_test = np.unique(sub_y_test, return_inverse=True)[1] 
    # print(sub_y_test)
    unique, counts = np.unique(sub_y_test[sumples:], return_counts=True)
    print(dict(zip(unique, counts)) )

    unique, counts = np.unique(sub_y_test[:sumples], return_counts=True)
    print(dict(zip(unique, counts)) )




    generate_train_header(sub_x_test[:sumples], sub_y_test[:sumples], name='fed_prunningNNOM-keyspot/src/keyspot_train_data.h')
    generate_test_bin(sub_x_test[sumples:], sub_y_test[sumples:], file='fed_prunningNNOM-keyspot/src/keyspot_test.h')
    !make -C ./fed_prunningNNOM-keyspot/ 
    !./fed_prunningNNOM-keyspot/uwnet

    

In [None]:
# sumples = 100

# for i in range (30):
#     labelindices = np.random.choice(35, 3, replace=False)
#     print(labelindices)


#     sub_x_test = x_test[np.isin(y_test.flatten(), labelindices).flatten()]
#     sub_y_test = y_test[np.isin(y_test.flatten(), labelindices).flatten()] 
#     unique, counts = np.unique(sub_y_test[sumples], return_counts=True)
#     print(dict(zip(unique, counts)) )

#     generate_train_header(sub_x_test[:sumples], sub_y_test[:sumples], name='fed_prunningNNOM-keyspot/src/keyspot_train_data.h')
#     generate_test_bin(sub_x_test[sumples:], sub_y_test[sumples:], file='fed_prunningNNOM-keyspot/src/keyspot_test.h')
#     !make -C ./fed_prunningNNOM-keyspot/ 
#     !./fed_prunningNNOM-keyspot/uwnet



In [None]:
# !make -C ./fed_prunningNNOM-keyspot/

In [None]:
# generate_train_header(x_test[:sumples], y_test[:sumples], name='fed_prunningNNOM-keyspot/src/keyspot_train_data.h')
# generate_test_bin(x_test[sumples:], y_test[sumples:], file='fed_prunningNNOM-keyspot/src/keyspot_test.h')



In [None]:


# x_train = x_train[np.isin(y_train.flatten(), [0,1,2]).flatten()]
# y_train = y_train[np.isin(y_train.flatten(), [0,1,2]).flatten()]

# x_val = x_val[np.isin(y_val.flatten(), [0,1,2]).flatten()]
# y_val = y_val[np.isin(y_val.flatten(), [0,1,2]).flatten()]



# x_test = x_test[np.isin(y_test.flatten(), [0,1,2]).flatten()]
# y_test = y_test[np.isin(y_test.flatten(), [0,1,2]).flatten()] 

# y_test_original = y_test  # save for CI test


# y_train = one_hot(y_train, 3)
# y_test = one_hot(y_test,3)


# print("norm")
# print(x_train.shape)
# print(x_test.shape)
# # normolized each sensor, to range -1~1
# x_train = normalize(x_train)
# x_test  = normalize(x_test)

# generate binary test data, convert range to [-128 127] for mcu
# x_test_bin = np.clip(x_test *128, -128, 127)
# x_train_bin = np.clip(x_train*128, -128, 127)




In [None]:
len(x_val)

In [None]:
len(x_test)

In [None]:
# indices = np.random.choice(x_test.shape[0], 400, replace=False)
#     print(subset_x_train.shape[0])
# subset_x_train = x_train[indices]
# subset_y_train= y_train[indices]

# subset_x_test = x_test[indices]
# subset_y_test= y_train[indices]


# generate_train_header(x_test[:400], y_test[:400], name='har_train_data.h')

# generate_test_bin(x_test[400:], y_test[400:], file='har_test.h')




# generate_train_header(x_train_bin[:400], y_train[:400], name='00.bin')
# # generate_test_bin(x_train_bin[1750:1900], y_train[1750:1900], file='har_test.h')
# generate_test_bin(x_test_bin, y_test, file='har_test.h')

In [None]:
!open .

In [None]:
import numpy as np

# Read the raw file back as signed bytes. The file holds binary data, so it
# is opened in binary mode, and the context manager closes the handle that
# was previously left open.
with open("test_data.bin", "rb") as f:
    a = np.fromfile(f, dtype=np.int8)


In [None]:
print (a[128:200])

In [None]:
# def generate_test_bin(x, y, name='test_data_with_label.bin'):
#     '''
#     this method generate the
#     :param x:  input x data size
#     :param y:  input label (one hot label)
#     :return:
#     '''
#     # quantize input x
#     min_value = np.min(x)
#     max_value = np.max(x)

#     int_bits = int(np.ceil(np.log2(max(abs(min_value), abs(max_value)))))
#     dec_bits = 7 - int_bits
#     x = np.round(x*2**dec_bits).astype(np.int8)
    
#     print(x[0][0])
#     data = x.astype(dtype="byte")
#     label = y
#     node = 0
#     file='har_test.h'
#     with open(file, 'w') as f:
#         num_of_image = x.shape[0]
#         for i in range(num_of_image):
#             f.write('#define NODE_%d_TRAIN_IMG_%d {'%(node,i) )
# #             print(data[i])
#             (data[i]).flatten().tofile(f, sep=", ") # convert 0~1 to 0~127
#             f.write('} \n')
#             f.write('#define NODE_%d_TRAIN_IMG%d_LABEL'% (node, i))
#             f.write(' %d \n \n' % label[i])
#         f.write('#define NODE_%d_TOTAL_TRAIN_IMAGES %d \n \n'%(node,num_of_image))

#         f.write('static q7_t NODE_%d_TRAIN_IMAGES[%d][%d] = {' % (node,num_of_image, data[0].flatten().shape[0]))
#         f.write('NODE_%d_TRAIN_IMG_0'%(node))
#         for i in range(num_of_image -1):
#             f.write(',NODE_%d_TRAIN_IMG_%d'%(node, i+1))
#         f.write('};\n\n')

#         f.write('static q7_t NODE_%d_TRAIN_LABELS[%d] = {' % (node,num_of_image))
#         f.write('NODE_%d_TRAIN_IMG0_LABEL'%(node))
#         for i in range(num_of_image -1):
#             f.write(',NODE_%d_TRAIN_IMG%d_LABEL'%(node, i+1))
#         f.write('};\n\n')

# #     return 000
