Add sublayer compute function and example project for dense #62

Merged · 7 commits · Jul 10, 2018
17 changes: 17 additions & 0 deletions example-prjs/sublayer/build_prj.tcl
@@ -0,0 +1,17 @@
#################
# HLS4ML
#################
open_project -reset myproject_prj
set_top myproject
add_files firmware/myproject.cpp -cflags "-I[file normalize ../../nnet_utils]"
add_files -tb myproject_test.cpp -cflags "-I[file normalize ../../nnet_utils]"
add_files -tb firmware/weights
#add_files -tb tb_data
open_solution -reset "solution1"
set_part {xcku115-flvf1924-2-i}
create_clock -period 5 -name default
csim_design                            ;# C simulation against the testbench
csynth_design                          ;# C-to-RTL synthesis
cosim_design -trace_level all          ;# C/RTL co-simulation with waveform tracing
export_design -format ip_catalog       ;# package the design as a Vivado IP core
exit
97 changes: 97 additions & 0 deletions example-prjs/sublayer/firmware/myproject.cpp
@@ -0,0 +1,97 @@
//
// rfnoc-hls-neuralnet: Vivado HLS code for neural-net building blocks
//
// Copyright (C) 2017 EJ Kreinar
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
#include <iostream>

#include "parameters.h"
#include "myproject.h"

#include "nnet_layer.h"
#include "nnet_sublayer.h"
#include "nnet_conv.h"
#include "nnet_activation.h"

//hls-fpga-machine-learning insert weights
#include "weights/w1.h"
#include "weights/b1.h"
#include "weights/w2.h"
#include "weights/b2.h"
#include "weights/w3.h"
#include "weights/b3.h"
#include "weights/w4.h"
#include "weights/b4.h"

void myproject(
input_t data[N_INPUTS],
result_t res[N_OUTPUTS],
unsigned short &const_size_in,
unsigned short &const_size_out)
{

//hls-fpga-machine-learning insert IO
#pragma HLS ARRAY_RESHAPE variable=data complete dim=0
#pragma HLS ARRAY_RESHAPE variable=res complete dim=0
#pragma HLS INTERFACE ap_vld port=data,res
#pragma HLS PIPELINE


const_size_in = N_INPUTS;
const_size_out = N_OUTPUTS;

// ****************************************
// NETWORK INSTANTIATION
// ****************************************

//hls-fpga-machine-learning insert layers

layer1_t layer1_out[N_LAYER_1];
#pragma HLS ARRAY_PARTITION variable=layer1_out complete dim=0
layer1_t logits1[N_LAYER_1];
#pragma HLS ARRAY_PARTITION variable=logits1 complete dim=0
layer1_t logits1_0[N_LAYER_1/2];
#pragma HLS ARRAY_PARTITION variable=logits1_0 complete dim=0
layer1_t logits1_1[N_LAYER_1/2];
#pragma HLS ARRAY_PARTITION variable=logits1_1 complete dim=0
nnet::compute_sublayer<input_t, layer1_t, config1_0>(data, logits1_0, w1, b1);
nnet::compute_sublayer<input_t, layer1_t, config1_1>(data, logits1_1, w1, b1);
nnet::merge<layer1_t, N_LAYER_1/2, N_LAYER_1/2>(logits1_0, logits1_1, logits1);
//nnet::compute_layer<input_t, layer1_t, config1>(data, logits1, w1, b1);
nnet::relu<layer1_t, layer1_t, relu_config1>(logits1, layer1_out);


layer2_t layer2_out[N_LAYER_2];
#pragma HLS ARRAY_PARTITION variable=layer2_out complete dim=0
layer2_t logits2[N_LAYER_2];
#pragma HLS ARRAY_PARTITION variable=logits2 complete dim=0
nnet::compute_layer<layer1_t, layer2_t, config2>(layer1_out, logits2, w2, b2);
nnet::relu<layer2_t, layer2_t, relu_config2>(logits2, layer2_out);

layer3_t layer3_out[N_LAYER_3];
#pragma HLS ARRAY_PARTITION variable=layer3_out complete dim=0
layer3_t logits3[N_LAYER_3];
#pragma HLS ARRAY_PARTITION variable=logits3 complete dim=0
nnet::compute_layer<layer2_t, layer3_t, config3>(layer2_out, logits3, w3, b3);
nnet::relu<layer3_t, layer3_t, relu_config3>(logits3, layer3_out);

result_t logits4[N_OUTPUTS];
#pragma HLS ARRAY_PARTITION variable=logits4 complete dim=0
nnet::compute_layer<layer3_t, result_t, config4>(layer3_out, logits4, w4, b4);
nnet::softmax<result_t, result_t, softmax_config4>(logits4, res);


}
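
The split-and-merge pattern above is easier to see in a plain C++ reference model. The sketch below is a hypothetical stand-in for nnet::compute_sublayer and nnet::merge, not the templates this PR adds (nnet_sublayer.h is not shown in this excerpt): it assumes a row-major weight layout w[i * n_out + j] and that merge simply concatenates its two inputs. The names sublayer_ref and merge_ref are made up for illustration.

#include <cstdio>

// Reference model of one dense sublayer: computes only outputs
// [I_SUB, I_SUB + N_SUB) of the full N_IN x N_OUT layer, reading the
// shared weight and bias arrays at the matching offsets.
template<int N_IN, int N_OUT, int N_SUB, int I_SUB>
void sublayer_ref(const float data[N_IN], float res[N_SUB],
                  const float w[N_IN * N_OUT], const float b[N_OUT]) {
    for (int j = 0; j < N_SUB; j++) {
        float acc = b[I_SUB + j];
        for (int i = 0; i < N_IN; i++)
            acc += data[i] * w[i * N_OUT + I_SUB + j];  // assumed weight layout
        res[j] = acc;
    }
}

// Reference model of nnet::merge: concatenation of the two halves.
template<int N1, int N2>
void merge_ref(const float in1[N1], const float in2[N2], float out[N1 + N2]) {
    for (int i = 0; i < N1; i++) out[i] = in1[i];
    for (int i = 0; i < N2; i++) out[N1 + i] = in2[i];
}

// Split + merge should reproduce the monolithic layer exactly here,
// since each output is computed by the same summation in both cases.
int main() {
    const int N_IN = 4, N_OUT = 6;
    float data[N_IN], w[N_IN * N_OUT], b[N_OUT];
    for (int i = 0; i < N_IN; i++)         data[i] = 0.1f * i;
    for (int i = 0; i < N_IN * N_OUT; i++) w[i] = 0.01f * i;
    for (int j = 0; j < N_OUT; j++)        b[j] = 0.5f - 0.1f * j;

    float full[N_OUT];
    sublayer_ref<N_IN, N_OUT, N_OUT, 0>(data, full, w, b);  // whole layer at once

    float half0[N_OUT / 2], half1[N_OUT / 2], merged[N_OUT];
    sublayer_ref<N_IN, N_OUT, N_OUT / 2, 0>(data, half0, w, b);
    sublayer_ref<N_IN, N_OUT, N_OUT / 2, N_OUT / 2>(data, half1, w, b);
    merge_ref<N_OUT / 2, N_OUT / 2>(half0, half1, merged);

    for (int j = 0; j < N_OUT; j++)
        std::printf("%d: full=%f merged=%f\n", j, full[j], merged[j]);
    return 0;
}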
37 changes: 37 additions & 0 deletions example-prjs/sublayer/firmware/myproject.h
@@ -0,0 +1,37 @@
//
// rfnoc-hls-neuralnet: Vivado HLS code for neural-net building blocks
//
// Copyright (C) 2017 EJ Kreinar
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//

#ifndef MYPROJECT_H_
#define MYPROJECT_H_

#include <complex>
#include "ap_int.h"
#include "ap_fixed.h"

#include "parameters.h"


// Prototype of top level function for C-synthesis
void myproject(
input_t data[N_INPUTS],
result_t res[N_OUTPUTS],
unsigned short &const_size_in,
unsigned short &const_size_out);

#endif
129 changes: 129 additions & 0 deletions example-prjs/sublayer/firmware/parameters.h
@@ -0,0 +1,129 @@
#ifndef PARAMETERS_H_
#define PARAMETERS_H_

#include <complex>
#include "ap_int.h"
#include "ap_fixed.h"
#include "nnet_layer.h"
#include "nnet_sublayer.h"
#include "nnet_conv.h"
#include "nnet_activation.h"
#include "nnet_common.h"

//hls-fpga-machine-learning insert numbers
typedef ap_fixed<16,6> accum_default_t;
typedef ap_fixed<16,6> weight_default_t;
typedef ap_fixed<16,6> bias_default_t;
typedef ap_fixed<16,6> input_t;
typedef ap_fixed<16,6> result_t;
#define N_INPUTS 16
#define N_LAYER_1 64
#define N_LAYER_2 32
#define N_LAYER_3 32
#define N_OUTPUTS 5

//hls-fpga-machine-learning insert layer-precision
typedef ap_fixed<16,6> layer1_t;
typedef ap_fixed<16,6> layer2_t;
typedef ap_fixed<16,6> layer3_t;

//hls-fpga-machine-learning insert layer-config
struct config1 : nnet::layer_config {
static const unsigned n_in = N_INPUTS;
static const unsigned n_out = N_LAYER_1;

static const unsigned io_type = nnet::io_parallel;
static const unsigned reuse_factor = 1;
static const unsigned n_zeros = 0;
static const bool store_weights_in_bram = false;
typedef accum_default_t accum_t;
typedef bias_default_t bias_t;
typedef weight_default_t weight_t;
};
struct config1_0 : nnet::sublayer_config {
static const unsigned n_in = N_INPUTS;
static const unsigned n_out = N_LAYER_1;
static const unsigned n_part = 2;   // layer 1 is split into two sublayers
static const unsigned i_part = 0;   // this config is part 0 of 2
static const unsigned n_sub_out = N_LAYER_1/2;
static const unsigned i_sub_out = 0;

static const unsigned io_type = nnet::io_parallel;
static const unsigned reuse_factor = 1;
static const unsigned n_zeros = 0;
static const bool store_weights_in_bram = false;
typedef accum_default_t accum_t;
typedef bias_default_t bias_t;
typedef weight_default_t weight_t;
};
struct config1_1 : nnet::sublayer_config {
static const unsigned n_in = N_INPUTS;
static const unsigned n_out = N_LAYER_1;
static const unsigned n_part = 2;   // layer 1 is split into two sublayers
static const unsigned i_part = 1;   // this config is part 1 of 2
static const unsigned n_sub_out = N_LAYER_1/2;
static const unsigned i_sub_out = N_LAYER_1/2;

static const unsigned io_type = nnet::io_parallel;
static const unsigned reuse_factor = 1;
static const unsigned n_zeros = 0;
static const bool store_weights_in_bram = false;
typedef accum_default_t accum_t;
typedef bias_default_t bias_t;
typedef weight_default_t weight_t;
};
struct relu_config1 : nnet::activ_config {
static const unsigned n_in = N_LAYER_1;
static const unsigned table_size = 1024;
static const unsigned io_type = nnet::io_parallel;
};
struct config2 : nnet::layer_config {
static const unsigned n_in = N_LAYER_1;
static const unsigned n_out = N_LAYER_2;
static const unsigned io_type = nnet::io_parallel;
static const unsigned reuse_factor = 1;
static const unsigned n_zeros = 1579;
static const bool store_weights_in_bram = false;
typedef accum_default_t accum_t;
typedef bias_default_t bias_t;
typedef weight_default_t weight_t;
};
struct relu_config2 : nnet::activ_config {
static const unsigned n_in = N_LAYER_2;
static const unsigned table_size = 1024;
static const unsigned io_type = nnet::io_parallel;
};
struct config3 : nnet::layer_config {
static const unsigned n_in = N_LAYER_2;
static const unsigned n_out = N_LAYER_3;
static const unsigned io_type = nnet::io_parallel;
static const unsigned reuse_factor = 1;
static const unsigned n_zeros = 797;
static const bool store_weights_in_bram = false;
typedef accum_default_t accum_t;
typedef bias_default_t bias_t;
typedef weight_default_t weight_t;
};
struct relu_config3 : nnet::activ_config {
static const unsigned n_in = N_LAYER_3;
static const unsigned table_size = 1024;
static const unsigned io_type = nnet::io_parallel;
};
struct config4 : nnet::layer_config {
static const unsigned n_in = N_LAYER_3;
static const unsigned n_out = N_OUTPUTS;
static const unsigned io_type = nnet::io_parallel;
static const unsigned reuse_factor = 1;
static const unsigned n_zeros = 82;
static const bool store_weights_in_bram = false;
typedef accum_default_t accum_t;
typedef bias_default_t bias_t;
typedef weight_default_t weight_t;
};
struct softmax_config4 : nnet::activ_config {
static const unsigned n_in = N_OUTPUTS;
static const unsigned table_size = 1024;
static const unsigned io_type = nnet::io_parallel;
};

#endif
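
A pitfall with hand-written sublayer configs is a split that no longer tiles the layer's output range, e.g. after editing one n_sub_out but not the other. A few hypothetical compile-time checks of the kind below could be appended to parameters.h; they are not part of this PR, and static_assert assumes the tool is run in a C++11 mode.

// Hypothetical sanity checks: the two sublayers must cover config1's
// outputs exactly once (start at 0, contiguous, correct total width).
static_assert(config1_0::i_sub_out == 0,
              "first sublayer must start at output 0");
static_assert(config1_1::i_sub_out ==
              config1_0::i_sub_out + config1_0::n_sub_out,
              "sublayers must be contiguous");
static_assert(config1_0::n_sub_out + config1_1::n_sub_out == config1::n_out,
              "sublayers must together cover the full layer");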
6 changes: 6 additions & 0 deletions example-prjs/sublayer/firmware/weights/b1.h
@@ -0,0 +1,6 @@
//Numpy array shape (64,)
//Min -0.449101030827
//Max 0.666767776012
//Number of zeros 0

bias_default_t b1[64] = {0.40715688467, -0.152693673968, 0.0247167721391, 0.00855304021388, 0.0373381376266, 0.050812792033, 0.405522465706, 0.287921875715, -0.449101030827, 0.0863167047501, 0.0802906751633, 0.0533042959869, 0.0377268269658, -0.104714356363, 0.161454766989, 0.0043580099009, 0.543596565723, 0.0671059116721, 0.0300234947354, -0.0791025981307, 0.0687709525228, 0.246556162834, 0.307227045298, 0.015126154758, 0.269042164087, 0.159081891179, 0.359984785318, 0.0347696691751, -0.11019410938, 0.290954172611, -0.383461236954, 0.212079584599, 0.267755180597, 0.323163509369, 0.238599523902, 0.666767776012, -0.210283786058, 0.467060565948, 0.0360072515905, 0.447051584721, 0.543918013573, 0.0631941556931, 0.219371959567, 0.343893527985, 0.0888087227941, 0.3759329319, 0.328853011131, 0.0872760862112, 0.0198150593787, 0.100737020373, -0.114139311016, -0.0335930995643, 0.498003512621, 0.0944355949759, 0.33665907383, 0.277953624725, 0.322312951088, 0.328217118979, -0.182733371854, 0.355605363846, 0.242487534881, 0.336844176054, 0.497421890497, 0.153069153428};
6 changes: 6 additions & 0 deletions example-prjs/sublayer/firmware/weights/b2.h
@@ -0,0 +1,6 @@
//Numpy array shape (32,)
//Min -0.233829811215
//Max 0.638602077961
//Number of zeros 0

bias_default_t b2[32] = {0.385666370392, 0.238704577088, 0.309233009815, 0.239236935973, -0.100519768894, 0.390451163054, 0.0123554319143, 0.00878310669214, -0.226692482829, 0.551124632359, 0.387682288885, -0.233829811215, 0.638602077961, -0.0973265096545, 0.0515359714627, 0.0117165837437, 0.291351050138, 0.0050294934772, 0.42868027091, 0.179656729102, 0.377021223307, 0.0864382982254, 0.0744026079774, 0.192403048277, 0.374691933393, -0.0706490576267, 0.00352985248901, -0.00252335402183, -0.000259724474745, -0.0192233566195, -0.000169069055119, 0.195322647691};
6 changes: 6 additions & 0 deletions example-prjs/sublayer/firmware/weights/b3.h
@@ -0,0 +1,6 @@
//Numpy array shape (32,)
//Min -0.192495539784
//Max 0.727849543095
//Number of zeros 0

bias_default_t b3[32] = {0.130974292755, -0.177440658212, -0.0381854325533, 0.0132606215775, 0.278921306133, 0.0119517724961, 0.206687957048, -0.00634318916127, 0.323955714703, 0.24621617794, 0.0515957064927, 0.121288783848, -0.108800083399, 0.272505134344, 0.0230016112328, 0.179796844721, 0.30160176754, 0.017023300752, -0.0593090243638, -0.0195552054793, 0.096253298223, -0.0279509462416, 0.208594635129, -0.192495539784, 0.2576790452, 0.346903651953, -0.0277135614306, 0.727849543095, 0.509101092815, 0.116215974092, 0.0136004369706, 0.256516158581};
6 changes: 6 additions & 0 deletions example-prjs/sublayer/firmware/weights/b4.h
@@ -0,0 +1,6 @@
//Numpy array shape (5,)
//Min -0.260938197374
//Max 0.304645419121
//Number of zeros 0

bias_default_t b4[5] = {-0.0388891100883, -0.0420672409236, -0.260938197374, 0.0625408738852, 0.304645419121};