Skip to content

Commit

Permalink
latency pooling overhaul
Browse files Browse the repository at this point in the history
vivado latency pooling overhaul

vitis latency pooling overhaul, fix comment

fix boundary cond

fix syn issues

latency pooling overhaul
  • Loading branch information
calad0i committed Mar 23, 2024
1 parent bed3b60 commit f3d7fb5
Show file tree
Hide file tree
Showing 2 changed files with 81 additions and 153 deletions.
111 changes: 38 additions & 73 deletions hls4ml/templates/vitis/nnet_utils/nnet_pooling.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,60 +8,40 @@
namespace nnet {

// Return the maximum value from an array
// NOTE(review): two template headers appear back to back below (one returning T,
// one returning accum_t). This looks like the old and new lines of a diff merged
// together — confirm which signature is current before compiling.
template <typename T, int N> T max(T x[N]) {
template <typename T, int N, typename accum_t> accum_t max(T x[N]) {
// Seed the running maximum with element 0, then scan elements 1..N-1.
T y = x[0];
for (int i = 1; i < N; i++) {
// Keep whichever of the current element and the running maximum is larger.
y = x[i] > y ? x[i] : y;
}
return y;
}

// Mean of an array of ap_int<W> values.
// The elements are summed in an accumulator that is ceillog2(N) bits wider than
// the input, the sum is divided by N, and the quotient is narrowed back to
// ap_int<W> for the return value.
template <int W, int N> ap_int<W> avg(ap_int<W> (&x)[N]) {
    // Use a wider accumulator than the input to avoid overflow
    ap_int<W + ceillog2(N)> tmp = 0;
    for (int i = 0; i < N; i++) {
        tmp += x[i];
    }
    tmp /= N;
    // Now cast back to original type and return the narrowed value, so the
    // narrowing is explicit and matches the ap_fixed overload of avg.
    ap_int<W> y = tmp;
    return y;
}

// Mean of an array of ap_fixed<W, I> values.
// Both the total width and the integer width of the accumulator are grown by
// ceillog2(N) bits (to avoid overflow while summing); the quotient is then
// converted back to the caller's ap_fixed<W, I> type.
template <int W, int I, int N> ap_fixed<W, I> avg(ap_fixed<W, I> (&x)[N]) {
    typedef ap_fixed<W + ceillog2(N), I + ceillog2(N)> wide_t;

    // Sum every element in the widened type
    wide_t total = 0;
    for (int idx = 0; idx < N; ++idx) {
        total += x[idx];
    }

    // Divide by the element count, then narrow to the input type
    total /= N;
    ap_fixed<W, I> mean = total;
    return mean;
}

// Return the mean value of an array
// NOTE(review): old and new lines of a diff appear merged in this block — there
// are two template headers, two accumulator declarations (T y / accum_t y) and
// two divisions (by N and by length). Confirm which set is current before compiling.
template <typename T, int N> T avg(T (&x)[N]) {
T y = 0;
template <typename T, int N, typename accum_t> accum_t avg(T (&x)[N], unsigned length) {
accum_t y = 0;
// Accumulate all N elements of x into y.
for (int i = 0; i < N; i++) {
y += x[i];
}
// Divide the sum by the element count: the compile-time N in one variant,
// the caller-supplied length in the other.
y /= N;
y /= length;
return y;
}

// Enumeration for pooling operation (max, avg); the l2norm entry is commented out
enum Pool_Op { Max, Average }; // L2Norm };
// Apply the pooling operation selected by the template parameter `op` to x:
// Max dispatches to max(), Average dispatches to avg().
// NOTE(review): old and new lines of a diff appear merged in this block — there
// are two template headers and two return statements per case. Confirm which
// set is current before compiling.
template <typename T, int N, Pool_Op op> T pool_op(T (&x)[N]) {
template <typename T, int N, Pool_Op op, typename accum_t> accum_t pool_op(T (&x)[N], unsigned length) {
switch (op) {
case Max:
return max<T, N>(x);
return max<T, N, accum_t>(x);
case Average:
return avg(x);
return avg<T, N, accum_t>(x, length);
// case L2Norm: return l2norm<T, N>(x);
}
// NOTE(review): there is no default case and no return after the switch —
// presumably op is only ever Max or Average; confirm.
}

// Convenience overload of pool_op for callers that do not supply a length:
// forwards to the two-argument pool_op with the array size N as the length.
template <typename T, int N, Pool_Op op, typename accum_t> accum_t pool_op(T (&x)[N]) {
    const unsigned full_length = N;
    return pool_op<T, N, op, accum_t>(x, full_length);
}

template <typename T, Pool_Op op> T pad_val() {
/*---
*- In Tensorflow, pooling ignores the value in the padded cells
Expand Down Expand Up @@ -106,6 +86,7 @@ void pooling1d_cl(data_T data[CONFIG_T::n_in * CONFIG_T::n_filt], res_T res[CONF
const int limit = pool_op_limit_1d<CONFIG_T>();
#pragma HLS ALLOCATION function instances=pool_op<data_T, CONFIG_T::pool_width, CONFIG_T::pool_op> limit=limit
// Add any necessary padding

unsigned padded_width = CONFIG_T::n_in + CONFIG_T::pad_left + CONFIG_T::pad_right;
if (CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0) {
padded_width -= padded_width - (padded_width / CONFIG_T::stride_width * CONFIG_T::stride_width);
Expand All @@ -114,29 +95,19 @@ void pooling1d_cl(data_T data[CONFIG_T::n_in * CONFIG_T::n_filt], res_T res[CONF
for (int ff = 0; ff < CONFIG_T::n_filt; ff++) {
// Loop over input image x in steps of stride
for (int ii = 0; ii < padded_width; ii += CONFIG_T::stride_width) {
unsigned overlap_pixel = 0;
data_T pool[CONFIG_T::pool_width];
// Keep track of number of pixels in image vs padding region
unsigned img_overlap = 0;
// Loop over pool window x
for (int jj = 0; jj < CONFIG_T::stride_width; jj++) {
if (ii + jj < CONFIG_T::pad_left || ii + jj >= (padded_width - CONFIG_T::pad_right)) {
// Add padding
pool[jj] = pad_val<data_T, CONFIG_T::pool_op>();
} else {
#pragma HLS ARRAY_PARTITION variable=pool complete dim=0

for (int jj = 0; jj < CONFIG_T::pool_width; jj++) {
if (ii + jj >= CONFIG_T::pad_left && ii + jj < CONFIG_T::n_in + CONFIG_T::pad_left) {
pool[jj] = data[(ii + jj - CONFIG_T::pad_left) * CONFIG_T::n_filt + ff];
img_overlap++;
}
overlap_pixel++;
} else
pool[jj] = pad_val<data_T, CONFIG_T::pool_op>();
}
// do the pooling
// TODO in the case of average pooling, need to reduce width to area of pool window
// not overlapping padding region
res[(ii / CONFIG_T::stride_width) * CONFIG_T::n_filt + ff] =
pool_op<data_T, CONFIG_T::pool_width, CONFIG_T::pool_op>(pool);
// If the pool op is Average, the zero-padding needs to be removed from the results
if (CONFIG_T::pool_op == Average) {
data_T rescale = static_cast<data_T>(CONFIG_T::pool_width) / img_overlap;
res[(ii / CONFIG_T::stride_width) * CONFIG_T::n_filt + ff] *= rescale;
}
pool_op<data_T, CONFIG_T::pool_width, CONFIG_T::pool_op, typename CONFIG_T::accum_t>(pool, overlap_pixel);
}
}
}
Expand Down Expand Up @@ -200,7 +171,6 @@ void pooling2d_cl(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_
const int limit = pool_op_limit<CONFIG_T>();
#pragma HLS ALLOCATION function instances=pool_op<data_T, CONFIG_T::pool_height*CONFIG_T::pool_width, \
CONFIG_T::pool_op> limit=limit
// Add any necessary padding
unsigned padded_height = CONFIG_T::in_height + CONFIG_T::pad_top + CONFIG_T::pad_bottom;
unsigned padded_width = CONFIG_T::in_width + CONFIG_T::pad_left + CONFIG_T::pad_right;
if (CONFIG_T::pad_top == 0 && CONFIG_T::pad_bottom == 0 && CONFIG_T::pad_left == 0 && CONFIG_T::pad_right == 0) {
Expand All @@ -214,37 +184,32 @@ void pooling2d_cl(data_T data[CONFIG_T::in_height * CONFIG_T::in_width * CONFIG_
// Loop over input image x in steps of stride
for (int jj = 0; jj < padded_width; jj += CONFIG_T::stride_width) {
data_T pool[CONFIG_T::pool_height * CONFIG_T::pool_width];
// Keep track of number of pixels in image vs padding region
unsigned img_overlap = 0;
#pragma HLS ARRAY_PARTITION variable=pool complete dim=0

unsigned overlap_pixel = 0;

// Loop over pool window y
for (int kk = 0; kk < CONFIG_T::stride_height; kk++) {
// Loop over pool window x
for (int ll = 0; ll < CONFIG_T::stride_width; ll++) {
if (ii + kk < CONFIG_T::pad_top || ii + kk >= (padded_height - CONFIG_T::pad_bottom) ||
jj + ll < CONFIG_T::pad_left || jj + ll >= (padded_width - CONFIG_T::pad_right)) {
// Add padding
bool cond1 = ii + kk >= CONFIG_T::pad_top && ii + kk < CONFIG_T::in_height + CONFIG_T::pad_top;
bool cond2 = jj + ll >= CONFIG_T::pad_left && jj + ll < CONFIG_T::in_width + CONFIG_T::pad_left;
if (cond1 && cond2) {
unsigned data_idx =
((ii + kk - CONFIG_T::pad_top) * CONFIG_T::in_width + (jj + ll - CONFIG_T::pad_left)) *
CONFIG_T::n_filt +
ff;
pool[kk * CONFIG_T::stride_width + ll] = data[data_idx];
overlap_pixel++;
} else
pool[kk * CONFIG_T::stride_width + ll] = pad_val<data_T, CONFIG_T::pool_op>();
} else {
pool[kk * CONFIG_T::stride_width + ll] =
data[(ii + kk - CONFIG_T::pad_top) * CONFIG_T::in_width * CONFIG_T::n_filt +
(jj + ll - CONFIG_T::pad_left) * CONFIG_T::n_filt + ff];
img_overlap++;
}
}
}
// do the pooling
// TODO in the case of average pooling, need to reduce height * width to area of pool window
// not overlapping padding region

res[(ii / CONFIG_T::stride_height) * CONFIG_T::out_width * CONFIG_T::n_filt +
(jj / CONFIG_T::stride_width) * CONFIG_T::n_filt + ff] =
pool_op<data_T, CONFIG_T::pool_height * CONFIG_T::pool_width, CONFIG_T::pool_op>(pool);
// If the pool op is Average, the zero-padding needs to be removed from the results
if (CONFIG_T::pool_op == Average) {
data_T rescale =
static_cast<data_T>(CONFIG_T::pool_height) * static_cast<data_T>(CONFIG_T::pool_width) / img_overlap;
res[(ii / CONFIG_T::stride_height) * CONFIG_T::out_width * CONFIG_T::n_filt +
(jj / CONFIG_T::stride_width) * CONFIG_T::n_filt + ff] *= rescale;
}
pool_op<data_T, CONFIG_T::pool_height * CONFIG_T::pool_width, CONFIG_T::pool_op,
typename CONFIG_T::accum_t>(pool, overlap_pixel);
}
}
}
Expand Down
Loading

0 comments on commit f3d7fb5

Please sign in to comment.