Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
max2ma committed Jan 23, 2020
2 parents 8ae70db + 30f0468 commit 3a74e53
Show file tree
Hide file tree
Showing 65 changed files with 2,366 additions and 1,138 deletions.
2 changes: 1 addition & 1 deletion Jenkinsfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@Library('pipeline-library')_

VitisLibPipeline (branch: 'master', libname: 'xf_blas',
email: 'amr@xilinx.com', devtest: 'RunBLAS.sh', TOOLVERSION: '2019.1_release')
email: 'amr@xilinx.com', devtest: 'RunBLAS.sh', TOOLVERSION: '2019.2_released')
108 changes: 108 additions & 0 deletions L1/include/hw/xf_blas/gemm.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
/*
* Copyright 2019 Xilinx, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef XF_BLAS_GEMM_HPP
#define XF_BLAS_GEMM_HPP

#ifndef __cplusplus
#error "BLAS Library only works with C++."
#endif

#include "ap_int.h"
#include "hls_stream.h"
#include "xf_blas/helpers.hpp"
#include "scal.hpp"
#include "axpy.hpp"

namespace xf {

namespace blas {

/**
 * @brief SystolicArray accumulates products of skewed A and B stream entries
 *        into a t_M x t_N grid of accumulators and streams out the finished rows.
 *
 * @tparam t_DataType    element type of the input words
 * @tparam t_M           number of elements per A word (number of accumulator rows)
 * @tparam t_N           number of elements per B word (number of accumulator columns)
 * @tparam t_MacDataType element type of the accumulators and of the output words
 */
template <typename t_DataType, unsigned int t_M, unsigned int t_N = t_M, typename t_MacDataType = t_DataType>
class SystolicArray {
   public:
    /**
     * @brief process_dsp runs the multiply-accumulate grid over the input streams.
     *
     * While the iteration index is below p_multi * p_k, one word is read from
     * each of p_As and p_Bs per iteration; after that, words built from 0 are
     * fed for a further t_M + t_N iterations so the last results can drain.
     * Accumulator (m, n) always uses window entry m + n of row m of A and of
     * row n of B, and is latched into the output buffer and cleared whenever
     * the in-block counter k equals m + n.
     *
     * Given the asserted precondition, one row of the output buffer is written
     * to p_sum on each iteration with t_N <= k < t_M + t_N, starting after the
     * first block of p_k iterations, i.e. t_M output words per block.
     *
     * @param p_k     number of input words per accumulation block; must be
     *                at least t_M + t_N (checked by assert outside synthesis)
     * @param p_As    input stream of A words (t_M entries each)
     * @param p_Bs    input stream of B words (t_N entries each)
     * @param p_sum   output stream of accumulator rows (t_N entries each)
     * @param p_multi number of consecutive blocks of p_k words to process
     */
    static void process_dsp(unsigned int p_k,
                            hls::stream<WideType<t_DataType, t_M> >& p_As,
                            hls::stream<WideType<t_DataType, t_N> >& p_Bs,
                            hls::stream<WideType<t_MacDataType, t_N> >& p_sum,
                            unsigned int p_multi = 1) {
#ifndef __SYNTHESIS__
        assert(p_k >= t_M + t_N);
#endif

        // One history window per A row / B column; entry m + n is the one
        // consumed by accumulator (m, n).
        // NOTE(review): presumably shift() inserts the new value at index 0 and
        // ages the older ones - confirm against WideType in helpers.hpp.
        WideType<t_DataType, t_M + t_N> l_winA[t_M];
#pragma HLS ARRAY_PARTITION variable = l_winA dim = 0 complete
        WideType<t_DataType, t_M + t_N> l_winB[t_N];
#pragma HLS ARRAY_PARTITION variable = l_winB dim = 0 complete

        // l_C holds the running sums, l_Co the latched results awaiting output.
        WideType<t_MacDataType, t_N> l_C[t_M];
#pragma HLS ARRAY_PARTITION variable = l_C dim = 0 complete
        WideType<t_MacDataType, t_N> l_Co[t_M];
#pragma HLS ARRAY_PARTITION variable = l_Co dim = 0 complete

        // All counters are unsigned, matching p_k / p_multi / t_M / t_N, so the
        // comparisons below involve no implicit signed-to-unsigned conversion
        // and the iteration counter cannot hit signed-overflow for long runs.
        const unsigned int l_iterations = p_multi * p_k + t_M + t_N;

        // l counts every iteration; k is the position inside the current block
        // and wraps back to 0 each time it reaches p_k.
        for (unsigned int k = 0, l = 0; l < l_iterations; l++, k++) {
#pragma HLS PIPELINE
            if (k == p_k) {
                k = 0;
            }

            // Emit latched row k - t_N; nothing is written during the first
            // block because no result has been latched for output yet.
            if (l > p_k && k >= t_N && k < t_M + t_N) {
                p_sum.write(l_Co[k - t_N]);
            }

            WideType<t_DataType, t_M> l_A = 0;
            WideType<t_DataType, t_N> l_B = 0;

            // Only the first p_multi * p_k iterations consume input; the
            // remaining ones keep the 0-constructed words to flush the array.
            if (l < p_multi * p_k) {
                l_A = p_As.read();
                l_B = p_Bs.read();
            }

            for (unsigned int j = 0; j < t_M; j++) l_winA[j].shift(l_A[j]);
            for (unsigned int j = 0; j < t_N; j++) l_winB[j].shift(l_B[j]);
            for (unsigned int m = 0; m < t_M; m++) {
                for (unsigned int n = 0; n < t_N; n++) {
                    const unsigned int l_id = m + n;
                    // Block boundary for this cell: latch the finished sum and
                    // restart the accumulation before adding the new product.
                    if (l_id == k) {
                        l_Co[m][n] = l_C[m][n];
                        l_C[m][n] = 0;
                    }
                    l_C[m][n] += l_winA[m][l_id] * l_winB[n][l_id];
                }
            }
        }
    }
};

template <typename t_DataType,
unsigned int t_M,
unsigned int t_N = t_M,
typename t_IndexType = unsigned int,
typename t_MacDataType = t_DataType>
void gemm(const unsigned int p_k,
hls::stream<WideType<t_DataType, t_M> >& p_A,
hls::stream<WideType<t_DataType, t_N> >& p_B,
hls::stream<WideType<t_MacDataType, t_N> >& p_C,
const unsigned int p_r = 1) {
#pragma HLS DATAFLOW
SystolicArray<t_DataType, t_M, t_N, t_MacDataType>::process_dsp(p_k, p_A, p_B, p_C, p_r);
}

} // end namespace blas

} // end namespace xf

#endif
56 changes: 40 additions & 16 deletions L1/include/hw/xf_blas/helpers/dataMover/transpMatB2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,30 +92,54 @@ template <typename t_DataType, unsigned int t_ParEntries>
void transpMatBlocks(unsigned int p_blocks,
hls::stream<WideType<t_DataType, t_ParEntries> >& p_in,
hls::stream<WideType<t_DataType, t_ParEntries> >& p_out) {
t_DataType l_buf[t_ParEntries][t_ParEntries];
t_DataType l_buf[2][t_ParEntries][t_ParEntries];
#pragma HLS ARRAY_PARTITION variable = l_buf complete dim = 0
for (unsigned int l_block = 0; l_block < p_blocks; ++l_block) {
// shuffle and store
for (unsigned int i = 0; i < t_ParEntries; ++i) {

for (int i = 0; i < t_ParEntries; ++i) {
#pragma HLS PIPELINE
WideType<t_DataType, t_ParEntries> l_val;
WideType<t_DataType, t_ParEntries> l_val;
#pragma HLS ARRAY_PARTITION variable = l_val complete
l_val = p_in.read();
for (unsigned int j = 0; j < t_ParEntries; ++j) {
l_buf[i][j] = l_val[j];
}
l_val = p_in.read();
for (int j = 0; j < t_ParEntries; ++j) {
l_buf[0][i][j] = l_val[j];
}
}

for (unsigned int i = 0; i < t_ParEntries; ++i) {
for (unsigned int l_block = 1; l_block < p_blocks; ++l_block) {
int jIn = 0, jOut = 0;
do {
#pragma HLS PIPELINE
WideType<t_DataType, t_ParEntries> l_val;
#pragma HLS ARRAY_PARTITION variable = l_val complete
for (unsigned int j = 0; j < t_ParEntries; ++j) {
l_val[j] = l_buf[j][i];
WideType<t_DataType, t_ParEntries> l_valIn;
#pragma HLS ARRAY_PARTITION variable = l_valIn complete
WideType<t_DataType, t_ParEntries> l_valOut;
#pragma HLS ARRAY_PARTITION variable = l_valOut complete
if (p_in.read_nb(l_valIn)) {
for (int k = 0; k < t_ParEntries; ++k) {
l_buf[l_block % 2][jIn][k] = l_valIn[k];
}
jIn++;
}
p_out.write(l_val);
}
for (int k = 0; k < t_ParEntries; ++k) {
l_valOut[k] = l_buf[(l_block - 1) % 2][k][jOut];
}
if (jOut < t_ParEntries) {
p_out.write(l_valOut);
jOut++;
}
} while ((jIn < t_ParEntries) || (jOut < t_ParEntries));
}

int i = 0;
do {
#pragma HLS PIPELINE
WideType<t_DataType, t_ParEntries> l_valOut;
#pragma HLS ARRAY_PARTITION variable = l_valOut complete
for (int j = 0; j < t_ParEntries; ++j) {
l_valOut[j] = l_buf[(p_blocks - 1) % 2][j][i];
}
p_out.write(l_valOut);
i++;
} while (i < t_ParEntries);
}

template <typename t_DataType, unsigned int t_ParEntries>
Expand Down
1 change: 1 addition & 0 deletions L1/include/hw/xf_blas/helpers/dataMover/vecMoverB1.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@ void readVec2Stream(t_DataType* p_in, unsigned int p_n, hls::stream<WideType<t_D
#pragma HLS PIPELINE
BitConv<t_DataType> l_bitConv;
WideType<t_DataType, t_ParEntries> l_val;
#pragma HLS ARRAY_PARTITION variable = l_val complete
for (unsigned int j = 0; j < t_ParEntries; ++j) {
l_val[j] = p_in[i * t_ParEntries + j];
}
Expand Down
Loading

0 comments on commit 3a74e53

Please sign in to comment.