/*
* Copyright (c) 2014-2020 Embedded Systems and Applications, TU Darmstadt.
*
* This file is part of TaPaSCo
* (see https://github.com/esa-tu-darmstadt/tapasco).
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see .
*/
#include
#include
#include
#include
#include
#include
#include
// Repetition count constant.
#define RUNS 10
// Alignment in bytes handed to aligned_alloc for the host buffers.
#define PAGE_SIZE 4096
// Element type stored in the host buffers.
using element_type = int32_t;
// Type used for element counts and loop indices.
using numblock_type = int64_t;
// Fills arr[0 .. num_blocks-1] so that every element holds its own index,
// converted to element_type. Does nothing when num_blocks is zero or negative.
static void init_array(element_type *arr, const numblock_type num_blocks) {
  numblock_type idx = 0;
  while (idx < num_blocks) {
    arr[idx] = static_cast<element_type>(idx);
    ++idx;
  }
}
// this function check whether the input and result match with each other
static uint64_t check_interface(element_type *input, element_type *result, const numblock_type num_blocks)
{
unsigned int errs = 0;
for (numblock_type i = 0; i < num_blocks; ++i) {
if (input[i] != result[i] ) {
std::cerr << "ERROR: Value at " << i << " is " << input[i] <<" vs "<< result[i] << std::endl;
++errs;
}
}
return errs;
}
// Runs the interface_test PE as a chain of three jobs across two devices:
// dev0 (input0 -> output0), dev1 (output0 -> output1), dev0 (output1 -> output2).
// Each job is timed separately.
//
// dev0, dev1        - the two TaPaSCo devices used for the chain
// interface_test_id - PE id passed to every launch
// num_blocks        - number of element_type elements per buffer
//
// The return statement is not visible here; main() treats a non-zero result
// as failure.
//
// NOTE(review): as it stands, several lines in this region (the ones where a
// commented-out std::cerr statement runs straight into an aligned_alloc call)
// look truncated, so the end of this function and the headers of the
// functions that follow it cannot be seen. Treat the boundaries below with care.
int run_p2p(tapasco::Tapasco *dev0, tapasco::Tapasco *dev1, tapasco::PEId interface_test_id, numblock_type num_blocks)
{
uint64_t errs = 0;
// NOTE(review): the next line looks truncated; input0 is used below but its
// declaration is not visible.
std::cout<<"testing on demand P2P"<(aligned_alloc(PAGE_SIZE, num_blocks * sizeof(element_type)));
auto *output0 = static_cast(aligned_alloc(PAGE_SIZE, num_blocks * sizeof(element_type)));
// fill the input buffer with the values 0, 1, 2, ...
init_array(input0,num_blocks);
// buffer written by the second job
auto *output1 = static_cast(aligned_alloc(PAGE_SIZE, num_blocks * sizeof(element_type)));
// buffer written by the third job
auto *output2 = static_cast(aligned_alloc(PAGE_SIZE, num_blocks * sizeof(element_type)));
// start timing the first job
auto start = std::chrono::high_resolution_clock::now();
// First job: dev0 copies the values from input0 into output0.
// Both buffers are passed as virtual addresses (on-demand data migration).
auto job = dev0->launch(interface_test_id, tapasco::makeVirtualAddress(input0),
tapasco::makeVirtualAddress(output0), num_blocks);
job();
// stop timing the first job
auto stop = std::chrono::high_resolution_clock::now();
// elapsed time of the first job (intended unit: microseconds)
auto duration = std::chrono::duration_cast(stop - start);
// Start timing the second job. It reads output0, which was written through
// the other FPGA, so the transfer is expected to use P2P.
// output1 is a host allocation.
auto start2 = std::chrono::high_resolution_clock::now();
// the second job also uses on-demand migration
auto job2 = dev1->launch(interface_test_id, tapasco::makeVirtualAddress(
output0),tapasco::makeVirtualAddress(output1), num_blocks);
job2();
// stop timing the second job
auto stop2 = std::chrono::high_resolution_clock::now();
// elapsed time of the second job
auto duration2 = std::chrono::duration_cast(stop2 - start2);
// start timing the third job
auto start3 = std::chrono::high_resolution_clock::now();
// Third job: copies output1 from the second FPGA back through the first
// FPGA; it is expected to use P2P as well.
// output2 is a host allocation.
// The third job also uses on-demand migration.
auto job3 = dev0->launch(interface_test_id, tapasco::makeVirtualAddress(
output1),tapasco::makeVirtualAddress(output2), num_blocks);
job3();
// stop timing the third job
auto stop3 = std::chrono::high_resolution_clock::now();
// elapsed time of the third job
auto duration3 = std::chrono::duration_cast(stop3 - start3);
// NOTE(review): the next line looks truncated. What follows appears to be the
// body of a second, user-managed variant of the P2P test whose header is not
// visible.
// std::cerr<<(num_blocks * sizeof(element_type))/PAGE_SIZE<<","<(aligned_alloc(PAGE_SIZE, num_blocks * sizeof(element_type)));
auto *output0 = static_cast(aligned_alloc(PAGE_SIZE, num_blocks * sizeof(element_type)));
// fill the input buffer with the values 0, 1, 2, ...
init_array(input0,num_blocks);
// buffer written by the second job
auto *output1 = static_cast(aligned_alloc(PAGE_SIZE, num_blocks * sizeof(element_type)));
// buffer written by the third job
auto *output2 = static_cast(aligned_alloc(PAGE_SIZE, num_blocks * sizeof(element_type)));
// start timing the first job
auto start = std::chrono::high_resolution_clock::now();
// First job, user-managed variant: buffers are passed as wrapped pointers
// together with their size in bytes.
// NOTE(review): the original comment says this job copies input0 into
// output0, but the call below passes output0 and output1 (the same buffers as
// the second job). This looks like a copy-paste slip; confirm the intended
// arguments.
auto job = dev0->launch(interface_test_id, tapasco::makeWrappedPointer(
output0, num_blocks* sizeof(element_type)),tapasco::makeWrappedPointer(
output1, num_blocks* sizeof(element_type)), num_blocks);
job();
// stop timing the first job
auto stop = std::chrono::high_resolution_clock::now();
// elapsed time of the first job (intended unit: microseconds)
auto duration = std::chrono::duration_cast(stop - start);
// Start timing the second job. It reads output0, which was written through
// the other FPGA, so the transfer is expected to use P2P.
// output1 is a host allocation.
auto start2 = std::chrono::high_resolution_clock::now();
// user-managed: wrapped pointers with explicit byte sizes
auto job2 = dev1->launch(interface_test_id, tapasco::makeWrappedPointer(
output0, num_blocks* sizeof(element_type)),tapasco::makeWrappedPointer(
output1, num_blocks* sizeof(element_type)), num_blocks);
job2();
// stop timing the second job
auto stop2 = std::chrono::high_resolution_clock::now();
// elapsed time of the second job
auto duration2 = std::chrono::duration_cast(stop2 - start2);
// start timing the third job
auto start3 = std::chrono::high_resolution_clock::now();
// Third job: copies output1 from the second FPGA back through the first
// FPGA; it is expected to use P2P as well.
// output2 is a host allocation.
// The third job is user-managed, too.
auto job3 = dev0->launch(interface_test_id, tapasco::makeWrappedPointer(
output1, num_blocks* sizeof(element_type)),tapasco::makeWrappedPointer(
output2, num_blocks* sizeof(element_type)), num_blocks);
job3();
// stop timing the third job
auto stop3 = std::chrono::high_resolution_clock::now();
// elapsed time of the third job
auto duration3 = std::chrono::duration_cast(stop3 - start3);
// NOTE(review): the next line looks truncated. What follows appears to be the
// body of a single-device test using on-demand migration; its header is not
// visible.
// std::cerr<<(num_blocks * sizeof(element_type))/PAGE_SIZE<<","<(aligned_alloc(PAGE_SIZE, num_blocks * sizeof(element_type)));
auto *output0 = static_cast(aligned_alloc(PAGE_SIZE, num_blocks * sizeof(element_type)));
// fill the input buffer with the values 0, 1, 2, ...
init_array(input0,num_blocks);
// start timing the job
auto start = std::chrono::high_resolution_clock::now();
// single job on one device: input0 and output0 are passed as virtual
// addresses (on-demand migration)
auto job = tapasco->launch(interface_test_id,tapasco::makeVirtualAddress(input0),
tapasco::makeVirtualAddress(output0),num_blocks);
job();
// stop timing and compute the elapsed time of the job
auto stop = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast(stop - start);
// NOTE(review): the next line looks truncated. What follows appears to be the
// body of a single-device test using user-managed buffers; its header is not
// visible.
// std::cerr<<(num_blocks * sizeof(element_type))/PAGE_SIZE<<","<(aligned_alloc(PAGE_SIZE, num_blocks * sizeof(element_type)));
element_type *output0 = static_cast(aligned_alloc(PAGE_SIZE, num_blocks * sizeof(element_type)));
// fill the input buffer with the values 0, 1, 2, ...
init_array(input0,num_blocks);
// start timing the job
auto start = std::chrono::high_resolution_clock::now();
// single job on one device, user-managed: input0 and output0 are passed as
// wrapped pointers together with their size in bytes
auto job = tapasco->launch(interface_test_id, tapasco::makeWrappedPointer(
input0, num_blocks* sizeof(element_type)),tapasco::makeWrappedPointer(
output0, num_blocks* sizeof(element_type)), num_blocks);
job();
// stop timing and compute the elapsed time of the job
auto stop = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast(stop - start);
// NOTE(review): the next line looks truncated. Its tail is the header of a
// helper taking a container v; main() calls it as vector_all_zero. The helper
// returns true when every element of v equals 0.
// std::cerr<<(num_blocks * sizeof(element_type))/PAGE_SIZE<<","< v) {
return std::all_of(v.begin(), v.end(), [](int i) {return i == 0;});
}
int main(int argc, char **argv)
{
// Use C API to get number of TaPaSCo devices
if(argc != 2) {
std::cout << "Usage: ./prog num_block (i.e. number of element)" << std::endl;
return 1;
}
tapasco::TLKM *tlkm = tapasco::tapasco_tlkm_new();
int num_devices = tapasco::tapasco_tlkm_device_len(tlkm);
std::cout << "Found " << num_devices << " TaPaSCo devices." << std::endl;
const numblock_type num_blocks = std::atoll(argv[1]);
std::cout << "The num_blocks is: " << num_blocks << std::endl;
// initialize TaPaSCo devices
std::vector devices;
for (int i = 0; i < num_devices; ++i) {
// pass access types and device ID to constructor (not required if only one device is used)
devices.push_back(new tapasco::Tapasco(tapasco::tlkm_access::TlkmAccessExclusive, i));
}
tapasco::PEId interface_test_id;
std::vector interface_test_pe_count;
// num_devices=1;
for (auto d : devices) {
try {
auto id = d->get_pe_id("esa.cs.tu-darmstadt.de:hls:interface_test_2:1.0");
interface_test_pe_count.push_back(d->kernel_pe_count(id));
interface_test_id = id;
} catch (...) {
interface_test_pe_count.push_back(0);
}
}
// No PE found to run any tests
if (vector_all_zero(interface_test_pe_count)) {
std::cout << "ERROR: Need at least one interface_test instance to run." << std::endl;
exit(1);
}
// Run tests on all devices if PEs are available
std::cout << "Run single-FPGA host example..." << std::endl;
// std::cerr << "# of pages, number of run, latency" << std::endl;
for (int i = 0; i < num_devices; ++i) {
if (interface_test_pe_count[i]) {
if (run_interface_test(devices[i], interface_test_id, num_blocks)) {
std::cout << "An error occurred while running the interface_test example, exiting..."
<< std::endl;
exit(1);
} else {
std::cout << "Completed interface_test example successfully!" << std::endl;
}
}
}
std::cout << "Run p2p example..." << std::endl;
// std::cerr << "# of pages, number of run, dev0 latency, dev1 latency, dev 0 run latency" << std::endl;
if (run_p2p(devices[0], devices[1], interface_test_id,num_blocks)) {
std::cout << "An error occurred while running the pipeline example, exiting..." << std::endl;
exit(1);
} else {
std::cout << "Completed the p2p example successfully!" << std::endl;
}
return 0;
}