Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move hwctx initializtion of module to xrt::run #7647

Merged
merged 1 commit into from
Jul 29, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 47 additions & 39 deletions src/runtime_src/core/common/api/xrt_kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1479,23 +1479,6 @@ class kernel_impl
return data; // no skipping
}

// For DPU kernels, initialize the instruction buffer and count in
// the command packet, both of which are before regular kernel
// arguments. The function returns the data payload after the
// preceeding instruction buffer and count; this data payload is
// where regular kernel arguments are placed.
uint32_t*
initialize_dpu(uint32_t* payload)
{
auto dpu = reinterpret_cast<ert_dpu_data*>(payload);
auto bo = xrt_core::module_int::get_instruction_buffer(m_module, name);
dpu->instruction_buffer = bo.address();
dpu->instruction_buffer_size = bo.size();

// Return payload past the ert_dpu_data structure
return payload + sizeof(ert_dpu_data) / sizeof(uint32_t); // skip past ert_dpu_data
}

static uint32_t
create_uid()
{
Expand All @@ -1512,17 +1495,6 @@ class kernel_impl
throw xrt_core::error("No such kernel '" + nm + "'");
}

// This function copies the module into a hw_context. The module
// will be associated with hwctx specific memory.
static xrt::module
copy_module(const xrt::module& module, const xrt::hw_context& hwctx)
{
if (!module)
return {};

return {module, hwctx};
}

public:
// kernel_type - constructor
//
Expand All @@ -1539,7 +1511,7 @@ class kernel_impl
, ctxmgr(xrt_core::context_mgr::create(device->core_device.get())) // owership tied to kernel_impl
, hwctx(std::move(ctx)) // hw context
, hwqueue(hwctx) // hw queue
, m_module{copy_module(mod, hwctx)} // module if any copy to hwctx specific module
, m_module{std::move(mod)} // module if any
, xclbin(hwctx.get_xclbin()) // xclbin with kernel
, xkernel(get_kernel_or_error(xclbin, name)) // kernel meta data managed by xclbin
, properties(xrt_core::xclbin_int::get_properties(xkernel))// cache kernel properties
Expand Down Expand Up @@ -1629,16 +1601,8 @@ class kernel_impl
cmd->encode_compute_units(cumask, num_cumasks);
auto data = kcmd->data + kcmd->extra_cu_masks;

switch (kcmd->opcode) {
case ERT_START_FA:
if (kcmd->opcode == ERT_START_FA)
data = initialize_fadesc(data);
break;
case ERT_START_DPU:
data = initialize_dpu(data);
break;
default:
break;
}

return data;
}
Expand Down Expand Up @@ -1943,6 +1907,17 @@ class run_impl
return count++;
}

// This function copies the module into a hw_context. The module
// will be associated with hwctx specific memory.
static xrt::module
copy_module(const xrt::module& module, const xrt::hw_context& hwctx)
{
if (!module)
return {};

return {module, hwctx};
}

virtual std::unique_ptr<arg_setter>
make_arg_setter()
{
Expand Down Expand Up @@ -2032,8 +2007,39 @@ class run_impl
return pkt->data + (rhs->data - rhs_pkt->data);
}

// For DPU kernels, initialize the instruction buffer and count in
// the command packet, both of which are before regular kernel
// arguments. The function returns the data payload after the
// preceeding instruction buffer and count; this data payload is
// where regular kernel arguments are placed.
uint32_t*
initialize_dpu(uint32_t* payload)
{
auto dpu = reinterpret_cast<ert_dpu_data*>(payload);
auto bo = xrt_core::module_int::get_instruction_buffer(m_module, kernel->get_name());
dpu->instruction_buffer = bo.address();
dpu->instruction_buffer_size = bo.size();

// Return payload past the ert_dpu_data structure
return payload + sizeof(ert_dpu_data) / sizeof(uint32_t); // skip past ert_dpu_data
}

// Initialize the command packet with special case for DPU kernels
uint32_t*
initialize_command(kernel_command* pkt)
{
auto kcmd = pkt->get_ert_cmd<ert_start_kernel_cmd*>();
auto payload = kernel->initialize_command(pkt);

if (kcmd->opcode == ERT_START_DPU)
payload = initialize_dpu(payload);

return payload;
}

using callback_function_type = std::function<void(ert_cmd_state)>;
std::shared_ptr<kernel_impl> kernel; // shared ownership
xrt::module m_module; // instruction module (optional)
xrt_core::hw_queue m_hwqueue; // hw queue for command submission
std::vector<ipctx> ips; // ips controlled by this run object
std::bitset<max_cus> cumask; // cumask for command execution
Expand Down Expand Up @@ -2079,12 +2085,13 @@ class run_impl
explicit
run_impl(std::shared_ptr<kernel_impl> k)
: kernel(std::move(k))
, m_module{copy_module(kernel->get_module(), kernel->get_hw_context())}
, m_hwqueue(kernel->get_hw_queue())
, ips(kernel->get_ips())
, cumask(kernel->get_cumask())
, core_device(kernel->get_core_device())
, cmd(std::make_shared<kernel_command>(kernel->get_device(), m_hwqueue, kernel->get_hw_context()))
, data(kernel->initialize_command(cmd.get()))
, data(initialize_command(cmd.get()))
, m_header(cmd->get_ert_packet()->header)
, uid(create_uid())
{
Expand All @@ -2096,6 +2103,7 @@ class run_impl
explicit
run_impl(const run_impl* rhs)
: kernel(rhs->kernel)
, m_module{rhs->m_module}
, m_hwqueue(rhs->m_hwqueue)
, ips(rhs->ips)
, cumask(rhs->cumask)
Expand Down