/
CPUDevice.cpp
239 lines (185 loc) · 6.22 KB
/
CPUDevice.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
// Copyright 2020-2022 Intel Corporation
// SPDX-License-Identifier: BSD-3-Clause
#include "CPUDevice.h"
#if defined(_WIN32) || defined(_WIN64)
#include "windows.h"
#else
#include <dlfcn.h>
#endif
// std
#include <cassert>
#include <chrono>
#include <cstdlib>
#include <cstring>
#include <exception>
#include <memory>
#include <new>
#include <stdexcept>
#include <string>
namespace ispcrt {
namespace cpu {
struct Future : public ispcrt::base::Future {
Future() = default;
virtual ~Future() = default;
bool valid() override { return m_valid; }
uint64_t time() override { return m_time; }
friend struct TaskQueue;
private:
uint64_t m_time{0};
bool m_valid{false};
};
// Function-pointer type of a CPU kernel entry point. cpu::TaskQueue::launch()
// calls it with the parameter buffer pointer (or nullptr when no parameters
// were given) followed by the three launch dimensions dim0, dim1, dim2.
using CPUKernelEntryPoint = void (*)(void *, size_t, size_t, size_t);
struct MemoryView : public ispcrt::base::MemoryView {
MemoryView(void *appMem, size_t numBytes, bool shared) : m_hostPtr(appMem), m_devicePtr(appMem), m_size(numBytes), m_shared(shared) {}
~MemoryView() {
if (!m_external_alloc && m_devicePtr)
free(m_devicePtr);
}
bool isShared() { return m_shared; }
void *hostPtr() {
if (m_shared) {
return devicePtr();
}
else {
if (!m_hostPtr)
throw std::logic_error("pointer to the host memory is NULL");
return m_hostPtr;
}
};
void *devicePtr() {
if (!m_devicePtr)
allocate();
return m_devicePtr;
};
size_t numBytes() { return m_size; };
private:
void allocate() {
m_devicePtr = malloc(m_size);
if (!m_devicePtr)
throw std::bad_alloc();
m_external_alloc = false;
}
bool m_external_alloc{true};
bool m_shared{false};
void *m_hostPtr{nullptr};
void *m_devicePtr{nullptr};
size_t m_size{0};
};
struct Module : public ispcrt::base::Module {
Module(const char *moduleFile) : m_file(moduleFile) {
if (!m_file.empty()) {
#if defined(__MACOSX__) || defined(__APPLE__)
std::string ext = ".dylib";
#elif defined(_WIN32) || defined(_WIN64)
std::string ext = ".dll";
#else
std::string ext = ".so";
#endif
#if defined _WIN32
m_lib = LoadLibrary((m_file + ext).c_str());
#else
m_lib = dlopen(("lib" + m_file + ext).c_str(), RTLD_LAZY | RTLD_LOCAL);
#endif
if (!m_lib)
throw std::logic_error("could not open CPU shared module file");
}
}
~Module() {
if (m_lib)
#if defined(_WIN32) || defined(_WIN64)
FreeLibrary((HMODULE)m_lib);
#else
dlclose(m_lib);
#endif
}
void *lib() const { return m_lib; }
private:
std::string m_file;
void *m_lib{nullptr};
};
struct Kernel : public ispcrt::base::Kernel {
Kernel(const ispcrt::base::Module &_module, const char *_name) : m_fcnName(_name), m_module(&_module) {
const cpu::Module &module = (const cpu::Module &)_module;
auto name = std::string(_name) + "_cpu_entry_point";
#if defined(_WIN32) || defined(_WIN64)
void *fcn = GetProcAddress((HMODULE)module.lib(), name.c_str());
#else
void *fcn = dlsym(module.lib() ? module.lib() : RTLD_DEFAULT, name.c_str());
#endif
if (!fcn)
throw std::logic_error("could not find CPU kernel function");
m_fcn = (CPUKernelEntryPoint)fcn;
m_module->refInc();
}
~Kernel() {
if (m_module)
m_module->refDec();
}
CPUKernelEntryPoint entryPoint() const { return m_fcn; }
private:
std::string m_fcnName;
CPUKernelEntryPoint m_fcn{nullptr};
const ispcrt::base::Module *m_module{nullptr};
};
// Task queue for the CPU device.
//
// Kernels run synchronously inside launch(); barrier(), copyToHost(),
// copyToDevice(), submit() and sync() have empty bodies.
struct TaskQueue : public ispcrt::base::TaskQueue {
    TaskQueue() {
        // no-op
    }

    void barrier() override {
        // no-op
    }

    void copyToHost(ispcrt::base::MemoryView &) override {
        // no-op
    }

    void copyToDevice(ispcrt::base::MemoryView &) override {
        // no-op
    }

    // Copies `size` bytes from the device pointer of mv_src to the device
    // pointer of mv_dst. Both views are downcast to cpu::MemoryView.
    // `size` is not checked against either view's numBytes() here.
    void copyMemoryView(base::MemoryView &mv_dst, base::MemoryView &mv_src, const size_t size) override {
        auto &view_dst = static_cast<cpu::MemoryView &>(mv_dst);
        auto &view_src = static_cast<cpu::MemoryView &>(mv_src);
        // memmove rather than memcpy: two views may wrap the same or overlapping
        // application memory, for which memcpy is undefined.
        std::memmove(view_dst.devicePtr(), view_src.devicePtr(), size);
    }

    // Runs kernel `k` immediately on the calling thread and returns a heap
    // allocated future holding the elapsed time in nanoseconds.
    //
    // params           - optional parameter view; its device pointer (or
    //                    nullptr when params is null) is passed to the kernel.
    // dim0, dim1, dim2 - launch dimensions forwarded to the entry point.
    // The returned pointer is created with `new` and handed to the caller.
    ispcrt::base::Future *launch(ispcrt::base::Kernel &k, ispcrt::base::MemoryView *params, size_t dim0, size_t dim1,
                                 size_t dim2) override {
        auto &kernel = static_cast<cpu::Kernel &>(k);
        auto *parameters = static_cast<cpu::MemoryView *>(params);
        auto *fcn = kernel.entryPoint();

        // Held by unique_ptr until returned so it is not leaked if devicePtr()
        // (which may allocate and throw) or the kernel itself throws.
        std::unique_ptr<cpu::Future> future(new cpu::Future);

        // steady_clock is monotonic; high_resolution_clock may alias the system
        // clock, which can jump and yield a bogus (wrapped) duration.
        const auto start = std::chrono::steady_clock::now();
        fcn(parameters ? parameters->devicePtr() : nullptr, dim0, dim1, dim2);
        const auto end = std::chrono::steady_clock::now();

        future->m_time = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count();
        future->m_valid = true;
        return future.release();
    }

    void submit() override {
        // no-op
    }

    void sync() override {
        // no-op
    }

    // The CPU queue has no native handle.
    void *taskQueueNativeHandle() const override { return nullptr; }
};
// Number of devices reported by the CPU backend; the value is fixed at one.
uint32_t deviceCount() {
    constexpr uint32_t numCpuDevices = 1;
    return numCpuDevices;
}
// Returns the device description for the CPU backend.
//
// deviceIdx is currently ignored. Both deviceId and vendorId are reported as 0.
ISPCRTDeviceInfo deviceInfo(uint32_t deviceIdx) {
    (void)deviceIdx; // not used by the CPU backend
    // Value-initialise so that any field not assigned below is zero instead of
    // being returned with an indeterminate value.
    ISPCRTDeviceInfo info{};
    info.deviceId = 0; // for CPU we don't support it yet
    info.vendorId = 0;
    return info;
}
} // namespace cpu
// Creates a cpu::MemoryView over `appMem` with the given size and shared flag.
// The object is allocated with `new` and returned through its base-class pointer.
ispcrt::base::MemoryView *CPUDevice::newMemoryView(void *appMem, size_t numBytes, bool shared) const {
    cpu::MemoryView *view = new cpu::MemoryView(appMem, numBytes, shared);
    return view;
}
// Creates a new cpu::TaskQueue with `new` and returns it through its base-class pointer.
ispcrt::base::TaskQueue *CPUDevice::newTaskQueue() const {
    cpu::TaskQueue *queue = new cpu::TaskQueue();
    return queue;
}
// Creates a cpu::Module for `moduleFile` with `new` and returns it through its
// base-class pointer. `moduleOpts` is not used by the CPU implementation.
ispcrt::base::Module *CPUDevice::newModule(const char *moduleFile, const ISPCRTModuleOptions &moduleOpts) const {
    cpu::Module *loaded = new cpu::Module(moduleFile);
    return loaded;
}
// Module linking does nothing on the CPU device; both arguments are ignored.
void CPUDevice::linkModules(base::Module **modules, const uint32_t numModules) const {
    (void)modules;
    (void)numModules;
}
// Creates a cpu::Kernel for `name` inside `module` with `new` and returns it
// through its base-class pointer.
ispcrt::base::Kernel *CPUDevice::newKernel(const ispcrt::base::Module &module, const char *name) const {
    cpu::Kernel *kernel = new cpu::Kernel(module, name);
    return kernel;
}
// The CPU device exposes no native platform handle; always returns nullptr.
void *CPUDevice::platformNativeHandle() const {
    void *const handle = nullptr;
    return handle;
}
// The CPU device exposes no native device handle; always returns nullptr.
void *CPUDevice::deviceNativeHandle() const {
    void *const handle = nullptr;
    return handle;
}
// The CPU device exposes no native context handle; always returns nullptr.
void *CPUDevice::contextNativeHandle() const {
    void *const handle = nullptr;
    return handle;
}
// Reports the allocation type of `appMemory`. The CPU device does not inspect
// the pointer and always answers ISPCRT_ALLOC_TYPE_UNKNOWN.
ISPCRTAllocationType CPUDevice::getMemAllocType(void* appMemory) const {
    (void)appMemory;
    const ISPCRTAllocationType allocType = ISPCRT_ALLOC_TYPE_UNKNOWN;
    return allocType;
}
} // namespace ispcrt