Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 4 additions & 54 deletions sycl/source/detail/device_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -309,61 +309,11 @@ ur_native_handle_t device_impl::getNative() const {
return Handle;
}

// On the first call this function queries for device timestamp
// along with host synchronized timestamp and stores it in member variable
// MDeviceHostBaseTime. Subsequent calls to this function would just retrieve
// the host timestamp, compute difference against the host timestamp in
// MDeviceHostBaseTime and calculate the device timestamp based on the
// difference.
//
// The MDeviceHostBaseTime is refreshed with new device and host timestamp
// after a certain interval (determined by TimeTillRefresh) to account for
// clock drift between host and device.
//
uint64_t device_impl::getCurrentDeviceTime() {
auto GetGlobalTimestamps = [this](ur_device_handle_t Device,
uint64_t *DeviceTime, uint64_t *HostTime) {
auto Result =
getAdapter().call_nocheck<UrApiKind::urDeviceGetGlobalTimestamps>(
Device, DeviceTime, HostTime);
if (Result == UR_RESULT_ERROR_INVALID_OPERATION) {
// NOTE(UR port): Removed the call to GetLastError because we shouldn't
// be calling it after ERROR_INVALID_OPERATION: there is no
// adapter-specific error.
throw detail::set_ur_error(
sycl::exception(
make_error_code(errc::feature_not_supported),
"Device and/or backend does not support querying timestamp."),
UR_RESULT_ERROR_INVALID_OPERATION);
} else {
getAdapter().checkUrResult<errc::feature_not_supported>(Result);
}
};

uint64_t HostTime = 0;
uint64_t Diff = 0;
// To account for potential clock drift between host clock and device clock.
// The value set is arbitrary: 200 seconds
constexpr uint64_t TimeTillRefresh = 200e9;
// If getCurrentDeviceTime is called for the first time or we have to refresh.
std::shared_lock<std::shared_mutex> ReadLock(MDeviceHostBaseTimeMutex);
if (!MDeviceHostBaseTime.second || Diff > TimeTillRefresh) {
ReadLock.unlock();
std::unique_lock<std::shared_mutex> WriteLock(MDeviceHostBaseTimeMutex);
// Recheck the condition after acquiring the write lock.
if (MDeviceHostBaseTime.second && Diff <= TimeTillRefresh) {
// If we are here, it means that another thread has already updated
// MDeviceHostBaseTime, so we can just return the current device time.
return MDeviceHostBaseTime.first + Diff;
}
GetGlobalTimestamps(MDevice, &MDeviceHostBaseTime.first,
&MDeviceHostBaseTime.second);
} else {
GetGlobalTimestamps(MDevice, nullptr, &HostTime);
assert(HostTime >= MDeviceHostBaseTime.second);
Diff = HostTime - MDeviceHostBaseTime.second;
}
return MDeviceHostBaseTime.first + Diff;
uint64_t DeviceTime = 0;
getAdapter().call<UrApiKind::urDeviceGetGlobalTimestamps>(
MDevice, &DeviceTime, nullptr);
return DeviceTime;
}

bool device_impl::extOneapiCanBuild(
Expand Down
3 changes: 0 additions & 3 deletions sycl/source/detail/device_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2249,9 +2249,6 @@ class device_impl {
// This is used for getAdapter so should be above other properties.
platform_impl &MPlatform;

std::shared_mutex MDeviceHostBaseTimeMutex;
std::pair<uint64_t, uint64_t> MDeviceHostBaseTime{0, 0};

const ur_device_handle_t MRootDevice;

// Devices track a list of active queues on it, to allow for synchronization
Expand Down
11 changes: 5 additions & 6 deletions sycl/test-e2e/Basic/submit_time.cpp
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
// RUN: %{build} -o %t.out
// There is an issue with reported device time for the L0 backend, works only on
// pvc for now. No such problems for other backends.
// RUN: %if (!level_zero || arch-intel_gpu_pvc) %{ %{run} %t.out %}
// RUN: %{run} %t.out

// Check that submission time is calculated properly.
// Check that submission time is valid.

// Test fails on hip flakily, disable temprorarily.
// UNSUPPORTED: hip
Expand All @@ -19,11 +17,12 @@

int main(void) {
constexpr size_t n = 16;
constexpr size_t iter_count = 100;
sycl::queue q({sycl::property::queue::enable_profiling{}});
int *data = sycl::malloc_host<int>(n, q);
int *dest = sycl::malloc_host<int>(n, q);

for (int i = 0; i < 5; i++) {
for (int i = 0; i < iter_count; i++) {
auto event = q.submit([&](sycl::handler &cgh) {
cgh.parallel_for<class KernelTime>(
sycl::range<1>(n), [=](sycl::id<1> idx) { data[idx] = idx; });
Expand Down Expand Up @@ -52,7 +51,7 @@ int main(void) {
uint64_t memcpy_submit_time = 0;
uint64_t memcpy_start_time = 0;
uint64_t memcpy_end_time = 0;
for (int i = 0; i < 5; i++) {
for (int i = 0; i < iter_count; i++) {
auto memcpy_event = q.memcpy(dest, data, sizeof(int) * n);
memcpy_event.wait();

Expand Down
Loading