Skip to content

Commit 62faecf

Browse files
Optimize virtual calls #2.
Optimize frequently used virtual calls. The compiler cannot inline them, which causes overhead. Signed-off-by: Michal Mrozek <michal.mrozek@intel.com>
1 parent c30f65f commit 62faecf

File tree

13 files changed

+35
-42
lines changed

13 files changed

+35
-42
lines changed

level_zero/core/source/device/device.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,13 @@ struct DebugSession;
4343
enum class ModuleType;
4444

4545
struct Device : _ze_device_handle_t {
46-
virtual uint32_t getRootDeviceIndex() = 0;
46+
uint32_t getRootDeviceIndex() const {
47+
return neoDevice->getRootDeviceIndex();
48+
}
49+
NEO::Device *getNEODevice() const {
50+
return this->neoDevice;
51+
}
52+
4753
virtual ze_result_t canAccessPeer(ze_device_handle_t hPeerDevice, ze_bool_t *value) = 0;
4854
virtual ze_result_t createCommandList(const ze_command_list_desc_t *desc,
4955
ze_command_list_handle_t *commandList) = 0;
@@ -118,7 +124,6 @@ struct Device : _ze_device_handle_t {
118124

119125
virtual NEO::PreemptionMode getDevicePreemptionMode() const = 0;
120126
virtual const NEO::DeviceInfo &getDeviceInfo() const = 0;
121-
virtual NEO::Device *getNEODevice() = 0;
122127
NEO::SourceLevelDebugger *getSourceLevelDebugger() { return getNEODevice()->getSourceLevelDebugger(); }
123128
DebuggerL0 *getL0Debugger() {
124129
auto debugger = getNEODevice()->getDebugger();
@@ -142,6 +147,7 @@ struct Device : _ze_device_handle_t {
142147
virtual void storeReusableAllocation(NEO::GraphicsAllocation &alloc) = 0;
143148

144149
protected:
150+
NEO::Device *neoDevice = nullptr;
145151
bool implicitScalingCapable = false;
146152
};
147153

level_zero/core/source/device/device_imp.cpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,6 @@ bool releaseFP64Override();
5252

5353
namespace L0 {
5454

55-
uint32_t DeviceImp::getRootDeviceIndex() {
56-
return neoDevice->getRootDeviceIndex();
57-
}
58-
5955
DriverHandle *DeviceImp::getDriverHandle() {
6056
return this->driverHandle;
6157
}
@@ -942,10 +938,6 @@ const NEO::DeviceInfo &DeviceImp::getDeviceInfo() const {
942938
return neoDevice->getDeviceInfo();
943939
}
944940

945-
NEO::Device *DeviceImp::getNEODevice() {
946-
return neoDevice;
947-
}
948-
949941
NEO::GraphicsAllocation *DeviceImp::allocateManagedMemoryFromHostPtr(void *buffer, size_t size, struct CommandList *commandList) {
950942
char *baseAddress = reinterpret_cast<char *>(buffer);
951943
NEO::GraphicsAllocation *allocation = nullptr;

level_zero/core/source/device/device_imp.h

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ namespace L0 {
2929
struct SysmanDevice;
3030

3131
struct DeviceImp : public Device {
32-
uint32_t getRootDeviceIndex() override;
3332
ze_result_t canAccessPeer(ze_device_handle_t hPeerDevice, ze_bool_t *value) override;
3433
ze_result_t createCommandList(const ze_command_list_desc_t *desc,
3534
ze_command_list_handle_t *commandList) override;
@@ -83,7 +82,6 @@ struct DeviceImp : public Device {
8382
NEO::PreemptionMode getDevicePreemptionMode() const override;
8483
const NEO::DeviceInfo &getDeviceInfo() const override;
8584

86-
NEO::Device *getNEODevice() override;
8785
void activateMetricGroups() override;
8886
void processAdditionalKernelProperties(NEO::HwHelper &hwHelper, ze_device_module_properties_t *pKernelProperties);
8987
NEO::GraphicsAllocation *getDebugSurface() const override { return debugSurface; }
@@ -102,7 +100,6 @@ struct DeviceImp : public Device {
102100
bool toPhysicalSliceId(const NEO::TopologyMap &topologyMap, uint32_t &slice, uint32_t &deviceIndex);
103101
bool toApiSliceId(const NEO::TopologyMap &topologyMap, uint32_t &slice, uint32_t deviceIndex);
104102

105-
NEO::Device *neoDevice = nullptr;
106103
bool isSubdevice = false;
107104
void *execEnvironment = nullptr;
108105
std::unique_ptr<BuiltinFunctionsLib> builtins = nullptr;

level_zero/core/source/event/event_impl.inl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ Event *Event::create(EventPool *eventPool, const ze_event_desc_t *desc, Device *
2626
event->hostAddress = reinterpret_cast<void *>(baseHostAddr + (desc->index * eventPool->getEventSize()));
2727
event->signalScope = desc->signal;
2828
event->waitScope = desc->wait;
29-
event->csr = static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver;
29+
event->csr = device->getNEODevice()->getDefaultEngine().commandStreamReceiver;
3030

3131
EventPoolImp *EventPoolImp = static_cast<struct EventPoolImp *>(eventPool);
3232
// do not reset even if it has been imported, since event pool
@@ -309,7 +309,7 @@ ze_result_t EventImp<TagSizeT>::queryTimestampsExp(Device *device, uint32_t *pCo
309309

310310
auto packetId = i;
311311
if (deviceImp->isSubdevice) {
312-
packetId = static_cast<NEO::SubDevice *>(deviceImp->neoDevice)->getSubDeviceIndex();
312+
packetId = static_cast<NEO::SubDevice *>(deviceImp->getNEODevice())->getSubDeviceIndex();
313313
}
314314

315315
globalStartTs = kernelEventCompletionData[timestampPacket].getGlobalStartValue(packetId);

level_zero/core/test/unit_tests/gen9/test_cmdqueue_gen9.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -210,8 +210,7 @@ HWTEST2_F(CommandQueueGroupMultiDevice,
210210
returnValue));
211211

212212
L0::CommandQueueImp *cmdQueue = reinterpret_cast<CommandQueueImp *>(commandList0->cmdQImmediate);
213-
L0::DeviceImp *deviceImp = reinterpret_cast<L0::DeviceImp *>(device);
214-
auto &nearestSubDevice = *deviceImp->neoDevice->getNearestGenericSubDevice(0);
213+
auto &nearestSubDevice = *device->getNEODevice()->getNearestGenericSubDevice(0);
215214
const auto rcsIndex = nearestSubDevice.getEngineGroupIndexFromEngineGroupType(NEO::EngineGroupType::RenderCompute);
216215
auto expectedCSR = nearestSubDevice.getRegularEngineGroups()[rcsIndex].engines[queueGroupIndex].commandStreamReceiver;
217216
EXPECT_EQ(cmdQueue->getCsr(), expectedCSR);

level_zero/core/test/unit_tests/mocks/mock_device.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ template <>
2020
struct WhiteBox<::L0::Device> : public ::L0::Device {
2121
using Base = L0::Device;
2222
using Base::implicitScalingCapable;
23+
using L0::Device::getNEODevice;
24+
using L0::Device::neoDevice;
2325
};
2426

2527
using Device = WhiteBox<::L0::Device>;
@@ -28,7 +30,6 @@ template <>
2830
struct Mock<Device> : public Device {
2931
Mock() = default;
3032

31-
ADDMETHOD_NOBASE(getRootDeviceIndex, uint32_t, 0u, ());
3233
ADDMETHOD_NOBASE(canAccessPeer, ze_result_t, ZE_RESULT_SUCCESS, (ze_device_handle_t hPeerDevice, ze_bool_t *value));
3334
ADDMETHOD_NOBASE(createCommandList, ze_result_t, ZE_RESULT_SUCCESS, (const ze_command_list_desc_t *desc, ze_command_list_handle_t *commandList));
3435
ADDMETHOD_NOBASE(createCommandListImmediate, ze_result_t, ZE_RESULT_SUCCESS, (const ze_command_queue_desc_t *desc, ze_command_list_handle_t *commandList));
@@ -66,7 +67,6 @@ struct Mock<Device> : public Device {
6667
ADDMETHOD_NOBASE_VOIDRETURN(setDriverHandle, (L0::DriverHandle *));
6768
ADDMETHOD_CONST_NOBASE(getDevicePreemptionMode, NEO::PreemptionMode, NEO::PreemptionMode::Initial, ());
6869
ADDMETHOD_CONST_NOBASE_REFRETURN(getDeviceInfo, const NEO::DeviceInfo &, ());
69-
ADDMETHOD_NOBASE(getNEODevice, NEO::Device *, nullptr, ());
7070
ADDMETHOD_NOBASE_VOIDRETURN(activateMetricGroups, ());
7171
ADDMETHOD_CONST_NOBASE(getDebugSurface, NEO::GraphicsAllocation *, nullptr, ());
7272
ADDMETHOD_NOBASE(allocateManagedMemoryFromHostPtr, NEO::GraphicsAllocation *, nullptr, (void *buffer, size_t size, struct L0::CommandList *commandList));

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_1.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -873,7 +873,7 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithSyncModeAndAppendSignalEvent
873873

874874
std::unique_ptr<L0::Event> event_object(L0::Event::fromHandle(event));
875875
ASSERT_NE(nullptr, event_object->csr);
876-
ASSERT_EQ(static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr);
876+
ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);
877877

878878
commandList->appendSignalEvent(event);
879879

@@ -913,7 +913,7 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithSyncModeAndAppendBarrierThen
913913

914914
std::unique_ptr<L0::Event> event_object(L0::Event::fromHandle(event));
915915
ASSERT_NE(nullptr, event_object->csr);
916-
ASSERT_EQ(static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr);
916+
ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);
917917

918918
commandList->appendBarrier(nullptr, 1, &event);
919919

@@ -955,7 +955,7 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithSyncModeAndAppendResetEventT
955955

956956
std::unique_ptr<L0::Event> event_object(L0::Event::fromHandle(event));
957957
ASSERT_NE(nullptr, event_object->csr);
958-
ASSERT_EQ(static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr);
958+
ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);
959959

960960
commandList->appendEventReset(event);
961961

@@ -995,7 +995,7 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendSignalEven
995995

996996
std::unique_ptr<L0::Event> event_object(L0::Event::fromHandle(event));
997997
ASSERT_NE(nullptr, event_object->csr);
998-
ASSERT_EQ(static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr);
998+
ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);
999999

10001000
commandList->appendSignalEvent(event);
10011001

@@ -1035,7 +1035,7 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendBarrierThe
10351035

10361036
std::unique_ptr<L0::Event> event_object(L0::Event::fromHandle(event));
10371037
ASSERT_NE(nullptr, event_object->csr);
1038-
ASSERT_EQ(static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr);
1038+
ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);
10391039

10401040
commandList->appendBarrier(event, 0, nullptr);
10411041

@@ -1078,7 +1078,7 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndCopyEngineAndApp
10781078

10791079
std::unique_ptr<L0::Event> event_object(L0::Event::fromHandle(event));
10801080
ASSERT_NE(nullptr, event_object->csr);
1081-
ASSERT_EQ(static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr);
1081+
ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);
10821082

10831083
commandList->appendBarrier(event, 0, nullptr);
10841084

@@ -1120,7 +1120,7 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendEventReset
11201120

11211121
std::unique_ptr<L0::Event> event_object(L0::Event::fromHandle(event));
11221122
ASSERT_NE(nullptr, event_object->csr);
1123-
ASSERT_EQ(static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr);
1123+
ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);
11241124

11251125
commandList->appendEventReset(event);
11261126

level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_3.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1119,7 +1119,7 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendSignalEven
11191119

11201120
std::unique_ptr<L0::Event> event_object(L0::Event::fromHandle(event));
11211121
ASSERT_NE(nullptr, event_object->csr);
1122-
ASSERT_EQ(static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr);
1122+
ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);
11231123

11241124
commandList->appendSignalEvent(event);
11251125

@@ -1159,7 +1159,7 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendBarrierThe
11591159

11601160
std::unique_ptr<L0::Event> event_object(L0::Event::fromHandle(event));
11611161
ASSERT_NE(nullptr, event_object->csr);
1162-
ASSERT_EQ(static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr);
1162+
ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);
11631163

11641164
commandList->appendBarrier(event, 0, nullptr);
11651165

@@ -1201,7 +1201,7 @@ TEST_F(CommandListCreate, whenCreatingImmCmdListWithASyncModeAndAppendEventReset
12011201

12021202
std::unique_ptr<L0::Event> event_object(L0::Event::fromHandle(event));
12031203
ASSERT_NE(nullptr, event_object->csr);
1204-
ASSERT_EQ(static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr);
1204+
ASSERT_EQ(device->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);
12051205

12061206
commandList->appendEventReset(event);
12071207

level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -546,7 +546,7 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionEnabledWithImmediat
546546

547547
std::unique_ptr<L0::Event> event_object(L0::Event::fromHandle(event));
548548
ASSERT_NE(nullptr, event_object->csr);
549-
ASSERT_EQ(static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr);
549+
ASSERT_EQ(static_cast<DeviceImp *>(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);
550550

551551
returnValue = commandList->appendWaitOnEvents(1, &event);
552552
EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS);
@@ -609,7 +609,7 @@ HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionDisabledWithImmedia
609609

610610
std::unique_ptr<L0::Event> event_object(L0::Event::fromHandle(event));
611611
ASSERT_NE(nullptr, event_object->csr);
612-
ASSERT_EQ(static_cast<DeviceImp *>(device)->neoDevice->getDefaultEngine().commandStreamReceiver, event_object->csr);
612+
ASSERT_EQ(static_cast<DeviceImp *>(device)->getNEODevice()->getDefaultEngine().commandStreamReceiver, event_object->csr);
613613

614614
returnValue = commandList->appendWaitOnEvents(1, &event);
615615
EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS);

level_zero/core/test/unit_tests/sources/device/test_device.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2181,10 +2181,10 @@ TEST_F(DeviceTest, givenNoL0DebuggerWhenGettingL0DebuggerThenNullptrReturned) {
21812181
TEST_F(DeviceTest, givenValidDeviceWhenCallingReleaseResourcesThenResourcesReleased) {
21822182
auto deviceImp = static_cast<DeviceImp *>(device);
21832183
EXPECT_FALSE(deviceImp->resourcesReleased);
2184-
EXPECT_FALSE(nullptr == deviceImp->neoDevice);
2184+
EXPECT_FALSE(nullptr == deviceImp->getNEODevice());
21852185
deviceImp->releaseResources();
21862186
EXPECT_TRUE(deviceImp->resourcesReleased);
2187-
EXPECT_TRUE(nullptr == deviceImp->neoDevice);
2187+
EXPECT_TRUE(nullptr == deviceImp->getNEODevice());
21882188
EXPECT_TRUE(nullptr == deviceImp->pageFaultCommandList);
21892189
EXPECT_TRUE(nullptr == deviceImp->getDebugSurface());
21902190
deviceImp->releaseResources();

0 commit comments

Comments
 (0)