Skip to content

Commit

Permalink
Allow Device creating multiple CSRs [2/n]
Browse files Browse the repository at this point in the history
Create new OsContext per CSR

Change-Id: I8dad7fc1ab450e560f78eba3152b5913791e59a3
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
  • Loading branch information
BartoszDunajski authored and Compute-Runtime-Automation committed Nov 22, 2018
1 parent 3cb8683 commit 706d9f8
Show file tree
Hide file tree
Showing 16 changed files with 104 additions and 80 deletions.
33 changes: 16 additions & 17 deletions runtime/device/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ Device::~Device() {
performanceCounters->shutdown();
}

for (auto &csr : commandStreamReceiver) {
csr->flushBatchedSubmissions();
for (auto &engine : engines) {
engine.commandStreamReceiver->flushBatchedSubmissions();
}

if (deviceInfo.sourceLevelDebuggerActive && executionEnvironment->sourceLevelDebugger) {
Expand Down Expand Up @@ -116,21 +116,20 @@ bool Device::createDeviceImpl(const HardwareInfo *pHwInfo, Device &outDevice) {
executionEnvironment->initializeMemoryManager(outDevice.getEnabled64kbPages(), outDevice.getEnableLocalMemory(),
outDevice.getDeviceIndex(), deviceCsrIndex);

outDevice.osContext = new OsContext(executionEnvironment->osInterface.get(), outDevice.getDeviceIndex());
executionEnvironment->memoryManager->registerOsContext(outDevice.osContext);

outDevice.commandStreamReceiver.resize(1);
outDevice.commandStreamReceiver[deviceCsrIndex] = executionEnvironment->commandStreamReceivers[outDevice.getDeviceIndex()][deviceCsrIndex].get();
if (!outDevice.commandStreamReceiver[deviceCsrIndex]->initializeTagAllocation()) {
auto osContext = executionEnvironment->memoryManager->createAndRegisterOsContext();
auto commandStreamReceiver = executionEnvironment->commandStreamReceivers[outDevice.getDeviceIndex()][deviceCsrIndex].get();
if (!commandStreamReceiver->initializeTagAllocation()) {
return false;
}

outDevice.engines.emplace_back(commandStreamReceiver, osContext);

auto pDevice = &outDevice;
if (!pDevice->osTime) {
pDevice->osTime = OSTime::create(outDevice.commandStreamReceiver[deviceCsrIndex]->getOSInterface());
pDevice->osTime = OSTime::create(commandStreamReceiver->getOSInterface());
}
pDevice->driverInfo.reset(DriverInfo::create(outDevice.commandStreamReceiver[deviceCsrIndex]->getOSInterface()));
pDevice->tagAddress = reinterpret_cast<uint32_t *>(outDevice.commandStreamReceiver[deviceCsrIndex]->getTagAllocation()->getUnderlyingBuffer());
pDevice->driverInfo.reset(DriverInfo::create(commandStreamReceiver->getOSInterface()));
pDevice->tagAddress = reinterpret_cast<uint32_t *>(commandStreamReceiver->getTagAllocation()->getUnderlyingBuffer());

pDevice->initializeCaps();

Expand All @@ -142,8 +141,8 @@ bool Device::createDeviceImpl(const HardwareInfo *pHwInfo, Device &outDevice) {
}

uint32_t deviceHandle = 0;
if (outDevice.commandStreamReceiver[deviceCsrIndex]->getOSInterface()) {
deviceHandle = outDevice.commandStreamReceiver[deviceCsrIndex]->getOSInterface()->getDeviceHandle();
if (commandStreamReceiver->getOSInterface()) {
deviceHandle = commandStreamReceiver->getOSInterface()->getDeviceHandle();
}

if (pDevice->deviceInfo.sourceLevelDebuggerActive) {
Expand All @@ -160,14 +159,14 @@ bool Device::createDeviceImpl(const HardwareInfo *pHwInfo, Device &outDevice) {
if (!pDevice->preemptionAllocation) {
return false;
}
outDevice.commandStreamReceiver[deviceCsrIndex]->setPreemptionCsrAllocation(pDevice->preemptionAllocation);
commandStreamReceiver->setPreemptionCsrAllocation(pDevice->preemptionAllocation);
auto sipType = SipKernel::getSipKernelType(pHwInfo->pPlatform->eRenderCoreFamily, pDevice->isSourceLevelDebuggerActive());
initSipKernel(sipType, *pDevice);
}

if (DebugManager.flags.EnableExperimentalCommandBuffer.get() > 0) {
outDevice.commandStreamReceiver[deviceCsrIndex]->setExperimentalCmdBuffer(
std::unique_ptr<ExperimentalCommandBuffer>(new ExperimentalCommandBuffer(outDevice.commandStreamReceiver[deviceCsrIndex], pDevice->getDeviceInfo().profilingTimerResolution)));
commandStreamReceiver->setExperimentalCmdBuffer(std::unique_ptr<ExperimentalCommandBuffer>(
new ExperimentalCommandBuffer(commandStreamReceiver, pDevice->getDeviceInfo().profilingTimerResolution)));
}

return true;
Expand Down Expand Up @@ -230,7 +229,7 @@ unique_ptr_if_unused<Device> Device::release() {

bool Device::isSimulation() const {
bool simulation = hwInfo.capabilityTable.isSimulation(hwInfo.pPlatform->usDeviceID);
if (commandStreamReceiver[0]->getType() != CommandStreamReceiverType::CSR_HW) {
if (engines[0].commandStreamReceiver->getType() != CommandStreamReceiverType::CSR_HW) {
simulation = true;
}
if (hwInfo.pSkuTable->ftrSimulationMode) {
Expand Down
8 changes: 4 additions & 4 deletions runtime/device/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "runtime/device/device_info_map.h"
#include "runtime/execution_environment/execution_environment.h"
#include "runtime/helpers/base_object.h"
#include "runtime/helpers/engine_control.h"
#include "runtime/helpers/hw_info.h"
#include "runtime/memory_manager/memory_constants.h"
#include "runtime/os_interface/performance_counters.h"
Expand Down Expand Up @@ -118,7 +119,7 @@ class Device : public BaseObject<_cl_device_id> {
SourceLevelDebugger *getSourceLevelDebugger() { return executionEnvironment->sourceLevelDebugger.get(); }
ExecutionEnvironment *getExecutionEnvironment() const { return executionEnvironment; }
const HardwareCapabilities &getHardwareCapabilities() const { return hardwareCapabilities; }
OsContext *getOsContext() const { return osContext; }
OsContext *getOsContext() const { return engines[0].osContext; }
uint32_t getDeviceIndex() { return deviceIndex; }
bool isFullRangeSvm() {
return getHardwareInfo().capabilityTable.gpuAddressSpace == MemoryConstants::max48BitAddress;
Expand Down Expand Up @@ -155,7 +156,7 @@ class Device : public BaseObject<_cl_device_id> {
std::unique_ptr<DriverInfo> driverInfo;
std::unique_ptr<PerformanceCounters> performanceCounters;

OsContext *osContext = nullptr;
std::vector<EngineControl> engines;

void *slmWindowStartAddress = nullptr;

Expand All @@ -165,7 +166,6 @@ class Device : public BaseObject<_cl_device_id> {
EngineType engineType;
ExecutionEnvironment *executionEnvironment = nullptr;
uint32_t deviceIndex = 0u;
std::vector<CommandStreamReceiver *> commandStreamReceiver;
};

template <cl_device_info Param>
Expand All @@ -177,7 +177,7 @@ inline void Device::getCap(const void *&src,
}

inline CommandStreamReceiver &Device::getCommandStreamReceiver() {
return *this->commandStreamReceiver[0];
return *engines[0].commandStreamReceiver;
}

inline volatile uint32_t *Device::getTagAddress() const {
Expand Down
1 change: 1 addition & 0 deletions runtime/helpers/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ set(RUNTIME_SRCS_HELPERS_BASE
${CMAKE_CURRENT_SOURCE_DIR}/dispatch_info.h
${CMAKE_CURRENT_SOURCE_DIR}/dispatch_info_builder.h
${CMAKE_CURRENT_SOURCE_DIR}/enable_product.inl
${CMAKE_CURRENT_SOURCE_DIR}/engine_control.h
${CMAKE_CURRENT_SOURCE_DIR}/error_mappers.h
${CMAKE_CURRENT_SOURCE_DIR}/extendable_enum.h
${CMAKE_CURRENT_SOURCE_DIR}/file_io.cpp
Expand Down
22 changes: 22 additions & 0 deletions runtime/helpers/engine_control.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
* Copyright (C) 2018 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/

#pragma once

namespace OCLRT {
class CommandStreamReceiver;
class OsContext;

// Bundles a command stream receiver with an OS context so the pair can be
// stored and passed around as one unit.
//
// Both members are plain pointers. This struct declares no destructor and
// never deletes either pointee.
struct EngineControl {
    // Leaves both pointers at their in-class default of nullptr.
    EngineControl() = default;

    // Stores the two pointers exactly as given; no validation is performed.
    EngineControl(CommandStreamReceiver *commandStreamReceiver, OsContext *osContext)
        : commandStreamReceiver{commandStreamReceiver},
          osContext{osContext} {}

    CommandStreamReceiver *commandStreamReceiver = nullptr;
    OsContext *osContext = nullptr;
};
} // namespace OCLRT
4 changes: 2 additions & 2 deletions runtime/memory_manager/graphics_allocation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,16 @@ GraphicsAllocation::GraphicsAllocation(void *cpuPtrIn, uint64_t gpuAddress, uint
size(sizeIn),
cpuPtr(cpuPtrIn),
gpuAddress(gpuAddress),
usageInfos(osContextCount),
isShareable(isShareable) {
usageInfos.resize(maxOsContextCount);
}

GraphicsAllocation::GraphicsAllocation(void *cpuPtrIn, size_t sizeIn, osHandle sharedHandleIn, uint32_t osContextCount, bool isShareable) : size(sizeIn),
cpuPtr(cpuPtrIn),
gpuAddress(castToUint64(cpuPtrIn)),
sharedHandle(sharedHandleIn),
usageInfos(osContextCount),
isShareable(isShareable) {
usageInfos.resize(maxOsContextCount);
}
GraphicsAllocation::~GraphicsAllocation() = default;

Expand Down
2 changes: 1 addition & 1 deletion runtime/memory_manager/graphics_allocation.h
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ class GraphicsAllocation : public IDNode<GraphicsAllocation> {
bool aubWritable = true;
bool allocDumpable = false;
bool memObjectsAllocationWithWritableFlags = false;
StackVec<UsageInfo, maxOsContextCount> usageInfos;
std::vector<UsageInfo> usageInfos;
std::atomic<uint32_t> registeredContextsNum{0};
bool isShareable = false;
};
Expand Down
12 changes: 8 additions & 4 deletions runtime/memory_manager/memory_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "runtime/memory_manager/host_ptr_manager.h"
#include "runtime/memory_manager/internal_allocation_storage.h"
#include "runtime/os_interface/os_context.h"
#include "runtime/os_interface/os_interface.h"
#include "runtime/utilities/stackvec.h"
#include "runtime/utilities/tag_allocator.h"

Expand Down Expand Up @@ -200,13 +201,16 @@ bool MemoryManager::isMemoryBudgetExhausted() const {
return false;
}

void MemoryManager::registerOsContext(OsContext *contextToRegister) {
auto contextId = contextToRegister->getContextId();
OsContext *MemoryManager::createAndRegisterOsContext() {
auto contextId = ++latestContextId;
if (contextId + 1 > registeredOsContexts.size()) {
registeredOsContexts.resize(contextId + 1);
}
contextToRegister->incRefInternal();
registeredOsContexts[contextToRegister->getContextId()] = contextToRegister;
auto osContext = new OsContext(executionEnvironment.osInterface.get(), contextId);
osContext->incRefInternal();
registeredOsContexts[contextId] = osContext;

return osContext;
}

bool MemoryManager::getAllocationData(AllocationData &allocationData, const AllocationFlags &flags, const DevicesBitfield devicesBitfield,
Expand Down
4 changes: 3 additions & 1 deletion runtime/memory_manager/memory_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class GraphicsAllocation;
class HostPtrManager;
class CommandStreamReceiver;
class OsContext;
class OSInterface;
class TimestampPacket;

struct HwPerfCounter;
Expand Down Expand Up @@ -246,7 +247,7 @@ class MemoryManager {
::alignedFree(ptr);
}

void registerOsContext(OsContext *contextToRegister);
OsContext *createAndRegisterOsContext();
uint32_t getOsContextCount() { return static_cast<uint32_t>(registeredOsContexts.size()); }
CommandStreamReceiver *getCommandStreamReceiver(uint32_t contextId);
HostPtrManager *getHostPtrManager() const { return hostPtrManager.get(); }
Expand All @@ -270,6 +271,7 @@ class MemoryManager {
ExecutionEnvironment &executionEnvironment;
std::vector<OsContext *> registeredOsContexts;
std::unique_ptr<HostPtrManager> hostPtrManager;
uint32_t latestContextId = std::numeric_limits<uint32_t>::max();
};

std::unique_ptr<DeferredDeleter> createDeferredDeleter();
Expand Down
22 changes: 11 additions & 11 deletions unit_tests/memory_manager/memory_manager_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,9 @@ TEST_F(MemoryAllocatorTest, allocateSystemAligned) {
TEST_F(MemoryAllocatorTest, allocateGraphics) {
unsigned int alignment = 4096;

memoryManager->createAndRegisterOsContext();
auto allocation = memoryManager->allocateGraphicsMemory(sizeof(char));

ASSERT_NE(nullptr, allocation);
// initial taskCount must be -1. if not, we may kill allocation before it will be used
EXPECT_EQ((uint32_t)-1, allocation->getTaskCount(0));
Expand Down Expand Up @@ -1224,6 +1226,7 @@ TEST_F(MemoryManagerWithCsrTest, givenAllocationThatWasUsedAndIsCompletedWhenche
}

TEST_F(MemoryManagerWithCsrTest, givenAllocationThatWasUsedAndIsNotCompletedWhencheckGpuUsageAndDestroyGraphicsAllocationsIsCalledThenItIsAddedToTemporaryAllocationList) {
memoryManager->createAndRegisterOsContext();
auto usedAllocationAndNotGpuCompleted = memoryManager->allocateGraphicsMemory(4096);

auto tagAddress = csr->getTagAddress();
Expand Down Expand Up @@ -1393,24 +1396,21 @@ TEST(GraphicsAllocation, givenSharedHandleBasedConstructorWhenGraphicsAllocation
}

TEST(ResidencyDataTest, givenOsContextWhenItIsRegisteredToMemoryManagerThenRefCountIncreases) {
auto osContext = new OsContext(nullptr, 0u);
ExecutionEnvironment executionEnvironment;
OsAgnosticMemoryManager memoryManager(false, false, executionEnvironment);
memoryManager.registerOsContext(osContext);
MockMemoryManager memoryManager(false, false, executionEnvironment);
memoryManager.createAndRegisterOsContext();
EXPECT_EQ(1u, memoryManager.getOsContextCount());
EXPECT_EQ(1, osContext->getRefInternalCount());
EXPECT_EQ(1, memoryManager.registeredOsContexts[0]->getRefInternalCount());
}

TEST(ResidencyDataTest, givenTwoOsContextsWhenTheyAreRegistredFromHigherToLowerThenProperSizeIsReturned) {
auto osContext2 = new OsContext(nullptr, 1u);
auto osContext = new OsContext(nullptr, 0u);
ExecutionEnvironment executionEnvironment;
OsAgnosticMemoryManager memoryManager(false, false, executionEnvironment);
memoryManager.registerOsContext(osContext2);
memoryManager.registerOsContext(osContext);
MockMemoryManager memoryManager(false, false, executionEnvironment);
memoryManager.createAndRegisterOsContext();
memoryManager.createAndRegisterOsContext();
EXPECT_EQ(2u, memoryManager.getOsContextCount());
EXPECT_EQ(1, osContext->getRefInternalCount());
EXPECT_EQ(1, osContext2->getRefInternalCount());
EXPECT_EQ(1, memoryManager.registeredOsContexts[0]->getRefInternalCount());
EXPECT_EQ(1, memoryManager.registeredOsContexts[1]->getRefInternalCount());
}

TEST(ResidencyDataTest, givenResidencyDataWhenUpdateCompletionDataIsCalledThenItIsProperlyUpdated) {
Expand Down
4 changes: 2 additions & 2 deletions unit_tests/mocks/mock_device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ MockDevice::MockDevice(const HardwareInfo &hwInfo)
executionEnvironment->commandStreamReceivers.resize(getDeviceIndex() + 1);
executionEnvironment->commandStreamReceivers[getDeviceIndex()].push_back(std::unique_ptr<CommandStreamReceiver>(commandStreamReceiver));
this->executionEnvironment->memoryManager = std::move(this->mockMemoryManager);
this->commandStreamReceiver.push_back(commandStreamReceiver);
this->engines.emplace_back(commandStreamReceiver, nullptr);
}
MockDevice::MockDevice(const HardwareInfo &hwInfo, ExecutionEnvironment *executionEnvironment, uint32_t deviceIndex)
: Device(hwInfo, executionEnvironment, deviceIndex) {
Expand Down Expand Up @@ -49,7 +49,7 @@ void MockDevice::resetCommandStreamReceiver(CommandStreamReceiver *newCsr) {
executionEnvironment->commandStreamReceivers[getDeviceIndex()][0].reset(newCsr);
executionEnvironment->commandStreamReceivers[getDeviceIndex()][0]->initializeTagAllocation();
executionEnvironment->commandStreamReceivers[getDeviceIndex()][0]->setPreemptionCsrAllocation(preemptionAllocation);
this->commandStreamReceiver[0] = newCsr;
this->engines[0].commandStreamReceiver = newCsr;
UNRECOVERABLE_IF(getDeviceIndex() != 0u);
this->tagAddress = executionEnvironment->commandStreamReceivers[getDeviceIndex()][0]->getTagAddress();
}
Expand Down
2 changes: 1 addition & 1 deletion unit_tests/mocks/mock_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ class MockDevice : public Device {
size_t alignment = 256 * MemoryConstants::kiloByte;
bool uncacheable = getWaTable()->waCSRUncachable;
this->preemptionAllocation = executionEnvironment->memoryManager->allocateGraphicsMemory(requiredSize, alignment, false, uncacheable);
this->commandStreamReceiver[0]->setPreemptionCsrAllocation(preemptionAllocation);
this->engines[0].commandStreamReceiver->setPreemptionCsrAllocation(preemptionAllocation);
}
}
}
Expand Down
1 change: 1 addition & 0 deletions unit_tests/mocks/mock_memory_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ class MockMemoryManager : public OsAgnosticMemoryManager {
using MemoryManager::allocateGraphicsMemory;
using MemoryManager::allocateGraphicsMemoryInPreferredPool;
using MemoryManager::getAllocationData;
using MemoryManager::registeredOsContexts;
using MemoryManager::timestampPacketAllocator;
using OsAgnosticMemoryManager::OsAgnosticMemoryManager;
MockMemoryManager(ExecutionEnvironment &executionEnvironment) : OsAgnosticMemoryManager(false, false, executionEnvironment) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -256,11 +256,9 @@ TEST(WddmPreemptionHeaderTests, givenWddmCommandStreamReceiverWhenPreemptionIsOf

auto commandBuffer = executionEnvironment->memoryManager->allocateGraphicsMemory(4096);
LinearStream cs(commandBuffer);

BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
OsContext *osContext = new OsContext(executionEnvironment->osInterface.get(), 0u);
osContext->incRefInternal();
executionEnvironment->memoryManager->registerOsContext(osContext);
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
executionEnvironment->commandStreamReceivers[0][0]->flush(batchBuffer, EngineType::ENGINE_RCS,
executionEnvironment->commandStreamReceivers[0][0]->getResidencyAllocations(), *osContext);
auto commandHeader = wddm->submitResult.commandHeaderSubmitted;
Expand Down Expand Up @@ -288,7 +286,6 @@ TEST(WddmPreemptionHeaderTests, givenWddmCommandStreamReceiverWhenPreemptionIsOn
BatchBuffer batchBuffer{cs.getGraphicsAllocation(), 0, 0, nullptr, false, false, QueueThrottle::MEDIUM, cs.getUsed(), &cs};
OsContext *osContext = new OsContext(executionEnvironment->osInterface.get(), 0u);
osContext->incRefInternal();
executionEnvironment->memoryManager->registerOsContext(osContext);
executionEnvironment->commandStreamReceivers[0][0]->flush(batchBuffer, EngineType::ENGINE_RCS, executionEnvironment->commandStreamReceivers[0][0]->getResidencyAllocations(), *osContext);
auto commandHeader = wddm->submitResult.commandHeaderSubmitted;
COMMAND_BUFFER_HEADER *pHeader = reinterpret_cast<COMMAND_BUFFER_HEADER *>(commandHeader);
Expand Down

0 comments on commit 706d9f8

Please sign in to comment.