diff --git a/src/esp/gfx/BackgroundRenderer.cpp b/src/esp/gfx/BackgroundRenderer.cpp index 04e45f4c5a..69c3faa3ed 100644 --- a/src/esp/gfx/BackgroundRenderer.cpp +++ b/src/esp/gfx/BackgroundRenderer.cpp @@ -171,7 +171,7 @@ int BackgroundRenderer::threadRender() { if (sensorType == sensor::SensorType::Depth) sensor.renderTarget().readFrameDepth(view); - if (sensorType == sensor::SensorType::Semantic) + if (sensorType == sensor::SensorType::Semantic || sensorType == sensor::SensorType::Instance) sensor.renderTarget().readFrameObjectId(view); } diff --git a/src/esp/gfx/PbrDrawable.cpp b/src/esp/gfx/PbrDrawable.cpp index a6bb93e352..6271b6602b 100644 --- a/src/esp/gfx/PbrDrawable.cpp +++ b/src/esp/gfx/PbrDrawable.cpp @@ -195,6 +195,15 @@ void PbrDrawable::draw(const Mn::Matrix4& transformationMatrix, Mn::GL::Renderer::setFrontFace(Mn::GL::Renderer::FrontFace::ClockWise); } + // Pick the semantic id or the instance id based on the camera type + int specificID = 0; + if(static_cast(camera).getIsInstanceId()) { + specificID = node_.getInstanceId(); + } + else { + specificID = node_.getSemanticId(); + } + (*shader_) // e.g., semantic mesh has its own per vertex annotation, which has // been uploaded to GPU so simply pass 0 to the uniform "objectId" in @@ -204,7 +213,7 @@ void PbrDrawable::draw(const Mn::Matrix4& transformationMatrix, : ((flags_ & PbrShader::Flag::InstancedObjectId) == PbrShader::Flag::InstancedObjectId ? 0 - : node_.getSemanticId())) + : specificID)) .setProjectionMatrix(camera.projectionMatrix()) .setViewMatrix(camera.cameraMatrix()) .setModelMatrix(modelMatrix) // NOT modelview matrix! diff --git a/src/esp/gfx/RenderCamera.h b/src/esp/gfx/RenderCamera.h index 70195558cc..18cba91732 100644 --- a/src/esp/gfx/RenderCamera.h +++ b/src/esp/gfx/RenderCamera.h @@ -215,6 +215,20 @@ class RenderCamera : public MagnumCamera { size_t filterTransforms(DrawableTransforms& drawableTransforms, Flags flags = {}); + /** + * @brief Set the flag to infer if the camera should pick semantic id or instance id for the object. + * + * @param isInstanceId true if picking instance id, false if picking semantic id. + */ + void setIsInstanceId(bool isInstanceId) { isInstanceId_ = isInstanceId; } + + /** + * @brief Get the flag of whether the camera should pick semantic id or instance id for the object. + * + * @return true if picking instance id, false if picking semantic id. + */ + bool getIsInstanceId() const { return isInstanceId_; } + /** * @brief if the "immediate" following rendering pass is to use drawable ids * as the object ids. @@ -254,6 +268,7 @@ class RenderCamera : public MagnumCamera { Mn::Matrix4 invertedProjectionMatrix; size_t previousNumVisibleDrawables_ = 0; bool useDrawableIds_ = false; + bool isInstanceId_ = false; ESP_SMART_POINTERS(RenderCamera) }; diff --git a/src/esp/gfx/Renderer.cpp b/src/esp/gfx/Renderer.cpp index 7fbda18e0f..c94e074c8a 100644 --- a/src/esp/gfx/Renderer.cpp +++ b/src/esp/gfx/Renderer.cpp @@ -87,7 +87,8 @@ struct Renderer::Impl { void draw(sensor::VisualSensor& visualSensor, sim::Simulator& sim) { acquireGlContext(); if (visualSensor.specification()->sensorType == - sensor::SensorType::Semantic) { + sensor::SensorType::Semantic || visualSensor.specification()->sensorType == + sensor::SensorType::Instance) { ESP_CHECK(sim.semanticSceneGraphExists(), "Renderer::Impl::draw(): SemanticSensor observation requested " "but no SemanticSceneGraph is loaded"); @@ -101,7 +102,7 @@ struct Renderer::Impl { acquireGlContext(); sensor::SensorType& type = visualSensor.specification()->sensorType; if (type == sensor::SensorType::Depth || - type == sensor::SensorType::Semantic) { + type == sensor::SensorType::Semantic || type == sensor::SensorType::Instance) { Mn::GL::Renderer::disable(Mn::GL::Renderer::Feature::DepthTest); gfx::RenderTarget& tgt = visualSensor.renderTarget(); if (!mesh_) { @@ -112,7 +113,7 @@ struct Renderer::Impl { esp::gfx::Renderer::Impl::RendererShaderType rendererShaderType = esp::gfx::Renderer::Impl::RendererShaderType::DepthTextureVisualizer; - if (type == sensor::SensorType::Semantic) { + if (type == sensor::SensorType::Semantic || type == sensor::SensorType::Instance) { rendererShaderType = gfx::Renderer::Impl::RendererShaderType::ObjectIdTextureVisualizer; } @@ -160,7 +161,7 @@ struct Renderer::Impl { shader->bindDepthTexture(tgt.getDepthTexture()); #endif shader->setDepthUnprojection(*visualSensor.depthUnprojection()); - } else if (type == sensor::SensorType::Semantic) { + } else if (type == sensor::SensorType::Semantic || type == sensor::SensorType::Instance) { shader->bindObjectIdTexture(tgt.getObjectIdTexture()); } if ((colorMapOffset >= 0) && (colorMapScale >= 0)) { @@ -365,6 +366,13 @@ struct Renderer::Impl { renderTargetFlags |= RenderTarget::Flag::ObjectIdAttachment; break; + case sensor::SensorType::Instance: + if (bindingFlags & Flag::VisualizeTexture) { + renderTargetFlags |= RenderTarget::Flag::RgbaAttachment; + } + renderTargetFlags |= RenderTarget::Flag::ObjectIdAttachment; + break; + default: // I need this default, since sensor type list is long, and without // default clang-tidy will complain diff --git a/src/esp/metadata/attributes/ObjectAttributes.h b/src/esp/metadata/attributes/ObjectAttributes.h index 458509596b..da38d46400 100644 --- a/src/esp/metadata/attributes/ObjectAttributes.h +++ b/src/esp/metadata/attributes/ObjectAttributes.h @@ -367,6 +367,20 @@ class ObjectAttributes : public AbstractObjectAttributes { uint32_t getSemanticId() const { return get("semantic_id"); } + /** + * @brief Set the instance id for the object. The instance id should be unique in each scene for the object. + * + * @param instance_id the id for the object in the scene (can be the implicit index of the object in the scene config) + */ + void setInstanceId(int instance_id) { set("instance_id", instance_id); } + + /** + * @brief Get the instance id for the object. + * + * @return instance_id the id for the object in the scene + */ + uint32_t getInstanceId() const { return get("instance_id"); } + protected: /** * @brief Write object-specific values to json object diff --git a/src/esp/physics/PhysicsManager.cpp b/src/esp/physics/PhysicsManager.cpp index 33ab8a619e..b76beb2b7e 100644 --- a/src/esp/physics/PhysicsManager.cpp +++ b/src/esp/physics/PhysicsManager.cpp @@ -129,6 +129,10 @@ int PhysicsManager::addObjectInstance( << "as specified in object instance attributes."; return 0; } + + // Set the instance id of the object + objAttributes->setInstanceId(objInstAttributes->getID()); + // check if an object is being set to be not visible for a particular // instance. int visSet = objInstAttributes->getIsInstanceVisible(); diff --git a/src/esp/physics/RigidBase.h b/src/esp/physics/RigidBase.h index 3fe13a9d14..7ccf6f9de4 100644 --- a/src/esp/physics/RigidBase.h +++ b/src/esp/physics/RigidBase.h @@ -386,6 +386,22 @@ class RigidBase : public esp::physics::PhysicsObjectBase { } } + /** + * @brief Get the Instance ID for this object. + */ + int getInstanceId() const { return visualNode_->getInstanceId(); } + + /** + * @brief Set the @ref esp::scene::SceneNode::InstanceId_ for all visual nodes + * belonging to the object. + * @param InstanceId The desired Instance id for the object. + */ + void setInstanceId(uint32_t instanceId) { + for (auto* node : visualNodes_) { + node->setInstanceId(instanceId); + } + } + /** * @brief Get pointers to this rigid's visual SceneNodes. * @return vector of pointers to the rigid's visual scene nodes. diff --git a/src/esp/physics/RigidObject.cpp b/src/esp/physics/RigidObject.cpp index f9e55093c3..c0e38a9a2b 100644 --- a/src/esp/physics/RigidObject.cpp +++ b/src/esp/physics/RigidObject.cpp @@ -49,6 +49,8 @@ bool RigidObject::finalizeObject() { // set the visualization semantic id setSemanticId(ObjectAttributes->getSemanticId()); + // Set the visualization instance id + setInstanceId(ObjectAttributes->getInstanceId()); // finish object by instancing any dynamics library-specific code required return finalizeObject_LibSpecific(); diff --git a/src/esp/scene/SceneNode.h b/src/esp/scene/SceneNode.h index 480ba5ed26..c3068dccb9 100644 --- a/src/esp/scene/SceneNode.h +++ b/src/esp/scene/SceneNode.h @@ -127,6 +127,12 @@ class SceneNode : public MagnumObject, //! Sets node semanticId virtual void setSemanticId(int semanticId) { semanticId_ = semanticId; } + //! Returns node instanceId + virtual int getInstanceId() const { return instanceId_; } + + //! Sets node instanceId + virtual void setInstanceId(int instanceId) { instanceId_ = instanceId; } + Magnum::Vector3 absoluteTranslation() const; Magnum::Vector3 absoluteTranslation(); @@ -234,6 +240,7 @@ class SceneNode : public MagnumObject, //! The semantic category of this node. Used to render attached Drawables with //! Semantic sensor when no perVertexObjectIds are present. uint32_t semanticId_ = 0; + uint32_t instanceId_ = 0; //! the local bounding box for meshes stored at this node Magnum::Range3D meshBB_; diff --git a/src/esp/sensor/CameraSensor.cpp b/src/esp/sensor/CameraSensor.cpp index ceb073ea2b..e98be311e2 100644 --- a/src/esp/sensor/CameraSensor.cpp +++ b/src/esp/sensor/CameraSensor.cpp @@ -134,7 +134,13 @@ bool CameraSensor::drawObservation(sim::Simulator& sim) { flags |= gfx::RenderCamera::Flag::FrustumCulling; } - if (cameraSensorSpec_->sensorType == SensorType::Semantic) { + if (cameraSensorSpec_->sensorType == SensorType::Semantic || cameraSensorSpec_->sensorType == SensorType::Instance) { + // Set the camera instance Flag to infer if the camera should pick semantic id or instance id based on the camera type + if (cameraSensorSpec_->sensorType == SensorType::Semantic) { + renderCamera_->setIsInstanceId(false); + } else { + renderCamera_->setIsInstanceId(true); + } // TODO: check sim has semantic scene graph bool twoSceneGraphs = (&sim.getActiveSemanticSceneGraph() != &sim.getActiveSceneGraph()); diff --git a/src/esp/sensor/CubeMapSensorBase.cpp b/src/esp/sensor/CubeMapSensorBase.cpp index 794d88aeef..813312fb20 100644 --- a/src/esp/sensor/CubeMapSensorBase.cpp +++ b/src/esp/sensor/CubeMapSensorBase.cpp @@ -57,6 +57,9 @@ CubeMapSensorBase::CubeMapSensorBase(scene::SceneNode& cameraNode, case SensorType::Semantic: cubeMapFlags |= gfx::CubeMap::Flag::ObjectIdTexture; break; + case SensorType::Instance: + cubeMapFlags |= gfx::CubeMap::Flag::ObjectIdTexture; + break; default: CORRADE_INTERNAL_ASSERT_UNREACHABLE(); break; @@ -80,6 +83,9 @@ CubeMapSensorBase::CubeMapSensorBase(scene::SceneNode& cameraNode, case SensorType::Semantic: cubeMapShaderBaseFlags_ |= gfx::CubeMapShaderBase::Flag::ObjectIdTexture; break; + case SensorType::Instance: + cubeMapShaderBaseFlags_ |= gfx::CubeMapShaderBase::Flag::ObjectIdTexture; + break; // sensor type list is too long, have to use default default: CORRADE_INTERNAL_ASSERT_UNREACHABLE(); @@ -121,7 +127,7 @@ bool CubeMapSensorBase::renderToCubemapTexture(sim::Simulator& sim) { // generate the cubemap texture const char* defaultDrawableGroupName = ""; - if (cubeMapSensorBaseSpec_->sensorType == SensorType::Semantic) { + if (cubeMapSensorBaseSpec_->sensorType == SensorType::Semantic || cubeMapSensorBaseSpec_->sensorType == SensorType::Instance) { bool twoSceneGraphs = (&sim.getActiveSemanticSceneGraph() != &sim.getActiveSceneGraph()); @@ -164,7 +170,8 @@ void CubeMapSensorBase::drawWith(gfx::CubeMapShaderBase& shader) { shader.bindDepthTexture( cubeMap_->getTexture(gfx::CubeMap::TextureType::Depth)); } - if (cubeMapSensorBaseSpec_->sensorType == SensorType::Semantic) { + if (cubeMapSensorBaseSpec_->sensorType == SensorType::Semantic || + cubeMapSensorBaseSpec_->sensorType == SensorType::Instance) { shader.bindObjectIdTexture( cubeMap_->getTexture(gfx::CubeMap::TextureType::ObjectId)); } diff --git a/src/esp/sensor/Sensor.h b/src/esp/sensor/Sensor.h index b64d53add5..b6653aaedf 100644 --- a/src/esp/sensor/Sensor.h +++ b/src/esp/sensor/Sensor.h @@ -33,6 +33,7 @@ enum class SensorType : int32_t { Tensor, Text, Audio, + Instance, SensorTypeCount, // add new type above this term!! }; diff --git a/src/esp/sensor/VisualSensor.cpp b/src/esp/sensor/VisualSensor.cpp index a53e317b1f..12fb1aee14 100644 --- a/src/esp/sensor/VisualSensor.cpp +++ b/src/esp/sensor/VisualSensor.cpp @@ -24,7 +24,7 @@ void VisualSensorSpec::sanityCheck() const { SensorSpec::sanityCheck(); bool isVisualSensor = (sensorType == SensorType::Color || sensorType == SensorType::Depth || - sensorType == SensorType::Normal || sensorType == SensorType::Semantic); + sensorType == SensorType::Normal || sensorType == SensorType::Semantic || sensorType == SensorType::Instance); CORRADE_ASSERT( isVisualSensor, "VisualSensorSpec::sanityCheck(): sensorType must be Color, Depth, " @@ -90,7 +90,7 @@ bool VisualSensor::getObservationSpace(ObservationSpace& space) { static_cast(visualSensorSpec_->resolution[1]), static_cast(visualSensorSpec_->channels)}; space.dataType = core::DataType::DT_UINT8; - if (visualSensorSpec_->sensorType == SensorType::Semantic) { + if (visualSensorSpec_->sensorType == SensorType::Semantic || visualSensorSpec_->sensorType == SensorType::Instance) { space.dataType = core::DataType::DT_UINT32; } else if (visualSensorSpec_->sensorType == SensorType::Depth) { space.dataType = core::DataType::DT_FLOAT; @@ -110,7 +110,7 @@ void VisualSensor::readObservation(Observation& obs) { // TODO: have different classes for the different types of sensors // TODO: do we need to flip axis? - if (visualSensorSpec_->sensorType == SensorType::Semantic) { + if (visualSensorSpec_->sensorType == SensorType::Semantic || visualSensorSpec_->sensorType == SensorType::Instance) { renderTarget().readFrameObjectId(Magnum::MutableImageView2D{ Magnum::PixelFormat::R32UI, renderTarget().framebufferSize(), obs.buffer->data}); @@ -153,7 +153,7 @@ VisualSensor::MoveSemanticSensorNodeHelper::MoveSemanticSensorNodeHelper( sim::Simulator& sim) : visualSensor_(visualSensor), sim_(sim) { CORRADE_INTERNAL_ASSERT(visualSensor_.specification()->sensorType == - SensorType::Semantic); + SensorType::Semantic || visualSensor_.specification()->sensorType == SensorType::Instance); scene::SceneNode& node = visualSensor_.node(); CORRADE_ASSERT( !scene::SceneGraph::isRootNode(node), @@ -189,7 +189,7 @@ VisualSensor::MoveSemanticSensorNodeHelper::MoveSemanticSensorNodeHelper( VisualSensor::MoveSemanticSensorNodeHelper::~MoveSemanticSensorNodeHelper() { CORRADE_INTERNAL_ASSERT(visualSensor_.specification()->sensorType == - SensorType::Semantic); + SensorType::Semantic || visualSensor_.specification()->sensorType == SensorType::Instance); scene::SceneNode& node = visualSensor_.node(); CORRADE_ASSERT( diff --git a/src/utils/viewer/viewer.cpp b/src/utils/viewer/viewer.cpp index e9fca198d3..701941c41f 100644 --- a/src/utils/viewer/viewer.cpp +++ b/src/utils/viewer/viewer.cpp @@ -645,6 +645,7 @@ Key Commands: RGBA = 0, Depth, Semantic, + Instance, VisualizeModeCount, }; VisualizeMode visualizeMode_ = VisualizeMode::RGBA; @@ -701,7 +702,8 @@ void addSensors(esp::agent::AgentConfiguration& agentConfig, bool isOrtho) { : esp::sensor::SensorSubType::Pinhole; spec->sensorType = sensorType; if (sensorType == esp::sensor::SensorType::Depth || - sensorType == esp::sensor::SensorType::Semantic) { + sensorType == esp::sensor::SensorType::Semantic || + sensorType == esp::sensor::SensorType::Instance) { spec->channels = 1; } spec->position = {0.0f, rgbSensorHeight, 0.0f}; @@ -715,6 +717,8 @@ void addSensors(esp::agent::AgentConfiguration& agentConfig, bool isOrtho) { addCameraSensor("depth_camera", esp::sensor::SensorType::Depth); // add the camera semantic sensor addCameraSensor("semantic_camera", esp::sensor::SensorType::Semantic); + // add the camera instance sensor + addCameraSensor("instance_camera", esp::sensor::SensorType::Instance); auto addFisheyeSensor = [&](const std::string& uuid, esp::sensor::SensorType sensorType, @@ -730,7 +734,8 @@ void addSensors(esp::agent::AgentConfiguration& agentConfig, bool isOrtho) { spec->uuid = uuid; spec->sensorType = sensorType; if (sensorType == esp::sensor::SensorType::Depth || - sensorType == esp::sensor::SensorType::Semantic) { + sensorType == esp::sensor::SensorType::Semantic || + sensorType == esp::sensor::SensorType::Instance) { spec->channels = 1; } spec->sensorSubType = esp::sensor::SensorSubType::Fisheye; @@ -766,6 +771,9 @@ void addSensors(esp::agent::AgentConfiguration& agentConfig, bool isOrtho) { // add the fisheye semantic sensor addFisheyeSensor("semantic_fisheye", esp::sensor::SensorType::Semantic, esp::sensor::FisheyeSensorModelType::DoubleSphere); + // add the fisheye instance sensor + addFisheyeSensor("instance_fisheye", esp::sensor::SensorType::Instance, + esp::sensor::FisheyeSensorModelType::DoubleSphere); auto addEquirectangularSensor = [&](const std::string& uuid, esp::sensor::SensorType sensorType) { @@ -776,7 +784,8 @@ void addSensors(esp::agent::AgentConfiguration& agentConfig, bool isOrtho) { spec->uuid = uuid; spec->sensorType = sensorType; if (sensorType == esp::sensor::SensorType::Depth || - sensorType == esp::sensor::SensorType::Semantic) { + sensorType == esp::sensor::SensorType::Semantic || + sensorType == esp::sensor::SensorType::Instance) { spec->channels = 1; } spec->sensorSubType = esp::sensor::SensorSubType::Equirectangular; @@ -791,6 +800,9 @@ void addSensors(esp::agent::AgentConfiguration& agentConfig, bool isOrtho) { // add the equirectangular semantic sensor addEquirectangularSensor("semantic_equirectangular", esp::sensor::SensorType::Semantic); + // add the equirectangular instance sensor + addEquirectangularSensor("instance_equirectangular", + esp::sensor::SensorType::Instance); // add audio sensor #ifdef ESP_BUILD_WITH_AUDIO @@ -1636,6 +1648,10 @@ void Viewer::setSensorVisID() { prefix = "semantic"; break; } + case VisualizeMode::Instance: { + prefix = "instance"; + break; + } default: CORRADE_INTERNAL_ASSERT_UNREACHABLE(); } // switch on visualize mode @@ -1750,7 +1766,7 @@ void Viewer::drawEvent() { } sensorRenderTarget->blitRgbaToDefault(); } else { - // Depth Or Semantic, or Non-pinhole RGBA + // Depth Or Semantic Or Instance, or Non-pinhole RGBA simulator_->drawObservation(defaultAgentId_, sensorVisID_); esp::gfx::RenderTarget* sensorRenderTarget = @@ -1763,7 +1779,8 @@ void Viewer::drawEvent() { simulator_->visualizeObservation(defaultAgentId_, sensorVisID_, 1.0f / 512.0f, // colorMapOffset 1.0f / 12.0f); // colorMapScale - } else if (visualizeMode_ == VisualizeMode::Semantic) { + } else if (visualizeMode_ == VisualizeMode::Semantic || + visualizeMode_ == VisualizeMode::Instance) { Mn::GL::defaultFramebuffer.clear(Mn::GL::FramebufferClear::Color | Mn::GL::FramebufferClear::Depth); simulator_->visualizeObservation(defaultAgentId_, sensorVisID_); @@ -1925,7 +1942,8 @@ void Viewer::bindRenderTarget() { esp::sensor::VisualSensor& visualSensor = static_cast(it.second.get()); if (visualizeMode_ == VisualizeMode::Depth || - visualizeMode_ == VisualizeMode::Semantic) { + visualizeMode_ == VisualizeMode::Semantic || + visualizeMode_ == VisualizeMode::Instance) { simulator_->getRenderer()->bindRenderTarget( visualSensor, {esp::gfx::Renderer::Flag::VisualizeTexture}); } else {