Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support resnet 10 blob to roi converter #402

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions samples/gstreamer/model_proc/public/car_color_proc_file.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
{
"json_schema_version": "2.2.0",
"input_preproc": [
{
"precision": "FP32",
"params": {
"color_space": "BGR",
"resize": "aspect-ratio",
"mean": [
103.939,
116.779,
123.68
]
}
}
],
"output_postproc": [
{
"converter": "label",
"method": "max",
"labels": [
"black",
"blue",
"brown",
"gold",
"green",
"grey",
"maroon",
"orange",
"red",
"silver",
"white",
"yellow"
]
}
]
}
44 changes: 44 additions & 0 deletions samples/gstreamer/model_proc/public/car_make_proc_file.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
{
"json_schema_version": "2.2.0",
"input_preproc": [
{
"precision": "FP32",
"params": {
"color_space": "BGR",
"resize": "aspect-ratio",
"mean": [
103.939,
116.779,
123.68
]
}
}
],
"output_postproc": [
{
"converter": "label",
"method": "max",
"labels": [
"acura",
"audi",
"bmw",
"chevrolet",
"chrysler",
"dodge",
"ford",
"gmc",
"honda",
"hyundai",
"infiniti",
"jeep",
"kia",
"lexus",
"mercedes",
"nissan",
"subaru",
"toyota",
"volkswagen"
]
}
]
}
31 changes: 31 additions & 0 deletions samples/gstreamer/model_proc/public/car_type_proc_file.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"json_schema_version": "2.2.0",
"input_preproc": [
{
"precision": "FP32",
"params": {
"color_space": "BGR",
"resize": "aspect-ratio",
"mean": [
103.939,
116.779,
123.68
]
}
}
],
"output_postproc": [
{
"converter": "label",
"method": "max",
"labels": [
"coupe",
"largevehicle",
"sedan",
"suv",
"truck",
"van"
]
}
]
}
21 changes: 21 additions & 0 deletions samples/gstreamer/model_proc/public/resnet10.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"json_schema_version": "2.2.0",
"input_preproc": [
{
"params": {
"resize": "aspect-ratio"
}
}
],
"output_postproc": [
{
"converter": "resnet_10",
"labels": [
"Car",
"Bicycle",
"Person",
"Roadsign"
]
}
]
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "yolo_v2.h"
#include "yolo_v3.h"
#include "yolo_v5.h"
#include "resnet_10.h"

#include "inference_backend/logger.h"

Expand All @@ -40,6 +41,8 @@ BlobToMetaConverter::Ptr BlobToROIConverter::create(BlobToMetaConverter::Initial

if (converter_name == DetectionOutputConverter::getName())
return BlobToMetaConverter::Ptr(new DetectionOutputConverter(std::move(initializer), confidence_threshold));
else if (converter_name == Resnet10Converter::getName())
return BlobToMetaConverter::Ptr(new Resnet10Converter(std::move(initializer), confidence_threshold));
else if (converter_name == BoxesLabelsConverter::getName())
return BlobToMetaConverter::Ptr(new BoxesLabelsConverter(std::move(initializer), confidence_threshold));
else if (converter_name == BoxesScoresConverter::getName())
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
/*******************************************************************************
* Copyright (C) 2021-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
******************************************************************************/

#include "resnet_10.h"

#include "inference_backend/image_inference.h"
#include "inference_backend/logger.h"
#include "safe_arithmetic.hpp"

#include <gst/gst.h>

#include <map>
#include <memory>
#include <string>
#include <vector>
#include <iostream>


// Clamps `a` into the closed range [min, max].
// Built on the MIN/MAX macros, which are not defined in this file
// (presumably GLib's, pulled in through <gst/gst.h> - confirm).
#define CLIP(a, min, max) (MAX(MIN((a), (max)), (min)))

// Integer ceiling division: smallest q such that q * b >= a, for positive operands.
// Every argument is parenthesized so that expression arguments (e.g. `x + y`)
// keep their intended precedence. `b` is evaluated more than once, so do not
// pass expressions with side effects.
#define DIVIDE_AND_ROUND_UP(a, b) (((a) + (b) - 1) / (b))

// Upper bound on the number of classes the Resnet10 parser handles; it caps the
// number of parsed classes and bounds the class id in parseOutputBlob.
constexpr int kNUM_CONFIGURED_CLASSES = 4;


using namespace post_processing;

/**
 * Decodes the coverage and bounding-box tensors of one image into detected objects.
 *
 * For every class below numClassesToParse and every grid cell whose coverage value
 * reaches confidence_threshold, the four box planes (x1, y1, x2, y2) of that class
 * are turned into a rectangle clipped to the model input image and appended to
 * `objects`.
 *
 * @param covLayerDims      CHW extents of the coverage layer; h and w define the grid.
 * @param bboxLayerDims     CHW extents of the box layer; its grid must equal the coverage grid.
 * @param outputCovBuf      coverage values, indexed as class * gridSize + cell.
 * @param outputBboxBuf     box values, four consecutive grid-sized planes per class.
 * @param numClassesToParse number of leading classes to decode.
 * @param objects           output list; detections are appended to it.
 * @throws std::invalid_argument if either buffer is null.
 * @throws std::runtime_error    if the two layers disagree on the grid size, the model
 *                               input size is zero, or a class id reaches
 *                               kNUM_CONFIGURED_CLASSES.
 */
void Resnet10Converter::parseOutputBlob(const InferDimsCHW &covLayerDims, const InferDimsCHW &bboxLayerDims,
                                        const float *outputCovBuf, const float *outputBboxBuf, int numClassesToParse,
                                        std::vector<DetectedObject> &objects) const {
    if (!outputCovBuf || !outputBboxBuf)
        throw std::invalid_argument("Resnet10 output buffer is nullptr.");

    const int gridW = static_cast<int>(covLayerDims.w);
    const int gridH = static_cast<int>(covLayerDims.h);
    if (gridW <= 0 || gridH <= 0)
        return; // empty grid: there is no cell to inspect

    // The box planes are addressed with the coverage grid size below, so both layers
    // must describe the same grid. This also rules out a zero divisor for the strides.
    if (bboxLayerDims.w != covLayerDims.w || bboxLayerDims.h != covLayerDims.h)
        throw std::runtime_error("Resnet10 coverage and bbox layers have different grid sizes.");

    const size_t input_width = getModelInputImageInfo().width;
    const size_t input_height = getModelInputImageInfo().height;
    // A zero extent would wrap `input_width - 1` and make the scale factors infinite.
    if (input_width == 0 || input_height == 0)
        throw std::runtime_error("Resnet10 model input image size is zero.");

    const int gridSize = gridW * gridH;
    const float bboxNormX = 35.0f;
    const float bboxNormY = 35.0f;
    const int strideX = DIVIDE_AND_ROUND_UP(input_width, bboxLayerDims.w);
    const int strideY = DIVIDE_AND_ROUND_UP(input_height, bboxLayerDims.h);

    // Normalized grid-cell centers. std::vector replaces the variable-length arrays,
    // which are a compiler extension rather than standard C++.
    std::vector<float> gcCentersX(static_cast<size_t>(gridW));
    std::vector<float> gcCentersY(static_cast<size_t>(gridH));
    for (int col = 0; col < gridW; ++col)
        gcCentersX[col] = static_cast<float>(col * strideX + 0.5) / bboxNormX;
    for (int row = 0; row < gridH; ++row)
        gcCentersY[row] = static_cast<float>(row * strideY + 0.5) / bboxNormY;

    for (int c = 0; c < numClassesToParse; ++c) {
        if (c >= kNUM_CONFIGURED_CLASSES)
            throw std::runtime_error("class id " + std::to_string(c) + " is out of bound");

        // Per-class views: one coverage plane and four consecutive box planes.
        const float *classCov = outputCovBuf + static_cast<size_t>(c) * gridSize;
        const float *outputX1 = outputBboxBuf + static_cast<size_t>(c) * 4 * gridSize;
        const float *outputY1 = outputX1 + gridSize;
        const float *outputX2 = outputY1 + gridSize;
        const float *outputY2 = outputX2 + gridSize;

        for (int row = 0; row < gridH; ++row) {
            for (int col = 0; col < gridW; ++col) {
                const int cell = col + row * gridW;
                const float confidence = classCov[cell];
                // Written as a negated >= so that NaN coverage values are skipped.
                if (!(confidence >= confidence_threshold))
                    continue;

                const float rectX1f = (outputX1[cell] - gcCentersX[col]) * -bboxNormX;
                const float rectY1f = (outputY1[cell] - gcCentersY[row]) * -bboxNormY;
                const float rectX2f = (outputX2[cell] + gcCentersX[col]) * bboxNormX;
                const float rectY2f = (outputY2[cell] + gcCentersY[row]) * bboxNormY;

                // Clip both corners to the input image, then convert to x/y/width/height.
                const float x = CLIP(rectX1f, 0, input_width - 1);
                const float y = CLIP(rectY1f, 0, input_height - 1);
                const float width = CLIP(rectX2f, 0, input_width - 1) - x + 1;
                const float height = CLIP(rectY2f, 0, input_height - 1) - y + 1;

                objects.push_back(DetectedObject(x, y, width, height, confidence, c,
                                                 BlobToMetaConverter::getLabelByLabelId(c), 1.0f / input_width,
                                                 1.0f / input_height, false));
            }
        }
    }
}

/**
 * Converts the raw Resnet10 output blobs of a batch into a table of detections.
 *
 * The blobs named "conv2d_cov/Sigmoid" (coverage) and "conv2d_bbox" (boxes) are
 * located in output_blobs, sliced per batch element and handed to parseOutputBlob;
 * the collected objects are then passed to storeObjects.
 *
 * @param output_blobs output blobs keyed by layer name.
 * @return the result of storeObjects for the parsed batch.
 * @throws std::runtime_error (with the original error nested) on any failure,
 *         including a null blob or a missing coverage/bbox blob.
 */
TensorsTable Resnet10Converter::convert(const OutputBlobs &output_blobs) const {
    ITT_TASK(__FUNCTION__);
    try {
        // Plain locals rather than function-level statics: statics would be shared by
        // every converter instance and thread, and would keep the first model's dims.
        InferDimsCHW covLayerDims = {0, 0, 0};
        InferDimsCHW bboxLayerDims = {0, 0, 0};

        const auto &model_input_image_info = getModelInputImageInfo();
        const size_t batch_size = model_input_image_info.batch_size;

        DetectedObjectsTable objects_table(batch_size);

        // Records the layer dims on first sight and returns the start of the slice
        // that belongs to batch element `batch_number`.
        const auto slice_for_batch = [batch_size](const InferenceBackend::OutputBlob::Ptr &blob,
                                                  InferDimsCHW &layer_dims, size_t batch_number) -> const float * {
            if (!layer_dims.c) {
                const auto &dims = blob->GetDims(); // NCHW
                if (dims.size() < 4)
                    throw std::invalid_argument("Resnet10 output blob has fewer than 4 dimensions.");
                layer_dims.set(dims[1], dims[2], dims[3]);
            }
            const size_t unbatched_size = blob->GetSize() / batch_size;
            return reinterpret_cast<const float *>(blob->GetData()) + unbatched_size * batch_number;
        };

        for (size_t batch_number = 0; batch_number < batch_size; ++batch_number) {
            auto &objects = objects_table[batch_number];
            const float *outputCovBuf = nullptr;
            const float *outputBboxBuf = nullptr;

            for (const auto &blob_iter : output_blobs) {
                const InferenceBackend::OutputBlob::Ptr &blob = blob_iter.second;
                if (!blob)
                    throw std::invalid_argument("Output blob is nullptr.");

                if (blob_iter.first == "conv2d_bbox")
                    outputBboxBuf = slice_for_batch(blob, bboxLayerDims, batch_number);
                if (blob_iter.first == "conv2d_cov/Sigmoid")
                    outputCovBuf = slice_for_batch(blob, covLayerDims, batch_number);
            }

            // Both layers are required. The previous check tested the coverage pointer
            // twice, so a missing bbox blob went on to be dereferenced.
            if (!outputCovBuf || !outputBboxBuf)
                throw std::runtime_error("Failed to do Resnet10 post-processing.");

            const int numClassesToParse = MIN(covLayerDims.c, kNUM_CONFIGURED_CLASSES);
            parseOutputBlob(covLayerDims, bboxLayerDims, outputCovBuf, outputBboxBuf, numClassesToParse, objects);
        }

        return storeObjects(objects_table);
    } catch (const std::exception &e) {
        std::throw_with_nested(std::runtime_error("Failed to do Resnet10 post-processing."));
    }
    // Not reached: the handler above always throws. Kept so every path returns a value.
    return TensorsTable{};
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*******************************************************************************
* Copyright (C) 2021 Intel Corporation
*
* SPDX-License-Identifier: MIT
******************************************************************************/

#pragma once

#include "blob_to_roi_converter.h"
#include <opencv2/opencv.hpp>
#include "inference_backend/image_inference.h"

#include <gst/gst.h>

#include <map>
#include <memory>
#include <string>
#include <vector>

namespace post_processing {

/**
 * Channel / height / width extents of one layer.
 *
 * Members are zero-initialized so a default-constructed value is well defined;
 * brace initialization such as `InferDimsCHW d = {c, h, w};` keeps working
 * (the type remains an aggregate from C++14 on).
 */
struct InferDimsCHW {
    unsigned int c = 0;
    unsigned int h = 0;
    unsigned int w = 0;

    /// Overwrites all three extents at once.
    void set(unsigned int c, unsigned int h, unsigned int w) {
        this->c = c;
        this->h = h;
        this->w = w;
    }
};

/**
 * Blob-to-ROI converter selected by the "resnet_10" converter name.
 *
 * convert() is the entry point; parseOutputBlob() decodes the coverage and
 * bounding-box buffers of a single image into DetectedObject entries.
 */
class Resnet10Converter : public BlobToROIConverter {
  public:
    /**
     * @param initializer          forwarded to the BlobToROIConverter base.
     * @param confidence_threshold forwarded to the BlobToROIConverter base.
     *
     * NOTE(review): the trailing `true, 0.4` base arguments are presumably the NMS
     * switch and its IoU threshold - confirm against BlobToROIConverter.
     */
    Resnet10Converter(BlobToMetaConverter::Initializer initializer, double confidence_threshold)
        : BlobToROIConverter(std::move(initializer), confidence_threshold, true, 0.4) {
    }

    /// Turns the model output blobs into a tensors table of detections.
    TensorsTable convert(const OutputBlobs &output_blobs) const override;

    /// Converter name matched by the BlobToROIConverter::create factory.
    static std::string getName() {
        return "resnet_10";
    }

    /// Legacy converter name (the method name keeps its original spelling).
    static std::string getDepricatedName() {
        return "tensor_to_bbox_resnet_10";
    }

  protected:
    // FIXME: move roi_scale to coordinates restorer or attacher
    /// Appends to `objects` the detections found in one image's coverage and bbox buffers.
    void parseOutputBlob(const InferDimsCHW &covLayerDims, const InferDimsCHW &bboxLayerDims,
                         const float *outputCovBuf, const float *outputBboxBuf, int numClassesToParse,
                         std::vector<DetectedObject> &objects) const;
};
} // namespace post_processing