-
Notifications
You must be signed in to change notification settings - Fork 313
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
有关yolov4-tiny chunk部分的代码 #51
Comments
@beizhengren 应该会有问题,你改过重新build下就好 |
@enazoe 好嘞, 我先试试 |
@enazoe 搞定了,谢谢! |
@enazoe yolo-tensorrt/modules/chunk.cu Line 68 in cc405b0
最大的改动就是把 void Chunk::configurePlugin 中的代码移到了 configureWithFormat中. 完整的chunk_V2.cpp 如下:
Click to expand
#include <cmath>
#include <stdio.h>
#include <cassert>
#include <iostream>
#include "chunk_V2.h"
#include <cuda_runtime.h>
// Abort with a file/line diagnostic when `assertion` is false.
// Wrapped in do { } while (0) so the macro expands to exactly one
// statement and stays safe inside un-braced if/else branches (the
// original brace-only form broke `if (c) ASSERT(x); else ...`).
#define ASSERT(assertion)                         \
    do                                            \
    {                                             \
        if (!(assertion))                         \
        {                                         \
            std::cout<<"ASSERTION FAILED in "     \
                <<__FILE__<<":"<<__LINE__         \
                <<std::endl;                      \
            abort();                              \
        }                                         \
    } while (0)
// Abort with the CUDA error string plus file/line when a CUDA runtime
// call fails.  Two fixes over the original:
//  * `status` is captured into a local once — the old macro evaluated
//    the argument twice (in the `if` and again inside
//    cudaGetErrorString), re-running the wrapped CUDA call on failure;
//  * do { } while (0) makes the macro a single statement, safe in
//    un-braced if/else branches.
#define NV_CUDA_CHECK(status)                                                                      \
    do                                                                                             \
    {                                                                                              \
        const cudaError_t nv_cuda_check_status_ = (status);                                        \
        if (nv_cuda_check_status_ != 0)                                                            \
        {                                                                                          \
            std::cout << "Cuda failure: " << cudaGetErrorString(nv_cuda_check_status_)             \
                << " in file " << __FILE__                                                         \
                << " at line " << __LINE__ << std::endl;                                           \
            abort();                                                                               \
        }                                                                                          \
    } while (0)
namespace nvinfer1
{
// Default-construct an empty plugin; the split size is computed later
// in configureWithFormat().
ChunkV2::ChunkV2() = default;
// Rebuild a plugin from its serialized form: the payload is exactly one
// int holding the per-batch split size in bytes (see serialize()).
ChunkV2::ChunkV2(const void* buffer, size_t size)
{
    assert(size == sizeof(_n_size_split));
    _n_size_split = *static_cast<const int*>(buffer);
}
// Nothing to release: the plugin owns no device memory or handles.
ChunkV2::~ChunkV2() = default;
// The chunk operation splits its input into two halves, so this layer
// always produces exactly two output tensors.
int ChunkV2::getNbOutputs() const
{
    static constexpr int kOutputCount = 2;
    return kOutputCount;
}
// Each output keeps the input's spatial size but carries half of its
// channels (CHW layout: d[0]=C, d[1]=H, d[2]=W); both outputs have the
// same shape, so `index` only needs range-checking.
Dims ChunkV2::getOutputDimensions(int index, const Dims* inputs, int nbInputDims)
{
    assert(nbInputDims == 1);
    assert(index == 0 || index == 1);
    const Dims& in = inputs[0];
    return Dims3(in.d[0] / 2, in.d[1], in.d[2]);
}
// No per-engine resources to set up; report success (0).
int ChunkV2::initialize()
{
    return 0;
}
// Counterpart of initialize(); nothing was allocated, nothing to free.
void ChunkV2::terminate()
{
}
// The split is done with plain device-to-device copies in enqueue(),
// so no scratch workspace is needed for any batch size.
size_t ChunkV2::getWorkspaceSize(int maxBatchSize) const
{
    return 0;
}
// The entire serialized state is the single int _n_size_split.
size_t ChunkV2::getSerializationSize() const
{
    return sizeof(_n_size_split);
}
// Write the plugin state (just the split size) into `buffer`; the
// runtime guarantees it is at least getSerializationSize() bytes.
void ChunkV2::serialize(void *buffer)const
{
    *static_cast<int*>(buffer) = _n_size_split;
}
// Accept FP32 / FP16 / INT8 tensors, but only in linear NCHW layout.
bool ChunkV2::supportsFormat(DataType type, PluginFormat format) const
{
    if (format != PluginFormat::kNCHW)
        return false;
    return type == DataType::kFLOAT
        || type == DataType::kHALF
        || type == DataType::kINT8;
}
// Set plugin namespace
// Store the namespace this plugin instance is registered under.
void ChunkV2::setPluginNamespace(const char* pluginNamespace)
{
    _s_plugin_namespace = pluginNamespace;
}
// Return the namespace set via setPluginNamespace() (empty by default).
const char* ChunkV2::getPluginNamespace() const
{
    return _s_plugin_namespace.c_str();
}
// Configure the layer for the chosen data type and format.  Computes
// _n_size_split: the byte size of one output half for one batch item
// (half the channels x H x W x element size), used as the copy stride
// in enqueue() and persisted by serialize().
//
// Bug fixed: the split size was always computed with sizeof(float),
// which over-runs the smaller device buffers when the engine is built
// in FP16 or INT8 mode (the "create half engine" failure reported in
// this issue).  The element size now follows `type`, and kINT8 is
// accepted here for consistency with supportsFormat().  A stray
// std::cerr debug print was also removed.
void ChunkV2::configureWithFormat(
    const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs,
    DataType type, PluginFormat format, int maxBatchSize){
    ASSERT(format == PluginFormat::kNCHW);
    ASSERT(type == DataType::kFLOAT || type == DataType::kHALF || type == DataType::kINT8);
    ASSERT(inputDims[0].nbDims >= 1); // number of dimensions of the input tensor must be >=1

    // Bytes per element for the configured precision.
    int elem_size = sizeof(float);
    if (type == DataType::kHALF)
        elem_size = 2;
    else if (type == DataType::kINT8)
        elem_size = 1;

    _n_size_split = inputDims->d[0] / 2 * inputDims->d[1] * inputDims->d[2] * elem_size;
}
// Type string; must match ChunkV2PluginCreator::getPluginName() so the
// registry can pair plugin and creator.
const char* ChunkV2::getPluginType()const
{
    return "CHUNK_TRT";
}
// Version string; must match ChunkV2PluginCreator::getPluginVersion().
const char* ChunkV2::getPluginVersion() const
{
    return "1.0";
}
// Called by TensorRT when the network/engine/builder is destroyed; the
// plugin was heap-allocated by its creator, so it deletes itself.
void ChunkV2::destroy()
{
    delete this;
}
// Deep-copy this plugin so the builder/engine can own an independent
// instance; copies the split size and the plugin namespace.
IPluginV2* ChunkV2::clone() const
{
    auto* copy = new ChunkV2();
    copy->_n_size_split = _n_size_split;
    copy->setPluginNamespace(_s_plugin_namespace.c_str());
    return copy;
}
int ChunkV2::enqueue(int batchSize,
const void* const* inputs,
void** outputs,
void* workspace,
cudaStream_t stream)
{
for (int b = 0; b < batchSize; ++b)
{
NV_CUDA_CHECK(cudaMemcpy((char*)outputs[0] + b * _n_size_split, (char*)inputs[0] + b * 2 * _n_size_split, _n_size_split, cudaMemcpyDeviceToDevice));
NV_CUDA_CHECK(cudaMemcpy((char*)outputs[1] + b * _n_size_split, (char*)inputs[0] + b * 2 * _n_size_split + _n_size_split, _n_size_split, cudaMemcpyDeviceToDevice));
}
return 0;
}
// Out-of-line definitions of the creator's static plugin-field table
// (left empty: this plugin takes no creation-time attributes).
PluginFieldCollection ChunkV2PluginCreator::_fc{};
std::vector<PluginField> ChunkV2PluginCreator::_vec_plugin_attributes;
// Publish an empty field collection: the chunk plugin exposes no
// configurable creation attributes.
ChunkV2PluginCreator::ChunkV2PluginCreator()
{
    _vec_plugin_attributes.clear();
    _fc.nbFields = static_cast<int>(_vec_plugin_attributes.size());
    _fc.fields = _vec_plugin_attributes.data();
}
// Name string; must match ChunkV2::getPluginType().
const char* ChunkV2PluginCreator::getPluginName() const
{
    return "CHUNK_TRT";
}
// Version string; must match ChunkV2::getPluginVersion().
const char* ChunkV2PluginCreator::getPluginVersion() const
{
    return "1.0";
}
// Expose the (empty) attribute table built in the constructor.
const PluginFieldCollection* ChunkV2PluginCreator::getFieldNames()
{
    return &_fc;
}
// Create a fresh plugin instance.  `fc` is deliberately ignored: the
// split size is derived later in configureWithFormat(), not from
// creation-time attributes.  Ownership passes to the caller, which
// releases it via destroy().
IPluginV2* ChunkV2PluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)
{
    ChunkV2* obj = new ChunkV2();
    obj->setPluginNamespace(_s_name_space.c_str());
    return obj;
}
// Recreate a plugin from an engine file through the deserializing
// constructor; ownership passes to the caller.
IPluginV2* ChunkV2PluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)
{
    auto* plugin = new ChunkV2(serialData, serialLength);
    plugin->setPluginNamespace(_s_name_space.c_str());
    return plugin;
}
// Store the namespace applied to plugins this creator produces.
void ChunkV2PluginCreator::setPluginNamespace(const char* libNamespace)
{
    _s_name_space = libNamespace;
}
// Return the namespace set via setPluginNamespace() (empty by default).
const char* ChunkV2PluginCreator::getPluginNamespace() const
{
    return _s_name_space.c_str();
}
// Register the creator with TensorRT's global plugin registry at
// library load time so deserialization can find "CHUNK_TRT".
REGISTER_TENSORRT_PLUGIN(ChunkV2PluginCreator);
}//namespace nvinfer1 |
@beizhengren 额,这个我不太清楚,fp16精度不用特意实现吧,fp32实现,序列化的时候会自动设成fp16 |
@enazoe 哦哦哦, 这样啊. 那int8应该和fp16的实现过程差不多吧? 只是多了一步setInt8Calibrator |
@beizhengren 是的,目前来看是这样的 |
@enazoe |
@enazoe yolo-tensorrt/modules/yolo.cpp Line 462 in 1110190
创建 half 的engine的时候 会执行下面的语句报错 yolo-tensorrt/modules/chunk.cu Line 68 in cc405b0
您有空能试一下吗?我尝试了很多办法,没有搞定. 非常感谢! |
Hi. Anyone can explain this issue as English ? Actually, I have same problem in below line :( yolo-tensorrt/modules/chunk.cu Line 68 in cc405b0
My env;
|
@enazoe 作者您好,
关于yolov4-tiny部分, 我想把IPluginV2IOExt 换成IPluginV2, 然后支持tensorrt5. 如下:
trt7:
trt5:
请问这样换完之后(版本暂且成为trt5), 是不是需要用trt5生成新的engine之后, 才能做推断.
trt5直接加载 原始的trt7的已经转好的模型 推断会不会有问题呢?
谢谢!
The text was updated successfully, but these errors were encountered: