Skip to content

Commit

Permalink
-
Browse files Browse the repository at this point in the history
  • Loading branch information
Bruno Alves authored and bfonta committed Sep 15, 2020
1 parent 98df8e3 commit 38d08f8
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 270 deletions.
6 changes: 6 additions & 0 deletions CUDADataFormats/HGCal/BuildFile.xml
@@ -1,2 +1,8 @@
<use name="FWCore/Utilities"/>
<use name="Geometry/HGCalGeometry"/>
<use name="HeterogeneousCore/CUDACore"/>
<use name="cuda"/>

<export>
<lib name="1"/>
</export>
@@ -1,7 +1,6 @@
#include "RecoLocalCalo/HGCalRecProducers/plugins/HeterogeneousHGCalConditions_TEST.h"
#include "RecoLocalCalo/HGCalRecProducers/plugins/HeterogeneousHGCalHEFConditions.h"

HeterogeneousHGCalHEFConditionsWrapper::HeterogeneousHGCalHEFConditionsWrapper(const HGCalParameters* cpuHGCalParameters,
cpos::HGCalPositionsMapping* cpuPos)
HeterogeneousHGCalHEFConditionsWrapper::HeterogeneousHGCalHEFConditionsWrapper(const HGCalParameters* cpuHGCalParameters)
{
//HGCalParameters as defined in CMSSW
this->sizes_params_ = calculate_memory_bytes_params_(cpuHGCalParameters);
Expand All @@ -22,13 +21,6 @@ size_t HeterogeneousHGCalHEFConditionsWrapper::allocate_memory_params_(const std
return chunk_;
}

// Computes the total number of bytes required by all position arrays and
// pinned-allocates them as one contiguous host chunk.
//
// Only posmap_.x receives the chunk's base address; the other position fields
// are presumably carved out of this same chunk by the transfer step, and the
// matching cudaFreeHost in the destructor frees only this base pointer —
// TODO confirm against transfer_data_to_heterogeneous_pointers_pos_.
//
// @param sz  per-array sizes, in bytes
// @return    total chunk size in bytes (sum of sz)
size_t HeterogeneousHGCalHEFConditionsWrapper::allocate_memory_pos_(const std::vector<size_t>& sz)
{
  // Accumulate starting from size_t{0}: the original plain `0` literal makes
  // std::accumulate sum into an int, silently truncating/overflowing the byte
  // total for allocations larger than INT_MAX bytes.
  const size_t chunk = std::accumulate(sz.begin(), sz.end(), size_t{0}); //total memory required in bytes
  gpuErrchk(cudaMallocHost(&this->posmap_.x, chunk));
  return chunk;
}

void HeterogeneousHGCalHEFConditionsWrapper::transfer_data_to_heterogeneous_pointers_params_(const std::vector<size_t>& sz, const HGCalParameters* cpuParams)
{
//store cumulative sum in bytes and convert it to sizes in units of C++ typesHEF, i.e., number of items to be transferred to GPU
Expand Down Expand Up @@ -80,7 +72,7 @@ void HeterogeneousHGCalHEFConditionsWrapper::transfer_data_to_heterogeneous_poin

void HeterogeneousHGCalHEFConditionsWrapper::transfer_data_to_heterogeneous_pointers_pos_(const std::vector<size_t>& sz, cpos::HGCalPositionsMapping* cpuPos)
{
//store cumulative sum in bytes and convert it to sizes in units of C++ cpos::types, i.e., number if items to be transferred to GPU
//store cumulative sum in bytes and convert it to sizes in units of C++ typesHEF, i.e., number of items to be transferred to GPU
std::vector<size_t> cumsum_sizes( sz.size()+1, 0 ); //starting with zero
std::partial_sum(sz.begin(), sz.end(), cumsum_sizes.begin()+1);
for(unsigned int i=1; i<cumsum_sizes.size(); ++i) //start at second element (the first is zero)
Expand Down Expand Up @@ -181,14 +173,12 @@ std::vector<size_t> HeterogeneousHGCalHEFConditionsWrapper::calculate_memory_byt
std::vector<size_t> sizes(npointers);
for(unsigned int i=0; i<npointers; ++i)
{
if(cpos::types[i] == cpos::HeterogeneousHGCalPositionsType::Float and i<2)
sizes[i] = select_pointer_u_(cpuPos, 2).size(); //each position array (x, y) will have the same size as the detid array
else if(cpos::types[i] == cpos::HeterogeneousHGCalPositionsType::Float and i==2)
sizes[i] = 44; //the z position array will only include one value per layer (each HEF endcap has 22 layers)
if(cpos::types[i] == cpos::HeterogeneousHGCalPositionsType::Float)
sizes[i] = select_pointer_u_(cpuPos, 1).size(); //each position array (x, y and z) will have the same size as the detid array
else if(cpos::types[i] == cpos::HeterogeneousHGCalPositionsType::Int32_t)
sizes[i] = select_pointer_i_(cpuPos, 1).size();
sizes[i] = select_pointer_i_(cpuPos, 0).size();
else if(cpos::types[i] == cpos::HeterogeneousHGCalPositionsType::Uint32_t)
sizes[i] = select_pointer_u_(cpuPos, 2).size();
sizes[i] = select_pointer_u_(cpuPos, 1).size();
}

std::vector<size_t> sizes_units(npointers);
Expand All @@ -210,6 +200,7 @@ std::vector<size_t> HeterogeneousHGCalHEFConditionsWrapper::calculate_memory_byt

// Releases the pinned host memory owned by this wrapper.
// The positions chunk was allocated as a single cudaMallocHost whose base
// address lives in posmap_.x, so freeing that one pointer releases every
// position array. The params chunk is presumably laid out the same way with
// cellFineX_ as its base pointer — TODO confirm against
// allocate_memory_params_; freeing a non-base pointer here would be invalid.
HeterogeneousHGCalHEFConditionsWrapper::~HeterogeneousHGCalHEFConditionsWrapper() {
gpuErrchk(cudaFreeHost(this->params_.cellFineX_));
gpuErrchk(cudaFreeHost(this->posmap_.x));
}

//I could use template specializations
Expand Down Expand Up @@ -261,7 +252,7 @@ float*& HeterogeneousHGCalHEFConditionsWrapper::select_pointer_f_(cpos::Heteroge
case 2:
return cpuObject->z_per_layer;
default:
edm::LogError("HeterogeneousHGCalHEFConditionsWrapper") << "select_pointer_f(heterogeneous): no item.";
edm::LogError("HeterogeneousHGCalHEFConditionsWrapper") << "select_pointer_i(heterogeneous): no item.";
return cpuObject->x;
}
}
Expand All @@ -273,7 +264,7 @@ std::vector<float>& HeterogeneousHGCalHEFConditionsWrapper::select_pointer_f_(cp
case 0:
return cpuObject->z_per_layer;
default:
edm::LogError("HeterogeneousHGCalHEFConditionsWrapper") << "select_pointer_f(non-heterogeneous): no item.";
edm::LogError("HeterogeneousHGCalHEFConditionsWrapper") << "select_pointer_i(non-heterogeneous): no item.";
return cpuObject->z_per_layer;
}
}
Expand All @@ -285,7 +276,7 @@ int32_t*& HeterogeneousHGCalHEFConditionsWrapper::select_pointer_i_(cpar::Hetero
case 4:
return cpuObject->waferTypeL_;
default:
edm::LogError("HeterogeneousHGCalHEFConditionsWrapper") << "select_pointer_i(heterogeneous, parameters): no item.";
edm::LogError("HeterogeneousHGCalHEFConditionsWrapper") << "select_pointer_i(heterogeneous): no item.";
return cpuObject->waferTypeL_;
}
}
Expand All @@ -297,7 +288,7 @@ std::vector<int32_t> HeterogeneousHGCalHEFConditionsWrapper::select_pointer_i_(c
case 4:
return cpuObject->waferTypeL_;
default:
edm::LogError("HeterogeneousHGCalHEFConditionsWrapper") << "select_pointer_i(non-heterogeneous, parameters): no item.";
edm::LogError("HeterogeneousHGCalHEFConditionsWrapper") << "select_pointer_i(non-heterogeneous): no item.";
return cpuObject->waferTypeL_;
}
}
Expand All @@ -309,7 +300,7 @@ int32_t*& HeterogeneousHGCalHEFConditionsWrapper::select_pointer_i_(cpos::Hetero
case 3:
return cpuObject->numberCellsHexagon;
default:
edm::LogError("HeterogeneousHGCalHEFConditionsWrapper") << "select_pointer_i(heterogeneous, positions mapping): no item.";
edm::LogError("HeterogeneousHGCalHEFConditionsWrapper") << "select_pointer_i(heterogeneous): no item.";
return cpuObject->numberCellsHexagon;
}
}
Expand All @@ -321,8 +312,7 @@ std::vector<int32_t>& HeterogeneousHGCalHEFConditionsWrapper::select_pointer_i_(
case 1:
return cpuObject->numberCellsHexagon;
default:
edm::LogError("HeterogeneousHGCalHEFConditionsWrapper") << "select_pointer_i(non-heterogeneous, positions mapping): no item "
<< item;
edm::LogError("HeterogeneousHGCalHEFConditionsWrapper") << "select_pointer_i(non-heterogeneous): no item.";
return cpuObject->numberCellsHexagon;
}
}
Expand All @@ -346,8 +336,7 @@ std::vector<uint32_t>& HeterogeneousHGCalHEFConditionsWrapper::select_pointer_u_
case 2:
return cpuObject->detid;
default:
edm::LogError("HeterogeneousHGCalHEFConditionsWrapper") << "select_pointer_u(non-heterogeneous): no item "
<< item;
edm::LogError("HeterogeneousHGCalHEFConditionsWrapper") << "select_pointer_u(non-heterogeneous): no item.";
return cpuObject->detid;
}
}
Expand Down Expand Up @@ -413,7 +402,7 @@ hgcal_conditions::HeterogeneousHEFConditionsESProduct const *HeterogeneousHGCalH

//Important: The transfer does *not* start at posmap.x because the positions are not known in the CPU side!
size_t position_memory_size_to_transfer = chunk_pos_ - this->number_position_arrays*this->nelems_posmap_*sfloat; //size in bytes occupied by the non-position information
//std::cout << position_memory_size_to_transfer << ", " << chunk_pos_ << ", " << this->number_position_arrays*this->nelems_posmap_*sfloat << ", " << this->number_position_arrays*this->nelems_posmap_ << ", " << this->nelems_posmap_ << std::endl;
std::cout << position_memory_size_to_transfer << ", " << chunk_pos_ << ", " << this->number_position_arrays*this->nelems_posmap_*sfloat << ", " << this->number_position_arrays*this->nelems_posmap_ << ", " << this->nelems_posmap_ << std::endl;
gpuErrchk(cudaMemcpyAsync(data.host->posmap.z_per_layer, this->posmap_.z_per_layer, position_memory_size_to_transfer, cudaMemcpyHostToDevice, stream));

// ... and then the payload object
Expand Down
Expand Up @@ -200,7 +200,7 @@ void fill_positions_from_detids(const hgcal_conditions::HeterogeneousHEFCellPosi
}

__global__
void print_positions_from_detids(const hgcal_conditions::HeterogeneousHEFConditionsESProduct* conds)
void print_positions_from_detids(const hgcal_conditions::HeterogeneousHEFCellPositionsConditionsESProduct* conds)
{
unsigned int tid = blockDim.x * blockIdx.x + threadIdx.x;

Expand Down

0 comments on commit 38d08f8

Please sign in to comment.