Skip to content

Commit

Permalink
Rework the GPU pixel track clusterizer and vertex finder (#338)
Browse files Browse the repository at this point in the history
Add two alternative (faster) track clusterizers: one based on DBSCAN,
and one "by density"; use the latter by default.

Allow all pixel triplets, but protect the vertex from triplets.

Use a larger-then-needed nearest neightbours array to allow for possible
duplicate pixels, as a pixel can appear more than once in the same event.

Use a separate workspace for temporary data.
  • Loading branch information
VinInn authored and fwyzard committed Jan 15, 2021
1 parent 2e22e5d commit 93ebb23
Show file tree
Hide file tree
Showing 4 changed files with 386 additions and 294 deletions.
25 changes: 24 additions & 1 deletion RecoPixelVertexing/PixelVertexFinding/test/BuildFile.xml
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,32 @@
<use name="SimDataFormats/Track"/>
<use name="TrackingTools/TransientTrack"/>

<bin file="gpuVertexFinder_t.cu" name="gpuVertexFinder_t">
<bin file="gpuVertexFinder_t.cu" name="gpuVertexFinderByDensity_t">
<use name="cuda"/>
<use name="cuda-api-wrappers"/>
<flags CUDA_FLAGS="-g -DGPU_DEBUG"/>
<flags CXXFLAGS="-g"/>
</bin>

<bin file="cpuVertexFinder_t.cpp" name="cpuVertexFinderByDensity_t">
<flags CXXFLAGS="-g -DGPU_DEBUG"/>
</bin>

<bin file="gpuVertexFinder_t.cu" name="gpuVertexFinderDBSCAN_t">
<use name="cuda"/>
<use name="cuda-api-wrappers"/>
<flags CUDA_FLAGS="-g -DGPU_DEBUG -DUSE_DBSCAN"/>
<flags CXXFLAGS="-g"/>
</bin>

<bin file="gpuVertexFinder_t.cu" name="gpuVertexFinderIterative_t">
<use name="cuda"/>
<use name="cuda-api-wrappers"/>
<flags CUDA_FLAGS="-g -DGPU_DEBUG -DUSE_ITERATIVE"/>
<flags CXXFLAGS="-g"/>
</bin>

<bin file="cpuVertexFinder_t.cpp" name="cpuVertexFinderIterative_t">
<flags CXXFLAGS="-g -DGPU_DEBUG -DUSE_ITERATIVE"/>
</bin>

360 changes: 360 additions & 0 deletions RecoPixelVertexing/PixelVertexFinding/test/VertexFinder_t.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,360 @@
#include <cmath>
#include <cstdint>
#include <iostream>
#include <random>
#include <vector>

#include <cuda/api_wrappers.h>

#include "HeterogeneousCore/CUDAUtilities/interface/exitSansCUDADevices.h"
#ifdef USE_DBSCAN
#include "RecoPixelVertexing/PixelVertexFinding/src/gpuClusterTracksDBSCAN.h"
#define CLUSTERIZE clusterTracksDBSCAN
#elif USE_ITERATIVE
#include "RecoPixelVertexing/PixelVertexFinding/src/gpuClusterTracksIterative.h"
#define CLUSTERIZE clusterTracksIterative
#else
#include "RecoPixelVertexing/PixelVertexFinding/src/gpuClusterTracksByDensity.h"
#define CLUSTERIZE clusterTracksByDensity
#endif

#include "RecoPixelVertexing/PixelVertexFinding/src/gpuFitVertices.h"
#include "RecoPixelVertexing/PixelVertexFinding/src/gpuSortByPt2.h"
#include "RecoPixelVertexing/PixelVertexFinding/src/gpuSplitVertices.h"

using namespace gpuVertexFinder;

struct Event {
std::vector<float> zvert;
std::vector<uint16_t> itrack;
std::vector<float> ztrack;
std::vector<float> eztrack;
std::vector<float> pttrack;
std::vector<uint16_t> ivert;
};

struct ClusterGenerator {

explicit ClusterGenerator(float nvert, float ntrack) :
rgen(-13.,13), errgen(0.005,0.025), clusGen(nvert), trackGen(ntrack), gauss(0.,1.), ptGen(1.)
{}

void operator()(Event & ev) {

int nclus = clusGen(reng);
ev.zvert.resize(nclus);
ev.itrack.resize(nclus);
for (auto & z : ev.zvert) {
z = 3.5f*gauss(reng);
}

ev.ztrack.clear();
ev.eztrack.clear();
ev.ivert.clear();
for (int iv=0; iv<nclus; ++iv) {
auto nt = trackGen(reng);
ev.itrack[nclus] = nt;
for (int it=0; it<nt; ++it) {
auto err = errgen(reng); // reality is not flat....
ev.ztrack.push_back(ev.zvert[iv]+err*gauss(reng));
ev.eztrack.push_back(err*err);
ev.ivert.push_back(iv);
ev.pttrack.push_back( (iv==5? 1.f:0.5f) + ptGen(reng) );
ev.pttrack.back()*=ev.pttrack.back();
}
}
// add noise
auto nt = 2*trackGen(reng);
for (int it=0; it<nt; ++it) {
auto err = 0.03f;
ev.ztrack.push_back(rgen(reng));
ev.eztrack.push_back(err*err);
ev.ivert.push_back(9999);
ev.pttrack.push_back( 0.5f + ptGen(reng) );
ev.pttrack.back()*=ev.pttrack.back();
}

}

std::mt19937 reng;
std::uniform_real_distribution<float> rgen;
std::uniform_real_distribution<float> errgen;
std::poisson_distribution<int> clusGen;
std::poisson_distribution<int> trackGen;
std::normal_distribution<float> gauss;
std::exponential_distribution<float> ptGen;

};

// a macro SORRY
#define LOC_ONGPU(M) ((char*)(onGPU_d.get()) +offsetof(ZVertices,M))
#define LOC_WS(M) ((char*)(ws_d.get()) +offsetof(WorkSpace,M))

__global__
void print(ZVertices const * pdata, WorkSpace const * pws) {
auto const & __restrict__ data = *pdata;
auto const & __restrict__ ws = *pws;
printf("nt,nv %d %d,%d\n",ws.ntrks,data.nvFinal,ws.nvIntermediate);

}

int main() {

#ifdef __CUDACC__
exitSansCUDADevices();

auto current_device = cuda::device::current::get();

auto onGPU_d = cuda::memory::device::make_unique<ZVertices[]>(current_device, 1);
auto ws_d = cuda::memory::device::make_unique<WorkSpace[]>(current_device, 1);
#else
auto onGPU_d = std::make_unique<ZVertices>();
auto ws_d = std::make_unique<WorkSpace>();
#endif

Event ev;

float eps = 0.1f;
std::array<float,3> par{{eps, 0.01f,9.0f}};
for (int nav=30;nav<80;nav+=20){

ClusterGenerator gen(nav,10);

for (int i=8; i<20; ++i) {

auto kk=i/4; // M param

gen(ev);

#ifdef __CUDACC__
init<<<1,1,0,0>>>(onGPU_d.get(),ws_d.get());
#else
onGPU_d->init();ws_d->init();
#endif

std::cout << "v,t size " << ev.zvert.size() << ' ' << ev.ztrack.size() << std::endl;
auto nt = ev.ztrack.size();
#ifdef __CUDACC__
cuda::memory::copy(LOC_WS(ntrks),&nt,sizeof(uint32_t));
cuda::memory::copy(LOC_WS(zt),ev.ztrack.data(),sizeof(float)*ev.ztrack.size());
cuda::memory::copy(LOC_WS(ezt2),ev.eztrack.data(),sizeof(float)*ev.eztrack.size());
cuda::memory::copy(LOC_WS(ptt2),ev.pttrack.data(),sizeof(float)*ev.eztrack.size());
#else
::memcpy(LOC_WS(ntrks),&nt,sizeof(uint32_t));
::memcpy(LOC_WS(zt),ev.ztrack.data(),sizeof(float)*ev.ztrack.size());
::memcpy(LOC_WS(ezt2),ev.eztrack.data(),sizeof(float)*ev.eztrack.size());
::memcpy(LOC_WS(ptt2),ev.pttrack.data(),sizeof(float)*ev.eztrack.size());
#endif

std::cout << "M eps, pset " << kk << ' ' << eps << ' ' << (i%4) << std::endl;

if ( (i%4) == 0 ) par = {{eps, 0.02f,12.0f}};
if ( (i%4) == 1 ) par = {{eps, 0.02f,9.0f}};
if ( (i%4) == 2 ) par = {{eps, 0.01f,9.0f}};
if ( (i%4) == 3 ) par = {{0.7f*eps, 0.01f,9.0f}};

uint32_t nv=0;
#ifdef __CUDACC__
print<<<1,1,0,0>>>(onGPU_d.get(),ws_d.get());
cudaCheck(cudaGetLastError());
cudaDeviceSynchronize();

cuda::launch(CLUSTERIZE,
{ 1, 512+256 },
onGPU_d.get(),ws_d.get(),kk,par[0],
par[1],par[2]
);
print<<<1,1,0,0>>>(onGPU_d.get(),ws_d.get());

cudaCheck(cudaGetLastError());
cudaDeviceSynchronize();


cuda::launch(fitVertices,
{ 1,1024-256 },
onGPU_d.get(),ws_d.get(),50.f
);
cudaCheck(cudaGetLastError());
cuda::memory::copy(&nv, LOC_ONGPU(nvFinal), sizeof(uint32_t));

#else
print(onGPU_d.get(),ws_d.get());
CLUSTERIZE(
onGPU_d.get(),ws_d.get(),kk,par[0],
par[1],par[2]
);
print(onGPU_d.get(),ws_d.get());
fitVertices(
onGPU_d.get(),ws_d.get(),50.f
);
nv = onGPU_d->nvFinal;
#endif

if (nv==0) {
std::cout << "NO VERTICES???" << std::endl;
continue;
}

float * zv = nullptr;
float * wv = nullptr;
float * ptv2 = nullptr;
int32_t * nn = nullptr;
uint16_t * ind = nullptr;

// keep chi2 separated...
float chi2[2*nv]; // make space for splitting...

#ifdef __CUDACC__
float hzv[2*nv];
float hwv[2*nv];
float hptv2[2*nv];
int32_t hnn[2*nv];
uint16_t hind[2*nv];

zv = hzv;
wv = hwv;
ptv2 = hptv2;
nn = hnn;
ind = hind;
#else
zv = onGPU_d->zv;
wv = onGPU_d->wv;
ptv2 = onGPU_d->ptv2;
nn = onGPU_d->ndof;
ind = onGPU_d->sortInd;
#endif

#ifdef __CUDACC__
cuda::memory::copy(nn, LOC_ONGPU(ndof), nv*sizeof(int32_t));
cuda::memory::copy(chi2, LOC_ONGPU(chi2), nv*sizeof(float));
#else
memcpy(chi2, LOC_ONGPU(chi2), nv*sizeof(float));
#endif

for (auto j=0U; j<nv; ++j) if (nn[j]>0) chi2[j]/=float(nn[j]);
{
auto mx = std::minmax_element(chi2,chi2+nv);
std::cout << "after fit nv, min max chi2 " << nv << " " << *mx.first << ' ' << *mx.second << std::endl;
}

#ifdef __CUDACC__
cuda::launch(fitVertices,
{ 1,1024-256 },
onGPU_d.get(),ws_d.get(), 50.f
);
cuda::memory::copy(&nv, LOC_ONGPU(nvFinal), sizeof(uint32_t));
cuda::memory::copy(nn, LOC_ONGPU(ndof), nv*sizeof(int32_t));
cuda::memory::copy(chi2, LOC_ONGPU(chi2), nv*sizeof(float));
#else
fitVertices(
onGPU_d.get(),ws_d.get(),50.f
);
nv = onGPU_d->nvFinal;
memcpy(chi2, LOC_ONGPU(chi2), nv*sizeof(float));
#endif

for (auto j=0U; j<nv; ++j) if (nn[j]>0) chi2[j]/=float(nn[j]);
{
auto mx = std::minmax_element(chi2,chi2+nv);
std::cout << "before splitting nv, min max chi2 " << nv << " " << *mx.first << ' ' << *mx.second << std::endl;
}

#ifdef __CUDACC__
// one vertex per block!!!
cuda::launch(splitVertices,
{ 1024, 64 },
onGPU_d.get(),ws_d.get(),
9.f
);
cuda::memory::copy(&nv, LOC_WS(nvIntermediate), sizeof(uint32_t));
#else
gridDim.x = 1024; // nv ????
assert(blockIdx.x==0);
for(;blockIdx.x<gridDim.x; ++blockIdx.x)
splitVertices(
onGPU_d.get(),ws_d.get(),
9.f
);
resetGrid();
nv = ws_d->nvIntermediate;
#endif
std::cout << "after split " << nv << std::endl;


#ifdef __CUDACC__
cuda::launch(fitVertices,
{ 1,1024-256 },
onGPU_d.get(),ws_d.get(),5000.f
);
cudaCheck(cudaGetLastError());

cuda::launch(sortByPt2,
{ 1, 256 },
onGPU_d.get(),ws_d.get()
);
cudaCheck(cudaGetLastError());
cuda::memory::copy(&nv, LOC_ONGPU(nvFinal), sizeof(uint32_t));
#else
fitVertices(onGPU_d.get(),ws_d.get(),5000.f);
sortByPt2(onGPU_d.get(),ws_d.get());
nv = onGPU_d->nvFinal;
memcpy(chi2, LOC_ONGPU(chi2), nv*sizeof(float));
#endif


if (nv==0) {
std::cout << "NO VERTICES???" << std::endl;
continue;
}

#ifdef __CUDACC__
cuda::memory::copy(zv, LOC_ONGPU(zv), nv*sizeof(float));
cuda::memory::copy(wv, LOC_ONGPU(wv), nv*sizeof(float));
cuda::memory::copy(chi2, LOC_ONGPU(chi2), nv*sizeof(float));
cuda::memory::copy(ptv2, LOC_ONGPU(ptv2), nv*sizeof(float));
cuda::memory::copy(nn, LOC_ONGPU(ndof), nv*sizeof(int32_t));
cuda::memory::copy(ind, LOC_ONGPU(sortInd), nv*sizeof(uint16_t));
#endif
for (auto j=0U; j<nv; ++j) if (nn[j]>0) chi2[j]/=float(nn[j]);
{
auto mx = std::minmax_element(chi2,chi2+nv);
std::cout << "nv, min max chi2 " << nv << " " << *mx.first << ' ' << *mx.second << std::endl;
}

{
auto mx = std::minmax_element(wv,wv+nv);
std::cout << "min max error " << 1./std::sqrt(*mx.first) << ' ' << 1./std::sqrt(*mx.second) << std::endl;
}

{
auto mx = std::minmax_element(ptv2,ptv2+nv);
std::cout << "min max ptv2 " << *mx.first << ' ' << *mx.second << std::endl;
std::cout << "min max ptv2 " << ptv2[ind[0]] << ' ' << ptv2[ind[nv-1]] << " at " << ind[0] << ' ' << ind[nv-1] << std::endl;

}

float dd[nv];
for (auto kv=0U; kv<nv; ++kv) {
auto zr = zv[kv];
auto md=500.0f;
for (auto zs : ev.ztrack) {
auto d = std::abs(zr-zs);
md = std::min(d,md);
}
dd[kv] = md;
}
if (i==6) {
for (auto d:dd) std::cout << d << ' ';
std::cout << std::endl;
}
auto mx = std::minmax_element(dd,dd+nv);
float rms=0;
for (auto d:dd) rms+=d*d;
rms = std::sqrt(rms)/(nv-1);
std::cout << "min max rms " << *mx.first << ' ' << *mx.second << ' ' << rms << std::endl;

} // loop on events
} // lopp on ave vert

return 0;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
#include "VertexFinder_t.h"
Loading

0 comments on commit 93ebb23

Please sign in to comment.