Port the Riemann fit to CUDA (#60)

- the CPU Riemann fit works using all combinations of the 2 booleans: `useErrors` and `useMultipleScattering`; - the standalone version of the GPU Riemann fit has been updated in order to explore all possibilities for the 2 booleans above: all of them work and produce identical results up to 1e-5 precision (the default one, 1e-6, fails when enabling multiScattering, most likely due to matrix inversions); - the GPU version of the Riemann fit within CMSSW works, with 1 fit assigned to each thread, with 32 threads/warps, all dynamically computed. Things that need a "hack": - limit the "dynamic" size of Eigen matrices to at most 4x4, which is just fine for quadruplets. Using anything wider will cause errors, which I *believe* are related to the stack size of threads on the GPU; - cast matrices to be inverted to 4x4 (was done before the previous point: will revert it and see if that's still needed or not, but I believe it is); this was done in order to "specialize" the `invert()` call to something that is "natively" supported by Eigen on GPU (that also brought in a few `__host__` `__device__` here and there in Eigen); - fix the alignment of the `struct` holding the results of the fit, since its size was different on GPU and CPU, causing an annoying off-by-one effect.
cms-patatrack · Nov 27, 2020 · 601337b · 601337b
1 parent 4deb721
commit 601337b
Show file tree

Hide file tree

Showing 3 changed files with 21 additions and 2 deletions.
diff --git a/Configuration/PyReleaseValidation/python/relval_2017.py b/Configuration/PyReleaseValidation/python/relval_2017.py
@@ -27,7 +27,7 @@
            10024.1,10024.2,10024.3,10024.4,10024.5,
            10801.0,10802.0,10803.0,10804.0,10805.0,10806.0,10807.0,10808.0,10809.0,10859.0,10871.0,
            10842.0,10824.0,10825.0,10826.0,10823.0,11024.0,11025.0,11224.0,
-           10824.1,10824.5,10824.7,10824.8,
+           10824.1,10824.5,10824.7,10824.8,10824.9,
            10824.6,11024.6,11224.6,
            11642.0,11624.0,11625.0,11626.0,11623.0,11824.0,11825.0,12024.0]
 for numWF in numWFIB:

diff --git a/Configuration/PyReleaseValidation/python/relval_steps.py b/Configuration/PyReleaseValidation/python/relval_steps.py
@@ -1678,6 +1678,9 @@ def gen2018HiMix(fragment,howMuch):
 step3_riemannFit = {
     '--procModifiers': 'riemannFit',
 }
+step3_riemannFitGPU = {
+    '--procModifiers': 'riemannFitGPU',
+}
 step3_gpu = {
     '--procModifiers': 'gpu',
 }
@@ -2663,6 +2666,11 @@ def gen2018HiMix(fragment,howMuch):
         if 'Reco' in step: upgradeStepDict[stepName][k] = merge([step3_riemannFit, step3_pixelTrackingOnly, upgradeStepDict[step][k]])
         elif 'HARVEST' in step: upgradeStepDict[stepName][k] = merge([{'-s': 'HARVESTING:@trackingOnlyValidation+@pixelTrackingOnlyDQM'}, upgradeStepDict[step][k]])
 
+    for step in upgradeSteps['pixelTrackingOnlyRiemannFitGPU']['steps']:
+        stepName = step + upgradeSteps['pixelTrackingOnlyRiemannFitGPU']['suffix']
+        if 'Reco' in step: upgradeStepDict[stepName][k] = merge([step3_riemannFitGPU, step3_pixelTrackingOnly, upgradeStepDict[step][k]])
+        elif 'HARVEST' in step: upgradeStepDict[stepName][k] = merge([{'-s': 'HARVESTING:@trackingOnlyValidation+@pixelTrackingOnlyDQM'}, upgradeStepDict[step][k]])
+
     for step in upgradeSteps['pixelTrackingOnlyGPU']['steps']:
         stepName = step + upgradeSteps['pixelTrackingOnlyGPU']['suffix']
         if 'Reco' in step: upgradeStepDict[stepName][k] = merge([step3_gpu, step3_pixelTrackingOnly, upgradeStepDict[step][k]])

diff --git a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py
@@ -162,6 +162,17 @@
     'suffix' : '_pixelTrackingOnlyGPU',
     'offset' : 0.8,
 }
+upgradeSteps['pixelTrackingOnlyRiemannFitGPU'] = {
+    'steps' : [
+        'RecoFull',
+        'HARVESTFull',
+        'RecoFullGlobal',
+        'HARVESTFullGlobal',
+    ],
+    'PU' : [],
+    'suffix' : '_pixelTrackingOnlyRiemannFitGPU',
+    'offset' : 0.9,
+}
 upgradeSteps['Timing'] = {
     'steps' : upgradeSteps['baseline']['steps'],
     'PU' : upgradeSteps['baseline']['PU'],
@@ -387,7 +398,7 @@
                   'DoubleMuPt1000Extended_pythia8_cfi',
                   'TenMuE_0_200_pythia8_cfi',
                   'SinglePiE50HCAL_pythia8_cfi',
-                  'MinBias_13TeV_pythia8_TuneCUETP8M1_cfi', 
+                  'MinBias_13TeV_pythia8_TuneCUETP8M1_cfi',
                   'TTbar_13TeV_TuneCUETP8M1_cfi',
                   'ZEE_13TeV_TuneCUETP8M1_cfi',
                   'QCD_Pt_600_800_13TeV_TuneCUETP8M1_cfi',