clang-format #172

Merged (6 commits, May 18, 2020)
77 changes: 77 additions & 0 deletions .clang-format
@@ -0,0 +1,77 @@
---
AccessModifierOffset: -4
AlignAfterOpenBracket: AlwaysBreak
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlines: DontAlign
AlignOperands: false
AlignTrailingComments: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: true
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: Yes
BinPackArguments: false
BinPackParameters: false
BreakBeforeBraces: Custom
BraceWrapping:
    AfterClass: true
    AfterControlStatement: true
    AfterEnum: true
    AfterFunction: true
    AfterNamespace: true
    AfterStruct: true
    AfterUnion: true
    AfterExternBlock: true
    BeforeCatch: true
    BeforeElse: true
    IndentBraces: false
    SplitEmptyFunction: false
    SplitEmptyRecord: false
    SplitEmptyNamespace: false
BreakBeforeBinaryOperators: All
BreakBeforeTernaryOperators: true
BreakConstructorInitializers: AfterColon
BreakInheritanceList: AfterColon
BreakStringLiterals: true
ColumnLimit: 80
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 8
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: false
FixNamespaceComments: false
IncludeBlocks: Regroup
IndentCaseLabels: false
IndentPPDirectives: None
IndentWidth: 4
IndentWrappedFunctionNames: false
KeepEmptyLinesAtTheStartOfBlocks: false
Language: Cpp
NamespaceIndentation: All
PointerAlignment: Middle
ReflowComments: true
SortIncludes: true
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterTemplateKeyword: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: Never
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyParentheses: false
SpacesInAngles: false
SpacesInCStyleCastParentheses: false
SpacesInContainerLiterals: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp11
UseTab: Never
...
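For orientation, the following hypothetical C++ snippet (not taken from the repository) is written the way these settings lay code out: Allman-style braces from the BraceWrapping block, PointerAlignment: Middle, IndentWidth: 4, NamespaceIndentation: All, and SpaceBeforeParens: Never. Treat it as a sketch of the intended style rather than verified clang-format output.

```cpp
// Hypothetical snippet illustrating the configured style.
namespace example
{
    struct Accumulator
    {
        int * data; // PointerAlignment: Middle
        int size;

        auto sum() const -> int
        {
            int result = 0;
            for(int i = 0; i < size; ++i) // SpaceBeforeParens: Never
            {
                result += data[i];
            }
            return result;
        }
    };
}
```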
2 changes: 1 addition & 1 deletion .clang-tidy
@@ -1,3 +1,3 @@
---
Checks: '*,-llvm-header-guard,-fuchsia-default-arguments-declarations,-cppcoreguidelines-no-malloc,-cppcoreguidelines-owning-memory,-modernize-use-trailing-return-type,-misc-non-private-member-variables-in-classes'
Checks: '*,-llvm-header-guard,-fuchsia-default-arguments-declarations,-cppcoreguidelines-no-malloc,-cppcoreguidelines-owning-memory,-misc-non-private-member-variables-in-classes'
HeaderFilterRegex: '.*'
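The removed entry re-enables clang-tidy's modernize-use-trailing-return-type check, which is why the example below rewrites int main() as auto main() -> int. A minimal illustration of the rewrite that check suggests:

```cpp
// Flagged by modernize-use-trailing-return-type:
int add(int a, int b);

// Suggested form with a trailing return type:
auto add(int a, int b) -> int;
```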
15 changes: 15 additions & 0 deletions CONTRIBUTING.md
@@ -0,0 +1,15 @@
# Contributing

## Formatting

Please format your code before opening pull requests, using clang-format and the .clang-format file placed in the repository root.

### Visual Studio and CLion
Support for clang-format has been built in since Visual Studio 2017 15.7 and CLion 2019.1.
The .clang-format file in the repository is detected automatically, and formatting is applied as you type or when pressing the format hotkey.

### Bash
First install clang-format; installation instructions can be found on the web. To format the code, run this command in bash:
```
find . -iname '*.cu' -o -iname '*.hpp' | xargs clang-format-10 -i
```
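If the installed clang-format is recent enough (the flags were introduced around clang-format 10), replacing `-i` with `--dry-run -Werror` in the command above should report formatting violations without modifying any files, which is convenient for CI checks.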
5 changes: 5 additions & 0 deletions README.md
@@ -22,6 +22,11 @@ mallocMC is header-only, but requires a few other C++ libraries to be
available. Our installation notes can be found in [INSTALL.md](INSTALL.md).


Contributing
------------

Rules for contributions are found in [CONTRIBUTING.md](CONTRIBUTING.md).

On the ScatterAlloc Algorithm
-----------------------------

195 changes: 101 additions & 94 deletions examples/mallocMC_example01.cu
@@ -26,133 +26,140 @@
THE SOFTWARE.
*/

#include <iostream>
#include <cassert>
#include <vector>
#include <numeric>
#include "mallocMC_example01_config.hpp"

#include <cassert>
#include <cuda.h>
#include "mallocMC_example01_config.hpp"
#include <iostream>
#include <numeric>
#include <vector>

void run();

int main()
auto main() -> int
{
int computeCapabilityMajor = 0;
cudaDeviceGetAttribute(&computeCapabilityMajor, cudaDevAttrComputeCapabilityMajor, 0);
int computeCapabilityMinor = 0;
cudaDeviceGetAttribute(&computeCapabilityMinor, cudaDevAttrComputeCapabilityMinor, 0);

if( computeCapabilityMajor < 2 ) {
std::cerr << "Error: Compute Capability >= 2.0 required. (is ";
std::cerr << computeCapabilityMajor << "."<< computeCapabilityMinor << ")" << std::endl;
return 1;
}

cudaSetDevice(0);
run();
cudaDeviceReset();

return 0;
int computeCapabilityMajor = 0;
cudaDeviceGetAttribute(
&computeCapabilityMajor, cudaDevAttrComputeCapabilityMajor, 0);
int computeCapabilityMinor = 0;
cudaDeviceGetAttribute(
&computeCapabilityMinor, cudaDevAttrComputeCapabilityMinor, 0);

if(computeCapabilityMajor < 2)
{
std::cerr << "Error: Compute Capability >= 2.0 required. (is ";
std::cerr << computeCapabilityMajor << "." << computeCapabilityMinor
<< ")" << std::endl;
return 1;
}

cudaSetDevice(0);
run();
cudaDeviceReset();

return 0;
}

__device__ int ** arA;
__device__ int ** arB;
__device__ int ** arC;

__device__ int** arA;
__device__ int** arB;
__device__ int** arC;


__global__ void createArrayPointers(int x, int y, ScatterAllocator::AllocatorHandle mMC){
arA = (int**) mMC.malloc(sizeof(int*) * x*y);
arB = (int**) mMC.malloc(sizeof(int*) * x*y);
arC = (int**) mMC.malloc(sizeof(int*) * x*y);
__global__ void
createArrayPointers(int x, int y, ScatterAllocator::AllocatorHandle mMC)
{
arA = (int **)mMC.malloc(sizeof(int *) * x * y);
arB = (int **)mMC.malloc(sizeof(int *) * x * y);
arC = (int **)mMC.malloc(sizeof(int *) * x * y);
}

__global__ void
fillArrays(int length, int * d, ScatterAllocator::AllocatorHandle mMC)
{
int id = threadIdx.x + blockIdx.x * blockDim.x;

__global__ void fillArrays(int length, int* d, ScatterAllocator::AllocatorHandle mMC){
int id = threadIdx.x + blockIdx.x*blockDim.x;

arA[id] = (int*) mMC.malloc(length*sizeof(int));
arB[id] = (int*) mMC.malloc(length*sizeof(int));
arC[id] = (int*) mMC.malloc(sizeof(int)*length);
arA[id] = (int *)mMC.malloc(length * sizeof(int));
arB[id] = (int *)mMC.malloc(length * sizeof(int));
arC[id] = (int *)mMC.malloc(sizeof(int) * length);

for(int i=0 ; i<length; ++i){
arA[id][i] = id*length+i;
arB[id][i] = id*length+i;
}
for(int i = 0; i < length; ++i)
{
arA[id][i] = id * length + i;
arB[id][i] = id * length + i;
}
}


__global__ void addArrays(int length, int* d){
int id = threadIdx.x + blockIdx.x*blockDim.x;

d[id] = 0;
for(int i=0 ; i<length; ++i){
arC[id][i] = arA[id][i] + arB[id][i];
d[id] += arC[id][i];
}
__global__ void addArrays(int length, int * d)
{
int id = threadIdx.x + blockIdx.x * blockDim.x;

d[id] = 0;
for(int i = 0; i < length; ++i)
{
arC[id][i] = arA[id][i] + arB[id][i];
d[id] += arC[id][i];
}
}


__global__ void freeArrays(ScatterAllocator::AllocatorHandle mMC){
int id = threadIdx.x + blockIdx.x*blockDim.x;
mMC.free(arA[id]);
mMC.free(arB[id]);
mMC.free(arC[id]);
__global__ void freeArrays(ScatterAllocator::AllocatorHandle mMC)
{
int id = threadIdx.x + blockIdx.x * blockDim.x;
mMC.free(arA[id]);
mMC.free(arB[id]);
mMC.free(arC[id]);
}


__global__ void freeArrayPointers(ScatterAllocator::AllocatorHandle mMC){
mMC.free(arA);
mMC.free(arB);
mMC.free(arC);
__global__ void freeArrayPointers(ScatterAllocator::AllocatorHandle mMC)
{
mMC.free(arA);
mMC.free(arB);
mMC.free(arC);
}


void run()
{
size_t block = 32;
size_t grid = 32;
int length = 100;
assert((unsigned)length<= block*grid); //necessary for used algorithm

//init the heap
std::cerr << "initHeap...";
ScatterAllocator mMC(1U*1024U*1024U*1024U); //1GB for device-side malloc
std::cerr << "done" << std::endl;
size_t block = 32;
size_t grid = 32;
int length = 100;
assert((unsigned)length <= block * grid); // necessary for used algorithm

std::cout << ScatterAllocator::info("\n") << std::endl;
// init the heap
std::cerr << "initHeap...";
ScatterAllocator mMC(
1U * 1024U * 1024U * 1024U); // 1GB for device-side malloc
std::cerr << "done" << std::endl;

// device-side pointers
int* d;
cudaMalloc((void**) &d, sizeof(int)*block*grid);
std::cout << ScatterAllocator::info("\n") << std::endl;

// host-side pointers
std::vector<int> array_sums(block*grid,0);
// device-side pointers
int * d;
cudaMalloc((void **)&d, sizeof(int) * block * grid);

// create arrays of arrays on the device
createArrayPointers<<<1,1>>>(grid,block, mMC );
// host-side pointers
std::vector<int> array_sums(block * grid, 0);

// fill 2 of them all with ascending values
fillArrays<<<grid,block>>>(length, d, mMC );
// create arrays of arrays on the device
createArrayPointers<<<1, 1>>>(grid, block, mMC);

// add the 2 arrays (vector addition within each thread)
// and do a thread-wise reduce to d
addArrays<<<grid,block>>>(length, d);
// fill 2 of them all with ascending values
fillArrays<<<grid, block>>>(length, d, mMC);

cudaMemcpy(&array_sums[0],d,sizeof(int)*block*grid,cudaMemcpyDeviceToHost);
// add the 2 arrays (vector addition within each thread)
// and do a thread-wise reduce to d
addArrays<<<grid, block>>>(length, d);

mMC.getAvailableSlots(1024U*1024U); //get available megabyte-sized slots
cudaMemcpy(
&array_sums[0], d, sizeof(int) * block * grid, cudaMemcpyDeviceToHost);

int sum = std::accumulate(array_sums.begin(),array_sums.end(),0);
std::cout << "The sum of the arrays on GPU is " << sum << std::endl;
mMC.getAvailableSlots(1024U * 1024U); // get available megabyte-sized slots

int n = block*grid*length;
int gaussian = n*(n-1);
std::cout << "The gaussian sum as comparison: " << gaussian << std::endl;
int sum = std::accumulate(array_sums.begin(), array_sums.end(), 0);
std::cout << "The sum of the arrays on GPU is " << sum << std::endl;

freeArrays<<<grid,block>>>( mMC );
freeArrayPointers<<<1,1>>>( mMC );
cudaFree(d);
int n = block * grid * length;
int gaussian = n * (n - 1);
std::cout << "The gaussian sum as comparison: " << gaussian << std::endl;

freeArrays<<<grid, block>>>(mMC);
freeArrayPointers<<<1, 1>>>(mMC);
cudaFree(d);
}
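A side note on the final comparison in run(): every element of arA and arB holds its global index, so over n = block * grid * length values the reduced result should equal 2 * (0 + 1 + ... + (n - 1)) = n * (n - 1), the "gaussian sum" the example prints. A standalone host-side sketch of that arithmetic (not part of the PR; it uses 64-bit integers because n * (n - 1) does not fit in a 32-bit int for these dimensions):

```cpp
#include <cstdint>
#include <iostream>

auto main() -> int
{
    // Dimensions from the example: block = 32, grid = 32, length = 100.
    std::int64_t const n = 32LL * 32LL * 100LL;

    // Sum of arA[id][i] + arB[id][i] over all elements: twice the sum 0..n-1.
    std::int64_t const expected = n * (n - 1);

    std::cout << "expected sum: " << expected << '\n';
    return 0;
}
```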