clang-format #172

Merged (6 commits, May 18, 2020)
77 changes: 77 additions & 0 deletions .clang-format
@@ -0,0 +1,77 @@
---
AccessModifierOffset: -4
AlignAfterOpenBracket: AlwaysBreak
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlines: DontAlign
AlignOperands: false
AlignTrailingComments: false
AllowAllParametersOfDeclarationOnNextLine: false
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Empty
AllowShortIfStatementsOnASingleLine: false
AllowShortLoopsOnASingleLine: true
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: Yes
BinPackArguments: false
BinPackParameters: false
BreakBeforeBraces: Custom
BraceWrapping:
    AfterClass: true
    AfterControlStatement: true
    AfterEnum: true
    AfterFunction: true
    AfterNamespace: true
    AfterStruct: true
    AfterUnion: true
    AfterExternBlock: true
    BeforeCatch: true
    BeforeElse: true
    IndentBraces: false
    SplitEmptyFunction: false
    SplitEmptyRecord: false
    SplitEmptyNamespace: false
BreakBeforeBinaryOperators: All
BreakBeforeTernaryOperators: true
BreakConstructorInitializers: AfterColon
BreakInheritanceList: AfterColon
BreakStringLiterals: true
ColumnLimit: 80
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: true
ConstructorInitializerIndentWidth: 8
ContinuationIndentWidth: 4
Cpp11BracedListStyle: true
DerivePointerAlignment: false
FixNamespaceComments: false
IncludeBlocks: Regroup
IndentCaseLabels: false
IndentPPDirectives: None
IndentWidth: 4
IndentWrappedFunctionNames: false
KeepEmptyLinesAtTheStartOfBlocks: false
Language: Cpp
NamespaceIndentation: All
PointerAlignment: Middle
ReflowComments: true
SortIncludes: true
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterTemplateKeyword: false
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: Never
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyParentheses: false
SpacesInAngles: false
SpacesInCStyleCastParentheses: false
SpacesInContainerLiterals: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: Cpp11
UseTab: Never
...
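For orientation, the following hypothetical C++ snippet (not taken from the repository) is written the way these settings lay code out: Allman-style braces from the BraceWrapping block, PointerAlignment: Middle, IndentWidth: 4, NamespaceIndentation: All, and SpaceBeforeParens: Never. Treat it as a sketch of the intended style rather than verified clang-format output.

```cpp
// Hypothetical snippet illustrating the configured style.
namespace example
{
    struct Accumulator
    {
        int * data; // PointerAlignment: Middle
        int size;

        auto sum() const -> int
        {
            int result = 0;
            for(int i = 0; i < size; ++i) // SpaceBeforeParens: Never
            {
                result += data[i];
            }
            return result;
        }
    };
}
```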
2 changes: 1 addition & 1 deletion .clang-tidy
@@ -1,3 +1,3 @@
---
Checks: '*,-llvm-header-guard,-fuchsia-default-arguments-declarations,-cppcoreguidelines-no-malloc,-cppcoreguidelines-owning-memory,-modernize-use-trailing-return-type,-misc-non-private-member-variables-in-classes'
Checks: '*,-llvm-header-guard,-fuchsia-default-arguments-declarations,-cppcoreguidelines-no-malloc,-cppcoreguidelines-owning-memory,-misc-non-private-member-variables-in-classes'
HeaderFilterRegex: '.*'
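The removed entry re-enables clang-tidy's modernize-use-trailing-return-type check, which is why the example below rewrites int main() as auto main() -> int. A minimal illustration of the rewrite that check suggests:

```cpp
// Flagged by modernize-use-trailing-return-type:
int add(int a, int b);

// Suggested form with a trailing return type:
auto add(int a, int b) -> int;
```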
15 changes: 15 additions & 0 deletions CONTRIBUTING.md
@@ -0,0 +1,15 @@
# Contributing

## Formatting

Please format your code before opening pull requests, using clang-format and the .clang-format file placed in the repository root.

### Visual Studio and CLion
Support for clang-format has been built in since Visual Studio 2017 15.7 and CLion 2019.1.
The .clang-format file in the repository is detected automatically, and formatting is applied as you type or when pressing the format hotkey.

### Bash
First install clang-format; installation instructions can be found on the web. To format the code, run this command in bash:
```
find . -iname '*.cu' -o -iname '*.hpp' | xargs clang-format-10 -i
```
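If the installed clang-format is recent enough (the flags were introduced around clang-format 10), replacing `-i` with `--dry-run -Werror` in the command above should report formatting violations without modifying any files, which is convenient for CI checks.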
5 changes: 5 additions & 0 deletions README.md
@@ -22,6 +22,11 @@ mallocMC is header-only, but requires a few other C++ libraries to be
available. Our installation notes can be found in [INSTALL.md](INSTALL.md).


Contributing
------------

Rules for contributions are found in [CONTRIBUTING.md](CONTRIBUTING.md).

On the ScatterAlloc Algorithm
-----------------------------

195 changes: 101 additions & 94 deletions examples/mallocMC_example01.cu
@@ -26,133 +26,140 @@
THE SOFTWARE.
*/

#include <iostream>
#include <cassert>
#include <vector>
#include <numeric>
#include "mallocMC_example01_config.hpp"

#include <cassert>
#include <cuda.h>
#include "mallocMC_example01_config.hpp"
#include <iostream>
#include <numeric>
#include <vector>

void run();

int main()
auto main() -> int
{
int computeCapabilityMajor = 0;
cudaDeviceGetAttribute(&computeCapabilityMajor, cudaDevAttrComputeCapabilityMajor, 0);
int computeCapabilityMinor = 0;
cudaDeviceGetAttribute(&computeCapabilityMinor, cudaDevAttrComputeCapabilityMinor, 0);

if( computeCapabilityMajor < 2 ) {
std::cerr << "Error: Compute Capability >= 2.0 required. (is ";
std::cerr << computeCapabilityMajor << "."<< computeCapabilityMinor << ")" << std::endl;
return 1;
}

cudaSetDevice(0);
run();
cudaDeviceReset();

return 0;
int computeCapabilityMajor = 0;
cudaDeviceGetAttribute(
&computeCapabilityMajor, cudaDevAttrComputeCapabilityMajor, 0);
int computeCapabilityMinor = 0;
cudaDeviceGetAttribute(
&computeCapabilityMinor, cudaDevAttrComputeCapabilityMinor, 0);

if(computeCapabilityMajor < 2)
{
std::cerr << "Error: Compute Capability >= 2.0 required. (is ";
std::cerr << computeCapabilityMajor << "." << computeCapabilityMinor
<< ")" << std::endl;
return 1;
}

cudaSetDevice(0);
run();
cudaDeviceReset();

return 0;
}

__device__ int ** arA;
__device__ int ** arB;
__device__ int ** arC;

__device__ int** arA;
__device__ int** arB;
__device__ int** arC;


__global__ void createArrayPointers(int x, int y, ScatterAllocator::AllocatorHandle mMC){
arA = (int**) mMC.malloc(sizeof(int*) * x*y);
arB = (int**) mMC.malloc(sizeof(int*) * x*y);
arC = (int**) mMC.malloc(sizeof(int*) * x*y);
__global__ void
createArrayPointers(int x, int y, ScatterAllocator::AllocatorHandle mMC)
{
arA = (int **)mMC.malloc(sizeof(int *) * x * y);
arB = (int **)mMC.malloc(sizeof(int *) * x * y);
arC = (int **)mMC.malloc(sizeof(int *) * x * y);
}

__global__ void
fillArrays(int length, int * d, ScatterAllocator::AllocatorHandle mMC)
{
int id = threadIdx.x + blockIdx.x * blockDim.x;

__global__ void fillArrays(int length, int* d, ScatterAllocator::AllocatorHandle mMC){
int id = threadIdx.x + blockIdx.x*blockDim.x;

arA[id] = (int*) mMC.malloc(length*sizeof(int));
arB[id] = (int*) mMC.malloc(length*sizeof(int));
arC[id] = (int*) mMC.malloc(sizeof(int)*length);
arA[id] = (int *)mMC.malloc(length * sizeof(int));
arB[id] = (int *)mMC.malloc(length * sizeof(int));
arC[id] = (int *)mMC.malloc(sizeof(int) * length);

for(int i=0 ; i<length; ++i){
arA[id][i] = id*length+i;
arB[id][i] = id*length+i;
}
for(int i = 0; i < length; ++i)
{
arA[id][i] = id * length + i;
arB[id][i] = id * length + i;
}
}


__global__ void addArrays(int length, int* d){
int id = threadIdx.x + blockIdx.x*blockDim.x;

d[id] = 0;
for(int i=0 ; i<length; ++i){
arC[id][i] = arA[id][i] + arB[id][i];
d[id] += arC[id][i];
}
__global__ void addArrays(int length, int * d)
{
int id = threadIdx.x + blockIdx.x * blockDim.x;

d[id] = 0;
for(int i = 0; i < length; ++i)
{
arC[id][i] = arA[id][i] + arB[id][i];
d[id] += arC[id][i];
}
}


__global__ void freeArrays(ScatterAllocator::AllocatorHandle mMC){
int id = threadIdx.x + blockIdx.x*blockDim.x;
mMC.free(arA[id]);
mMC.free(arB[id]);
mMC.free(arC[id]);
__global__ void freeArrays(ScatterAllocator::AllocatorHandle mMC)
{
int id = threadIdx.x + blockIdx.x * blockDim.x;
mMC.free(arA[id]);
mMC.free(arB[id]);
mMC.free(arC[id]);
}


__global__ void freeArrayPointers(ScatterAllocator::AllocatorHandle mMC){
mMC.free(arA);
mMC.free(arB);
mMC.free(arC);
__global__ void freeArrayPointers(ScatterAllocator::AllocatorHandle mMC)
{
mMC.free(arA);
mMC.free(arB);
mMC.free(arC);
}


void run()
{
size_t block = 32;
size_t grid = 32;
int length = 100;
assert((unsigned)length<= block*grid); //necessary for used algorithm

//init the heap
std::cerr << "initHeap...";
ScatterAllocator mMC(1U*1024U*1024U*1024U); //1GB for device-side malloc
std::cerr << "done" << std::endl;
size_t block = 32;
size_t grid = 32;
int length = 100;
assert((unsigned)length <= block * grid); // necessary for used algorithm

std::cout << ScatterAllocator::info("\n") << std::endl;
// init the heap
std::cerr << "initHeap...";
ScatterAllocator mMC(
1U * 1024U * 1024U * 1024U); // 1GB for device-side malloc
std::cerr << "done" << std::endl;

// device-side pointers
int* d;
cudaMalloc((void**) &d, sizeof(int)*block*grid);
std::cout << ScatterAllocator::info("\n") << std::endl;

// host-side pointers
std::vector<int> array_sums(block*grid,0);
// device-side pointers
int * d;
cudaMalloc((void **)&d, sizeof(int) * block * grid);

// create arrays of arrays on the device
createArrayPointers<<<1,1>>>(grid,block, mMC );
// host-side pointers
std::vector<int> array_sums(block * grid, 0);

// fill 2 of them all with ascending values
fillArrays<<<grid,block>>>(length, d, mMC );
// create arrays of arrays on the device
createArrayPointers<<<1, 1>>>(grid, block, mMC);

// add the 2 arrays (vector addition within each thread)
// and do a thread-wise reduce to d
addArrays<<<grid,block>>>(length, d);
// fill 2 of them all with ascending values
fillArrays<<<grid, block>>>(length, d, mMC);

cudaMemcpy(&array_sums[0],d,sizeof(int)*block*grid,cudaMemcpyDeviceToHost);
// add the 2 arrays (vector addition within each thread)
// and do a thread-wise reduce to d
addArrays<<<grid, block>>>(length, d);

mMC.getAvailableSlots(1024U*1024U); //get available megabyte-sized slots
cudaMemcpy(
&array_sums[0], d, sizeof(int) * block * grid, cudaMemcpyDeviceToHost);

int sum = std::accumulate(array_sums.begin(),array_sums.end(),0);
std::cout << "The sum of the arrays on GPU is " << sum << std::endl;
mMC.getAvailableSlots(1024U * 1024U); // get available megabyte-sized slots

int n = block*grid*length;
int gaussian = n*(n-1);
std::cout << "The gaussian sum as comparison: " << gaussian << std::endl;
int sum = std::accumulate(array_sums.begin(), array_sums.end(), 0);
std::cout << "The sum of the arrays on GPU is " << sum << std::endl;

freeArrays<<<grid,block>>>( mMC );
freeArrayPointers<<<1,1>>>( mMC );
cudaFree(d);
int n = block * grid * length;
int gaussian = n * (n - 1);
std::cout << "The gaussian sum as comparison: " << gaussian << std::endl;

freeArrays<<<grid, block>>>(mMC);
freeArrayPointers<<<1, 1>>>(mMC);
cudaFree(d);
}
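A side note on the final comparison in run(): every element of arA and arB holds its global index, so over n = block * grid * length values the reduced result should equal 2 * (0 + 1 + ... + (n - 1)) = n * (n - 1), the "gaussian sum" the example prints. A standalone host-side sketch of that arithmetic (not part of the PR; it uses 64-bit integers because n * (n - 1) does not fit in a 32-bit int for these dimensions):

```cpp
#include <cstdint>
#include <iostream>

auto main() -> int
{
    // Dimensions from the example: block = 32, grid = 32, length = 100.
    std::int64_t const n = 32LL * 32LL * 100LL;

    // Sum of arA[id][i] + arB[id][i] over all elements: twice the sum 0..n-1.
    std::int64_t const expected = n * (n - 1);

    std::cout << "expected sum: " << expected << '\n';
    return 0;
}
```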