Skip to content

Commit

Permalink
Merge pull request #76 from ComputationalRadiationPhysics/dev
Browse files Browse the repository at this point in the history
Merge dev into master for release 2.0.1crp
  • Loading branch information
ax3l committed Jan 15, 2015
2 parents ddeae86 + 15730e4 commit 1314bf2
Show file tree
Hide file tree
Showing 10 changed files with 130 additions and 40 deletions.
23 changes: 23 additions & 0 deletions CHANGELOG.md
@@ -1,6 +1,29 @@
Change Log / Release Log for mallocMC
================================================================

2.0.1crp
-------------
**Date:** 2015-01-13

This release fixes several bugs that occurred after the release of 2.0.0crp.
We closed all issues documented in
[Milestone *Bugfixes*](https://github.com/ComputationalRadiationPhysics/mallocMC/issues?milestone=4&state=closed)

### Changes to mallocMC 2.0.0crp

**Bug fixes**
- page table metadata was not correctly initialized with 0 #70
- freeing pages would not work under certain circumstances #66
- the bitmask in a page table entry could be wrong due to a race condition #62
- not all regions were initialized correctly #60
- getAvailableSlots could sometimes miss blocks #59
- the counter for elements in a page could get too high due to a race condition #61
- Out of Memory (OOM) Policy sometimes did not recognize allocation failures correctly #67

**Misc:**
- See the full changes at https://github.com/ComputationalRadiationPhysics/mallocMC/compare/2.0.0crp...2.0.1crp


2.0.0crp
-------------
**Date:** 2014-06-02
Expand Down
24 changes: 24 additions & 0 deletions CMakeLists.txt
Expand Up @@ -39,6 +39,30 @@ if(Boost_VERSION EQUAL 105500)
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} \"-DBOOST_NOINLINE=__attribute__((noinline))\" ")
endif(Boost_VERSION EQUAL 105500)


################################################################################
# Warnings
################################################################################
# Per-compiler additions to CMAKE_CXX_FLAGS. Exactly one branch is taken,
# depending on which C++ compiler CMake detected; other compilers keep the
# flags untouched.

# GNU: enable -Wall/-Wextra and switch off the warnings about unknown pragmas,
# unused parameters and unused local typedefs. The last one is a new warning
# in gcc 4.8 (the flag is ignored in previous versions).
if(CMAKE_COMPILER_IS_GNUCXX)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-unknown-pragmas -Wextra -Wno-unused-parameter -Wno-unused-local-typedefs")

# ICC: enable -Wall and pass the BOOST_NO_* defines on the command line.
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Intel")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -DBOOST_NO_VARIADIC_TEMPLATES -DBOOST_NO_CXX11_VARIADIC_TEMPLATES -DBOOST_NO_FENV_H")

# PGI: only the -Minform=inform diagnostics flag is added.
elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "PGI")
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Minform=inform")
endif()


###############################################################################
# Installation
###############################################################################
Expand Down
44 changes: 44 additions & 0 deletions INSTALL.md
Expand Up @@ -45,3 +45,47 @@ This is an example how to compile `mallocMC` and test the example code snippets
- `./mallocMC_Example02`
- `./VerifyHeap`
- additional options: see `./VerifyHeap --help`


Linking to your Project
-----------------------

To use mallocMC in your project, you must include the header `mallocMC/mallocMC.hpp` and
add the correct include path.

Because we are linking to Boost and CUDA, the following **external dependencies** must be linked:
- `-lboost`, `-lcudart`

If you are using CMake, you can download our `FindmallocMC.cmake` module with
```bash
wget https://raw.githubusercontent.com/ComputationalRadiationPhysics/picongpu/dev/src/cmake/FindmallocMC.cmake
# read the documentation
cmake -DCMAKE_MODULE_PATH=. --help-module FindmallocMC | less
```

and use the following lines in your `CMakeLists.txt`:
```cmake
# this example will require at least CMake 2.8.5
cmake_minimum_required(VERSION 2.8.5)
# add the path to FindmallocMC.cmake, e.g. the directory cmake/
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake/)
# find the packages that are required by mallocMC. This has to be done BEFORE
# loading mallocMC
find_package(Boost REQUIRED)
set(LIBS ${LIBS} ${Boost_LIBRARIES})
find_package(CUDA REQUIRED)
cuda_include_directories(${CUDA_INCLUDE_DIRS})
# find mallocMC installation
find_package(mallocMC 2.0.1 REQUIRED)
# where to find headers (-I includes for compiler)
include_directories(SYSTEM ${mallocMC_INCLUDE_DIRS} ${CUDA_INCLUDE_DIRS} ${Boost_INCLUDE_DIRS})
add_executable(yourBinary ${SOURCES})
target_link_libraries(yourBinary ${LIBS})
```
2 changes: 1 addition & 1 deletion src/include/mallocMC/alignmentPolicies/Shrink_impl.hpp
Expand Up @@ -74,7 +74,7 @@ namespace Shrink2NS{
#endif
static const uint32 dataAlignment = MALLOCMC_AP_SHRINK_DATAALIGNMENT;

BOOST_STATIC_ASSERT(dataAlignment > 0);
BOOST_STATIC_ASSERT(static_cast<uint32>(dataAlignment) > 0);
//dataAlignment must also be a power of 2!
BOOST_STATIC_ASSERT(dataAlignment && !(dataAlignment & (dataAlignment-1)) );

Expand Down
4 changes: 2 additions & 2 deletions src/include/mallocMC/creationPolicies/OldMalloc_impl.hpp
Expand Up @@ -52,8 +52,8 @@ namespace CreationPolicies{
free(mem);
}

__device__ bool isOOM(void* p){
return 32 == __popc(__ballot(p == NULL));
__device__ bool isOOM(void* p, size_t s){
return s && (p == NULL);
}

template < typename T>
Expand Down
41 changes: 21 additions & 20 deletions src/include/mallocMC/creationPolicies/Scatter_impl.hpp
Expand Up @@ -187,15 +187,11 @@ namespace ScatterKernelDetail{
* bit fields when the page is used for a small chunk size
* @param previous_chunksize the chunksize which was used for the page before
*/
__device__ void init(uint32 previous_chunksize = 0)
__device__ void init()
{
//TODO: we can speed this up for pages being freed, because we know the
//chunksize used before (these bits must be zero again)

//init the entire data which can hold bitfields
uint32 max_bits = min(32*32,pagesize/minChunkSize1);
uint32 max_entries = divup<uint32>(max_bits/8,sizeof(uint32))*sizeof(uint32);
uint32* write = (uint32*)(data+(pagesize-max_entries));
//clear the entire data which can hold bitfields
uint32 first_possible_metadata = 32*HierarchyThreshold;
uint32* write = (uint32*)(data+(pagesize-first_possible_metadata));
while(write < (uint32*)(data + pagesize))
*write++ = 0;
}
Expand Down Expand Up @@ -319,6 +315,9 @@ namespace ScatterKernelDetail{
*/
__device__ inline void* tryUsePage(uint32 page, uint32 chunksize)
{

void* chunk_ptr = NULL;

//increase the fill level
uint32 filllevel = atomicAdd((uint32*)&(_ptes[page].count), 1);
//recheck chunk size (it could be that the page got freed in the meanwhile...)
Expand All @@ -333,19 +332,21 @@ namespace ScatterKernelDetail{
fullsegments = pagesize / segmentsize;
additional_chunks = max(0,(int)pagesize - (int)fullsegments*segmentsize - (int)sizeof(uint32))/chunksize;
if(filllevel < fullsegments * 32 + additional_chunks)
return addChunkHierarchy(chunksize, fullsegments, additional_chunks, page);
chunk_ptr = addChunkHierarchy(chunksize, fullsegments, additional_chunks, page);
}
else
{
uint32 chunksinpage = min(pagesize / chunksize, 32);
if(filllevel < chunksinpage)
return addChunkNoHierarchy(chunksize, page, chunksinpage);
chunk_ptr = addChunkNoHierarchy(chunksize, page, chunksinpage);
}
}

//this one is full/not useable
atomicSub((uint32*)&(_ptes[page].count), 1);
return 0;
if(chunk_ptr == NULL)
atomicSub((uint32*)&(_ptes[page].count), 1);

return chunk_ptr;
}


Expand Down Expand Up @@ -444,9 +445,8 @@ namespace ScatterKernelDetail{
uint32* onpagemasks = (uint32*)(_page[page].data + chunksize*(fullsegments*32 + additional_chunks));
uint32 old = atomicAnd(onpagemasks + segment, ~(1 << withinsegment));

uint32 elementsinsegment = segment < fullsegments ? 32 : additional_chunks;
if(__popc(old) == elementsinsegment)
atomicAnd((uint32*)&_ptes[page].bitmask, ~(1 << segment));
// always do this, since it might fail due to a race-condition with addChunkHierarchy
atomicAnd((uint32*)&_ptes[page].bitmask, ~(1 << segment));
}
else
{
Expand Down Expand Up @@ -718,7 +718,7 @@ namespace ScatterKernelDetail{
ptes[i].init();
page[i].init();
}
for(uint32 i = linid; i < numregions; i+= numregions)
for(uint32 i = linid; i < numregions; i+= threads)
regions[i] = 0;

if(linid == 0)
Expand Down Expand Up @@ -777,9 +777,9 @@ namespace ScatterKernelDetail{
}
}

__device__ bool isOOM(void* p){
// all threads in a warp return get NULL
return 32 == __popc(__ballot(p == NULL));
__device__ bool isOOM(void* p, size_t s){
// one thread that requested memory returned null
return s && (p == NULL);
}


Expand Down Expand Up @@ -869,7 +869,8 @@ namespace ScatterKernelDetail{
if(gid > 0) return 0; //do this serially
uint32 pagestoalloc = divup((uint32)slotSize, pagesize);
uint32 freecount = 0;
for(uint32 currentpage = _numpages; currentpage > 0; --currentpage){ //this already includes all superblocks
for(uint32 currentpage = _numpages; currentpage > 0;){ //this already includes all superblocks
--currentpage;
if(_ptes[currentpage].chunksize == 0){
if(++freecount == pagestoalloc){
freecount = 0;
Expand Down
Expand Up @@ -77,7 +77,7 @@ namespace DistributionPolicies{

//all the properties must be unsigned integers > 0
BOOST_STATIC_ASSERT(!std::numeric_limits<typename Properties::pagesize::type>::is_signed);
BOOST_STATIC_ASSERT(pagesize > 0);
BOOST_STATIC_ASSERT(static_cast<uint32>(pagesize) > 0);

public:
static const uint32 _pagesize = pagesize;
Expand All @@ -97,7 +97,7 @@ namespace DistributionPolicies{
//second half: make sure that all coalesced allocations can fit within one page
//necessary for offset calculation
bool coalescible = bytes > 0 && bytes < (pagesize / 32);
uint32 threadcount = __popc(__ballot(coalescible));
threadcount = __popc(__ballot(coalescible));

if (coalescible && threadcount > 1)
{
Expand Down
17 changes: 9 additions & 8 deletions src/include/mallocMC/mallocMC_constraints.hpp
Expand Up @@ -37,19 +37,19 @@ namespace mallocMC{
/** The default PolicyCheckers (do always succeed)
*/
template<typename Policy1>
struct PolicyCheck1{};
class PolicyCheck1{};

template<typename Policy1, typename Policy2>
struct PolicyCheck2{};
class PolicyCheck2{};

template<typename Policy1, typename Policy2, typename Policy3>
struct PolicyCheck3{};
class PolicyCheck3{};

template<typename Policy1, typename Policy2, typename Policy3, typename Policy4>
struct PolicyCheck4{};
class PolicyCheck4{};

template<typename Policy1, typename Policy2, typename Policy3, typename Policy4, typename Policy5>
struct PolicyCheck5{};
class PolicyCheck5{};


/** Enforces constraints on policies or combinations of policies
Expand All @@ -63,8 +63,9 @@ namespace mallocMC{
typename T_GetHeapPolicy,
typename T_AlignmentPolicy
>
class PolicyConstraints{
PolicyCheck2<T_CreationPolicy, T_DistributionPolicy> c;

class PolicyConstraints:PolicyCheck2<T_CreationPolicy, T_DistributionPolicy>{

};


Expand All @@ -75,7 +76,7 @@ namespace mallocMC{
* the same value for their "pagesize"-parameter.
*/
template<typename x, typename y, typename z >
struct PolicyCheck2<
class PolicyCheck2<
typename CreationPolicies::Scatter<x,y>,
typename DistributionPolicies::XMallocSIMD<z>
>{
Expand Down
9 changes: 3 additions & 6 deletions src/include/mallocMC/mallocMC_hostclass.hpp
Expand Up @@ -83,7 +83,8 @@ namespace mallocMC{
public T_CreationPolicy,
public T_OOMPolicy,
public T_ReservePoolPolicy,
public T_AlignmentPolicy
public T_AlignmentPolicy,
public PolicyConstraints<T_CreationPolicy,T_DistributionPolicy,T_OOMPolicy,T_ReservePoolPolicy,T_AlignmentPolicy>
{
public:
typedef T_CreationPolicy CreationPolicy;
Expand All @@ -96,10 +97,6 @@ namespace mallocMC{
typedef boost::uint32_t uint32;
void* pool;

//Instantiating the constraints checker will execute the check
PolicyConstraints<CreationPolicy,DistributionPolicy,
OOMPolicy,ReservePoolPolicy,AlignmentPolicy> c;

public:

typedef Allocator<CreationPolicy,DistributionPolicy,
Expand All @@ -112,7 +109,7 @@ namespace mallocMC{
bytes = AlignmentPolicy::applyPadding(bytes);
uint32 req_size = distributionPolicy.collect(bytes);
void* memBlock = CreationPolicy::create(req_size);
const bool oom = CreationPolicy::isOOM(memBlock);
const bool oom = CreationPolicy::isOOM(memBlock, req_size);
if(oom) memBlock = OOMPolicy::handleOOM(memBlock);
void* myPart = distributionPolicy.distribute(memBlock);

Expand Down
2 changes: 1 addition & 1 deletion src/include/mallocMC/version.hpp
Expand Up @@ -39,7 +39,7 @@
/** the mallocMC version: major API changes should be reflected here */
#define MALLOCMC_VERSION_MAJOR 2
#define MALLOCMC_VERSION_MINOR 0
#define MALLOCMC_VERSION_PATCH 0
#define MALLOCMC_VERSION_PATCH 1

/** the mallocMC flavor is used to differentiate the releases of the
* Computational Radiation Physics group (crp) from other releases
Expand Down

0 comments on commit 1314bf2

Please sign in to comment.