Permalink
Browse files

Fixes another issue with the caching algorithm.

It ran into trouble when running a big job
on relatively few GPUs
  • Loading branch information...
1 parent 228bf7d commit 5d44b46feebadd676c4751070c610ea0d3e5bcfa @fwinter committed Jul 10, 2012
Showing with 53 additions and 169 deletions.
  1. +48 −144 include/qdp_cache.h
  2. +0 −9 include/qdp_outer.h
  3. +5 −16 include/qdp_parscalar_specific.h
View
@@ -26,20 +26,11 @@ namespace QDP
int flags;
void* hstPtr; // NULL if not allocated
void* devPtr; // NULL if not allocated
- bool modHst; // host and device memory are in the same state, like both contain the same data
- bool modDev; // device memory is the one that has the (modified) copy of the data
int lockCount;
list<int>::iterator iterTrack;
};
public:
- // typedef QDPCacheBase::Key Key;
- // typedef int Value;
- // typedef list<Key> ListKey;
- // typedef list<Value> ListValue;
- // typedef pair<Value,ListKey::iterator> PairValueIter;
- // typedef map<Key,PairValueIter> MapKeyValue;
-
static QDPCache& Instance()
{
@@ -146,9 +137,6 @@ namespace QDP
// since it might already be destructed, and its host memory
if ( e.flags != 2 ) {
- if (e.modHst && !e.modDev)
- QDP_error_exit("cache backup: A backup would overwrite valueable data");
-
if(!e.hstPtr)
allocateHostMemory(e);
@@ -157,9 +145,6 @@ namespace QDP
QDP_debug_deep("backup: D2H");
#endif
CudaMemcpy( e.hstPtr , e.devPtr , e.size );
-
- e.modHst = false;
- e.modDev = false;
}
}
}
@@ -195,9 +180,7 @@ namespace QDP
QDP_debug_deep("restore: H2D");
#endif
CudaMemcpy( e.devPtr , e.hstPtr , e.size );
-
- e.modHst = false;
- e.modDev = false;
+ freeHostMemory(e);
}
}
}
@@ -209,61 +192,6 @@ namespace QDP
public:
- void setModDev( int id ) {
-#ifdef GPU_DEBUG_DEEP
- QDP_debug_deep("cache set modDev id=%lu ",(long)id );
-#endif
-
-#ifdef SANITY_CHECKS_CACHE
- // SANITY
- if (id >= vecEntry.size())
- QDP_error_exit("cache set modDev: out of range");
-#endif
-
- Entry& e = vecEntry[id];
-
-#ifdef SANITY_CHECKS_CACHE
- // SANITY
- if ( !e.devPtr )
- QDP_error_exit("cache setModified: but no device memory");
- if ( e.modHst )
- QDP_error_exit("cache setModified: both modified flags set!");
-#endif
-
- e.modDev = true;
-
- lstTracker.splice( lstTracker.end(), lstTracker , e.iterTrack );
-
-#ifdef GPU_DEBUG_DEEP
- printTracker();
-#endif
- }
-
-
- void setModHost( int id ) {
-#ifdef GPU_DEBUG_DEEP
- QDP_debug_deep("cache set modHost: id=%lu ",(long)id );
-#endif
-
-#ifdef SANITY_CHECKS_CACHE
- // SANITY
- if (id >= vecEntry.size())
- QDP_error_exit("cache set modHost: out of range");
-#endif
-
- Entry& e = vecEntry[id];
-
-#ifdef SANITY_CHECKS_CACHE
- // SANITY
- if ( !e.hstPtr )
- QDP_error_exit("cache set modHost: but no host memory");
- if ( e.modDev )
- QDP_error_exit("cache set modHost: both modified flags set!");
-#endif
-
- e.modHst = true;
- }
-
// onDevice()
// Device memory allocated (devPtr != NULL)
inline bool onDevice(int id) const {
@@ -280,7 +208,7 @@ namespace QDP
QDP_debug_deep("cache onDevice id=%u devPtr=%p modHst=%d size=%u",(unsigned)id,e.devPtr, e.modHst,(unsigned)e.size);
#endif
- return ((e.devPtr != NULL) && (!e.modHst));
+ return (e.devPtr != NULL);
}
@@ -319,8 +247,6 @@ namespace QDP
e.flags = flags;
e.hstPtr = NULL;
e.devPtr = NULL;
- e.modHst = false;
- e.modDev = false;
e.lockCount = 0;
e.iterTrack = lstTracker.insert( lstTracker.end() , Id );
@@ -359,8 +285,6 @@ namespace QDP
e.flags = 2;
e.hstPtr = ptr;
e.devPtr = NULL;
- e.modHst = true;
- e.modDev = false;
e.lockCount = 0;
e.iterTrack = lstTracker.insert( lstTracker.end() , Id );
@@ -465,6 +389,35 @@ namespace QDP
template<class T> friend class QDP::OLattice;
friend class QDP::QDPJitArgs;
+ void freeHostMemory(Entry& e) {
+ switch(e.flags) {
+ case 0:
+#ifdef GPU_DEBUG_DEEP
+ QDP_debug_deep("cache delete obj host memory flag=0");
+#endif
+ CUDAHostPoolAllocator::Instance().free( e.hstPtr );
+ e.hstPtr=NULL;
+ break;
+ case 1:
+#ifdef GPU_DEBUG_DEEP
+ QDP_debug_deep("cache delete obj host memory flag=1");
+#endif
+ QDP::Allocator::theQDPAllocator::Instance().free( e.hstPtr );
+ e.hstPtr=NULL;
+ break;
+ case 2:
+ // Do nothing, this object deallocates its own host memory
+#ifdef GPU_DEBUG_DEEP
+ QDP_debug_deep("cache delete obj, no need to free host mem");
+#endif
+ break;
+ default:
+ QDP_error_exit("cache delete objects: unkown host memory allocator");
+ break;
+ }
+
+ }
+
void allocateHostMemory(Entry& e) {
#ifdef GPU_DEBUG_DEEP
QDP_debug_deep("cache: alloc host size=%lu",(unsigned long)e.size);
@@ -508,7 +461,6 @@ namespace QDP
QDP_error_exit("cache allocateHostMemory: not allocated, but should");
#endif
- e.modHst=false;
}
@@ -520,30 +472,20 @@ namespace QDP
QDP_error_exit("cache assureDevice: can't spill LRU object");
}
}
- e.modDev=false;
+ if (e.hstPtr) {
+ CudaMemcpyAsync( e.devPtr , e.hstPtr , e.size );
+ CudaSyncTransferStream();
+ if (e.flags != 2)
+ freeHostMemory(e);
+ }
}
- if (e.modHst) {
- // For now do a sync transfer
-#ifdef GPU_DEBUG_DEEP
- QDP_debug_deep("cache assureDevice: H2D");
-#endif
#ifdef SANITY_CHECKS_CACHE
- // SANITY
- if(e.modDev)
- QDP_error_exit("cache assureDevice: mod on both!");
+ if (e.hstPtr) QDP_error_exit("assureDevice: We still have a host pointer");
#endif
- //CudaMemcpy( e.devPtr , e.hstPtr , e.size );
- CudaMemcpyAsync( e.devPtr , e.hstPtr , e.size );
- e.modDev=false;
- }
-
vecLockSet[currLS].push_back(e.Id);
e.lockCount++;
-
- e.modHst=false;
-
}
@@ -557,7 +499,6 @@ namespace QDP
QDP_error_exit("cache assureHost: flags == 2");
#endif
-
if (e.lockCount > 0) {
#ifdef GPU_DEBUG_DEEP
QDP_debug_deep("cache assure on host. obj in current calculation. will sync with kernel stream");
@@ -571,31 +512,17 @@ namespace QDP
#endif
}
- if(!e.hstPtr) {
+ if (!e.hstPtr) {
allocateHostMemory(e);
- e.modHst=false;
+ if (e.devPtr) {
+ CudaMemcpyAsync( e.hstPtr , e.devPtr , e.size );
+ CudaSyncTransferStream();
+ CUDADevicePoolAllocator::Instance().free( e.devPtr );
+ e.devPtr = NULL;
+ }
}
bool in_flight=false;
-
- if (e.modDev) {
- // For now do a sync transfer
-#ifdef GPU_DEBUG_DEEP
- QDP_debug_deep("cache assureHost: D2H");
-#endif
- //CudaMemcpy( e.hstPtr , e.devPtr , e.size );
- CudaMemcpyAsync( e.hstPtr , e.devPtr , e.size );
- CudaSyncTransferStream();
- e.modHst=true;
- in_flight=false;
- }
-
- if (e.devPtr) {
- CUDADevicePoolAllocator::Instance().free( e.devPtr );
- e.devPtr = NULL;
- }
-
- e.modDev = false;
return in_flight;
}
@@ -690,31 +617,9 @@ namespace QDP
if (e.devPtr)
CUDADevicePoolAllocator::Instance().free( e.devPtr );
- if (e.hstPtr) {
- switch(e.flags) {
- case 0:
-#ifdef GPU_DEBUG_DEEP
- QDP_debug_deep("cache delete obj host memory flag=0");
-#endif
- CUDAHostPoolAllocator::Instance().free( e.hstPtr );
- break;
- case 1:
-#ifdef GPU_DEBUG_DEEP
- QDP_debug_deep("cache delete obj host memory flag=1");
-#endif
- QDP::Allocator::theQDPAllocator::Instance().free( e.hstPtr );
- break;
- case 2:
- // Do nothing, this object deallocates its own host memory
-#ifdef GPU_DEBUG_DEEP
- QDP_debug_deep("cache delete obj, no need to free host mem");
-#endif
- break;
- default:
- QDP_error_exit("cache delete objects: unkown host memory allocator");
- break;
- }
- }
+ if (e.hstPtr)
+ freeHostMemory(e);
+
stackFree.push( *i );
lstDel.erase( i++ );
@@ -743,7 +648,6 @@ namespace QDP
- //MapKeyValue mapReg;
list<void *> lstStatic;
View
@@ -127,9 +127,6 @@ namespace QDP {
public:
- inline void setModified() const {
- QDPCache::Instance().setModDev( myId );
- }
inline bool onDevice() const {
return QDPCache::Instance().onDevice( myId );
}
@@ -163,7 +160,6 @@ namespace QDP {
#endif
CudaSyncTransferStream();
}
- QDPCache::Instance().setModHost( myId );
}
private:
@@ -394,9 +390,6 @@ void evaluate(OScalar<T>& dest, const Op& op, const QDPExpr<RHS,OScalar<T1> >& r
public:
- inline void setModified() const {
- QDPCache::Instance().setModDev( myId );
- }
inline bool onDevice() const {
return QDPCache::Instance().onDevice( myId );
}
@@ -428,8 +421,6 @@ void evaluate(OScalar<T>& dest, const Op& op, const QDPExpr<RHS,OScalar<T1> >& r
#endif
CudaSyncTransferStream();
}
-
- QDPCache::Instance().setModHost( myId );
}
private:
Oops, something went wrong.

0 comments on commit 5d44b46

Please sign in to comment.