
Merge pull request #300 from brian-team/sort-multiple-run-bugs
Sort multiple run bugs
denisalevi committed Jun 23, 2022
2 parents 22a935b + f9d66d3 commit 40ee5fa
Showing 9 changed files with 598 additions and 242 deletions.
115 changes: 89 additions & 26 deletions brian2cuda/brianlib/spikequeue.h
@@ -14,6 +14,8 @@ using namespace std;
// variables (delays, dt) are assumed to use the same data type
typedef int32_t DTYPE_int;

typedef cudaVector<DTYPE_int> cuda_vector;

class CudaSpikeQueue
{
private:
@@ -31,7 +33,7 @@ class CudaSpikeQueue

public:
//these vectors should ALWAYS be the same size, since each index refers to a triple of (pre_id, syn_id, post_id)
cudaVector<DTYPE_int>** synapses_queue;
cuda_vector** synapses_queue;

//our connectivity matrix with dimensions (num_blocks) * neuron_N
//each element
@@ -47,22 +49,26 @@ class CudaSpikeQueue
int* unique_delay_start_idcs;
int current_offset; // offset in circular queue structure
int num_queues;
int num_delays;
//int max_num_delays_per_block;
int num_blocks;
int neuron_N; // number of neurons in source of SynapticPathway
int syn_N;

// When we have 0 synapses, prepare() is not called in synapses_initialise_queue.cu
// and for destroy() to still work, synapses_queue needs to be a null pointer
__device__ CudaSpikeQueue(): synapses_queue(0) {};
// When we have 0 synapses, prepare() is not called in
// before_run_synapses_push_spikes and for destroy() to still work,
// synapses_queue needs to be a null pointer
__device__ CudaSpikeQueue(): synapses_queue(0), semaphore(0), num_queues(0) {};

//Since we can't have a destructor, we need to call this function manually
__device__ void destroy()
{
if(synapses_queue)
{
delete [] synapses_queue;
delete [] semaphore;
synapses_queue = 0;
semaphore = 0;
}
}
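
The constructor now zero-initializes semaphore and num_queues as well, so a later prepare() call can tell the first run from a repeated run simply by inspecting these members. A minimal host-side C++ sketch of that detection idiom (names and printout are illustrative only, not part of the commit):

    #include <cstdio>

    struct QueueState
    {
        int* semaphore = nullptr;   // null until the first prepare()
        int num_queues = 0;         // zero until the first prepare()

        void prepare(int num_blocks, int required_num_queues)
        {
            bool first_run = (semaphore == nullptr);          // same test prepare() uses
            bool grow = (required_num_queues > num_queues);   // queues are only ever added
            if (first_run)
                semaphore = new int[num_blocks]();            // zero-initialized semaphores
            if (grow)
                num_queues = required_num_queues;
            printf("first_run=%d grow=%d num_queues=%d\n", first_run, grow, num_queues);
        }
    };

    int main()
    {
        QueueState q;
        q.prepare(15, 3);   // first run: allocate semaphores, 3 queues
        q.prepare(15, 5);   // second run with longer delays: grow to 5 queues
        q.prepare(15, 4);   // third run with shorter delays: keep the 5 queues
        delete[] q.semaphore;
        return 0;
    }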

@@ -76,7 +82,7 @@ class CudaSpikeQueue
double _dt,
int _neuron_N,
int _syn_N,
int _num_queues,
int _num_delays,
int* _num_synapses_by_pre,
int* _num_synapses_by_bundle,
int* _num_unique_delays_by_pre,
@@ -89,18 +95,59 @@ class CudaSpikeQueue
int* _unique_delay_start_idcs
)
{
if(tid == 0)
// read queue information from a previous run
// (these are all null at the first run)
int old_num_queues = num_queues;
int required_num_queues = _num_delays + 1;
cuda_vector** old_synapses_queue = synapses_queue;
bool require_new_queues = (required_num_queues > old_num_queues);
int old_current_offset = current_offset;
bool initialize_semaphores = (!semaphore);

if (tid == 0)
{
// TODO add comments
// allocate semaphore memory only at first prepare() call
if (initialize_semaphores)
{
semaphore = new int[_num_blocks];
}

// only allocate queue pointer memory if the number of queues increased
if (require_new_queues)
{
synapses_queue = new cuda_vector*[required_num_queues];
if (!synapses_queue)
{
printf("ERROR while allocating memory with size %ld in"
" spikequeue.h/prepare()\n",
sizeof(cuda_vector*) * required_num_queues);
}
// only reset queue offset if we require new queues, in which
// case we copy the old queues such that the offset is reset
// (if there are no new queues, the queues remain as they are)
current_offset = 0;
}

semaphore = new int[_num_blocks];
current_offset = 0;
// set class attributes
assert(num_threads <= required_num_queues); // else parallel loop fails below
if (!initialize_semaphores)
{
assert(_num_blocks == num_blocks); // can't change between runs
}
num_blocks = _num_blocks;
neuron_N = _neuron_N;
syn_N = _syn_N;
num_queues = _num_queues;
num_delays = _num_delays;
// we only add queues, but never remove queues (because we could
// lose spikes in the queues)
if (require_new_queues)
{
num_queues = required_num_queues;
}

// TODO: do we need num_synapses_by_pre? is num_synapses_by_pre[pre_post_block_id] faster than synapses_by_pre[pre_post_block_id].size()?
// TODO: do we need num_synapses_by_pre? is
// num_synapses_by_pre[pre_post_block_id] faster than
// synapses_by_pre[pre_post_block_id].size()?
// if so, add unique_num_synapses_by_pre as well!
num_synapses_by_pre = _num_synapses_by_pre;
num_synapses_by_bundle = _num_synapses_by_bundle;
@@ -113,27 +160,45 @@ class CudaSpikeQueue
unique_delays_offset_by_pre = _unique_delays_offset_by_pre;
unique_delay_start_idcs = _unique_delay_start_idcs;

synapses_queue = new cudaVector<DTYPE_int>*[num_queues];
if(!synapses_queue)
{
printf("ERROR while allocating memory with size %ld in spikequeue.h/prepare()\n", sizeof(cudaVector<DTYPE_int>*)*num_queues);
}
}
__syncthreads();

for (int i = tid; i < _num_blocks; i+=num_threads)
// initialize semaphores only if they were not initialized before
if (initialize_semaphores)
{
semaphore[i] = 0;
for (int i = tid; i < _num_blocks; i+=num_threads)
{
semaphore[i] = 0;
}
}

for(int i = tid; i < num_queues; i+=num_threads)
// setup the new queues
if (require_new_queues)
{
synapses_queue[i] = new cudaVector<DTYPE_int>[num_blocks];
if(!synapses_queue[i])
// copy old queues over to new queue array
for (int i = tid; i < required_num_queues; i += num_threads)
{
printf("ERROR while allocating memory with size %ld in spikequeue.h/prepare()\n", sizeof(cudaVector<DTYPE_int>)*num_blocks);
if (i < old_num_queues)
{
// copy the old queues to the new array, such that the
// offset is reset back to the start (current_offset is set
// to zero above)
int old_i = (i + old_current_offset) % old_num_queues;
synapses_queue[i] = old_synapses_queue[old_i];
} else
{
// allocate new memory for cudaVectors of new queues
synapses_queue[i] = new cuda_vector[num_blocks];
if (!synapses_queue[i])
{
printf("ERROR while allocating memory with size %ld in"
" spikequeue.h/prepare()\n",
sizeof(cuda_vector)*num_blocks);
}
}
}
}

};
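
The core of the fix in prepare() is that a repeated run keeps the spikes already queued: the existing cuda_vector rows are copied into the (possibly larger) new queue array, rotated so that the row that was next in line ends up at index 0, which is why current_offset can then be reset to zero. A small host-side C++ sketch of that remapping (stand-in integers instead of cuda_vector pointers, purely illustrative):

    #include <cstdio>
    #include <vector>

    int main()
    {
        // Hypothetical situation: 3 old queues, circular offset currently at 1,
        // and a second run whose maximum delay requires 5 queues.
        std::vector<int> old_queues = {10, 11, 12};   // stand-ins for cuda_vector* rows
        int old_num_queues = (int)old_queues.size();
        int old_current_offset = 1;
        int required_num_queues = 5;

        std::vector<int> new_queues(required_num_queues, -1);  // -1 marks freshly allocated rows
        for (int i = 0; i < required_num_queues; i++)
        {
            if (i < old_num_queues)
            {
                // same remapping as in prepare(): the row that was about to be
                // delivered (at old_current_offset) becomes row 0
                int old_i = (i + old_current_offset) % old_num_queues;
                new_queues[i] = old_queues[old_i];
            }
            // else: prepare() allocates a new cuda_vector[num_blocks] row here
        }

        for (int i = 0; i < required_num_queues; i++)
            printf("new_queues[%d] = %d\n", i, new_queues[i]);
        // prints 11, 12, 10, -1, -1 : old contents preserved, offset reset to 0
        return 0;
    }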

__device__ void push_synapses(
@@ -436,8 +501,7 @@ class CudaSpikeQueue

} // end push_bundles()

__device__ void advance(
int tid)
__device__ void advance(int tid)
{
assert(tid < num_blocks && current_offset < num_queues);
synapses_queue[current_offset][tid].reset();
@@ -446,8 +510,7 @@ class CudaSpikeQueue
current_offset = (current_offset + 1)%num_queues;
}
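
advance() clears the row that was just delivered and moves the circular offset forward by one. Together with a push that places a spike with delay d at (current_offset + d) % num_queues (the push-side indexing is not shown in this diff and is assumed here), this turns the queue array into a ring buffer over delay time steps. A tiny C++ sketch of the offset arithmetic:

    #include <cstdio>

    int main()
    {
        const int num_queues = 5;   // e.g. a maximum delay of 4 time steps + 1
        int current_offset = 0;

        for (int t = 0; t < 7; t++)
        {
            // assumed push-side slot for a spike with a 3-step delay
            int delay_slot = (current_offset + 3) % num_queues;
            printf("t=%d  deliver from slot %d, push delay-3 spikes into slot %d\n",
                   t, current_offset, delay_slot);
            current_offset = (current_offset + 1) % num_queues;  // same update as advance()
        }
        return 0;
    }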

__device__ void peek(
cudaVector<DTYPE_int>** _synapses_queue)
__device__ void peek(cuda_vector** _synapses_queue)
{
*(_synapses_queue) = &(synapses_queue[current_offset][0]);
}
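
peek() hands the caller the row of per-block vectors at the current offset through a double pointer, so consumers never need to know the offset themselves. A plain C++ analogue of that access pattern (types and values are hypothetical, independent of the CUDA classes):

    #include <cstdio>

    struct Row { int block0; int block1; };   // stand-in for a cuda_vector[num_blocks] row

    struct Queue
    {
        Row rows[4];
        int current_offset = 2;
        // same shape as CudaSpikeQueue::peek(): expose only the current row
        void peek(Row** out) { *out = &rows[current_offset]; }
    };

    int main()
    {
        Queue q;
        q.rows[2] = {7, 9};
        Row* current = nullptr;
        q.peek(&current);   // the caller never touches current_offset
        printf("%d %d\n", current->block0, current->block1);  // prints 7 9
        return 0;
    }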
