Permalink
Browse files

added support for read-only usage of arrays on device, uncovered some

synchronization bugs along the way
  • Loading branch information...
1 parent 4323355 commit a73309ce720bae67057f1ca7d1a778939e59c073 @canonizer committed Apr 20, 2012
Showing with 70 additions and 27 deletions.
  1. +3 −2 makefile
  2. +9 −0 samples/add-arrays-ngpu/src/main.c
  3. +1 −1 samples/add-arrays/src/main.c
  4. +14 −0 src/devapi.c
  5. +7 −3 src/opencl-api.c
  6. +14 −2 src/os-linux.c
  7. +2 −2 src/subreg.c
  8. +5 −5 src/sync.c
  9. +7 −6 src/tsem.c
  10. +8 −6 src/wthreads.c
View
@@ -32,7 +32,7 @@ TMP=$(TGT) *~ src/*~ $(TGT) bin/$(TGT_WITH_MAJOR_VERSION) bin/*.$(DL_SUFFIX) \
# compilation settings, also handle configuration and OS-dependent settings
INCLUDE_DIRS=
CC=gcc
-CFLAGS=-O2
+CFLAGS=-O2 -pthread
DEFS=
DL_FLAGS=-fPIC -fvisibility=hidden -shared -Wl,-soname,$(TGT_DL).$(MAJOR_VERSION)
LIBS=-lpthread
@@ -55,10 +55,11 @@ ifeq ($(ENABLE_CUDA), y)
LIB_DIRS+= -L$(CUDA_INSTALL_PATH)/$(LIBDIR_STD)
endif
ifeq ($(OSNAME), Darwin)
- LIBS+= -lrt
INCLUDE_DIRS+= -I/system/library/frameworks/opencl.framework/headers
DL_FLAGS=-fvisibility=hidden -dynamiclib
CFLAGS+= -arch i386 -arch x86_64
+else
+ LIBS+= -lrt
endif
build : $(TGT)
@@ -9,6 +9,8 @@
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
+#include <sys/time.h>
+#include <sys/resource.h>
#include "../../../src/gpuvm.h"
@@ -135,6 +137,13 @@ void *thread_fun(void *ptr) {
int main(int argc, char** argv) {
+ // check limits
+ //struct rlimit siglim;
+ //getrlimit(RLIMIT_SIGPENDING, &siglim);
+ //printf("pending realtime signals limits: soft %d, hard %d\n",
+ // siglim.rlim_cur, siglim.rlim_max);
+
+
// allocate host data
ha = (int*)malloc(SZ);
hb = (int*)malloc(SZ);
@@ -39,7 +39,7 @@
#define N (1024 * 13 + 64)
#define SZ (N * sizeof(int))
-#define NRUNS 10
+#define NRUNS 1
cl_command_queue queue;
cl_kernel add_arrays_kernel;
View
@@ -2,6 +2,7 @@
OpenCL
*/
+#include <signal.h>
#include <stdio.h>
#include "cuda-api.h"
@@ -13,7 +14,16 @@
devapi_t *devapi_g;
+/** a helper signal mask to (un)block during writer lock */
+sigset_t devapi_block_sig_g;
+
int devapi_init(int flags) {
+ sigemptyset(&devapi_block_sig_g);
+ sigaddset(&devapi_block_sig_g, SIG_MONOGC_SUSPEND);
+#ifndef __APPLE__
+ sigaddset(&devapi_block_sig_g, SIG_SUSP);
+#endif
+
flags &= GPUVM_API;
if(flags != GPUVM_CUDA && flags != GPUVM_OPENCL) {
fprintf(stderr, "devapi_init: invalid flags\n");
@@ -42,6 +52,7 @@ int devapi_init(int flags) {
int memcpy_h2d
(devapi_t *devapi, unsigned idev, void *tgt, void *src, size_t nbytes,
size_t devoff) {
+ //sigprocmask(SIG_BLOCK, &devapi_block_sig_g, 0);
// time API call
rtime_t start_time, end_time;
if(stat_enabled())
@@ -53,12 +64,14 @@ int memcpy_h2d
end_time = rtime_get();
stat_acc_double(GPUVM_STAT_HOST_COPY_TIME, rtime_diff(&start_time, &end_time));
}
+ //sigprocmask(SIG_UNBLOCK, &devapi_block_sig_g, 0);
return err;
} // memcpy_h2d
int memcpy_d2h
(devapi_t *devapi, unsigned idev, void *tgt, void *src, size_t nbytes,
size_t devoff) {
+ //sigprocmask(SIG_BLOCK, &devapi_block_sig_g, 0);
// time API call
rtime_t start_time, end_time;
if(stat_enabled())
@@ -70,5 +83,6 @@ int memcpy_d2h
end_time = rtime_get();
stat_acc_double(GPUVM_STAT_HOST_COPY_TIME, rtime_diff(&start_time, &end_time));
}
+ //sigprocmask(SIG_UNBLOCK, &devapi_block_sig_g, 0);
return err;
} // memcpy_d2h
View
@@ -22,6 +22,9 @@
#define MAX_DEVICE_NAME_LENGTH 256
+/** OpenCL devapi structure */
+devapi_t ocl_devapi_g;
+
/** does an AMD initialization hack on AMD GPUs and AMD platforms, and nothing
on devices from other manufacturers
@returns 0 if successful and a negative error code if not */
@@ -51,9 +54,10 @@ static int ocl_memcpy_h2d
int ocl_devapi_init(void) {
// fill in devapi_g structure
- devapi_g = (devapi_t*)smalloc(sizeof(devapi_t));
- if(!devapi_g)
- return GPUVM_ESALLOC;
+ //devapi_g = (devapi_t*)smalloc(sizeof(devapi_t));
+ //if(!devapi_g)
+ // return GPUVM_ESALLOC;
+ devapi_g = &ocl_devapi_g;
devapi_g->memcpy_d2h = ocl_memcpy_d2h;
devapi_g->memcpy_h2d = ocl_memcpy_h2d;
View
@@ -125,6 +125,11 @@ static int tgkill(int tgid, int tid, int sig) {
return syscall(SYS_tgkill, tgid, tid, sig);
}
+/** wrapper for tkill syscall */
+static int tkill(int tid, int sig) {
+ return syscall(SYS_tkill, tid, sig);
+}
+
thread_t self_thread() {
return gettid();
}
@@ -191,9 +196,15 @@ static tsem_t *thread_must_be_stopped(thread_t tid) {
static int stop_thread(tsem_t* tsem) {
thread_t tid = tsem->tid;
+ //fprintf(stderr, "stopping thread %d\n", tid);
tsem_pre_stop(tsem);
tgkill(my_pid_g, (pid_t)tid, SIG_SUSP);
+ //tkill(tid, SIG_SUSP);
+ //union sigval sv;
+ //sv.sival_int = 0;
+ //sigqueue(tid, SIG_SUSP, sv);
tsem_mark_blocked(tsem);
+ //fprintf(stderr, "stopped thread %d\n", tid);
return 0;
}
@@ -212,8 +223,9 @@ void stop_other_threads(void) {
if(task_mtim_g.tv_sec == stat_buf.st_mtim.tv_sec &&
task_mtim_g.tv_nsec == stat_buf.st_mtim.tv_nsec) {
// fast-track thread-stopping
+ //fprintf(stderr, "fast-track stopping\n");
tsem_traverse_all(stop_thread);
- return;
+ return;
} else {
// update /proc/self/task mtime
task_mtim_g = stat_buf.st_mtim;
@@ -227,7 +239,7 @@ void stop_other_threads(void) {
// indicates first iteration of "stopping threads"
int stop_every_thread = 1;
int running_thread_found = 1;
- tsem_lock_reader();
+ tsem_lock_writer();
while(running_thread_found) {
running_thread_found = 0;
int task_dir_fd = my_opendir(task_dir_path);
View
@@ -153,7 +153,7 @@ int subreg_sync_to_device(subreg_t *subreg, unsigned idev, int flags) {
// need to copy from host to this device
link_t *link = host_array->links[idev];
- fprintf(stderr, "host -> device, subreg = %p, link = %p\n", subreg, link);
+ //fprintf(stderr, "host -> device, subreg = %p, link = %p\n", subreg, link);
if(err = subreg_link_sync_to_device(subreg, link)) {
return err;
}
@@ -176,7 +176,7 @@ int subreg_sync_to_host(subreg_t *subreg) {
// do actualy copying
link_t *link = host_array->links[idev];
- fprintf(stderr, "device -> host, subreg = %p, link = %p\n", subreg, link);
+ //fprintf(stderr, "device -> host, subreg = %p, link = %p\n", subreg, link);
if(err = subreg_link_sync_to_host(subreg, link)) {
return err;
}
View
@@ -21,7 +21,7 @@ int sync_init(void) {
fprintf(stderr, "sync_init: can\'t init rwlock\n");
return GPUVM_ERROR;
}
- sigemptyset(&writer_block_sig_g);
+ sigfillset(&writer_block_sig_g);
sigaddset(&writer_block_sig_g, SIG_MONOGC_SUSPEND);
#ifndef __APPLE__
sigaddset(&writer_block_sig_g, SIG_SUSP);
@@ -30,7 +30,7 @@ int sync_init(void) {
}
int lock_reader(void) {
- fprintf(stderr, "locking reader\n");
+ //fprintf(stderr, "locking reader\n");
if(pthread_rwlock_rdlock(&mutex_g)) {
fprintf(stderr, "lock_reader: reader can\'t lock\n");
return GPUVM_ERROR;
@@ -39,7 +39,7 @@ int lock_reader(void) {
}
int lock_writer(void) {
- fprintf(stderr, "locking writer\n");
+ //fprintf(stderr, "locking writer\n");
if(stat_writer_sig_block())
sigprocmask(SIG_BLOCK, &writer_block_sig_g, 0);
if(pthread_rwlock_wrlock(&mutex_g)) {
@@ -50,7 +50,7 @@ int lock_writer(void) {
}
int unlock_reader(void) {
- fprintf(stderr, "unlocking reader\n");
+ //fprintf(stderr, "unlocking reader\n");
if(pthread_rwlock_unlock(&mutex_g)) {
fprintf(stderr, "unlock_reader: reader unlock\n");
return GPUVM_ERROR;
@@ -59,7 +59,7 @@ int unlock_reader(void) {
}
int unlock_writer(void) {
- fprintf(stderr, "unlocking writer\n");
+ //fprintf(stderr, "unlocking writer\n");
if(pthread_rwlock_unlock(&mutex_g)) {
fprintf(stderr, "unlock_writer: reader unlock\n");
return GPUVM_ERROR;
View
@@ -59,7 +59,7 @@ tsem_t *tsem_get(thread_t tid) {
if(pthread_mutex_init(&node->mut, 0)) {
fprintf(stderr, "tsem_find: can\'t init semaphore for thread blocking\n");
#else
- if(sem_init(&node->sem, 0, 0)) {
+ if(semaph_init(&node->sem, 0)) {
#endif
sfree(node);
return 0;
@@ -93,6 +93,7 @@ int tsem_pre_stop(tsem_t *tsem) {
}
static int tsem_traverse_subtree(tsem_t *tsem, int (*f)(tsem_t*)) {
+ //fprintf(stderr, "tsem=%p\n", tsem);
if(!tsem)
return 0;
int err;
@@ -112,14 +113,14 @@ int tsem_traverse_all(int (*f)(tsem_t*)) {
static int tsem_post(tsem_t *tsem) {
if(tsem_is_blocked(tsem)) {
tsem->blocked = 0;
+ }
#ifdef GPUVM_TSEM_MUTEX
- if(pthread_mutex_unlock(&tsem->mut)) {
- fprintf(stderr, "tsem_post: can\'t unlock thread-blocking mutexx\n");
+ if(pthread_mutex_unlock(&tsem->mut)) {
+ fprintf(stderr, "tsem_post: can\'t unlock thread-blocking mutex\n");
#else
- if(semaph_post(&tsem->sem)) {
+ if(semaph_post(&tsem->sem)) {
#endif
- return -1;
- }
+ return -1;
}
return 0;
}
View
@@ -126,7 +126,7 @@ static void *unprot_thread(void *dummy_param) {
return 0;
case REGION_OP_UNPROTECT:
- fprintf(stderr, "unprotect request received\n");
+ //fprintf(stderr, "unprotect request received\n");
stat_inc(GPUVM_STAT_PAGEFAULTS);
if(region->prot_status == PROT_NONE) {
// fully unprotect region
@@ -136,10 +136,11 @@ static void *unprot_thread(void *dummy_param) {
start_time = rtime_get();
//fprintf(stderr, "stopping other threads\n");
stop_other_threads();
+ //fprintf(stderr, "stopped other threads\n");
}
region_unprotect(region);
+ //fprintf(stderr, "unprotect request satisfied - BLOCK\n");
region_post_unprotect(region);
- fprintf(stderr, "unprotect request satisfied - BLOCK\n");
pending_regions++;
elem.op = REGION_OP_SYNC_TO_HOST;
@@ -150,13 +151,14 @@ static void *unprot_thread(void *dummy_param) {
region_unprotect(region);
for(list = region->subreg_list; list; list = list->next)
subreg_sync_to_host(list->subreg);
+ //fprintf(stderr, "unprotect request satisfied - RO\n");
region_post_unprotect(region);
- fprintf(stderr, "unprotect request satisfied - RO\n");
} else {
// do nothing
+ //fprintf(stderr, "unprotect request satisfied - NONE\n");
region_post_unprotect(region);
- fprintf(stderr, "unprotect request satisfied - NONE\n");
}
+ //fprintf(stderr, "unprotect message posted\n");
break;
case REGION_OP_SYNCED_TO_HOST:
@@ -200,14 +202,14 @@ static void *sync_thread(void *dummy_param) {
return 0;
case REGION_OP_SYNC_TO_HOST:
- fprintf(stderr, "syncing region to host\n");
+ //fprintf(stderr, "syncing region to host\n");
// sync region to host
for(list = region->subreg_list; list; list = list->next)
subreg_sync_to_host(list->subreg);
elem.op = REGION_OP_SYNCED_TO_HOST;
rqueue_put(&unprot_queue_g, &elem);
- fprintf(stderr, "synced region to host\n");
+ //fprintf(stderr, "synced region to host\n");
break;
default:

0 comments on commit a73309c

Please sign in to comment.