From b7f4842257cd9df2e201e240e11255b877d377a3 Mon Sep 17 00:00:00 2001 From: adityapb Date: Sun, 1 May 2016 00:33:40 +0530 Subject: [PATCH 1/3] Fix brute_force_gpu_update --- src/kernels.cu | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/kernels.cu b/src/kernels.cu index b01bd38..df2cc6f 100644 --- a/src/kernels.cu +++ b/src/kernels.cu @@ -66,11 +66,19 @@ void brute_force_kernel(double* x, double* y, double* z, double a_y_i = a_y[id]; double a_z_i = a_z[id]; + double temp_a_x = 0; + double temp_a_y = 0; + double temp_a_z = 0; + calculate_force_device(x_old, y_old, z_old, m, x_old[id], y_old[id], z_old[id], - a_x[id], a_y[id], a_z[id], + temp_a_x, temp_a_y, temp_a_z, num_planets, eps2, G); + a_x[id] = temp_a_x; + a_y[id] = temp_a_y; + a_z[id] = temp_a_z; + x[id] += v_x[id]*dt + a_x_i*0.5*dt*dt; y[id] += v_y[id]*dt + a_y_i*0.5*dt*dt; z[id] += v_z[id]*dt + a_z_i*0.5*dt*dt; From a1783588e5e48bef9eeb675ed237c1c1d80c9040 Mon Sep 17 00:00:00 2001 From: adityapb Date: Sun, 1 May 2016 10:01:07 +0530 Subject: [PATCH 2/3] Remove useless data copies --- src/kernels.cu | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/src/kernels.cu b/src/kernels.cu index df2cc6f..2b11c9d 100644 --- a/src/kernels.cu +++ b/src/kernels.cu @@ -95,18 +95,6 @@ void brute_force_gpu_update(double* x, double* y, double* z, double* a_x, double* a_y, double* a_z, double* m, double G, double dt, int num_planets, double eps) { - double* x_old = (double*) malloc(num_planets*sizeof(double)); - double* y_old = (double*) malloc(num_planets*sizeof(double)); - double* z_old = (double*) malloc(num_planets*sizeof(double)); - - int i; - for(i=0; i Date: Sun, 1 May 2016 11:37:21 +0530 Subject: [PATCH 3/3] Try reducing number of threads per block --- src/kernels.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/kernels.cu b/src/kernels.cu index 2b11c9d..2f3516a 100644 --- a/src/kernels.cu +++ b/src/kernels.cu @@ -135,7 +135,7 @@ void brute_force_gpu_update(double* x, double* y, double* z, exit(0); } - brute_force_kernel<<>>(dev_x, dev_y, dev_z, + brute_force_kernel<<>>(dev_x, dev_y, dev_z, dev_x_old, dev_y_old, dev_z_old, dev_v_x, dev_v_y, dev_v_z, dev_a_x, dev_a_y, dev_a_z,