// random_gen.cu
// Fills a small 2D array with cuRAND uniform random numbers on the GPU and prints it twice.
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "curand_kernel.h"
#include <stdio.h>
#include <time.h>
// Initializes one cuRAND generator state per column.
//
// Launch layout: 1D grid of 1D blocks. The thread whose global index is x
// initializes curand_states[x] for 0 <= x < width; surplus threads in the
// last block return without touching memory.
//
// curand_states   device array holding at least `width` states
// width           number of states to initialize
// height          not used by this kernel
// clock_for_rand  seed passed to curand_init by every thread; each thread
//                 passes its own index x as the sequence number and 0 as
//                 the offset
__global__ void kernel_set_random(curandState *curand_states,int width,int height,long clock_for_rand)
{
    const int x = threadIdx.x + blockIdx.x*blockDim.x;
    // Valid indices are 0 .. width-1, so the upper bound is exclusive.
    // (The previous `x > width` test let thread x == width through.)
    if(x < 0 || x >= width)
    {
        return;
    }
    curand_init(clock_for_rand,x,0,&curand_states[x]);
}
// Fills a width x height row-major float array with uniform random numbers.
//
// Launch layout: 1D grid of 1D blocks. The thread whose global index is x
// owns column x: it draws `height` values from curand_states[x] and writes
// them to dev_random_array[y*width + x] for y = 0 .. height-1. Surplus
// threads in the last block return without touching memory.
//
// dev_random_array  device output buffer with at least width*height floats
// width             number of columns (and of generator states used)
// height            number of rows, i.e. values drawn per thread
// curand_states     device array of at least `width` states, expected to
//                   have been set up with curand_init beforehand; each
//                   state is advanced, so successive launches continue the
//                   sequence instead of repeating it
__global__ void kernel_random(float *dev_random_array,int width,int height,curandState *curand_states)
{
    const int x = threadIdx.x + blockIdx.x*blockDim.x;
    // Valid columns are 0 .. width-1, so the upper bound is exclusive.
    // With the previous `x > width` test, thread x == width wrote to
    // y*width + width: column 0 of the next row and, on the last row,
    // one element past the end of the buffer.
    if(x < 0 || x >= width)
    {
        return;
    }

    // Work on a local copy of the generator state and store it back once,
    // rather than updating the copy in global memory on every draw.
    curandState local_state = curand_states[x];
    for(int y=0;y<height;y++)
    {
        // curand_uniform is documented to return values in (0, 1], so no
        // abs() is needed on the result.
        dev_random_array[y*width + x] = curand_uniform(&local_state);
    }
    curand_states[x] = local_state;
}
// Reports a failed CUDA call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cuda_ok(cudaError_t status,const char *what)
{
    if(status == cudaSuccess)
    {
        return true;
    }
    fprintf(stderr,"%s: %s\n",what,cudaGetErrorString(status));
    return false;
}

// Runs kernel_random once, copies the result to the host and prints one
// value per line. Returns true on success, false after reporting an error.
static bool generate_and_print(float *dev_random_array,float *random_array,int width,int height,
                               curandState *dev_states,dim3 grid,dim3 threads)
{
    kernel_random<<<grid,threads>>>(dev_random_array,width,height,dev_states);
    // A kernel launch returns nothing; launch errors must be polled.
    if(!cuda_ok(cudaGetLastError(),"kernel_random launch failed"))
    {
        return false;
    }

    // cudaMemcpy blocks until the kernel has finished, so it also reports
    // errors raised while the kernel was running.
    if(!cuda_ok(cudaMemcpy(random_array,dev_random_array,sizeof(float)*width*height,cudaMemcpyDeviceToHost),
                "cudaMemcpy Failed"))
    {
        return false;
    }

    for(int i=0;i<width*height;i++)
    {
        printf("%f\n",random_array[i]);
    }
    return true;
}

// Demo driver: seeds one cuRAND state per column, then generates and prints
// a 10 x 10 array of uniform random numbers twice. The second pass reuses
// the advanced generator states, so it prints different numbers.
//
// Returns EXIT_SUCCESS when every CUDA call succeeded, EXIT_FAILURE
// otherwise. Device buffers are released on every path.
int main()
{
    const int array_size_width = 10;
    const int array_size_height = 10;
    const int element_count = array_size_width*array_size_height;
    float random_array[element_count] = {0};

    float *dev_random_array = NULL;
    curandState *dev_states = NULL;
    int exit_code = EXIT_FAILURE;

    // Single-pass loop: `break` jumps to the shared cleanup below.
    do
    {
        //only one GPU is used
        if(cudaSetDevice(0) != cudaSuccess)
        {
            fprintf(stderr,"cudaSetDevice failed! Do you have a CUDA-Capable GPU installed?\n");
            break;
        }

        //allocate memory on the GPU
        if(!cuda_ok(cudaMalloc((void**)&dev_random_array,sizeof(float)*element_count),
                    "dev_random_array cudaMalloc Failed"))
        {
            break;
        }
        // The kernels index the states by column only; the buffer is sized
        // per element, which is more than they need.
        if(!cuda_ok(cudaMalloc((void **)&dev_states,sizeof(curandState)*element_count),
                    "dev_states cudaMalloc Failed"))
        {
            break;
        }

        // NOTE(review): clock() measures processor time used by this process,
        // so the seed may be similar from run to run; consider time(NULL)
        // if distinct seeds per run are wanted.
        long clock_for_rand = clock();

        // One thread per column; round the block count up so every column is covered.
        dim3 threads(16,1);
        dim3 grid((array_size_width+threads.x-1)/threads.x,1);

        kernel_set_random<<<grid,threads>>>(dev_states,array_size_width,array_size_height,clock_for_rand);
        if(!cuda_ok(cudaGetLastError(),"kernel_set_random launch failed"))
        {
            break;
        }
        // Wait here so that a failure while seeding is reported against the
        // seeding kernel rather than against a later call.
        if(!cuda_ok(cudaDeviceSynchronize(),"kernel_set_random execution failed"))
        {
            break;
        }

        printf("The first time \n");
        if(!generate_and_print(dev_random_array,random_array,array_size_width,array_size_height,
                               dev_states,grid,threads))
        {
            break;
        }

        printf("------------------------------------------------------- \n");
        printf("The second time \n");
        if(!generate_and_print(dev_random_array,random_array,array_size_width,array_size_height,
                               dev_states,grid,threads))
        {
            break;
        }

        exit_code = EXIT_SUCCESS;
    } while(0);

    //free (cudaFree accepts a null pointer, so this is safe on early exits)
    cudaFree(dev_random_array);
    cudaFree(dev_states);
    return exit_code;
}