@@ -20,51 +20,51 @@
// Function-like wrapper that records the call site. There must be no
// whitespace between the macro name and '(' or the macro becomes object-like.
#define checkCUDAError(msg) checkCUDAErrorFn(msg, FILENAME, __LINE__)

/**
 * Reports the most recent CUDA error, if any, and terminates the process.
 *
 * Compiled to a no-op when ERRORCHECK is 0.
 *
 * @param msg   Caller-supplied label printed alongside the CUDA error string.
 * @param file  Source file of the call site; omitted from the output when null.
 * @param line  Source line of the call site.
 */
void checkCUDAErrorFn(const char *msg, const char *file, int line) {
#if ERRORCHECK
    // Synchronize first so that errors raised by previously launched work
    // are visible to cudaGetLastError() below.
    cudaDeviceSynchronize();
    cudaError_t err = cudaGetLastError();
    if (cudaSuccess == err) {
        return;
    }

    fprintf(stderr, " CUDA error");
    if (file) {
        fprintf(stderr, " (%s:%d)", file, line);
    }
    fprintf(stderr, " : %s: %s\n ", msg, cudaGetErrorString(err));
#  ifdef _WIN32
    // Blocks on stdin before exiting; presumably so the message stays
    // visible in a console window that would otherwise close immediately.
    getchar();
#  endif
    exit(EXIT_FAILURE);
#endif
}
/**
 * Builds a thrust random engine whose seed is derived from the iteration,
 * the element index and the bounce depth.
 *
 * The seed is utilhash(top bit | depth << 22 | iter) XOR utilhash(index),
 * so callers that pass the same (iter, index, depth) triple get the same
 * random sequence.
 *
 * @param iter   Iteration number mixed into the low bits of the first hash input.
 * @param index  Per-element index, hashed separately and XOR-ed in.
 * @param depth  Depth value, shifted left by 22 bits before hashing.
 * @return A thrust::default_random_engine constructed from the combined hash.
 */
__host__ __device__
thrust::default_random_engine makeSeededRandomEngine(int iter, int index, int depth) {
    int h = utilhash((1 << 31) | (depth << 22) | iter) ^ utilhash(index);
    return thrust::default_random_engine(h);
}
// Kernel that writes the image to the OpenGL PBO directly.
//
// Launched on a 2D grid with one thread per pixel; threads that fall outside
// `resolution` do nothing.
//
//   pbo        - output buffer, one uchar4 per pixel, indexed x + y * resolution.x
//   resolution - image width (x) and height (y) in pixels
//   iter       - divisor applied to every channel before scaling to 0..255
//                (presumably the number of iterations summed into `image`;
//                must be non-zero)
//   image      - input colour buffer with the same indexing as `pbo`
__global__ void sendImageToPBO(uchar4* pbo, glm::ivec2 resolution,
        int iter, glm::vec3* image) {
    int x = (blockIdx.x * blockDim.x) + threadIdx.x;
    int y = (blockIdx.y * blockDim.y) + threadIdx.y;

    if (x < resolution.x && y < resolution.y) {
        int index = x + (y * resolution.x);
        glm::vec3 pix = image[index];

        // Scale to 8 bits per channel and clamp. The float literal keeps the
        // arithmetic in single precision instead of promoting to double.
        glm::ivec3 color;
        color.x = glm::clamp((int)(pix.x / iter * 255.0f), 0, 255);
        color.y = glm::clamp((int)(pix.y / iter * 255.0f), 0, 255);
        color.z = glm::clamp((int)(pix.z / iter * 255.0f), 0, 255);

        // Each thread writes one pixel location in the texture (texel)
        pbo[index].w = 0;
        pbo[index].x = color.x;
        pbo[index].y = color.y;
        pbo[index].z = color.z;
    }
}
// Host-side pointer to the scene being rendered. Assigned by pathtraceInit()
// and read by pathtrace(); null until pathtraceInit() has been called.
static Scene * hst_scene = NULL ;
@@ -77,38 +77,38 @@ static ShadeableIntersection * dev_intersections = NULL;
// ...
/**
 * Stores the scene pointer and allocates the device buffers used for rendering.
 *
 * Each buffer is allocated exactly once; allocating twice into the same
 * pointer would leak the first allocation.
 *
 * Buffers sized by pixel count (resolution.x * resolution.y):
 *   dev_image         - glm::vec3 per pixel, zero-filled
 *   dev_paths         - PathSegment per pixel, left uninitialized
 *   dev_intersections - ShadeableIntersection per pixel, zero-filled
 * Buffers copied from the host scene:
 *   dev_geoms         - scene->geoms
 *   dev_materials     - scene->materials
 *
 * @param scene  Scene to render; the pointer is retained in hst_scene.
 */
void pathtraceInit(Scene *scene) {
    hst_scene = scene;
    const Camera &cam = hst_scene->state.camera;
    const int pixelcount = cam.resolution.x * cam.resolution.y;

    cudaMalloc(&dev_image, pixelcount * sizeof(glm::vec3));
    cudaMemset(dev_image, 0, pixelcount * sizeof(glm::vec3));

    cudaMalloc(&dev_paths, pixelcount * sizeof(PathSegment));

    cudaMalloc(&dev_geoms, scene->geoms.size() * sizeof(Geom));
    cudaMemcpy(dev_geoms, scene->geoms.data(), scene->geoms.size() * sizeof(Geom), cudaMemcpyHostToDevice);

    cudaMalloc(&dev_materials, scene->materials.size() * sizeof(Material));
    cudaMemcpy(dev_materials, scene->materials.data(), scene->materials.size() * sizeof(Material), cudaMemcpyHostToDevice);

    cudaMalloc(&dev_intersections, pixelcount * sizeof(ShadeableIntersection));
    cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection));

    // TODO: initialize any extra device memory you need

    // Single check after all calls: reports the last error raised by any of them.
    checkCUDAError(" pathtraceInit");
}
/**
 * Releases the device buffers allocated by pathtraceInit().
 *
 * Every pointer is freed exactly once and then reset to NULL, so calling
 * this function again (or before pathtraceInit) only passes null pointers
 * to cudaFree, which is a no-op.
 */
void pathtraceFree() {
    cudaFree(dev_image);  // no-op if dev_image is null
    dev_image = NULL;

    cudaFree(dev_paths);
    dev_paths = NULL;

    cudaFree(dev_geoms);
    dev_geoms = NULL;

    cudaFree(dev_materials);
    dev_materials = NULL;

    cudaFree(dev_intersections);
    dev_intersections = NULL;

    // TODO: clean up any extra device memory you created

    checkCUDAError(" pathtraceFree");
}
/* *
@@ -129,7 +129,7 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path
PathSegment & segment = pathSegments[index ];
segment.ray .origin = cam.position ;
segment.color = glm::vec3 (1 .0f , 1 .0f , 1 .0f );
segment.color = glm::vec3 (1 .0f , 1 .0f , 1 .0f );
// TODO: implement antialiasing by jittering the ray
segment.ray .direction = glm::normalize (cam.view
@@ -195,6 +195,7 @@ __global__ void computeIntersections(
hit_geom_index = i;
intersect_point = tmp_intersect;
normal = tmp_normal;
// when terminate, when use these values, TODO!
}
}
@@ -208,10 +209,52 @@ __global__ void computeIntersections(
intersections[path_index].t = t_min;
intersections[path_index].materialId = geoms[hit_geom_index].materialid ;
intersections[path_index].surfaceNormal = normal ;
intersections[path_index].pt3 = intersect_point; // leave it here for now
}
}
}
// Shading kernel: one thread per path segment.
//
// For each path index below num_paths:
//   * no intersection (t <= 0): the path colour is set to black;
//   * emissive material (emittance > 0): the path colour is multiplied by
//     material colour * emittance;
//   * any other material: scatterRay() is called to update the path segment.
//
//   iter                   - iteration number, used only to seed the RNG
//   num_paths              - number of valid entries in the two arrays below
//   shadeableIntersections - per-path intersection records (read only here)
//   pathSegments           - per-path state, updated in place
//   materials              - material table indexed by intersection.materialId
__global__ void shadeMaterial(int iter
    , int num_paths
    , ShadeableIntersection * shadeableIntersections
    , PathSegment * pathSegments
    , Material * materials) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < num_paths)
    {
        ShadeableIntersection intersection = shadeableIntersections[idx];
        if (intersection.t > 0.0f) { // if the intersection exists...
            Material material = materials[intersection.materialId];
            glm::vec3 materialColor = material.color;

            // If the material indicates that the object was a light, "light" the ray
            if (material.emittance > 0.0f) {
                pathSegments[idx].color *= (materialColor * material.emittance);
            }
            // Otherwise let scatterRay() handle the surface interaction.
            else {
                // Set up the RNG only on the branch that consumes it.
                // LOOK: this is how you use thrust's RNG! Please look at
                // makeSeededRandomEngine as well.
                // NOTE(review): depth is fixed at 0, so the seed depends only
                // on (iter, idx) -- revisit once paths bounce more than once.
                thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, 0);
                scatterRay(pathSegments[idx], intersection.pt3, intersection.surfaceNormal, material, rng);
            }
        }
        else {
            // If there was no intersection, color the ray black.
            // Lots of renderers use 4 channel color, RGBA, where A = alpha, often
            // used for opacity, in which case they can indicate "no opacity".
            // This can be useful for post-processing and image compositing.
            pathSegments[idx].color = glm::vec3(0.0f);
        }
    }
}
// LOOK: "fake" shader demonstrating what you might do with the info in
// a ShadeableIntersection, as well as how to use thrust's random number
// generator. Observe that since the thrust random number generator basically
@@ -221,48 +264,49 @@ __global__ void computeIntersections(
// Note that this shader does NOT do a BSDF evaluation!
// Your shaders should handle that - this can allow techniques such as
// bump mapping.
// Parameters:
//   iter                   - iteration number, used only to seed the RNG
//   num_paths              - number of valid entries in the two arrays below
//   shadeableIntersections - per-path intersection records (read only here)
//   pathSegments           - per-path state; only .color is modified
//   materials              - material table indexed by intersection.materialId
__global__ void shadeFakeMaterial(
    int iter
    , int num_paths
    , ShadeableIntersection * shadeableIntersections
    , PathSegment * pathSegments
    , Material * materials
    )
{
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < num_paths)
    {
        ShadeableIntersection intersection = shadeableIntersections[idx];
        if (intersection.t > 0.0f) { // if the intersection exists...
            // Set up the RNG
            // LOOK: this is how you use thrust's RNG! Please look at
            // makeSeededRandomEngine as well.
            thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, 0);
            thrust::uniform_real_distribution<float> u01(0, 1);

            Material material = materials[intersection.materialId];
            glm::vec3 materialColor = material.color;

            // If the material indicates that the object was a light, "light" the ray
            if (material.emittance > 0.0f) {
                pathSegments[idx].color *= (materialColor * material.emittance);
            }
            // Otherwise, do some pseudo-lighting computation. This is actually more
            // like what you would expect from shading in a rasterizer like OpenGL.
            // TODO: replace this! you should be able to start with basically a one-liner
            else {
                // Blend a term based on the normal's alignment with +Y (weight 0.3)
                // with a term that falls off with hit distance t (weight 0.7).
                float lightTerm = glm::dot(intersection.surfaceNormal, glm::vec3(0.0f, 1.0f, 0.0f));
                pathSegments[idx].color *= (materialColor * lightTerm) * 0.3f + ((1.0f - intersection.t * 0.02f) * materialColor) * 0.7f;
                pathSegments[idx].color *= u01(rng); // apply some noise because why not
            }
        }
        else {
            // If there was no intersection, color the ray black.
            // Lots of renderers use 4 channel color, RGBA, where A = alpha, often
            // used for opacity, in which case they can indicate "no opacity".
            // This can be useful for post-processing and image compositing.
            pathSegments[idx].color = glm::vec3(0.0f);
        }
    }
}
// Add the current iteration's output to the overall image
@@ -282,52 +326,52 @@ __global__ void finalGather(int nPaths, glm::vec3 * image, PathSegment * iterati
* of memory management
*/
void pathtrace (uchar4 *pbo, int frame, int iter) {
const int traceDepth = hst_scene->state .traceDepth ;
const Camera &cam = hst_scene->state .camera ;
const int pixelcount = cam.resolution .x * cam.resolution .y ;
const int traceDepth = hst_scene->state .traceDepth ;
const Camera &cam = hst_scene->state .camera ;
const int pixelcount = cam.resolution .x * cam.resolution .y ;
// 2D block for generating ray from camera
const dim3 blockSize2d (8 , 8 );
const dim3 blocksPerGrid2d (
(cam.resolution .x + blockSize2d.x - 1 ) / blockSize2d.x ,
(cam.resolution .y + blockSize2d.y - 1 ) / blockSize2d.y );
const dim3 blockSize2d (8 , 8 );
const dim3 blocksPerGrid2d (
(cam.resolution .x + blockSize2d.x - 1 ) / blockSize2d.x ,
(cam.resolution .y + blockSize2d.y - 1 ) / blockSize2d.y );
// 1D block for path tracing
const int blockSize1d = 128 ;
// /////////////////////////////////////////////////////////////////////////
// Recap:
// * Initialize array of path rays (using rays that come out of the camera)
// * You can pass the Camera object to that kernel.
// * Each path ray must carry at minimum a (ray, color) pair,
// * where color starts as the multiplicative identity, white = (1, 1, 1).
// * This has already been done for you.
// * For each depth:
// * Compute an intersection in the scene for each path ray.
// A very naive version of this has been implemented for you, but feel
// free to add more primitives and/or a better algorithm.
// Currently, intersection distance is recorded as a parametric distance,
// t, or a "distance along the ray." t = -1.0 indicates no intersection.
// * Color is attenuated (multiplied) by reflections off of any object
// * TODO: Stream compact away all of the terminated paths.
// You may use either your implementation or `thrust::remove_if` or its
// cousins.
// * Note that you can't really use a 2D kernel launch any more - switch
// to 1D.
// * TODO: Shade the rays that intersected something or didn't bottom out.
// That is, color the ray by performing a color computation according
// to the shader, then generate a new ray to continue the ray path.
// We recommend just updating the ray's PathSegment in place.
// Note that this step may come before or after stream compaction,
// since some shaders you write may also cause a path to terminate.
// * Finally, add this iteration's results to the image. This has been done
// for you.
// TODO: perform one iteration of path tracing
generateRayFromCamera <<<blocksPerGrid2d, blockSize2d >>> (cam, iter, traceDepth, dev_paths);
checkCUDAError (" generate camera ray" );
// /////////////////////////////////////////////////////////////////////////
// Recap:
// * Initialize array of path rays (using rays that come out of the camera)
// * You can pass the Camera object to that kernel.
// * Each path ray must carry at minimum a (ray, color) pair,
// * where color starts as the multiplicative identity, white = (1, 1, 1).
// * This has already been done for you.
// * For each depth:
// * Compute an intersection in the scene for each path ray.
// A very naive version of this has been implemented for you, but feel
// free to add more primitives and/or a better algorithm.
// Currently, intersection distance is recorded as a parametric distance,
// t, or a "distance along the ray." t = -1.0 indicates no intersection.
// * Color is attenuated (multiplied) by reflections off of any object
// * TODO: Stream compact away all of the terminated paths.
// You may use either your implementation or `thrust::remove_if` or its
// cousins.
// * Note that you can't really use a 2D kernel launch any more - switch
// to 1D.
// * TODO: Shade the rays that intersected something or didn't bottom out.
// That is, color the ray by performing a color computation according
// to the shader, then generate a new ray to continue the ray path.
// We recommend just updating the ray's PathSegment in place.
// Note that this step may come before or after stream compaction,
// since some shaders you write may also cause a path to terminate.
// * Finally, add this iteration's results to the image. This has been done
// for you.
// TODO: perform one iteration of path tracing
generateRayFromCamera << <blocksPerGrid2d, blockSize2d >> >(cam, iter, traceDepth, dev_paths);
checkCUDAError (" generate camera ray failed " );
int depth = 0 ;
PathSegment* dev_path_end = dev_paths + pixelcount;
@@ -336,58 +380,58 @@ void pathtrace(uchar4 *pbo, int frame, int iter) {
// --- PathSegment Tracing Stage ---
// Shoot ray into scene, bounce between objects, push shading chunks
bool iterationComplete = false ;
bool iterationComplete = false ;
while (!iterationComplete) {
// clean shading chunks
cudaMemset (dev_intersections, 0 , pixelcount * sizeof (ShadeableIntersection));
// tracing
dim3 numblocksPathSegmentTracing = (num_paths + blockSize1d - 1 ) / blockSize1d;
computeIntersections <<<numblocksPathSegmentTracing, blockSize1d>> > (
depth
, num_paths
, dev_paths
, dev_geoms
, hst_scene->geoms .size ()
, dev_intersections
);
checkCUDAError (" trace one bounce" );
cudaDeviceSynchronize ();
depth++;
// TODO:
// --- Shading Stage ---
// Shade path segments based on intersections and generate new rays by
// evaluating the BSDF.
// Start off with just a big kernel that handles all the different
// materials you have in the scenefile.
// TODO: compare between directly shading the path segments and shading
// path segments that have been reshuffled to be contiguous in memory.
shadeFakeMaterial << <numblocksPathSegmentTracing, blockSize1d>> > (
iter,
num_paths,
dev_intersections,
dev_paths,
dev_materials
);
iterationComplete = true ; // TODO: should be based off stream compaction results.
// clean shading chunks
cudaMemset (dev_intersections, 0 , pixelcount * sizeof (ShadeableIntersection));
// tracing
dim3 numblocksPathSegmentTracing = (num_paths + blockSize1d - 1 ) / blockSize1d;
computeIntersections << <numblocksPathSegmentTracing, blockSize1d >> > (
depth
, num_paths
, dev_paths
, dev_geoms
, hst_scene->geoms .size ()
, dev_intersections
);
checkCUDAError (" trace one bounce" );
cudaDeviceSynchronize ();
depth++;
// TODO:
// --- Shading Stage ---
// Shade path segments based on intersections and generate new rays by
// evaluating the BSDF.
// Start off with just a big kernel that handles all the different
// materials you have in the scenefile.
// TODO: compare between directly shading the path segments and shading
// path segments that have been reshuffled to be contiguous in memory.
shadeMaterial << <numblocksPathSegmentTracing, blockSize1d >> > (
iter,
num_paths,
dev_intersections,
dev_paths,
dev_materials
);
iterationComplete = true ; // TODO: should be based off stream compaction results.
}
// Assemble this iteration and apply it to the image
dim3 numBlocksPixels = (pixelcount + blockSize1d - 1 ) / blockSize1d;
finalGather<< <numBlocksPixels, blockSize1d>> > (num_paths, dev_image, dev_paths);
// Assemble this iteration and apply it to the image
dim3 numBlocksPixels = (pixelcount + blockSize1d - 1 ) / blockSize1d;
finalGather << <numBlocksPixels, blockSize1d >> >(num_paths, dev_image, dev_paths);
// /////////////////////////////////////////////////////////////////////////
// /////////////////////////////////////////////////////////////////////////
// Send results to OpenGL buffer for rendering
sendImageToPBO<< <blocksPerGrid2d, blockSize2d>> > (pbo, cam.resolution , iter, dev_image);
// Send results to OpenGL buffer for rendering
sendImageToPBO << <blocksPerGrid2d, blockSize2d >> >(pbo, cam.resolution , iter, dev_image);
// Retrieve image from GPU
cudaMemcpy (hst_scene->state .image .data (), dev_image,
pixelcount * sizeof (glm::vec3), cudaMemcpyDeviceToHost);
// Retrieve image from GPU
cudaMemcpy (hst_scene->state .image .data (), dev_image,
pixelcount * sizeof (glm::vec3), cudaMemcpyDeviceToHost);
checkCUDAError (" pathtrace" );
checkCUDAError (" pathtrace" );
}