@@ -20,51 +20,51 @@
#define checkCUDAError(msg) checkCUDAErrorFn(msg, FILENAME, __LINE__)
void checkCUDAErrorFn(const char *msg, const char *file, int line) {
#if ERRORCHECK
cudaDeviceSynchronize();
cudaError_t err = cudaGetLastError();
if (cudaSuccess == err) {
return;
}

fprintf(stderr, "CUDA error");
if (file) {
fprintf(stderr, " (%s:%d)", file, line);
}
fprintf(stderr, ": %s: %s\n", msg, cudaGetErrorString(err));
cudaDeviceSynchronize();
cudaError_t err = cudaGetLastError();
if (cudaSuccess == err) {
return;
}

fprintf(stderr, "CUDA error");
if (file) {
fprintf(stderr, " (%s:%d)", file, line);
}
fprintf(stderr, ": %s: %s\n", msg, cudaGetErrorString(err));
# ifdef _WIN32
getchar();
getchar();
# endif
exit(EXIT_FAILURE);
exit(EXIT_FAILURE);
#endif
}

__host__ __device__
thrust::default_random_engine makeSeededRandomEngine(int iter, int index, int depth) {
int h = utilhash((1 << 31) | (depth << 22) | iter) ^ utilhash(index);
return thrust::default_random_engine(h);
int h = utilhash((1 << 31) | (depth << 22) | iter) ^ utilhash(index);
return thrust::default_random_engine(h);
}

//Kernel that writes the image to the OpenGL PBO directly.
__global__ void sendImageToPBO(uchar4* pbo, glm::ivec2 resolution,
int iter, glm::vec3* image) {
int x = (blockIdx.x * blockDim.x) + threadIdx.x;
int y = (blockIdx.y * blockDim.y) + threadIdx.y;

if (x < resolution.x && y < resolution.y) {
int index = x + (y * resolution.x);
glm::vec3 pix = image[index];

glm::ivec3 color;
color.x = glm::clamp((int) (pix.x / iter * 255.0), 0, 255);
color.y = glm::clamp((int) (pix.y / iter * 255.0), 0, 255);
color.z = glm::clamp((int) (pix.z / iter * 255.0), 0, 255);

// Each thread writes one pixel location in the texture (textel)
pbo[index].w = 0;
pbo[index].x = color.x;
pbo[index].y = color.y;
pbo[index].z = color.z;
}
int iter, glm::vec3* image) {
int x = (blockIdx.x * blockDim.x) + threadIdx.x;
int y = (blockIdx.y * blockDim.y) + threadIdx.y;

if (x < resolution.x && y < resolution.y) {
int index = x + (y * resolution.x);
glm::vec3 pix = image[index];

glm::ivec3 color;
color.x = glm::clamp((int)(pix.x / iter * 255.0), 0, 255);
color.y = glm::clamp((int)(pix.y / iter * 255.0), 0, 255);
color.z = glm::clamp((int)(pix.z / iter * 255.0), 0, 255);

// Each thread writes one pixel location in the texture (textel)
pbo[index].w = 0;
pbo[index].x = color.x;
pbo[index].y = color.y;
pbo[index].z = color.z;
}
}

static Scene * hst_scene = NULL;
@@ -77,38 +77,38 @@ static ShadeableIntersection * dev_intersections = NULL;
// ...

void pathtraceInit(Scene *scene) {
hst_scene = scene;
const Camera &cam = hst_scene->state.camera;
const int pixelcount = cam.resolution.x * cam.resolution.y;
hst_scene = scene;
const Camera &cam = hst_scene->state.camera;
const int pixelcount = cam.resolution.x * cam.resolution.y;

cudaMalloc(&dev_image, pixelcount * sizeof(glm::vec3));
cudaMemset(dev_image, 0, pixelcount * sizeof(glm::vec3));
cudaMalloc(&dev_image, pixelcount * sizeof(glm::vec3));
cudaMemset(dev_image, 0, pixelcount * sizeof(glm::vec3));

cudaMalloc(&dev_paths, pixelcount * sizeof(PathSegment));
cudaMalloc(&dev_paths, pixelcount * sizeof(PathSegment));

cudaMalloc(&dev_geoms, scene->geoms.size() * sizeof(Geom));
cudaMemcpy(dev_geoms, scene->geoms.data(), scene->geoms.size() * sizeof(Geom), cudaMemcpyHostToDevice);
cudaMalloc(&dev_geoms, scene->geoms.size() * sizeof(Geom));
cudaMemcpy(dev_geoms, scene->geoms.data(), scene->geoms.size() * sizeof(Geom), cudaMemcpyHostToDevice);

cudaMalloc(&dev_materials, scene->materials.size() * sizeof(Material));
cudaMemcpy(dev_materials, scene->materials.data(), scene->materials.size() * sizeof(Material), cudaMemcpyHostToDevice);
cudaMalloc(&dev_materials, scene->materials.size() * sizeof(Material));
cudaMemcpy(dev_materials, scene->materials.data(), scene->materials.size() * sizeof(Material), cudaMemcpyHostToDevice);

cudaMalloc(&dev_intersections, pixelcount * sizeof(ShadeableIntersection));
cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection));
cudaMalloc(&dev_intersections, pixelcount * sizeof(ShadeableIntersection));
cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection));

// TODO: initialize any extra device memeory you need
// TODO: initialize any extra device memeory you need

checkCUDAError("pathtraceInit");
checkCUDAError("pathtraceInit");
}

void pathtraceFree() {
cudaFree(dev_image); // no-op if dev_image is null
cudaFree(dev_paths);
cudaFree(dev_geoms);
cudaFree(dev_materials);
cudaFree(dev_intersections);
// TODO: clean up any extra device memory you created

checkCUDAError("pathtraceFree");
cudaFree(dev_image); // no-op if dev_image is null
cudaFree(dev_paths);
cudaFree(dev_geoms);
cudaFree(dev_materials);
cudaFree(dev_intersections);
// TODO: clean up any extra device memory you created

checkCUDAError("pathtraceFree");
}

/**
@@ -129,7 +129,7 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path
PathSegment & segment = pathSegments[index];

segment.ray.origin = cam.position;
segment.color = glm::vec3(1.0f, 1.0f, 1.0f);
segment.color = glm::vec3(1.0f, 1.0f, 1.0f);

// TODO: implement antialiasing by jittering the ray
segment.ray.direction = glm::normalize(cam.view
@@ -195,6 +195,7 @@ __global__ void computeIntersections(
hit_geom_index = i;
intersect_point = tmp_intersect;
normal = tmp_normal;
//when terminate, when use these values, TODO!
}
}

@@ -208,10 +209,52 @@ __global__ void computeIntersections(
intersections[path_index].t = t_min;
intersections[path_index].materialId = geoms[hit_geom_index].materialid;
intersections[path_index].surfaceNormal = normal;
intersections[path_index].pt3 = intersect_point; //leave it here for now
}
}
}
__global__ void shadeMaterial(int iter
, int num_paths
, ShadeableIntersection * shadeableIntersections
, PathSegment * pathSegments
, Material * materials){
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < num_paths)
{
ShadeableIntersection intersection = shadeableIntersections[idx];
if (intersection.t > 0.0f) { // if the intersection exists...
// Set up the RNG
// LOOK: this is how you use thrust's RNG! Please look at
// makeSeededRandomEngine as well.
thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, 0);
thrust::uniform_real_distribution<float> u01(0, 1);

Material material = materials[intersection.materialId];
glm::vec3 materialColor = material.color;

// If the material indicates that the object was a light, "light" the ray
if (material.emittance > 0.0f) {
pathSegments[idx].color *= (materialColor * material.emittance);
}
// Otherwise, do some pseudo-lighting computation. This is actually more
// like what you would expect from shading in a rasterizer like OpenGL.
// TODO: replace this! you should be able to start with basically a one-liner
else {
//float lightTerm = glm::dot(intersection.surfaceNormal, glm::vec3(0.0f, 1.0f, 0.0f));
//pathSegments[idx].color *= (materialColor * lightTerm) * 0.3f + ((1.0f - intersection.t * 0.02f) * materialColor) * 0.7f;
//pathSegments[idx].color *= u01(rng); // apply some noise because why not
scatterRay(pathSegments[idx], intersection.pt3, intersection.surfaceNormal, material, rng);
}
// If there was no intersection, color the ray black.
// Lots of renderers use 4 channel color, RGBA, where A = alpha, often
// used for opacity, in which case they can indicate "no opacity".
// This can be useful for post-processing and image compositing.
}
else {
pathSegments[idx].color = glm::vec3(0.0f);
}
}
}

// LOOK: "fake" shader demonstrating what you might do with the info in
// a ShadeableIntersection, as well as how to use thrust's random number
// generator. Observe that since the thrust random number generator basically
@@ -221,48 +264,49 @@ __global__ void computeIntersections(
// Note that this shader does NOT do a BSDF evaluation!
// Your shaders should handle that - this can allow techniques such as
// bump mapping.
__global__ void shadeFakeMaterial (
int iter
, int num_paths
__global__ void shadeFakeMaterial(
int iter
, int num_paths
, ShadeableIntersection * shadeableIntersections
, PathSegment * pathSegments
, Material * materials
)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < num_paths)
{
ShadeableIntersection intersection = shadeableIntersections[idx];
if (intersection.t > 0.0f) { // if the intersection exists...
// Set up the RNG
// LOOK: this is how you use thrust's RNG! Please look at
// makeSeededRandomEngine as well.
thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, 0);
thrust::uniform_real_distribution<float> u01(0, 1);

Material material = materials[intersection.materialId];
glm::vec3 materialColor = material.color;

// If the material indicates that the object was a light, "light" the ray
if (material.emittance > 0.0f) {
pathSegments[idx].color *= (materialColor * material.emittance);
}
// Otherwise, do some pseudo-lighting computation. This is actually more
// like what you would expect from shading in a rasterizer like OpenGL.
// TODO: replace this! you should be able to start with basically a one-liner
else {
float lightTerm = glm::dot(intersection.surfaceNormal, glm::vec3(0.0f, 1.0f, 0.0f));
pathSegments[idx].color *= (materialColor * lightTerm) * 0.3f + ((1.0f - intersection.t * 0.02f) * materialColor) * 0.7f;
pathSegments[idx].color *= u01(rng); // apply some noise because why not
}
// If there was no intersection, color the ray black.
// Lots of renderers use 4 channel color, RGBA, where A = alpha, often
// used for opacity, in which case they can indicate "no opacity".
// This can be useful for post-processing and image compositing.
} else {
pathSegments[idx].color = glm::vec3(0.0f);
}
}
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if (idx < num_paths)
{
ShadeableIntersection intersection = shadeableIntersections[idx];
if (intersection.t > 0.0f) { // if the intersection exists...
// Set up the RNG
// LOOK: this is how you use thrust's RNG! Please look at
// makeSeededRandomEngine as well.
thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, 0);
thrust::uniform_real_distribution<float> u01(0, 1);

Material material = materials[intersection.materialId];
glm::vec3 materialColor = material.color;

// If the material indicates that the object was a light, "light" the ray
if (material.emittance > 0.0f) {
pathSegments[idx].color *= (materialColor * material.emittance);
}
// Otherwise, do some pseudo-lighting computation. This is actually more
// like what you would expect from shading in a rasterizer like OpenGL.
// TODO: replace this! you should be able to start with basically a one-liner
else {
float lightTerm = glm::dot(intersection.surfaceNormal, glm::vec3(0.0f, 1.0f, 0.0f));
pathSegments[idx].color *= (materialColor * lightTerm) * 0.3f + ((1.0f - intersection.t * 0.02f) * materialColor) * 0.7f;
pathSegments[idx].color *= u01(rng); // apply some noise because why not
}
// If there was no intersection, color the ray black.
// Lots of renderers use 4 channel color, RGBA, where A = alpha, often
// used for opacity, in which case they can indicate "no opacity".
// This can be useful for post-processing and image compositing.
}
else {
pathSegments[idx].color = glm::vec3(0.0f);
}
}
}

// Add the current iteration's output to the overall image
@@ -282,52 +326,52 @@ __global__ void finalGather(int nPaths, glm::vec3 * image, PathSegment * iterati
* of memory management
*/
void pathtrace(uchar4 *pbo, int frame, int iter) {
const int traceDepth = hst_scene->state.traceDepth;
const Camera &cam = hst_scene->state.camera;
const int pixelcount = cam.resolution.x * cam.resolution.y;
const int traceDepth = hst_scene->state.traceDepth;
const Camera &cam = hst_scene->state.camera;
const int pixelcount = cam.resolution.x * cam.resolution.y;

// 2D block for generating ray from camera
const dim3 blockSize2d(8, 8);
const dim3 blocksPerGrid2d(
(cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x,
(cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y);
const dim3 blockSize2d(8, 8);
const dim3 blocksPerGrid2d(
(cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x,
(cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y);

// 1D block for path tracing
const int blockSize1d = 128;

///////////////////////////////////////////////////////////////////////////

// Recap:
// * Initialize array of path rays (using rays that come out of the camera)
// * You can pass the Camera object to that kernel.
// * Each path ray must carry at minimum a (ray, color) pair,
// * where color starts as the multiplicative identity, white = (1, 1, 1).
// * This has already been done for you.
// * For each depth:
// * Compute an intersection in the scene for each path ray.
// A very naive version of this has been implemented for you, but feel
// free to add more primitives and/or a better algorithm.
// Currently, intersection distance is recorded as a parametric distance,
// t, or a "distance along the ray." t = -1.0 indicates no intersection.
// * Color is attenuated (multiplied) by reflections off of any object
// * TODO: Stream compact away all of the terminated paths.
// You may use either your implementation or `thrust::remove_if` or its
// cousins.
// * Note that you can't really use a 2D kernel launch any more - switch
// to 1D.
// * TODO: Shade the rays that intersected something or didn't bottom out.
// That is, color the ray by performing a color computation according
// to the shader, then generate a new ray to continue the ray path.
// We recommend just updating the ray's PathSegment in place.
// Note that this step may come before or after stream compaction,
// since some shaders you write may also cause a path to terminate.
// * Finally, add this iteration's results to the image. This has been done
// for you.

// TODO: perform one iteration of path tracing

generateRayFromCamera <<<blocksPerGrid2d, blockSize2d >>>(cam, iter, traceDepth, dev_paths);
checkCUDAError("generate camera ray");
///////////////////////////////////////////////////////////////////////////

// Recap:
// * Initialize array of path rays (using rays that come out of the camera)
// * You can pass the Camera object to that kernel.
// * Each path ray must carry at minimum a (ray, color) pair,
// * where color starts as the multiplicative identity, white = (1, 1, 1).
// * This has already been done for you.
// * For each depth:
// * Compute an intersection in the scene for each path ray.
// A very naive version of this has been implemented for you, but feel
// free to add more primitives and/or a better algorithm.
// Currently, intersection distance is recorded as a parametric distance,
// t, or a "distance along the ray." t = -1.0 indicates no intersection.
// * Color is attenuated (multiplied) by reflections off of any object
// * TODO: Stream compact away all of the terminated paths.
// You may use either your implementation or `thrust::remove_if` or its
// cousins.
// * Note that you can't really use a 2D kernel launch any more - switch
// to 1D.
// * TODO: Shade the rays that intersected something or didn't bottom out.
// That is, color the ray by performing a color computation according
// to the shader, then generate a new ray to continue the ray path.
// We recommend just updating the ray's PathSegment in place.
// Note that this step may come before or after stream compaction,
// since some shaders you write may also cause a path to terminate.
// * Finally, add this iteration's results to the image. This has been done
// for you.

// TODO: perform one iteration of path tracing

generateRayFromCamera << <blocksPerGrid2d, blockSize2d >> >(cam, iter, traceDepth, dev_paths);
checkCUDAError("generate camera ray failed");

int depth = 0;
PathSegment* dev_path_end = dev_paths + pixelcount;
@@ -336,58 +380,58 @@ void pathtrace(uchar4 *pbo, int frame, int iter) {
// --- PathSegment Tracing Stage ---
// Shoot ray into scene, bounce between objects, push shading chunks

bool iterationComplete = false;
bool iterationComplete = false;
while (!iterationComplete) {

// clean shading chunks
cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection));

// tracing
dim3 numblocksPathSegmentTracing = (num_paths + blockSize1d - 1) / blockSize1d;
computeIntersections <<<numblocksPathSegmentTracing, blockSize1d>>> (
depth
, num_paths
, dev_paths
, dev_geoms
, hst_scene->geoms.size()
, dev_intersections
);
checkCUDAError("trace one bounce");
cudaDeviceSynchronize();
depth++;


// TODO:
// --- Shading Stage ---
// Shade path segments based on intersections and generate new rays by
// evaluating the BSDF.
// Start off with just a big kernel that handles all the different
// materials you have in the scenefile.
// TODO: compare between directly shading the path segments and shading
// path segments that have been reshuffled to be contiguous in memory.

shadeFakeMaterial<<<numblocksPathSegmentTracing, blockSize1d>>> (
iter,
num_paths,
dev_intersections,
dev_paths,
dev_materials
);
iterationComplete = true; // TODO: should be based off stream compaction results.
// clean shading chunks
cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection));

// tracing
dim3 numblocksPathSegmentTracing = (num_paths + blockSize1d - 1) / blockSize1d;
computeIntersections << <numblocksPathSegmentTracing, blockSize1d >> > (
depth
, num_paths
, dev_paths
, dev_geoms
, hst_scene->geoms.size()
, dev_intersections
);
checkCUDAError("trace one bounce");
cudaDeviceSynchronize();
depth++;


// TODO:
// --- Shading Stage ---
// Shade path segments based on intersections and generate new rays by
// evaluating the BSDF.
// Start off with just a big kernel that handles all the different
// materials you have in the scenefile.
// TODO: compare between directly shading the path segments and shading
// path segments that have been reshuffled to be contiguous in memory.

shadeMaterial << <numblocksPathSegmentTracing, blockSize1d >> > (
iter,
num_paths,
dev_intersections,
dev_paths,
dev_materials
);
iterationComplete = true; // TODO: should be based off stream compaction results.
}

// Assemble this iteration and apply it to the image
dim3 numBlocksPixels = (pixelcount + blockSize1d - 1) / blockSize1d;
finalGather<<<numBlocksPixels, blockSize1d>>>(num_paths, dev_image, dev_paths);
// Assemble this iteration and apply it to the image
dim3 numBlocksPixels = (pixelcount + blockSize1d - 1) / blockSize1d;
finalGather << <numBlocksPixels, blockSize1d >> >(num_paths, dev_image, dev_paths);

///////////////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////////////

// Send results to OpenGL buffer for rendering
sendImageToPBO<<<blocksPerGrid2d, blockSize2d>>>(pbo, cam.resolution, iter, dev_image);
// Send results to OpenGL buffer for rendering
sendImageToPBO << <blocksPerGrid2d, blockSize2d >> >(pbo, cam.resolution, iter, dev_image);

// Retrieve image from GPU
cudaMemcpy(hst_scene->state.image.data(), dev_image,
pixelcount * sizeof(glm::vec3), cudaMemcpyDeviceToHost);
// Retrieve image from GPU
cudaMemcpy(hst_scene->state.image.data(), dev_image,
pixelcount * sizeof(glm::vec3), cudaMemcpyDeviceToHost);

checkCUDAError("pathtrace");
checkCUDAError("pathtrace");
}
@@ -73,4 +73,5 @@ struct ShadeableIntersection {
float t;
glm::vec3 surfaceNormal;
int materialId;
glm::vec3 pt3;
};