Skip to content

Commit

Permalink
Adaptive sampling patch https://developer.blender.org/D4686
Browse files Browse the repository at this point in the history
# Conflicts:
#	src/blender/addon/engine.py
#	src/blender/blender_sync.cpp
#	src/kernel/kernel_types.h
#	src/render/film.cpp
  • Loading branch information
boberfly committed Dec 16, 2019
1 parent 9fb3359 commit 8d378f9
Show file tree
Hide file tree
Showing 48 changed files with 1,415 additions and 47 deletions.
1 change: 1 addition & 0 deletions src/blender/addon/engine.py
Expand Up @@ -258,6 +258,7 @@ def list_render_passes(srl):
if crl.pass_debug_bvh_traversed_instances: yield ("Debug BVH Traversed Instances", "X", 'VALUE')
if crl.pass_debug_bvh_intersections: yield ("Debug BVH Intersections", "X", 'VALUE')
if crl.pass_debug_ray_bounces: yield ("Debug Ray Bounces", "X", 'VALUE')
if crl.pass_debug_sample_count: yield ("Debug Sample Count", "X", 'VALUE')
if crl.use_pass_volume_direct: yield ("VolumeDir", "RGB", 'COLOR')
if crl.use_pass_volume_indirect: yield ("VolumeInd", "RGB", 'COLOR')

Expand Down
27 changes: 27 additions & 0 deletions src/blender/addon/properties.py
Expand Up @@ -112,6 +112,7 @@
# Choices offered for the sampling pattern setting. Each entry is a triple of
# strings: an identifier, a short label, and a longer description.
# NOTE(review): 'MUTI' in the identifiers looks like a misspelling of 'MULTI'.
# The identifiers are runtime values, so presumably they must stay as spelled
# for existing files and scripts -- confirm before renaming.
enum_sampling_pattern = (
('SOBOL', "Sobol", "Use Sobol random sampling pattern"),
('CORRELATED_MUTI_JITTER', "Correlated Multi-Jitter", "Use Correlated Multi-Jitter random sampling pattern"),
('PROGRESSIVE_MUTI_JITTER', "Progressive Multi-Jitter", "Use Progressive Multi-Jitter random sampling pattern"),
)

enum_integrator = (
Expand Down Expand Up @@ -336,6 +337,26 @@ class CyclesRenderSettings(bpy.types.PropertyGroup):
default=0.01,
)

# Threshold setting for adaptive sampling: a float limited to [0.0, 1.0].
# The default of 0.0 asks for an automatic value (see the description text).
# BlenderSync::sync_integrator copies it into integrator->adaptive_threshold
# only when use_adaptive_sampling is enabled; otherwise it stores 0.0f.
adaptive_threshold: FloatProperty(
name="Adaptive Sampling Threshold",
description="Zero for automatic setting based on AA samples",
min=0.0, max=1.0,
default=0.0,
)

# Minimum AA sample count for adaptive sampling: an int limited to [0, 4096].
# The default of 0 asks for an automatic value (see the description text).
# BlenderSync::sync_integrator copies it into integrator->adaptive_min_samples
# when use_adaptive_sampling is enabled, and squares it in the same branch
# that squares the other per-type sample counts.
adaptive_min_samples: IntProperty(
name="Adaptive Min Samples",
description="Minimum AA samples for adaptive sampling. Zero for automatic setting based on AA samples",
min=0, max=4096,
default=0,
)

# Master switch for adaptive sampling, off by default.
# The sync code reads it in two places: sync_integrator (which then forces the
# PMJ sampling pattern and reads the adaptive_* settings) and
# get_session_params (which stores it as params.adaptive_sampling).
# The UI name uses Title Case to match the other property names in this file.
use_adaptive_sampling: BoolProperty(
    name="Use Adaptive Sampling",
    description="Automatically determine the number of samples per pixel based on a variance estimation",
    default=False,
)

min_light_bounces: IntProperty(
name="Min Light Bounces",
description="Minimum number of light bounces. Setting this higher reduces noise in the first bounces, "
Expand Down Expand Up @@ -1279,6 +1300,12 @@ class CyclesRenderLayerSettings(bpy.types.PropertyGroup):
default=False,
update=update_render_passes,
)
# Per-view-layer toggle for the "Debug Sample Count" render pass, off by default.
# Changing it calls update_render_passes. When enabled, list_render_passes in
# engine.py yields ("Debug Sample Count", "X", 'VALUE') and
# BlenderSync::sync_render_passes adds a PASS_SAMPLE_COUNT pass.
pass_debug_sample_count: BoolProperty(
name="Debug Sample Count",
description="Number of samples/camera rays per pixel",
default=False,
update=update_render_passes,
)
use_pass_volume_direct: BoolProperty(
name="Volume Direct",
description="Deliver direct volumetric scattering pass",
Expand Down
12 changes: 11 additions & 1 deletion src/blender/addon/ui.py
Expand Up @@ -188,6 +188,8 @@ def draw(self, context):
col.prop(cscene, "aa_samples", text="Render")
col.prop(cscene, "preview_aa_samples", text="Viewport")

col.prop(cscene, "use_adaptive_sampling", text="Adaptive Sampling")


class CYCLES_RENDER_PT_sampling_sub_samples(CyclesButtonsPanel, Panel):
bl_label = "Sub Samples"
Expand Down Expand Up @@ -239,7 +241,13 @@ def draw(self, context):
row.prop(cscene, "seed")
row.prop(cscene, "use_animated_seed", text="", icon='TIME')

layout.prop(cscene, "sampling_pattern", text="Pattern")
col = layout.column(align=True)
col.active = not(cscene.use_adaptive_sampling)
col.prop(cscene, "sampling_pattern", text="Pattern")
col = layout.column(align=True)
col.active = cscene.use_adaptive_sampling
col.prop(cscene, "adaptive_min_samples", text="Adaptive Min Samples")
col.prop(cscene, "adaptive_threshold", text="Adaptive Threshold")

layout.prop(cscene, "use_square_samples")

Expand Down Expand Up @@ -803,6 +811,8 @@ def draw(self, context):
col.prop(cycles_view_layer, "denoising_store_passes", text="Denoising Data")
col = flow.column()
col.prop(cycles_view_layer, "pass_debug_render_time", text="Render Time")
col = flow.column()
col.prop(cycles_view_layer, "pass_debug_sample_count", text="Sample Count")

layout.separator()

Expand Down
3 changes: 2 additions & 1 deletion src/blender/blender_session.cpp
Expand Up @@ -474,7 +474,8 @@ void BlenderSession::render(BL::Depsgraph &b_depsgraph_)
b_rlay_name = b_view_layer.name();

/* add passes */
vector<Pass> passes = sync->sync_render_passes(b_rlay, b_view_layer);
vector<Pass> passes = sync->sync_render_passes(
b_rlay, b_view_layer, session_params.adaptive_sampling);
buffer_params.passes = passes;

PointerRNA crl = RNA_pointer_get(&b_view_layer.ptr, "cycles");
Expand Down
31 changes: 30 additions & 1 deletion src/blender/blender_sync.cpp
Expand Up @@ -291,6 +291,16 @@ void BlenderSync::sync_integrator()
integrator->sample_all_lights_indirect = get_boolean(cscene, "sample_all_lights_indirect");
integrator->light_sampling_threshold = get_float(cscene, "light_sampling_threshold");

if (RNA_boolean_get(&cscene, "use_adaptive_sampling")) {
integrator->sampling_pattern = SAMPLING_PATTERN_PMJ;
integrator->adaptive_min_samples = get_int(cscene, "adaptive_min_samples");
integrator->adaptive_threshold = get_float(cscene, "adaptive_threshold");
}
else {
integrator->adaptive_min_samples = INT_MAX;
integrator->adaptive_threshold = 0.0f;
}

int diffuse_samples = get_int(cscene, "diffuse_samples");
int glossy_samples = get_int(cscene, "glossy_samples");
int transmission_samples = get_int(cscene, "transmission_samples");
Expand All @@ -307,6 +317,8 @@ void BlenderSync::sync_integrator()
integrator->mesh_light_samples = mesh_light_samples * mesh_light_samples;
integrator->subsurface_samples = subsurface_samples * subsurface_samples;
integrator->volume_samples = volume_samples * volume_samples;
integrator->adaptive_min_samples = min(
integrator->adaptive_min_samples * integrator->adaptive_min_samples, INT_MAX);
}
else {
integrator->diffuse_samples = diffuse_samples;
Expand Down Expand Up @@ -482,6 +494,8 @@ PassType BlenderSync::get_pass_type(BL::RenderPass &b_pass)
MAP_PASS("Debug Ray Bounces", PASS_RAY_BOUNCES);
#endif
MAP_PASS("Debug Render Time", PASS_RENDER_TIME);
MAP_PASS("AdaptiveAuxBuffer", PASS_ADAPTIVE_AUX_BUFFER);
MAP_PASS("Debug Sample Count", PASS_SAMPLE_COUNT);
if (string_startswith(name, cryptomatte_prefix)) {
return PASS_CRYPTOMATTE;
}
Expand Down Expand Up @@ -517,7 +531,9 @@ int BlenderSync::get_denoising_pass(BL::RenderPass &b_pass)
return -1;
}

vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLayer &b_view_layer)
vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay,
BL::ViewLayer &b_view_layer,
bool adaptive_sampling)
{
vector<Pass> passes;

Expand Down Expand Up @@ -589,6 +605,10 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLa
b_engine.add_pass("Debug Render Time", 1, "X", b_view_layer.name().c_str());
Pass::add(PASS_RENDER_TIME, passes, "Debug Render Time");
}
/* Optional debug pass holding the per-pixel sample count. The pass is
 * registered with the Blender engine and added to the Cycles pass list under
 * the same name, as the neighbouring passes (e.g. "Debug Render Time",
 * "VolumeDir") do; previously the name was omitted from Pass::add here. */
if (get_boolean(crp, "pass_debug_sample_count")) {
  b_engine.add_pass("Debug Sample Count", 1, "X", b_view_layer.name().c_str());
  Pass::add(PASS_SAMPLE_COUNT, passes, "Debug Sample Count");
}
if (get_boolean(crp, "use_pass_volume_direct")) {
b_engine.add_pass("VolumeDir", 3, "RGB", b_view_layer.name().c_str());
Pass::add(PASS_VOLUME_DIRECT, passes, "VolumeDir");
Expand Down Expand Up @@ -650,6 +670,13 @@ vector<Pass> BlenderSync::sync_render_passes(BL::RenderLayer &b_rlay, BL::ViewLa
}
RNA_END;

if (adaptive_sampling) {
Pass::add(PASS_ADAPTIVE_AUX_BUFFER, passes);
if (!get_boolean(crp, "pass_debug_sample_count")) {
Pass::add(PASS_SAMPLE_COUNT, passes);
}
}

return passes;
}

Expand Down Expand Up @@ -883,6 +910,8 @@ SessionParams BlenderSync::get_session_params(BL::RenderEngine &b_engine,
params.use_profiling = params.device.has_profiling && !b_engine.is_preview() && background &&
BlenderSession::print_render_stats;

params.adaptive_sampling = RNA_boolean_get(&cscene, "use_adaptive_sampling");

return params;
}

Expand Down
4 changes: 3 additions & 1 deletion src/blender/blender_sync.h
Expand Up @@ -70,7 +70,9 @@ class BlenderSync {
int height,
void **python_thread_state);
void sync_view_layer(BL::SpaceView3D &b_v3d, BL::ViewLayer &b_view_layer);
vector<Pass> sync_render_passes(BL::RenderLayer &b_render_layer, BL::ViewLayer &b_view_layer);
vector<Pass> sync_render_passes(BL::RenderLayer &b_render_layer,
BL::ViewLayer &b_view_layer,
bool adaptive_sampling);
void sync_integrator();
void sync_camera(BL::RenderSettings &b_render,
BL::Object &b_override,
Expand Down
52 changes: 51 additions & 1 deletion src/device/device_cpu.cpp
Expand Up @@ -34,6 +34,7 @@
#include "kernel/kernel_types.h"
#include "kernel/split/kernel_split_data.h"
#include "kernel/kernel_globals.h"
#include "kernel/kernel_adaptive_sampling.h"

#include "kernel/filter/filter.h"

Expand Down Expand Up @@ -317,6 +318,10 @@ class CPUDevice : public Device {
REGISTER_SPLIT_KERNEL(next_iteration_setup);
REGISTER_SPLIT_KERNEL(indirect_subsurface);
REGISTER_SPLIT_KERNEL(buffer_update);
REGISTER_SPLIT_KERNEL(adaptive_stopping);
REGISTER_SPLIT_KERNEL(adaptive_filter_x);
REGISTER_SPLIT_KERNEL(adaptive_filter_y);
REGISTER_SPLIT_KERNEL(adaptive_adjust_samples);
#undef REGISTER_SPLIT_KERNEL
#undef KERNEL_FUNCTIONS
}
Expand Down Expand Up @@ -851,10 +856,33 @@ class CPUDevice : public Device {
path_trace_kernel()(kg, render_buffer, sample, x, y, tile.offset, tile.stride);
}
}

tile.sample = sample + 1;

task.update_progress(&tile, tile.w * tile.h);

/* Adaptive sampling check. Runs only when the film has an adaptive aux buffer
 * pass, on sample indices whose low two bits are both set (every 4th sample),
 * and once the sample index has reached adaptive_min_samples - 1. */
if (kernel_data.film.pass_adaptive_aux_buffer && (sample & 0x3) == 3 &&
sample >= kernel_data.integrator.adaptive_min_samples - 1) {
/* Describe this render tile as a WorkTile, the form the filter helpers take. */
WorkTile wtile;
wtile.x = tile.x;
wtile.y = tile.y;
wtile.w = tile.w;
wtile.h = tile.h;
wtile.offset = tile.offset;
wtile.stride = tile.stride;
wtile.buffer = (float *)tile.buffer;

/* Filter every row, then every column; `any` ORs together the results.
 * NOTE(review): presumably a true result means some pixel still needs more
 * samples -- confirm in kernel_adaptive_sampling.h. */
bool any = false;
for (int y = tile.y; y < tile.y + tile.h; ++y) {
any |= kernel_do_adaptive_filter_x(kg, y, &wtile);
}
for (int x = tile.x; x < tile.x + tile.w; ++x) {
any |= kernel_do_adaptive_filter_y(kg, x, &wtile);
}
/* No filter call returned true: record the tile as having reached end_sample
 * and leave the enclosing loop early. */
if (!any) {
tile.sample = end_sample;
break;
}
}
}
if (use_coverage) {
coverage.finalize();
Expand Down Expand Up @@ -931,6 +959,28 @@ class CPUDevice : public Device {
}
else {
path_trace(task, tile, kg);
/* After path tracing the tile, post-process every pixel when the task uses
 * adaptive sampling and the film has an adaptive aux buffer pass. */
if (task.integrator_adaptive && kernel_data.film.pass_adaptive_aux_buffer) {
float *render_buffer = (float *)tile.buffer;
for (int y = tile.y; y < tile.y + tile.h; y++) {
for (int x = tile.x; x < tile.x + tile.w; x++) {
/* Start of this pixel's passes in the render buffer. */
int index = tile.offset + x + y * tile.stride;
ccl_global float *buffer = render_buffer + index * kernel_data.film.pass_stride;
/* A negative stored sample count is flipped back to positive.
 * NOTE(review): presumably the sign marks pixels that stopped sampling
 * early -- confirm against the adaptive sampling kernels. */
if (buffer[kernel_data.film.pass_sample_count] < 0.0f) {
buffer[kernel_data.film.pass_sample_count] =
-buffer[kernel_data.film.pass_sample_count];
/* Ratio of the tile's sample count to this pixel's stored count, with the
 * stored count taken as at least start_sample + 1. */
float sample_multiplier = tile.sample /
max((float)tile.start_sample + 1.0f,
buffer[kernel_data.film.pass_sample_count]);
/* Skip the adjustment when the ratio is exactly 1. */
if (sample_multiplier != 1.0f) {
kernel_adaptive_post_adjust(kg, buffer, sample_multiplier);
}
}
else {
/* NOTE(review): tile.sample / (tile.sample - 1.0f) divides by zero when
 * tile.sample == 1 -- confirm that this branch cannot be reached then. */
kernel_adaptive_post_adjust(kg, buffer, tile.sample / (tile.sample - 1.0f));
}
}
}
}
}
}
else if (tile.task == RenderTile::DENOISE) {
Expand Down
58 changes: 58 additions & 0 deletions src/device/device_cuda.cpp
Expand Up @@ -1788,6 +1788,23 @@ class CUDADevice : public Device {

cuda_assert(cuFuncSetCacheConfig(cuPathTrace, CU_FUNC_CACHE_PREFER_L1));

/* Kernels for adaptive sampling. */
CUfunction cuAdaptiveStopping, cuAdaptiveFilterX, cuAdaptiveFilterY, cuAdaptiveScaleSamples;
if (task.integrator_adaptive) {
cuda_assert(
cuModuleGetFunction(&cuAdaptiveStopping, cuModule, "kernel_cuda_adaptive_stopping"));
cuda_assert(cuFuncSetCacheConfig(cuAdaptiveStopping, CU_FUNC_CACHE_PREFER_L1));
cuda_assert(
cuModuleGetFunction(&cuAdaptiveFilterX, cuModule, "kernel_cuda_adaptive_filter_x"));
cuda_assert(cuFuncSetCacheConfig(cuAdaptiveFilterX, CU_FUNC_CACHE_PREFER_L1));
cuda_assert(
cuModuleGetFunction(&cuAdaptiveFilterY, cuModule, "kernel_cuda_adaptive_filter_y"));
cuda_assert(cuFuncSetCacheConfig(cuAdaptiveFilterY, CU_FUNC_CACHE_PREFER_L1));
cuda_assert(cuModuleGetFunction(
&cuAdaptiveScaleSamples, cuModule, "kernel_cuda_adaptive_scale_samples"));
cuda_assert(cuFuncSetCacheConfig(cuAdaptiveScaleSamples, CU_FUNC_CACHE_PREFER_L1));
}

/* Allocate work tile. */
work_tiles.alloc(1);

Expand All @@ -1812,6 +1829,16 @@ class CUDADevice : public Device {

uint step_samples = divide_up(min_blocks * num_threads_per_block, wtile->w * wtile->h);

if (task.integrator_adaptive) {
/* Force to either 1, 2 or multiple of 4 samples per kernel invocation. */
if (step_samples == 3) {
step_samples = 2;
}
else if (step_samples > 4) {
step_samples &= 0xfffffffc;
}
}

/* Render all samples. */
int start_sample = rtile.start_sample;
int end_sample = rtile.start_sample + rtile.num_samples;
Expand All @@ -1832,6 +1859,26 @@ class CUDADevice : public Device {
cuda_assert(cuLaunchKernel(
cuPathTrace, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));

uint filter_sample = sample + wtile->num_samples - 1;
/* Run the adaptive sampling kernels when we're at a multiple of 4 samples.
* These are a series of tiny kernels because there is no grid synchronisation
* from within a kernel, so multiple kernel launches it is. */
if (task.integrator_adaptive && (filter_sample & 0x3) == 3) {
total_work_size = wtile->h * wtile->w;
void *args2[] = {&d_work_tiles, &filter_sample, &total_work_size};
num_blocks = divide_up(total_work_size, num_threads_per_block);
cuda_assert(cuLaunchKernel(
cuAdaptiveStopping, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args2, 0));
total_work_size = wtile->h;
num_blocks = divide_up(total_work_size, num_threads_per_block);
cuda_assert(cuLaunchKernel(
cuAdaptiveFilterX, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args2, 0));
total_work_size = wtile->w;
num_blocks = divide_up(total_work_size, num_threads_per_block);
cuda_assert(cuLaunchKernel(
cuAdaptiveFilterY, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args2, 0));
}

cuda_assert(cuCtxSynchronize());

/* Update progress. */
Expand All @@ -1843,6 +1890,17 @@ class CUDADevice : public Device {
break;
}
}

if (task.integrator_adaptive) {
CUdeviceptr d_work_tiles = cuda_device_ptr(work_tiles.device_pointer);
uint total_work_size = wtile->h * wtile->w;
void *args[] = {&d_work_tiles, &rtile.start_sample, &rtile.sample, &total_work_size};
uint num_blocks = divide_up(total_work_size, num_threads_per_block);
cuda_assert(cuLaunchKernel(
cuAdaptiveScaleSamples, num_blocks, 1, 1, num_threads_per_block, 1, 1, 0, 0, args, 0));
cuda_assert(cuCtxSynchronize());
task.update_progress(&rtile, rtile.w * rtile.h * wtile->num_samples);
}
}

void film_convert(DeviceTask &task,
Expand Down

0 comments on commit 8d378f9

Please sign in to comment.