Skip to content

Commit

Permalink
Megacommit with various improvements: Coarse-grained refactoring, mas…
Browse files Browse the repository at this point in the history
…sive simplification of foveated sample generation, 3D culling planes for samples, full 3D samples throughout the pipeline, improved clustering for foveated samples, fast bicubic interpolation.
  • Loading branch information
Michael Mara authored and Michael Mara committed Oct 8, 2018
1 parent 41d8d03 commit 3f5c7b2
Show file tree
Hide file tree
Showing 62 changed files with 1,789 additions and 1,821 deletions.
6 changes: 3 additions & 3 deletions libraries/hvvr/cuda.props
@@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ImportGroup Label="PropertySheets" />
<PropertyGroup>
Expand All @@ -12,9 +12,9 @@
</ItemDefinitionGroup>
<ItemGroup />
<ImportGroup Label="ExtensionSettings">
<Import Project="$(VCTargetsPath14)\BuildCustomizations\CUDA 8.0.props" />
<Import Project="$(VCTargetsPath14)\BuildCustomizations\CUDA 9.1.props" />
</ImportGroup>
<ImportGroup Label="ExtensionTargets">
<Import Project="$(VCTargetsPath14)\BuildCustomizations\CUDA 8.0.targets" />
<Import Project="$(VCTargetsPath14)\BuildCustomizations\CUDA 9.1.targets" />
</ImportGroup>
</Project>
89 changes: 58 additions & 31 deletions libraries/hvvr/raycaster/camera.cpp
Expand Up @@ -22,17 +22,16 @@ namespace hvvr {
SampleData::SampleData(const Sample* rawSamples,
uint32_t rawSampleCount,
uint32_t splitColorSamples,
const matrix3x3& sampleToCamera,
ThinLens lens,
Sample2Dto3DMappingSettings settings2DTo3D,
uint32_t rtWidth,
uint32_t rtHeight)
: splitColorSamples(splitColorSamples), lens(lens) {
: splitColorSamples(splitColorSamples) {
DynamicArray<SortedSample> sortedSamples(rawSampleCount);
for (size_t n = 0; n < rawSampleCount; n++) {
sortedSamples[n] = SortedSample(rawSamples[n], n % splitColorSamples);
}
uint32_t blockCount = uint32_t((sortedSamples.size() + BLOCK_SIZE - 1) / BLOCK_SIZE);
// TODO(whunt): allow different clustering methods
// TODO: allow different clustering methods
naiveXYCluster(ArrayView<SortedSample>(sortedSamples), blockCount);

sampleBounds = {vector2(1.0f, 1.0f), vector2(0.0f, 0.0f)};
Expand All @@ -48,15 +47,13 @@ SampleData::SampleData(const Sample* rawSamples,
cullRect.upper.x = INFINITY;
cullRect.upper.y = INFINITY;
validSampleCount = uint32_t(rawSampleCount);
samples.blockFrusta2D = DynamicArray<RayPacketFrustum2D>(blockCount);
samples.tileFrusta2D = DynamicArray<RayPacketFrustum2D>(blockCount * TILES_PER_BLOCK);
samples.blockFrusta3D = DynamicArray<RayPacketFrustum3D>(blockCount);
samples.tileFrusta3D = DynamicArray<RayPacketFrustum3D>(blockCount * TILES_PER_BLOCK);
blockedSamplePositions = DynamicArray<float>(blockCount * BLOCK_SIZE * 2);
blockedSampleExtents = DynamicArray<Sample::Extents>(blockCount * BLOCK_SIZE);
samples.generate(sortedSamples, blockCount, validSampleCount, cullRect, blockedSamplePositions,
blockedSampleExtents, lens, sampleToCamera);
sampleCount = uint32_t(blockCount * BLOCK_SIZE);
samples.directionalSamples = DynamicArray<DirectionalBeam>(blockCount * BLOCK_SIZE);

samples2D = SampleHierarchy2D(sortedSamples, blockCount, validSampleCount, cullRect, settings2DTo3D.thinLens,
settings2DTo3D.sampleToCamera);
samples.generateFrom2D(samples2D, settings2DTo3D);

imageLocationToSampleIndex = DynamicArray<int32_t>(rtWidth * rtHeight * splitColorSamples);
memset(imageLocationToSampleIndex.data(), 0xff, sizeof(int32_t) * imageLocationToSampleIndex.size()); // clear to -1
Expand All @@ -67,6 +64,10 @@ SampleData::SampleData(const Sample* rawSamples,
}
}

// Rebuilds the 3D sample hierarchy (`samples`) from the stored 2D hierarchy (`samples2D`)
// by delegating to SampleHierarchy::generateFrom2D with the supplied mapping settings.
// NOTE(review): `settings` is only passed through; it is not written to the
// `settings2DTo3D` member here - confirm whether callers expect that member to be updated.
void SampleData::generate3Dfrom2D(Sample2Dto3DMappingSettings settings) {
samples.generateFrom2D(samples2D, settings);
}

Camera::Camera(const FloatRect& viewport, float apertureRadius, GPUContext& gpuContext)
: _gpuCamera(nullptr), _lens({apertureRadius, 1.0f}), _eyeDir(0.0f, 0.0f, -1.0f) {
setViewport(viewport);
Expand Down Expand Up @@ -133,46 +134,43 @@ void Camera::setRenderTarget(const ImageResourceDescriptor& newRenderTarget) {
}

void Camera::setSamples(const Sample* rawSamples, uint32_t rawSampleCount, uint32_t splitColorSamples) {
setSampleData(SampleData(rawSamples, rawSampleCount, splitColorSamples, getSampleToCamera(), _lens,
setSampleData(SampleData(rawSamples, rawSampleCount, splitColorSamples, get2DSampleMappingSettings(),
_renderTarget.width, _renderTarget.height));
}

void Camera::setSampleData(const SampleData& sampleData) {
_sampleData = sampleData;

uint32_t blockCount = uint32_t(_sampleData.samples.blockFrusta3D.size());
uint32_t tileCount = uint32_t(_sampleData.samples.tileFrusta3D.size());
uint32_t blockCount = uint32_t(sampleData.samples.blockFrusta3D.size());
uint32_t tileCount = uint32_t(sampleData.samples.tileFrusta3D.size());

if (blockCount != _blockFrustaTransformed.size()) {
_blockFrustaTransformed = DynamicArray<RayPacketFrustum3D>(blockCount);
if (blockCount != _cpuHierarchy._blockFrusta.size()) {
_cpuHierarchy._blockFrusta = DynamicArray<RayPacketFrustum3D>(blockCount);
}
if (tileCount != _tileFrustaTransformed.size()) {
_tileFrustaTransformed = DynamicArray<RayPacketFrustum3D>(tileCount);
if (tileCount != _cpuHierarchy._tileFrusta.size()) {
_cpuHierarchy._tileFrusta = DynamicArray<RayPacketFrustum3D>(tileCount);
}

_gpuCamera->updateConfig(_outputMode, sampleData.imageLocationToSampleIndex.data(),
sampleData.blockedSamplePositions.data(), sampleData.blockedSampleExtents.data(), _lens,
sampleData.sampleCount, _renderTarget.width, _renderTarget.height,
const DynamicArray<DirectionalBeam>& samples = sampleData.samples.directionalSamples;
_gpuCamera->updateConfig(_outputFormat, sampleData.imageLocationToSampleIndex.data(), samples.data(), _lens,
uint32_t(samples.size()), _renderTarget.width, _renderTarget.height,
uint32_t(_renderTarget.stride), sampleData.splitColorSamples);
}

// Returns a read-only reference to the sample data currently stored on this camera (_sampleData).
const SampleData& Camera::getSampleData() const {
return _sampleData;
}

// Reports the valid sample count held by the GPU-side camera.
// Yields 0 when no GPU camera is attached (_gpuCamera is null).
const uint32_t Camera::getSampleCount() const {
    if (_gpuCamera == nullptr) {
        return 0;
    }
    return _gpuCamera->validSampleCount;
}


// Builds the 3x3 sample-to-camera matrix from the current viewport rectangle.
// The three vectors handed to matrix3x3 are, in order:
//   - the viewport width along x,
//   - the negated viewport height along y (lower.y - upper.y),
//   - the (lower.x, upper.y) corner placed at z = -1.
// NOTE(review): presumably maps unit-square sample coordinates onto the viewport
// with y flipped - confirm against matrix3x3's row/column convention.
matrix3x3 Camera::getSampleToCamera() const {
    const vector3 xSpan(_viewport.upper.x - _viewport.lower.x, 0, 0);
    const vector3 ySpan(0, _viewport.lower.y - _viewport.upper.y, 0);
    const vector3 corner(_viewport.lower.x, _viewport.upper.y, -1);
    return matrix3x3(xSpan, ySpan, corner);
}

// Returns the product of the camera-to-world transform and the sample-to-camera matrix,
// with both operands converted to matrix4x4 before multiplying.
matrix4x4 Camera::getSampleToWorld() const {
return matrix4x4(_cameraToWorld) * matrix4x4(getSampleToCamera());
}

// Returns the inverse of getSampleToWorld(), computed with invert() on every call.
matrix4x4 Camera::getWorldToSample() const {
return invert(getSampleToWorld());
}

void Camera::setCameraToWorld(const transform& cameraToWorld) {
_cameraToWorld = cameraToWorld;
Expand All @@ -186,8 +184,37 @@ const vector3& Camera::getTranslation() const {
return _cameraToWorld.translation;
}

vector3 Camera::getForward() const {
return vector3(-normalize(getCameraToWorld().m2));
// Binds the render target texture to the GPU camera when needed.
// Does nothing if the camera is disabled, if the render target is not a hardware
// render target, or if no new hardware target is pending. After a successful bind
// the pending flag (_newHardwareTarget) is cleared.
void Camera::setupRenderTarget(GPUContext& context) {
    if (!getEnabled()) {
        return;
    }

    // Same evaluation order as a single && test: the target query runs first.
    const bool bindPending = _renderTarget.isHardwareRenderTarget() && _newHardwareTarget;
    if (!bindPending) {
        return;
    }

    _gpuCamera->bindTexture(context, _renderTarget);
    _newHardwareTarget = false;
}

// Moves the rendered image out of the GPU camera.
// Non-hardware render targets receive a copy into their CPU buffer (data/width/height/stride);
// hardware render targets are written through the previously bound texture.
void Camera::extractImage() {
    if (!_renderTarget.isHardwareRenderTarget()) {
        _gpuCamera->copyImageToCPU((uint32_t*)_renderTarget.data, _renderTarget.width, _renderTarget.height,
                                   uint32_t(_renderTarget.stride));
        return;
    }
    _gpuCamera->copyImageToBoundTexture();
}

// Chooses the 2D-to-3D sample mapping for this camera.
// A positive _fovXDegrees selects the spherical-section mapping (using both FOV values);
// otherwise the settings are built from the sample-to-camera matrix and lens alone.
Sample2Dto3DMappingSettings Camera::get2DSampleMappingSettings() const {
    const bool sphericalRequested = _fovXDegrees > 0.0f;
    if (!sphericalRequested) {
        return Sample2Dto3DMappingSettings(getSampleToCamera(), _lens);
    }
    return Sample2Dto3DMappingSettings::sphericalSection(getSampleToCamera(), _lens, _fovXDegrees, _fovYDegrees);
}

// Stores the horizontal and vertical field-of-view values (in degrees, per the parameter names)
// that get2DSampleMappingSettings() reads when choosing the sample mapping.
// NOTE(review): the mapping selection tests only `_fovXDegrees > 0.0f`, so a zero or negative
// X value keeps the default mapping regardless of fovYDegrees - confirm this matches intent.
void Camera::setSphericalWarpSettings(float fovXDegrees, float fovYDegrees) {
_fovXDegrees = fovXDegrees;
_fovYDegrees = fovYDegrees;
}

} // namespace hvvr
50 changes: 31 additions & 19 deletions libraries/hvvr/raycaster/camera.h
Expand Up @@ -10,8 +10,9 @@
*/

#include "dynamic_array.h"
#include "graphics_types.h"
#include "foveated.h"
#include "gpu_samples.h"
#include "graphics_types.h"
#include "sample_hierarchy.h"
#include "samples.h"

Expand All @@ -23,36 +24,36 @@ namespace hvvr {
class GPUCamera;
class GPUContext;


// preprocessed samples, ready for rendering
struct SampleData {
SampleHierarchy2D samples2D;
Sample2Dto3DMappingSettings settings2DTo3D;
SampleHierarchy samples;
uint32_t splitColorSamples = 1;
uint32_t sampleCount;

DynamicArray<int32_t> imageLocationToSampleIndex;
// Flat array of sample positions (in vector2 format) without fancy swizzling for CPU vectorization
DynamicArray<float> blockedSamplePositions;
DynamicArray<Sample::Extents> blockedSampleExtents;

FloatRect sampleBounds = {{0.0f, 0.0f}, {0.0f, 0.0f}};
uint32_t validSampleCount = 0;
ThinLens lens = {0.0f, 5.0f};

SampleData(){};
SampleData(const Sample* rawSamples,
uint32_t rawSampleCount,
uint32_t splitColorSamples,
const matrix3x3& sampleToCamera,
ThinLens lens,
Sample2Dto3DMappingSettings settings2DTo3D,
uint32_t rtWidth,
uint32_t rtHeight);
void generate3Dfrom2D(Sample2Dto3DMappingSettings settings);
};


// TODO(anankervis): merge with GPU version of this class
class Camera {
friend class Raycaster;
// TODO(anankervis): remove
friend void polarSpaceFoveatedSetup(Raycaster* raycaster);

public:
Camera(const FloatRect& viewport, float apertureRadius, GPUContext& gpuContext);
~Camera();
Expand Down Expand Up @@ -84,27 +85,35 @@ class Camera {
void setRenderTarget(const ImageResourceDescriptor& newRenderTarget);
void setSamples(const Sample* rawSamples, uint32_t rawSampleCount, uint32_t splitColorSamples);

// If called with nonzero values, this camera uses a spherical section for ray generation
// (instead of the standard perspective transform).
void setSphericalWarpSettings(float fovXDegrees, float fovYDegrees);

void setSampleData(const SampleData& sampleData);
const SampleData& getSampleData() const;
const uint32_t getSampleCount() const;

matrix3x3 getSampleToCamera() const;
// Beware - this isn't actually suitable for taking a 2D sample coordinate + Z and converting to world space.
// Samples can be in any arbitrary space, packing, or function we choose. What's important is that when we
// unpack them, they turn into camera-relative 3D rays (origin offset + direction). From there, we can convert
// into world space using cameraToWorld.
matrix4x4 getSampleToWorld() const;
matrix4x4 getWorldToSample() const;

void setCameraToWorld(const transform& cameraToWorld);
matrix4x4 getCameraToWorld() const;
const vector3& getTranslation() const;
vector3 getForward() const;

void setupRenderTarget(GPUContext& context);
void extractImage();

protected:
Sample2Dto3DMappingSettings get2DSampleMappingSettings() const;

float _fovXDegrees = 0.0f;
float _fovYDegrees = 0.0f;

// TODO(anankervis): clean up direct access of protected members by Raycaster

GPUCamera* _gpuCamera;

matrix4x4 _worldToEyePrevious = matrix4x4::identity();
// Initialize to an invalid transform since there is no previous frame on the initial frame
matrix4x4 _worldToEyePrevious = matrix4x4::zero();
matrix3x3 _eyePreviousToSamplePrevious = matrix3x3::identity();

// Incremented on every render
Expand All @@ -116,13 +125,16 @@ class Camera {
ThinLens _lens = {0.0f, 1.0f};
bool _enabled = true;
ImageResourceDescriptor _renderTarget;
RaycasterOutputMode _outputMode = RaycasterOutputMode::COLOR_RGBA8;
RaycasterOutputFormat _outputFormat = RaycasterOutputFormat::COLOR_RGBA8;
FoveatedSampleData _foveatedSampleData;

// Only for polar foveated sampling
std::vector<vector2ui> _polarRemapToPixel;

DynamicArray<RayPacketFrustum3D> _blockFrustaTransformed;
DynamicArray<RayPacketFrustum3D> _tileFrustaTransformed;
struct CPUHierarchy {
DynamicArray<RayPacketFrustum3D> _blockFrusta;
DynamicArray<RayPacketFrustum3D> _tileFrusta;
} _cpuHierarchy;

transform _cameraToWorld = transform::identity();

Expand Down
78 changes: 75 additions & 3 deletions libraries/hvvr/raycaster/cuda_util.h
Expand Up @@ -19,8 +19,9 @@
#define cutilSafeCall(error) __cudaSafeCall(error, __FILE__, __LINE__)
inline void __cudaSafeCall(cudaError_t error, const char* file, const int line) {
if (error != cudaSuccess) {
fprintf(stderr, "error: CudaSafeCall() failed at %s:%d with %s\n", file, line, cudaGetErrorString(error));
#ifdef _WIN32
fprintf(stderr, "error %d: CudaSafeCall() failed at %s:%d with %s\n", error, file, line,
cudaGetErrorString(error));
#if defined(_WIN32)
__debugbreak();
#else
exit(error);
Expand Down Expand Up @@ -56,4 +57,75 @@ struct KernelDim {
}
};

#define CUDA_INF __int_as_float(0x7f800000)
#define CUDA_INF __int_as_float(0x7f800000)


// Based on https://stackoverflow.com/questions/52286202/dynamic-dispatch-to-template-function-c
// Use to generate all template function permutations and dispatch properly at runtime for a prefix of template booleans
// Makes calling cuda kernels with many permutations concise.
// Example:
// Change
// if (b0) {
// if (b1) {
// if (b2) {
// myFunc<true, true, true, otherArgs>(args);
// }
// else {
// myFunc<true, true, false, otherArgs>(args);
// }
// } else {
// if (b2) {
// myFunc<true, false, true, otherArgs>(args);
// }
// else {
// myFunc<true, false, false, otherArgs>(args);
// }
// }
// } else {
// if (b1) {
// if (b2) {
// myFunc<false, true, true, otherArgs>(args);
// }
// else {
// myFunc<false, true, false, otherArgs>(args);
// }
// } else {
// if (b2) {
// myFunc<false, false, true, otherArgs>(args);
// }
// else {
// myFunc<false, false, false, otherArgs>(args);
// }
// }
// }
// into:
// std::array<bool, 3> bargs = { { b0, b1, b2 } };
// dispatch_bools<3>{}(bargs, [&](auto...Bargs) {
// myFunc<decltype(Bargs)::value..., otherArgs>(args);
// });
//
// You may want to #pragma warning( disable : 4100) around the call, since there will be unreferenced Bargs in the call
// chain
// Compile-time boolean tag. kbool<true> and kbool<false> are distinct types, so a callee
// can recover the value as a constant expression with decltype(arg)::value.
template <bool b>
using kbool = std::integral_constant<bool, b>;

#pragma warning(push)
#pragma warning(disable : 4100)
// Turns the first `max` runtime booleans of `input` into compile-time kbool tags and
// invokes `continuation` with them, in the same order as they appear in `input`
// (input[0] becomes the first argument).
//
// Each recursion level inspects input[max - 1], prepends the matching kbool tag to the
// already-collected tags and recurses with max - 1, so every one of the 2^max tag
// combinations is instantiated at compile time while only one path runs.
//
//   input        - runtime flags; must hold at least `max` elements (checked at compile time).
//   continuation - callable accepting `max` kbool arguments (typically a generic lambda).
//   Bools...     - tags collected so far; callers normally pass none.
template <std::size_t max>
struct dispatch_bools {
    template <std::size_t N, class F, class... Bools>
    void operator()(std::array<bool, N> const& input, F&& continuation, Bools...) {
        // Without this check, max > N would silently read past the end of the array.
        static_assert(max <= N, "dispatch_bools<max> requires an input array with at least max elements");
        // static_cast<F&&> is what std::forward<F> does; spelled out to avoid needing <utility>.
        // Only one branch executes, so the continuation is forwarded at most once per level.
        if (input[max - 1])
            dispatch_bools<max - 1>{}(input, static_cast<F&&>(continuation), kbool<true>{}, Bools{}...);
        else
            dispatch_bools<max - 1>{}(input, static_cast<F&&>(continuation), kbool<false>{}, Bools{}...);
    }
};
// Recursion terminator: every flag has been converted, so call the continuation with the tags.
template <>
struct dispatch_bools<0> {
    template <std::size_t N, class F, class... Bools>
    void operator()(std::array<bool, N> const&, F&& continuation, Bools...) {
        // The array parameter is intentionally unnamed: it is no longer needed here.
        static_cast<F&&>(continuation)(Bools{}...);
    }
};
#pragma warning(pop)

0 comments on commit 3f5c7b2

Please sign in to comment.