Permalink
Browse files

Initial WebAssembly version via Emscripten (no SIMD, no threads).

640x360 resolution, built with emcc 1.38.19, with -O3:

- Intel Core i9 2.9GHz (2018 MBP), Mac Chrome 70: 5.3Mray/s
- AMD ThreadRipper 1950X 3.4GHz, Win Chrome 70: 4.6Mray/s
- iPhone XS/XR, Safari iOS12: 4.5Mray/s
- iPhone 8+, Safari iOS12: 4.0Mray/s
- iPhone SE, Safari iOS12: 2.4Mray/s
- Galaxy Note 9 Snapdragon 845, Chrome: 2.0Mray/s
- iPhone 6, Safari iOS12: 1.7Mray/s
  • Loading branch information...
aras-p committed Nov 15, 2018
1 parent cbf8b68 commit 344e9cefa70ace1f8f9d80a334996b6cb714bef9
Showing with 164 additions and 4 deletions.
  1. +2 −0 .gitignore
  2. +1 −0 Cpp/Emscripten/build.sh
  3. +67 −0 Cpp/Emscripten/main.cpp
  4. +68 −0 Cpp/Emscripten/toypathtracer.html
  5. +10 −2 Cpp/Source/Config.h
  6. +2 −2 Cpp/Source/MathSimd.h
  7. +14 −0 Cpp/Source/Test.cpp
@@ -20,3 +20,5 @@ Unity/Temp/
Unity/Builds
Unity/Assets/Plugins*
Cpp/Windows/Compiled*Shader.h
Cpp/Emscripten/toypathtracer.js
Cpp/Emscripten/toypathtracer.wasm
@@ -0,0 +1 @@
#!/bin/sh
# Builds the WebAssembly version of the path tracer with Emscripten.
# Produces toypathtracer.js and toypathtracer.wasm next to this script.

# Stop at the first failing command instead of carrying on silently.
set -e

# The source paths below are relative to this script's directory, so run from there
# regardless of where the script was invoked from.
cd "$(dirname "$0")"

emcc -O3 -std=c++11 \
    -s WASM=1 \
    -s ALLOW_MEMORY_GROWTH=1 \
    -s EXTRA_EXPORTED_RUNTIME_METHODS='["cwrap"]' \
    -o toypathtracer.js \
    main.cpp ../Source/Maths.cpp ../Source/Test.cpp
@@ -0,0 +1,67 @@
#include <emscripten.h>
#include <stdint.h>
#include <malloc.h>
#include <string.h>
#include <algorithm>
#include <math.h>
#include "../Source/Test.h"
// Allocates an uninitialised 8-bit RGBA pixel buffer (4 bytes per pixel) for the
// JavaScript side to wrap and pass to render().
//
// Returns NULL when either dimension is not positive, when the byte count would not
// fit in size_t, or when malloc fails. Release the buffer with destroy_buffer().
EMSCRIPTEN_KEEPALIVE
extern "C" uint8_t* create_buffer(int width, int height)
{
    if (width <= 0 || height <= 0)
        return NULL;

    const size_t bytesPerPixel = 4;
    const size_t w = (size_t)width;
    const size_t h = (size_t)height;
    // Do the size maths in size_t and reject products that would wrap around;
    // the original int multiplication could overflow (undefined behaviour).
    if (w > SIZE_MAX / bytesPerPixel / h)
        return NULL;

    return (uint8_t*)malloc(w * h * bytesPerPixel);
}
// Releases a pixel buffer previously handed out by create_buffer().
// Passing a null pointer is harmless, since free() ignores it.
EMSCRIPTEN_KEEPALIVE
extern "C" void destroy_buffer(uint8_t* p)
{
    void* block = p;
    free(block);
}
// Floating point, linear colour space RGBA image (4 floats per pixel) that DrawTest
// renders into; allocated lazily by render().
static float* backbuffer = NULL;
// Number of frames rendered so far; passed to UpdateTest/DrawTest and incremented
// after every frame.
static int frameCount = 0;
// Ray count reported by DrawTest for the latest frame; read from JS via getRayCount().
static int rayCount = 0;
// Returns the ray count stored by the most recent render() call
// (0 until the first frame has been rendered).
EMSCRIPTEN_KEEPALIVE
extern "C" int getRayCount()
{
    const int raysLastFrame = rayCount;
    return raysLastFrame;
}
// Converts one linear colour channel to an 8-bit value using the cheap sqrt
// approximation of the sRGB curve.
static uint8_t channel_to_srgb8(float linear)
{
    const float scaled = sqrtf(linear) * 255.0f;
    // Written as !(scaled > 0) so that NaN (e.g. sqrtf of a negative input) also maps
    // to 0: converting NaN or an out-of-range float to uint8_t is undefined behaviour.
    if (!(scaled > 0.0f))
        return 0;
    if (scaled >= 255.0f)
        return 255;
    return (uint8_t)scaled;
}

// Renders one frame and writes it to `screen` as 8-bit RGBA.
//
//   screen        - destination buffer of width*height*4 bytes (see create_buffer)
//   width, height - image size in pixels; the call is ignored unless both are positive
//   time          - timestamp in milliseconds; drives the animation
//
// The float backbuffer is (re)allocated whenever the requested size differs from the
// size it was created for, so changing dimensions between calls cannot overrun it.
EMSCRIPTEN_KEEPALIVE
extern "C" void render(uint8_t* screen, int width, int height, double time)
{
    if (!screen || width <= 0 || height <= 0)
        return;

    const size_t floatsPerPixel = 4;
    const size_t w = (size_t)width;
    const size_t h = (size_t)height;
    // Reject sizes whose backbuffer byte count would not fit in size_t.
    if (w > SIZE_MAX / (floatsPerPixel * sizeof(float)) / h)
        return;

    // Dimensions the current backbuffer was allocated for.
    static int backbufferWidth;
    static int backbufferHeight;
    if (!backbuffer || backbufferWidth != width || backbufferHeight != height)
    {
        delete[] backbuffer;
        backbuffer = nullptr;
        const size_t floatCount = w * h * floatsPerPixel;
        backbuffer = new float[floatCount];
        memset(backbuffer, 0, floatCount * sizeof(float));
        backbufferWidth = width;
        backbufferHeight = height;
        // Fresh, zeroed buffer: go back to the same state as the very first call
        // (presumably the progressive mode blends with earlier frames - TODO confirm).
        frameCount = 0;
    }

    float timeS = (float)(time / 1000);
    // slow down animation time compared to C++/GPU versions, because single threaded
    // on the web is much slower
    timeS *= 0.2f;

    unsigned flags = kFlagAnimate | kFlagProgressive;
    UpdateTest(timeS, frameCount, width, height, flags);
    DrawTest(timeS, frameCount, width, height, backbuffer, rayCount, flags);
    ++frameCount;

    // We get a floating point, linear color space buffer result.
    // Convert into 8bit/channel RGBA, and do a cheap sRGB approximation via sqrt.
    // Note that C++ versions don't do this since they feed the linear FP buffer
    // into the texture directly.
    for (int y = 0; y < height; ++y)
    {
        // Output row y is read from backbuffer row (height-y-1): the image is flipped vertically.
        const float* bb = backbuffer + (size_t)(height - y - 1) * w * floatsPerPixel;
        for (int x = 0; x < width; ++x)
        {
            screen[0] = channel_to_srgb8(bb[0]);
            screen[1] = channel_to_srgb8(bb[1]);
            screen[2] = channel_to_srgb8(bb[2]);
            screen[3] = 255; // always fully opaque
            screen += 4;
            bb += 4;
        }
    }
}
@@ -0,0 +1,68 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>ToyPathTracer - WebAssembly</title>
</head>
<body>
<!-- The canvas width/height attributes are plain integers (pixel counts), not CSS lengths. -->
<canvas id="screen" width="640" height="360" style="border: black 1px solid"></canvas>
<div id="stats">Stats</div>
<script src="toypathtracer.js"></script>
<script>
// Called by the Emscripten runtime once the WebAssembly module is ready to be used.
Module.onRuntimeInitialized = _ =>
{
    // JavaScript wrappers around the functions exported by main.cpp.
    const api =
    {
        create_buffer: Module.cwrap('create_buffer', 'number', ['number', 'number']),
        destroy_buffer: Module.cwrap('destroy_buffer', '', ['number']),
        render: Module.cwrap('render', '', ['number', 'number', 'number', 'number']),
        get_ray_count: Module.cwrap('getRayCount', 'number', []),
    };
    var width = 640;
    var height = 360;
    var stats = document.getElementById('stats');
    var canvas = document.getElementById('screen');
    if (!canvas.getContext)
        return;

    var ctx = canvas.getContext('2d');
    var pointer = api.create_buffer(width, height);
    if (!pointer)
    {
        stats.textContent = 'Failed to allocate the screen buffer';
        return;
    }

    var usub = null;
    var img = null;
    // (Re)creates the ImageData that views the pixel buffer inside the wasm heap.
    // The module is built with ALLOW_MEMORY_GROWTH=1: when the heap grows, the runtime
    // replaces Module.HEAP8 with a view over a new ArrayBuffer and views created over
    // the old one no longer show the rendered pixels, so they must be rebuilt.
    function refreshImage()
    {
        if (usub === null || usub.buffer !== Module.HEAP8.buffer)
        {
            usub = new Uint8ClampedArray(Module.HEAP8.buffer, pointer, width*height*4);
            img = new ImageData(usub, width, height);
        }
    }
    refreshImage();

    var start = null;

    // Animation-frame callback: renders a new frame once more than 100ms have passed
    // since the previous one, otherwise just waits for the next animation frame.
    function step(timestamp)
    {
        if (start === null) start = timestamp;
        var progress = timestamp - start;
        if (progress > 100)
        {
            // performance.now() is monotonic and has sub-millisecond resolution,
            // unlike Date which is wall-clock time in whole milliseconds.
            var t0 = performance.now();
            api.render(pointer, width, height, timestamp);
            var t1 = performance.now();
            var ms = t1 - t0;
            var fps = 1000.0 / ms;
            var rayCount = api.get_ray_count();
            var mraysS = rayCount / (ms / 1000.0) / 1000000.0;
            var mraysFrame = rayCount / 1000000.0;
            stats.innerHTML = `${width}x${height}: ${ms.toFixed(1)}ms (${fps.toFixed(2)}FPS) <b>${mraysS.toFixed(2)}Mray/s</b> ${mraysFrame.toFixed(2)}Mray/frame`;
            start = timestamp;
            window.requestAnimationFrame(draw);
        }
        else
        {
            window.requestAnimationFrame(step);
        }
    }

    // Copies the most recently rendered frame to the canvas, then schedules the next step.
    function draw()
    {
        // render() may have grown the wasm heap; make sure the view is still current.
        refreshImage();
        ctx.putImageData(img, 0, 0);
        window.requestAnimationFrame(step);
    }

    window.requestAnimationFrame(step);
};
</script>
</body>
</html>
@@ -6,6 +6,14 @@
#define kBackbufferWidth 1280
#define kBackbufferHeight 720
// Platform capabilities. The Emscripten (WebAssembly) build runs without SIMD and
// without threads, so both are switched off there; all other targets enable them.
#if defined(__EMSCRIPTEN__)
#define CPU_CAN_DO_SIMD 0
#define CPU_CAN_DO_THREADS 0
#else
#define CPU_CAN_DO_SIMD 1
#define CPU_CAN_DO_THREADS 1
#endif
#define DO_SAMPLES_PER_PIXEL 4
#define DO_ANIMATE_SMOOTHING 0.9f
@@ -19,7 +27,7 @@
#define kCSMaxObjects 64
// Should float3 struct use SSE/NEON?
#define DO_FLOAT3_WITH_SIMD (!(DO_COMPUTE_GPU) && 1)
#define DO_FLOAT3_WITH_SIMD (!(DO_COMPUTE_GPU) && CPU_CAN_DO_SIMD && 1)
// Should HitSpheres function use SSE/NEON?
#define DO_HIT_SPHERES_SIMD 1
#define DO_HIT_SPHERES_SIMD (CPU_CAN_DO_SIMD && 1)
@@ -8,7 +8,7 @@
#define kSimdWidth 4
#if !defined(__arm__) && !defined(__arm64__)
#if !defined(__arm__) && !defined(__arm64__) && !defined(__EMSCRIPTEN__)
// ---- SSE implementation
@@ -91,7 +91,7 @@ VM_INLINE __m128i select(__m128i a, __m128i b, bool4 cond)
// Square root of a float4, computed with the SSE _mm_sqrt_ps intrinsic on the
// underlying register.
VM_INLINE float4 sqrtf(float4 v)
{
    const __m128 roots = _mm_sqrt_ps(v.m);
    return float4(roots);
}
#else
#elif !defined(__EMSCRIPTEN__)
// ---- NEON implementation
@@ -2,7 +2,9 @@
#include "Test.h"
#include "Maths.h"
#include <algorithm>
#if CPU_CAN_DO_THREADS
#include "enkiTS/TaskScheduler_c.h"
#endif
#include <atomic>
// 46 spheres (2 emissive) when enabled; 9 spheres (1 emissive) when disabled
@@ -227,17 +229,23 @@ static float3 Trace(const Ray& r, int depth, int& inoutRayCount, uint32_t& state
}
}
// enkiTS task scheduler handle; only present in builds that can use threads.
// Created in InitializeTest() and destroyed in ShutdownTest().
#if CPU_CAN_DO_THREADS
static enkiTaskScheduler* g_TS;
#endif
// One-time setup for the test. In threaded builds this creates and initialises the
// enkiTS task scheduler and stores it in g_TS; otherwise it does nothing.
void InitializeTest()
{
#if CPU_CAN_DO_THREADS
    enkiTaskScheduler* scheduler = enkiNewTaskScheduler();
    enkiInitTaskScheduler(scheduler);
    g_TS = scheduler;
#endif
}
// Releases what InitializeTest() created. In threaded builds this deletes the enkiTS
// task scheduler; otherwise it does nothing. Safe to call more than once, or without
// a prior InitializeTest().
void ShutdownTest()
{
#if CPU_CAN_DO_THREADS
    if (g_TS)
    {
        enkiDeleteTaskScheduler(g_TS);
        // Clear the handle so a repeated shutdown cannot delete the scheduler twice.
        g_TS = nullptr;
    }
#endif
}
struct JobData
@@ -340,11 +348,17 @@ void DrawTest(float time, int frameCount, int screenWidth, int screenHeight, flo
args.cam = &s_Cam;
args.testFlags = testFlags;
args.rayCount = 0;
#if CPU_CAN_DO_THREADS
enkiTaskSet* task = enkiCreateTaskSet(g_TS, TraceRowJob);
bool threaded = true;
enkiAddTaskSetToPipeMinRange(g_TS, task, &args, screenHeight, threaded ? 4 : screenHeight);
enkiWaitForTaskSet(g_TS, task);
enkiDeleteTaskSet(task);
#else
TraceRowJob(0, screenHeight, 0, &args);
#endif
outRayCount = args.rayCount;
}

0 comments on commit 344e9ce

Please sign in to comment.