New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GPU: naïve Metal implementation #5

Merged
merged 2 commits into from Apr 3, 2018
Jump to file or symbol
Failed to load files and symbols.
+539 −33
Diff settings

Always

Just for now

@@ -707,11 +707,11 @@
<objects>
<viewController id="XfG-lQ-9wD" customClass="GameViewController" sceneMemberID="viewController">
<view key="view" wantsLayer="YES" id="m2S-Jp-Qdl" customClass="MTKView">
<rect key="frame" x="0.0" y="0.0" width="800" height="600"/>
<rect key="frame" x="0.0" y="0.0" width="1280" height="720"/>
<autoresizingMask key="autoresizingMask"/>
<subviews>
<textField horizontalHuggingPriority="251" verticalHuggingPriority="750" fixedFrame="YES" translatesAutoresizingMaskIntoConstraints="NO" id="8Gq-yW-31S">
<rect key="frame" x="4" y="575" width="462" height="17"/>
<rect key="frame" x="4" y="695" width="462" height="17"/>
<autoresizingMask key="autoresizingMask" flexibleMaxX="YES" flexibleMinY="YES"/>
<textFieldCell key="cell" scrollable="YES" lineBreakMode="clipping" sendsActionOnEndEditing="YES" title="Perf" id="Lan-R5-5SX">
<font key="font" metaFont="system"/>
View
@@ -3,9 +3,33 @@
#import "Renderer.h"
#include "../Source/Config.h"
#include "../Source/Test.h"
#include "../Source/Maths.h"
#include "../Source/Test.h"
#define DO_COMPUTE 1
static const NSUInteger kMaxBuffersInFlight = 3;
#if DO_COMPUTE
// Metal on Mac needs buffer offsets to be 256-byte aligned
static int AlignedSize(int sz)
{
return (sz + 0xFF) & ~0xFF;
}
struct ComputeParams
{
Camera cam;
int sphereCount;
int screenWidth;
int screenHeight;
int frames;
float invWidth;
float invHeight;
float lerpFac;
};
#endif
@implementation Renderer
@@ -15,9 +39,22 @@ @implementation Renderer
id <MTLCommandQueue> _commandQueue;
id <MTLRenderPipelineState> _pipelineState;
id <MTLDepthStencilState> _depthState;
id <MTLDepthStencilState> _depthState;
#if DO_COMPUTE
id <MTLComputePipelineState> _computeState;
// all the data in separate buffers
id <MTLBuffer> _computeSpheres;
id <MTLBuffer> _computeMaterials;
id <MTLBuffer> _computeParams;
id <MTLBuffer> _computeCounter;
int _sphereCount;
int _objSize;
int _matSize;
int _uniformBufferIndex;
#endif
id <MTLTexture> _backbuffer;
id <MTLTexture> _backbuffer, _backbuffer2;
int _backbufferIndex;
float* _backbufferPixels;
mach_timebase_info_data_t _clock_timebase;
@@ -30,7 +67,8 @@ -(nonnull instancetype)initWithMetalKitView:(nonnull MTKView *)view withLabel:(n
if(self)
{
_label = label;
_device = view.device;
_device = view.device;
printf("GPU: %s\n", [[_device name] UTF8String]);
_inFlightSemaphore = dispatch_semaphore_create(kMaxBuffersInFlight);
mach_timebase_info(&_clock_timebase);
[self _loadMetalWithView:view];
@@ -41,14 +79,32 @@ -(nonnull instancetype)initWithMetalKitView:(nonnull MTKView *)view withLabel:(n
}
- (void)_loadMetalWithView:(nonnull MTKView *)view;
{
{
view.depthStencilPixelFormat = MTLPixelFormatDepth32Float_Stencil8;
view.colorPixelFormat = MTLPixelFormatBGRA8Unorm;
view.sampleCount = 1;
NSError *error = NULL;
id<MTLLibrary> defaultLibrary = [_device newDefaultLibrary];
id <MTLFunction> vertexFunction = [defaultLibrary newFunctionWithName:@"vertexShader"];
id <MTLFunction> fragmentFunction = [defaultLibrary newFunctionWithName:@"fragmentShader"];
id <MTLFunction> fragmentFunction = [defaultLibrary newFunctionWithName:@"fragmentShader"];
#if DO_COMPUTE
id <MTLFunction> computeFunction = [defaultLibrary newFunctionWithName:@"TraceGPU"];
_computeState = [_device newComputePipelineStateWithFunction:computeFunction error:&error];
if (!_computeState)
NSLog(@"Failed to created compute pipeline state, error %@", error);
int camSize;
GetObjectCount(_sphereCount, _objSize, _matSize, camSize);
assert(_objSize == 20);
assert(_matSize == 36);
assert(camSize == 88);
_computeSpheres = [_device newBufferWithLength:AlignedSize(_sphereCount*_objSize)*kMaxBuffersInFlight options:MTLResourceStorageModeManaged];
_computeMaterials = [_device newBufferWithLength:AlignedSize(_sphereCount*_matSize)*kMaxBuffersInFlight options:MTLResourceStorageModeManaged];
_computeParams = [_device newBufferWithLength:AlignedSize(sizeof(ComputeParams))*kMaxBuffersInFlight options:MTLResourceStorageModeManaged];
_computeCounter = [_device newBufferWithLength:AlignedSize(4)*kMaxBuffersInFlight options:MTLStorageModeShared];
_uniformBufferIndex = 0;
#endif
MTLRenderPipelineDescriptor *pipelineStateDescriptor = [[MTLRenderPipelineDescriptor alloc] init];
pipelineStateDescriptor.sampleCount = view.sampleCount;
@@ -58,7 +114,6 @@ - (void)_loadMetalWithView:(nonnull MTKView *)view;
pipelineStateDescriptor.depthAttachmentPixelFormat = view.depthStencilPixelFormat;
pipelineStateDescriptor.stencilAttachmentPixelFormat = view.depthStencilPixelFormat;
NSError *error = NULL;
_pipelineState = [_device newRenderPipelineStateWithDescriptor:pipelineStateDescriptor error:&error];
if (!_pipelineState)
{
@@ -73,8 +128,15 @@ - (void)_loadMetalWithView:(nonnull MTKView *)view;
_commandQueue = [_device newCommandQueue];
MTLTextureDescriptor* desc = [MTLTextureDescriptor texture2DDescriptorWithPixelFormat:MTLPixelFormatRGBA32Float width:kBackbufferWidth height:kBackbufferHeight mipmapped:NO];
desc.usage = MTLTextureUsageShaderRead;
_backbuffer = [_device newTextureWithDescriptor:desc];
desc.usage = MTLTextureUsageShaderRead;
#if DO_COMPUTE
desc.usage |= MTLTextureUsageShaderWrite;
desc.storageMode = MTLStorageModePrivate;
#endif
_backbuffer = [_device newTextureWithDescriptor:desc];
_backbuffer2 = [_device newTextureWithDescriptor:desc];
_backbufferPixels = new float[kBackbufferWidth * kBackbufferHeight * 4];
memset(_backbufferPixels, 0, kBackbufferWidth * kBackbufferHeight * 4 * sizeof(_backbufferPixels[0]));
}
@@ -83,28 +145,82 @@ - (void)_loadMetalWithView:(nonnull MTKView *)view;
- (void)_loadAssets
{
InitializeTest();
}
}
static uint64_t _computeStartTime;
static uint64_t _computeDur;
static size_t rayCounter = 0;
- (void)_updateBackbufferState
- (void)_doRenderingWith:(id <MTLCommandBuffer>) cmd;
{
static int totalCounter = 0;
static int frameCounter = 0;
static uint64_t frameTime = 0;
static size_t rayCounter = 0;
uint64_t time1 = mach_absolute_time();
uint64_t time1 = mach_absolute_time();
_computeStartTime = time1;
uint64_t curNs = (time1 * _clock_timebase.numer) / _clock_timebase.denom;
float curT = float(curNs * 1.0e-9f);
int rayCount;
UpdateTest(curT, totalCounter, kBackbufferWidth, kBackbufferHeight);
#if DO_COMPUTE
_backbufferIndex = 1-_backbufferIndex;
_uniformBufferIndex = (_uniformBufferIndex + 1) % kMaxBuffersInFlight;
uint8_t* dataSpheres = (uint8_t*)[_computeSpheres contents];
uint8_t* dataMaterials = (uint8_t*)[_computeMaterials contents];
uint8_t* dataParams = (uint8_t*)[_computeParams contents];
uint8_t* dataCounter = (uint8_t*)[_computeCounter contents];
int spheresSize = AlignedSize(_sphereCount * _objSize);
int matsSize = AlignedSize(_sphereCount * _matSize);
int paramsSize = AlignedSize(sizeof(ComputeParams));
int counterSize = AlignedSize(4);
ComputeParams* params = (ComputeParams*)(dataParams+_uniformBufferIndex*paramsSize);
GetSceneDesc(dataSpheres+_uniformBufferIndex*spheresSize, dataMaterials+_uniformBufferIndex*matsSize, params);
params->sphereCount = _sphereCount;
params->screenWidth = kBackbufferWidth;
params->screenHeight = kBackbufferHeight;
params->frames = totalCounter;
params->invWidth = 1.0f / kBackbufferWidth;
params->invHeight = 1.0f / kBackbufferHeight;
params->lerpFac = float(totalCounter) / float(totalCounter+1);
#if DO_ANIMATE
params->lerpFac *= DO_ANIMATE_SMOOTHING;
#endif
#if !DO_PROGRESSIVE
params->lerpFac = 0;
#endif
*(int*)(dataCounter+_uniformBufferIndex*counterSize) = 0;
[_computeSpheres didModifyRange:NSMakeRange(_uniformBufferIndex*spheresSize, spheresSize)];
[_computeMaterials didModifyRange:NSMakeRange(_uniformBufferIndex*matsSize, matsSize)];
[_computeParams didModifyRange:NSMakeRange(_uniformBufferIndex*paramsSize, paramsSize)];
id<MTLComputeCommandEncoder> enc = [cmd computeCommandEncoder];
[enc setComputePipelineState:_computeState];
[enc setBuffer:_computeSpheres offset:_uniformBufferIndex*spheresSize atIndex:0];
[enc setBuffer:_computeMaterials offset:_uniformBufferIndex*matsSize atIndex:1];
[enc setBuffer:_computeParams offset:_uniformBufferIndex*paramsSize atIndex:2];
[enc setBuffer:_computeCounter offset:_uniformBufferIndex*counterSize atIndex:3];
[enc setTexture:_backbufferIndex==0?_backbuffer2:_backbuffer atIndex: 0];
[enc setTexture:_backbufferIndex==0?_backbuffer:_backbuffer2 atIndex: 1];
MTLSize groupSize = {16, 16, 1};
MTLSize groupCount = {kBackbufferWidth/groupSize.width, kBackbufferHeight/groupSize.height, 1};
[enc dispatchThreadgroups:groupCount threadsPerThreadgroup:groupSize];
[enc endEncoding];
#else
int rayCount;
DrawTest(curT, totalCounter, kBackbufferWidth, kBackbufferHeight, _backbufferPixels, rayCount);
rayCounter += rayCount;
rayCounter += rayCount;
#endif
uint64_t time2 = mach_absolute_time();
++frameCounter;
++totalCounter;
frameTime += (time2-time1);
++totalCounter;
#if !DO_COMPUTE
frameTime += (time2-time1);
#else
frameTime += _computeDur;
#endif
if (frameCounter > 10)
{
uint64_t ns = (frameTime * _clock_timebase.numer) / _clock_timebase.denom;
@@ -118,8 +234,10 @@ - (void)_updateBackbufferState
frameTime = 0;
rayCounter = 0;
}
[_backbuffer replaceRegion:MTLRegionMake2D(0,0,kBackbufferWidth,kBackbufferHeight) mipmapLevel:0 withBytes:_backbufferPixels bytesPerRow:kBackbufferWidth*16];
#if !DO_COMPUTE
[_backbuffer replaceRegion:MTLRegionMake2D(0,0,kBackbufferWidth,kBackbufferHeight) mipmapLevel:0 withBytes:_backbufferPixels bytesPerRow:kBackbufferWidth*16];
#endif
}
- (void)drawInMTKView:(nonnull MTKView *)view
@@ -128,10 +246,27 @@ - (void)drawInMTKView:(nonnull MTKView *)view
id <MTLCommandBuffer> cmd = [_commandQueue commandBuffer];
__block dispatch_semaphore_t block_sema = _inFlightSemaphore;
[cmd addCompletedHandler:^(id<MTLCommandBuffer> buffer) { dispatch_semaphore_signal(block_sema); }];
__block dispatch_semaphore_t block_sema = _inFlightSemaphore;
#if DO_COMPUTE
int counterIndex = (_uniformBufferIndex+1)%kMaxBuffersInFlight;
#endif
[cmd addCompletedHandler:^(id<MTLCommandBuffer> buffer)
{
#if DO_COMPUTE
// There's no easy/proper way to do GPU timing on Metal (or at least I couldn't find any),
// so I'm timing CPU side, from beginning of command buffer invocation to when we get the
// callback that the GPU is done with it. Not 100% proper, but gets similar results to
// what Xcode reports for the GPU duration.
uint64_t time2 = mach_absolute_time();
_computeDur = (time2 - _computeStartTime);
int rayCount = *(const int*)(((const uint8_t*)[_computeCounter contents]) + counterIndex*AlignedSize(4));
rayCounter += rayCount;
#endif
dispatch_semaphore_signal(block_sema);
}];
[self _updateBackbufferState];
[self _doRenderingWith:cmd];
// Delay getting the currentRenderPassDescriptor until we absolutely need it to avoid
// holding onto the drawable and blocking the display pipeline any longer than necessary
@@ -152,7 +287,8 @@ - (void)drawInMTKView:(nonnull MTKView *)view
}
- (void)mtkView:(nonnull MTKView *)view drawableSizeWillChange:(CGSize)size
{
{
//printf("View size %ix%i\n", (int)size.width, (int)size.height);
}
@end
Oops, something went wrong.
ProTip! Use n and p to navigate between commits in a pull request.