diff --git a/Cpp/Mac/Renderer.h b/Cpp/Apple/Renderer.h similarity index 64% rename from Cpp/Mac/Renderer.h rename to Cpp/Apple/Renderer.h index 305177f..14de2a0 100644 --- a/Cpp/Mac/Renderer.h +++ b/Cpp/Apple/Renderer.h @@ -1,7 +1,11 @@ #import -@interface Renderer : NSObject - --(nonnull instancetype)initWithMetalKitView:(nonnull MTKView *)view withLabel:(nonnull NSTextField*) label; +@interface Renderer : NSObject + +#if TARGET_OS_IPHONE +-(nonnull instancetype)initWithMetalKitView:(nonnull MTKView *)view; +#else +-(nonnull instancetype)initWithMetalKitView:(nonnull MTKView *)view withLabel:(nonnull NSTextField*) label; +#endif @end diff --git a/Cpp/Mac/Renderer.mm b/Cpp/Apple/Renderer.mm similarity index 92% rename from Cpp/Mac/Renderer.mm rename to Cpp/Apple/Renderer.mm index 672e50e..b77508b 100644 --- a/Cpp/Mac/Renderer.mm +++ b/Cpp/Apple/Renderer.mm @@ -7,7 +7,13 @@ #include "../Source/Test.h" -static const NSUInteger kMaxBuffersInFlight = 3; +static const NSUInteger kMaxBuffersInFlight = 3; + +#if TARGET_OS_IPHONE +#define kMetalBufferMode MTLResourceStorageModeShared +#else +#define kMetalBufferMode MTLResourceStorageModeManaged +#endif #if DO_COMPUTE_GPU // Metal on Mac needs buffer offsets to be 256-byte aligned @@ -58,15 +64,23 @@ @implementation Renderer float* _backbufferPixels; mach_timebase_info_data_t _clock_timebase; - NSTextField* _label; +#if !TARGET_OS_IPHONE + NSTextField* _label; +#endif } --(nonnull instancetype)initWithMetalKitView:(nonnull MTKView *)view withLabel:(nonnull NSTextField*) label; +#if TARGET_OS_IPHONE +-(nonnull instancetype)initWithMetalKitView:(nonnull MTKView *)view; +#else +-(nonnull instancetype)initWithMetalKitView:(nonnull MTKView *)view withLabel:(nonnull NSTextField*) label; +#endif { self = [super init]; if(self) { - _label = label; +#if !TARGET_OS_IPHONE + _label = label; +#endif _device = view.device; printf("GPU: %s\n", [[_device name] UTF8String]); _inFlightSemaphore = dispatch_semaphore_create(kMaxBuffersInFlight); @@ -99,10 +113,10 @@ - (void)_loadMetalWithView:(nonnull MTKView *)view; assert(_objSize == 20); assert(_matSize == 36); assert(camSize == 88); - _computeSpheres = [_device newBufferWithLength:AlignedSize(_sphereCount*_objSize)*kMaxBuffersInFlight options:MTLResourceStorageModeManaged]; - _computeMaterials = [_device newBufferWithLength:AlignedSize(_sphereCount*_matSize)*kMaxBuffersInFlight options:MTLResourceStorageModeManaged]; - _computeParams = [_device newBufferWithLength:AlignedSize(sizeof(ComputeParams))*kMaxBuffersInFlight options:MTLResourceStorageModeManaged]; - _computeEmissives = [_device newBufferWithLength:AlignedSize(_sphereCount*4)*kMaxBuffersInFlight options:MTLResourceStorageModeManaged]; + _computeSpheres = [_device newBufferWithLength:AlignedSize(_sphereCount*_objSize)*kMaxBuffersInFlight options:kMetalBufferMode]; + _computeMaterials = [_device newBufferWithLength:AlignedSize(_sphereCount*_matSize)*kMaxBuffersInFlight options:kMetalBufferMode]; + _computeParams = [_device newBufferWithLength:AlignedSize(sizeof(ComputeParams))*kMaxBuffersInFlight options:kMetalBufferMode]; + _computeEmissives = [_device newBufferWithLength:AlignedSize(_sphereCount*4)*kMaxBuffersInFlight options:kMetalBufferMode]; _computeCounter = [_device newBufferWithLength:AlignedSize(4)*kMaxBuffersInFlight options:MTLStorageModeShared]; _uniformBufferIndex = 0; #endif @@ -193,10 +207,12 @@ - (void)_doRenderingWith:(id ) cmd; if (!(g_TestFlags & kFlagProgressive)) params->lerpFac = 0; *(int*)(dataCounter+_uniformBufferIndex*counterSize) = 0; +#if !TARGET_OS_IPHONE [_computeSpheres didModifyRange:NSMakeRange(_uniformBufferIndex*spheresSize, spheresSize)]; [_computeMaterials didModifyRange:NSMakeRange(_uniformBufferIndex*matsSize, matsSize)]; [_computeEmissives didModifyRange:NSMakeRange(_uniformBufferIndex*emissivesSize, emissivesSize)]; [_computeParams didModifyRange:NSMakeRange(_uniformBufferIndex*paramsSize, paramsSize)]; +#endif id enc = [cmd computeCommandEncoder]; [enc setComputePipelineState:_computeState]; @@ -213,7 +229,7 @@ - (void)_doRenderingWith:(id ) cmd; [enc endEncoding]; #else int rayCount; - DrawTest(curT, totalCounter, kBackbufferWidth, kBackbufferHeight, _backbufferPixels, rayCount); + DrawTest(curT, totalCounter, kBackbufferWidth, kBackbufferHeight, _backbufferPixels, rayCount, g_TestFlags); rayCounter += rayCount; #endif @@ -232,8 +248,10 @@ - (void)_doRenderingWith:(id ) cmd; char buffer[200]; snprintf(buffer, 200, "%.2fms (%.1f FPS) %.1fMrays/s %.2fMrays/frame frames %i", s * 1000.0f, 1.f / s, rayCounter / frameCounter / s * 1.0e-6f, rayCounter / frameCounter * 1.0e-6f, totalCounter); puts(buffer); +#if !TARGET_OS_IPHONE NSString* str = [[NSString alloc] initWithUTF8String:buffer]; - _label.stringValue = str; + _label.stringValue = str; +#endif frameCounter = 0; frameTime = 0; rayCounter = 0; @@ -253,6 +271,7 @@ - (void)drawInMTKView:(nonnull MTKView *)view __block dispatch_semaphore_t block_sema = _inFlightSemaphore; #if DO_COMPUTE_GPU int counterIndex = (_uniformBufferIndex+1)%kMaxBuffersInFlight; + id counterBuffer = _computeCounter; #endif [cmd addCompletedHandler:^(id buffer) { @@ -263,7 +282,7 @@ - (void)drawInMTKView:(nonnull MTKView *)view // what Xcode reports for the GPU duration. uint64_t time2 = mach_absolute_time(); _computeDur = (time2 - _computeStartTime); - int rayCount = *(const int*)(((const uint8_t*)[_computeCounter contents]) + counterIndex*AlignedSize(4)); + int rayCount = *(const int*)(((const uint8_t*)[counterBuffer contents]) + counterIndex*AlignedSize(4)); rayCounter += rayCount; #endif diff --git a/Cpp/Mac/Shaders.metal b/Cpp/Apple/Shaders.metal similarity index 100% rename from Cpp/Mac/Shaders.metal rename to Cpp/Apple/Shaders.metal diff --git a/Cpp/Mac/Test.entitlements b/Cpp/Apple/Test.entitlements similarity index 100% rename from Cpp/Mac/Test.entitlements rename to Cpp/Apple/Test.entitlements diff --git a/Cpp/Mac/Test.xcodeproj/project.pbxproj b/Cpp/Apple/Test.xcodeproj/project.pbxproj similarity index 63% rename from Cpp/Mac/Test.xcodeproj/project.pbxproj rename to Cpp/Apple/Test.xcodeproj/project.pbxproj index 1dfa47c..9a325d3 100644 --- a/Cpp/Mac/Test.xcodeproj/project.pbxproj +++ b/Cpp/Apple/Test.xcodeproj/project.pbxproj @@ -7,6 +7,17 @@ objects = { /* Begin PBXBuildFile section */ + 2B2B5AA220BE72FD00040BFE /* AppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 2B2B5AA120BE72FD00040BFE /* AppDelegate.m */; }; + 2B2B5AA820BE72FD00040BFE /* GameViewController.m in Sources */ = {isa = PBXBuildFile; fileRef = 2B2B5AA720BE72FD00040BFE /* GameViewController.m */; }; + 2B2B5AAE20BE72FD00040BFE /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 2B2B5AAC20BE72FD00040BFE /* Main.storyboard */; }; + 2B2B5AB320BE72FE00040BFE /* LaunchScreen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 2B2B5AB120BE72FE00040BFE /* LaunchScreen.storyboard */; }; + 2B2B5AB620BE72FE00040BFE /* main.m in Sources */ = {isa = PBXBuildFile; fileRef = 2B2B5AB520BE72FE00040BFE /* main.m */; }; + 2B2B5ABA20BE742700040BFE /* Renderer.mm in Sources */ = {isa = PBXBuildFile; fileRef = 2B2D97B520519C7100520EC1 /* Renderer.mm */; }; + 2B2B5ABB20BE742A00040BFE /* Shaders.metal in Sources */ = {isa = PBXBuildFile; fileRef = 2B2D97BA20519C7100520EC1 /* Shaders.metal */; }; + 2B2B5ABC20BE77ED00040BFE /* Maths.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2BFC4E1420614A7B0007766C /* Maths.cpp */; }; + 2B2B5ABD20BE77F000040BFE /* Test.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2BE32DC7205BEDA6003C05B4 /* Test.cpp */; }; + 2B2B5ABE20BE77F500040BFE /* TaskScheduler_c.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2BE32DCE205BFC31003C05B4 /* TaskScheduler_c.cpp */; }; + 2B2B5ABF20BE77F900040BFE /* TaskScheduler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 2BE32DD0205BFC31003C05B4 /* TaskScheduler.cpp */; }; 2B2D97B320519C7100520EC1 /* AppDelegate.m in Sources */ = {isa = PBXBuildFile; fileRef = 2B2D97B220519C7100520EC1 /* AppDelegate.m */; }; 2B2D97B620519C7100520EC1 /* Renderer.mm in Sources */ = {isa = PBXBuildFile; fileRef = 2B2D97B520519C7100520EC1 /* Renderer.mm */; }; 2B2D97B920519C7100520EC1 /* GameViewController.m in Sources */ = {isa = PBXBuildFile; fileRef = 2B2D97B820519C7100520EC1 /* GameViewController.m */; }; @@ -21,6 +32,15 @@ /* End PBXBuildFile section */ /* Begin PBXFileReference section */ + 2B2B5A9E20BE72FD00040BFE /* Test iOS.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "Test iOS.app"; sourceTree = BUILT_PRODUCTS_DIR; }; + 2B2B5AA020BE72FD00040BFE /* AppDelegate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = ""; }; + 2B2B5AA120BE72FD00040BFE /* AppDelegate.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = AppDelegate.m; sourceTree = ""; }; + 2B2B5AA620BE72FD00040BFE /* GameViewController.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = GameViewController.h; sourceTree = ""; }; + 2B2B5AA720BE72FD00040BFE /* GameViewController.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = GameViewController.m; sourceTree = ""; }; + 2B2B5AAD20BE72FD00040BFE /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = ""; }; + 2B2B5AB220BE72FE00040BFE /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/LaunchScreen.storyboard; sourceTree = ""; }; + 2B2B5AB420BE72FE00040BFE /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; + 2B2B5AB520BE72FE00040BFE /* main.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = main.m; sourceTree = ""; }; 2B2D97AE20519C7100520EC1 /* Test.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = Test.app; sourceTree = BUILT_PRODUCTS_DIR; }; 2B2D97B120519C7100520EC1 /* AppDelegate.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = AppDelegate.h; sourceTree = ""; }; 2B2D97B220519C7100520EC1 /* AppDelegate.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = AppDelegate.m; sourceTree = ""; }; @@ -33,23 +53,30 @@ 2B2D97C220519C7100520EC1 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; 2B2D97C320519C7100520EC1 /* main.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = main.m; sourceTree = ""; }; 2B2D97C520519C7100520EC1 /* Test.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = Test.entitlements; sourceTree = ""; }; - 2B6AD0DB20736FF70025F674 /* Config.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Config.h; path = ../Source/Config.h; sourceTree = ""; }; + 2B6AD0DB20736FF70025F674 /* Config.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Config.h; sourceTree = ""; }; 2B8065FE207CDB540043116F /* MathSimd.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = MathSimd.h; path = ../Source/MathSimd.h; sourceTree = ""; }; 2BB9041D2064EB6D00C0A0EA /* MetalKit.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = MetalKit.framework; path = System/Library/Frameworks/MetalKit.framework; sourceTree = SDKROOT; }; 2BE32DC7205BEDA6003C05B4 /* Test.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Test.cpp; path = ../Source/Test.cpp; sourceTree = ""; }; 2BE32DC8205BEDA6003C05B4 /* Test.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Test.h; path = ../Source/Test.h; sourceTree = ""; }; - 2BE32DCB205BFC31003C05B4 /* TaskScheduler_c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TaskScheduler_c.h; path = ../Source/enkiTS/TaskScheduler_c.h; sourceTree = ""; }; - 2BE32DCC205BFC31003C05B4 /* LockLessMultiReadPipe.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = LockLessMultiReadPipe.h; path = ../Source/enkiTS/LockLessMultiReadPipe.h; sourceTree = ""; }; - 2BE32DCD205BFC31003C05B4 /* Threads.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Threads.h; path = ../Source/enkiTS/Threads.h; sourceTree = ""; }; - 2BE32DCE205BFC31003C05B4 /* TaskScheduler_c.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TaskScheduler_c.cpp; path = ../Source/enkiTS/TaskScheduler_c.cpp; sourceTree = ""; }; - 2BE32DCF205BFC31003C05B4 /* Atomics.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Atomics.h; path = ../Source/enkiTS/Atomics.h; sourceTree = ""; }; - 2BE32DD0205BFC31003C05B4 /* TaskScheduler.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TaskScheduler.cpp; path = ../Source/enkiTS/TaskScheduler.cpp; sourceTree = ""; }; - 2BE32DD1205BFC31003C05B4 /* TaskScheduler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TaskScheduler.h; path = ../Source/enkiTS/TaskScheduler.h; sourceTree = ""; }; + 2BE32DCB205BFC31003C05B4 /* TaskScheduler_c.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = TaskScheduler_c.h; sourceTree = ""; }; + 2BE32DCC205BFC31003C05B4 /* LockLessMultiReadPipe.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = LockLessMultiReadPipe.h; sourceTree = ""; }; + 2BE32DCD205BFC31003C05B4 /* Threads.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Threads.h; sourceTree = ""; }; + 2BE32DCE205BFC31003C05B4 /* TaskScheduler_c.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = TaskScheduler_c.cpp; sourceTree = ""; }; + 2BE32DCF205BFC31003C05B4 /* Atomics.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = Atomics.h; sourceTree = ""; }; + 2BE32DD0205BFC31003C05B4 /* TaskScheduler.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = TaskScheduler.cpp; sourceTree = ""; }; + 2BE32DD1205BFC31003C05B4 /* TaskScheduler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = TaskScheduler.h; sourceTree = ""; }; 2BFC4E1420614A7B0007766C /* Maths.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Maths.cpp; path = ../Source/Maths.cpp; sourceTree = ""; }; 2BFC4E1520614A7B0007766C /* Maths.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Maths.h; path = ../Source/Maths.h; sourceTree = ""; }; /* End PBXFileReference section */ /* Begin PBXFrameworksBuildPhase section */ + 2B2B5A9B20BE72FD00040BFE /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; 2B2D97AB20519C7100520EC1 /* Frameworks */ = { isa = PBXFrameworksBuildPhase; buildActionMask = 2147483647; @@ -61,12 +88,24 @@ /* End PBXFrameworksBuildPhase section */ /* Begin PBXGroup section */ - 2B2D97A520519C7100520EC1 = { + 2B2B5A9420BE6EFA00040BFE /* Source */ = { isa = PBXGroup; children = ( + 2B2B5A9720BE6F8E00040BFE /* enkiTS */, + 2B6AD0DB20736FF70025F674 /* Config.h */, 2BFC4E1420614A7B0007766C /* Maths.cpp */, 2BFC4E1520614A7B0007766C /* Maths.h */, 2B8065FE207CDB540043116F /* MathSimd.h */, + 2BE32DC7205BEDA6003C05B4 /* Test.cpp */, + 2BE32DC8205BEDA6003C05B4 /* Test.h */, + ); + name = Source; + path = ../Source; + sourceTree = ""; + }; + 2B2B5A9720BE6F8E00040BFE /* enkiTS */ = { + isa = PBXGroup; + children = ( 2BE32DCF205BFC31003C05B4 /* Atomics.h */, 2BE32DCC205BFC31003C05B4 /* LockLessMultiReadPipe.h */, 2BE32DCE205BFC31003C05B4 /* TaskScheduler_c.cpp */, @@ -74,10 +113,44 @@ 2BE32DD0205BFC31003C05B4 /* TaskScheduler.cpp */, 2BE32DD1205BFC31003C05B4 /* TaskScheduler.h */, 2BE32DCD205BFC31003C05B4 /* Threads.h */, - 2B6AD0DB20736FF70025F674 /* Config.h */, - 2BE32DC7205BEDA6003C05B4 /* Test.cpp */, - 2BE32DC8205BEDA6003C05B4 /* Test.h */, - 2B2D97B020519C7100520EC1 /* Mac */, + ); + path = enkiTS; + sourceTree = ""; + }; + 2B2B5A9920BE710E00040BFE /* macOS */ = { + isa = PBXGroup; + children = ( + 2B2D97B120519C7100520EC1 /* AppDelegate.h */, + 2B2D97B220519C7100520EC1 /* AppDelegate.m */, + 2B2D97B720519C7100520EC1 /* GameViewController.h */, + 2B2D97B820519C7100520EC1 /* GameViewController.m */, + 2B2D97C320519C7100520EC1 /* main.m */, + 2B2D97C220519C7100520EC1 /* Info.plist */, + 2B2D97BF20519C7100520EC1 /* Main.storyboard */, + ); + path = macOS; + sourceTree = ""; + }; + 2B2B5A9F20BE72FD00040BFE /* iOS */ = { + isa = PBXGroup; + children = ( + 2B2B5AA020BE72FD00040BFE /* AppDelegate.h */, + 2B2B5AA120BE72FD00040BFE /* AppDelegate.m */, + 2B2B5AA620BE72FD00040BFE /* GameViewController.h */, + 2B2B5AA720BE72FD00040BFE /* GameViewController.m */, + 2B2B5AAC20BE72FD00040BFE /* Main.storyboard */, + 2B2B5AB120BE72FE00040BFE /* LaunchScreen.storyboard */, + 2B2B5AB420BE72FE00040BFE /* Info.plist */, + 2B2B5AB520BE72FE00040BFE /* main.m */, + ); + path = iOS; + sourceTree = ""; + }; + 2B2D97A520519C7100520EC1 = { + isa = PBXGroup; + children = ( + 2B2B5A9420BE6EFA00040BFE /* Source */, + 2B2D97B020519C7100520EC1 /* Apple */, 2B2D97AF20519C7100520EC1 /* Products */, 2BB9041C2064EB6D00C0A0EA /* Frameworks */, ); @@ -87,26 +160,22 @@ isa = PBXGroup; children = ( 2B2D97AE20519C7100520EC1 /* Test.app */, + 2B2B5A9E20BE72FD00040BFE /* Test iOS.app */, ); name = Products; sourceTree = ""; }; - 2B2D97B020519C7100520EC1 /* Mac */ = { + 2B2D97B020519C7100520EC1 /* Apple */ = { isa = PBXGroup; children = ( - 2B2D97B120519C7100520EC1 /* AppDelegate.h */, - 2B2D97B220519C7100520EC1 /* AppDelegate.m */, + 2B2B5A9920BE710E00040BFE /* macOS */, + 2B2B5A9F20BE72FD00040BFE /* iOS */, 2B2D97B420519C7100520EC1 /* Renderer.h */, 2B2D97B520519C7100520EC1 /* Renderer.mm */, - 2B2D97B720519C7100520EC1 /* GameViewController.h */, - 2B2D97B820519C7100520EC1 /* GameViewController.m */, 2B2D97BA20519C7100520EC1 /* Shaders.metal */, - 2B2D97BF20519C7100520EC1 /* Main.storyboard */, - 2B2D97C220519C7100520EC1 /* Info.plist */, - 2B2D97C320519C7100520EC1 /* main.m */, 2B2D97C520519C7100520EC1 /* Test.entitlements */, ); - name = Mac; + name = Apple; sourceTree = ""; }; 2BB9041C2064EB6D00C0A0EA /* Frameworks */ = { @@ -120,9 +189,26 @@ /* End PBXGroup section */ /* Begin PBXNativeTarget section */ - 2B2D97AD20519C7100520EC1 /* Test */ = { + 2B2B5A9D20BE72FD00040BFE /* Test iOS */ = { + isa = PBXNativeTarget; + buildConfigurationList = 2B2B5AB720BE72FE00040BFE /* Build configuration list for PBXNativeTarget "Test iOS" */; + buildPhases = ( + 2B2B5A9A20BE72FD00040BFE /* Sources */, + 2B2B5A9B20BE72FD00040BFE /* Frameworks */, + 2B2B5A9C20BE72FD00040BFE /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = "Test iOS"; + productName = "Test iOS"; + productReference = 2B2B5A9E20BE72FD00040BFE /* Test iOS.app */; + productType = "com.apple.product-type.application"; + }; + 2B2D97AD20519C7100520EC1 /* Test Mac */ = { isa = PBXNativeTarget; - buildConfigurationList = 2B2D97C820519C7100520EC1 /* Build configuration list for PBXNativeTarget "Test" */; + buildConfigurationList = 2B2D97C820519C7100520EC1 /* Build configuration list for PBXNativeTarget "Test Mac" */; buildPhases = ( 2B2D97AA20519C7100520EC1 /* Sources */, 2B2D97AB20519C7100520EC1 /* Frameworks */, @@ -132,7 +218,7 @@ ); dependencies = ( ); - name = Test; + name = "Test Mac"; productName = Test; productReference = 2B2D97AE20519C7100520EC1 /* Test.app */; productType = "com.apple.product-type.application"; @@ -143,9 +229,13 @@ 2B2D97A620519C7100520EC1 /* Project object */ = { isa = PBXProject; attributes = { - LastUpgradeCheck = 0920; + LastUpgradeCheck = 0930; ORGANIZATIONNAME = "Aras Pranckevicius"; TargetAttributes = { + 2B2B5A9D20BE72FD00040BFE = { + CreatedOnToolsVersion = 9.3; + ProvisioningStyle = Automatic; + }; 2B2D97AD20519C7100520EC1 = { CreatedOnToolsVersion = 9.2; ProvisioningStyle = Automatic; @@ -170,12 +260,22 @@ projectDirPath = ""; projectRoot = ""; targets = ( - 2B2D97AD20519C7100520EC1 /* Test */, + 2B2D97AD20519C7100520EC1 /* Test Mac */, + 2B2B5A9D20BE72FD00040BFE /* Test iOS */, ); }; /* End PBXProject section */ /* Begin PBXResourcesBuildPhase section */ + 2B2B5A9C20BE72FD00040BFE /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 2B2B5AB320BE72FE00040BFE /* LaunchScreen.storyboard in Resources */, + 2B2B5AAE20BE72FD00040BFE /* Main.storyboard in Resources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; 2B2D97AC20519C7100520EC1 /* Resources */ = { isa = PBXResourcesBuildPhase; buildActionMask = 2147483647; @@ -187,6 +287,22 @@ /* End PBXResourcesBuildPhase section */ /* Begin PBXSourcesBuildPhase section */ + 2B2B5A9A20BE72FD00040BFE /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 2B2B5ABE20BE77F500040BFE /* TaskScheduler_c.cpp in Sources */, + 2B2B5ABB20BE742A00040BFE /* Shaders.metal in Sources */, + 2B2B5ABC20BE77ED00040BFE /* Maths.cpp in Sources */, + 2B2B5ABD20BE77F000040BFE /* Test.cpp in Sources */, + 2B2B5ABA20BE742700040BFE /* Renderer.mm in Sources */, + 2B2B5AB620BE72FE00040BFE /* main.m in Sources */, + 2B2B5AA820BE72FD00040BFE /* GameViewController.m in Sources */, + 2B2B5AA220BE72FD00040BFE /* AppDelegate.m in Sources */, + 2B2B5ABF20BE77F900040BFE /* TaskScheduler.cpp in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; 2B2D97AA20519C7100520EC1 /* Sources */ = { isa = PBXSourcesBuildPhase; buildActionMask = 2147483647; @@ -206,6 +322,22 @@ /* End PBXSourcesBuildPhase section */ /* Begin PBXVariantGroup section */ + 2B2B5AAC20BE72FD00040BFE /* Main.storyboard */ = { + isa = PBXVariantGroup; + children = ( + 2B2B5AAD20BE72FD00040BFE /* Base */, + ); + name = Main.storyboard; + sourceTree = ""; + }; + 2B2B5AB120BE72FE00040BFE /* LaunchScreen.storyboard */ = { + isa = PBXVariantGroup; + children = ( + 2B2B5AB220BE72FE00040BFE /* Base */, + ); + name = LaunchScreen.storyboard; + sourceTree = ""; + }; 2B2D97BF20519C7100520EC1 /* Main.storyboard */ = { isa = PBXVariantGroup; children = ( @@ -217,6 +349,41 @@ /* End PBXVariantGroup section */ /* Begin XCBuildConfiguration section */ + 2B2B5AB820BE72FE00040BFE /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + CLANG_ENABLE_OBJC_WEAK = YES; + CODE_SIGN_IDENTITY = "iPhone Developer"; + CODE_SIGN_STYLE = Automatic; + DEVELOPMENT_TEAM = BVPN9UFA9B; + INFOPLIST_FILE = "$(SRCROOT)/iOS/Info.plist"; + IPHONEOS_DEPLOYMENT_TARGET = 11.3; + LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; + PRODUCT_BUNDLE_IDENTIFIER = "com.aras.Test-iOS"; + PRODUCT_NAME = "$(TARGET_NAME)"; + SDKROOT = iphoneos; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Debug; + }; + 2B2B5AB920BE72FE00040BFE /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + CLANG_ENABLE_OBJC_WEAK = YES; + CODE_SIGN_IDENTITY = "iPhone Developer"; + CODE_SIGN_STYLE = Automatic; + DEVELOPMENT_TEAM = BVPN9UFA9B; + INFOPLIST_FILE = "$(SRCROOT)/iOS/Info.plist"; + IPHONEOS_DEPLOYMENT_TARGET = 11.3; + LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/Frameworks"; + PRODUCT_BUNDLE_IDENTIFIER = "com.aras.Test-iOS"; + PRODUCT_NAME = "$(TARGET_NAME)"; + SDKROOT = iphoneos; + TARGETED_DEVICE_FAMILY = "1,2"; + VALIDATE_PRODUCT = YES; + }; + name = Release; + }; 2B2D97C620519C7100520EC1 /* Debug */ = { isa = XCBuildConfiguration; buildSettings = { @@ -231,6 +398,7 @@ CLANG_WARN_BOOL_CONVERSION = YES; CLANG_WARN_COMMA = YES; CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; CLANG_WARN_DOCUMENTATION_COMMENTS = YES; CLANG_WARN_EMPTY_BODY = YES; @@ -238,6 +406,7 @@ CLANG_WARN_INFINITE_RECURSION = YES; CLANG_WARN_INT_CONVERSION = YES; CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; @@ -265,6 +434,7 @@ GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 10.0; MACOSX_DEPLOYMENT_TARGET = 10.11; MTL_ENABLE_DEBUG_INFO = YES; ONLY_ACTIVE_ARCH = YES; @@ -286,6 +456,7 @@ CLANG_WARN_BOOL_CONVERSION = YES; CLANG_WARN_COMMA = YES; CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; CLANG_WARN_DOCUMENTATION_COMMENTS = YES; CLANG_WARN_EMPTY_BODY = YES; @@ -293,6 +464,7 @@ CLANG_WARN_INFINITE_RECURSION = YES; CLANG_WARN_INT_CONVERSION = YES; CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; @@ -315,6 +487,7 @@ GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; GCC_WARN_UNUSED_FUNCTION = YES; GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 10.0; MACOSX_DEPLOYMENT_TARGET = 10.11; MTL_ENABLE_DEBUG_INFO = NO; SDKROOT = macosx; @@ -328,7 +501,7 @@ CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; GCC_FAST_MATH = YES; - INFOPLIST_FILE = "$(SRCROOT)/Info.plist"; + INFOPLIST_FILE = "$(SRCROOT)/macOS/Info.plist"; LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks"; PRODUCT_BUNDLE_IDENTIFIER = com.aras.Test; PRODUCT_NAME = "$(TARGET_NAME)"; @@ -342,7 +515,7 @@ CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; GCC_FAST_MATH = YES; - INFOPLIST_FILE = "$(SRCROOT)/Info.plist"; + INFOPLIST_FILE = "$(SRCROOT)/macOS/Info.plist"; LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks"; PRODUCT_BUNDLE_IDENTIFIER = com.aras.Test; PRODUCT_NAME = "$(TARGET_NAME)"; @@ -352,6 +525,15 @@ /* End XCBuildConfiguration section */ /* Begin XCConfigurationList section */ + 2B2B5AB720BE72FE00040BFE /* Build configuration list for PBXNativeTarget "Test iOS" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 2B2B5AB820BE72FE00040BFE /* Debug */, + 2B2B5AB920BE72FE00040BFE /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; 2B2D97A920519C7100520EC1 /* Build configuration list for PBXProject "Test" */ = { isa = XCConfigurationList; buildConfigurations = ( @@ -361,7 +543,7 @@ defaultConfigurationIsVisible = 0; defaultConfigurationName = Release; }; - 2B2D97C820519C7100520EC1 /* Build configuration list for PBXNativeTarget "Test" */ = { + 2B2D97C820519C7100520EC1 /* Build configuration list for PBXNativeTarget "Test Mac" */ = { isa = XCConfigurationList; buildConfigurations = ( 2B2D97C920519C7100520EC1 /* Debug */, diff --git a/Cpp/Apple/iOS/AppDelegate.h b/Cpp/Apple/iOS/AppDelegate.h new file mode 100644 index 0000000..a5a8b38 --- /dev/null +++ b/Cpp/Apple/iOS/AppDelegate.h @@ -0,0 +1,7 @@ +#import + +@interface AppDelegate : UIResponder + +@property (strong, nonatomic) UIWindow *window; + +@end diff --git a/Cpp/Apple/iOS/AppDelegate.m b/Cpp/Apple/iOS/AppDelegate.m new file mode 100644 index 0000000..ed6928a --- /dev/null +++ b/Cpp/Apple/iOS/AppDelegate.m @@ -0,0 +1,43 @@ +#import "AppDelegate.h" + +@interface AppDelegate () + +@end + +@implementation AppDelegate + + +- (BOOL)application:(UIApplication *)application didFinishLaunchingWithOptions:(NSDictionary *)launchOptions { + // Override point for customization after application launch. + return YES; +} + + +- (void)applicationWillResignActive:(UIApplication *)application { + // Sent when the application is about to move from active to inactive state. This can occur for certain types of temporary interruptions (such as an incoming phone call or SMS message) or when the user quits the application and it begins the transition to the background state. + // Use this method to pause ongoing tasks, disable timers, and invalidate graphics rendering callbacks. Games should use this method to pause the game. +} + + +- (void)applicationDidEnterBackground:(UIApplication *)application { + // Use this method to release shared resources, save user data, invalidate timers, and store enough application state information to restore your application to its current state in case it is terminated later. + // If your application supports background execution, this method is called instead of applicationWillTerminate: when the user quits. +} + + +- (void)applicationWillEnterForeground:(UIApplication *)application { + // Called as part of the transition from the background to the active state; here you can undo many of the changes made on entering the background. +} + + +- (void)applicationDidBecomeActive:(UIApplication *)application { + // Restart any tasks that were paused (or not yet started) while the application was inactive. If the application was previously in the background, optionally refresh the user interface. +} + + +- (void)applicationWillTerminate:(UIApplication *)application { + // Called when the application is about to terminate. Save data if appropriate. See also applicationDidEnterBackground:. +} + + +@end diff --git a/Cpp/Apple/iOS/Base.lproj/LaunchScreen.storyboard b/Cpp/Apple/iOS/Base.lproj/LaunchScreen.storyboard new file mode 100644 index 0000000..7cd0339 --- /dev/null +++ b/Cpp/Apple/iOS/Base.lproj/LaunchScreen.storyboard @@ -0,0 +1,29 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Cpp/Apple/iOS/Base.lproj/Main.storyboard b/Cpp/Apple/iOS/Base.lproj/Main.storyboard new file mode 100644 index 0000000..2c01067 --- /dev/null +++ b/Cpp/Apple/iOS/Base.lproj/Main.storyboard @@ -0,0 +1,24 @@ + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/Cpp/Apple/iOS/GameViewController.h b/Cpp/Apple/iOS/GameViewController.h new file mode 100644 index 0000000..5c00608 --- /dev/null +++ b/Cpp/Apple/iOS/GameViewController.h @@ -0,0 +1,9 @@ +#import +#import +#import +#import "Renderer.h" + +// Our iOS view controller +@interface GameViewController : UIViewController + +@end diff --git a/Cpp/Apple/iOS/GameViewController.m b/Cpp/Apple/iOS/GameViewController.m new file mode 100644 index 0000000..d0e4ca3 --- /dev/null +++ b/Cpp/Apple/iOS/GameViewController.m @@ -0,0 +1,34 @@ +#import "GameViewController.h" +#import "Renderer.h" + +@implementation GameViewController +{ + MTKView *_view; + + Renderer *_renderer; +} + +- (void)viewDidLoad +{ + [super viewDidLoad]; + + _view = (MTKView *)self.view; + + _view.device = MTLCreateSystemDefaultDevice(); + _view.backgroundColor = UIColor.blackColor; + + if(!_view.device) + { + NSLog(@"Metal is not supported on this device"); + self.view = [[UIView alloc] initWithFrame:self.view.frame]; + return; + } + + _renderer = [[Renderer alloc] initWithMetalKitView:_view]; + + [_renderer mtkView:_view drawableSizeWillChange:_view.bounds.size]; + + _view.delegate = _renderer; +} + +@end diff --git a/Cpp/Apple/iOS/Info.plist b/Cpp/Apple/iOS/Info.plist new file mode 100644 index 0000000..189efdd --- /dev/null +++ b/Cpp/Apple/iOS/Info.plist @@ -0,0 +1,47 @@ + + + + + CFBundleDevelopmentRegion + $(DEVELOPMENT_LANGUAGE) + CFBundleExecutable + $(EXECUTABLE_NAME) + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + APPL + CFBundleShortVersionString + 1.0 + CFBundleVersion + 1 + LSRequiresIPhoneOS + + UILaunchStoryboardName + LaunchScreen + UIMainStoryboardFile + Main + UIRequiredDeviceCapabilities + + armv7 + metal + + UIStatusBarHidden + + UISupportedInterfaceOrientations + + UIInterfaceOrientationLandscapeLeft + UIInterfaceOrientationLandscapeRight + + UISupportedInterfaceOrientations~ipad + + UIInterfaceOrientationPortrait + UIInterfaceOrientationPortraitUpsideDown + UIInterfaceOrientationLandscapeLeft + UIInterfaceOrientationLandscapeRight + + + diff --git a/Cpp/Apple/iOS/main.m b/Cpp/Apple/iOS/main.m new file mode 100644 index 0000000..81e84cb --- /dev/null +++ b/Cpp/Apple/iOS/main.m @@ -0,0 +1,8 @@ +#import +#import "AppDelegate.h" + +int main(int argc, char * argv[]) { + @autoreleasepool { + return UIApplicationMain(argc, argv, nil, NSStringFromClass([AppDelegate class])); + } +} diff --git a/Cpp/Mac/AppDelegate.h b/Cpp/Apple/macOS/AppDelegate.h similarity index 100% rename from Cpp/Mac/AppDelegate.h rename to Cpp/Apple/macOS/AppDelegate.h diff --git a/Cpp/Mac/AppDelegate.m b/Cpp/Apple/macOS/AppDelegate.m similarity index 100% rename from Cpp/Mac/AppDelegate.m rename to Cpp/Apple/macOS/AppDelegate.m diff --git a/Cpp/Mac/Base.lproj/Main.storyboard b/Cpp/Apple/macOS/Base.lproj/Main.storyboard similarity index 100% rename from Cpp/Mac/Base.lproj/Main.storyboard rename to Cpp/Apple/macOS/Base.lproj/Main.storyboard diff --git a/Cpp/Mac/GameViewController.h b/Cpp/Apple/macOS/GameViewController.h similarity index 100% rename from Cpp/Mac/GameViewController.h rename to Cpp/Apple/macOS/GameViewController.h diff --git a/Cpp/Mac/GameViewController.m b/Cpp/Apple/macOS/GameViewController.m similarity index 100% rename from Cpp/Mac/GameViewController.m rename to Cpp/Apple/macOS/GameViewController.m diff --git a/Cpp/Mac/Info.plist b/Cpp/Apple/macOS/Info.plist similarity index 100% rename from Cpp/Mac/Info.plist rename to Cpp/Apple/macOS/Info.plist diff --git a/Cpp/Mac/main.m b/Cpp/Apple/macOS/main.m similarity index 100% rename from Cpp/Mac/main.m rename to Cpp/Apple/macOS/main.m diff --git a/Cpp/Source/Config.h b/Cpp/Source/Config.h index dd0ecae..c86297b 100644 --- a/Cpp/Source/Config.h +++ b/Cpp/Source/Config.h @@ -1,4 +1,8 @@ +#if defined(__APPLE__) && !defined(__METAL_VERSION__) +#include +#endif + #define kBackbufferWidth 1280 #define kBackbufferHeight 720 @@ -14,8 +18,8 @@ #define kCSGroupSizeY 8 #define kCSMaxObjects 64 -// Should float3 struct use SSE? -#define DO_FLOAT3_WITH_SSE (!(DO_COMPUTE_GPU) && 1) +// Should float3 struct use SSE/NEON? +#define DO_FLOAT3_WITH_SIMD (!(DO_COMPUTE_GPU) && 1) -// Should HitSpheres function use SSE? -#define DO_HIT_SPHERES_SSE 1 +// Should HitSpheres function use SSE/NEON? +#define DO_HIT_SPHERES_SIMD 1 diff --git a/Cpp/Source/MathSimd.h b/Cpp/Source/MathSimd.h index f31219d..4b28f90 100644 --- a/Cpp/Source/MathSimd.h +++ b/Cpp/Source/MathSimd.h @@ -1,17 +1,21 @@ -#pragma once +#pragma once #if defined(_MSC_VER) #define VM_INLINE __forceinline #else #define VM_INLINE __attribute__((unused, always_inline, nodebug)) inline #endif - + +#define kSimdWidth 4 + +#if !defined(__arm__) && !defined(__arm64__) + +// ---- SSE implementation + #include #include #include -#define kSimdWidth 4 - #define SHUFFLE4(V, X,Y,Z,W) float4(_mm_shuffle_ps((V).m, (V).m, _MM_SHUFFLE(W,Z,Y,X))) struct float4 @@ -85,4 +89,104 @@ VM_INLINE __m128i select(__m128i a, __m128i b, bool4 cond) #endif } -VM_INLINE float4 sqrtf(float4 v) { return float4(_mm_sqrt_ps(v.m)); } +VM_INLINE float4 sqrtf(float4 v) { return float4(_mm_sqrt_ps(v.m)); } + +#else + +// ---- NEON implementation + +#define USE_NEON 1 +#include + +struct float4 +{ + VM_INLINE float4() {} + VM_INLINE explicit float4(const float *p) { m = vld1q_f32(p); } + VM_INLINE explicit float4(float x, float y, float z, float w) { float v[4] = {x, y, z, w}; m = vld1q_f32(v); } + VM_INLINE explicit float4(float v) { m = vdupq_n_f32(v); } + VM_INLINE explicit float4(float32x4_t v) { m = v; } + + VM_INLINE float getX() const { return vgetq_lane_f32(m, 0); } + VM_INLINE float getY() const { return vgetq_lane_f32(m, 1); } + VM_INLINE float getZ() const { return vgetq_lane_f32(m, 2); } + VM_INLINE float getW() const { return vgetq_lane_f32(m, 3); } + + float32x4_t m; +}; + +typedef float4 bool4; + +VM_INLINE float4 operator+ (float4 a, float4 b) { a.m = vaddq_f32(a.m, b.m); return a; } +VM_INLINE float4 operator- (float4 a, float4 b) { a.m = vsubq_f32(a.m, b.m); return a; } +VM_INLINE float4 operator* (float4 a, float4 b) { a.m = vmulq_f32(a.m, b.m); return a; } +VM_INLINE bool4 operator==(float4 a, float4 b) { a.m = vceqq_f32(a.m, b.m); return a; } +VM_INLINE bool4 operator!=(float4 a, float4 b) { a.m = a.m = vmvnq_u32(vceqq_f32(a.m, b.m)); return a; } +VM_INLINE bool4 operator< (float4 a, float4 b) { a.m = vcltq_f32(a.m, b.m); return a; } +VM_INLINE bool4 operator> (float4 a, float4 b) { a.m = vcgtq_f32(a.m, b.m); return a; } +VM_INLINE bool4 operator<=(float4 a, float4 b) { a.m = vcleq_f32(a.m, b.m); return a; } +VM_INLINE bool4 operator>=(float4 a, float4 b) { a.m = vcgeq_f32(a.m, b.m); return a; } +VM_INLINE bool4 operator&(bool4 a, bool4 b) { a.m = vandq_u32(a.m, b.m); return a; } +VM_INLINE bool4 operator|(bool4 a, bool4 b) { a.m = vorrq_u32(a.m, b.m); return a; } +VM_INLINE float4 operator- (float4 a) { a.m = vnegq_f32(a.m); return a; } +VM_INLINE float4 min(float4 a, float4 b) { a.m = vminq_f32(a.m, b.m); return a; } +VM_INLINE float4 max(float4 a, float4 b) { a.m = vmaxq_f32(a.m, b.m); return a; } + +VM_INLINE float hmin(float4 v) +{ + float32x2_t minOfHalfs = vpmin_f32(vget_low_f32(v.m), vget_high_f32(v.m)); + float32x2_t minOfMinOfHalfs = vpmin_f32(minOfHalfs, minOfHalfs); + return vget_lane_f32(minOfMinOfHalfs, 0); +} + +// Returns a 4-bit code where bit0..bit3 is X..W +VM_INLINE unsigned mask(float4 v) +{ + static const uint32x4_t movemask = { 1, 2, 4, 8 }; + static const uint32x4_t highbit = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; + uint32x4_t t0 = vreinterpretq_u32_f32(v.m); + uint32x4_t t1 = vtstq_u32(t0, highbit); + uint32x4_t t2 = vandq_u32(t1, movemask); + uint32x2_t t3 = vorr_u32(vget_low_u32(t2), vget_high_u32(t2)); + return vget_lane_u32(t3, 0) | vget_lane_u32(t3, 1); +} +// Once we have a comparison, we can branch based on its results: +VM_INLINE bool any(bool4 v) { return mask(v) != 0; } +VM_INLINE bool all(bool4 v) { return mask(v) == 15; } + +// "select", i.e. hibit(cond) ? b : a +// on SSE4.1 and up this can be done easily via "blend" instruction; +// on older SSEs has to do a bunch of hoops, see +// https://fgiesen.wordpress.com/2016/04/03/sse-mind-the-gap/ + +VM_INLINE float4 select(float4 a, float4 b, bool4 cond) +{ + a.m = vbslq_f32(cond.m, b.m, a.m); + return a; +} +VM_INLINE int32x4_t select(int32x4_t a, int32x4_t b, bool4 cond) +{ + return vbslq_f32(cond.m, b, a); +} + +VM_INLINE float4 sqrtf(float4 v) +{ + float32x4_t V = v.m; + float32x4_t S0 = vrsqrteq_f32(V); + float32x4_t P0 = vmulq_f32( V, S0 ); + float32x4_t R0 = vrsqrtsq_f32( P0, S0 ); + float32x4_t S1 = vmulq_f32( S0, R0 ); + float32x4_t P1 = vmulq_f32( V, S1 ); + float32x4_t R1 = vrsqrtsq_f32( P1, S1 ); + float32x4_t S2 = vmulq_f32( S1, R1 ); + float32x4_t P2 = vmulq_f32( V, S2 ); + float32x4_t R2 = vrsqrtsq_f32( P2, S2 ); + float32x4_t S3 = vmulq_f32( S2, R2 ); + return float4(vmulq_f32(V, S3)); +} + +VM_INLINE float4 splatX(float32x4_t v) { return float4(vdupq_lane_f32(vget_low_f32(v), 0)); } +VM_INLINE float4 splatY(float32x4_t v) { return float4(vdupq_lane_f32(vget_low_f32(v), 1)); } +VM_INLINE float4 splatZ(float32x4_t v) { return float4(vdupq_lane_f32(vget_high_f32(v), 0)); } +VM_INLINE float4 splatW(float32x4_t v) { return float4(vdupq_lane_f32(vget_high_f32(v), 1)); } + +#endif diff --git a/Cpp/Source/Maths.cpp b/Cpp/Source/Maths.cpp index ddbb284..ca87513 100644 --- a/Cpp/Source/Maths.cpp +++ b/Cpp/Source/Maths.cpp @@ -49,17 +49,28 @@ float3 RandomUnitVector(uint32_t& state) int HitSpheres(const Ray& r, const SpheresSoA& spheres, float tMin, float tMax, Hit& outHit) { -#if DO_HIT_SPHERES_SSE +#if DO_HIT_SPHERES_SIMD float4 hitT = float4(tMax); +#if USE_NEON + int32x4_t id = vdupq_n_s32(-1); +#else __m128i id = _mm_set1_epi32(-1); +#endif -#if DO_FLOAT3_WITH_SSE +#if DO_FLOAT3_WITH_SIMD && !USE_NEON float4 rOrigX = SHUFFLE4(r.orig, 0, 0, 0, 0); float4 rOrigY = SHUFFLE4(r.orig, 1, 1, 1, 1); float4 rOrigZ = SHUFFLE4(r.orig, 2, 2, 2, 2); float4 rDirX = SHUFFLE4(r.dir, 0, 0, 0, 0); float4 rDirY = SHUFFLE4(r.dir, 1, 1, 1, 1); float4 rDirZ = SHUFFLE4(r.dir, 2, 2, 2, 2); +#elif DO_FLOAT3_WITH_SIMD + float4 rOrigX = splatX(r.orig.m); + float4 rOrigY = splatY(r.orig.m); + float4 rOrigZ = splatZ(r.orig.m); + float4 rDirX = splatX(r.dir.m); + float4 rDirY = splatY(r.dir.m); + float4 rDirZ = splatZ(r.dir.m); #else float4 rOrigX = float4(r.orig.x); float4 rOrigY = float4(r.orig.y); @@ -69,7 +80,11 @@ int HitSpheres(const Ray& r, const SpheresSoA& spheres, float tMin, float tMax, float4 rDirZ = float4(r.dir.z); #endif float4 tMin4 = float4(tMin); +#if USE_NEON + int32x4_t curId = vcombine_u32(vcreate_u32(0ULL | (1ULL<<32)), vcreate_u32(2ULL | (3ULL<<32))); +#else __m128i curId = _mm_set_epi32(3, 2, 1, 0); +#endif // process 4 spheres at once for (int i = 0; i < spheres.simdCount; i += kSimdWidth) { @@ -101,7 +116,11 @@ int HitSpheres(const Ray& r, const SpheresSoA& spheres, float tMin, float tMax, id = select(id, curId, msk); hitT = select(hitT, t, msk); } +#if USE_NEON + curId = vaddq_s32(curId, vdupq_n_s32(kSimdWidth)); +#else curId = _mm_add_epi32(curId, _mm_set1_epi32(kSimdWidth)); +#endif } // now we have up to 4 hits, find and return closest one float minT = hmin(hitT); @@ -112,8 +131,13 @@ int HitSpheres(const Ray& r, const SpheresSoA& spheres, float tMin, float tMax, { int id_scalar[4]; float hitT_scalar[4]; +#if USE_NEON + vst1q_s32(id_scalar, id); + vst1q_f32(hitT_scalar, hitT.m); +#else _mm_storeu_si128((__m128i *)id_scalar, id); _mm_storeu_ps(hitT_scalar, hitT.m); +#endif // In general, you would do this with a bit scan (first set/trailing zero count). // But who cares, it's only 16 options. @@ -138,7 +162,7 @@ int HitSpheres(const Ray& r, const SpheresSoA& spheres, float tMin, float tMax, return -1; -#else // #if DO_HIT_SPHERES_SSE +#else // #if DO_HIT_SPHERES_SIMD float hitT = tMax; int id = -1; @@ -175,5 +199,5 @@ int HitSpheres(const Ray& r, const SpheresSoA& spheres, float tMin, float tMax, } else return -1; -#endif // #else of #if DO_HIT_SPHERES_SSE +#endif // #else of #if DO_HIT_SPHERES_SIMD } diff --git a/Cpp/Source/Maths.h b/Cpp/Source/Maths.h index c84ad2a..b587715 100644 --- a/Cpp/Source/Maths.h +++ b/Cpp/Source/Maths.h @@ -9,7 +9,12 @@ #define kPI 3.1415926f // SSE/SIMD vector largely based on http://www.codersnotes.com/notes/maths-lib-2016/ -#if DO_FLOAT3_WITH_SSE +#if DO_FLOAT3_WITH_SIMD + + +#if !defined(__arm__) && !defined(__arm64__) + +// ---- SSE implementation // SHUFFLE3(v, 0,1,2) leaves the vector unchanged (v.xyz). // SHUFFLE3(v, 0,0,0) splats the X (v.xxx). @@ -109,7 +114,137 @@ VM_INLINE float3 clamp(float3 t, float3 a, float3 b) { return min(max(t, a), b); VM_INLINE float sum(float3 v) { return v.getX() + v.getY() + v.getZ(); } VM_INLINE float dot(float3 a, float3 b) { return sum(a*b); } -#else // #if DO_FLOAT3_WITH_SSE +#else // #if !defined(__arm__) && !defined(__arm64__) + +// ---- NEON implementation + +#include + +struct float3 +{ + VM_INLINE float3() {} + VM_INLINE explicit float3(const float *p) { float v[4] = {p[0], p[1], p[2], 0}; m = vld1q_f32(v); } + VM_INLINE explicit float3(float x, float y, float z) { float v[4] = {x, y, z, 0}; m = vld1q_f32(v); } + VM_INLINE explicit float3(float v) { m = vdupq_n_f32(v); } + VM_INLINE explicit float3(float32x4_t v) { m = v; } + + VM_INLINE float getX() const { return vgetq_lane_f32(m, 0); } + VM_INLINE float getY() const { return vgetq_lane_f32(m, 1); } + VM_INLINE float getZ() const { return vgetq_lane_f32(m, 2); } + + VM_INLINE float3 yzx() const + { + float32x2_t low = vget_low_f32(m); + float32x4_t yzx = vcombine_f32(vext_f32(low, vget_high_f32(m), 1), low); + return float3(yzx); + } + VM_INLINE float3 zxy() const + { + float32x4_t p = m; + p = vuzpq_f32(vreinterpretq_f32_s32(vextq_s32(vreinterpretq_s32_f32(p), vreinterpretq_s32_f32(p), 1)), p).val[1]; + return float3(p); + } + + VM_INLINE void store(float *p) const { p[0] = getX(); p[1] = getY(); p[2] = getZ(); } + + void setX(float x) + { + m = vsetq_lane_f32(x, m, 0); + } + void setY(float y) + { + m = vsetq_lane_f32(y, m, 1); + } + void setZ(float z) + { + m = vsetq_lane_f32(z, m, 2); + } + + float32x4_t m; +}; + +typedef float3 bool3; + +VM_INLINE float32x4_t rcp_2(float32x4_t v) +{ + float32x4_t e = vrecpeq_f32(v); + e = vmulq_f32(vrecpsq_f32(e, v), e); + e = vmulq_f32(vrecpsq_f32(e, v), e); + return e; +} + +VM_INLINE float3 operator+ (float3 a, float3 b) { a.m = vaddq_f32(a.m, b.m); return a; } +VM_INLINE float3 operator- (float3 a, float3 b) { a.m = vsubq_f32(a.m, b.m); return a; } +VM_INLINE float3 operator* (float3 a, float3 b) { a.m = vmulq_f32(a.m, b.m); return a; } +VM_INLINE float3 operator/ (float3 a, float3 b) { float32x4_t recip = rcp_2(b.m); a.m = vmulq_f32(a.m, recip); return a; } +VM_INLINE float3 operator* (float3 a, float b) { a.m = vmulq_f32(a.m, vdupq_n_f32(b)); return a; } +VM_INLINE float3 operator/ (float3 a, float b) { float32x4_t recip = rcp_2(vdupq_n_f32(b)); a.m = vmulq_f32(a.m, recip); return a; } +VM_INLINE float3 operator* (float a, float3 b) { b.m = vmulq_f32(vdupq_n_f32(a), b.m); return b; } +VM_INLINE float3 operator/ (float a, float3 b) { float32x4_t recip = rcp_2(b.m); b.m = vmulq_f32(vdupq_n_f32(a), recip); return b; } +VM_INLINE float3& operator+= (float3 &a, float3 b) { a = a + b; return a; } +VM_INLINE float3& operator-= (float3 &a, float3 b) { a = a - b; return a; } +VM_INLINE float3& operator*= (float3 &a, float3 b) { a = a * b; return a; } +VM_INLINE float3& operator/= (float3 &a, float3 b) { a = a / b; return a; } +VM_INLINE float3& operator*= (float3 &a, float b) { a = a * b; return a; } +VM_INLINE float3& operator/= (float3 &a, float b) { a = a / b; return a; } +VM_INLINE bool3 operator==(float3 a, float3 b) { a.m = vceqq_f32(a.m, b.m); return a; } +VM_INLINE bool3 operator!=(float3 a, float3 b) { a.m = vmvnq_u32(vceqq_f32(a.m, b.m)); return a; } +VM_INLINE bool3 operator< (float3 a, float3 b) { a.m = vcltq_f32(a.m, b.m); return a; } +VM_INLINE bool3 operator> (float3 a, float3 b) { a.m = vcgtq_f32(a.m, b.m); return a; } +VM_INLINE bool3 operator<=(float3 a, float3 b) { a.m = vcleq_f32(a.m, b.m); return a; } +VM_INLINE bool3 operator>=(float3 a, float3 b) { a.m = vcgeq_f32(a.m, b.m); return a; } +VM_INLINE float3 min(float3 a, float3 b) { a.m = vminq_f32(a.m, b.m); return a; } +VM_INLINE float3 max(float3 a, float3 b) { a.m = vmaxq_f32(a.m, b.m); return a; } + +VM_INLINE float3 operator- (float3 a) { a.m = vnegq_f32(a.m); return a; } + +VM_INLINE float hmin(float3 v) +{ + float32x2_t minOfHalfs = vpmin_f32(vget_low_f32(v.m), vget_high_f32(v.m)); + float32x2_t minOfMinOfHalfs = vpmin_f32(minOfHalfs, minOfHalfs); + return vget_lane_f32(minOfMinOfHalfs, 0); +} +VM_INLINE float hmax(float3 v) +{ + float32x2_t maxOfHalfs = vpmax_f32(vget_low_f32(v.m), vget_high_f32(v.m)); + float32x2_t maxOfMaxOfHalfs = vpmax_f32(maxOfHalfs, maxOfHalfs); + return vget_lane_f32(maxOfMaxOfHalfs, 0); +} + +VM_INLINE float3 cross(float3 a, float3 b) +{ + // x <- a.y*b.z - a.z*b.y + // y <- a.z*b.x - a.x*b.z + // z <- a.x*b.y - a.y*b.x + // We can save a shuffle by grouping it in this wacky order: + return (a.zxy()*b - a*b.zxy()).zxy(); +} + +// Returns a 3-bit code where bit0..bit2 is X..Z +VM_INLINE unsigned mask(float3 v) +{ + static const uint32x4_t movemask = { 1, 2, 4, 8 }; + static const uint32x4_t highbit = { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }; + uint32x4_t t0 = vreinterpretq_u32_f32(v.m); + uint32x4_t t1 = vtstq_u32(t0, highbit); + uint32x4_t t2 = vandq_u32(t1, movemask); + uint32x2_t t3 = vorr_u32(vget_low_u32(t2), vget_high_u32(t2)); + return vget_lane_u32(t3, 0) | vget_lane_u32(t3, 1); +} +// Once we have a comparison, we can branch based on its results: +VM_INLINE bool any(bool3 v) { return mask(v) != 0; } +VM_INLINE bool all(bool3 v) { return mask(v) == 7; } + +VM_INLINE float3 clamp(float3 t, float3 a, float3 b) { return min(max(t, a), b); } +VM_INLINE float sum(float3 v) { return v.getX() + v.getY() + v.getZ(); } +VM_INLINE float dot(float3 a, float3 b) { return sum(a*b); } + + +#endif // #else of #if !defined(__arm__) && !defined(__arm64__) + +#else // #if DO_FLOAT3_WITH_SIMD + +// ---- Simple scalar C implementation struct float3 @@ -126,6 +261,9 @@ struct float3 VM_INLINE float getX() const { return x; } VM_INLINE float getY() const { return y; } VM_INLINE float getZ() const { return z; } + VM_INLINE void setX(float x_) { x = x_; } + VM_INLINE void setY(float y_) { y = y_; } + VM_INLINE void setZ(float z_) { z = z_; } VM_INLINE void store(float *p) const { p[0] = getX(); p[1] = getY(); p[2] = getZ(); } float x, y, z; @@ -145,7 +283,7 @@ VM_INLINE float3 cross(const float3& a, const float3& b) a.x*b.y - a.y*b.x ); } -#endif // #else of #if DO_FLOAT3_WITH_SSE +#endif // #else of #if DO_FLOAT3_WITH_SIMD VM_INLINE float length(float3 v) { return sqrtf(dot(v, v)); } VM_INLINE float sqLength(float3 v) { return dot(v, v); } diff --git a/Cpp/Source/Test.cpp b/Cpp/Source/Test.cpp index 3193080..56ce23c 100644 --- a/Cpp/Source/Test.cpp +++ b/Cpp/Source/Test.cpp @@ -291,8 +291,8 @@ void UpdateTest(float time, int frameCount, int screenWidth, int screenHeight, u { if (testFlags & kFlagAnimate) { - s_Spheres[1].center.y = cosf(time) + 1.0f; - s_Spheres[8].center.z = sinf(time)*0.3f; + s_Spheres[1].center.setY(cosf(time) + 1.0f); + s_Spheres[8].center.setZ(sinf(time)*0.3f); } float3 lookfrom(0, 2, 3); float3 lookat(0, 0, 0);