diff --git a/softlight/CMakeLists.txt b/softlight/CMakeLists.txt index 5a0c18bb..eb44d24f 100644 --- a/softlight/CMakeLists.txt +++ b/softlight/CMakeLists.txt @@ -123,6 +123,7 @@ set(SL_LIB_HEADERS include/softlight/SL_AnimationProperty.hpp include/softlight/SL_Atlas.hpp include/softlight/SL_BlitProcesor.hpp + include/softlight/SL_BlitCompressedProcesor.hpp include/softlight/SL_BoundingBox.hpp include/softlight/SL_Camera.hpp include/softlight/SL_ClearProcesor.hpp @@ -213,6 +214,7 @@ set(SL_LIB_SOURCES src/SL_AnimationPlayer.cpp src/SL_Atlas.cpp src/SL_BlitProcessor.cpp + src/SL_BlitCompressedProcessor.cpp src/SL_BoundingBox.cpp src/SL_Camera.cpp src/SL_ClearProcessor.cpp diff --git a/softlight/include/softlight/SL_BlitCompressedProcesor.hpp b/softlight/include/softlight/SL_BlitCompressedProcesor.hpp new file mode 100644 index 00000000..34b7a63d --- /dev/null +++ b/softlight/include/softlight/SL_BlitCompressedProcesor.hpp @@ -0,0 +1,86 @@ + +#ifndef SL_BLIT_COMPRESSED_PROCESSOR_HPP +#define SL_BLIT_COMPRESSED_PROCESSOR_HPP + +#include + + + +/*----------------------------------------------------------------------------- + * Forward Declarations +-----------------------------------------------------------------------------*/ +class SL_Texture; + + + +/**---------------------------------------------------------------------------- + * @brief The Blit Processor helps to perform texture blitting to the native + * window backbuffer on another thread. + * + * Many of the blitting routines are templated to support conversion between + * possible texture types and the backbuffer (which is an 8-bit RGBA buffer). + * + * Texture blitting uses nearest-neighbor filtering to increase or decrease the + * resolution and fit the backbuffer. Fixed-point calculation is used to avoid + * precision errors and increase ALU throughput. Benchmarks on x86 and ARM have + * shown that floating-point logic performs worse in this area. 
+-----------------------------------------------------------------------------*/ +struct SL_BlitCompressedProcessor +{ + enum : uint_fast32_t + { + NUM_FIXED_BITS = 16u + }; + + // 32 bits + uint16_t mThreadId; + uint16_t mNumThreads; + + // 64-bits + uint16_t srcX0; + uint16_t srcY0; + uint16_t srcX1; + uint16_t srcY1; + + // 64-bits + uint16_t dstX0; + uint16_t dstY0; + uint16_t dstX1; + uint16_t dstY1; + + // 64-128 bits + const SL_Texture* mTexture; + SL_Texture* mBackBuffer; + + // 224-288 bits total, 28-36 bytes + + // Blit a single R channel + template + void blit_src_r() noexcept; + + // Blit a texture with only RG color channels + template + void blit_src_rg() noexcept; + + // Blit an RGB texture + template + void blit_src_rgb() noexcept; + + // Blit all 4 color components + template + void blit_src_rgba() noexcept; + + // Blit compressed color components + template + void blit_src_compressed() noexcept; + + // Blit all 4 color components + template + void blit_nearest() noexcept; + + void execute() noexcept; +}; + + + +#endif /* SL_BLIT_COMPRESSED_PROCESSOR_HPP */ diff --git a/softlight/include/softlight/SL_BlitProcesor.hpp b/softlight/include/softlight/SL_BlitProcesor.hpp index 63e53ad7..e0990fe2 100644 --- a/softlight/include/softlight/SL_BlitProcesor.hpp +++ b/softlight/include/softlight/SL_BlitProcesor.hpp @@ -5,1102 +5,12 @@ #include -#include "lightsky/math/vec_utils.h" // vector casting - -#include "softlight/SL_Color.hpp" -#include "softlight/SL_ColorCompressed.hpp" -#include "softlight/SL_Texture.hpp" - /*----------------------------------------------------------------------------- - * Helper functions and namespaces + * Forward Declarations -----------------------------------------------------------------------------*/ -/*------------------------------------- - * Recolor to R --------------------------------------*/ -template -struct SL_Blit_R_to_R -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRType) - }; - - inline LS_INLINE void 
operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRType inColor = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor); - } -}; - -template -struct SL_Blit_RG_to_R -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGType inColor = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor)[0]; - } -}; - -template -struct SL_Blit_RGB_to_R -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGBType inColor = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor)[0]; - } -}; - -template -struct SL_Blit_RGBA_to_R -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGBAType inColor = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor)[0]; - } -}; - -template -struct SL_Blit_RGB_to_R -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - 
uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGB565 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); - *reinterpret_cast*>(pOutBuf + outIndex) = rgb_cast(inColor)[0]; - } -}; - -template -struct SL_Blit_RGBA_to_R -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGB5551 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); - *reinterpret_cast*>(pOutBuf + outIndex) = rgb_cast(inColor)[0]; - } -}; - -template -struct SL_Blit_RGBA_to_R -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGB4444 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); - *reinterpret_cast*>(pOutBuf + outIndex) = rgb_cast(inColor)[0]; - } -}; - - - -/*------------------------------------- - * Recolor to RG --------------------------------------*/ -template -struct SL_Blit_R_to_RG -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRType inColorR = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - const SL_ColorRGType inColor = SL_ColorRGType{inColorR[0], SL_ColorLimits::min().r}; - - *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor); - } -}; - -template -struct SL_Blit_RG_to_RG -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const 
uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGType inColor = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor); - } -}; - -template -struct SL_Blit_RGB_to_RG -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGBType inColorRGB = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - const SL_ColorRGType inColor = ls::math::vec2_cast(inColorRGB); - - *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor); - } -}; - -template -struct SL_Blit_RGBA_to_RG -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGBAType inColorRGBA = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - const SL_ColorRGType inColor = ls::math::vec2_cast(inColorRGBA); - - *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor); - } -}; - -template -struct SL_Blit_RGB_to_RG -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGB565 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); - *reinterpret_cast*>(pOutBuf + outIndex) = ls::math::vec2_cast(rgb_cast(inColor)); - } -}; - -template -struct SL_Blit_RGBA_to_RG -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const 
uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGB5551 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); - *reinterpret_cast*>(pOutBuf + outIndex) = ls::math::vec2_cast(rgb_cast(inColor)); - } -}; - -template -struct SL_Blit_RGBA_to_RG -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGB4444 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); - *reinterpret_cast*>(pOutBuf + outIndex) = ls::math::vec2_cast(rgb_cast(inColor)); - } -}; - - - -/*------------------------------------- - * Recolor to RGB --------------------------------------*/ -template -struct SL_Blit_R_to_RGB -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGBType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRType inColorR = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - const SL_ColorRGBType inColor = SL_ColorRGBType{SL_ColorLimits::min().r, SL_ColorLimits::min().r, inColorR[0]}; - - *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor); - } -}; - -template -struct SL_Blit_R_to_RGB -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGB565) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRType inColorR = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - const SL_ColorRGBType inColor = SL_ColorRGBType{inColorR[0], SL_ColorLimits::min().r, SL_ColorLimits::min().r}; - - 
*reinterpret_cast(pOutBuf + outIndex) = rgb565_cast(inColor); - } -}; - -template -struct SL_Blit_RG_to_RGB -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGBType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGType inColorRG = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - const SL_ColorRGBType inColor = ls::math::vec3_cast(inColorRG, SL_ColorLimits::min().r); - - *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor); - } -}; - -template -struct SL_Blit_RG_to_RGB -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGB565) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGType inColorRG = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - const SL_ColorRGBType inColor = ls::math::vec3_cast(inColorRG, SL_ColorLimits::min().r); - - *reinterpret_cast(pOutBuf + outIndex) = rgb565_cast(inColor); - } -}; - -template -struct SL_Blit_RGB_to_RGB -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGBType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGBType inColor = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor); - } -}; - -template -struct SL_Blit_RGB_to_RGB -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGBType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGB565 inColor = 
pTexture->texel((uint16_t)srcX, (uint16_t)srcY); - *reinterpret_cast*>(pOutBuf + outIndex) = rgb_cast(inColor); - } -}; - -template -struct SL_Blit_RGB_to_RGB -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGB565) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGBType inColor = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - *reinterpret_cast(pOutBuf + outIndex) = rgb565_cast(inColor); - } -}; - -template<> -struct SL_Blit_RGB_to_RGB -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGB565) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGB565 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); - *reinterpret_cast(pOutBuf + outIndex) = inColor; - } -}; - -template -struct SL_Blit_RGBA_to_RGB -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGBType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGBAType inColorRGBA = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - const SL_ColorRGBType inColor = ls::math::vec3_cast(inColorRGBA); - - *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor); - } -}; - -template -struct SL_Blit_RGBA_to_RGB -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGB565) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGBAType inColorRGBA = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - const 
SL_ColorRGBType inColor = ls::math::vec3_cast(inColorRGBA); - - *reinterpret_cast(pOutBuf + outIndex) = rgb565_cast(inColor); - } -}; - -template -struct SL_Blit_RGBA_to_RGB -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGBType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGB5551 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); - *reinterpret_cast*>(pOutBuf + outIndex) = ls::math::vec3_cast(rgb_cast(inColor)); - } -}; - -template<> -struct SL_Blit_RGBA_to_RGB -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGB565) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGB5551 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); - const ls::math::vec3_t outColor = ls::math::vec3_cast(rgb_cast(inColor)); - *reinterpret_cast(pOutBuf + outIndex) = rgb565_cast(outColor); - } -}; - -template -struct SL_Blit_RGBA_to_RGB -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGBType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGB4444 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); - *reinterpret_cast*>(pOutBuf + outIndex) = ls::math::vec3_cast(rgb_cast(inColor)); - } -}; - -template<> -struct SL_Blit_RGBA_to_RGB -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGB565) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGB4444 inColor = 
pTexture->texel((uint16_t)srcX, (uint16_t)srcY); - const ls::math::vec3_t outColor = ls::math::vec3_cast(rgb_cast(inColor)); - *reinterpret_cast(pOutBuf + outIndex) = rgb565_cast(outColor); - } -}; - - - -/*------------------------------------- - * Recolor to RGBA --------------------------------------*/ -template -struct SL_Blit_R_to_RGBA -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGBAType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRType inColorR = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - const SL_ColorRGBAType inColor = SL_ColorRGBAType{SL_ColorLimits::min().r, SL_ColorLimits::min().r, inColorR[0], SL_ColorLimits::max()[3]}; - - *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor); - } -}; - -template -struct SL_Blit_R_to_RGBA -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGB5551) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRType inColorR = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - const SL_ColorRGBAType inColor = SL_ColorRGBAType{SL_ColorLimits::min().r, SL_ColorLimits::min().r, inColorR[0], SL_ColorLimits::max()[3]}; - - *reinterpret_cast(pOutBuf + outIndex) = rgb5551_cast(inColor); - } -}; - -template -struct SL_Blit_R_to_RGBA -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGB4444) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRType inColorR = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - const SL_ColorRGBAType inColor = SL_ColorRGBAType{SL_ColorLimits::min().r, 
SL_ColorLimits::min().r, inColorR[0], SL_ColorLimits::max()[3]}; - - *reinterpret_cast(pOutBuf + outIndex) = rgb4444_cast(inColor); - } -}; - -template -struct SL_Blit_RG_to_RGBA -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGBAType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGType inColorRG = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - const SL_ColorRGBAType inColor = ls::math::vec4_cast(SL_ColorLimits::min().r, inColorRG, SL_ColorLimits::max()[3]); - - *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor); - } -}; - -template -struct SL_Blit_RG_to_RGBA -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGB5551) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGType inColorRG = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - const SL_ColorRGBAType inColor = ls::math::vec4_cast(SL_ColorLimits::min().r, inColorRG, SL_ColorLimits::max()[3]); - - *reinterpret_cast(pOutBuf + outIndex) = rgb5551_cast(inColor); - } -}; - -template -struct SL_Blit_RG_to_RGBA -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGB4444) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGType inColorRG = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - const SL_ColorRGBAType inColor = ls::math::vec4_cast(SL_ColorLimits::min().r, inColorRG, SL_ColorLimits::max()[3]); - - *reinterpret_cast(pOutBuf + outIndex) = rgb4444_cast(inColor); - } -}; - -template -struct SL_Blit_RGB_to_RGBA -{ - enum : uint_fast32_t - { - stride = 
sizeof(SL_ColorRGBAType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGBType inColorRGB = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - const SL_ColorRGBAType inColor = ls::math::vec4_cast(inColorRGB, SL_ColorLimits::max()[3]); - - *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor); - } -}; - -template -struct SL_Blit_RGB_to_RGBA -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGBAType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGB565 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); - *reinterpret_cast*>(pOutBuf + outIndex) = ls::math::vec4_cast(rgb_cast(inColor), SL_ColorLimits::max()[3]); - } -}; - -template<> -struct SL_Blit_RGB_to_RGBA -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGB5551) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGB565 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); - const ls::math::vec4_t outRGBA = ls::math::vec4_cast(rgb_cast(inColor), SL_ColorLimits::max().a); - *reinterpret_cast(pOutBuf + outIndex) = rgb5551_cast(outRGBA); - } -}; - -template<> -struct SL_Blit_RGB_to_RGBA -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGB4444) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGB565 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); - const ls::math::vec4_t outRGBA = 
ls::math::vec4_cast(rgb_cast(inColor), SL_ColorLimits::max().a); - *reinterpret_cast(pOutBuf + outIndex) = rgb4444_cast(outRGBA); - } -}; - -template -struct SL_Blit_RGB_to_RGBA -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGB5551) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGBType inColorRGB = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - const SL_ColorRGBAType inColor = ls::math::vec4_cast(inColorRGB, SL_ColorLimits::max()[3]); - - *reinterpret_cast(pOutBuf + outIndex) = rgb5551_cast(inColor); - } -}; - -template -struct SL_Blit_RGB_to_RGBA -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGB4444) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGBType inColorRGB = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - const SL_ColorRGBAType inColor = ls::math::vec4_cast(inColorRGB, SL_ColorLimits::max()[3]); - - *reinterpret_cast(pOutBuf + outIndex) = rgb4444_cast(inColor); - } -}; - - -template -struct SL_Blit_RGBA_to_RGBA -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGBAType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGBAType inColor = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor); - } -}; - -// I spent enough time on blitting... 
Here, I'm only optimizing for the most -// common blit operations, from RGBAf & RGBA8 to RGBA8 -template<> -struct SL_Blit_RGBA_to_RGBA -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGBAType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const int32_t inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); - *reinterpret_cast(pOutBuf + outIndex) = inColor; - } -}; - - - -template<> -struct SL_Blit_RGBA_to_RGBA -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGBAType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGBAType inColor = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - const SL_ColorRGBAType in = color_cast(inColor); - *reinterpret_cast(pOutBuf + outIndex) = reinterpret_cast(in); - } -}; - -template -struct SL_Blit_RGBA_to_RGBA -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGB5551) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGBAType inColor = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - *reinterpret_cast(pOutBuf + outIndex) = rgb5551_cast(inColor); - } -}; - -template -struct SL_Blit_RGBA_to_RGBA -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGBAType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGB5551 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); - *reinterpret_cast*>(pOutBuf + outIndex) = rgb_cast(inColor); - 
} -}; - -template<> -struct SL_Blit_RGBA_to_RGBA -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGB5551) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGB5551 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); - *reinterpret_cast(pOutBuf + outIndex) = inColor; - } -}; - -template<> -struct SL_Blit_RGBA_to_RGBA -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGB4444) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGB5551 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); - const ls::math::vec4_t outColor = rgb_cast(inColor); - *reinterpret_cast(pOutBuf + outIndex) = rgb4444_cast(outColor); - } -}; - -template -struct SL_Blit_RGBA_to_RGBA -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGB4444) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGBAType inColor = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); - *reinterpret_cast(pOutBuf + outIndex) = rgb4444_cast(inColor); - } -}; - -template -struct SL_Blit_RGBA_to_RGBA -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGBAType) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGB4444 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); - *reinterpret_cast*>(pOutBuf + outIndex) = rgb_cast(inColor); - } -}; - -template<> -struct SL_Blit_RGBA_to_RGBA -{ - enum : uint_fast32_t - { - 
stride = sizeof(SL_ColorRGB4444) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGB4444 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); - *reinterpret_cast(pOutBuf + outIndex) = inColor; - } -}; - -template<> -struct SL_Blit_RGBA_to_RGBA -{ - enum : uint_fast32_t - { - stride = sizeof(SL_ColorRGB5551) - }; - - inline LS_INLINE void operator()( - const SL_Texture* pTexture, - const uint_fast32_t srcX, - const uint_fast32_t srcY, - unsigned char* const pOutBuf, - uint_fast32_t outIndex) const noexcept - { - const SL_ColorRGB4444 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); - const ls::math::vec4_t outColor = rgb_cast(inColor); - *reinterpret_cast(pOutBuf + outIndex) = rgb5551_cast(outColor); - } -}; +class SL_Texture; @@ -1170,277 +80,4 @@ struct SL_BlitProcessor - -/*------------------------------------- - * Nearest-neighbor filtering (R Channel) --------------------------------------*/ -template -void SL_BlitProcessor::blit_src_r() noexcept -{ - switch (mBackBuffer->type()) - { - case SL_COLOR_R_8U: blit_nearest>(); break; - case SL_COLOR_R_16U: blit_nearest>(); break; - case SL_COLOR_R_32U: blit_nearest>(); break; - case SL_COLOR_R_64U: blit_nearest>(); break; - case SL_COLOR_R_FLOAT: blit_nearest>(); break; - case SL_COLOR_R_DOUBLE: blit_nearest>(); break; - - case SL_COLOR_RG_8U: blit_nearest>(); break; - case SL_COLOR_RG_16U: blit_nearest>(); break; - case SL_COLOR_RG_32U: blit_nearest>(); break; - case SL_COLOR_RG_64U: blit_nearest>(); break; - case SL_COLOR_RG_FLOAT: blit_nearest>(); break; - case SL_COLOR_RG_DOUBLE: blit_nearest>(); break; - - case SL_COLOR_RGB_8U: blit_nearest>(); break; - case SL_COLOR_RGB_16U: blit_nearest>(); break; - case SL_COLOR_RGB_32U: blit_nearest>(); break; - case SL_COLOR_RGB_64U: blit_nearest>(); break; - case SL_COLOR_RGB_FLOAT: 
blit_nearest>(); break; - case SL_COLOR_RGB_DOUBLE: blit_nearest>(); break; - - case SL_COLOR_RGBA_8U: blit_nearest>(); break; - case SL_COLOR_RGBA_16U: blit_nearest>(); break; - case SL_COLOR_RGBA_32U: blit_nearest>(); break; - case SL_COLOR_RGBA_64U: blit_nearest>(); break; - case SL_COLOR_RGBA_FLOAT: blit_nearest>(); break; - case SL_COLOR_RGBA_DOUBLE: blit_nearest>(); break; - - default: - break; - } -} - - - -/*------------------------------------- - * Nearest-neighbor filtering (R & G Channels) --------------------------------------*/ -template -void SL_BlitProcessor::blit_src_rg() noexcept -{ - switch (mBackBuffer->type()) - { - case SL_COLOR_R_8U: blit_nearest>(); break; - case SL_COLOR_R_16U: blit_nearest>(); break; - case SL_COLOR_R_32U: blit_nearest>(); break; - case SL_COLOR_R_64U: blit_nearest>(); break; - case SL_COLOR_R_FLOAT: blit_nearest>(); break; - case SL_COLOR_R_DOUBLE: blit_nearest>(); break; - - case SL_COLOR_RG_8U: blit_nearest>(); break; - case SL_COLOR_RG_16U: blit_nearest>(); break; - case SL_COLOR_RG_32U: blit_nearest>(); break; - case SL_COLOR_RG_64U: blit_nearest>(); break; - case SL_COLOR_RG_FLOAT: blit_nearest>(); break; - case SL_COLOR_RG_DOUBLE: blit_nearest>(); break; - - case SL_COLOR_RGB_8U: blit_nearest>(); break; - case SL_COLOR_RGB_16U: blit_nearest>(); break; - case SL_COLOR_RGB_32U: blit_nearest>(); break; - case SL_COLOR_RGB_64U: blit_nearest>(); break; - case SL_COLOR_RGB_FLOAT: blit_nearest>(); break; - case SL_COLOR_RGB_DOUBLE: blit_nearest>(); break; - - case SL_COLOR_RGBA_8U: blit_nearest>(); break; - case SL_COLOR_RGBA_16U: blit_nearest>(); break; - case SL_COLOR_RGBA_32U: blit_nearest>(); break; - case SL_COLOR_RGBA_64U: blit_nearest>(); break; - case SL_COLOR_RGBA_FLOAT: blit_nearest>(); break; - case SL_COLOR_RGBA_DOUBLE: blit_nearest>(); break; - - default: - break; - } -} - - - -/*------------------------------------- - * Nearest-neighbor filtering (RGB) --------------------------------------*/ -template -void 
SL_BlitProcessor::blit_src_rgb() noexcept -{ - switch (mBackBuffer->type()) - { - case SL_COLOR_R_8U: blit_nearest>(); break; - case SL_COLOR_R_16U: blit_nearest>(); break; - case SL_COLOR_R_32U: blit_nearest>(); break; - case SL_COLOR_R_64U: blit_nearest>(); break; - case SL_COLOR_R_FLOAT: blit_nearest>(); break; - case SL_COLOR_R_DOUBLE: blit_nearest>(); break; - - case SL_COLOR_RG_8U: blit_nearest>(); break; - case SL_COLOR_RG_16U: blit_nearest>(); break; - case SL_COLOR_RG_32U: blit_nearest>(); break; - case SL_COLOR_RG_64U: blit_nearest>(); break; - case SL_COLOR_RG_FLOAT: blit_nearest>(); break; - case SL_COLOR_RG_DOUBLE: blit_nearest>(); break; - - case SL_COLOR_RGB_8U: blit_nearest>(); break; - case SL_COLOR_RGB_16U: blit_nearest>(); break; - case SL_COLOR_RGB_32U: blit_nearest>(); break; - case SL_COLOR_RGB_64U: blit_nearest>(); break; - case SL_COLOR_RGB_FLOAT: blit_nearest>(); break; - case SL_COLOR_RGB_DOUBLE: blit_nearest>(); break; - - case SL_COLOR_RGBA_8U: blit_nearest>(); break; - case SL_COLOR_RGBA_16U: blit_nearest>(); break; - case SL_COLOR_RGBA_32U: blit_nearest>(); break; - case SL_COLOR_RGBA_64U: blit_nearest>(); break; - case SL_COLOR_RGBA_FLOAT: blit_nearest>(); break; - case SL_COLOR_RGBA_DOUBLE: blit_nearest>(); break; - - case SL_COLOR_RGB_565: blit_nearest>(); break; - case SL_COLOR_RGBA_5551: blit_nearest>(); break; - case SL_COLOR_RGBA_4444: blit_nearest>(); break; - - default: - break; - } -} - -template<> -void SL_BlitProcessor::blit_src_rgb() noexcept; - - - -/*------------------------------------- - * Nearest-neighbor filtering (RGBA) --------------------------------------*/ -template -void SL_BlitProcessor::blit_src_rgba() noexcept -{ - switch (mBackBuffer->type()) - { - case SL_COLOR_R_8U: blit_nearest>(); break; - case SL_COLOR_R_16U: blit_nearest>(); break; - case SL_COLOR_R_32U: blit_nearest>(); break; - case SL_COLOR_R_64U: blit_nearest>(); break; - case SL_COLOR_R_FLOAT: blit_nearest>(); break; - case SL_COLOR_R_DOUBLE: 
blit_nearest>(); break; - - case SL_COLOR_RG_8U: blit_nearest>(); break; - case SL_COLOR_RG_16U: blit_nearest>(); break; - case SL_COLOR_RG_32U: blit_nearest>(); break; - case SL_COLOR_RG_64U: blit_nearest>(); break; - case SL_COLOR_RG_FLOAT: blit_nearest>(); break; - case SL_COLOR_RG_DOUBLE: blit_nearest>(); break; - - case SL_COLOR_RGB_8U: blit_nearest>(); break; - case SL_COLOR_RGB_16U: blit_nearest>(); break; - case SL_COLOR_RGB_32U: blit_nearest>(); break; - case SL_COLOR_RGB_64U: blit_nearest>(); break; - case SL_COLOR_RGB_FLOAT: blit_nearest>(); break; - case SL_COLOR_RGB_DOUBLE: blit_nearest>(); break; - - case SL_COLOR_RGBA_8U: blit_nearest>(); break; - case SL_COLOR_RGBA_16U: blit_nearest>(); break; - case SL_COLOR_RGBA_32U: blit_nearest>(); break; - case SL_COLOR_RGBA_64U: blit_nearest>(); break; - case SL_COLOR_RGBA_FLOAT: blit_nearest>(); break; - case SL_COLOR_RGBA_DOUBLE: blit_nearest>(); break; - - case SL_COLOR_RGB_565: blit_nearest>(); break; - case SL_COLOR_RGBA_5551: blit_nearest>(); break; - case SL_COLOR_RGBA_4444: blit_nearest>(); break; - - default: - break; - } -} - -template<> -void SL_BlitProcessor::blit_src_rgba() noexcept; - -template<> -void SL_BlitProcessor::blit_src_rgba() noexcept; - - - -// MSVC crashes when generating all blit permutations. 
-#if !defined(LS_COMPILER_MSC) - extern template void SL_BlitProcessor::blit_src_r(); - extern template void SL_BlitProcessor::blit_src_r(); - extern template void SL_BlitProcessor::blit_src_r(); - extern template void SL_BlitProcessor::blit_src_r(); - extern template void SL_BlitProcessor::blit_src_r(); - extern template void SL_BlitProcessor::blit_src_r(); - extern template void SL_BlitProcessor::blit_src_rg(); - extern template void SL_BlitProcessor::blit_src_rg(); - extern template void SL_BlitProcessor::blit_src_rg(); - extern template void SL_BlitProcessor::blit_src_rg(); - extern template void SL_BlitProcessor::blit_src_rg(); - extern template void SL_BlitProcessor::blit_src_rg(); - extern template void SL_BlitProcessor::blit_src_rgb(); - extern template void SL_BlitProcessor::blit_src_rgb(); - extern template void SL_BlitProcessor::blit_src_rgb(); - extern template void SL_BlitProcessor::blit_src_rgb(); - extern template void SL_BlitProcessor::blit_src_rgb(); - extern template void SL_BlitProcessor::blit_src_rgb(); - extern template void SL_BlitProcessor::blit_src_rgba(); - extern template void SL_BlitProcessor::blit_src_rgba(); - extern template void SL_BlitProcessor::blit_src_rgba(); - extern template void SL_BlitProcessor::blit_src_rgba(); - extern template void SL_BlitProcessor::blit_src_rgba(); - extern template void SL_BlitProcessor::blit_src_rgba(); -#endif - - - -/*------------------------------------- - * Nearest-neighbor filtering (RGBA) --------------------------------------*/ -template -void SL_BlitProcessor::blit_nearest() noexcept -{ - constexpr BlipOp blitOp; - unsigned char* const pOutBuf = reinterpret_cast(mBackBuffer->data()); - - const uint_fast32_t inW = (uint_fast32_t)srcX1 - (uint_fast32_t)srcX0; - const uint_fast32_t inH = (uint_fast32_t)srcY1 - (uint_fast32_t)srcY0; - const uint_fast32_t outW = (uint_fast32_t)dstX1 - (uint_fast32_t)dstX0; - - const uint_fast32_t totalOutW = mBackBuffer->width(); - const uint_fast32_t totalOutH = 
mBackBuffer->height(); - - // Only tile data along the y-axis of the render buffer. This will help to - // make use of the CPU prefetcher when iterating pixels along the x-axis - const uint_fast32_t x0 = ls::math::max(0u, dstX0); - const uint_fast32_t x1 = ls::math::min(totalOutW, x0 + outW); - const uint_fast32_t y0 = dstY0+mThreadId; - const uint_fast32_t y1 = dstY1; - const uint_fast32_t finW = (inW << NUM_FIXED_BITS); - const uint_fast32_t finH = (inH << NUM_FIXED_BITS); - const uint_fast32_t foutW = (finW / totalOutW) + 1u; // account for rounding errors - const uint_fast32_t foutH = (finH / totalOutH) + 1u; - - uint_fast32_t y = y0; - - while (LS_LIKELY(y < y1)) - { - const uint_fast32_t yf = (y * foutH) >> NUM_FIXED_BITS; - const uint_fast32_t srcY = srcY1 - (srcY0 + yf) - 1u; - uint_fast32_t outIndex = (x0 + totalOutW * y) * BlipOp::stride; - - uint_fast32_t x = x0; - - while (LS_LIKELY(x < x1)) - { - const uint_fast32_t xf = x * foutW; - const uint_fast32_t srcX = xf >> NUM_FIXED_BITS; - - blitOp(mTexture, srcX, srcY, pOutBuf, outIndex); - ++x; - outIndex += BlipOp::stride; - } - - y += mNumThreads; - } -} - - - #endif /* SL_BLIT_PROCESSOR_HPP */ diff --git a/softlight/include/softlight/SL_Color.hpp b/softlight/include/softlight/SL_Color.hpp index e6ed274b..c47af33b 100644 --- a/softlight/include/softlight/SL_Color.hpp +++ b/softlight/include/softlight/SL_Color.hpp @@ -82,6 +82,13 @@ unsigned sl_elements_per_color(SL_ColorDataType p); +/*------------------------------------- + * Compressed format check +-------------------------------------*/ +bool sl_is_compressed_color(SL_ColorDataType p); + + + /**---------------------------------------------------------------------------- * @brief Red-only Color Types -----------------------------------------------------------------------------*/ diff --git a/softlight/include/softlight/SL_ProcessorPool.hpp b/softlight/include/softlight/SL_ProcessorPool.hpp index 412c78fc..f46d3809 100644 --- 
a/softlight/include/softlight/SL_ProcessorPool.hpp +++ b/softlight/include/softlight/SL_ProcessorPool.hpp @@ -112,6 +112,19 @@ class SL_ProcessorPool uint16_t dstY1 ) noexcept; + void run_blit_compressed_processors( + const SL_Texture* inTex, + SL_Texture* outTex, + uint16_t srcX0, + uint16_t srcY0, + uint16_t srcX1, + uint16_t srcY1, + uint16_t dstX0, + uint16_t dstY0, + uint16_t dstX1, + uint16_t dstY1 + ) noexcept; + void run_clear_processors(const void* inColor, SL_Texture* outTex) noexcept; void run_clear_processors(const void* inColor, const void* depth, SL_Texture* colorBuf, SL_Texture* depthBuf) noexcept; diff --git a/softlight/include/softlight/SL_ShaderProcessor.hpp b/softlight/include/softlight/SL_ShaderProcessor.hpp index 06612277..941a9bbd 100644 --- a/softlight/include/softlight/SL_ShaderProcessor.hpp +++ b/softlight/include/softlight/SL_ShaderProcessor.hpp @@ -5,6 +5,7 @@ #include #include "softlight/SL_BlitProcesor.hpp" +#include "softlight/SL_BlitCompressedProcesor.hpp" #include "softlight/SL_ClearProcesor.hpp" #include "softlight/SL_LineProcessor.hpp" #include "softlight/SL_PointProcessor.hpp" @@ -25,6 +26,7 @@ enum SL_ShaderType : uint8_t SL_LINE_PROCESSOR, SL_POINT_PROCESSOR, SL_BLIT_PROCESSOR, + SL_BLIT_COMPRESSED_PROCESSOR, SL_CLEAR_PROCESSOR }; @@ -46,6 +48,7 @@ struct SL_ShaderProcessor SL_LineProcessor mLineProcessor; SL_PointProcessor mPointProcessor; SL_BlitProcessor mBlitter; + SL_BlitCompressedProcessor mBlitterCompressed; SL_ClearProcessor mClear; }; @@ -93,6 +96,10 @@ inline void SL_ShaderProcessor::operator()() noexcept mBlitter.execute(); break; + case SL_BLIT_COMPRESSED_PROCESSOR: + mBlitterCompressed.execute(); + break; + case SL_CLEAR_PROCESSOR: mClear.execute(); break; diff --git a/softlight/src/SL_BlitCompressedProcessor.cpp b/softlight/src/SL_BlitCompressedProcessor.cpp new file mode 100644 index 00000000..d49dfe6d --- /dev/null +++ b/softlight/src/SL_BlitCompressedProcessor.cpp @@ -0,0 +1,1066 @@ + +#include 
"lightsky/utils/Assertions.h" + +#include "lightsky/math/scalar_utils.h" +#include "lightsky/math/vec_utils.h" // vector casting + +#include "softlight/SL_BlitCompressedProcesor.hpp" +#include "softlight/SL_ColorCompressed.hpp" +#include "softlight/SL_Texture.hpp" + + + +/*----------------------------------------------------------------------------- + * Helper functions and namespaces +-----------------------------------------------------------------------------*/ +/*------------------------------------- + * Compressed to R/G/B/A +-------------------------------------*/ +template +struct SL_Blit_Compressed_to_R; + +template +struct SL_Blit_Compressed_to_RG; + +template +struct SL_Blit_Compressed_to_RGB; + +template +struct SL_Blit_Compressed_to_RGBA; + + + +/*------------------------------------- + * R/G/B/A to Compressed +-------------------------------------*/ +template +struct SL_Blit_R_to_Compressed; + +template +struct SL_Blit_RG_to_Compressed; + +template +struct SL_Blit_RGB_to_Compressed; + +template +struct SL_Blit_RGBA_to_Compressed; + + + +/*------------------------------------- + * Compressed to Compressed +-------------------------------------*/ +template +struct SL_Blit_Compressed_to_Compressed; + + + +/*------------------------------------- + * Compressed to R +-------------------------------------*/ +template +struct SL_Blit_Compressed_to_R +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRType) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGB565 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); + *reinterpret_cast*>(pOutBuf + outIndex) = rgb_cast(inColor)[0]; + } +}; + + + +template +struct SL_Blit_Compressed_to_R +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRType) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const 
uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGB5551 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); + *reinterpret_cast*>(pOutBuf + outIndex) = rgb_cast(inColor)[0]; + } +}; + + + +template +struct SL_Blit_Compressed_to_R +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRType) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGB4444 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); + *reinterpret_cast*>(pOutBuf + outIndex) = rgb_cast(inColor)[0]; + } +}; + + + +/*------------------------------------- + * Compressed to RG +-------------------------------------*/ +template +struct SL_Blit_Compressed_to_RG +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGType) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGB565 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); + *reinterpret_cast*>(pOutBuf + outIndex) = ls::math::vec2_cast(rgb_cast(inColor)); + } +}; + + + +template +struct SL_Blit_Compressed_to_RG +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGType) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGB5551 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); + *reinterpret_cast*>(pOutBuf + outIndex) = ls::math::vec2_cast(rgb_cast(inColor)); + } +}; + + + +template +struct SL_Blit_Compressed_to_RG +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGType) + }; + + inline LS_INLINE void 
operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGB4444 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); + *reinterpret_cast*>(pOutBuf + outIndex) = ls::math::vec2_cast(rgb_cast(inColor)); + } +}; + + + +/*------------------------------------- + * Compressed to RGB +-------------------------------------*/ +template +struct SL_Blit_Compressed_to_RGB +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGBType) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGB565 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); + *reinterpret_cast*>(pOutBuf + outIndex) = rgb_cast(inColor); + } +}; + + + +template +struct SL_Blit_Compressed_to_RGB +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGBType) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGB5551 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); + *reinterpret_cast*>(pOutBuf + outIndex) = ls::math::vec3_cast(rgb_cast(inColor)); + } +}; + + + +template +struct SL_Blit_Compressed_to_RGB +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGBType) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGB4444 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); + *reinterpret_cast*>(pOutBuf + outIndex) = ls::math::vec3_cast(rgb_cast(inColor)); + } +}; + + + +/*------------------------------------- + * Compressed to RGBA 
+-------------------------------------*/ +template +struct SL_Blit_Compressed_to_RGBA +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGBAType) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGB565 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); + *reinterpret_cast*>(pOutBuf + outIndex) = ls::math::vec4_cast(rgb_cast(inColor), SL_ColorLimits::max()[3]); + } +}; + + + +template +struct SL_Blit_Compressed_to_RGBA +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGBAType) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGB5551 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); + *reinterpret_cast*>(pOutBuf + outIndex) = rgb_cast(inColor); + } +}; + + + +template +struct SL_Blit_Compressed_to_RGBA +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGBAType) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGB4444 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); + *reinterpret_cast*>(pOutBuf + outIndex) = rgb_cast(inColor); + } +}; + + + +/*------------------------------------- + * R to Compressed +-------------------------------------*/ +template +struct SL_Blit_R_to_Compressed +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGB565) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRType inColorR = pTexture->texel>((uint16_t)srcX, 
(uint16_t)srcY); + const SL_ColorRGBType inColor = ls::math::vec3_t(inColorR.r, SL_ColorLimits::min().r, SL_ColorLimits::min().r); + + *reinterpret_cast(pOutBuf + outIndex) = rgb565_cast(inColor); + } +}; + + + +template +struct SL_Blit_R_to_Compressed +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGB5551) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRType inColorR = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); + const SL_ColorRGBAType inColor = SL_ColorRGBAType{SL_ColorLimits::min().r, SL_ColorLimits::min().r, inColorR[0], SL_ColorLimits::max()[3]}; + + *reinterpret_cast(pOutBuf + outIndex) = rgb5551_cast(inColor); + } +}; + + + +template +struct SL_Blit_R_to_Compressed +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGB4444) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRType inColorR = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); + const SL_ColorRGBAType inColor = SL_ColorRGBAType{SL_ColorLimits::min().r, SL_ColorLimits::min().r, inColorR[0], SL_ColorLimits::max()[3]}; + + *reinterpret_cast(pOutBuf + outIndex) = rgb4444_cast(inColor); + } +}; + + + + +/*------------------------------------- + * RG to Compressed +-------------------------------------*/ +template +struct SL_Blit_RG_to_Compressed +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGB565) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGType inColorRG = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); + const SL_ColorRGBType inColor = 
ls::math::vec3_cast(inColorRG, SL_ColorLimits::min().r); + + *reinterpret_cast(pOutBuf + outIndex) = rgb565_cast(inColor); + } +}; + + + +template +struct SL_Blit_RG_to_Compressed +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGB5551) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGType inColorRG = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); + const SL_ColorRGBAType inColor = ls::math::vec4_cast(SL_ColorLimits::min().r, inColorRG, SL_ColorLimits::max()[3]); + + *reinterpret_cast(pOutBuf + outIndex) = rgb5551_cast(inColor); + } +}; + + + +template +struct SL_Blit_RG_to_Compressed +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGB4444) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGType inColorRG = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); + const SL_ColorRGBAType inColor = ls::math::vec4_cast(SL_ColorLimits::min().r, inColorRG, SL_ColorLimits::max()[3]); + + *reinterpret_cast(pOutBuf + outIndex) = rgb4444_cast(inColor); + } +}; + + + +/*------------------------------------- + * RGB to Compressed +-------------------------------------*/ +template +struct SL_Blit_RGB_to_Compressed +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGB565) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGBType inColor = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); + *reinterpret_cast(pOutBuf + outIndex) = rgb565_cast(inColor); + } +}; + + + +template +struct SL_Blit_RGB_to_Compressed +{ + enum : uint_fast32_t + { + stride = 
sizeof(SL_ColorRGB5551) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGBType inColorRGB = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); + const SL_ColorRGBAType inColor = ls::math::vec4_cast(inColorRGB, SL_ColorLimits::max()[3]); + + *reinterpret_cast(pOutBuf + outIndex) = rgb5551_cast(inColor); + } +}; + + + +template +struct SL_Blit_RGB_to_Compressed +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGB4444) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGBType inColorRGB = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); + const SL_ColorRGBAType inColor = ls::math::vec4_cast(inColorRGB, SL_ColorLimits::max()[3]); + + *reinterpret_cast(pOutBuf + outIndex) = rgb4444_cast(inColor); + } +}; + + + +/*------------------------------------- + * RGBA to Compressed +-------------------------------------*/ +template +struct SL_Blit_RGBA_to_Compressed +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGB565) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGBAType inColorRGBA = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); + const SL_ColorRGBType inColor = ls::math::vec3_cast(inColorRGBA); + + *reinterpret_cast(pOutBuf + outIndex) = rgb565_cast(inColor); + } +}; + + + +template +struct SL_Blit_RGBA_to_Compressed +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGB5551) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + 
uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGBAType inColor = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); + *reinterpret_cast(pOutBuf + outIndex) = rgb5551_cast(inColor); + } +}; + + + +template +struct SL_Blit_RGBA_to_Compressed +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGB4444) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGBAType inColor = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); + *reinterpret_cast(pOutBuf + outIndex) = rgb4444_cast(inColor); + } +}; + + + +/*------------------------------------- + * Compressed to Compressed +-------------------------------------*/ +template<> +struct SL_Blit_Compressed_to_Compressed +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGB565) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGB565 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); + *reinterpret_cast(pOutBuf + outIndex) = inColor; + } +}; + + + +template<> +struct SL_Blit_Compressed_to_Compressed +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGB565) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGB5551 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); + const ls::math::vec3_t outColor = ls::math::vec3_cast(rgb_cast(inColor)); + *reinterpret_cast(pOutBuf + outIndex) = rgb565_cast(outColor); + } +}; + + + +template<> +struct SL_Blit_Compressed_to_Compressed +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGB565) + }; + + inline LS_INLINE void operator()( + const 
SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGB4444 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); + const ls::math::vec3_t outColor = ls::math::vec3_cast(rgb_cast(inColor)); + *reinterpret_cast(pOutBuf + outIndex) = rgb565_cast(outColor); + } +}; + + + +template<> +struct SL_Blit_Compressed_to_Compressed +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGB5551) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGB565 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); + const ls::math::vec4_t outRGBA = ls::math::vec4_cast(rgb_cast(inColor), SL_ColorLimits::max().a); + *reinterpret_cast(pOutBuf + outIndex) = rgb5551_cast(outRGBA); + } +}; + + + +template<> +struct SL_Blit_Compressed_to_Compressed +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGB5551) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGB5551 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); + *reinterpret_cast(pOutBuf + outIndex) = inColor; + } +}; + + + +template<> +struct SL_Blit_Compressed_to_Compressed +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGB5551) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGB4444 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); + const ls::math::vec4_t outColor = rgb_cast(inColor); + *reinterpret_cast(pOutBuf + outIndex) = rgb5551_cast(outColor); + } +}; + + + 
+template<> +struct SL_Blit_Compressed_to_Compressed +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGB4444) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGB565 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); + const ls::math::vec4_t outRGBA = ls::math::vec4_cast(rgb_cast(inColor), SL_ColorLimits::max().a); + *reinterpret_cast(pOutBuf + outIndex) = rgb4444_cast(outRGBA); + } +}; + + + +template<> +struct SL_Blit_Compressed_to_Compressed +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGB4444) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGB5551 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); + const ls::math::vec4_t outColor = rgb_cast(inColor); + *reinterpret_cast(pOutBuf + outIndex) = rgb4444_cast(outColor); + } +}; + + + +template<> +struct SL_Blit_Compressed_to_Compressed +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGB4444) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGB4444 inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); + *reinterpret_cast(pOutBuf + outIndex) = inColor; + } +}; + + + + +/*----------------------------------------------------------------------------- + * SL_BlitProcessorCompressed functions and namespaces +-----------------------------------------------------------------------------*/ +/*------------------------------------- + * Nearest-neighbor filtering (R Channel) +-------------------------------------*/ +template +void 
SL_BlitCompressedProcessor::blit_src_r() noexcept +{ + switch (mBackBuffer->type()) + { + case SL_COLOR_RGB_565: blit_nearest>(); break; + case SL_COLOR_RGBA_5551: blit_nearest>(); break; + case SL_COLOR_RGBA_4444: blit_nearest>(); break; + + default: + LS_ASSERT(false); + LS_UNREACHABLE(); + } +} + + + +/*------------------------------------- + * Nearest-neighbor filtering (R & G Channels) +-------------------------------------*/ +template +void SL_BlitCompressedProcessor::blit_src_rg() noexcept +{ + switch (mBackBuffer->type()) + { + case SL_COLOR_RGB_565: blit_nearest>(); break; + case SL_COLOR_RGBA_5551: blit_nearest>(); break; + case SL_COLOR_RGBA_4444: blit_nearest>(); break; + + default: + LS_ASSERT(false); + LS_UNREACHABLE(); + } +} + + + +/*------------------------------------- + * Nearest-neighbor filtering (RGB) +-------------------------------------*/ +template +void SL_BlitCompressedProcessor::blit_src_rgb() noexcept +{ + switch (mBackBuffer->type()) + { + case SL_COLOR_RGB_565: blit_nearest>(); break; + case SL_COLOR_RGBA_5551: blit_nearest>(); break; + case SL_COLOR_RGBA_4444: blit_nearest>(); break; + + default: + LS_ASSERT(false); + LS_UNREACHABLE(); + } +} + + + +/*------------------------------------- + * Nearest-neighbor filtering (RGBA) +-------------------------------------*/ +template +void SL_BlitCompressedProcessor::blit_src_rgba() noexcept +{ + switch (mBackBuffer->type()) + { + case SL_COLOR_RGB_565: blit_nearest>(); break; + case SL_COLOR_RGBA_5551: blit_nearest>(); break; + case SL_COLOR_RGBA_4444: blit_nearest>(); break; + + default: + LS_ASSERT(false); + LS_UNREACHABLE(); + } +} + + + +/*------------------------------------- + * Nearest-neighbor filtering (RGBA) +-------------------------------------*/ +template +void SL_BlitCompressedProcessor::blit_src_compressed() noexcept +{ + switch (mBackBuffer->type()) + { + case SL_COLOR_R_8U: blit_nearest>(); break; + case SL_COLOR_R_16U: blit_nearest>(); break; + case SL_COLOR_R_32U: 
blit_nearest>(); break; + case SL_COLOR_R_64U: blit_nearest>(); break; + case SL_COLOR_R_FLOAT: blit_nearest>(); break; + case SL_COLOR_R_DOUBLE: blit_nearest>(); break; + + case SL_COLOR_RG_8U: blit_nearest>(); break; + case SL_COLOR_RG_16U: blit_nearest>(); break; + case SL_COLOR_RG_32U: blit_nearest>(); break; + case SL_COLOR_RG_64U: blit_nearest>(); break; + case SL_COLOR_RG_FLOAT: blit_nearest>(); break; + case SL_COLOR_RG_DOUBLE: blit_nearest>(); break; + + case SL_COLOR_RGB_8U: blit_nearest>(); break; + case SL_COLOR_RGB_16U: blit_nearest>(); break; + case SL_COLOR_RGB_32U: blit_nearest>(); break; + case SL_COLOR_RGB_64U: blit_nearest>(); break; + case SL_COLOR_RGB_FLOAT: blit_nearest>(); break; + case SL_COLOR_RGB_DOUBLE: blit_nearest>(); break; + + case SL_COLOR_RGBA_8U: blit_nearest>(); break; + case SL_COLOR_RGBA_16U: blit_nearest>(); break; + case SL_COLOR_RGBA_32U: blit_nearest>(); break; + case SL_COLOR_RGBA_64U: blit_nearest>(); break; + case SL_COLOR_RGBA_FLOAT: blit_nearest>(); break; + case SL_COLOR_RGBA_DOUBLE: blit_nearest>(); break; + + case SL_COLOR_RGB_565: blit_nearest>(); break; + case SL_COLOR_RGBA_5551: blit_nearest>(); break; + case SL_COLOR_RGBA_4444: blit_nearest>(); break; + + default: + LS_ASSERT(false); + LS_UNREACHABLE(); + } +} + + + +/*------------------------------------- + * Nearest-neighbor filtering (RGBA) +-------------------------------------*/ +template +void SL_BlitCompressedProcessor::blit_nearest() noexcept +{ + constexpr BlitOp blitOp; + unsigned char* const pOutBuf = reinterpret_cast(mBackBuffer->data()); + + const uint_fast32_t inW = (uint_fast32_t)srcX1 - (uint_fast32_t)srcX0; + const uint_fast32_t inH = (uint_fast32_t)srcY1 - (uint_fast32_t)srcY0; + const uint_fast32_t outW = (uint_fast32_t)dstX1 - (uint_fast32_t)dstX0; + + const uint_fast32_t totalOutW = mBackBuffer->width(); + const uint_fast32_t totalOutH = mBackBuffer->height(); + + // Only tile data along the y-axis of the render buffer. 
This will help to + // make use of the CPU prefetcher when iterating pixels along the x-axis + const uint_fast32_t x0 = ls::math::max(0u, dstX0); + const uint_fast32_t x1 = ls::math::min(totalOutW, x0 + outW); + const uint_fast32_t y0 = dstY0+mThreadId; + const uint_fast32_t y1 = dstY1; + const uint_fast32_t finW = (inW << NUM_FIXED_BITS); + const uint_fast32_t finH = (inH << NUM_FIXED_BITS); + const uint_fast32_t foutW = (finW / totalOutW) + 1u; // account for rounding errors + const uint_fast32_t foutH = (finH / totalOutH) + 1u; + + uint_fast32_t y = y0; + + while (LS_LIKELY(y < y1)) + { + const uint_fast32_t yf = (y * foutH) >> NUM_FIXED_BITS; + const uint_fast32_t srcY = srcY1 - (srcY0 + yf) - 1u; + uint_fast32_t outIndex = (x0 + totalOutW * y) * BlitOp::stride; + + uint_fast32_t x = x0; + + while (LS_LIKELY(x < x1)) + { + const uint_fast32_t xf = x * foutW; + const uint_fast32_t srcX = xf >> NUM_FIXED_BITS; + + blitOp(mTexture, srcX, srcY, pOutBuf, outIndex); + ++x; + outIndex += BlitOp::stride; + } + + y += mNumThreads; + } +} + + + +/*------------------------------------- + * Run the texture blitter +-------------------------------------*/ +void SL_BlitCompressedProcessor::execute() noexcept +{ + LS_ASSERT(sl_is_compressed_color(mTexture->type()) || sl_is_compressed_color(mBackBuffer->type())); + + switch (mTexture->type()) + { + case SL_COLOR_R_8U: blit_src_r(); break; + case SL_COLOR_R_16U: blit_src_r(); break; + case SL_COLOR_R_32U: blit_src_r(); break; + case SL_COLOR_R_64U: blit_src_r(); break; + case SL_COLOR_R_FLOAT: blit_src_r(); break; + case SL_COLOR_R_DOUBLE: blit_src_r(); break; + + case SL_COLOR_RG_8U: blit_src_rg(); break; + case SL_COLOR_RG_16U: blit_src_rg(); break; + case SL_COLOR_RG_32U: blit_src_rg(); break; + case SL_COLOR_RG_64U: blit_src_rg(); break; + case SL_COLOR_RG_FLOAT: blit_src_rg(); break; + case SL_COLOR_RG_DOUBLE: blit_src_rg(); break; + + case SL_COLOR_RGB_8U: blit_src_rgb(); break; + case SL_COLOR_RGB_16U: blit_src_rgb(); 
break; + case SL_COLOR_RGB_32U: blit_src_rgb(); break; + case SL_COLOR_RGB_64U: blit_src_rgb(); break; + case SL_COLOR_RGB_FLOAT: blit_src_rgb(); break; + case SL_COLOR_RGB_DOUBLE: blit_src_rgb(); break; + + case SL_COLOR_RGBA_8U: blit_src_rgba(); break; + case SL_COLOR_RGBA_16U: blit_src_rgba(); break; + case SL_COLOR_RGBA_32U: blit_src_rgba(); break; + case SL_COLOR_RGBA_64U: blit_src_rgba(); break; + case SL_COLOR_RGBA_FLOAT: blit_src_rgba(); break; + case SL_COLOR_RGBA_DOUBLE: blit_src_rgba(); break; + + case SL_COLOR_RGB_565: blit_src_compressed(); break; + case SL_COLOR_RGBA_5551: blit_src_compressed(); break; + case SL_COLOR_RGBA_4444: blit_src_compressed(); break; + + default: + LS_ASSERT(false); + LS_UNREACHABLE(); + } +} diff --git a/softlight/src/SL_BlitProcessor.cpp b/softlight/src/SL_BlitProcessor.cpp index 91d8fddc..a90d2935 100644 --- a/softlight/src/SL_BlitProcessor.cpp +++ b/softlight/src/SL_BlitProcessor.cpp @@ -1,7 +1,12 @@ +#include "lightsky/utils/Assertions.h" + #include "lightsky/math/scalar_utils.h" +#include "lightsky/math/vec_utils.h" // vector casting #include "softlight/SL_BlitProcesor.hpp" +#include "softlight/SL_Color.hpp" +#include "softlight/SL_Texture.hpp" @@ -10,176 +15,637 @@ -----------------------------------------------------------------------------*/ namespace math = ls::math; +/*------------------------------------- + * Recolor to R +-------------------------------------*/ +template +struct SL_Blit_R_to_R +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRType) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRType inColor = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); + *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor); + } +}; + +template +struct SL_Blit_RG_to_R +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRType) + }; + 
+ inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGType inColor = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); + *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor)[0]; + } +}; + +template +struct SL_Blit_RGB_to_R +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRType) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGBType inColor = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); + *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor)[0]; + } +}; + +template +struct SL_Blit_RGBA_to_R +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRType) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGBAType inColor = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); + *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor)[0]; + } +}; + + + +/*------------------------------------- + * Recolor to RG +-------------------------------------*/ +template +struct SL_Blit_R_to_RG +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGType) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRType inColorR = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); + const SL_ColorRGType inColor = SL_ColorRGType{inColorR[0], SL_ColorLimits::min().r}; + + *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor); + } +}; + +template +struct SL_Blit_RG_to_RG +{ + enum : 
uint_fast32_t + { + stride = sizeof(SL_ColorRGType) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGType inColor = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); + *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor); + } +}; + +template +struct SL_Blit_RGB_to_RG +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGType) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGBType inColorRGB = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); + const SL_ColorRGType inColor = ls::math::vec2_cast(inColorRGB); + + *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor); + } +}; + +template +struct SL_Blit_RGBA_to_RG +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGType) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGBAType inColorRGBA = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); + const SL_ColorRGType inColor = ls::math::vec2_cast(inColorRGBA); + + *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor); + } +}; + + + +/*------------------------------------- + * Recolor to RGB +-------------------------------------*/ +template +struct SL_Blit_R_to_RGB +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGBType) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRType inColorR = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); + const 
SL_ColorRGBType inColor = SL_ColorRGBType{SL_ColorLimits::min().r, SL_ColorLimits::min().r, inColorR[0]}; + + *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor); + } +}; + +template +struct SL_Blit_RG_to_RGB +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGBType) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGType inColorRG = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); + const SL_ColorRGBType inColor = ls::math::vec3_cast(inColorRG, SL_ColorLimits::min().r); + + *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor); + } +}; + +template +struct SL_Blit_RGB_to_RGB +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGBType) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGBType inColor = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); + *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor); + } +}; + +template +struct SL_Blit_RGBA_to_RGB +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGBType) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGBAType inColorRGBA = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); + const SL_ColorRGBType inColor = ls::math::vec3_cast(inColorRGBA); + + *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor); + } +}; + + + +/*------------------------------------- + * Recolor to RGBA +-------------------------------------*/ +template +struct SL_Blit_R_to_RGBA +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGBAType) + }; + + inline LS_INLINE void 
operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRType inColorR = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); + const SL_ColorRGBAType inColor = SL_ColorRGBAType{SL_ColorLimits::min().r, SL_ColorLimits::min().r, inColorR[0], SL_ColorLimits::max()[3]}; + + *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor); + } +}; + +template +struct SL_Blit_RG_to_RGBA +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGBAType) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGType inColorRG = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); + const SL_ColorRGBAType inColor = ls::math::vec4_cast(SL_ColorLimits::min().r, inColorRG, SL_ColorLimits::max()[3]); + + *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor); + } +}; + +template +struct SL_Blit_RGB_to_RGBA +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGBAType) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGBType inColorRGB = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); + const SL_ColorRGBAType inColor = ls::math::vec4_cast(inColorRGB, SL_ColorLimits::max()[3]); + + *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor); + } +}; + + +template +struct SL_Blit_RGBA_to_RGBA +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGBAType) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGBAType inColor = 
pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); + *reinterpret_cast*>(pOutBuf + outIndex) = color_cast(inColor); + } +}; + +// I spent enough time on blitting... Here, I'm only optimizing for the most +// common blit operations, from RGBAf & RGBA8 to RGBA8 +template<> +struct SL_Blit_RGBA_to_RGBA +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGBAType) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const int32_t inColor = pTexture->texel((uint16_t)srcX, (uint16_t)srcY); + *reinterpret_cast(pOutBuf + outIndex) = inColor; + } +}; + + + +template<> +struct SL_Blit_RGBA_to_RGBA +{ + enum : uint_fast32_t + { + stride = sizeof(SL_ColorRGBAType) + }; + + inline LS_INLINE void operator()( + const SL_Texture* pTexture, + const uint_fast32_t srcX, + const uint_fast32_t srcY, + unsigned char* const pOutBuf, + uint_fast32_t outIndex) const noexcept + { + const SL_ColorRGBAType inColor = pTexture->texel>((uint16_t)srcX, (uint16_t)srcY); + const SL_ColorRGBAType in = color_cast(inColor); + *reinterpret_cast(pOutBuf + outIndex) = reinterpret_cast(in); + } +}; + /*----------------------------------------------------------------------------- - * SL_BlitProcessor Class + * SL_BlitProcessor functions and namespaces -----------------------------------------------------------------------------*/ -#if !defined(LS_COMPILER_MSC) - template void SL_BlitProcessor::blit_src_r(); - template void SL_BlitProcessor::blit_src_r(); - template void SL_BlitProcessor::blit_src_r(); - template void SL_BlitProcessor::blit_src_r(); - template void SL_BlitProcessor::blit_src_r(); - template void SL_BlitProcessor::blit_src_r(); - template void SL_BlitProcessor::blit_src_rg(); - template void SL_BlitProcessor::blit_src_rg(); - template void SL_BlitProcessor::blit_src_rg(); - template void SL_BlitProcessor::blit_src_rg(); - template void 
SL_BlitProcessor::blit_src_rg(); - template void SL_BlitProcessor::blit_src_rg(); - template void SL_BlitProcessor::blit_src_rgb(); - template void SL_BlitProcessor::blit_src_rgb(); - template void SL_BlitProcessor::blit_src_rgb(); - template void SL_BlitProcessor::blit_src_rgb(); - template void SL_BlitProcessor::blit_src_rgb(); - template void SL_BlitProcessor::blit_src_rgb(); - template void SL_BlitProcessor::blit_src_rgba(); - template void SL_BlitProcessor::blit_src_rgba(); - template void SL_BlitProcessor::blit_src_rgba(); - template void SL_BlitProcessor::blit_src_rgba(); - template void SL_BlitProcessor::blit_src_rgba(); - template void SL_BlitProcessor::blit_src_rgba(); -#endif +/*------------------------------------- + * Nearest-neighbor filtering (R Channel) +-------------------------------------*/ +template +void SL_BlitProcessor::blit_src_r() noexcept +{ + switch (mBackBuffer->type()) + { + case SL_COLOR_R_8U: blit_nearest>(); break; + case SL_COLOR_R_16U: blit_nearest>(); break; + case SL_COLOR_R_32U: blit_nearest>(); break; + case SL_COLOR_R_64U: blit_nearest>(); break; + case SL_COLOR_R_FLOAT: blit_nearest>(); break; + case SL_COLOR_R_DOUBLE: blit_nearest>(); break; + + case SL_COLOR_RG_8U: blit_nearest>(); break; + case SL_COLOR_RG_16U: blit_nearest>(); break; + case SL_COLOR_RG_32U: blit_nearest>(); break; + case SL_COLOR_RG_64U: blit_nearest>(); break; + case SL_COLOR_RG_FLOAT: blit_nearest>(); break; + case SL_COLOR_RG_DOUBLE: blit_nearest>(); break; + + case SL_COLOR_RGB_8U: blit_nearest>(); break; + case SL_COLOR_RGB_16U: blit_nearest>(); break; + case SL_COLOR_RGB_32U: blit_nearest>(); break; + case SL_COLOR_RGB_64U: blit_nearest>(); break; + case SL_COLOR_RGB_FLOAT: blit_nearest>(); break; + case SL_COLOR_RGB_DOUBLE: blit_nearest>(); break; + + case SL_COLOR_RGBA_8U: blit_nearest>(); break; + case SL_COLOR_RGBA_16U: blit_nearest>(); break; + case SL_COLOR_RGBA_32U: blit_nearest>(); break; + case SL_COLOR_RGBA_64U: blit_nearest>(); break; + 
case SL_COLOR_RGBA_FLOAT: blit_nearest>(); break; + case SL_COLOR_RGBA_DOUBLE: blit_nearest>(); break; + + default: + LS_ASSERT(false); + LS_UNREACHABLE(); + } +} /*------------------------------------- - * Nearest-neighbor filtering (RGB565) + * Nearest-neighbor filtering (R & G Channels) -------------------------------------*/ -template<> -void SL_BlitProcessor::blit_src_rgb() noexcept +template +void SL_BlitProcessor::blit_src_rg() noexcept { switch (mBackBuffer->type()) { - case SL_COLOR_R_8U: blit_nearest>(); break; - case SL_COLOR_R_16U: blit_nearest>(); break; - case SL_COLOR_R_32U: blit_nearest>(); break; - case SL_COLOR_R_64U: blit_nearest>(); break; - case SL_COLOR_R_FLOAT: blit_nearest>(); break; - case SL_COLOR_R_DOUBLE: blit_nearest>(); break; - - case SL_COLOR_RG_8U: blit_nearest>(); break; - case SL_COLOR_RG_16U: blit_nearest>(); break; - case SL_COLOR_RG_32U: blit_nearest>(); break; - case SL_COLOR_RG_64U: blit_nearest>(); break; - case SL_COLOR_RG_FLOAT: blit_nearest>(); break; - case SL_COLOR_RG_DOUBLE: blit_nearest>(); break; - - case SL_COLOR_RGB_8U: blit_nearest>(); break; - case SL_COLOR_RGB_16U: blit_nearest>(); break; - case SL_COLOR_RGB_32U: blit_nearest>(); break; - case SL_COLOR_RGB_64U: blit_nearest>(); break; - case SL_COLOR_RGB_FLOAT: blit_nearest>(); break; - case SL_COLOR_RGB_DOUBLE: blit_nearest>(); break; - - case SL_COLOR_RGBA_8U: blit_nearest>(); break; - case SL_COLOR_RGBA_16U: blit_nearest>(); break; - case SL_COLOR_RGBA_32U: blit_nearest>(); break; - case SL_COLOR_RGBA_64U: blit_nearest>(); break; - case SL_COLOR_RGBA_FLOAT: blit_nearest>(); break; - case SL_COLOR_RGBA_DOUBLE: blit_nearest>(); break; - - case SL_COLOR_RGB_565: blit_nearest>(); break; - case SL_COLOR_RGBA_5551: blit_nearest>(); break; - case SL_COLOR_RGBA_4444: blit_nearest>(); break; + case SL_COLOR_R_8U: blit_nearest>(); break; + case SL_COLOR_R_16U: blit_nearest>(); break; + case SL_COLOR_R_32U: blit_nearest>(); break; + case SL_COLOR_R_64U: blit_nearest>(); 
break; + case SL_COLOR_R_FLOAT: blit_nearest>(); break; + case SL_COLOR_R_DOUBLE: blit_nearest>(); break; + + case SL_COLOR_RG_8U: blit_nearest>(); break; + case SL_COLOR_RG_16U: blit_nearest>(); break; + case SL_COLOR_RG_32U: blit_nearest>(); break; + case SL_COLOR_RG_64U: blit_nearest>(); break; + case SL_COLOR_RG_FLOAT: blit_nearest>(); break; + case SL_COLOR_RG_DOUBLE: blit_nearest>(); break; + + case SL_COLOR_RGB_8U: blit_nearest>(); break; + case SL_COLOR_RGB_16U: blit_nearest>(); break; + case SL_COLOR_RGB_32U: blit_nearest>(); break; + case SL_COLOR_RGB_64U: blit_nearest>(); break; + case SL_COLOR_RGB_FLOAT: blit_nearest>(); break; + case SL_COLOR_RGB_DOUBLE: blit_nearest>(); break; + + case SL_COLOR_RGBA_8U: blit_nearest>(); break; + case SL_COLOR_RGBA_16U: blit_nearest>(); break; + case SL_COLOR_RGBA_32U: blit_nearest>(); break; + case SL_COLOR_RGBA_64U: blit_nearest>(); break; + case SL_COLOR_RGBA_FLOAT: blit_nearest>(); break; + case SL_COLOR_RGBA_DOUBLE: blit_nearest>(); break; default: - break; + LS_ASSERT(false); + LS_UNREACHABLE(); } } /*------------------------------------- - * Nearest-neighbor filtering (RGBA5551) + * Nearest-neighbor filtering (RGB) -------------------------------------*/ -template<> -void SL_BlitProcessor::blit_src_rgba() noexcept +template +void SL_BlitProcessor::blit_src_rgb() noexcept { switch (mBackBuffer->type()) { - case SL_COLOR_R_8U: blit_nearest>(); break; - case SL_COLOR_R_16U: blit_nearest>(); break; - case SL_COLOR_R_32U: blit_nearest>(); break; - case SL_COLOR_R_64U: blit_nearest>(); break; - case SL_COLOR_R_FLOAT: blit_nearest>(); break; - case SL_COLOR_R_DOUBLE: blit_nearest>(); break; - - case SL_COLOR_RG_8U: blit_nearest>(); break; - case SL_COLOR_RG_16U: blit_nearest>(); break; - case SL_COLOR_RG_32U: blit_nearest>(); break; - case SL_COLOR_RG_64U: blit_nearest>(); break; - case SL_COLOR_RG_FLOAT: blit_nearest>(); break; - case SL_COLOR_RG_DOUBLE: blit_nearest>(); break; - - case SL_COLOR_RGB_8U: 
blit_nearest>(); break; - case SL_COLOR_RGB_16U: blit_nearest>(); break; - case SL_COLOR_RGB_32U: blit_nearest>(); break; - case SL_COLOR_RGB_64U: blit_nearest>(); break; - case SL_COLOR_RGB_FLOAT: blit_nearest>(); break; - case SL_COLOR_RGB_DOUBLE: blit_nearest>(); break; - - case SL_COLOR_RGBA_8U: blit_nearest>(); break; - case SL_COLOR_RGBA_16U: blit_nearest>(); break; - case SL_COLOR_RGBA_32U: blit_nearest>(); break; - case SL_COLOR_RGBA_64U: blit_nearest>(); break; - case SL_COLOR_RGBA_FLOAT: blit_nearest>(); break; - case SL_COLOR_RGBA_DOUBLE: blit_nearest>(); break; - - case SL_COLOR_RGB_565: blit_nearest>(); break; - case SL_COLOR_RGBA_5551: blit_nearest>(); break; - case SL_COLOR_RGBA_4444: blit_nearest>(); break; + case SL_COLOR_R_8U: blit_nearest>(); break; + case SL_COLOR_R_16U: blit_nearest>(); break; + case SL_COLOR_R_32U: blit_nearest>(); break; + case SL_COLOR_R_64U: blit_nearest>(); break; + case SL_COLOR_R_FLOAT: blit_nearest>(); break; + case SL_COLOR_R_DOUBLE: blit_nearest>(); break; + + case SL_COLOR_RG_8U: blit_nearest>(); break; + case SL_COLOR_RG_16U: blit_nearest>(); break; + case SL_COLOR_RG_32U: blit_nearest>(); break; + case SL_COLOR_RG_64U: blit_nearest>(); break; + case SL_COLOR_RG_FLOAT: blit_nearest>(); break; + case SL_COLOR_RG_DOUBLE: blit_nearest>(); break; + + case SL_COLOR_RGB_8U: blit_nearest>(); break; + case SL_COLOR_RGB_16U: blit_nearest>(); break; + case SL_COLOR_RGB_32U: blit_nearest>(); break; + case SL_COLOR_RGB_64U: blit_nearest>(); break; + case SL_COLOR_RGB_FLOAT: blit_nearest>(); break; + case SL_COLOR_RGB_DOUBLE: blit_nearest>(); break; + + case SL_COLOR_RGBA_8U: blit_nearest>(); break; + case SL_COLOR_RGBA_16U: blit_nearest>(); break; + case SL_COLOR_RGBA_32U: blit_nearest>(); break; + case SL_COLOR_RGBA_64U: blit_nearest>(); break; + case SL_COLOR_RGBA_FLOAT: blit_nearest>(); break; + case SL_COLOR_RGBA_DOUBLE: blit_nearest>(); break; default: - break; + LS_ASSERT(false); + LS_UNREACHABLE(); } } 
/*------------------------------------- - * Nearest-neighbor filtering (RGBA4444) + * Nearest-neighbor filtering (RGBA) -------------------------------------*/ -template<> -void SL_BlitProcessor::blit_src_rgba() noexcept +template +void SL_BlitProcessor::blit_src_rgba() noexcept { switch (mBackBuffer->type()) { - case SL_COLOR_R_8U: blit_nearest>(); break; - case SL_COLOR_R_16U: blit_nearest>(); break; - case SL_COLOR_R_32U: blit_nearest>(); break; - case SL_COLOR_R_64U: blit_nearest>(); break; - case SL_COLOR_R_FLOAT: blit_nearest>(); break; - case SL_COLOR_R_DOUBLE: blit_nearest>(); break; - - case SL_COLOR_RG_8U: blit_nearest>(); break; - case SL_COLOR_RG_16U: blit_nearest>(); break; - case SL_COLOR_RG_32U: blit_nearest>(); break; - case SL_COLOR_RG_64U: blit_nearest>(); break; - case SL_COLOR_RG_FLOAT: blit_nearest>(); break; - case SL_COLOR_RG_DOUBLE: blit_nearest>(); break; - - case SL_COLOR_RGB_8U: blit_nearest>(); break; - case SL_COLOR_RGB_16U: blit_nearest>(); break; - case SL_COLOR_RGB_32U: blit_nearest>(); break; - case SL_COLOR_RGB_64U: blit_nearest>(); break; - case SL_COLOR_RGB_FLOAT: blit_nearest>(); break; - case SL_COLOR_RGB_DOUBLE: blit_nearest>(); break; - - case SL_COLOR_RGBA_8U: blit_nearest>(); break; - case SL_COLOR_RGBA_16U: blit_nearest>(); break; - case SL_COLOR_RGBA_32U: blit_nearest>(); break; - case SL_COLOR_RGBA_64U: blit_nearest>(); break; - case SL_COLOR_RGBA_FLOAT: blit_nearest>(); break; - case SL_COLOR_RGBA_DOUBLE: blit_nearest>(); break; - - case SL_COLOR_RGB_565: blit_nearest>(); break; - case SL_COLOR_RGBA_5551: blit_nearest>(); break; - case SL_COLOR_RGBA_4444: blit_nearest>(); break; + case SL_COLOR_R_8U: blit_nearest>(); break; + case SL_COLOR_R_16U: blit_nearest>(); break; + case SL_COLOR_R_32U: blit_nearest>(); break; + case SL_COLOR_R_64U: blit_nearest>(); break; + case SL_COLOR_R_FLOAT: blit_nearest>(); break; + case SL_COLOR_R_DOUBLE: blit_nearest>(); break; + + case SL_COLOR_RG_8U: blit_nearest>(); break; + case 
SL_COLOR_RG_16U: blit_nearest>(); break; + case SL_COLOR_RG_32U: blit_nearest>(); break; + case SL_COLOR_RG_64U: blit_nearest>(); break; + case SL_COLOR_RG_FLOAT: blit_nearest>(); break; + case SL_COLOR_RG_DOUBLE: blit_nearest>(); break; + + case SL_COLOR_RGB_8U: blit_nearest>(); break; + case SL_COLOR_RGB_16U: blit_nearest>(); break; + case SL_COLOR_RGB_32U: blit_nearest>(); break; + case SL_COLOR_RGB_64U: blit_nearest>(); break; + case SL_COLOR_RGB_FLOAT: blit_nearest>(); break; + case SL_COLOR_RGB_DOUBLE: blit_nearest>(); break; + + case SL_COLOR_RGBA_8U: blit_nearest>(); break; + case SL_COLOR_RGBA_16U: blit_nearest>(); break; + case SL_COLOR_RGBA_32U: blit_nearest>(); break; + case SL_COLOR_RGBA_64U: blit_nearest>(); break; + case SL_COLOR_RGBA_FLOAT: blit_nearest>(); break; + case SL_COLOR_RGBA_DOUBLE: blit_nearest>(); break; default: - break; + LS_ASSERT(false); + LS_UNREACHABLE(); + } +} + + + +/*------------------------------------- + * Nearest-neighbor filtering (RGBA) +-------------------------------------*/ +template +void SL_BlitProcessor::blit_nearest() noexcept +{ + constexpr BlipOp blitOp; + unsigned char* const pOutBuf = reinterpret_cast(mBackBuffer->data()); + + const uint_fast32_t inW = (uint_fast32_t)srcX1 - (uint_fast32_t)srcX0; + const uint_fast32_t inH = (uint_fast32_t)srcY1 - (uint_fast32_t)srcY0; + const uint_fast32_t outW = (uint_fast32_t)dstX1 - (uint_fast32_t)dstX0; + + const uint_fast32_t totalOutW = mBackBuffer->width(); + const uint_fast32_t totalOutH = mBackBuffer->height(); + + // Only tile data along the y-axis of the render buffer. 
This will help to + // make use of the CPU prefetcher when iterating pixels along the x-axis + const uint_fast32_t x0 = ls::math::max(0u, dstX0); + const uint_fast32_t x1 = ls::math::min(totalOutW, x0 + outW); + const uint_fast32_t y0 = dstY0+mThreadId; + const uint_fast32_t y1 = dstY1; + const uint_fast32_t finW = (inW << NUM_FIXED_BITS); + const uint_fast32_t finH = (inH << NUM_FIXED_BITS); + const uint_fast32_t foutW = (finW / totalOutW) + 1u; // account for rounding errors + const uint_fast32_t foutH = (finH / totalOutH) + 1u; + + uint_fast32_t y = y0; + + while (LS_LIKELY(y < y1)) + { + const uint_fast32_t yf = (y * foutH) >> NUM_FIXED_BITS; + const uint_fast32_t srcY = srcY1 - (srcY0 + yf) - 1u; + uint_fast32_t outIndex = (x0 + totalOutW * y) * BlipOp::stride; + + uint_fast32_t x = x0; + + while (LS_LIKELY(x < x1)) + { + const uint_fast32_t xf = x * foutW; + const uint_fast32_t srcX = xf >> NUM_FIXED_BITS; + + blitOp(mTexture, srcX, srcY, pOutBuf, outIndex); + ++x; + outIndex += BlipOp::stride; + } + + y += mNumThreads; } } @@ -190,6 +656,8 @@ void SL_BlitProcessor::blit_src_rgba() noexcept -------------------------------------*/ void SL_BlitProcessor::execute() noexcept { + LS_ASSERT(!sl_is_compressed_color(mTexture->type()) && !sl_is_compressed_color(mBackBuffer->type())); + switch (mTexture->type()) { case SL_COLOR_R_8U: blit_src_r(); break; @@ -220,8 +688,8 @@ void SL_BlitProcessor::execute() noexcept case SL_COLOR_RGBA_FLOAT: blit_src_rgba(); break; case SL_COLOR_RGBA_DOUBLE: blit_src_rgba(); break; - case SL_COLOR_RGB_565: blit_src_rgb(); break; - case SL_COLOR_RGBA_5551: blit_src_rgba(); break; - case SL_COLOR_RGBA_4444: blit_src_rgba(); break; + default: + LS_ASSERT(false); + LS_UNREACHABLE(); } } diff --git a/softlight/src/SL_Color.cpp b/softlight/src/SL_Color.cpp index c3234fe8..b6f657de 100644 --- a/softlight/src/SL_Color.cpp +++ b/softlight/src/SL_Color.cpp @@ -102,6 +102,27 @@ unsigned sl_elements_per_color(SL_ColorDataType p) 
+/*------------------------------------- + * Compressed format check +-------------------------------------*/ +bool sl_is_compressed_color(SL_ColorDataType p) +{ + switch (p) + { + case SL_COLOR_RGB_565: + case SL_COLOR_RGBA_5551: + case SL_COLOR_RGBA_4444: + return true; + + default: + break; + } + + return false; +} + + + /*------------------------------------- * helper function to avoid breaking reinterpret_cast from a compressed color * to uint16_t (i.e., don't break strict aliasing). diff --git a/softlight/src/SL_Context.cpp b/softlight/src/SL_Context.cpp index dbdd6513..77e0023c 100644 --- a/softlight/src/SL_Context.cpp +++ b/softlight/src/SL_Context.cpp @@ -742,17 +742,13 @@ void SL_Context::blit(size_t outTextureId, size_t inTextureId) noexcept const uint16_t dstX1 = pOut->width(); const uint16_t dstY1 = pOut->height(); - mProcessors.run_blit_processors( - mTextures[inTextureId], - mTextures[outTextureId], - srcX0, - srcY0, - srcX1, - srcY1, - dstX0, - dstY0, - dstX1, - dstY1); + this->blit( + outTextureId, + inTextureId, + srcX0, srcY0, + srcX1, srcY1, + dstX0, dstY0, + dstX1, dstY1); } @@ -772,17 +768,26 @@ void SL_Context::blit( uint16_t dstX1, uint16_t dstY1) noexcept { - mProcessors.run_blit_processors( - mTextures[inTextureId], - mTextures[outTextureId], - srcX0, - srcY0, - srcX1, - srcY1, - dstX0, - dstY0, - dstX1, - dstY1); + if (sl_is_compressed_color(mTextures[outTextureId]->type()) || sl_is_compressed_color(mTextures[inTextureId]->type())) + { + mProcessors.run_blit_compressed_processors( + mTextures[inTextureId], + mTextures[outTextureId], + srcX0, srcY0, + srcX1, srcY1, + dstX0, dstY0, + dstX1, dstY1); + } + else + { + mProcessors.run_blit_processors( + mTextures[inTextureId], + mTextures[outTextureId], + srcX0, srcY0, + srcX1, srcY1, + dstX0, dstY0, + dstX1, dstY1); + } } @@ -802,17 +807,13 @@ void SL_Context::blit(SL_WindowBuffer& buffer, size_t textureId) noexcept const uint16_t dstX1 = (uint16_t)buffer.width(); const uint16_t dstY1 = 
(uint16_t)buffer.height(); - mProcessors.run_blit_processors( - mTextures[textureId], - &(buffer.mTexture), - srcX0, - srcY0, - srcX1, - srcY1, - dstX0, - dstY0, - dstX1, - dstY1); + this->blit( + buffer, + textureId, + srcX0, srcY0, + srcX1, srcY1, + dstX0, dstY0, + dstX1, dstY1); } @@ -832,17 +833,26 @@ void SL_Context::blit( uint16_t dstX1, uint16_t dstY1) noexcept { - mProcessors.run_blit_processors( - mTextures[textureId], - &(buffer.mTexture), - srcX0, - srcY0, - srcX1, - srcY1, - dstX0, - dstY0, - dstX1, - dstY1); + if (sl_is_compressed_color(mTextures[textureId]->type()) || sl_is_compressed_color(buffer.mTexture.type())) + { + mProcessors.run_blit_compressed_processors( + mTextures[textureId], + &(buffer.mTexture), + srcX0, srcY0, + srcX1, srcY1, + dstX0, dstY0, + dstX1, dstY1); + } + else + { + mProcessors.run_blit_processors( + mTextures[textureId], + &(buffer.mTexture), + srcX0, srcY0, + srcX1, srcY1, + dstX0, dstY0, + dstX1, dstY1); + } } diff --git a/softlight/src/SL_ProcessorPool.cpp b/softlight/src/SL_ProcessorPool.cpp index 7a915a98..f94d49b9 100644 --- a/softlight/src/SL_ProcessorPool.cpp +++ b/softlight/src/SL_ProcessorPool.cpp @@ -273,13 +273,15 @@ unsigned SL_ProcessorPool::concurrency(unsigned inNumThreads) noexcept LS_LOG_MSG( "Rendering threads updated:" - "\n\tThread Count: ", inNumThreads, - "\n\tBytes per Task: ", sizeof(SL_ShaderProcessor), - "\n\tBytes of Task Pool: ", sizeof(SL_ProcessorPool), - "\n\tVertex Task Size: ", sizeof(SL_VertexProcessor), - "\n\tFragment Task Size: ", sizeof(SL_FragmentProcessor), - "\n\tFragment Bin Size: ", sizeof(SL_FragmentBin), - "\n\tBlitter Task Size: ", sizeof(SL_BlitProcessor)); + "\n\tThread Count: ", inNumThreads, + "\n\tBytes per Task: ", sizeof(SL_ShaderProcessor), + "\n\tBytes of Task Pool: ", sizeof(SL_ProcessorPool), + "\n\tVertex Task Size: ", sizeof(SL_VertexProcessor), + "\n\tFragment Task Size: ", sizeof(SL_FragmentProcessor), + "\n\tFragment Bin Size: ", sizeof(SL_FragmentBin), + 
"\n\tBlitter Task Size: ", sizeof(SL_BlitProcessor), + "\n\tBlitter (compressed) Task Size: ", sizeof(SL_BlitCompressedProcessor) + ); return inNumThreads; } @@ -423,6 +425,7 @@ void SL_ProcessorPool::run_blit_processors( uint16_t dstY1) noexcept { SL_ShaderProcessor processor; + LS_ASSERT(!sl_is_compressed_color(inTex->type()) && !sl_is_compressed_color(outTex->type())); processor.mType = SL_BLIT_PROCESSOR; SL_BlitProcessor& blitter = processor.mBlitter; @@ -458,6 +461,58 @@ void SL_ProcessorPool::run_blit_processors( } +/*------------------------------------- + * Execute a texture blit across threads +-------------------------------------*/ +void SL_ProcessorPool::run_blit_compressed_processors( + const SL_Texture* inTex, + SL_Texture* outTex, + uint16_t srcX0, + uint16_t srcY0, + uint16_t srcX1, + uint16_t srcY1, + uint16_t dstX0, + uint16_t dstY0, + uint16_t dstX1, + uint16_t dstY1) noexcept +{ + SL_ShaderProcessor processor; + LS_ASSERT(sl_is_compressed_color(inTex->type()) || sl_is_compressed_color(outTex->type())); + processor.mType = SL_BLIT_COMPRESSED_PROCESSOR; + + SL_BlitCompressedProcessor& blitter = processor.mBlitterCompressed; + blitter.mThreadId = 0; + blitter.mNumThreads = (uint16_t)mNumThreads; + blitter.srcX0 = srcX0; + blitter.srcY0 = srcY0; + blitter.srcX1 = srcX1; + blitter.srcY1 = srcY1; + blitter.dstX0 = dstX0; + blitter.dstY0 = dstY0; + blitter.dstX1 = dstX1; + blitter.dstY1 = dstY1; + blitter.mTexture = inTex; + blitter.mBackBuffer = outTex; + + // Process most of the rendering on other threads first. + for (uint16_t threadId = 0; threadId < mNumThreads - 1; ++threadId) + { + blitter.mThreadId = threadId; + + SL_ProcessorPool::ThreadedWorker& worker = mWorkers[threadId]; + worker.busy_waiting(false); + worker.push(processor); + } + + flush(); + blitter.mThreadId = (uint16_t)(mNumThreads - 1u); + blitter.execute(); + + // Each thread should now pause except for the main thread. 
+ wait(); +} + + /*------------------------------------- * Clear a framebuffer's attachment across threads diff --git a/softlight/src/SL_ShaderProcessor.cpp b/softlight/src/SL_ShaderProcessor.cpp index 6a1b1932..decbbb25 100644 --- a/softlight/src/SL_ShaderProcessor.cpp +++ b/softlight/src/SL_ShaderProcessor.cpp @@ -80,6 +80,10 @@ SL_ShaderProcessor::SL_ShaderProcessor(const SL_ShaderProcessor& sp) noexcept : mBlitter = sp.mBlitter; break; + case SL_BLIT_COMPRESSED_PROCESSOR: + mBlitterCompressed = sp.mBlitterCompressed; + break; + case SL_CLEAR_PROCESSOR: mClear = sp.mClear; break; @@ -112,6 +116,10 @@ SL_ShaderProcessor::SL_ShaderProcessor(SL_ShaderProcessor&& sp) noexcept : mBlitter = sp.mBlitter; break; + case SL_BLIT_COMPRESSED_PROCESSOR: + mBlitterCompressed = sp.mBlitterCompressed; + break; + case SL_CLEAR_PROCESSOR: mClear = sp.mClear; break; @@ -147,6 +155,10 @@ SL_ShaderProcessor& SL_ShaderProcessor::operator=(const SL_ShaderProcessor& sp) mBlitter = sp.mBlitter; break; + case SL_BLIT_COMPRESSED_PROCESSOR: + mBlitterCompressed = sp.mBlitterCompressed; + break; + case SL_CLEAR_PROCESSOR: mClear = sp.mClear; break; @@ -185,6 +197,10 @@ SL_ShaderProcessor& SL_ShaderProcessor::operator=(SL_ShaderProcessor&& sp) noexc mBlitter = sp.mBlitter; break; + case SL_BLIT_COMPRESSED_PROCESSOR: + mBlitterCompressed = sp.mBlitterCompressed; + break; + case SL_CLEAR_PROCESSOR: mClear = sp.mClear; break;