Skip to content
Permalink
Browse files

TextureCache: Support reinterpreting formats for VRAM textures

  • Loading branch information...
stenzek committed Jul 14, 2019
1 parent 77f406c commit 946571b7595b6b24e02cf8622f2bd5935b5aa360
@@ -1,6 +1,7 @@
#include "VideoCommon/FramebufferShaderGen.h"
#include <sstream>
#include "VideoCommon/FramebufferManager.h"
#include "VideoCommon/TextureDecoder.h"
#include "VideoCommon/VertexShaderGen.h"

namespace FramebufferShaderGen
@@ -68,6 +69,26 @@ static void EmitSampleTexture(std::stringstream& ss, u32 n, const char* coords)
}
}

// Writes an unfiltered texel load expression for texture/sampler index "n" into the shader source.
// "coords" must name a 4-component integer vector: xy is the texel position, z is the array layer
// and w is the mipmap level. Nothing is written for API types other than D3D, OpenGL and Vulkan.
static void EmitTextureLoad(std::stringstream& ss, u32 n, const char* coords)
{
  const auto api_type = GetAPIType();

  if (api_type == APIType::D3D)
  {
    // HLSL: Load() takes the whole vector in one argument.
    ss << "tex" << n << ".Load(" << coords << ")";
    return;
  }

  if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
  {
    // GLSL: texelFetch() takes the position/layer and the mipmap level as separate arguments.
    ss << "texelFetch(samp" << n << ", (" << coords << ").xyz, (" << coords << ").w)";
  }
}

static void EmitVertexMainDeclaration(std::stringstream& ss, u32 num_tex_inputs,
u32 num_color_inputs, bool position_input,
u32 num_tex_outputs, u32 num_color_outputs,
@@ -133,7 +154,7 @@ static void EmitVertexMainDeclaration(std::stringstream& ss, u32 num_tex_inputs,

static void EmitPixelMainDeclaration(std::stringstream& ss, u32 num_tex_inputs,
u32 num_color_inputs, const char* output_type = "float4",
const char* extra_vars = "")
const char* extra_vars = "", bool emit_frag_coord = false)
{
switch (GetAPIType())
{
@@ -144,6 +165,8 @@ static void EmitPixelMainDeclaration(std::stringstream& ss, u32 num_tex_inputs,
ss << "in float3 v_tex" << i << " : TEXCOORD" << i << ", ";
for (u32 i = 0; i < num_color_inputs; i++)
ss << "in float4 v_col" << i << " : COLOR" << i << ", ";
if (emit_frag_coord)
ss << "in float4 frag_coord : SV_Position, ";
ss << extra_vars << "out " << output_type << " ocol0 : SV_Target)\n";
}
break;
@@ -170,6 +193,8 @@ static void EmitPixelMainDeclaration(std::stringstream& ss, u32 num_tex_inputs,

ss << "FRAGMENT_OUTPUT_LOCATION(0) out " << output_type << " ocol0;\n";
ss << extra_vars << "\n";
if (emit_frag_coord)
ss << "#define frag_coord gl_FragCoord\n";
ss << "void main()\n";
}
break;
@@ -496,4 +521,126 @@ std::string GenerateFormatConversionShader(EFBReinterpretType convtype, u32 samp
return ss.str();
}

// Generates the source of a pixel shader which reinterprets the texels of a texture stored in
// "from_format" as if they had been written in "to_format".
//
// The generated shader loads the texel under the current fragment (mip level 0, array layer taken
// from v_tex0.z), packs its channels into the integer bit pattern of "from_format", and then
// unpacks that same bit pattern according to "to_format" into ocol0.
//
// Handled formats (on either side): I8, C8, IA8, IA4, RGB565 and RGB5A3.
// Returns an empty string when either format is not handled, so that callers can detect that no
// usable shader exists instead of compiling one which reads an unassigned value or never writes
// its output.
std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureFormat to_format)
{
  std::stringstream ss;
  EmitSamplerDeclarations(ss, 0, 1, false);
  EmitPixelMainDeclaration(ss, 1, 0, "float4", "", true);
  ss << "{\n";
  ss << " int layer = int(v_tex0.z);\n";
  ss << " int4 coords = int4(int2(frag_coord.xy), layer, 0);\n";

  // Convert to a 32-bit value encompassing all channels, filling the most significant bits with
  // zeroes.
  ss << " uint raw_value;\n";

  // Every handled source format starts from the same unfiltered texel load.
  ss << " float4 temp_value = ";
  EmitTextureLoad(ss, 0, "coords");
  ss << ";\n";

  switch (from_format)
  {
  case TextureFormat::I8:
  case TextureFormat::C8:
    ss << " raw_value = uint(temp_value.r * 255.0);\n";
    break;

  case TextureFormat::IA8:
    ss << " raw_value = uint(temp_value.r * 255.0) | (uint(temp_value.a * 255.0) << 8);\n";
    break;

  case TextureFormat::IA4:
    ss << " raw_value = uint(temp_value.r * 15.0) | (uint(temp_value.a * 15.0) << 4);\n";
    break;

  case TextureFormat::RGB565:
    // Blue in bits 0-4, green in bits 5-10, red in bits 11-15.
    ss << " raw_value = uint(temp_value.b * 31.0) | (uint(temp_value.g * 63.0) << 5) |\n";
    ss << " (uint(temp_value.r * 31.0) << 11);\n";
    break;

  case TextureFormat::RGB5A3:
    // 0.8784 = 224 / 255 which is the maximum alpha value that can be represented in 3 bits
    ss << " if (temp_value.a > 0.878f) {\n";
    ss << " raw_value = (uint(temp_value.b * 31.0)) | (uint(temp_value.g * 31.0) << 5) |\n";
    ss << " (uint(temp_value.r * 31.0) << 10) | 0x8000u;\n";
    ss << " } else {\n";
    ss << " raw_value = (uint(temp_value.b * 15.0)) | (uint(temp_value.g * 15.0) << 4) |\n";
    ss << " (uint(temp_value.r * 15.0) << 8) | (uint(temp_value.a * 7.0) << 12);\n";
    ss << " }\n";
    break;

  default:
    // No packing rule for this source format; raw_value would be left unassigned.
    return std::string();
  }

  // Now convert it to its new representation.
  switch (to_format)
  {
  case TextureFormat::I8:
  case TextureFormat::C8:
    ss << " ocol0.rgba = (float(raw_value & 0xFFu) / 255.0).rrrr;\n";
    break;

  case TextureFormat::IA8:
    ss << " ocol0.rgb = (float(raw_value & 0xFFu) / 255.0).rrr;\n";
    ss << " ocol0.a = float((raw_value >> 8) & 0xFFu) / 255.0;\n";
    break;

  case TextureFormat::IA4:
    ss << " ocol0.rgb = (float(raw_value & 0xFu) / 15.0).rrr;\n";
    ss << " ocol0.a = float((raw_value >> 4) & 0xFu) / 15.0;\n";
    break;

  case TextureFormat::RGB565:
    // Mirror of the RGB565 packing above: red is the 5 bits starting at bit 11, green is the
    // 6 bits starting at bit 5, blue is the low 5 bits. Alpha is always opaque.
    ss << " ocol0 = float4(float((raw_value >> 11) & 0x1Fu) / 31.0,\n";
    ss << " float((raw_value >> 5) & 0x3Fu) / 63.0,\n";
    ss << " float(raw_value & 0x1Fu) / 31.0, 1.0);\n";
    break;

  case TextureFormat::RGB5A3:
    // Bit 15 selects between the opaque 5-bit-per-channel layout and the 4-bit colour plus
    // 3-bit alpha layout, matching the two branches of the RGB5A3 packing above.
    ss << " if ((raw_value & 0x8000u) != 0u) {\n";
    ss << " ocol0 = float4(float((raw_value >> 10) & 0x1Fu) / 31.0,\n";
    ss << " float((raw_value >> 5) & 0x1Fu) / 31.0,\n";
    ss << " float(raw_value & 0x1Fu) / 31.0, 1.0);\n";
    ss << " } else {\n";
    ss << " ocol0 = float4(float((raw_value >> 8) & 0x0Fu) / 15.0,\n";
    ss << " float((raw_value >> 4) & 0x0Fu) / 15.0,\n";
    ss << " float(raw_value & 0x0Fu) / 15.0,\n";
    ss << " float((raw_value >> 12) & 0x07u) / 7.0);\n";
    ss << " }\n";
    break;

  default:
    // No unpacking rule for this destination format; ocol0 would never be written.
    return std::string();
  }

  ss << "}\n";
  return ss.str();
}

} // namespace FramebufferShaderGen
@@ -3,6 +3,7 @@
#include "VideoCommon/VideoCommon.h"

enum class EFBReinterpretType;
enum class TextureFormat;

namespace FramebufferShaderGen
{
@@ -28,5 +29,6 @@ std::string GenerateClearVertexShader();
std::string GenerateEFBPokeVertexShader();
std::string GenerateColorPixelShader();
std::string GenerateFormatConversionShader(EFBReinterpretType convtype, u32 samples);
std::string GenerateTextureReinterpretShader(TextureFormat from_format, TextureFormat to_format);

} // namespace FramebufferShaderGen
@@ -1255,6 +1255,44 @@ const AbstractPipeline* ShaderCache::GetPaletteConversionPipeline(TLUTFormat for
return m_palette_conversion_pipelines[static_cast<size_t>(format)].get();
}

// Returns the pipeline used to reinterpret a texture from "from_format" to "to_format", creating
// it on first use. The outcome is remembered per format pair, including failures: when the shader
// source is empty or the shader cannot be created, a null entry is stored and nullptr is returned
// on this and every later request for the same pair.
const AbstractPipeline* ShaderCache::GetTextureReinterpretPipeline(TextureFormat from_format,
                                                                   TextureFormat to_format)
{
  const auto cache_key = std::make_pair(from_format, to_format);
  const auto cached = m_texture_reinterpret_pipelines.find(cache_key);
  if (cached != m_texture_reinterpret_pipelines.end())
    return cached->second.get();

  // Only ask the renderer for a shader when the generator actually produced source.
  std::string pixel_source =
      FramebufferShaderGen::GenerateTextureReinterpretShader(from_format, to_format);
  std::unique_ptr<AbstractShader> pixel_shader;
  if (!pixel_source.empty())
    pixel_shader = g_renderer->CreateShaderFromSource(ShaderStage::Pixel, pixel_source);

  // Stays null unless a pixel shader is available to build the pipeline from.
  std::unique_ptr<AbstractPipeline> pipeline;
  if (pixel_shader)
  {
    AbstractPipelineConfig pipeline_config;
    pipeline_config.vertex_format = nullptr;
    pipeline_config.vertex_shader = m_screen_quad_vertex_shader.get();
    pipeline_config.geometry_shader = nullptr;
    pipeline_config.pixel_shader = pixel_shader.get();
    pipeline_config.rasterization_state =
        RenderState::GetNoCullRasterizationState(PrimitiveType::Triangles);
    pipeline_config.depth_state = RenderState::GetNoDepthTestingDepthState();
    pipeline_config.blending_state = RenderState::GetNoBlendingBlendState();
    pipeline_config.framebuffer_state = RenderState::GetRGBA8FramebufferState();
    pipeline_config.usage = AbstractPipelineUsage::Utility;
    pipeline = g_renderer->CreatePipeline(pipeline_config);
  }

  // Single insertion point for both the success and the failure (null) result.
  const auto inserted = m_texture_reinterpret_pipelines.emplace(cache_key, std::move(pipeline));
  return inserted.first->second.get();
}

const AbstractShader* ShaderCache::GetTextureDecodingShader(TextureFormat format,
TLUTFormat palette_format)
{
@@ -1282,5 +1320,4 @@ const AbstractShader* ShaderCache::GetTextureDecodingShader(TextureFormat format
auto iiter = m_texture_decoding_shaders.emplace(key, std::move(shader));
return iiter.first->second.get();
}

} // namespace VideoCommon
@@ -34,6 +34,7 @@

class NativeVertexFormat;
enum class AbstractTextureFormat : u32;
enum class TextureFormat;
enum class TLUTFormat;

namespace VideoCommon
@@ -104,6 +105,10 @@ class ShaderCache final
// Palette texture conversion pipelines
const AbstractPipeline* GetPaletteConversionPipeline(TLUTFormat format);

// Texture reinterpret pipelines
const AbstractPipeline* GetTextureReinterpretPipeline(TextureFormat from_format,
TextureFormat to_format);

// Texture decoding compute shaders
const AbstractShader* GetTextureDecodingShader(TextureFormat format, TLUTFormat palette_format);

@@ -238,6 +243,10 @@ class ShaderCache final
std::array<std::unique_ptr<AbstractPipeline>, NUM_PALETTE_CONVERSION_SHADERS>
m_palette_conversion_pipelines;

// Texture reinterpreting pipeline
std::map<std::pair<TextureFormat, TextureFormat>, std::unique_ptr<AbstractPipeline>>
m_texture_reinterpret_pipelines;

// Texture decoding shaders
std::map<std::pair<u32, u32>, std::unique_ptr<AbstractShader>> m_texture_decoding_shaders;
};

0 comments on commit 946571b

Please sign in to comment.
You can’t perform that action at this time.