25 changes: 12 additions & 13 deletions Source/Core/VideoBackends/OGL/SamplerCache.cpp
Expand Up @@ -7,7 +7,6 @@
#include <memory>

#include "Common/CommonTypes.h"
#include "VideoCommon/SamplerCommon.h"
#include "VideoCommon/VideoConfig.h"

namespace OGL
Expand Down Expand Up @@ -72,16 +71,16 @@ void SamplerCache::InvalidateBinding(u32 stage)
void SamplerCache::SetParameters(GLuint sampler_id, const SamplerState& params)
{
GLenum min_filter;
GLenum mag_filter = (params.mag_filter == SamplerState::Filter::Point) ? GL_NEAREST : GL_LINEAR;
if (params.mipmap_filter == SamplerState::Filter::Linear)
GLenum mag_filter = (params.tm0.mag_filter == FilterMode::Near) ? GL_NEAREST : GL_LINEAR;
if (params.tm0.mipmap_filter == FilterMode::Linear)
{
min_filter = (params.min_filter == SamplerState::Filter::Point) ? GL_NEAREST_MIPMAP_LINEAR :
GL_LINEAR_MIPMAP_LINEAR;
min_filter = (params.tm0.min_filter == FilterMode::Near) ? GL_NEAREST_MIPMAP_LINEAR :
GL_LINEAR_MIPMAP_LINEAR;
}
else
{
min_filter = (params.min_filter == SamplerState::Filter::Point) ? GL_NEAREST_MIPMAP_NEAREST :
GL_LINEAR_MIPMAP_NEAREST;
min_filter = (params.tm0.min_filter == FilterMode::Near) ? GL_NEAREST_MIPMAP_NEAREST :
GL_LINEAR_MIPMAP_NEAREST;
}

glSamplerParameteri(sampler_id, GL_TEXTURE_MIN_FILTER, min_filter);
Expand All @@ -91,17 +90,17 @@ void SamplerCache::SetParameters(GLuint sampler_id, const SamplerState& params)
{GL_CLAMP_TO_EDGE, GL_REPEAT, GL_MIRRORED_REPEAT}};

glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_S,
address_modes[static_cast<u32>(params.wrap_u.Value())]);
address_modes[static_cast<u32>(params.tm0.wrap_u.Value())]);
glSamplerParameteri(sampler_id, GL_TEXTURE_WRAP_T,
address_modes[static_cast<u32>(params.wrap_v.Value())]);
address_modes[static_cast<u32>(params.tm0.wrap_v.Value())]);

glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, params.min_lod / 16.f);
glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, params.max_lod / 16.f);
glSamplerParameterf(sampler_id, GL_TEXTURE_MIN_LOD, params.tm1.min_lod / 16.f);
glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_LOD, params.tm1.max_lod / 16.f);

if (!static_cast<Renderer*>(g_renderer.get())->IsGLES())
glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, params.lod_bias / 256.f);
glSamplerParameterf(sampler_id, GL_TEXTURE_LOD_BIAS, params.tm0.lod_bias / 256.f);

if (params.anisotropic_filtering && g_ogl_config.bSupportsAniso)
if (params.tm0.anisotropic_filtering && g_ogl_config.bSupportsAniso)
{
glSamplerParameterf(sampler_id, GL_TEXTURE_MAX_ANISOTROPY_EXT,
static_cast<float>(1 << g_ActiveConfig.iMaxAnisotropy));
Expand Down
25 changes: 14 additions & 11 deletions Source/Core/VideoBackends/Software/Rasterizer.cpp
Expand Up @@ -171,22 +171,25 @@ static inline void CalculateLOD(s32* lodp, bool* linear, u32 texmap, u32 texcoor
const TexMode1& tm1 = texUnit.texMode1;

float sDelta, tDelta;

float* uv00 = rasterBlock.Pixel[0][0].Uv[texcoord];
float* uv10 = rasterBlock.Pixel[1][0].Uv[texcoord];
float* uv01 = rasterBlock.Pixel[0][1].Uv[texcoord];

float dudx = fabsf(uv00[0] - uv10[0]);
float dvdx = fabsf(uv00[1] - uv10[1]);
float dudy = fabsf(uv00[0] - uv01[0]);
float dvdy = fabsf(uv00[1] - uv01[1]);

if (tm0.diag_lod == LODType::Diagonal)
{
float* uv0 = rasterBlock.Pixel[0][0].Uv[texcoord];
float* uv1 = rasterBlock.Pixel[1][1].Uv[texcoord];

sDelta = fabsf(uv0[0] - uv1[0]);
tDelta = fabsf(uv0[1] - uv1[1]);
sDelta = dudx + dudy;
tDelta = dvdx + dvdy;
}
else
{
float* uv0 = rasterBlock.Pixel[0][0].Uv[texcoord];
float* uv1 = rasterBlock.Pixel[1][0].Uv[texcoord];
float* uv2 = rasterBlock.Pixel[0][1].Uv[texcoord];

sDelta = std::max(fabsf(uv0[0] - uv1[0]), fabsf(uv0[0] - uv2[0]));
tDelta = std::max(fabsf(uv0[1] - uv1[1]), fabsf(uv0[1] - uv2[1]));
sDelta = std::max(dudx, dudy);
tDelta = std::max(dvdx, dvdy);
}

// get LOD in s28.4
Expand Down
2 changes: 2 additions & 0 deletions Source/Core/VideoBackends/Software/SWmain.cpp
Expand Up @@ -84,6 +84,8 @@ void VideoSoftware::InitBackendInfo()
g_Config.backend_info.bSupportsShaderBinaries = false;
g_Config.backend_info.bSupportsPipelineCacheData = false;
g_Config.backend_info.bSupportsBBox = true;
g_Config.backend_info.bSupportsCoarseDerivatives = false;
g_Config.backend_info.bSupportsTextureQueryLevels = false;

// aamodes
g_Config.backend_info.AAModes = {1};
Expand Down
3 changes: 1 addition & 2 deletions Source/Core/VideoBackends/Software/TextureSampler.cpp
Expand Up @@ -11,7 +11,6 @@
#include "Core/HW/Memmap.h"

#include "VideoCommon/BPMemory.h"
#include "VideoCommon/SamplerCommon.h"
#include "VideoCommon/TextureDecoder.h"

#define ALLOW_MIPMAP 1
Expand Down Expand Up @@ -79,7 +78,7 @@ void Sample(s32 s, s32 t, s32 lod, bool linear, u8 texmap, u8* sample)

const s32 lodFract = lod & 0xf;

if (lod > 0 && SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0))
if (lod > 0 && tm0.mipmap_filter != MipMode::None)
{
// use mipmap
baseMip = lod >> 4;
Expand Down
26 changes: 13 additions & 13 deletions Source/Core/VideoBackends/Vulkan/ObjectCache.cpp
Expand Up @@ -315,28 +315,28 @@ VkSampler ObjectCache::GetSampler(const SamplerState& info)
VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT}};

VkSamplerCreateInfo create_info = {
VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, // VkStructureType sType
nullptr, // const void* pNext
0, // VkSamplerCreateFlags flags
filters[static_cast<u32>(info.mag_filter.Value())], // VkFilter magFilter
filters[static_cast<u32>(info.min_filter.Value())], // VkFilter minFilter
mipmap_modes[static_cast<u32>(info.mipmap_filter.Value())], // VkSamplerMipmapMode mipmapMode
address_modes[static_cast<u32>(info.wrap_u.Value())], // VkSamplerAddressMode addressModeU
address_modes[static_cast<u32>(info.wrap_v.Value())], // VkSamplerAddressMode addressModeV
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeW
info.lod_bias / 256.0f, // float mipLodBias
VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, // VkStructureType sType
nullptr, // const void* pNext
0, // VkSamplerCreateFlags flags
filters[u32(info.tm0.mag_filter.Value())], // VkFilter magFilter
filters[u32(info.tm0.min_filter.Value())], // VkFilter minFilter
mipmap_modes[u32(info.tm0.mipmap_filter.Value())], // VkSamplerMipmapMode mipmapMode
address_modes[u32(info.tm0.wrap_u.Value())], // VkSamplerAddressMode addressModeU
address_modes[u32(info.tm0.wrap_v.Value())], // VkSamplerAddressMode addressModeV
VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeW
info.tm0.lod_bias / 256.0f, // float mipLodBias
VK_FALSE, // VkBool32 anisotropyEnable
0.0f, // float maxAnisotropy
VK_FALSE, // VkBool32 compareEnable
VK_COMPARE_OP_ALWAYS, // VkCompareOp compareOp
info.min_lod / 16.0f, // float minLod
info.max_lod / 16.0f, // float maxLod
info.tm1.min_lod / 16.0f, // float minLod
info.tm1.max_lod / 16.0f, // float maxLod
VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK, // VkBorderColor borderColor
VK_FALSE // VkBool32 unnormalizedCoordinates
};

// Can we use anisotropic filtering with this sampler?
if (info.anisotropic_filtering && g_vulkan_context->SupportsAnisotropicFiltering())
if (info.tm0.anisotropic_filtering && g_vulkan_context->SupportsAnisotropicFiltering())
{
// Cap anisotropy to device limits.
create_info.anisotropyEnable = VK_TRUE;
Expand Down
8 changes: 4 additions & 4 deletions Source/Core/VideoBackends/Vulkan/VKRenderer.cpp
Expand Up @@ -49,7 +49,7 @@ Renderer::Renderer(std::unique_ptr<SwapChain> swap_chain, float backbuffer_scale
{
UpdateActiveConfig();
for (SamplerState& m_sampler_state : m_sampler_states)
m_sampler_state.hex = RenderState::GetPointSamplerState().hex;
m_sampler_state = RenderState::GetPointSamplerState();
}

Renderer::~Renderer() = default;
Expand Down Expand Up @@ -545,7 +545,7 @@ void Renderer::SetTexture(u32 index, const AbstractTexture* texture)
void Renderer::SetSamplerState(u32 index, const SamplerState& state)
{
// Skip lookup if the state hasn't changed.
if (m_sampler_states[index].hex == state.hex)
if (m_sampler_states[index] == state)
return;

// Look up new state and replace in state tracker.
Expand All @@ -557,7 +557,7 @@ void Renderer::SetSamplerState(u32 index, const SamplerState& state)
}

StateTracker::GetInstance()->SetSampler(index, sampler);
m_sampler_states[index].hex = state.hex;
m_sampler_states[index] = state;
}

void Renderer::SetComputeImageTexture(AbstractTexture* texture, bool read, bool write)
Expand Down Expand Up @@ -588,7 +588,7 @@ void Renderer::ResetSamplerStates()
// Invalidate all sampler states, next draw will re-initialize them.
for (u32 i = 0; i < m_sampler_states.size(); i++)
{
m_sampler_states[i].hex = RenderState::GetPointSamplerState().hex;
m_sampler_states[i] = RenderState::GetPointSamplerState();
StateTracker::GetInstance()->SetSampler(i, g_object_cache->GetPointSampler());
}

Expand Down
2 changes: 2 additions & 0 deletions Source/Core/VideoBackends/Vulkan/VulkanContext.cpp
Expand Up @@ -286,6 +286,8 @@ void VulkanContext::PopulateBackendInfo(VideoConfig* config)
config->backend_info.bSupportsLogicOp = false; // Dependent on features.
config->backend_info.bSupportsLargePoints = false; // Dependent on features.
config->backend_info.bSupportsFramebufferFetch = false; // No support.
config->backend_info.bSupportsCoarseDerivatives = true; // Assumed support.
config->backend_info.bSupportsTextureQueryLevels = true; // Assumed support.
}

void VulkanContext::PopulateBackendInfoAdapters(VideoConfig* config, const GPUList& gpu_list)
Expand Down
1 change: 0 additions & 1 deletion Source/Core/VideoCommon/CMakeLists.txt
Expand Up @@ -70,7 +70,6 @@ add_library(videocommon
RenderBase.h
RenderState.cpp
RenderState.h
SamplerCommon.h
ShaderCache.cpp
ShaderCache.h
ShaderGenCommon.cpp
Expand Down
6 changes: 3 additions & 3 deletions Source/Core/VideoCommon/ConstantManager.h
Expand Up @@ -21,7 +21,7 @@ struct PixelShaderConstants
std::array<int4, 4> colors;
std::array<int4, 4> kcolors;
int4 alpha;
std::array<float4, 8> texdims;
std::array<uint4, 8> texdims;
std::array<int4, 2> zbias;
std::array<int4, 2> indtexscale;
std::array<int4, 6> indtexmtx;
Expand All @@ -32,7 +32,7 @@ struct PixelShaderConstants
float4 zslope;
std::array<float, 2> efbscale; // .xy

// Constants from here onwards are only used in ubershaders.
// Constants from here onwards are only used in ubershaders, other than pack2.
u32 genmode; // .z
u32 alphaTest; // .w
u32 fogParam3; // .x
Expand All @@ -44,7 +44,7 @@ struct PixelShaderConstants
u32 dither; // .z (bool)
u32 bounding_box; // .w (bool)
std::array<uint4, 16> pack1; // .xy - combiners, .z - tevind, .w - iref
std::array<uint4, 8> pack2; // .x - tevorder, .y - tevksel
std::array<uint4, 8> pack2; // .x - tevorder, .y - tevksel, .z/.w - SamplerState tm0/tm1
std::array<int4, 32> konst; // .rgba
// The following are used in ubershaders when using shader_framebuffer_fetch blending
u32 blend_enable;
Expand Down
346 changes: 317 additions & 29 deletions Source/Core/VideoCommon/PixelShaderGen.cpp

Large diffs are not rendered by default.

22 changes: 14 additions & 8 deletions Source/Core/VideoCommon/PixelShaderManager.cpp
Expand Up @@ -273,16 +273,22 @@ void PixelShaderManager::SetDestAlphaChanged()

void PixelShaderManager::SetTexDims(int texmapid, u32 width, u32 height)
{
float rwidth = 1.0f / (width * 128.0f);
float rheight = 1.0f / (height * 128.0f);

// TODO: Move this check out to the caller. There we could just call this function on texture
// changes, or better, use textureSize() in GLSL.
if (constants.texdims[texmapid][0] != rwidth || constants.texdims[texmapid][1] != rheight)
if (constants.texdims[texmapid][0] != width || constants.texdims[texmapid][1] != height)
dirty = true;

constants.texdims[texmapid][0] = width;
constants.texdims[texmapid][1] = height;
}

void PixelShaderManager::SetSamplerState(int texmapid, u32 tm0, u32 tm1)
{
if (constants.pack2[texmapid][2] != tm0 || constants.pack2[texmapid][3] != tm1)
dirty = true;

constants.texdims[texmapid][0] = rwidth;
constants.texdims[texmapid][1] = rheight;
constants.pack2[texmapid][2] = tm0;
constants.pack2[texmapid][3] = tm1;
}

void PixelShaderManager::SetZTextureBias()
Expand Down Expand Up @@ -382,8 +388,8 @@ void PixelShaderManager::SetZTextureOpChanged()
void PixelShaderManager::SetTexCoordChanged(u8 texmapid)
{
TCoordInfo& tc = bpmem.texcoords[texmapid];
constants.texdims[texmapid][2] = (float)(tc.s.scale_minus_1 + 1) * 128.0f;
constants.texdims[texmapid][3] = (float)(tc.t.scale_minus_1 + 1) * 128.0f;
constants.texdims[texmapid][2] = tc.s.scale_minus_1 + 1;
constants.texdims[texmapid][3] = tc.t.scale_minus_1 + 1;
dirty = true;
}

Expand Down
1 change: 1 addition & 0 deletions Source/Core/VideoCommon/PixelShaderManager.h
Expand Up @@ -30,6 +30,7 @@ class PixelShaderManager
static void SetAlphaTestChanged();
static void SetDestAlphaChanged();
static void SetTexDims(int texmapid, u32 width, u32 height);
static void SetSamplerState(int texmapid, u32 tm0, u32 tm1);
static void SetZTextureBias();
static void SetViewportChanged();
static void SetEfbScaleChanged(float scalex, float scaley);
Expand Down
117 changes: 54 additions & 63 deletions Source/Core/VideoCommon/RenderState.cpp
Expand Up @@ -2,9 +2,10 @@
// SPDX-License-Identifier: GPL-2.0-or-later

#include "VideoCommon/RenderState.h"

#include <algorithm>
#include <array>
#include "VideoCommon/SamplerCommon.h"

#include "VideoCommon/TextureConfig.h"

void RasterizationState::Generate(const BPMemory& bp, PrimitiveType primitive_type)
Expand All @@ -17,31 +18,13 @@ void RasterizationState::Generate(const BPMemory& bp, PrimitiveType primitive_ty
cullmode = CullMode::None;
}

RasterizationState& RasterizationState::operator=(const RasterizationState& rhs)
{
hex = rhs.hex;
return *this;
}

FramebufferState& FramebufferState::operator=(const FramebufferState& rhs)
{
hex = rhs.hex;
return *this;
}

void DepthState::Generate(const BPMemory& bp)
{
testenable = bp.zmode.testenable.Value();
updateenable = bp.zmode.updateenable.Value();
func = bp.zmode.func.Value();
}

DepthState& DepthState::operator=(const DepthState& rhs)
{
hex = rhs.hex;
return *this;
}

// If the framebuffer format has no alpha channel, it is assumed to
// ONE on blending. As the backends may emulate this framebuffer
// configuration with an alpha channel, we just drop all references
Expand Down Expand Up @@ -216,42 +199,45 @@ void BlendingState::ApproximateLogicOpWithBlending()
dstfactor = approximations[u32(logicmode.Value())].dstfactor;
}

BlendingState& BlendingState::operator=(const BlendingState& rhs)
{
hex = rhs.hex;
return *this;
}

void SamplerState::Generate(const BPMemory& bp, u32 index)
{
auto tex = bp.tex.GetUnit(index);
const TexMode0& tm0 = tex.texMode0;
const TexMode1& tm1 = tex.texMode1;
const TexMode0& bp_tm0 = tex.texMode0;
const TexMode1& bp_tm1 = tex.texMode1;

// GX can configure the mip filter to none. However, D3D and Vulkan can't express this in their
// sampler states. Therefore, we set the min/max LOD to zero if this option is used.
min_filter = tm0.min_filter == FilterMode::Linear ? Filter::Linear : Filter::Point;
mipmap_filter = tm0.mipmap_filter == MipMode::Linear ? Filter::Linear : Filter::Point;
mag_filter = tm0.mag_filter == FilterMode::Linear ? Filter::Linear : Filter::Point;
tm0.min_filter = bp_tm0.min_filter;
tm0.mipmap_filter =
bp_tm0.mipmap_filter == MipMode::Linear ? FilterMode::Linear : FilterMode::Near;
tm0.mag_filter = bp_tm0.mag_filter;

// If mipmaps are disabled, clamp min/max lod
max_lod = SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0) ? tm1.max_lod.Value() : 0;
min_lod = std::min(max_lod.Value(), static_cast<u64>(tm1.min_lod));
lod_bias = SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0) ? tm0.lod_bias * (256 / 32) : 0;
if (bp_tm0.mipmap_filter == MipMode::None)
{
tm1.max_lod = 0;
tm1.min_lod = 0;
tm0.lod_bias = 0;
}
else
{
// NOTE: When comparing, max is checked first, then min; if max is less than min, max wins
tm1.max_lod = bp_tm1.max_lod.Value();
tm1.min_lod = std::min(tm1.max_lod.Value(), bp_tm1.min_lod.Value());
tm0.lod_bias = bp_tm0.lod_bias * (256 / 32);
}

// Address modes
// Wrap modes
// Hardware testing indicates that wrap_mode set to 3 behaves the same as clamp.
static constexpr std::array<AddressMode, 4> address_modes = {
{AddressMode::Clamp, AddressMode::Repeat, AddressMode::MirroredRepeat, AddressMode::Clamp}};
wrap_u = address_modes[u32(tm0.wrap_s.Value())];
wrap_v = address_modes[u32(tm0.wrap_t.Value())];
anisotropic_filtering = 0;
}
auto filter_invalid_wrap = [](WrapMode mode) {
return (mode <= WrapMode::Mirror) ? mode : WrapMode::Clamp;
};
tm0.wrap_u = filter_invalid_wrap(bp_tm0.wrap_s);
tm0.wrap_v = filter_invalid_wrap(bp_tm0.wrap_t);

SamplerState& SamplerState::operator=(const SamplerState& rhs)
{
hex = rhs.hex;
return *this;
tm0.diag_lod = bp_tm0.diag_lod;
tm0.anisotropic_filtering = false; // TODO: Respect BP anisotropic filtering mode
tm0.lod_clamp = bp_tm0.lod_clamp; // TODO: What does this do?
}

namespace RenderState
Expand Down Expand Up @@ -344,37 +330,42 @@ BlendingState GetNoColorWriteBlendState()
SamplerState GetInvalidSamplerState()
{
SamplerState state;
state.hex = UINT64_C(0xFFFFFFFFFFFFFFFF);
state.tm0.hex = 0xFFFFFFFF;
state.tm1.hex = 0xFFFFFFFF;
return state;
}

SamplerState GetPointSamplerState()
{
SamplerState state = {};
state.min_filter = SamplerState::Filter::Point;
state.mag_filter = SamplerState::Filter::Point;
state.mipmap_filter = SamplerState::Filter::Point;
state.wrap_u = SamplerState::AddressMode::Clamp;
state.wrap_v = SamplerState::AddressMode::Clamp;
state.min_lod = 0;
state.max_lod = 255;
state.lod_bias = 0;
state.anisotropic_filtering = false;
state.tm0.min_filter = FilterMode::Near;
state.tm0.mag_filter = FilterMode::Near;
state.tm0.mipmap_filter = FilterMode::Near;
state.tm0.wrap_u = WrapMode::Clamp;
state.tm0.wrap_v = WrapMode::Clamp;
state.tm1.min_lod = 0;
state.tm1.max_lod = 255;
state.tm0.lod_bias = 0;
state.tm0.anisotropic_filtering = false;
state.tm0.diag_lod = LODType::Edge;
state.tm0.lod_clamp = false;
return state;
}

SamplerState GetLinearSamplerState()
{
SamplerState state = {};
state.min_filter = SamplerState::Filter::Linear;
state.mag_filter = SamplerState::Filter::Linear;
state.mipmap_filter = SamplerState::Filter::Linear;
state.wrap_u = SamplerState::AddressMode::Clamp;
state.wrap_v = SamplerState::AddressMode::Clamp;
state.min_lod = 0;
state.max_lod = 255;
state.lod_bias = 0;
state.anisotropic_filtering = false;
state.tm0.min_filter = FilterMode::Linear;
state.tm0.mag_filter = FilterMode::Linear;
state.tm0.mipmap_filter = FilterMode::Linear;
state.tm0.wrap_u = WrapMode::Clamp;
state.tm0.wrap_v = WrapMode::Clamp;
state.tm1.min_lod = 0;
state.tm1.max_lod = 255;
state.tm0.lod_bias = 0;
state.tm0.anisotropic_filtering = false;
state.tm0.diag_lod = LODType::Edge;
state.tm0.lod_clamp = false;
return state;
}

Expand Down
157 changes: 120 additions & 37 deletions Source/Core/VideoCommon/RenderState.h
Expand Up @@ -22,11 +22,24 @@ union RasterizationState
{
void Generate(const BPMemory& bp, PrimitiveType primitive_type);

RasterizationState& operator=(const RasterizationState& rhs);
RasterizationState() = default;
RasterizationState(const RasterizationState&) = default;
RasterizationState& operator=(const RasterizationState& rhs)
{
hex = rhs.hex;
return *this;
}
RasterizationState(RasterizationState&&) = default;
RasterizationState& operator=(RasterizationState&& rhs)
{
hex = rhs.hex;
return *this;
}

bool operator==(const RasterizationState& rhs) const { return hex == rhs.hex; }
bool operator!=(const RasterizationState& rhs) const { return hex != rhs.hex; }
bool operator!=(const RasterizationState& rhs) const { return !operator==(rhs); }
bool operator<(const RasterizationState& rhs) const { return hex < rhs.hex; }

BitField<0, 2, CullMode> cullmode;
BitField<3, 2, PrimitiveType> primitive;

Expand All @@ -35,27 +48,53 @@ union RasterizationState

union FramebufferState
{
FramebufferState() = default;
FramebufferState(const FramebufferState&) = default;
FramebufferState& operator=(const FramebufferState& rhs)
{
hex = rhs.hex;
return *this;
}
FramebufferState(FramebufferState&&) = default;
FramebufferState& operator=(FramebufferState&& rhs)
{
hex = rhs.hex;
return *this;
}

bool operator==(const FramebufferState& rhs) const { return hex == rhs.hex; }
bool operator!=(const FramebufferState& rhs) const { return !operator==(rhs); }

BitField<0, 8, AbstractTextureFormat> color_texture_format;
BitField<8, 8, AbstractTextureFormat> depth_texture_format;
BitField<16, 8, u32> samples;
BitField<24, 1, u32> per_sample_shading;

bool operator==(const FramebufferState& rhs) const { return hex == rhs.hex; }
bool operator!=(const FramebufferState& rhs) const { return hex != rhs.hex; }
FramebufferState& operator=(const FramebufferState& rhs);

u32 hex;
};

union DepthState
{
void Generate(const BPMemory& bp);

DepthState& operator=(const DepthState& rhs);
DepthState() = default;
DepthState(const DepthState&) = default;
DepthState& operator=(const DepthState& rhs)
{
hex = rhs.hex;
return *this;
}
DepthState(DepthState&&) = default;
DepthState& operator=(DepthState&& rhs)
{
hex = rhs.hex;
return *this;
}

bool operator==(const DepthState& rhs) const { return hex == rhs.hex; }
bool operator!=(const DepthState& rhs) const { return hex != rhs.hex; }
bool operator!=(const DepthState& rhs) const { return !operator==(rhs); }
bool operator<(const DepthState& rhs) const { return hex < rhs.hex; }

BitField<0, 1, u32> testenable;
BitField<1, 1, u32> updateenable;
BitField<2, 3, CompareMode> func;
Expand All @@ -71,11 +110,24 @@ union BlendingState
// Will not be bit-correct, and in some cases not even remotely in the same ballpark.
void ApproximateLogicOpWithBlending();

BlendingState& operator=(const BlendingState& rhs);
BlendingState() = default;
BlendingState(const BlendingState&) = default;
BlendingState& operator=(const BlendingState& rhs)
{
hex = rhs.hex;
return *this;
}
BlendingState(BlendingState&&) = default;
BlendingState& operator=(BlendingState&& rhs)
{
hex = rhs.hex;
return *this;
}

bool operator==(const BlendingState& rhs) const { return hex == rhs.hex; }
bool operator!=(const BlendingState& rhs) const { return hex != rhs.hex; }
bool operator!=(const BlendingState& rhs) const { return !operator==(rhs); }
bool operator<(const BlendingState& rhs) const { return hex < rhs.hex; }

BitField<0, 1, u32> blendenable;
BitField<1, 1, u32> logicopenable;
BitField<2, 1, u32> dstalpha;
Expand All @@ -93,42 +145,73 @@ union BlendingState
u32 hex;
};

union SamplerState
struct SamplerState
{
using StorageType = u64;
void Generate(const BPMemory& bp, u32 index);

enum class Filter : StorageType
SamplerState() = default;
SamplerState(const SamplerState&) = default;
SamplerState& operator=(const SamplerState& rhs)
{
Point,
Linear
};
tm0.hex = rhs.tm0.hex;
tm1.hex = rhs.tm1.hex;
return *this;
}
SamplerState(SamplerState&&) = default;
SamplerState& operator=(SamplerState&& rhs)
{
tm0.hex = rhs.tm0.hex;
tm1.hex = rhs.tm1.hex;
return *this;
}

bool operator==(const SamplerState& rhs) const { return Hex() == rhs.Hex(); }
bool operator!=(const SamplerState& rhs) const { return !operator==(rhs); }
bool operator<(const SamplerState& rhs) const { return Hex() < rhs.Hex(); }

constexpr u64 Hex() const { return tm0.hex | (static_cast<u64>(tm1.hex) << 32); }

enum class AddressMode : StorageType
// Based on BPMemory TexMode0/TexMode1, but with slightly higher precision and some
// simplifications
union TM0
{
Clamp,
Repeat,
MirroredRepeat
// BP's mipmap_filter can be None, but that is represented here by setting min_lod and max_lod
// to 0
BitField<0, 1, FilterMode> min_filter;
BitField<1, 1, FilterMode> mag_filter;
BitField<2, 1, FilterMode> mipmap_filter;
// Guaranteed to be valid values (i.e. not 3)
BitField<3, 2, WrapMode> wrap_u;
BitField<5, 2, WrapMode> wrap_v;
BitField<7, 1, LODType> diag_lod;
BitField<8, 16, s32> lod_bias; // multiplied by 256, higher precision than normal
BitField<24, 1, bool, u32> lod_clamp; // TODO: This isn't currently implemented
BitField<25, 1, bool, u32> anisotropic_filtering; // TODO: This doesn't use the BP one yet
u32 hex;
};
union TM1
{
// Min is guaranteed to be less than or equal to max
BitField<0, 8, u32> min_lod; // multiplied by 16
BitField<8, 8, u32> max_lod; // multiplied by 16
u32 hex;
};

void Generate(const BPMemory& bp, u32 index);
TM0 tm0;
TM1 tm1;
};

SamplerState& operator=(const SamplerState& rhs);

bool operator==(const SamplerState& rhs) const { return hex == rhs.hex; }
bool operator!=(const SamplerState& rhs) const { return hex != rhs.hex; }
bool operator<(const SamplerState& rhs) const { return hex < rhs.hex; }
BitField<0, 1, Filter> min_filter;
BitField<1, 1, Filter> mag_filter;
BitField<2, 1, Filter> mipmap_filter;
BitField<3, 2, AddressMode> wrap_u;
BitField<5, 2, AddressMode> wrap_v;
BitField<7, 16, s64> lod_bias; // multiplied by 256
BitField<23, 8, u64> min_lod; // multiplied by 16
BitField<31, 8, u64> max_lod; // multiplied by 16
BitField<39, 1, u64> anisotropic_filtering;

StorageType hex;
namespace std
{
// Hash support for SamplerState: the hash of a state is std::hash<u64> applied to the
// 64-bit value returned by SamplerState::Hex().
template <>
struct hash<SamplerState>
{
  std::size_t operator()(const SamplerState& state) const noexcept
  {
    const u64 packed_bits = state.Hex();
    return std::hash<u64>{}(packed_bits);
  }
};
}  // namespace std

namespace RenderState
{
Expand Down
27 changes: 0 additions & 27 deletions Source/Core/VideoCommon/SamplerCommon.h

This file was deleted.

27 changes: 27 additions & 0 deletions Source/Core/VideoCommon/ShaderGenCommon.cpp
Expand Up @@ -39,6 +39,9 @@ ShaderHostConfig ShaderHostConfig::GetCurrent()
bits.backend_logic_op = g_ActiveConfig.backend_info.bSupportsLogicOp;
bits.backend_palette_conversion = g_ActiveConfig.backend_info.bSupportsPaletteConversion;
bits.enable_validation_layer = g_ActiveConfig.bEnableValidationLayer;
bits.manual_texture_sampling = !g_ActiveConfig.bFastTextureSampling;
bits.manual_texture_sampling_custom_texture_sizes =
g_ActiveConfig.ManualTextureSamplingWithHiResTextures();
return bits;
}

Expand Down Expand Up @@ -105,6 +108,30 @@ void WriteIsNanHeader(ShaderCode& out, APIType api_type)
}
}

// Writes fallback definitions of bitfieldExtract() into the shader being generated.
//
// out:         shader code buffer that receives the emitted text.
// api_type:    not consulted by this function.
// host_config: when host_config.backend_bitfield is set nothing is written; otherwise a uint
//              overload and an int overload of bitfieldExtract() are emitted.
void WriteBitfieldExtractHeader(ShaderCode& out, APIType api_type,
                                const ShaderHostConfig& host_config)
{
  // Nothing to emulate when the backend_bitfield flag is set.
  if (host_config.backend_bitfield)
    return;

  // Unsigned overload: shift the value down by `off`, then mask off `size` bits.
  out.Write("uint bitfieldExtract(uint val, int off, int size) {{\n"
            " // This built-in function is only supported in OpenGL 4.0+ and ES 3.1+\n"
            " // Microsoft's HLSL compiler automatically optimises this to a bitfield extract "
            "instruction.\n"
            " uint mask = uint((1 << size) - 1);\n"
            " return uint(val >> off) & mask;\n"
            "}}\n\n");

  // Signed overload: shift the field up to the top of the word, then shift it back down.
  out.Write("int bitfieldExtract(int val, int off, int size) {{\n"
            " // This built-in function is only supported in OpenGL 4.0+ and ES 3.1+\n"
            " // Microsoft's HLSL compiler automatically optimises this to a bitfield extract "
            "instruction.\n"
            " return ((val << (32 - size - off)) >> (32 - size));\n"
            "}}\n\n");
}

static void DefineOutputMember(ShaderCode& object, APIType api_type, std::string_view qualifier,
std::string_view type, std::string_view name, int var_index,
std::string_view semantic = {}, int semantic_index = -1)
Expand Down
15 changes: 15 additions & 0 deletions Source/Core/VideoCommon/ShaderGenCommon.h
Expand Up @@ -14,6 +14,7 @@
#include "Common/BitField.h"
#include "Common/CommonTypes.h"
#include "Common/StringUtil.h"
#include "Common/TypeUtils.h"

enum class APIType;

Expand Down Expand Up @@ -168,6 +169,8 @@ union ShaderHostConfig
BitField<21, 1, bool, u32> backend_logic_op;
BitField<22, 1, bool, u32> backend_palette_conversion;
BitField<23, 1, bool, u32> enable_validation_layer;
BitField<24, 1, bool, u32> manual_texture_sampling;
BitField<25, 1, bool, u32> manual_texture_sampling_custom_texture_sizes;

static ShaderHostConfig GetCurrent();
};
Expand All @@ -177,6 +180,8 @@ std::string GetDiskShaderCacheFileName(APIType api_type, const char* type, bool
bool include_host_config, bool include_api = true);

void WriteIsNanHeader(ShaderCode& out, APIType api_type);
void WriteBitfieldExtractHeader(ShaderCode& out, APIType api_type,
const ShaderHostConfig& host_config);

void GenerateVSOutputMembers(ShaderCode& object, APIType api_type, u32 texgens,
const ShaderHostConfig& host_config, std::string_view qualifier);
Expand All @@ -195,6 +200,16 @@ void AssignVSOutputMembers(ShaderCode& object, std::string_view a, std::string_v
const char* GetInterpolationQualifier(bool msaa, bool ssaa, bool in_glsl_interface_block = false,
bool in = false);

// bitfieldExtract generator for BitField types
template <auto ptr_to_bitfield_member>
std::string BitfieldExtract(std::string_view source)
{
using BitFieldT = Common::MemberType<ptr_to_bitfield_member>;
return fmt::format("bitfieldExtract({}({}), {}, {})", BitFieldT::IsSigned() ? "int" : "uint",
source, static_cast<u32>(BitFieldT::StartBit()),
static_cast<u32>(BitFieldT::NumBits()));
}

// Constant variable names
#define I_COLORS "color"
#define I_KCOLORS "k"
Expand Down
47 changes: 29 additions & 18 deletions Source/Core/VideoCommon/TextureCacheBase.cpp
Expand Up @@ -40,7 +40,6 @@
#include "VideoCommon/OpcodeDecoding.h"
#include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/SamplerCommon.h"
#include "VideoCommon/ShaderCache.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/TMEM.h"
Expand Down Expand Up @@ -966,6 +965,18 @@ void TextureCacheBase::DumpTexture(TCacheEntry* entry, std::string basename, uns
entry->texture->Save(filename, level);
}

// Reports whether the anisotropic filtering enhancement is (mostly) safe to apply for the given
// BPMemory TexMode0 register.
//
// Returns false only when both the minification and the magnification filter are Near (point
// sampling), and true in every other case. With both filters set to point modes, applying
// anisotropic filtering is equivalent to forced filtering; such textures are usually some sort
// of 2D UI billboard, which ends up misaligned from the correct pixels when filtered
// anisotropically.
static bool IsAnisostropicEnhancementSafe(const TexMode0& tm0)
{
  const bool min_is_point = tm0.min_filter == FilterMode::Near;
  const bool mag_is_point = tm0.mag_filter == FilterMode::Near;

  if (min_is_point && mag_is_point)
    return false;

  return true;
}

static void SetSamplerState(u32 index, float custom_tex_scale, bool custom_tex,
bool has_arbitrary_mips)
{
Expand All @@ -977,19 +988,18 @@ static void SetSamplerState(u32 index, float custom_tex_scale, bool custom_tex,
// Force texture filtering config option.
if (g_ActiveConfig.bForceFiltering)
{
state.min_filter = SamplerState::Filter::Linear;
state.mag_filter = SamplerState::Filter::Linear;
state.mipmap_filter = SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0) ?
SamplerState::Filter::Linear :
SamplerState::Filter::Point;
state.tm0.min_filter = FilterMode::Linear;
state.tm0.mag_filter = FilterMode::Linear;
state.tm0.mipmap_filter =
tm0.mipmap_filter != MipMode::None ? FilterMode::Linear : FilterMode::Near;
}

// Custom textures may have a greater number of mips
if (custom_tex)
state.max_lod = 255;
state.tm1.max_lod = 255;

// Anisotropic filtering option.
if (g_ActiveConfig.iMaxAnisotropy != 0 && !SamplerCommon::IsBpTexMode0PointFiltering(tm0))
if (g_ActiveConfig.iMaxAnisotropy != 0 && IsAnisostropicEnhancementSafe(tm0))
{
// https://www.opengl.org/registry/specs/EXT/texture_filter_anisotropic.txt
// For predictable results on all hardware/drivers, only use one of:
Expand All @@ -998,31 +1008,32 @@ static void SetSamplerState(u32 index, float custom_tex_scale, bool custom_tex,
// Letting the game set other combinations will have varying arbitrary results;
// possibly being interpreted as equal to bilinear/trilinear, implicitly
// disabling anisotropy, or changing the anisotropic algorithm employed.
state.min_filter = SamplerState::Filter::Linear;
state.mag_filter = SamplerState::Filter::Linear;
if (SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0))
state.mipmap_filter = SamplerState::Filter::Linear;
state.anisotropic_filtering = 1;
state.tm0.min_filter = FilterMode::Linear;
state.tm0.mag_filter = FilterMode::Linear;
if (tm0.mipmap_filter != MipMode::None)
state.tm0.mipmap_filter = FilterMode::Linear;
state.tm0.anisotropic_filtering = true;
}
else
{
state.anisotropic_filtering = 0;
state.tm0.anisotropic_filtering = false;
}

if (has_arbitrary_mips && SamplerCommon::AreBpTexMode0MipmapsEnabled(tm0))
if (has_arbitrary_mips && tm0.mipmap_filter != MipMode::None)
{
// Apply a secondary bias calculated from the IR scale to pull inwards mipmaps
// that have arbitrary contents, eg. are used for fog effects where the
// distance they kick in at is important to preserve at any resolution.
// Correct this with the upscaling factor of custom textures.
s64 lod_offset = std::log2(g_renderer->GetEFBScale() / custom_tex_scale) * 256.f;
state.lod_bias = std::clamp<s64>(state.lod_bias + lod_offset, -32768, 32767);
s32 lod_offset = std::log2(g_renderer->GetEFBScale() / custom_tex_scale) * 256.f;
state.tm0.lod_bias = std::clamp<s32>(state.tm0.lod_bias + lod_offset, -32768, 32767);

// Anisotropic also pushes mips farther away so it cannot be used either
state.anisotropic_filtering = 0;
state.tm0.anisotropic_filtering = false;
}

g_renderer->SetSamplerState(index, state);
PixelShaderManager::SetSamplerState(index, state.tm0.hex, state.tm1.hex);
}

void TextureCacheBase::BindTextures(BitSet32 used_textures)
Expand Down
3 changes: 1 addition & 2 deletions Source/Core/VideoCommon/TextureInfo.cpp
Expand Up @@ -9,7 +9,6 @@
#include "Common/Align.h"
#include "Core/HW/Memmap.h"
#include "VideoCommon/BPMemory.h"
#include "VideoCommon/SamplerCommon.h"
#include "VideoCommon/TextureDecoder.h"

TextureInfo TextureInfo::FromStage(u32 stage)
Expand All @@ -28,7 +27,7 @@ TextureInfo TextureInfo::FromStage(u32 stage)
const u8* tlut_ptr = &texMem[tlutaddr];

std::optional<u32> mip_count;
const bool has_mipmaps = SamplerCommon::AreBpTexMode0MipmapsEnabled(tex.texMode0);
const bool has_mipmaps = tex.texMode0.mipmap_filter != MipMode::None;
if (has_mipmaps)
{
mip_count = (tex.texMode1.max_lod + 0xf) / 0x10;
Expand Down
18 changes: 0 additions & 18 deletions Source/Core/VideoCommon/UberShaderCommon.cpp
Expand Up @@ -9,24 +9,6 @@

namespace UberShader
{
// Emits helper code shared by all ubershaders into `out`.
// Currently this is only a software bitfieldExtract() fallback, written when
// host_config.backend_bitfield is not set. `api_type` is accepted but not read.
void WriteUberShaderCommonHeader(ShaderCode& out, APIType api_type,
                                 const ShaderHostConfig& host_config)
{
  // Nothing to emit when the host config reports bitfield support.
  if (host_config.backend_bitfield)
    return;

  // ==============================================
  //  BitfieldExtract for APIs which don't have it
  // ==============================================
  // Doubled braces are fmt escapes for literal '{' and '}' in the shader text.
  out.Write("uint bitfieldExtract(uint val, int off, int size) {{\n"
            " // This built-in function is only support in OpenGL 4.0+ and ES 3.1+\n"
            " // Microsoft's HLSL compiler automatically optimises this to a bitfield extract "
            "instruction.\n"
            " uint mask = uint((1 << size) - 1);\n"
            " return uint(val >> off) & mask;\n"
            "}}\n\n");
}

void WriteLightingFunction(ShaderCode& out)
{
// ==============================================
Expand Down
19 changes: 0 additions & 19 deletions Source/Core/VideoCommon/UberShaderCommon.h
Expand Up @@ -3,37 +3,18 @@

#pragma once

#include <string>
#include <string_view>

#include <fmt/format.h>

#include "Common/CommonTypes.h"
#include "Common/TypeUtils.h"

class ShaderCode;
enum class APIType;
union ShaderHostConfig;

namespace UberShader
{
// Common functions across all ubershaders
void WriteUberShaderCommonHeader(ShaderCode& out, APIType api_type,
const ShaderHostConfig& host_config);

// Vertex lighting
void WriteLightingFunction(ShaderCode& out);
void WriteVertexLighting(ShaderCode& out, APIType api_type, std::string_view world_pos_var,
std::string_view normal_var, std::string_view in_color_0_var,
std::string_view in_color_1_var, std::string_view out_color_0_var,
std::string_view out_color_1_var);

// bitfieldExtract generator for BitField types
template <auto ptr_to_bitfield_member>
std::string BitfieldExtract(std::string_view source)
{
using BitFieldT = Common::MemberType<ptr_to_bitfield_member>;
return fmt::format("bitfieldExtract({}, {}, {})", source, static_cast<u32>(BitFieldT::StartBit()),
static_cast<u32>(BitFieldT::NumBits()));
}
} // namespace UberShader
45 changes: 24 additions & 21 deletions Source/Core/VideoCommon/UberShaderPixel.cpp
Expand Up @@ -63,8 +63,8 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,

out.Write("// Pixel UberShader for {} texgens{}{}\n", numTexgen,
early_depth ? ", early-depth" : "", per_pixel_depth ? ", per-pixel depth" : "");
WriteBitfieldExtractHeader(out, api_type, host_config);
WritePixelShaderCommonHeader(out, api_type, host_config, bounding_box);
WriteUberShaderCommonHeader(out, api_type, host_config);
if (per_pixel_lighting)
WriteLightingFunction(out);

Expand Down Expand Up @@ -226,27 +226,32 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
{
// Doesn't look like DirectX supports this. Oh well the code path is here just in case it
// supports this in the future.
out.Write("int4 sampleTexture(uint sampler_num, float3 uv) {{\n");
out.Write("int4 sampleTextureWrapper(uint texmap, int2 uv, int layer) {{\n");
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
out.Write(" return iround(texture(samp[sampler_num], uv) * 255.0);\n");
out.Write(" return sampleTexture(texmap, samp[texmap], uv, layer);\n");
else if (api_type == APIType::D3D)
out.Write(" return iround(Tex[sampler_num].Sample(samp[sampler_num], uv) * 255.0);\n");
out.Write(" return sampleTexture(texmap, tex[texmap], samp[texmap], uv, layer);\n");
out.Write("}}\n\n");
}
else
{
out.Write("int4 sampleTexture(uint sampler_num, float3 uv) {{\n"
" // This is messy, but DirectX, OpenGL 3.3 and OpenGL ES 3.0 doesn't support "
out.Write("int4 sampleTextureWrapper(uint sampler_num, int2 uv, int layer) {{\n"
" // This is messy, but DirectX, OpenGL 3.3, and OpenGL ES 3.0 don't support "
"dynamic indexing of the sampler array\n"
" // With any luck the shader compiler will optimise this if the hardware supports "
"dynamic indexing.\n"
" switch(sampler_num) {{\n");
for (int i = 0; i < 8; i++)
{
if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
out.Write(" case {}u: return iround(texture(samp[{}], uv) * 255.0);\n", i, i);
{
out.Write(" case {0}u: return sampleTexture({0}u, samp[{0}u], uv, layer);\n", i);
}
else if (api_type == APIType::D3D)
out.Write(" case {}u: return iround(Tex[{}].Sample(samp[{}], uv) * 255.0);\n", i, i, i);
{
out.Write(" case {0}u: return sampleTexture({0}u, tex[{0}u], samp[{0}u], uv, layer);\n",
i);
}
}
out.Write(" }}\n"
"}}\n\n");
Expand Down Expand Up @@ -284,8 +289,8 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
// ======================
// Indirect Lookup
// ======================
const auto LookupIndirectTexture = [&out, stereo](std::string_view out_var_name,
std::string_view in_index_name) {
const auto LookupIndirectTexture = [&out](std::string_view out_var_name,
std::string_view in_index_name) {
// in_index_name is the indirect stage, not the tev stage
// bpmem_iref is packed differently from RAS1_IREF
// This function assumes bpmem_iref is nonzero (i.e. matrix is not off, and the
Expand All @@ -301,11 +306,9 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
" else\n"
" fixedPoint_uv = fixedPoint_uv >> " I_INDTEXSCALE "[{} >> 1].zw;\n"
"\n"
" {} = sampleTexture(texmap, float3(float2(fixedPoint_uv) * " I_TEXDIMS
"[texmap].xy, {})).abg;\n"
"}}",
in_index_name, in_index_name, in_index_name, in_index_name, out_var_name,
stereo ? "float(layer)" : "0.0");
" {} = sampleTextureWrapper(texmap, fixedPoint_uv, layer).abg;\n"
"}}\n",
in_index_name, in_index_name, in_index_name, in_index_name, out_var_name);
};

// ======================
Expand Down Expand Up @@ -729,6 +732,8 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
out.Write(",\n in uint layer : SV_RenderTargetArrayIndex\n");
out.Write("\n ) {{\n");
}
if (!stereo)
out.Write(" int layer = 0;\n");

out.Write(" int3 tevcoord = int3(0, 0, 0);\n"
" State s;\n"
Expand Down Expand Up @@ -786,7 +791,7 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
{
out.Write(" int2 fixpoint_uv{} = int2(", i);
out.Write("(tex{}.z == 0.0 ? tex{}.xy : tex{}.xy / tex{}.z)", i, i, i, i);
out.Write(" * " I_TEXDIMS "[{}].zw);\n", i);
out.Write(" * float2(" I_TEXDIMS "[{}].zw * 128));\n", i);
// TODO: S24 overflows here?
}

Expand Down Expand Up @@ -820,7 +825,7 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
// For the undefined case, we just skip applying the indirect operation, which is close enough.
// Viewtiful Joe hits the undefined case (bug 12525).
// Wrapping and add to previous still apply in this case (and when the stage is disabled).
out.Write(" if (bpmem_iref(bt) != 0u) {{");
out.Write(" if (bpmem_iref(bt) != 0u) {{\n");
out.Write(" int3 indcoord;\n");
LookupIndirectTexture("indcoord", "bt");
out.Write(" if (bs != 0u)\n"
Expand Down Expand Up @@ -910,10 +915,8 @@ ShaderCode GenPixelShader(APIType api_type, const ShaderHostConfig& host_config,
" uint sampler_num = {};\n",
BitfieldExtract<&TwoTevStageOrders::texmap0>("ss.order"));
out.Write("\n"
" float2 uv = (float2(tevcoord.xy)) * " I_TEXDIMS "[sampler_num].xy;\n");
out.Write(" int4 color = sampleTexture(sampler_num, float3(uv, {}));\n",
stereo ? "float(layer)" : "0.0");
out.Write(" uint swap = {};\n",
" int4 color = sampleTextureWrapper(sampler_num, tevcoord.xy, layer);\n"
" uint swap = {};\n",
BitfieldExtract<&TevStageCombiner::AlphaCombiner::tswap>("ss.ac"));
out.Write(" s.TexColor = Swizzle(swap, color);\n");
out.Write(" }} else {{\n"
Expand Down
2 changes: 1 addition & 1 deletion Source/Core/VideoCommon/UberShaderVertex.cpp
Expand Up @@ -49,8 +49,8 @@ ShaderCode GenVertexShader(APIType api_type, const ShaderHostConfig& host_config
GenerateVSOutputMembers(out, api_type, num_texgen, host_config, "");
out.Write("}};\n\n");

WriteUberShaderCommonHeader(out, api_type, host_config);
WriteIsNanHeader(out, api_type);
WriteBitfieldExtractHeader(out, api_type, host_config);
WriteLightingFunction(out);

if (api_type == APIType::OpenGL || api_type == APIType::Vulkan)
Expand Down
1 change: 0 additions & 1 deletion Source/Core/VideoCommon/VertexManagerBase.cpp
Expand Up @@ -27,7 +27,6 @@
#include "VideoCommon/PerfQueryBase.h"
#include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/SamplerCommon.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/VertexLoaderManager.h"
Expand Down
1 change: 1 addition & 0 deletions Source/Core/VideoCommon/VideoConfig.cpp
Expand Up @@ -135,6 +135,7 @@ void VideoConfig::Refresh()
bVertexRounding = Config::Get(Config::GFX_HACK_VERTEX_ROUDING);
iEFBAccessTileSize = Config::Get(Config::GFX_HACK_EFB_ACCESS_TILE_SIZE);
iMissingColorValue = Config::Get(Config::GFX_HACK_MISSING_COLOR_VALUE);
bFastTextureSampling = Config::Get(Config::GFX_HACK_FAST_TEXTURE_SAMPLING);

bPerfQueriesEnable = Config::Get(Config::GFX_PERF_QUERIES_ENABLE);

Expand Down
13 changes: 13 additions & 0 deletions Source/Core/VideoCommon/VideoConfig.h
Expand Up @@ -135,6 +135,7 @@ struct VideoConfig final
int iLog = 0; // CONF_ bits
int iSaveTargetId = 0; // TODO: Should be dropped
u32 iMissingColorValue = 0;
bool bFastTextureSampling = false;

// Stereoscopy
StereoMode stereo_mode{};
Expand Down Expand Up @@ -230,6 +231,8 @@ struct VideoConfig final
bool bSupportsDepthReadback = false;
bool bSupportsShaderBinaries = false;
bool bSupportsPipelineCacheData = false;
bool bSupportsCoarseDerivatives = false;
bool bSupportsTextureQueryLevels = false;
} backend_info;

// Utility
Expand All @@ -243,6 +246,16 @@ struct VideoConfig final
return backend_info.bSupportsGPUTextureDecoding && bEnableGPUTextureDecoding;
}
// True only when the vertex rounding option is set and iEFBScale is not 1.
bool UseVertexRounding() const
{
  if (!bVertexRounding)
    return false;
  return iEFBScale != 1;
}
// Reports whether manual texture sampling is in use together with hi-res textures.
// Hi-res textures break the wrapping logic used by manual texture sampling. This
// includes hi-res EFB copies, but not native-resolution EFB copies made at a
// higher internal resolution.
bool ManualTextureSamplingWithHiResTextures() const
{
  // EFB copies only count as hi-res when they are scaled and the scale is not 1.
  const bool scaled_efb_copies = iEFBScale != 1 && bCopyEFBScaled;

  // With fast texture sampling enabled the answer is always false.
  return !bFastTextureSampling && (scaled_efb_copies || bHiresTextures);
}
bool UsingUberShaders() const;
u32 GetShaderCompilerThreads() const;
u32 GetShaderPrecompilerThreads() const;
Expand Down