Skip to content

Commit

Permalink
Merge pull request #18446 from hrydgard/minor-bbox-opt
Browse files Browse the repository at this point in the history
Minor bbox optimizations, assorted bugfixes
  • Loading branch information
hrydgard committed Nov 26, 2023
2 parents d6324d1 + dae758e commit 77e3f11
Show file tree
Hide file tree
Showing 7 changed files with 108 additions and 68 deletions.
4 changes: 2 additions & 2 deletions Common/VR/VRFramebuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,8 +109,8 @@ static bool ovrFramebuffer_CreateGLES(XrSession session, ovrFramebuffer* frameBu
swapChainCreateInfo.arraySize = multiview ? 2 : 1;

#ifdef ANDROID
XrSwapchainCreateInfoFoveationFB swapChainFoveationCreateInfo;
if (VR_GetPlatformFlag(VR_PLATFORM_EXTENSION_FOVEATION)) {
XrSwapchainCreateInfoFoveationFB swapChainFoveationCreateInfo;
memset(&swapChainFoveationCreateInfo, 0, sizeof(swapChainFoveationCreateInfo));
swapChainFoveationCreateInfo.type = XR_TYPE_SWAPCHAIN_CREATE_INFO_FOVEATION_FB;
swapChainCreateInfo.next = &swapChainFoveationCreateInfo;
Expand Down Expand Up @@ -203,8 +203,8 @@ static bool ovrFramebuffer_CreateVK(XrSession session, ovrFramebuffer* frameBuff
swapChainCreateInfo.arraySize = multiview ? 2 : 1;

#ifdef ANDROID
XrSwapchainCreateInfoFoveationFB swapChainFoveationCreateInfo;
if (VR_GetPlatformFlag(VR_PLATFORM_EXTENSION_FOVEATION)) {
XrSwapchainCreateInfoFoveationFB swapChainFoveationCreateInfo;
memset(&swapChainFoveationCreateInfo, 0, sizeof(swapChainFoveationCreateInfo));
swapChainFoveationCreateInfo.type = XR_TYPE_SWAPCHAIN_CREATE_INFO_FOVEATION_FB;
swapChainCreateInfo.next = &swapChainFoveationCreateInfo;
Expand Down
137 changes: 92 additions & 45 deletions GPU/Common/DrawEngineCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -219,16 +219,17 @@ void DrawEngineCommon::UpdatePlanes() {
Vec2f minViewport = (minOffset_ - Vec2f(gstate.getViewportXCenter(), gstate.getViewportYCenter())) * inverseViewportScale;
Vec2f maxViewport = (maxOffset_ - Vec2f(gstate.getViewportXCenter(), gstate.getViewportYCenter())) * inverseViewportScale;

Lin::Matrix4x4 applyViewport;
applyViewport.empty();
Vec2f viewportInvSize = Vec2f(1.0f / (maxViewport.x - minViewport.x), 1.0f / (maxViewport.y - minViewport.y));

Lin::Matrix4x4 applyViewport{};
// Scale to the viewport's size.
applyViewport.xx = 2.0f / (maxViewport.x - minViewport.x);
applyViewport.yy = 2.0f / (maxViewport.y - minViewport.y);
applyViewport.xx = 2.0f * viewportInvSize.x;
applyViewport.yy = 2.0f * viewportInvSize.y;
applyViewport.zz = 1.0f;
applyViewport.ww = 1.0f;
// And offset to the viewport's centers.
applyViewport.wx = -(maxViewport.x + minViewport.x) / (maxViewport.x - minViewport.x);
applyViewport.wy = -(maxViewport.y + minViewport.y) / (maxViewport.y - minViewport.y);
applyViewport.wx = -(maxViewport.x + minViewport.x) * viewportInvSize.x;
applyViewport.wy = -(maxViewport.y + minViewport.y) * viewportInvSize.y;

float mtx[16];
Matrix4ByMatrix4(mtx, worldviewproj, applyViewport.m);
Expand All @@ -245,62 +246,109 @@ void DrawEngineCommon::UpdatePlanes() {
//
// It does the simplest and safest test possible: if all points of a bbox are outside any single one of
// our clipping planes, we reject the box. Tighter bounds would be desirable but would take more calculations.
bool DrawEngineCommon::TestBoundingBox(const void *control_points, const void *inds, int vertexCount, u32 vertType) {
// The name is a slight misnomer, because any bounding shape will work, not just boxes.
//
// Potential optimizations:
// * SIMD-ify the plane culling, and also the vertex data conversion (could even group together xxxxyyyyzzzz for example)
// * Compute min/max of the verts, and then compute a bounding sphere and check that against the planes.
// - Less accurate, but...
// - ...it then requires only six plane evaluations.

bool DrawEngineCommon::TestBoundingBox(const void *vdata, const void *inds, int vertexCount, u32 vertType) {
// Grab temp buffer space from large offsets in decoded_. Not exactly safe for large draws.
if (vertexCount > 1024) {
return true;
}

SimpleVertex *corners = (SimpleVertex *)(decoded_ + 65536 * 12);
float *verts = (float *)(decoded_ + 65536 * 18);
int vertStride = 3;

// Although this may lead to drawing that shouldn't happen, the viewport is more complex on VR.
// Let's always say objects are within bounds.
if (gstate_c.Use(GPU_USE_VIRTUAL_REALITY))
return true;

// Due to world matrix updates per "thing", this isn't quite as effective as it could be if we did world transform
// in here as well. Though, it still does cut down on a lot of updates in Tekken 6.
if (gstate_c.IsDirty(DIRTY_CULL_PLANES)) {
UpdatePlanes();
gpuStats.numPlaneUpdates++;
gstate_c.Clean(DIRTY_CULL_PLANES);
}

// Try to skip NormalizeVertices if it's pure positions. No need to bother with a vertex decoder
// and a large vertex format.
if ((vertType & 0xFFFFFF) == GE_VTYPE_POS_FLOAT && !inds) {
verts = (float *)control_points;
verts = (float *)vdata;
} else if ((vertType & 0xFFFFFF) == GE_VTYPE_POS_8BIT && !inds) {
const s8 *vtx = (const s8 *)control_points;
const s8 *vtx = (const s8 *)vdata;
for (int i = 0; i < vertexCount * 3; i++) {
verts[i] = vtx[i] * (1.0f / 128.0f);
}
} else if ((vertType & 0xFFFFFF) == GE_VTYPE_POS_16BIT && !inds) {
const s16 *vtx = (const s16 *)control_points;
const s16 *vtx = (const s16 *)vdata;
for (int i = 0; i < vertexCount * 3; i++) {
verts[i] = vtx[i] * (1.0f / 32768.0f);
}
} else {
// Simplify away indices, bones, and morph before proceeding.
u8 *temp_buffer = decoded_ + 65536 * 24;
int vertexSize = 0;

u16 indexLowerBound = 0;
u16 indexUpperBound = (u16)vertexCount - 1;
if (vertexCount > 0 && inds) {
GetIndexBounds(inds, vertexCount, vertType, &indexLowerBound, &indexUpperBound);
}
if ((inds || (vertType & (GE_VTYPE_WEIGHT_MASK | GE_VTYPE_MORPHCOUNT_MASK)))) {
u16 indexLowerBound = 0;
u16 indexUpperBound = (u16)vertexCount - 1;

// Force software skinning.
bool wasApplyingSkinInDecode = decOptions_.applySkinInDecode;
decOptions_.applySkinInDecode = true;
NormalizeVertices((u8 *)corners, temp_buffer, (const u8 *)control_points, indexLowerBound, indexUpperBound, vertType);
decOptions_.applySkinInDecode = wasApplyingSkinInDecode;

IndexConverter conv(vertType, inds);
for (int i = 0; i < vertexCount; i++) {
verts[i * 3] = corners[conv(i)].pos.x;
verts[i * 3 + 1] = corners[conv(i)].pos.y;
verts[i * 3 + 2] = corners[conv(i)].pos.z;
if (vertexCount > 0 && inds) {
GetIndexBounds(inds, vertexCount, vertType, &indexLowerBound, &indexUpperBound);
}
// TODO: Avoid normalization if just plain skinning.
// Force software skinning.
bool wasApplyingSkinInDecode = decOptions_.applySkinInDecode;
decOptions_.applySkinInDecode = true;
NormalizeVertices((u8 *)corners, temp_buffer, (const u8 *)vdata, indexLowerBound, indexUpperBound, vertType);
decOptions_.applySkinInDecode = wasApplyingSkinInDecode;

IndexConverter conv(vertType, inds);
for (int i = 0; i < vertexCount; i++) {
verts[i * 3] = corners[conv(i)].pos.x;
verts[i * 3 + 1] = corners[conv(i)].pos.y;
verts[i * 3 + 2] = corners[conv(i)].pos.z;
}
} else {
// Simple, most common case.
VertexDecoder *dec = GetVertexDecoder(vertType);
int stride = dec->VertexSize();
int offset = dec->posoff;
switch (vertType & GE_VTYPE_POS_MASK) {
case GE_VTYPE_POS_8BIT:
for (int i = 0; i < vertexCount; i++) {
const s8 *data = (const s8 *)vdata + i * stride + offset;
for (int j = 0; j < 3; j++) {
verts[i * 3 + j] = data[j] * (1.0f / 128.0f);
}
}
break;
case GE_VTYPE_POS_16BIT:
for (int i = 0; i < vertexCount; i++) {
const s16 *data = ((const s16 *)((const s8 *)vdata + i * stride + offset));
for (int j = 0; j < 3; j++) {
verts[i * 3 + j] = data[j] * (1.0f / 32768.0f);
}
}
break;
case GE_VTYPE_POS_FLOAT:
// No need to copy in this case, we can just read directly from the source format with a stride.
verts = (float *)((uint8_t *)vdata + offset);
vertStride = stride / 4;
// Previous code:
// for (int i = 0; i < vertexCount; i++)
// memcpy(&verts[i * 3], (const u8 *)vdata + stride * i + offset, sizeof(float) * 3);
break;
}
}
}

// Due to world matrix updates per "thing", this isn't quite as effective as it could be if we did world transform
// in here as well. Though, it still does cut down on a lot of updates in Tekken 6.
if (gstate_c.IsDirty(DIRTY_CULL_PLANES)) {
UpdatePlanes();
gpuStats.numPlaneUpdates++;
gstate_c.Clean(DIRTY_CULL_PLANES);
}

// Note: near/far are not checked without clamp/clip enabled, so we skip those planes.
int totalPlanes = gstate.isDepthClampEnabled() ? 6 : 4;
for (int plane = 0; plane < totalPlanes; plane++) {
Expand All @@ -310,8 +358,9 @@ bool DrawEngineCommon::TestBoundingBox(const void *control_points, const void *i
// Test against the frustum planes, and count.
// TODO: We should test 4 vertices at a time using SIMD.
// I guess could also test one vertex against 4 planes at a time, though a lot of waste at the common case of 6.
float value = planes_[plane].Test(verts + i * 3);
if (value <= -FLT_EPSILON)
const float *pos = verts + i * vertStride;
float value = planes_[plane].Test(pos);
if (value <= -FLT_EPSILON) // Not sure why we use exactly this value. Probably '< 0' would do.
out++;
else
inside++;
Expand All @@ -321,14 +370,12 @@ bool DrawEngineCommon::TestBoundingBox(const void *control_points, const void *i
if (inside == 0) {
// All out - but check for X and Y if the offset was near the cullbox edge.
bool outsideEdge = false;
if (plane == 1)
outsideEdge = minOffset_.x < 1.0f;
if (plane == 2)
outsideEdge = minOffset_.y < 1.0f;
else if (plane == 0)
outsideEdge = maxOffset_.x >= 4096.0f;
else if (plane == 3)
outsideEdge = maxOffset_.y >= 4096.0f;
switch (plane) {
case 0: outsideEdge = maxOffset_.x >= 4096.0f; break;
case 1: outsideEdge = minOffset_.x < 1.0f; break;
case 2: outsideEdge = minOffset_.y < 1.0f; break;
case 3: outsideEdge = maxOffset_.y >= 4096.0f; break;
}

// Only consider this outside if offset + scissor/region is fully inside the cullbox.
if (!outsideEdge)
Expand Down
6 changes: 2 additions & 4 deletions GPU/Common/FramebufferManagerCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1593,10 +1593,8 @@ void FramebufferManagerCommon::CopyDisplayToOutput(bool reallyDirty) {
if (!vfb) {
if (Memory::IsValidAddress(fbaddr)) {
// The game is displaying something directly from RAM. In GTA, it's decoded video.
if (!vfb) {
DrawFramebufferToOutput(Memory::GetPointerUnchecked(fbaddr), displayStride_, displayFormat_);
return;
}
DrawFramebufferToOutput(Memory::GetPointerUnchecked(fbaddr), displayStride_, displayFormat_);
return;
} else {
DEBUG_LOG(FRAMEBUF, "Found no FBO to display! displayFBPtr = %08x", fbaddr);
// No framebuffer to display! Clear to black.
Expand Down
8 changes: 4 additions & 4 deletions Tools/langtool/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 3 additions & 6 deletions Tools/langtool/src/inifile.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,12 +120,9 @@ impl IniFile {
}

pub fn get_section_mut(&mut self, section_name: &str) -> Option<&mut Section> {
for section in &mut self.sections {
if section.name == section_name {
return Some(section);
}
}
None
self.sections
.iter_mut()
.find(|section| section.name == section_name)
}
}

Expand Down
10 changes: 4 additions & 6 deletions Tools/langtool/src/section.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,12 +117,10 @@ impl Section {
if prefix.starts_with("Font") || prefix.starts_with('#') {
continue;
}
if !other.lines.iter().any(|line| line.starts_with(prefix)) {
if !prefix.contains("URL") {
println!("Commenting out from {}: {line}", other.name);
// Comment out the line.
*line = "#".to_owned() + line;
}
if !other.lines.iter().any(|line| line.starts_with(prefix)) && !prefix.contains("URL") {
println!("Commenting out from {}: {line}", other.name);
// Comment out the line.
*line = "#".to_owned() + line;
}
}
}
Expand Down
2 changes: 1 addition & 1 deletion Windows/MainWindow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -660,7 +660,7 @@ namespace MainWindow
double now = time_now_d();
if ((now - lastMouseDown) < 0.001 * GetDoubleClickTime()) {
float dx = lastMouseDownX - x;
float dy = lastMouseDownX - x;
float dy = lastMouseDownY - y;
float distSq = dx * dx + dy * dy;
if (distSq < 3.0f*3.0f && !g_Config.bShowTouchControls && !g_Config.bMouseControl && GetUIState() == UISTATE_INGAME && g_Config.bFullscreenOnDoubleclick) {
SendToggleFullscreen(!g_Config.UseFullScreen());
Expand Down

0 comments on commit 77e3f11

Please sign in to comment.