diff --git a/.gitmodules b/.gitmodules
index 2dd1e55e8095..82a4f4bf24c6 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -50,3 +50,6 @@
[submodule "ext/naett"]
path = ext/naett
url = https://github.com/erkkah/naett.git
+[submodule "ext/libchdr"]
+ path = ext/libchdr
+ url = https://github.com/rtissera/libchdr.git
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8b455b21bc47..af616d3e2a9f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1343,17 +1343,20 @@ else()
SDL/SDLVulkanGraphicsContext.cpp
)
endif()
- if(SDL2_ttf_FOUND OR SDL2_ttf_PKGCONFIG_FOUND)
+ if(SDL2_ttf_FOUND OR
+ (SDL2_ttf_PKGCONFIG_FOUND AND
+ SDL2_ttf_PKGCONFIG_VERSION VERSION_GREATER_EQUAL "2.0.18"))
add_definitions(-DUSE_SDL2_TTF)
if(FONTCONFIG_FOUND)
add_definitions(-DUSE_SDL2_TTF_FONTCONFIG)
set(nativeExtraLibs ${nativeExtraLibs} Fontconfig::Fontconfig)
endif()
+ elseif(SDL2_ttf_PKGCONFIG_FOUND)
+ message(WARNING "Found SDL2_ttf <2.0.18 - this is too old, falling back to atlas")
endif()
if(SDL2_ttf_FOUND)
set(nativeExtraLibs ${nativeExtraLibs} SDL2_ttf::SDL2_ttf)
elseif(SDL2_ttf_PKGCONFIG_FOUND)
- add_definitions(-DUSE_SDL2_TTF_PKGCONFIG)
set(nativeExtraLibs ${nativeExtraLibs} PkgConfig::SDL2_ttf_PKGCONFIG)
endif()
if(APPLE)
@@ -2314,7 +2317,9 @@ else()
include_directories(ext/zstd/lib)
endif()
-target_link_libraries(${CoreLibName} Common native kirk cityhash sfmt19937 xbrz xxhash rcheevos ${GlslangLibs}
+include_directories(ext/libchdr/include)
+
+target_link_libraries(${CoreLibName} Common native chdr kirk cityhash sfmt19937 xbrz xxhash rcheevos ${GlslangLibs}
${CoreExtraLibs} ${OPENGL_LIBRARIES} ${X11_LIBRARIES} ${CMAKE_DL_LIBS})
if(NOT HTTPS_NOT_AVAILABLE)
diff --git a/Common/Arm64Emitter.cpp b/Common/Arm64Emitter.cpp
index a5d87c5a11fc..1d2c8b0438b6 100644
--- a/Common/Arm64Emitter.cpp
+++ b/Common/Arm64Emitter.cpp
@@ -4204,6 +4204,14 @@ void ARM64FloatEmitter::MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch, bo
if (negate) {
FNEG(32, Rd, Rd);
}
+ } else if (TryAnyMOVI(32, Rd, ival)) {
+ if (negate) {
+ FNEG(32, Rd, Rd);
+ }
+ } else if (TryAnyMOVI(32, Rd, ival ^ 0x80000000)) {
+ if (!negate) {
+ FNEG(32, Rd, Rd);
+ }
} else {
_assert_msg_(scratch != INVALID_REG, "Failed to find a way to generate FP immediate %f without scratch", value);
if (negate) {
@@ -4214,6 +4222,96 @@ void ARM64FloatEmitter::MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch, bo
}
}
+bool ARM64FloatEmitter::TryMOVI(u8 size, ARM64Reg Rd, uint64_t elementValue) {
+	if (size == 8) {
+		// Can always do 8.
+		MOVI(size, Rd, elementValue & 0xFF);
+		return true;
+	} else if (size == 16) {
+		if ((elementValue & 0xFF00) == 0) {
+			MOVI(size, Rd, elementValue & 0xFF, 0);
+			return true;
+		} else if ((elementValue & 0x00FF) == 0) {
+			MOVI(size, Rd, (elementValue >> 8) & 0xFF, 8);
+			return true;
+		} else if ((elementValue & 0xFF00) == 0xFF00) {
+			MVNI(size, Rd, ~elementValue & 0xFF, 0);
+			return true;
+		} else if ((elementValue & 0x00FF) == 0x00FF) {
+			MVNI(size, Rd, (~elementValue >> 8) & 0xFF, 8);
+			return true;
+		}
+
+		return false;
+	} else if (size == 32) {
+		for (int shift = 0; shift < 32; shift += 8) {
+			uint32_t mask = 0xFFFFFFFF & ~(0xFF << shift);
+			if ((elementValue & mask) == 0) {
+				MOVI(size, Rd, (elementValue >> shift) & 0xFF, shift);
+				return true;
+			} else if ((elementValue & mask) == mask) {
+				MVNI(size, Rd, (~elementValue >> shift) & 0xFF, shift);
+				return true;
+			}
+		}
+
+		// Maybe an MSL shift will work?
+		for (int shift = 8; shift <= 16; shift += 8) {
+			uint32_t mask = 0xFFFFFFFF & ~(0xFF << shift);
+			uint32_t ones = (1 << shift) - 1;
+			uint32_t notOnes = 0xFFFFFF00 << shift;
+			if ((elementValue & mask) == ones) {
+				MOVI(size, Rd, (elementValue >> shift) & 0xFF, shift, true);
+				return true;
+			} else if ((elementValue & mask) == notOnes) {
+				MVNI(size, Rd, (~elementValue >> shift) & 0xFF, shift, true);  // MVNI encodes the complement of the immediate, like the cases above.
+				return true;
+			}
+		}
+
+		return false;
+	} else if (size == 64) {
+		uint8_t imm8 = 0;
+		for (int i = 0; i < 8; ++i) {
+			uint8_t byte = (elementValue >> (i * 8)) & 0xFF;
+			if (byte != 0 && byte != 0xFF)
+				return false;
+
+			if (byte == 0xFF)
+				imm8 |= 1 << i;
+		}
+
+		// Didn't run into any partial bytes, so size 64 is doable.
+		MOVI(size, Rd, imm8);
+		return true;
+	}
+	return false;
+}
+
+bool ARM64FloatEmitter::TryAnyMOVI(u8 size, ARM64Reg Rd, uint64_t elementValue) {
+	// Try the original size first in case that's more optimal.
+	if (TryMOVI(size, Rd, elementValue))
+		return true;
+
+	uint64_t value = elementValue;
+	if (size != 64) {
+		uint64_t masked = elementValue & ((1ULL << size) - 1);  // 1ULL: (1 << 32) would overflow int.
+		for (int i = size; i < 64; i += size) {  // Step by element width to replicate the pattern across all 64 bits.
+			value |= masked << i;
+		}
+	}
+
+	for (int attempt = 8; attempt <= 64; attempt += attempt) {
+		// Original size was already attempted above.
+		if (attempt != size) {
+			if (TryMOVI(attempt, Rd, value))
+				return true;
+		}
+	}
+
+	return false;
+}
+
void ARM64XEmitter::SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
u32 val;
bool shift;
diff --git a/Common/Arm64Emitter.h b/Common/Arm64Emitter.h
index cd4a54cb73e9..0c3603d1bf9e 100644
--- a/Common/Arm64Emitter.h
+++ b/Common/Arm64Emitter.h
@@ -925,6 +925,10 @@ class ARM64FloatEmitter
void ORR(u8 size, ARM64Reg Rd, u8 imm8, u8 shift = 0);
void BIC(u8 size, ARM64Reg Rd, u8 imm8, u8 shift = 0);
+ bool TryMOVI(u8 size, ARM64Reg Rd, uint64_t value);
+ // Allow using a different size. Unclear if there's a penalty.
+ bool TryAnyMOVI(u8 size, ARM64Reg Rd, uint64_t value);
+
// One source
void FCVT(u8 size_to, u8 size_from, ARM64Reg Rd, ARM64Reg Rn);
diff --git a/Common/Common.vcxproj.filters b/Common/Common.vcxproj.filters
index 288b7dc289e5..a0852244bacc 100644
--- a/Common/Common.vcxproj.filters
+++ b/Common/Common.vcxproj.filters
@@ -1073,6 +1073,9 @@
{34f45db9-5c08-49cb-b349-b9e760ce3213}
+
+ {b681797d-7747-487f-b448-5ef5b2d2805b}
+
diff --git a/Common/Data/Collections/Hashmaps.h b/Common/Data/Collections/Hashmaps.h
index 939cb8eda994..a9e5dece99e6 100644
--- a/Common/Data/Collections/Hashmaps.h
+++ b/Common/Data/Collections/Hashmaps.h
@@ -72,7 +72,7 @@ class DenseHashMap {
}
bool ContainsKey(const Key &key) const {
- // Slightly wasteful.
+ // Slightly wasteful, though compiler might optimize it.
Value value;
return Get(key, &value);
}
@@ -135,6 +135,7 @@ class DenseHashMap {
return false;
}
+ // This will never crash if you call it without locking - but, the value might not be right.
size_t size() const {
return count_;
}
diff --git a/Common/Data/Format/IniFile.cpp b/Common/Data/Format/IniFile.cpp
index f3b35cd60958..4e23bfda9f51 100644
--- a/Common/Data/Format/IniFile.cpp
+++ b/Common/Data/Format/IniFile.cpp
@@ -173,7 +173,7 @@ std::string* Section::GetLine(const char* key, std::string* valueOut, std::strin
if (!strcasecmp(lineKey.c_str(), key))
return &line;
}
- return 0;
+ return nullptr;
}
const std::string* Section::GetLine(const char* key, std::string* valueOut, std::string* commentOut) const
@@ -186,7 +186,7 @@ const std::string* Section::GetLine(const char* key, std::string* valueOut, std:
if (!strcasecmp(lineKey.c_str(), key))
return &line;
}
- return 0;
+ return nullptr;
}
void Section::Set(const char* key, uint32_t newValue) {
@@ -423,14 +423,14 @@ const Section* IniFile::GetSection(const char* sectionName) const {
for (const auto &iter : sections)
if (!strcasecmp(iter->name().c_str(), sectionName))
return iter.get();
- return nullptr ;
+ return nullptr;
}
Section* IniFile::GetSection(const char* sectionName) {
for (const auto &iter : sections)
if (!strcasecmp(iter->name().c_str(), sectionName))
return iter.get();
- return 0;
+ return nullptr;
}
Section* IniFile::GetOrCreateSection(const char* sectionName) {
diff --git a/Common/Data/Text/I18n.h b/Common/Data/Text/I18n.h
index dba943fdbc9c..65baa2aec419 100644
--- a/Common/Data/Text/I18n.h
+++ b/Common/Data/Text/I18n.h
@@ -116,8 +116,9 @@ class I18NRepo {
std::string LanguageID();
std::shared_ptr<I18NCategory> GetCategory(I18NCat category);
- std::shared_ptr<I18NCategory> GetCategoryByName(const char *name);
+ // Translate the string, by looking up "key" in the file, and falling back to either def or key, in that order, if the lookup fails.
+ // def can (and usually is) set to nullptr.
const char *T(I18NCat category, const char *key, const char *def = nullptr) {
if (category == I18NCat::NONE)
return def ? def : key;
diff --git a/Common/File/DirListing.cpp b/Common/File/DirListing.cpp
index 009c9b944829..0ee76098a16d 100644
--- a/Common/File/DirListing.cpp
+++ b/Common/File/DirListing.cpp
@@ -184,7 +184,7 @@ bool GetFilesInDir(const Path &directory, std::vector *files, const ch
std::string tmp;
while (*filter) {
if (*filter == ':') {
- filters.insert(std::move(tmp));
+ filters.insert(tmp);
tmp.clear();
} else {
tmp.push_back(*filter);
@@ -192,7 +192,7 @@ bool GetFilesInDir(const Path &directory, std::vector *files, const ch
filter++;
}
if (!tmp.empty())
- filters.insert(std::move(tmp));
+ filters.insert(tmp);
}
#if PPSSPP_PLATFORM(WINDOWS)
diff --git a/Common/GPU/OpenGL/GLFrameData.cpp b/Common/GPU/OpenGL/GLFrameData.cpp
index fa5a051d3055..a82669dc8ad5 100644
--- a/Common/GPU/OpenGL/GLFrameData.cpp
+++ b/Common/GPU/OpenGL/GLFrameData.cpp
@@ -32,25 +32,25 @@ void GLDeleter::Perform(GLRenderManager *renderManager, bool skipGLCalls) {
}
pushBuffers.clear();
for (auto shader : shaders) {
- if (skipGLCalls)
+ if (skipGLCalls && shader)
shader->shader = 0; // prevent the glDeleteShader
delete shader;
}
shaders.clear();
for (auto program : programs) {
- if (skipGLCalls)
+ if (skipGLCalls && program)
program->program = 0; // prevent the glDeleteProgram
delete program;
}
programs.clear();
for (auto buffer : buffers) {
- if (skipGLCalls)
+ if (skipGLCalls && buffer)
buffer->buffer_ = 0;
delete buffer;
}
buffers.clear();
for (auto texture : textures) {
- if (skipGLCalls)
+ if (skipGLCalls && texture)
texture->texture = 0;
delete texture;
}
diff --git a/Common/GPU/OpenGL/GLRenderManager.h b/Common/GPU/OpenGL/GLRenderManager.h
index 1fb5c8247390..8aed292758cf 100644
--- a/Common/GPU/OpenGL/GLRenderManager.h
+++ b/Common/GPU/OpenGL/GLRenderManager.h
@@ -349,24 +349,31 @@ class GLRenderManager {
}
void DeleteShader(GLRShader *shader) {
+ _dbg_assert_(shader != nullptr);
deleter_.shaders.push_back(shader);
}
void DeleteProgram(GLRProgram *program) {
+ _dbg_assert_(program != nullptr);
deleter_.programs.push_back(program);
}
void DeleteBuffer(GLRBuffer *buffer) {
+ _dbg_assert_(buffer != nullptr);
deleter_.buffers.push_back(buffer);
}
void DeleteTexture(GLRTexture *texture) {
+ _dbg_assert_(texture != nullptr);
deleter_.textures.push_back(texture);
}
void DeleteInputLayout(GLRInputLayout *inputLayout) {
+ _dbg_assert_(inputLayout != nullptr);
deleter_.inputLayouts.push_back(inputLayout);
}
void DeleteFramebuffer(GLRFramebuffer *framebuffer) {
+ _dbg_assert_(framebuffer != nullptr);
deleter_.framebuffers.push_back(framebuffer);
}
void DeletePushBuffer(GLPushBuffer *pushbuffer) {
+ _dbg_assert_(pushbuffer != nullptr);
deleter_.pushBuffers.push_back(pushbuffer);
}
diff --git a/Common/GPU/OpenGL/thin3d_gl.cpp b/Common/GPU/OpenGL/thin3d_gl.cpp
index 66c408f95375..59cd3eba3c6a 100644
--- a/Common/GPU/OpenGL/thin3d_gl.cpp
+++ b/Common/GPU/OpenGL/thin3d_gl.cpp
@@ -934,7 +934,7 @@ void OpenGLTexture::UpdateTextureLevels(GLRenderManager *render, const uint8_t *
OpenGLTexture::~OpenGLTexture() {
if (tex_) {
render_->DeleteTexture(tex_);
- tex_ = 0;
+ tex_ = nullptr;
generatedMips_ = false;
}
}
diff --git a/Common/GPU/Vulkan/VulkanDebug.cpp b/Common/GPU/Vulkan/VulkanDebug.cpp
index 022093e217c2..c0bf23567771 100644
--- a/Common/GPU/Vulkan/VulkanDebug.cpp
+++ b/Common/GPU/Vulkan/VulkanDebug.cpp
@@ -90,6 +90,19 @@ VKAPI_ATTR VkBool32 VKAPI_CALL VulkanDebugUtilsCallback(
break;
}
+ /*
+ // Can be used to temporarily turn errors into info for easier debugging.
+ switch (messageCode) {
+ case 1544472022:
+ if (messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
+ messageSeverity = (VkDebugUtilsMessageSeverityFlagBitsEXT)((messageSeverity & ~VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) | VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT);
+ }
+ break;
+ default:
+ break;
+ }
+ */
+
int count;
{
std::lock_guard lock(g_errorCountMutex);
diff --git a/Common/GPU/Vulkan/VulkanFramebuffer.cpp b/Common/GPU/Vulkan/VulkanFramebuffer.cpp
index 975dab04bd57..19de9b67c458 100644
--- a/Common/GPU/Vulkan/VulkanFramebuffer.cpp
+++ b/Common/GPU/Vulkan/VulkanFramebuffer.cpp
@@ -2,6 +2,35 @@
#include "Common/GPU/Vulkan/VulkanFramebuffer.h"
#include "Common/GPU/Vulkan/VulkanQueueRunner.h"
+static const char *rpTypeDebugNames[] = {
+ "RENDER",
+ "RENDER_DEPTH",
+ "RENDER_INPUT",
+ "RENDER_DEPTH_INPUT",
+ "MV_RENDER",
+ "MV_RENDER_DEPTH",
+ "MV_RENDER_INPUT",
+ "MV_RENDER_DEPTH_INPUT",
+ "MS_RENDER",
+ "MS_RENDER_DEPTH",
+ "MS_RENDER_INPUT",
+ "MS_RENDER_DEPTH_INPUT",
+ "MS_MV_RENDER",
+ "MS_MV_RENDER_DEPTH",
+ "MS_MV_RENDER_INPUT",
+ "MS_MV_RENDER_DEPTH_INPUT",
+ "BACKBUF",
+};
+
+const char *GetRPTypeName(RenderPassType rpType) {
+ uint32_t index = (uint32_t)rpType;
+ if (index < ARRAY_SIZE(rpTypeDebugNames)) {
+ return rpTypeDebugNames[index];
+ } else {
+ return "N/A";
+ }
+}
+
VkSampleCountFlagBits MultiSampleLevelToFlagBits(int count) {
// TODO: Check hardware support here, or elsewhere?
// Some hardware only supports 4x.
@@ -387,12 +416,25 @@ VkRenderPass CreateRenderPass(VulkanContext *vulkan, const RPKey &key, RenderPas
}
if (isBackbuffer) {
+ // We don't specify any explicit transitions for these, so let's use subpass dependencies.
+ // This makes sure that writes to the depth image are done before we try to write to it again.
+ // From Sascha's examples.
deps[numDeps].srcSubpass = VK_SUBPASS_EXTERNAL;
deps[numDeps].dstSubpass = 0;
- deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+ deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
+ deps[numDeps].dstStageMask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
+ deps[numDeps].srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+ deps[numDeps].dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
+ deps[numDeps].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
+ numDeps++;
+ // Dependencies for the color image.
+ deps[numDeps].srcSubpass = VK_SUBPASS_EXTERNAL;
+ deps[numDeps].dstSubpass = 0;
+ deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
deps[numDeps].dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
- deps[numDeps].srcAccessMask = 0;
+ deps[numDeps].srcAccessMask = VK_ACCESS_MEMORY_READ_BIT;
deps[numDeps].dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+ deps[numDeps].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
numDeps++;
}
@@ -494,6 +536,10 @@ VkRenderPass CreateRenderPass(VulkanContext *vulkan, const RPKey &key, RenderPas
res = vkCreateRenderPass(vulkan->GetDevice(), &rp, nullptr, &pass);
}
+ if (pass) {
+ vulkan->SetDebugName(pass, VK_OBJECT_TYPE_RENDER_PASS, GetRPTypeName(rpType));
+ }
+
_assert_(res == VK_SUCCESS);
_assert_(pass != VK_NULL_HANDLE);
return pass;
diff --git a/Common/GPU/Vulkan/VulkanFramebuffer.h b/Common/GPU/Vulkan/VulkanFramebuffer.h
index 465983efaa7c..97ff9e367a56 100644
--- a/Common/GPU/Vulkan/VulkanFramebuffer.h
+++ b/Common/GPU/Vulkan/VulkanFramebuffer.h
@@ -157,3 +157,5 @@ class VKRRenderPass {
VkSampleCountFlagBits sampleCounts[(size_t)RenderPassType::TYPE_COUNT];
RPKey key_;
};
+
+const char *GetRPTypeName(RenderPassType rpType);
diff --git a/Common/GPU/Vulkan/VulkanLoader.cpp b/Common/GPU/Vulkan/VulkanLoader.cpp
index f30092ab1bff..3c7069c4a1cc 100644
--- a/Common/GPU/Vulkan/VulkanLoader.cpp
+++ b/Common/GPU/Vulkan/VulkanLoader.cpp
@@ -314,7 +314,7 @@ static void VulkanFreeLibrary(VulkanLibraryHandle &h) {
}
void VulkanSetAvailable(bool available) {
- INFO_LOG(G3D, "Forcing Vulkan availability to true");
+ INFO_LOG(G3D, "Setting Vulkan availability to %s", available ? "true" : "false");
g_vulkanAvailabilityChecked = true;
g_vulkanMayBeAvailable = available;
}
diff --git a/Common/GPU/Vulkan/VulkanMemory.cpp b/Common/GPU/Vulkan/VulkanMemory.cpp
index 457d75d51f15..f29fc33d0b18 100644
--- a/Common/GPU/Vulkan/VulkanMemory.cpp
+++ b/Common/GPU/Vulkan/VulkanMemory.cpp
@@ -291,7 +291,7 @@ VulkanPushPool::Block VulkanPushPool::CreateBlock(size_t size) {
_assert_(result == VK_SUCCESS);
result = vmaMapMemory(vulkan_->Allocator(), block.allocation, (void **)(&block.writePtr));
- _assert_msg_(result == VK_SUCCESS, "VulkanPushPool: Failed to map memory (result = %08x)", result);
+ _assert_msg_(result == VK_SUCCESS, "VulkanPushPool: Failed to map memory (result = %s)", VulkanResultToString(result));
_assert_msg_(block.writePtr != nullptr, "VulkanPushPool: Failed to map memory on block of size %d", (int)block.size);
return block;
diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.cpp b/Common/GPU/Vulkan/VulkanQueueRunner.cpp
index c4a04f90bc98..e6bb324f76c7 100644
--- a/Common/GPU/Vulkan/VulkanQueueRunner.cpp
+++ b/Common/GPU/Vulkan/VulkanQueueRunner.cpp
@@ -674,26 +674,6 @@ const char *AspectToString(VkImageAspectFlags aspect) {
}
}
-static const char *rpTypeDebugNames[] = {
- "RENDER",
- "RENDER_DEPTH",
- "RENDER_INPUT",
- "RENDER_DEPTH_INPUT",
- "MV_RENDER",
- "MV_RENDER_DEPTH",
- "MV_RENDER_INPUT",
- "MV_RENDER_DEPTH_INPUT",
- "MS_RENDER",
- "MS_RENDER_DEPTH",
- "MS_RENDER_INPUT",
- "MS_RENDER_DEPTH_INPUT",
- "MS_MV_RENDER",
- "MS_MV_RENDER_DEPTH",
- "MS_MV_RENDER_INPUT",
- "MS_MV_RENDER_DEPTH_INPUT",
- "BACKBUF",
-};
-
std::string VulkanQueueRunner::StepToString(VulkanContext *vulkan, const VKRStep &step) {
char buffer[256];
switch (step.stepType) {
@@ -703,7 +683,7 @@ std::string VulkanQueueRunner::StepToString(VulkanContext *vulkan, const VKRStep
int h = step.render.framebuffer ? step.render.framebuffer->height : vulkan->GetBackbufferHeight();
int actual_w = step.render.renderArea.extent.width;
int actual_h = step.render.renderArea.extent.height;
- const char *renderCmd = rpTypeDebugNames[(size_t)step.render.renderPassType];
+ const char *renderCmd = GetRPTypeName(step.render.renderPassType);
snprintf(buffer, sizeof(buffer), "%s %s %s (draws: %d, %dx%d/%dx%d)", renderCmd, step.tag, step.render.framebuffer ? step.render.framebuffer->Tag() : "", step.render.numDraws, actual_w, actual_h, w, h);
break;
}
diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp
index fff194adc256..4414af64f9c0 100644
--- a/Common/GPU/Vulkan/VulkanRenderManager.cpp
+++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp
@@ -288,7 +288,6 @@ bool VulkanRenderManager::CreateBackbuffers() {
return false;
}
-
VkCommandBuffer cmdInit = GetInitCmd();
if (!queueRunner_.CreateSwapchain(cmdInit)) {
@@ -310,6 +309,11 @@ bool VulkanRenderManager::CreateBackbuffers() {
outOfDateFrames_ = 0;
+ for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {
+ auto &frameData = frameData_[i];
+ frameData.readyForFence = true; // Just in case.
+ }
+
// Start the thread(s).
if (HasBackbuffers()) {
run_ = true; // For controlling the compiler thread's exit
diff --git a/Common/GPU/Vulkan/thin3d_vulkan.cpp b/Common/GPU/Vulkan/thin3d_vulkan.cpp
index dc9aaf60ef0e..c5b4d8787625 100644
--- a/Common/GPU/Vulkan/thin3d_vulkan.cpp
+++ b/Common/GPU/Vulkan/thin3d_vulkan.cpp
@@ -874,8 +874,11 @@ VKContext::VKContext(VulkanContext *vulkan, bool useRenderThread)
caps_.tesselationShaderSupported = vulkan->GetDeviceFeatures().enabled.standard.tessellationShader != 0;
caps_.dualSourceBlend = vulkan->GetDeviceFeatures().enabled.standard.dualSrcBlend != 0;
caps_.depthClampSupported = vulkan->GetDeviceFeatures().enabled.standard.depthClamp != 0;
+
+ // Comment out these two to test geometry shader culling on any geometry shader-supporting hardware.
caps_.clipDistanceSupported = vulkan->GetDeviceFeatures().enabled.standard.shaderClipDistance != 0;
caps_.cullDistanceSupported = vulkan->GetDeviceFeatures().enabled.standard.shaderCullDistance != 0;
+
caps_.framebufferBlitSupported = true;
caps_.framebufferCopySupported = true;
caps_.framebufferDepthBlitSupported = vulkan->GetDeviceInfo().canBlitToPreferredDepthStencilFormat;
diff --git a/Common/Input/InputState.h b/Common/Input/InputState.h
index c01680a0ae64..a94547443d23 100644
--- a/Common/Input/InputState.h
+++ b/Common/Input/InputState.h
@@ -31,7 +31,7 @@ enum InputDeviceID {
DEVICE_ID_XINPUT_1 = 21,
DEVICE_ID_XINPUT_2 = 22,
DEVICE_ID_XINPUT_3 = 23,
- DEVICE_ID_ACCELEROMETER = 30,
+ DEVICE_ID_ACCELEROMETER = 30, // no longer used
DEVICE_ID_XR_HMD = 39,
DEVICE_ID_XR_CONTROLLER_LEFT = 40,
DEVICE_ID_XR_CONTROLLER_RIGHT = 41,
diff --git a/Common/Input/KeyCodes.h b/Common/Input/KeyCodes.h
index dc2b9897ce37..8614e5a2d287 100644
--- a/Common/Input/KeyCodes.h
+++ b/Common/Input/KeyCodes.h
@@ -305,7 +305,7 @@ enum InputAxis {
JOYSTICK_AXIS_MOUSE_REL_X = 26,
JOYSTICK_AXIS_MOUSE_REL_Y = 27,
- // Mobile device accelerometer/gyro
+ // Mobile device accelerometer/gyro. NOTE: These are no longer passed around internally, only used for the plugin API.
JOYSTICK_AXIS_ACCELEROMETER_X = 40,
JOYSTICK_AXIS_ACCELEROMETER_Y = 41,
JOYSTICK_AXIS_ACCELEROMETER_Z = 42,
diff --git a/Common/Log.cpp b/Common/Log.cpp
index 28c2f821d005..988e51f439fb 100644
--- a/Common/Log.cpp
+++ b/Common/Log.cpp
@@ -25,6 +25,7 @@
#include "StringUtils.h"
#include "Common/Data/Encoding/Utf8.h"
#include "Common/Thread/ThreadUtil.h"
+#include "Common/TimeUtil.h"
#if PPSSPP_PLATFORM(ANDROID)
#include <android/log.h>
@@ -38,10 +39,12 @@ static bool hitAnyAsserts = false;
std::mutex g_extraAssertInfoMutex;
std::string g_extraAssertInfo = "menu";
+double g_assertInfoTime = 0.0;
void SetExtraAssertInfo(const char *info) {
std::lock_guard guard(g_extraAssertInfoMutex);
g_extraAssertInfo = info ? info : "menu";
+ g_assertInfoTime = time_now_d();
}
bool HandleAssert(const char *function, const char *file, int line, const char *expression, const char* format, ...) {
@@ -57,7 +60,8 @@ bool HandleAssert(const char *function, const char *file, int line, const char *
char formatted[LOG_BUF_SIZE + 128];
{
std::lock_guard guard(g_extraAssertInfoMutex);
- snprintf(formatted, sizeof(formatted), "(%s:%s:%d): [%s] (%s) %s", file, function, line, expression, g_extraAssertInfo.c_str(), text);
+ double delta = time_now_d() - g_assertInfoTime;
+ snprintf(formatted, sizeof(formatted), "(%s:%s:%d): [%s] (%s, %0.1fs) %s", file, function, line, expression, g_extraAssertInfo.c_str(), delta, text);
}
// Normal logging (will also log to Android log)
diff --git a/Common/Net/HTTPClient.cpp b/Common/Net/HTTPClient.cpp
index b68ad6bfe961..c6366af8262d 100644
--- a/Common/Net/HTTPClient.cpp
+++ b/Common/Net/HTTPClient.cpp
@@ -30,6 +30,7 @@
#include "Common/Net/URL.h"
#include "Common/File/FileDescriptor.h"
+#include "Common/SysError.h"
#include "Common/Thread/ThreadUtil.h"
#include "Common/Data/Encoding/Compression.h"
#include "Common/Net/NetBuffer.h"
@@ -97,7 +98,7 @@ static void FormatAddr(char *addrbuf, size_t bufsize, const addrinfo *info) {
switch (info->ai_family) {
case AF_INET:
case AF_INET6:
- inet_ntop(info->ai_family, info->ai_addr, addrbuf, bufsize);
+ inet_ntop(info->ai_family, info->ai_family == AF_INET6 ? (const void *)&((sockaddr_in6 *)info->ai_addr)->sin6_addr : (const void *)&((sockaddr_in *)info->ai_addr)->sin_addr, addrbuf, bufsize);
break;
default:
snprintf(addrbuf, bufsize, "(Unknown AF %d)", info->ai_family);
@@ -131,11 +132,22 @@ bool Connection::Connect(int maxTries, double timeout, bool *cancelConnect) {
// Start trying to connect (async with timeout.)
errno = 0;
if (connect(sock, possible->ai_addr, (int)possible->ai_addrlen) < 0) {
- if (errno != 0 && errno != EINPROGRESS) {
- char addrStr[128];
+#if PPSSPP_PLATFORM(WINDOWS)
+ int errorCode = WSAGetLastError();
+ std::string errorString = GetStringErrorMsg(errorCode);
+ bool unreachable = errorCode == WSAENETUNREACH;
+ bool inProgress = errorCode == WSAEINPROGRESS || errorCode == WSAEWOULDBLOCK;
+#else
+ int errorCode = errno;
+ std::string errorString = strerror(errno);
+ bool unreachable = errorCode == ENETUNREACH;
+ bool inProgress = errorCode == EINPROGRESS || errorCode == EWOULDBLOCK;
+#endif
+ if (!inProgress) {
+ char addrStr[128]{};
FormatAddr(addrStr, sizeof(addrStr), possible);
- if (errno != ENETUNREACH) {
- ERROR_LOG(HTTP, "connect(%d) call to %s failed (%d: %s)", sock, addrStr, errno, strerror(errno));
+ if (!unreachable) {
+ ERROR_LOG(HTTP, "connect(%d) call to %s failed (%d: %s)", sock, addrStr, errorCode, errorString.c_str());
} else {
INFO_LOG(HTTP, "connect(%d): Ignoring unreachable resolved address %s", sock, addrStr);
}
@@ -207,9 +219,9 @@ namespace http {
// TODO: do something sane here
constexpr const char *DEFAULT_USERAGENT = "PPSSPP";
+constexpr const char *HTTP_VERSION = "1.1";
Client::Client() {
- httpVersion_ = "1.1";
userAgent_ = DEFAULT_USERAGENT;
}
@@ -341,7 +353,7 @@ int Client::SendRequestWithData(const char *method, const RequestParams &req, co
"\r\n";
buffer.Printf(tpl,
- method, req.resource.c_str(), httpVersion_,
+ method, req.resource.c_str(), HTTP_VERSION,
host_.c_str(),
userAgent_.c_str(),
req.acceptMime,
diff --git a/Common/Net/HTTPClient.h b/Common/Net/HTTPClient.h
index dd104e2fa603..619ab80423b6 100644
--- a/Common/Net/HTTPClient.h
+++ b/Common/Net/HTTPClient.h
@@ -86,7 +86,6 @@ class Client : public net::Connection {
protected:
std::string userAgent_;
- const char *httpVersion_;
double dataTimeout_ = 900.0;
};
diff --git a/Common/Render/Text/draw_text_sdl.cpp b/Common/Render/Text/draw_text_sdl.cpp
index 8a4178d71a1d..494858355648 100644
--- a/Common/Render/Text/draw_text_sdl.cpp
+++ b/Common/Render/Text/draw_text_sdl.cpp
@@ -378,7 +378,7 @@ void TextDrawerSDL::DrawStringBitmap(std::vector &bitmapData, TextStrin
font = fallbackFonts_[0];
}
-#ifndef USE_SDL2_TTF_PKGCONFIG
+#if SDL_TTF_VERSION_ATLEAST(2, 20, 0)
if (align & ALIGN_HCENTER)
TTF_SetFontWrappedAlign(font, TTF_WRAPPED_ALIGN_CENTER);
else if (align & ALIGN_RIGHT)
diff --git a/Common/System/NativeApp.h b/Common/System/NativeApp.h
index 94a67c9eb9c8..4799fa6f11a8 100644
--- a/Common/System/NativeApp.h
+++ b/Common/System/NativeApp.h
@@ -55,6 +55,7 @@ bool NativeIsRestarting();
void NativeTouch(const TouchInput &touch);
bool NativeKey(const KeyInput &key);
void NativeAxis(const AxisInput *axis, size_t count);
+void NativeAccelerometer(float tiltX, float tiltY, float tiltZ);
// Called when it's process a frame, including rendering. If the device can keep up, this
// will be called sixty times per second. Main thread.
diff --git a/Common/Thread/Promise.h b/Common/Thread/Promise.h
index f8dbaf9e6a1e..93e4dfd98507 100644
--- a/Common/Thread/Promise.h
+++ b/Common/Thread/Promise.h
@@ -45,6 +45,7 @@ class PromiseTask : public Task {
template <class T>
class Promise {
public:
+ // Never fails.
static Promise *Spawn(ThreadManager *threadman, std::function fun, TaskType taskType, TaskPriority taskPriority = TaskPriority::NORMAL) {
Mailbox *mailbox = new Mailbox();
diff --git a/Common/UI/PopupScreens.cpp b/Common/UI/PopupScreens.cpp
index 95714b9de76f..1d470d1242da 100644
--- a/Common/UI/PopupScreens.cpp
+++ b/Common/UI/PopupScreens.cpp
@@ -122,7 +122,11 @@ void PopupMultiChoice::UpdateText() {
if (index < 0 || index >= numChoices_) {
valueText_ = "(invalid choice)"; // Shouldn't happen. Should be no need to translate this.
} else {
- valueText_ = T(category_, choices_[index]);
+ if (choices_[index]) {
+ valueText_ = T(category_, choices_[index]);
+ } else {
+ valueText_ = "";
+ }
}
}
diff --git a/Common/UI/Screen.cpp b/Common/UI/Screen.cpp
index 80b8fced14ff..bd8c713da2dc 100644
--- a/Common/UI/Screen.cpp
+++ b/Common/UI/Screen.cpp
@@ -227,9 +227,11 @@ void ScreenManager::getFocusPosition(float &x, float &y, float &z) {
}
void ScreenManager::sendMessage(const char *msg, const char *value) {
- if (!strcmp(msg, "recreateviews"))
+ if (!msg) {
+ _dbg_assert_msg_(false, "Empty msg in ScreenManager::sendMessage");
+ } else if (!strcmp(msg, "recreateviews")) {
RecreateAllViews();
- if (!strcmp(msg, "lost_focus")) {
+ } else if (!strcmp(msg, "lost_focus")) {
TouchInput input{};
input.x = -50000.0f;
input.y = -50000.0f;
@@ -238,6 +240,7 @@ void ScreenManager::sendMessage(const char *msg, const char *value) {
input.id = 0;
touch(input);
}
+
if (!stack_.empty())
stack_.back().screen->sendMessage(msg, value);
}
diff --git a/Common/x64Emitter.cpp b/Common/x64Emitter.cpp
index c2a5ba8c4d49..814fc7e0d631 100644
--- a/Common/x64Emitter.cpp
+++ b/Common/x64Emitter.cpp
@@ -1697,7 +1697,6 @@ void XEmitter::MOVMSKPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x50, dest, ar
void XEmitter::LDDQU(X64Reg dest, OpArg arg) {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only
-// THESE TWO ARE UNTESTED.
void XEmitter::UNPCKLPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x14, dest, arg);}
void XEmitter::UNPCKHPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x15, dest, arg);}
@@ -1892,6 +1891,9 @@ void XEmitter::PTEST(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3817, dest
void XEmitter::PACKUSDW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x382b, dest, arg);}
void XEmitter::DPPS(X64Reg dest, OpArg arg, u8 mask) {WriteSSE41Op(0x66, 0x3A40, dest, arg, 1); Write8(mask);}
+void XEmitter::INSERTPS(X64Reg dest, OpArg arg, u8 dstsubreg, u8 srcsubreg, u8 zmask) { WriteSSE41Op(0x66, 0x3A21, dest, arg, 1); Write8((srcsubreg << 6) | (dstsubreg << 4) | zmask); }
+void XEmitter::EXTRACTPS(OpArg dest, X64Reg arg, u8 subreg) { WriteSSE41Op(0x66, 0x3A17, arg, dest, 1); Write8(subreg); }
+
void XEmitter::PMINSB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3838, dest, arg);}
void XEmitter::PMINSD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3839, dest, arg);}
void XEmitter::PMINUW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383a, dest, arg);}
@@ -2084,7 +2086,7 @@ void XEmitter::VCVTTPD2DQ(int bits, X64Reg regOp1, OpArg arg) { WriteAVXOp(bits,
void XEmitter::VCVTTSS2SI(int bits, X64Reg regOp1, OpArg arg) { WriteAVXOp(0, 0xF3, 0x2C, regOp1, arg, 0, bits == 64 ? 1 : 0); }
void XEmitter::VCVTTSD2SI(int bits, X64Reg regOp1, OpArg arg) { WriteAVXOp(0, 0xF2, 0x2C, regOp1, arg, 0, bits == 64 ? 1 : 0); }
void XEmitter::VEXTRACTPS(OpArg arg, X64Reg regOp1, u8 subreg) { WriteAVXOp(0, 0x66, 0x3A17, regOp1, arg, 1); Write8(subreg); }
-void XEmitter::VINSERTPS(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 subreg) { WriteAVXOp(0, 0x66, 0x3A21, regOp1, regOp2, arg, 1); Write8(subreg); }
+void XEmitter::VINSERTPS(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 dstsubreg, u8 srcsubreg, u8 zmask) { WriteAVXOp(0, 0x66, 0x3A21, regOp1, regOp2, arg, 1); Write8((srcsubreg << 6) | (dstsubreg << 4) | zmask); }
void XEmitter::VLDDQU(int bits, X64Reg regOp1, OpArg arg) { WriteAVXOp(bits, 0xF2, sseLDDQU, regOp1, arg); }
void XEmitter::VMOVAPS(int bits, X64Reg regOp1, OpArg arg) { WriteAVXOp(bits, 0x00, sseMOVAPfromRM, regOp1, arg); }
void XEmitter::VMOVAPD(int bits, X64Reg regOp1, OpArg arg) { WriteAVXOp(bits, 0x66, sseMOVAPfromRM, regOp1, arg); }
diff --git a/Common/x64Emitter.h b/Common/x64Emitter.h
index 16f30a35b0f2..832ed767cbdd 100644
--- a/Common/x64Emitter.h
+++ b/Common/x64Emitter.h
@@ -684,12 +684,14 @@ class XEmitter
// SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask".
void DPPD(X64Reg dest, OpArg src, u8 arg);
-
- // These are probably useful for VFPU emulation.
- void INSERTPS(X64Reg dest, OpArg src, u8 arg);
- void EXTRACTPS(OpArg dest, X64Reg src, u8 arg);
#endif
+ // SSE4: Insert and extract for floats.
+ // Note: insert from memory or an XMM.
+ void INSERTPS(X64Reg dest, OpArg arg, u8 dstsubreg, u8 srcsubreg = 0, u8 zmask = 0);
+ // Extract to memory or GPR.
+ void EXTRACTPS(OpArg dest, X64Reg arg, u8 subreg);
+
// SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily on Ivy.
void HADDPS(X64Reg dest, OpArg src);
@@ -1040,7 +1042,7 @@ class XEmitter
// Can only extract from the low 128 bits.
void VEXTRACTPS(OpArg arg, X64Reg regOp1, u8 subreg);
// Can only insert into the low 128 bits, zeros upper bits. Inserts from XMM.
- void VINSERTPS(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 subreg);
+ void VINSERTPS(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 dstsubreg, u8 srcsubreg = 0, u8 zmask = 0);
void VLDDQU(int bits, X64Reg regOp1, OpArg arg);
void VMOVAPS(int bits, X64Reg regOp1, OpArg arg);
void VMOVAPD(int bits, X64Reg regOp1, OpArg arg);
diff --git a/Core/ControlMapper.cpp b/Core/ControlMapper.cpp
index 5fd5c50ee0fe..b27a619cfa07 100644
--- a/Core/ControlMapper.cpp
+++ b/Core/ControlMapper.cpp
@@ -480,8 +480,9 @@ void ControlMapper::Axis(const AxisInput &axis) {
double now = time_now_d();
std::lock_guard guard(mutex_);
- if (axis.deviceId < DEVICE_ID_COUNT) {
- deviceTimestamps_[(int)axis.deviceId] = now;
+ size_t deviceIndex = (size_t)axis.deviceId; // This'll wrap DEVICE_ID_ANY (-1) around to SIZE_MAX, so the bounds check on the next line rejects it if such an event appears by mistake.
+ if (deviceIndex < (size_t)DEVICE_ID_COUNT) {
+ deviceTimestamps_[deviceIndex] = now;
}
if (axis.value >= 0.0f) {
InputMapping mapping(axis.deviceId, axis.axisId, 1);
diff --git a/Core/ControlMapper.h b/Core/ControlMapper.h
index c90ae68daea3..c2d6c4cd1669 100644
--- a/Core/ControlMapper.h
+++ b/Core/ControlMapper.h
@@ -62,7 +62,7 @@ class ControlMapper {
float virtKeys_[VIRTKEY_COUNT]{};
bool virtKeyOn_[VIRTKEY_COUNT]{}; // Track boolean output separaately since thresholds may differ.
- double deviceTimestamps_[42]{};
+ double deviceTimestamps_[(size_t)DEVICE_ID_COUNT]{};
int lastNonDeadzoneDeviceID_[2]{};
diff --git a/Core/Core.vcxproj b/Core/Core.vcxproj
index cbba66cf140f..4b037a33843a 100644
--- a/Core/Core.vcxproj
+++ b/Core/Core.vcxproj
@@ -138,7 +138,7 @@
Level3
- ..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib
+ ..\ffmpeg\WindowsInclude;..\ext\libchdr\include;..\ffmpeg\Windows\x86\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib
_CRTDBG_MAP_ALLOC;USING_WIN_UI;_CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;USE_FFMPEG;WITH_UPNP;WIN32;_ARCH_32=1;_M_IX86=1;_DEBUG;_LIB;_UNICODE;UNICODE;MINIUPNP_STATICLIB;ARMIPS_USE_STD_FILESYSTEM;%(PreprocessorDefinitions)
StreamingSIMDExtensions2
Precise
@@ -165,7 +165,7 @@
Level3
- ..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib;../ext/zstd/lib
+ ..\ffmpeg\WindowsInclude;..\ext\libchdr\include;..\ffmpeg\Windows\x86_64\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib;../ext/zstd/lib
_CRTDBG_MAP_ALLOC;USING_WIN_UI;_CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;USE_FFMPEG;WITH_UPNP;WIN32;_ARCH_64=1;_M_X64=1;_DEBUG;_LIB;_UNICODE;UNICODE;MINIUPNP_STATICLIB;ARMIPS_USE_STD_FILESYSTEM;%(PreprocessorDefinitions)
NotSet
Precise
@@ -193,7 +193,7 @@
Level3
- ..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\aarch64\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib
+ ..\ffmpeg\WindowsInclude;..\ext\libchdr\include;..\ffmpeg\Windows\aarch64\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib
_CRTDBG_MAP_ALLOC;USING_WIN_UI;_CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;USE_FFMPEG;WITH_UPNP;WIN32;_ARCH_64=1;_DEBUG;_LIB;_UNICODE;UNICODE;ARMIPS_USE_STD_FILESYSTEM;%(PreprocessorDefinitions)
NotSet
Precise
@@ -221,7 +221,7 @@
Level3
- ..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\arm\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib
+ ..\ffmpeg\WindowsInclude;..\ext\libchdr\include;..\ffmpeg\Windows\arm\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib
_CRTDBG_MAP_ALLOC;USING_WIN_UI;_CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;USE_FFMPEG;WITH_UPNP;WIN32;_ARCH_32=1;_DEBUG;_LIB;_UNICODE;UNICODE;ARMIPS_USE_STD_FILESYSTEM;%(PreprocessorDefinitions)
NotSet
Precise
@@ -253,7 +253,7 @@
MaxSpeed
true
true
- ..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib
+ ..\ffmpeg\WindowsInclude;..\ext\libchdr\include;..\ffmpeg\Windows\x86\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib
false
StreamingSIMDExtensions2
Precise
@@ -286,7 +286,7 @@
MaxSpeed
true
true
- ..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib;../ext/zstd/lib
+ ..\ffmpeg\WindowsInclude;..\ext\libchdr\include;..\ffmpeg\Windows\x86_64\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib;../ext/zstd/lib
NotSet
Precise
false
@@ -321,7 +321,7 @@
MaxSpeed
true
true
- ..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\aarch64\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib
+ ..\ffmpeg\WindowsInclude;..\ext\libchdr\include;..\ffmpeg\Windows\aarch64\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib
NotSet
Precise
false
@@ -356,7 +356,7 @@
MaxSpeed
true
true
- ..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\arm\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib
+ ..\ffmpeg\WindowsInclude;..\ext\libchdr\include;..\ffmpeg\Windows\arm\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib
NotSet
Precise
false
@@ -1466,6 +1466,9 @@
{129e5e2b-39c1-4d84-96fe-dfd22dbb4a25}
+
+ {956f1f48-b612-46d8-89ee-96996dcd9383}
+
{d8a71225-178b-424e-96c1-cc3be2c1b047}
diff --git a/Core/Debugger/MemBlockInfo.cpp b/Core/Debugger/MemBlockInfo.cpp
index 93f44f41d0ad..9daa39d4faee 100644
--- a/Core/Debugger/MemBlockInfo.cpp
+++ b/Core/Debugger/MemBlockInfo.cpp
@@ -17,8 +17,10 @@
#include
#include
+#include
#include
#include
+#include
#include "Common/Log.h"
#include "Common/Serialize/Serializer.h"
@@ -78,12 +80,15 @@ struct PendingNotifyMem {
MemBlockFlags flags;
uint32_t start;
uint32_t size;
+ uint32_t copySrc;
uint64_t ticks;
uint32_t pc;
char tag[128];
};
-static constexpr size_t MAX_PENDING_NOTIFIES = 512;
+// 160 KB.
+static constexpr size_t MAX_PENDING_NOTIFIES = 1024;
+static constexpr size_t MAX_PENDING_NOTIFIES_THREAD = 1000;
static MemSlabMap allocMap;
static MemSlabMap suballocMap;
static MemSlabMap writeMap;
@@ -93,9 +98,17 @@ static std::atomic pendingNotifyMinAddr1;
static std::atomic pendingNotifyMaxAddr1;
static std::atomic pendingNotifyMinAddr2;
static std::atomic pendingNotifyMaxAddr2;
-static std::mutex pendingMutex;
+// To prevent deadlocks, acquire Read before Write if you're going to acquire both.
+static std::mutex pendingWriteMutex;
+static std::mutex pendingReadMutex;
static int detailedOverride;
+static std::thread flushThread;
+static std::atomic flushThreadRunning;
+static std::atomic flushThreadPending;
+static std::mutex flushLock;
+static std::condition_variable flushCond;
+
MemSlabMap::MemSlabMap() {
Reset();
}
@@ -369,9 +382,32 @@ void MemSlabMap::FillHeads(Slab *slab) {
}
}
+size_t FormatMemWriteTagAtNoFlush(char *buf, size_t sz, const char *prefix, uint32_t start, uint32_t size);
+
void FlushPendingMemInfo() {
- std::lock_guard guard(pendingMutex);
- for (const auto &info : pendingNotifies) {
+ // This lock prevents us from another thread reading while we're busy flushing.
+ std::lock_guard guard(pendingReadMutex);
+ std::vector thisBatch;
+ {
+ std::lock_guard guard(pendingWriteMutex);
+ thisBatch = std::move(pendingNotifies);
+ pendingNotifies.clear();
+ pendingNotifies.reserve(MAX_PENDING_NOTIFIES);
+
+ pendingNotifyMinAddr1 = 0xFFFFFFFF;
+ pendingNotifyMaxAddr1 = 0;
+ pendingNotifyMinAddr2 = 0xFFFFFFFF;
+ pendingNotifyMaxAddr2 = 0;
+ }
+
+ for (const auto &info : thisBatch) {
+ if (info.copySrc != 0) {
+ char tagData[128];
+ size_t tagSize = FormatMemWriteTagAtNoFlush(tagData, sizeof(tagData), info.tag, info.copySrc, info.size);
+ writeMap.Mark(info.start, info.size, info.ticks, info.pc, true, tagData);
+ continue;
+ }
+
if (info.flags & MemBlockFlags::ALLOC) {
allocMap.Mark(info.start, info.size, info.ticks, info.pc, true, info.tag);
} else if (info.flags & MemBlockFlags::FREE) {
@@ -392,11 +428,6 @@ void FlushPendingMemInfo() {
writeMap.Mark(info.start, info.size, info.ticks, info.pc, true, info.tag);
}
}
- pendingNotifies.clear();
- pendingNotifyMinAddr1 = 0xFFFFFFFF;
- pendingNotifyMaxAddr1 = 0;
- pendingNotifyMinAddr2 = 0xFFFFFFFF;
- pendingNotifyMaxAddr2 = 0;
}
static inline uint32_t NormalizeAddress(uint32_t addr) {
@@ -411,6 +442,9 @@ static inline bool MergeRecentMemInfo(const PendingNotifyMem &info, size_t copyL
for (size_t i = 1; i <= 4; ++i) {
auto &prev = pendingNotifies[pendingNotifies.size() - i];
+ if (prev.copySrc != 0)
+ return false;
+
if (prev.flags != info.flags)
continue;
@@ -440,7 +474,7 @@ void NotifyMemInfoPC(MemBlockFlags flags, uint32_t start, uint32_t size, uint32_
bool needFlush = false;
// When the setting is off, we skip smaller info to keep things fast.
- if (MemBlockInfoDetailed(size)) {
+ if (MemBlockInfoDetailed(size) && flags != MemBlockFlags::READ) {
PendingNotifyMem info{ flags, start, size };
info.ticks = CoreTiming::GetTicks();
info.pc = pc;
@@ -452,7 +486,7 @@ void NotifyMemInfoPC(MemBlockFlags flags, uint32_t start, uint32_t size, uint32_
memcpy(info.tag, tagStr, copyLength);
info.tag[copyLength] = 0;
- std::lock_guard guard(pendingMutex);
+ std::lock_guard guard(pendingWriteMutex);
// Sometimes we get duplicates, quickly check.
if (!MergeRecentMemInfo(info, copyLength)) {
if (start < 0x08000000) {
@@ -464,11 +498,15 @@ void NotifyMemInfoPC(MemBlockFlags flags, uint32_t start, uint32_t size, uint32_
}
pendingNotifies.push_back(info);
}
- needFlush = pendingNotifies.size() > MAX_PENDING_NOTIFIES;
+ needFlush = pendingNotifies.size() > MAX_PENDING_NOTIFIES_THREAD;
}
if (needFlush) {
- FlushPendingMemInfo();
+ {
+ std::lock_guard guard(flushLock);
+ flushThreadPending = true;
+ }
+ flushCond.notify_one();
}
if (!(flags & MemBlockFlags::SKIP_MEMCHECK)) {
@@ -484,6 +522,50 @@ void NotifyMemInfo(MemBlockFlags flags, uint32_t start, uint32_t size, const cha
NotifyMemInfoPC(flags, start, size, currentMIPS->pc, str, strLength);
}
+void NotifyMemInfoCopy(uint32_t destPtr, uint32_t srcPtr, uint32_t size, const char *prefix) {
+ if (size == 0)
+ return;
+
+ bool needsFlush = false;
+ if (CBreakPoints::HasMemChecks()) {
+ // This will cause a flush, but it's needed to trigger memchecks with proper data.
+ char tagData[128];
+ size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), prefix, srcPtr, size);
+ NotifyMemInfo(MemBlockFlags::READ, srcPtr, size, tagData, tagSize);
+ NotifyMemInfo(MemBlockFlags::WRITE, destPtr, size, tagData, tagSize);
+ } else if (MemBlockInfoDetailed(size)) {
+ srcPtr = NormalizeAddress(srcPtr);
+ destPtr = NormalizeAddress(destPtr);
+
+ PendingNotifyMem info{ MemBlockFlags::WRITE, destPtr, size };
+ info.copySrc = srcPtr;
+ info.ticks = CoreTiming::GetTicks();
+ info.pc = currentMIPS->pc;
+
+ // Store the prefix for now. The correct tag will be calculated on flush.
+ truncate_cpy(info.tag, prefix);
+
+ std::lock_guard guard(pendingWriteMutex);
+ if (destPtr < 0x08000000) {
+ pendingNotifyMinAddr1 = std::min(pendingNotifyMinAddr1.load(), destPtr);
+ pendingNotifyMaxAddr1 = std::max(pendingNotifyMaxAddr1.load(), destPtr + size);
+ } else {
+ pendingNotifyMinAddr2 = std::min(pendingNotifyMinAddr2.load(), destPtr);
+ pendingNotifyMaxAddr2 = std::max(pendingNotifyMaxAddr2.load(), destPtr + size);
+ }
+ pendingNotifies.push_back(info);
+ needsFlush = pendingNotifies.size() > MAX_PENDING_NOTIFIES_THREAD;
+ }
+
+ if (needsFlush) {
+ {
+ std::lock_guard guard(flushLock);
+ flushThreadPending = true;
+ }
+ flushCond.notify_one();
+ }
+}
+
std::vector FindMemInfo(uint32_t start, uint32_t size) {
start = NormalizeAddress(start);
@@ -520,13 +602,15 @@ std::vector FindMemInfoByFlag(MemBlockFlags flags, uint32_t start,
return results;
}
-static const char *FindWriteTagByFlag(MemBlockFlags flags, uint32_t start, uint32_t size) {
+static const char *FindWriteTagByFlag(MemBlockFlags flags, uint32_t start, uint32_t size, bool flush = true) {
start = NormalizeAddress(start);
- if (pendingNotifyMinAddr1 < start + size && pendingNotifyMaxAddr1 >= start)
- FlushPendingMemInfo();
- if (pendingNotifyMinAddr2 < start + size && pendingNotifyMaxAddr2 >= start)
- FlushPendingMemInfo();
+ if (flush) {
+ if (pendingNotifyMinAddr1 < start + size && pendingNotifyMaxAddr1 >= start)
+ FlushPendingMemInfo();
+ if (pendingNotifyMinAddr2 < start + size && pendingNotifyMaxAddr2 >= start)
+ FlushPendingMemInfo();
+ }
if (flags & MemBlockFlags::ALLOC) {
const char *tag = allocMap.FastFindWriteTag(MemBlockFlags::ALLOC, start, size);
@@ -564,22 +648,63 @@ size_t FormatMemWriteTagAt(char *buf, size_t sz, const char *prefix, uint32_t st
return snprintf(buf, sz, "%s%08x_size_%08x", prefix, start, size);
}
+size_t FormatMemWriteTagAtNoFlush(char *buf, size_t sz, const char *prefix, uint32_t start, uint32_t size) {
+ const char *tag = FindWriteTagByFlag(MemBlockFlags::WRITE, start, size, false);
+ if (tag && strcmp(tag, "MemInit") != 0) {
+ return snprintf(buf, sz, "%s%s", prefix, tag);
+ }
+ // Fall back to alloc and texture, especially for VRAM. We prefer write above.
+ tag = FindWriteTagByFlag(MemBlockFlags::ALLOC | MemBlockFlags::TEXTURE, start, size, false);
+ if (tag) {
+ return snprintf(buf, sz, "%s%s", prefix, tag);
+ }
+ return snprintf(buf, sz, "%s%08x_size_%08x", prefix, start, size);
+}
+
+static void FlushMemInfoThread() {
+ while (flushThreadRunning.load()) {
+ flushThreadPending = false;
+ FlushPendingMemInfo();
+
+ std::unique_lock guard(flushLock);
+ flushCond.wait(guard, [] {
+ return flushThreadPending.load();
+ });
+ }
+}
+
void MemBlockInfoInit() {
- std::lock_guard guard(pendingMutex);
+ std::lock_guard guard(pendingReadMutex);
+ std::lock_guard guardW(pendingWriteMutex);
pendingNotifies.reserve(MAX_PENDING_NOTIFIES);
pendingNotifyMinAddr1 = 0xFFFFFFFF;
pendingNotifyMaxAddr1 = 0;
pendingNotifyMinAddr2 = 0xFFFFFFFF;
pendingNotifyMaxAddr2 = 0;
+
+ flushThreadRunning = true;
+ flushThreadPending = false;
+ flushThread = std::thread(&FlushMemInfoThread);
}
void MemBlockInfoShutdown() {
- std::lock_guard guard(pendingMutex);
- allocMap.Reset();
- suballocMap.Reset();
- writeMap.Reset();
- textureMap.Reset();
- pendingNotifies.clear();
+ {
+ std::lock_guard guard(pendingReadMutex);
+ std::lock_guard guardW(pendingWriteMutex);
+ allocMap.Reset();
+ suballocMap.Reset();
+ writeMap.Reset();
+ textureMap.Reset();
+ pendingNotifies.clear();
+ }
+
+ if (flushThreadRunning.load()) {
+ std::lock_guard guard(flushLock);
+ flushThreadRunning = false;
+ flushThreadPending = true;
+ }
+ flushCond.notify_one();
+ flushThread.join();
}
void MemBlockInfoDoState(PointerWrap &p) {
diff --git a/Core/Debugger/MemBlockInfo.h b/Core/Debugger/MemBlockInfo.h
index 108423d53f4b..b07c326f82b0 100644
--- a/Core/Debugger/MemBlockInfo.h
+++ b/Core/Debugger/MemBlockInfo.h
@@ -53,6 +53,7 @@ struct MemBlockInfo {
void NotifyMemInfo(MemBlockFlags flags, uint32_t start, uint32_t size, const char *tag, size_t tagLength);
void NotifyMemInfoPC(MemBlockFlags flags, uint32_t start, uint32_t size, uint32_t pc, const char *tag, size_t tagLength);
+void NotifyMemInfoCopy(uint32_t destPtr, uint32_t srcPtr, uint32_t size, const char *prefix);
// This lets us avoid calling strlen on string constants, instead the string length (including null,
// so we have to subtract 1) is computed at compile time.
diff --git a/Core/FileSystems/BlockDevices.cpp b/Core/FileSystems/BlockDevices.cpp
index c1c930f0c610..eaeb4c6261ee 100644
--- a/Core/FileSystems/BlockDevices.cpp
+++ b/Core/FileSystems/BlockDevices.cpp
@@ -24,8 +24,11 @@
#include "Common/System/OSD.h"
#include "Common/Log.h"
#include "Common/Swap.h"
+#include "Common/File/FileUtil.h"
+#include "Common/File/DirListing.h"
#include "Core/Loaders.h"
#include "Core/FileSystems/BlockDevices.h"
+#include "libchdr/chd.h"
extern "C"
{
@@ -37,19 +40,28 @@ extern "C"
std::mutex NPDRMDemoBlockDevice::mutex_;
BlockDevice *constructBlockDevice(FileLoader *fileLoader) {
- // Check for CISO
if (!fileLoader->Exists())
return nullptr;
- char buffer[4]{};
- size_t size = fileLoader->ReadAt(0, 1, 4, buffer);
- if (size == 4 && !memcmp(buffer, "CISO", 4))
+ char buffer[8]{};
+ size_t size = fileLoader->ReadAt(0, 1, 8, buffer);
+ if (size != 8) {
+ // Bad or empty file
+ return nullptr;
+ }
+
+ // Check for CISO
+ if (!memcmp(buffer, "CISO", 4)) {
return new CISOFileBlockDevice(fileLoader);
- if (size == 4 && !memcmp(buffer, "\x00PBP", 4)) {
+ } else if (!memcmp(buffer, "\x00PBP", 4)) {
uint32_t psarOffset = 0;
size = fileLoader->ReadAt(0x24, 1, 4, &psarOffset);
if (size == 4 && psarOffset < fileLoader->FileSize())
return new NPDRMDemoBlockDevice(fileLoader);
+ } else if (!memcmp(buffer, "MComprHD", 8)) {
+ return new CHDFileBlockDevice(fileLoader);
}
+
+ // Should be just a regular ISO. Let's open it as a plain block device and let the other systems take over.
return new FileBlockDevice(fileLoader);
}
@@ -393,7 +405,7 @@ NPDRMDemoBlockDevice::NPDRMDemoBlockDevice(FileLoader *fileLoader)
fileLoader_->ReadAt(0x24, 1, 4, &psarOffset);
size_t readSize = fileLoader_->ReadAt(psarOffset, 1, 256, &np_header);
- if(readSize!=256){
+ if (readSize != 256){
ERROR_LOG(LOADER, "Invalid NPUMDIMG header!");
}
@@ -445,7 +457,6 @@ NPDRMDemoBlockDevice::NPDRMDemoBlockDevice(FileLoader *fileLoader)
}
currentBlock = -1;
-
}
NPDRMDemoBlockDevice::~NPDRMDemoBlockDevice()
@@ -520,3 +531,150 @@ bool NPDRMDemoBlockDevice::ReadBlock(int blockNumber, u8 *outPtr, bool uncached)
return true;
}
+
+/*
+ * CHD file
+ */
+static const UINT8 nullsha1[CHD_SHA1_BYTES] = { 0 };
+
+struct CHDImpl {
+ chd_file *chd = nullptr;
+ const chd_header *header = nullptr;
+};
+
+CHDFileBlockDevice::CHDFileBlockDevice(FileLoader *fileLoader)
+ : BlockDevice(fileLoader), impl_(new CHDImpl())
+{
+ Path paths[8];
+ paths[0] = fileLoader->GetPath();
+ int depth = 0;
+
+ /*
+ // TODO: Support parent/child CHD files.
+
+ // Default, in case of failure
+ numBlocks = 0;
+
+ chd_header childHeader;
+
+ chd_error err = chd_read_header(paths[0].c_str(), &childHeader);
+ if (err != CHDERR_NONE) {
+ ERROR_LOG(LOADER, "Error loading CHD header for '%s': %s", paths[0].c_str(), chd_error_string(err));
+ NotifyReadError();
+ return;
+ }
+
+ if (memcmp(nullsha1, childHeader.parentsha1, sizeof(childHeader.sha1)) != 0) {
+ chd_header parentHeader;
+
+ // Look for parent CHD in current directory
+ Path chdDir = paths[0].NavigateUp();
+
+ std::vector files;
+ if (File::GetFilesInDir(chdDir, &files)) {
+ parentHeader.length = 0;
+
+ for (const auto &file : files) {
+ std::string extension = file.fullName.GetFileExtension();
+ if (extension != ".chd") {
+ continue;
+ }
+
+ if (chd_read_header(filepath.c_str(), &parentHeader) == CHDERR_NONE &&
+ memcmp(parentHeader.sha1, childHeader.parentsha1, sizeof(parentHeader.sha1)) == 0) {
+ // ERROR_LOG(LOADER, "Checking '%s'", filepath.c_str());
+ paths[++depth] = filepath;
+ break;
+ }
+ }
+
+ // Check if parentHeader was opened
+ if (parentHeader.length == 0) {
+ ERROR_LOG(LOADER, "Error loading CHD '%s': parents not found", fileLoader->GetPath().c_str());
+ NotifyReadError();
+ return;
+ }
+ memcpy(childHeader.parentsha1, parentHeader.parentsha1, sizeof(childHeader.parentsha1));
+ } while (memcmp(nullsha1, childHeader.parentsha1, sizeof(childHeader.sha1)) != 0);
+ }
+ */
+
+ chd_file *parent = NULL;
+ chd_file *child = NULL;
+
+ FILE *file = File::OpenCFile(paths[depth], "rb");
+ if (!file) {
+ ERROR_LOG(LOADER, "Error opening CHD file '%s'", paths[depth].c_str());
+ NotifyReadError();
+ return;
+ }
+ chd_error err = chd_open_file(file, CHD_OPEN_READ, NULL, &child);
+ if (err != CHDERR_NONE) {
+ ERROR_LOG(LOADER, "Error loading CHD '%s': %s", paths[depth].c_str(), chd_error_string(err));
+ NotifyReadError();
+ return;
+ }
+
+ // We won't enter this loop until we enable the parent/child stuff above.
+ for (int d = depth - 1; d >= 0; d--) {
+ parent = child;
+ child = NULL;
+ // TODO: Use chd_open_file
+ err = chd_open(paths[d].c_str(), CHD_OPEN_READ, parent, &child);
+ if (err != CHDERR_NONE) {
+ ERROR_LOG(LOADER, "Error loading CHD '%s': %s", paths[d].c_str(), chd_error_string(err));
+ NotifyReadError();
+ return;
+ }
+ }
+ impl_->chd = child;
+
+ impl_->header = chd_get_header(impl_->chd);
+ readBuffer = new u8[impl_->header->hunkbytes];
+ currentHunk = -1;
+ blocksPerHunk = impl_->header->hunkbytes / impl_->header->unitbytes;
+ numBlocks = impl_->header->unitcount;
+}
+
+CHDFileBlockDevice::~CHDFileBlockDevice()
+{
+ if (numBlocks > 0) {
+ chd_close(impl_->chd);
+ delete[] readBuffer;
+ }
+}
+
+bool CHDFileBlockDevice::ReadBlock(int blockNumber, u8 *outPtr, bool uncached)
+{
+ if ((u32)blockNumber >= numBlocks) {
+ memset(outPtr, 0, GetBlockSize());
+ return false;
+ }
+ u32 hunk = blockNumber / blocksPerHunk;
+ u32 blockInHunk = blockNumber % blocksPerHunk;
+
+ if (currentHunk != hunk) {
+ chd_error err = chd_read(impl_->chd, hunk, readBuffer);
+ if (err != CHDERR_NONE) {
+ ERROR_LOG(LOADER, "CHD read failed: %d %d %s", blockNumber, hunk, chd_error_string(err));
+ NotifyReadError();
+ }
+ }
+ memcpy(outPtr, readBuffer + blockInHunk * impl_->header->unitbytes, GetBlockSize());
+
+ return true;
+}
+
+bool CHDFileBlockDevice::ReadBlocks(u32 minBlock, int count, u8 *outPtr) {
+ if (minBlock >= numBlocks) {
+ memset(outPtr, 0, GetBlockSize() * count);
+ return false;
+ }
+
+ for (int i = 0; i < count; i++) {
+ if (!ReadBlock(minBlock + i, outPtr + i * GetBlockSize())) {
+ return false;
+ }
+ }
+ return true;
+}
diff --git a/Core/FileSystems/BlockDevices.h b/Core/FileSystems/BlockDevices.h
index 3575d8cded8a..fd27f667fb91 100644
--- a/Core/FileSystems/BlockDevices.h
+++ b/Core/FileSystems/BlockDevices.h
@@ -130,5 +130,23 @@ class NPDRMDemoBlockDevice : public BlockDevice {
u8 *tempBuf;
};
+struct CHDImpl;
+
+class CHDFileBlockDevice : public BlockDevice {
+public:
+ CHDFileBlockDevice(FileLoader *fileLoader);
+ ~CHDFileBlockDevice();
+ bool ReadBlock(int blockNumber, u8 *outPtr, bool uncached = false) override;
+ bool ReadBlocks(u32 minBlock, int count, u8 *outPtr) override;
+ u32 GetNumBlocks() override { return numBlocks; }
+ bool IsDisc() override { return true; }
+
+private:
+ std::unique_ptr impl_;
+ u8 *readBuffer;
+ u32 currentHunk;
+ u32 blocksPerHunk;
+ u32 numBlocks;
+};
BlockDevice *constructBlockDevice(FileLoader *fileLoader);
diff --git a/Core/HLE/ReplaceTables.cpp b/Core/HLE/ReplaceTables.cpp
index 30afa6e26b2c..4695d13926aa 100644
--- a/Core/HLE/ReplaceTables.cpp
+++ b/Core/HLE/ReplaceTables.cpp
@@ -159,16 +159,19 @@ static int Replace_memcpy() {
RETURN(destPtr);
if (MemBlockInfoDetailed(bytes)) {
- char tagData[128];
- size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "ReplaceMemcpy/", srcPtr, bytes);
- NotifyMemInfo(MemBlockFlags::READ, srcPtr, bytes, tagData, tagSize);
- NotifyMemInfo(MemBlockFlags::WRITE, destPtr, bytes, tagData, tagSize);
-
// It's pretty common that games will copy video data.
- if (!strcmp(tagData, "ReplaceMemcpy/VideoDecode") || !strcmp(tagData, "ReplaceMemcpy/VideoDecodeRange")) {
- if (bytes == 512 * 272 * 4) {
+ // Detect that by manually reading the tag when the size looks right.
+ if (bytes == 512 * 272 * 4) {
+ char tagData[128];
+ size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "ReplaceMemcpy/", srcPtr, bytes);
+ NotifyMemInfo(MemBlockFlags::READ, srcPtr, bytes, tagData, tagSize);
+ NotifyMemInfo(MemBlockFlags::WRITE, destPtr, bytes, tagData, tagSize);
+
+ if (!strcmp(tagData, "ReplaceMemcpy/VideoDecode") || !strcmp(tagData, "ReplaceMemcpy/VideoDecodeRange")) {
gpu->PerformWriteFormattedFromMemory(destPtr, bytes, 512, GE_FORMAT_8888);
}
+ } else {
+ NotifyMemInfoCopy(destPtr, srcPtr, bytes, "ReplaceMemcpy/");
}
}
@@ -212,16 +215,19 @@ static int Replace_memcpy_jak() {
RETURN(destPtr);
if (MemBlockInfoDetailed(bytes)) {
- char tagData[128];
- size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "ReplaceMemcpy/", srcPtr, bytes);
- NotifyMemInfo(MemBlockFlags::READ, srcPtr, bytes, tagData, tagSize);
- NotifyMemInfo(MemBlockFlags::WRITE, destPtr, bytes, tagData, tagSize);
-
// It's pretty common that games will copy video data.
- if (!strcmp(tagData, "ReplaceMemcpy/VideoDecode") || !strcmp(tagData, "ReplaceMemcpy/VideoDecodeRange")) {
- if (bytes == 512 * 272 * 4) {
+ // Detect that by manually reading the tag when the size looks right.
+ if (bytes == 512 * 272 * 4) {
+ char tagData[128];
+ size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "ReplaceMemcpy/", srcPtr, bytes);
+ NotifyMemInfo(MemBlockFlags::READ, srcPtr, bytes, tagData, tagSize);
+ NotifyMemInfo(MemBlockFlags::WRITE, destPtr, bytes, tagData, tagSize);
+
+ if (!strcmp(tagData, "ReplaceMemcpy/VideoDecode") || !strcmp(tagData, "ReplaceMemcpy/VideoDecodeRange")) {
gpu->PerformWriteFormattedFromMemory(destPtr, bytes, 512, GE_FORMAT_8888);
}
+ } else {
+ NotifyMemInfoCopy(destPtr, srcPtr, bytes, "ReplaceMemcpy/");
}
}
@@ -252,10 +258,7 @@ static int Replace_memcpy16() {
RETURN(destPtr);
if (MemBlockInfoDetailed(bytes)) {
- char tagData[128];
- size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "ReplaceMemcpy16/", srcPtr, bytes);
- NotifyMemInfo(MemBlockFlags::READ, srcPtr, bytes, tagData, tagSize);
- NotifyMemInfo(MemBlockFlags::WRITE, destPtr, bytes, tagData, tagSize);
+ NotifyMemInfoCopy(destPtr, srcPtr, bytes, "ReplaceMemcpy16/");
}
return 10 + bytes / 4; // approximation
@@ -294,10 +297,7 @@ static int Replace_memcpy_swizzled() {
RETURN(0);
if (MemBlockInfoDetailed(pitch * h)) {
- char tagData[128];
- size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "ReplaceMemcpySwizzle/", srcPtr, pitch * h);
- NotifyMemInfo(MemBlockFlags::READ, srcPtr, pitch * h, tagData, tagSize);
- NotifyMemInfo(MemBlockFlags::WRITE, destPtr, pitch * h, tagData, tagSize);
+ NotifyMemInfoCopy(destPtr, srcPtr, pitch * h, "ReplaceMemcpySwizzle/");
}
return 10 + (pitch * h) / 4; // approximation
@@ -326,10 +326,7 @@ static int Replace_memmove() {
RETURN(destPtr);
if (MemBlockInfoDetailed(bytes)) {
- char tagData[128];
- size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "ReplaceMemmove/", srcPtr, bytes);
- NotifyMemInfo(MemBlockFlags::READ, srcPtr, bytes, tagData, tagSize);
- NotifyMemInfo(MemBlockFlags::WRITE, destPtr, bytes, tagData, tagSize);
+ NotifyMemInfoCopy(destPtr, srcPtr, bytes, "ReplaceMemmove/");
}
return 10 + bytes / 4; // approximation
@@ -1590,7 +1587,10 @@ std::vector GetReplacementFuncIndexes(u64 hash, int funcSize) {
return emptyResult;
}
-const ReplacementTableEntry *GetReplacementFunc(int i) {
+const ReplacementTableEntry *GetReplacementFunc(size_t i) {
+ if (i >= ARRAY_SIZE(entries)) {
+ return nullptr;
+ }
return &entries[i];
}
diff --git a/Core/HLE/ReplaceTables.h b/Core/HLE/ReplaceTables.h
index 94ee26d69ea1..980f506b6af1 100644
--- a/Core/HLE/ReplaceTables.h
+++ b/Core/HLE/ReplaceTables.h
@@ -64,7 +64,7 @@ void Replacement_Shutdown();
int GetNumReplacementFuncs();
std::vector GetReplacementFuncIndexes(u64 hash, int funcSize);
-const ReplacementTableEntry *GetReplacementFunc(int index);
+const ReplacementTableEntry *GetReplacementFunc(size_t index);
void WriteReplaceInstructions(u32 address, u64 hash, int size);
void RestoreReplacedInstruction(u32 address);
diff --git a/Core/HLE/sceDmac.cpp b/Core/HLE/sceDmac.cpp
index f7bcf0d0f6f5..8feb1fc89e74 100644
--- a/Core/HLE/sceDmac.cpp
+++ b/Core/HLE/sceDmac.cpp
@@ -51,12 +51,11 @@ static int __DmacMemcpy(u32 dst, u32 src, u32 size) {
}
if (!skip && size != 0) {
currentMIPS->InvalidateICache(src, size);
+ if (Memory::IsValidRange(dst, size) && Memory::IsValidRange(src, size)) {
+ memcpy(Memory::GetPointerWriteUnchecked(dst), Memory::GetPointerUnchecked(src), size);
+ }
if (MemBlockInfoDetailed(size)) {
- char tagData[128];
- size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "DmacMemcpy/", src, size);
- Memory::Memcpy(dst, src, size, tagData, tagSize);
- } else {
- Memory::Memcpy(dst, src, size, "DmacMemcpy");
+ NotifyMemInfoCopy(dst, src, size, "DmacMemcpy/");
}
currentMIPS->InvalidateICache(dst, size);
}
diff --git a/Core/HLE/sceIo.cpp b/Core/HLE/sceIo.cpp
index b554d34e806b..d8599f98650a 100644
--- a/Core/HLE/sceIo.cpp
+++ b/Core/HLE/sceIo.cpp
@@ -1486,6 +1486,12 @@ static u32 sceIoLseek32Async(int id, int offset, int whence) {
}
static FileNode *__IoOpen(int &error, const char *filename, int flags, int mode) {
+ if (!filename) {
+ // To prevent crashes. Not sure about the correct value.
+ error = SCE_KERNEL_ERROR_ERRNO_FILE_NOT_FOUND;
+ return nullptr;
+ }
+
int access = FILEACCESS_NONE;
if (flags & PSP_O_RDONLY)
access |= FILEACCESS_READ;
diff --git a/Core/HLE/sceKernelInterrupt.cpp b/Core/HLE/sceKernelInterrupt.cpp
index ec4b452a6402..76e1788e397e 100644
--- a/Core/HLE/sceKernelInterrupt.cpp
+++ b/Core/HLE/sceKernelInterrupt.cpp
@@ -657,10 +657,7 @@ static u32 sceKernelMemcpy(u32 dst, u32 src, u32 size)
}
if (MemBlockInfoDetailed(size)) {
- char tagData[128];
- size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "KernelMemcpy/", src, size);
- NotifyMemInfo(MemBlockFlags::READ, src, size, tagData, tagSize);
- NotifyMemInfo(MemBlockFlags::WRITE, dst, size, tagData, tagSize);
+ NotifyMemInfoCopy(dst, src, size, "KernelMemcpy/");
}
return dst;
@@ -693,10 +690,7 @@ static u32 sysclib_memcpy(u32 dst, u32 src, u32 size) {
memcpy(Memory::GetPointerWriteUnchecked(dst), Memory::GetPointerUnchecked(src), size);
}
if (MemBlockInfoDetailed(size)) {
- char tagData[128];
- size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "KernelMemcpy/", src, size);
- NotifyMemInfo(MemBlockFlags::READ, src, size, tagData, tagSize);
- NotifyMemInfo(MemBlockFlags::WRITE, dst, size, tagData, tagSize);
+ NotifyMemInfoCopy(dst, src, size, "KernelMemcpy/");
}
return dst;
}
@@ -797,10 +791,7 @@ static u32 sysclib_memmove(u32 dst, u32 src, u32 size) {
memmove(Memory::GetPointerWriteUnchecked(dst), Memory::GetPointerUnchecked(src), size);
}
if (MemBlockInfoDetailed(size)) {
- char tagData[128];
- size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "KernelMemmove/", src, size);
- NotifyMemInfo(MemBlockFlags::READ, src, size, tagData, tagSize);
- NotifyMemInfo(MemBlockFlags::WRITE, dst, size, tagData, tagSize);
+ NotifyMemInfoCopy(dst, src, size, "KernelMemmove/");
}
return 0;
}
diff --git a/Core/KeyMap.cpp b/Core/KeyMap.cpp
index 0b9a18dff118..fae0a0665fea 100644
--- a/Core/KeyMap.cpp
+++ b/Core/KeyMap.cpp
@@ -516,11 +516,11 @@ bool InputMappingsFromPspButton(int btn, std::vector *mapping
return false;
}
bool mapped = false;
- for (auto iter2 = iter->second.begin(); iter2 != iter->second.end(); ++iter2) {
- bool ignore = ignoreMouse && iter2->HasMouse();
+ for (auto &iter2 : iter->second) {
+ bool ignore = ignoreMouse && iter2.HasMouse();
if (mappings && !ignore) {
mapped = true;
- mappings->push_back(*iter2);
+ mappings->push_back(iter2);
}
}
return mapped;
@@ -536,8 +536,6 @@ bool PspButtonHasMappings(int btn) {
}
MappedAnalogAxes MappedAxesForDevice(InputDeviceID deviceId) {
- MappedAnalogAxes result{};
-
// Find the axisId mapped for a specific virtual button.
auto findAxisId = [&](int btn) -> MappedAnalogAxis {
MappedAnalogAxis info{ -1 };
@@ -563,6 +561,7 @@ MappedAnalogAxes MappedAxesForDevice(InputDeviceID deviceId) {
return MappedAnalogAxis{ -1 };
};
+ MappedAnalogAxes result;
std::lock_guard guard(g_controllerMapLock);
result.leftX = findAxisIdPair(VIRTKEY_AXIS_X_MIN, VIRTKEY_AXIS_X_MAX);
result.leftY = findAxisIdPair(VIRTKEY_AXIS_Y_MIN, VIRTKEY_AXIS_Y_MAX);
@@ -621,6 +620,7 @@ bool ReplaceSingleKeyMapping(int btn, int index, MultiInputMapping key) {
}
void DeleteNthMapping(int key, int number) {
+ std::lock_guard guard(g_controllerMapLock);
auto iter = g_controllerMap.find(key);
if (iter != g_controllerMap.end()) {
if (number < iter->second.size()) {
@@ -699,6 +699,8 @@ void LoadFromIni(IniFile &file) {
return;
}
+ std::lock_guard guard(g_controllerMapLock);
+
Section *controls = file.GetOrCreateSection("ControlMapping");
for (size_t i = 0; i < ARRAY_SIZE(psp_button_names); i++) {
std::string value;
@@ -730,6 +732,8 @@ void LoadFromIni(IniFile &file) {
void SaveToIni(IniFile &file) {
Section *controls = file.GetOrCreateSection("ControlMapping");
+ std::lock_guard guard(g_controllerMapLock);
+
for (size_t i = 0; i < ARRAY_SIZE(psp_button_names); i++) {
std::vector keys;
InputMappingsFromPspButton(psp_button_names[i].key, &keys, false);
diff --git a/Core/Loaders.cpp b/Core/Loaders.cpp
index 28190edd8080..6bb5f5d34365 100644
--- a/Core/Loaders.cpp
+++ b/Core/Loaders.cpp
@@ -94,6 +94,8 @@ IdentifiedFileType Identify_File(FileLoader *fileLoader, std::string *errorStrin
return IdentifiedFileType::PSP_ISO;
} else if (extension == ".cso") {
return IdentifiedFileType::PSP_ISO;
+ } else if (extension == ".chd") {
+ return IdentifiedFileType::PSP_ISO;
} else if (extension == ".ppst") {
return IdentifiedFileType::PPSSPP_SAVESTATE;
} else if (extension == ".ppdmp") {
diff --git a/Core/MIPS/ARM/ArmJit.cpp b/Core/MIPS/ARM/ArmJit.cpp
index c0ae1071a5ac..4cfbc2512449 100644
--- a/Core/MIPS/ARM/ArmJit.cpp
+++ b/Core/MIPS/ARM/ArmJit.cpp
@@ -561,7 +561,7 @@ void ArmJit::Comp_ReplacementFunc(MIPSOpcode op)
const ReplacementTableEntry *entry = GetReplacementFunc(index);
if (!entry) {
- ERROR_LOG(HLE, "Invalid replacement op %08x", op.encoding);
+ ERROR_LOG_REPORT_ONCE(replFunc, HLE, "Invalid replacement op %08x at %08x", op.encoding, js.compilerPC);
return;
}
@@ -745,7 +745,9 @@ void ArmJit::UpdateRoundingMode(u32 fcr31) {
// I don't think this gives us that much benefit.
void ArmJit::WriteExit(u32 destination, int exit_num)
{
- // TODO: Check destination is valid and trigger exception.
+ // NOTE: Can't blindly check for bad destination addresses here, sometimes exits with bad destinations are written intentionally (like breaks).
+ _assert_msg_(exit_num < MAX_JIT_BLOCK_EXITS, "Expected a valid exit_num. dest=%08x", destination);
+
WriteDownCount();
//If nobody has taken care of this yet (this can be removed when all branches are done)
JitBlock *b = js.curBlock;
diff --git a/Core/MIPS/ARM64/Arm64CompVFPU.cpp b/Core/MIPS/ARM64/Arm64CompVFPU.cpp
index 775b95df1527..d5dd416d6f52 100644
--- a/Core/MIPS/ARM64/Arm64CompVFPU.cpp
+++ b/Core/MIPS/ARM64/Arm64CompVFPU.cpp
@@ -1504,7 +1504,7 @@ namespace MIPSComp {
void Arm64Jit::Comp_VCrossQuat(MIPSOpcode op) {
// This op does not support prefixes anyway.
CONDITIONAL_DISABLE(VFPU_VEC);
- if (js.HasUnknownPrefix())
+ if (!js.HasNoPrefix())
DISABLE;
VectorSize sz = GetVecSize(op);
@@ -1521,20 +1521,26 @@ namespace MIPSComp {
if (sz == V_Triple) {
MIPSReg temp3 = fpr.GetTempV();
+ MIPSReg temp4 = fpr.GetTempV();
fpr.MapRegV(temp3, MAP_DIRTY | MAP_NOINIT);
+ fpr.MapRegV(temp4, MAP_DIRTY | MAP_NOINIT);
// Cross product vcrsp.t
- // Compute X
- fp.FMUL(S0, fpr.V(sregs[1]), fpr.V(tregs[2]));
- fp.FMSUB(S0, fpr.V(sregs[2]), fpr.V(tregs[1]), S0);
+ // Note: using FMSUB here causes accuracy issues, see #18203.
+ // Compute X: s[1] * t[2] - s[2] * t[1]
+ fp.FMUL(fpr.V(temp3), fpr.V(sregs[1]), fpr.V(tregs[2]));
+ fp.FMUL(fpr.V(temp4), fpr.V(sregs[2]), fpr.V(tregs[1]));
+ fp.FSUB(S0, fpr.V(temp3), fpr.V(temp4));
- // Compute Y
- fp.FMUL(S1, fpr.V(sregs[2]), fpr.V(tregs[0]));
- fp.FMSUB(S1, fpr.V(sregs[0]), fpr.V(tregs[2]), S1);
+ // Compute Y: s[2] * t[0] - s[0] * t[2]
+ fp.FMUL(fpr.V(temp3), fpr.V(sregs[2]), fpr.V(tregs[0]));
+ fp.FMUL(fpr.V(temp4), fpr.V(sregs[0]), fpr.V(tregs[2]));
+ fp.FSUB(S1, fpr.V(temp3), fpr.V(temp4));
- // Compute Z
+ // Compute Z: s[0] * t[1] - s[1] * t[0]
fp.FMUL(fpr.V(temp3), fpr.V(sregs[0]), fpr.V(tregs[1]));
- fp.FMSUB(fpr.V(temp3), fpr.V(sregs[1]), fpr.V(tregs[0]), fpr.V(temp3));
+ fp.FMUL(fpr.V(temp4), fpr.V(sregs[1]), fpr.V(tregs[0]));
+ fp.FSUB(fpr.V(temp3), fpr.V(temp3), fpr.V(temp4));
fpr.MapRegsAndSpillLockV(dregs, sz, MAP_NOINIT);
fp.FMOV(fpr.V(dregs[0]), S0);
diff --git a/Core/MIPS/ARM64/Arm64IRAsm.cpp b/Core/MIPS/ARM64/Arm64IRAsm.cpp
index 42bee863a22a..d623c6cd58c1 100644
--- a/Core/MIPS/ARM64/Arm64IRAsm.cpp
+++ b/Core/MIPS/ARM64/Arm64IRAsm.cpp
@@ -50,8 +50,18 @@ static void ShowPC(void *membase, void *jitbase) {
}
void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
- BeginWrite(GetMemoryProtectPageSize());
+ // This will be used as a writable scratch area, always 32-bit accessible.
const u8 *start = AlignCodePage();
+ if (DebugProfilerEnabled()) {
+ ProtectMemoryPages(start, GetMemoryProtectPageSize(), MEM_PROT_READ | MEM_PROT_WRITE);
+ hooks_.profilerPC = (uint32_t *)GetWritableCodePtr();
+ Write32(0);
+ hooks_.profilerStatus = (IRProfilerStatus *)GetWritableCodePtr();
+ Write32(0);
+ }
+
+ const u8 *disasmStart = AlignCodePage();
+ BeginWrite(GetMemoryProtectPageSize());
if (jo.useStaticAlloc) {
saveStaticRegisters_ = AlignCode16();
@@ -63,8 +73,6 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
regs_.EmitLoadStaticRegisters();
LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
RET();
-
- start = saveStaticRegisters_;
} else {
saveStaticRegisters_ = nullptr;
loadStaticRegisters_ = nullptr;
@@ -152,13 +160,17 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
MOVI2R(JITBASEREG, (intptr_t)GetBasePtr() - MIPS_EMUHACK_OPCODE);
LoadStaticRegisters();
+ WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
MovFromPC(SCRATCH1);
+ WriteDebugPC(SCRATCH1);
outerLoopPCInSCRATCH1_ = GetCodePtr();
MovToPC(SCRATCH1);
outerLoop_ = GetCodePtr();
SaveStaticRegisters(); // Advance can change the downcount, so must save/restore
RestoreRoundingMode(true);
+ WriteDebugProfilerStatus(IRProfilerStatus::TIMER_ADVANCE);
QuickCallFunction(SCRATCH1_64, &CoreTiming::Advance);
+ WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
ApplyRoundingMode(true);
LoadStaticRegisters();
@@ -191,6 +203,7 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
}
MovFromPC(SCRATCH1);
+ WriteDebugPC(SCRATCH1);
#ifdef MASKED_PSP_MEMORY
ANDI2R(SCRATCH1, SCRATCH1, Memory::MEMVIEW32_MASK);
#endif
@@ -206,7 +219,9 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
// No block found, let's jit. We don't need to save static regs, they're all callee saved.
RestoreRoundingMode(true);
+ WriteDebugProfilerStatus(IRProfilerStatus::COMPILING);
QuickCallFunction(SCRATCH1_64, &MIPSComp::JitAt);
+ WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
ApplyRoundingMode(true);
// Let's just dispatch again, we'll enter the block since we know it's there.
@@ -221,6 +236,7 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
const uint8_t *quitLoop = GetCodePtr();
SetJumpTarget(badCoreState);
+ WriteDebugProfilerStatus(IRProfilerStatus::NOT_RUNNING);
SaveStaticRegisters();
RestoreRoundingMode(true);
@@ -251,7 +267,7 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
// Leave this at the end, add more stuff above.
if (enableDisasm) {
- std::vector lines = DisassembleArm64(start, (int)(GetCodePtr() - start));
+ std::vector lines = DisassembleArm64(disasmStart, (int)(GetCodePtr() - disasmStart));
for (auto s : lines) {
INFO_LOG(JIT, "%s", s.c_str());
}
diff --git a/Core/MIPS/ARM64/Arm64IRCompALU.cpp b/Core/MIPS/ARM64/Arm64IRCompALU.cpp
index e83fabd6b902..4aeb2ceeb200 100644
--- a/Core/MIPS/ARM64/Arm64IRCompALU.cpp
+++ b/Core/MIPS/ARM64/Arm64IRCompALU.cpp
@@ -170,9 +170,18 @@ void Arm64JitBackend::CompIR_Compare(IRInst inst) {
break;
case IROp::SltU:
- regs_.Map(inst);
- CMP(regs_.R(inst.src1), regs_.R(inst.src2));
- CSET(regs_.R(inst.dest), CC_LO);
+ if (regs_.IsGPRImm(inst.src1) && regs_.GetGPRImm(inst.src1) == 0) {
+ // This is kinda common, same as != 0. Avoid flushing src1.
+ regs_.SpillLockGPR(inst.src2, inst.dest);
+ regs_.MapGPR(inst.src2);
+ regs_.MapGPR(inst.dest, MIPSMap::NOINIT);
+ CMP(regs_.R(inst.src2), 0);
+ CSET(regs_.R(inst.dest), CC_NEQ);
+ } else {
+ regs_.Map(inst);
+ CMP(regs_.R(inst.src1), regs_.R(inst.src2));
+ CSET(regs_.R(inst.dest), CC_LO);
+ }
break;
case IROp::SltUConst:
diff --git a/Core/MIPS/ARM64/Arm64IRCompFPU.cpp b/Core/MIPS/ARM64/Arm64IRCompFPU.cpp
index 74f62da5aa26..99b502c74b3c 100644
--- a/Core/MIPS/ARM64/Arm64IRCompFPU.cpp
+++ b/Core/MIPS/ARM64/Arm64IRCompFPU.cpp
@@ -298,17 +298,23 @@ void Arm64JitBackend::CompIR_FCompare(IRInst inst) {
case IROp::FCmpVfpuAggregate:
regs_.MapGPR(IRREG_VFPU_CC, MIPSMap::DIRTY);
- MOVI2R(SCRATCH1, inst.dest);
- // Grab the any bit.
- TST(regs_.R(IRREG_VFPU_CC), SCRATCH1);
- CSET(SCRATCH2, CC_NEQ);
- // Now the all bit, by clearing our mask to zero.
- BICS(WZR, SCRATCH1, regs_.R(IRREG_VFPU_CC));
- CSET(SCRATCH1, CC_EQ);
+ if (inst.dest == 1) {
+ // Just replicate the lowest bit to the others.
+ BFI(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), 4, 1);
+ BFI(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), 5, 1);
+ } else {
+ MOVI2R(SCRATCH1, inst.dest);
+ // Grab the any bit.
+ TST(regs_.R(IRREG_VFPU_CC), SCRATCH1);
+ CSET(SCRATCH2, CC_NEQ);
+ // Now the all bit, by clearing our mask to zero.
+ BICS(WZR, SCRATCH1, regs_.R(IRREG_VFPU_CC));
+ CSET(SCRATCH1, CC_EQ);
- // Insert the bits into place.
- BFI(regs_.R(IRREG_VFPU_CC), SCRATCH2, 4, 1);
- BFI(regs_.R(IRREG_VFPU_CC), SCRATCH1, 5, 1);
+ // Insert the bits into place.
+ BFI(regs_.R(IRREG_VFPU_CC), SCRATCH2, 4, 1);
+ BFI(regs_.R(IRREG_VFPU_CC), SCRATCH1, 5, 1);
+ }
break;
default:
@@ -502,6 +508,8 @@ void Arm64JitBackend::CompIR_FSpecial(IRInst inst) {
auto callFuncF_F = [&](float (*func)(float)) {
regs_.FlushBeforeCall();
+ WriteDebugProfilerStatus(IRProfilerStatus::MATH_HELPER);
+
// It might be in a non-volatile register.
// TODO: May have to handle a transfer if SIMD here.
if (regs_.IsFPRMapped(inst.src1)) {
@@ -521,6 +529,8 @@ void Arm64JitBackend::CompIR_FSpecial(IRInst inst) {
if (regs_.F(inst.dest) != S0) {
fp_.FMOV(regs_.F(inst.dest), S0);
}
+
+ WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
};
switch (inst.op) {
diff --git a/Core/MIPS/ARM64/Arm64IRCompLoadStore.cpp b/Core/MIPS/ARM64/Arm64IRCompLoadStore.cpp
index 42a966d4371a..d0fde9f6f2fc 100644
--- a/Core/MIPS/ARM64/Arm64IRCompLoadStore.cpp
+++ b/Core/MIPS/ARM64/Arm64IRCompLoadStore.cpp
@@ -80,7 +80,12 @@ Arm64JitBackend::LoadStoreArg Arm64JitBackend::PrepareSrc1Address(IRInst inst) {
// If it's about to be clobbered, don't waste time pointerifying. Use displacement.
bool clobbersSrc1 = !readsFromSrc1 && regs_.IsGPRClobbered(inst.src1);
- int32_t imm = (int32_t)inst.constant;
+ int64_t imm = (int32_t)inst.constant;
+ // It can't be this negative, must be a constant address with the top bit set.
+ if ((imm & 0xC0000000) == 0x80000000) {
+ imm = (uint64_t)(uint32_t)inst.constant;
+ }
+
LoadStoreArg addrArg;
if (inst.src1 == MIPS_REG_ZERO) {
// The constant gets applied later.
@@ -100,7 +105,7 @@ Arm64JitBackend::LoadStoreArg Arm64JitBackend::PrepareSrc1Address(IRInst inst) {
// Since we can't modify src1, let's just use a temp reg while copying.
if (!addrArg.useRegisterOffset) {
- ADDI2R(SCRATCH1, regs_.MapGPR(inst.src1), (s64)imm, SCRATCH2);
+ ADDI2R(SCRATCH1, regs_.MapGPR(inst.src1), imm, SCRATCH2);
#ifdef MASKED_PSP_MEMORY
ANDI2R(SCRATCH1, SCRATCH1, Memory::MEMVIEW32_MASK, SCRATCH2);
#endif
@@ -114,7 +119,7 @@ Arm64JitBackend::LoadStoreArg Arm64JitBackend::PrepareSrc1Address(IRInst inst) {
// The offset gets set later.
addrArg.base = regs_.MapGPRAsPointer(inst.src1);
} else {
- ADDI2R(SCRATCH1, regs_.MapGPR(inst.src1), (s64)imm, SCRATCH2);
+ ADDI2R(SCRATCH1, regs_.MapGPR(inst.src1), imm, SCRATCH2);
#ifdef MASKED_PSP_MEMORY
ANDI2R(SCRATCH1, SCRATCH1, Memory::MEMVIEW32_MASK, SCRATCH2);
#endif
@@ -137,15 +142,15 @@ Arm64JitBackend::LoadStoreArg Arm64JitBackend::PrepareSrc1Address(IRInst inst) {
int scale = IROpToByteWidth(inst.op);
if (imm > 0 && (imm & (scale - 1)) == 0 && imm <= 0xFFF * scale) {
// Okay great, use the LDR/STR form.
- addrArg.immOffset = imm;
+ addrArg.immOffset = (int)imm;
addrArg.useUnscaled = false;
} else if (imm >= -256 && imm < 256) {
// An unscaled offset (LDUR/STUR) should work fine for this range.
- addrArg.immOffset = imm;
+ addrArg.immOffset = (int)imm;
addrArg.useUnscaled = true;
} else {
// No luck, we'll need to load into a register.
- MOVI2R(SCRATCH1, (s64)imm);
+ MOVI2R(SCRATCH1, imm);
addrArg.regOffset = SCRATCH1;
addrArg.useRegisterOffset = true;
addrArg.signExtendRegOffset = true;
diff --git a/Core/MIPS/ARM64/Arm64IRCompSystem.cpp b/Core/MIPS/ARM64/Arm64IRCompSystem.cpp
index 282d4fd2ef86..8fba3c320525 100644
--- a/Core/MIPS/ARM64/Arm64IRCompSystem.cpp
+++ b/Core/MIPS/ARM64/Arm64IRCompSystem.cpp
@@ -21,9 +21,11 @@
#include "Common/Profiler/Profiler.h"
#include "Core/Core.h"
+#include "Core/Debugger/Breakpoints.h"
#include "Core/HLE/HLE.h"
#include "Core/HLE/ReplaceTables.h"
#include "Core/MemMap.h"
+#include "Core/MIPS/MIPSAnalyst.h"
#include "Core/MIPS/IR/IRInterpreter.h"
#include "Core/MIPS/ARM64/Arm64IRJit.h"
#include "Core/MIPS/ARM64/Arm64IRRegCache.h"
@@ -70,6 +72,7 @@ void Arm64JitBackend::CompIR_Basic(IRInst inst) {
break;
case IROp::SetPCConst:
+ lastConstPC_ = inst.constant;
MOVI2R(SCRATCH1, inst.constant);
MovToPC(SCRATCH1);
break;
@@ -85,37 +88,118 @@ void Arm64JitBackend::CompIR_Breakpoint(IRInst inst) {
switch (inst.op) {
case IROp::Breakpoint:
+ {
FlushAll();
// Note: the constant could be a delay slot.
MOVI2R(W0, inst.constant);
QuickCallFunction(SCRATCH2_64, &IRRunBreakpoint);
+
+ ptrdiff_t distance = dispatcherCheckCoreState_ - GetCodePointer();
+ if (distance >= -0x100000 && distance < 0x100000) {
+ CBNZ(W0, dispatcherCheckCoreState_);
+ } else {
+ FixupBranch keepOnKeepingOn = CBZ(W0);
+ B(dispatcherCheckCoreState_);
+ SetJumpTarget(keepOnKeepingOn);
+ }
break;
+ }
case IROp::MemoryCheck:
- {
- ARM64Reg addrBase = regs_.MapGPR(inst.src1);
- FlushAll();
- ADDI2R(W1, addrBase, inst.constant, SCRATCH1);
- MovFromPC(W0);
- ADDI2R(W0, W0, inst.dest, SCRATCH1);
- QuickCallFunction(SCRATCH2_64, &IRRunMemCheck);
+ if (regs_.IsGPRImm(inst.src1)) {
+ uint32_t iaddr = regs_.GetGPRImm(inst.src1) + inst.constant;
+ uint32_t checkedPC = lastConstPC_ + inst.dest;
+ int size = MIPSAnalyst::OpMemoryAccessSize(checkedPC);
+ if (size == 0) {
+ checkedPC += 4;
+ size = MIPSAnalyst::OpMemoryAccessSize(checkedPC);
+ }
+ bool isWrite = MIPSAnalyst::IsOpMemoryWrite(checkedPC);
+
+ MemCheck check;
+ if (CBreakPoints::GetMemCheckInRange(iaddr, size, &check)) {
+ if (!(check.cond & MEMCHECK_READ) && !isWrite)
+ break;
+ if (!(check.cond & (MEMCHECK_WRITE | MEMCHECK_WRITE_ONCHANGE)) && isWrite)
+ break;
+
+ // We need to flush, or conditions and log expressions will see old register values.
+ FlushAll();
+
+ MOVI2R(W0, checkedPC);
+ MOVI2R(W1, iaddr);
+ QuickCallFunction(SCRATCH2_64, &IRRunMemCheck);
+
+ ptrdiff_t distance = dispatcherCheckCoreState_ - GetCodePointer();
+ if (distance >= -0x100000 && distance < 0x100000) {
+ CBNZ(W0, dispatcherCheckCoreState_);
+ } else {
+ FixupBranch keepOnKeepingOn = CBZ(W0);
+ B(dispatcherCheckCoreState_);
+ SetJumpTarget(keepOnKeepingOn);
+ }
+ }
+ } else {
+ uint32_t checkedPC = lastConstPC_ + inst.dest;
+ int size = MIPSAnalyst::OpMemoryAccessSize(checkedPC);
+ if (size == 0) {
+ checkedPC += 4;
+ size = MIPSAnalyst::OpMemoryAccessSize(checkedPC);
+ }
+ bool isWrite = MIPSAnalyst::IsOpMemoryWrite(checkedPC);
+
+ const auto memchecks = CBreakPoints::GetMemCheckRanges(isWrite);
+ // We can trivially skip if there are no checks for this type (i.e. read vs write.)
+ if (memchecks.empty())
+ break;
+
+ ARM64Reg addrBase = regs_.MapGPR(inst.src1);
+ ADDI2R(SCRATCH1, addrBase, inst.constant, SCRATCH2);
+
+ // We need to flush, or conditions and log expressions will see old register values.
+ FlushAll();
+
+ std::vector hitChecks;
+ for (auto it : memchecks) {
+ if (it.end != 0) {
+ CMPI2R(SCRATCH1, it.start - size, SCRATCH2);
+ MOVI2R(SCRATCH2, it.end);
+ CCMP(SCRATCH1, SCRATCH2, 0xF, CC_HI);
+ hitChecks.push_back(B(CC_LO));
+ } else {
+ CMPI2R(SCRATCH1, it.start, SCRATCH2);
+ hitChecks.push_back(B(CC_EQ));
+ }
+ }
+
+ FixupBranch noHits = B();
+
+ // Okay, now land any hit here.
+ for (auto &fixup : hitChecks)
+ SetJumpTarget(fixup);
+ hitChecks.clear();
+
+ MOVI2R(W0, checkedPC);
+ MOV(W1, SCRATCH1);
+ QuickCallFunction(SCRATCH2_64, &IRRunMemCheck);
+
+ ptrdiff_t distance = dispatcherCheckCoreState_ - GetCodePointer();
+ if (distance >= -0x100000 && distance < 0x100000) {
+ CBNZ(W0, dispatcherCheckCoreState_);
+ } else {
+ FixupBranch keepOnKeepingOn = CBZ(W0);
+ B(dispatcherCheckCoreState_);
+ SetJumpTarget(keepOnKeepingOn);
+ }
+
+ SetJumpTarget(noHits);
+ }
break;
- }
default:
INVALIDOP;
break;
}
-
- // Both return a flag on whether to bail out.
- ptrdiff_t distance = dispatcherCheckCoreState_ - GetCodePointer();
- if (distance >= -0x100000 && distance < 0x100000) {
- CBNZ(W0, dispatcherCheckCoreState_);
- } else {
- FixupBranch keepOnKeepingOn = CBZ(W0);
- B(dispatcherCheckCoreState_);
- SetJumpTarget(keepOnKeepingOn);
- }
}
void Arm64JitBackend::CompIR_System(IRInst inst) {
@@ -126,6 +210,7 @@ void Arm64JitBackend::CompIR_System(IRInst inst) {
FlushAll();
SaveStaticRegisters();
+ WriteDebugProfilerStatus(IRProfilerStatus::SYSCALL);
#ifdef USE_PROFILER
// When profiling, we can't skip CallSyscall, since it times syscalls.
MOVI2R(W0, inst.constant);
@@ -145,6 +230,7 @@ void Arm64JitBackend::CompIR_System(IRInst inst) {
}
#endif
+ WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
// This is always followed by an ExitToPC, where we check coreState.
break;
@@ -152,7 +238,9 @@ void Arm64JitBackend::CompIR_System(IRInst inst) {
case IROp::CallReplacement:
FlushAll();
SaveStaticRegisters();
+ WriteDebugProfilerStatus(IRProfilerStatus::REPLACEMENT);
QuickCallFunction(SCRATCH2_64, GetReplacementFunc(inst.constant)->replaceFunc);
+ WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
SUB(DOWNCOUNTREG, DOWNCOUNTREG, W0);
break;
@@ -274,6 +362,66 @@ void Arm64JitBackend::CompIR_ValidateAddress(IRInst inst) {
INVALIDOP;
break;
}
+
+ if (regs_.IsGPRMappedAsPointer(inst.src1)) {
+ if (!jo.enablePointerify) {
+ SUB(SCRATCH1_64, regs_.RPtr(inst.src1), MEMBASEREG);
+ ADDI2R(SCRATCH1, SCRATCH1, inst.constant, SCRATCH2);
+ } else {
+ ADDI2R(SCRATCH1, regs_.R(inst.src1), inst.constant, SCRATCH2);
+ }
+ } else {
+ regs_.Map(inst);
+ ADDI2R(SCRATCH1, regs_.R(inst.src1), inst.constant, SCRATCH2);
+ }
+ ANDI2R(SCRATCH1, SCRATCH1, 0x3FFFFFFF, SCRATCH2);
+
+ std::vector validJumps;
+
+ FixupBranch unaligned;
+ if (alignment == 2) {
+ unaligned = TBNZ(SCRATCH1, 0);
+ } else if (alignment != 1) {
+ TSTI2R(SCRATCH1, alignment - 1, SCRATCH2);
+ unaligned = B(CC_NEQ);
+ }
+
+ CMPI2R(SCRATCH1, PSP_GetUserMemoryEnd() - alignment, SCRATCH2);
+ FixupBranch tooHighRAM = B(CC_HI);
+ CMPI2R(SCRATCH1, PSP_GetKernelMemoryBase(), SCRATCH2);
+ validJumps.push_back(B(CC_HS));
+
+ CMPI2R(SCRATCH1, PSP_GetVidMemEnd() - alignment, SCRATCH2);
+ FixupBranch tooHighVid = B(CC_HI);
+ CMPI2R(SCRATCH1, PSP_GetVidMemBase(), SCRATCH2);
+ validJumps.push_back(B(CC_HS));
+
+ CMPI2R(SCRATCH1, PSP_GetScratchpadMemoryEnd() - alignment, SCRATCH2);
+ FixupBranch tooHighScratch = B(CC_HI);
+ CMPI2R(SCRATCH1, PSP_GetScratchpadMemoryBase(), SCRATCH2);
+ validJumps.push_back(B(CC_HS));
+
+ if (alignment != 1)
+ SetJumpTarget(unaligned);
+ SetJumpTarget(tooHighRAM);
+ SetJumpTarget(tooHighVid);
+ SetJumpTarget(tooHighScratch);
+
+ // If we got here, something unusual and bad happened, so we'll always go back to the dispatcher.
+ // Because of that, we can avoid flushing outside this case.
+ auto regsCopy = regs_;
+ regsCopy.FlushAll();
+
+ // Ignores the return value, always returns to the dispatcher.
+ // Otherwise would need a thunk to restore regs.
+ MOV(W0, SCRATCH1);
+ MOVI2R(W1, alignment);
+ MOVI2R(W2, isWrite ? 1 : 0);
+ QuickCallFunction(SCRATCH2, &ReportBadAddress);
+ B(dispatcherCheckCoreState_);
+
+ for (FixupBranch &b : validJumps)
+ SetJumpTarget(b);
}
} // namespace MIPSComp
diff --git a/Core/MIPS/ARM64/Arm64IRJit.cpp b/Core/MIPS/ARM64/Arm64IRJit.cpp
index b99e11674438..ab7692dcc536 100644
--- a/Core/MIPS/ARM64/Arm64IRJit.cpp
+++ b/Core/MIPS/ARM64/Arm64IRJit.cpp
@@ -76,6 +76,8 @@ bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer()));
wroteCheckedOffset = true;
+ WriteDebugPC(startPC);
+
// Check the sign bit to check if negative.
FixupBranch normalEntry = TBZ(DOWNCOUNTREG, 31);
MOVI2R(SCRATCH1, startPC);
@@ -87,6 +89,7 @@ bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
const u8 *blockStart = GetCodePointer();
block->SetTargetOffset((int)GetOffset(blockStart));
compilingBlockNum_ = block_num;
+ lastConstPC_ = 0;
regs_.Start(block);
@@ -128,6 +131,8 @@ bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
}
if (jo.enableBlocklink && jo.useBackJump) {
+ WriteDebugPC(startPC);
+
// Small blocks are common, check if it's < 32KB long.
ptrdiff_t distance = blockStart - GetCodePointer();
if (distance >= -0x8000 && distance < 0x8000) {
@@ -228,8 +233,10 @@ void Arm64JitBackend::CompIR_Generic(IRInst inst) {
FlushAll();
SaveStaticRegisters();
+ WriteDebugProfilerStatus(IRProfilerStatus::IR_INTERPRET);
MOVI2R(X0, value);
QuickCallFunction(SCRATCH2_64, &DoIRInst);
+ WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
// We only need to check the return value if it's a potential exit.
@@ -255,12 +262,14 @@ void Arm64JitBackend::CompIR_Interpret(IRInst inst) {
// IR protects us against this being a branching instruction (well, hopefully.)
FlushAll();
SaveStaticRegisters();
+ WriteDebugProfilerStatus(IRProfilerStatus::INTERPRET);
if (DebugStatsEnabled()) {
MOVP2R(X0, MIPSGetName(op));
QuickCallFunction(SCRATCH2_64, &NotifyMIPSInterpret);
}
MOVI2R(X0, inst.constant);
QuickCallFunction(SCRATCH2_64, MIPSGetInterpretFunc(op));
+ WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
}
@@ -353,6 +362,32 @@ void Arm64JitBackend::MovToPC(ARM64Reg r) {
STR(INDEX_UNSIGNED, r, CTXREG, offsetof(MIPSState, pc));
}
+void Arm64JitBackend::WriteDebugPC(uint32_t pc) {
+ if (hooks_.profilerPC) {
+ int offset = (int)((const u8 *)hooks_.profilerPC - GetBasePtr());
+ MOVI2R(SCRATCH2, MIPS_EMUHACK_OPCODE + offset);
+ MOVI2R(SCRATCH1, pc);
+ STR(SCRATCH1, JITBASEREG, SCRATCH2);
+ }
+}
+
+void Arm64JitBackend::WriteDebugPC(ARM64Reg r) {
+ if (hooks_.profilerPC) {
+ int offset = (int)((const u8 *)hooks_.profilerPC - GetBasePtr());
+ MOVI2R(SCRATCH2, MIPS_EMUHACK_OPCODE + offset);
+ STR(r, JITBASEREG, SCRATCH2);
+ }
+}
+
+void Arm64JitBackend::WriteDebugProfilerStatus(IRProfilerStatus status) {
+ if (hooks_.profilerPC) {
+ int offset = (int)((const u8 *)hooks_.profilerStatus - GetBasePtr());
+ MOVI2R(SCRATCH2, MIPS_EMUHACK_OPCODE + offset);
+ MOVI2R(SCRATCH1, (int)status);
+ STR(SCRATCH1, JITBASEREG, SCRATCH2);
+ }
+}
+
void Arm64JitBackend::SaveStaticRegisters() {
if (jo.useStaticAlloc) {
QuickCallFunction(SCRATCH2_64, saveStaticRegisters_);
diff --git a/Core/MIPS/ARM64/Arm64IRJit.h b/Core/MIPS/ARM64/Arm64IRJit.h
index fa2428504c84..055e525565f8 100644
--- a/Core/MIPS/ARM64/Arm64IRJit.h
+++ b/Core/MIPS/ARM64/Arm64IRJit.h
@@ -57,6 +57,11 @@ class Arm64JitBackend : public Arm64Gen::ARM64CodeBlock, public IRNativeBackend
void UpdateRoundingMode(bool force = false);
void MovFromPC(Arm64Gen::ARM64Reg r);
void MovToPC(Arm64Gen::ARM64Reg r);
+ // Destroys SCRATCH2.
+ void WriteDebugPC(uint32_t pc);
+ void WriteDebugPC(Arm64Gen::ARM64Reg r);
+ // Destroys SCRATCH2.
+ void WriteDebugProfilerStatus(IRProfilerStatus status);
void SaveStaticRegisters();
void LoadStaticRegisters();
@@ -145,6 +150,8 @@ class Arm64JitBackend : public Arm64Gen::ARM64CodeBlock, public IRNativeBackend
int jitStartOffset_ = 0;
int compilingBlockNum_ = -1;
int logBlocks_ = 0;
+ // Only useful in breakpoints, where it's set immediately prior.
+ uint32_t lastConstPC_ = 0;
};
class Arm64IRJit : public IRNativeJit {
diff --git a/Core/MIPS/ARM64/Arm64IRRegCache.cpp b/Core/MIPS/ARM64/Arm64IRRegCache.cpp
index 0ce5422fd5ce..f48207fa5fcd 100644
--- a/Core/MIPS/ARM64/Arm64IRRegCache.cpp
+++ b/Core/MIPS/ARM64/Arm64IRRegCache.cpp
@@ -347,7 +347,7 @@ void Arm64IRRegCache::AdjustNativeRegAsPtr(IRNativeReg nreg, bool state) {
}
}
-bool Arm64IRRegCache::IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags) {
+bool Arm64IRRegCache::IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags, int lanes) {
// No special flags, skip the check for a little speed.
return true;
}
@@ -437,19 +437,21 @@ void Arm64IRRegCache::FlushAll(bool gprs, bool fprs) {
// Note: make sure not to change the registers when flushing:
// Branching code may expect the armreg to retain its value.
+ auto needsFlush = [&](IRReg i) {
+ if (mr[i].loc != MIPSLoc::MEM || mr[i].isStatic)
+ return false;
+ if (mr[i].nReg == -1 || !nr[mr[i].nReg].isDirty)
+ return false;
+ return true;
+ };
+
// Try to flush in pairs when possible.
for (int i = 1; i < TOTAL_MAPPABLE_IRREGS - 1; ++i) {
- if (mr[i].loc == MIPSLoc::MEM || mr[i].loc == MIPSLoc::MEM || mr[i].isStatic || mr[i + 1].isStatic)
+ if (!needsFlush(i) || !needsFlush(i + 1))
continue;
// Ignore multilane regs. Could handle with more smartness...
if (mr[i].lane != -1 || mr[i + 1].lane != -1)
continue;
- if (mr[i].nReg != -1 && !nr[mr[i].nReg].isDirty)
- continue;
- if (mr[i + 1].nReg != -1 && !nr[mr[i + 1].nReg].isDirty)
- continue;
- if (mr[i].loc == MIPSLoc::MEM || mr[i + 1].loc == MIPSLoc::MEM)
- continue;
int offset = GetMipsRegOffset(i);
diff --git a/Core/MIPS/ARM64/Arm64IRRegCache.h b/Core/MIPS/ARM64/Arm64IRRegCache.h
index 3a9bf77ab9f4..9f0b0cbbac02 100644
--- a/Core/MIPS/ARM64/Arm64IRRegCache.h
+++ b/Core/MIPS/ARM64/Arm64IRRegCache.h
@@ -86,7 +86,7 @@ class Arm64IRRegCache : public IRNativeRegCacheBase {
const int *GetAllocationOrder(MIPSLoc type, MIPSMap flags, int &count, int &base) const override;
void AdjustNativeRegAsPtr(IRNativeReg nreg, bool state) override;
- bool IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags) override;
+ bool IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags, int lanes) override;
void LoadNativeReg(IRNativeReg nreg, IRReg first, int lanes) override;
void StoreNativeReg(IRNativeReg nreg, IRReg first, int lanes) override;
void SetNativeRegValue(IRNativeReg nreg, uint32_t imm) override;
diff --git a/Core/MIPS/ARM64/Arm64Jit.cpp b/Core/MIPS/ARM64/Arm64Jit.cpp
index d1f1062f1ef3..9abb69920bfc 100644
--- a/Core/MIPS/ARM64/Arm64Jit.cpp
+++ b/Core/MIPS/ARM64/Arm64Jit.cpp
@@ -562,7 +562,8 @@ void Arm64Jit::Comp_ReplacementFunc(MIPSOpcode op)
const ReplacementTableEntry *entry = GetReplacementFunc(index);
if (!entry) {
- ERROR_LOG(HLE, "Invalid replacement op %08x", op.encoding);
+ ERROR_LOG_REPORT_ONCE(replFunc, HLE, "Invalid replacement op %08x at %08x", op.encoding, js.compilerPC);
+ // TODO: What should we do here? We're way off in the weeds probably.
return;
}
@@ -724,8 +725,11 @@ void Arm64Jit::UpdateRoundingMode(u32 fcr31) {
// though, as we need to have the SUBS flag set in the end. So with block linking in the mix,
// I don't think this gives us that much benefit.
void Arm64Jit::WriteExit(u32 destination, int exit_num) {
- // TODO: Check destination is valid and trigger exception.
- WriteDownCount();
+ // NOTE: Can't blindly check for bad destination addresses here, sometimes exits with bad destinations are written intentionally (like breaks).
+ _assert_msg_(exit_num < MAX_JIT_BLOCK_EXITS, "Expected a valid exit_num. dest=%08x", destination);
+
+ // NOTE: Can't blindly check for bad destination addresses here, sometimes exits with bad destinations are written intentionally (like breaks).
+ WriteDownCount();
//If nobody has taken care of this yet (this can be removed when all branches are done)
JitBlock *b = js.curBlock;
b->exitAddress[exit_num] = destination;
diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp
index e42ac78d2588..6f44c2ccdd84 100644
--- a/Core/MIPS/IR/IRCompVFPU.cpp
+++ b/Core/MIPS/IR/IRCompVFPU.cpp
@@ -1675,7 +1675,7 @@ namespace MIPSComp {
if (homogenous) {
// This is probably even what the hardware basically does, wiring t[3] to 1.0f.
ir.Write(IROp::Vec4Init, IRVTEMP_PFX_T, (int)Vec4Init::AllONE);
- ir.Write(IROp::Vec4Blend, IRVTEMP_PFX_T, t, IRVTEMP_PFX_T, 0x7);
+ ir.Write(IROp::Vec4Blend, IRVTEMP_PFX_T, IRVTEMP_PFX_T, t, 0x7);
t = IRVTEMP_PFX_T;
}
for (int i = 0; i < 4; i++)
@@ -1771,7 +1771,20 @@ namespace MIPSComp {
// d[0] = s[0]*t[1] - s[1]*t[0]
// Note: this operates on two vectors, not a 2x2 matrix.
- DISABLE;
+ VectorSize sz = GetVecSize(op);
+ if (sz != V_Pair)
+ DISABLE;
+
+ u8 sregs[4], dregs[4], tregs[4];
+ GetVectorRegsPrefixS(sregs, sz, _VS);
+ GetVectorRegsPrefixT(tregs, sz, _VT);
+ GetVectorRegsPrefixD(dregs, V_Single, _VD);
+
+ ir.Write(IROp::FMul, IRVTEMP_0, sregs[1], tregs[0]);
+ ir.Write(IROp::FMul, dregs[0], sregs[0], tregs[1]);
+ ir.Write(IROp::FSub, dregs[0], dregs[0], IRVTEMP_0);
+
+ ApplyPrefixD(dregs, V_Single, _VD);
}
void IRFrontend::Comp_Vi2x(MIPSOpcode op) {
diff --git a/Core/MIPS/IR/IRNativeCommon.cpp b/Core/MIPS/IR/IRNativeCommon.cpp
index 6ce1e0f0d614..784d0c7ae23c 100644
--- a/Core/MIPS/IR/IRNativeCommon.cpp
+++ b/Core/MIPS/IR/IRNativeCommon.cpp
@@ -15,10 +15,15 @@
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
+#include
+#include
+#include
#include "Common/Profiler/Profiler.h"
#include "Common/StringUtils.h"
#include "Common/TimeUtil.h"
+#include "Core/Core.h"
#include "Core/Debugger/SymbolMap.h"
+#include "Core/MemMap.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/MIPS/IR/IRNativeCommon.h"
@@ -28,18 +33,57 @@ namespace MIPSComp {
// Compile time flag to enable debug stats for not compiled ops.
static constexpr bool enableDebugStats = false;
+// Compile time flag for enabling the simple IR jit profiler.
+static constexpr bool enableDebugProfiler = false;
// Used only for debugging when enableDebug is true above.
static std::map debugSeenNotCompiledIR;
static std::map debugSeenNotCompiled;
+static std::map, int> debugSeenPCUsage;
static double lastDebugStatsLog = 0.0;
+static constexpr double debugStatsFrequency = 5.0;
+
+static std::thread debugProfilerThread;
+std::atomic debugProfilerThreadStatus = false;
+
+template
+class IRProfilerTopValues {
+public:
+ void Add(const std::pair &v, int c) {
+ for (int i = 0; i < N; ++i) {
+ if (c > counts[i]) {
+ counts[i] = c;
+ values[i] = v;
+ return;
+ }
+ }
+ }
+
+ int counts[N]{};
+ std::pair values[N]{};
+};
+
+const char *IRProfilerStatusToString(IRProfilerStatus s) {
+ switch (s) {
+ case IRProfilerStatus::NOT_RUNNING: return "NOT_RUNNING";
+ case IRProfilerStatus::IN_JIT: return "IN_JIT";
+ case IRProfilerStatus::TIMER_ADVANCE: return "TIMER_ADVANCE";
+ case IRProfilerStatus::COMPILING: return "COMPILING";
+ case IRProfilerStatus::MATH_HELPER: return "MATH_HELPER";
+ case IRProfilerStatus::REPLACEMENT: return "REPLACEMENT";
+ case IRProfilerStatus::SYSCALL: return "SYSCALL";
+ case IRProfilerStatus::INTERPRET: return "INTERPRET";
+ case IRProfilerStatus::IR_INTERPRET: return "IR_INTERPRET";
+ }
+ return "INVALID";
+}
static void LogDebugStats() {
- if (!enableDebugStats)
+ if (!enableDebugStats && !enableDebugProfiler)
return;
double now = time_now_d();
- if (now < lastDebugStatsLog + 1.0)
+ if (now < lastDebugStatsLog + debugStatsFrequency)
return;
lastDebugStatsLog = now;
@@ -63,16 +107,36 @@ static void LogDebugStats() {
}
debugSeenNotCompiled.clear();
+ IRProfilerTopValues<4> slowestPCs;
+ int64_t totalCount = 0;
+ for (auto it : debugSeenPCUsage) {
+ slowestPCs.Add(it.first, it.second);
+ totalCount += it.second;
+ }
+ debugSeenPCUsage.clear();
+
if (worstIROp != -1)
WARN_LOG(JIT, "Most not compiled IR op: %s (%d)", GetIRMeta((IROp)worstIROp)->name, worstIRVal);
if (worstName != nullptr)
WARN_LOG(JIT, "Most not compiled op: %s (%d)", worstName, worstVal);
+ if (slowestPCs.counts[0] != 0) {
+ for (int i = 0; i < 4; ++i) {
+ uint32_t pc = slowestPCs.values[i].first;
+ const char *status = IRProfilerStatusToString(slowestPCs.values[i].second);
+ const std::string label = g_symbolMap ? g_symbolMap->GetDescription(pc) : "";
+ WARN_LOG(JIT, "Slowest sampled PC #%d: %08x (%s)/%s (%f%%)", i, pc, label.c_str(), status, 100.0 * (double)slowestPCs.counts[i] / (double)totalCount);
+ }
+ }
}
bool IRNativeBackend::DebugStatsEnabled() const {
return enableDebugStats;
}
+// True when the sampling profiler is enabled (a compile-time flag, used with
+// "if constexpr" elsewhere), so backends know to emit the WriteDebugPC /
+// WriteDebugProfilerStatus instrumentation.
+bool IRNativeBackend::DebugProfilerEnabled() const {
+ return enableDebugProfiler;
+}
+
void IRNativeBackend::NotifyMIPSInterpret(const char *name) {
_assert_(enableDebugStats);
debugSeenNotCompiled[name]++;
@@ -98,8 +162,32 @@ uint32_t IRNativeBackend::DoIRInst(uint64_t value) {
return IRInterpret(currentMIPS, &inst, 1);
}
+// Called from JIT code when an emitted address check fails. Raises the
+// appropriate memory exception (invalid range or misalignment) at the current
+// PC. Returns 1 when the exception stopped the core (caller must exit the
+// JIT), 0 when execution may continue.
+int IRNativeBackend::ReportBadAddress(uint32_t addr, uint32_t alignment, uint32_t isWrite) {
+ const auto toss = [&](MemoryExceptionType t) {
+ Core_MemoryException(addr, alignment, currentMIPS->pc, t);
+ return coreState != CORE_RUNNING ? 1 : 0;
+ };
+
+ if (!Memory::IsValidRange(addr, alignment)) {
+ // WORD for accesses up to 4 bytes, BLOCK for anything larger.
+ // NOTE(review): "isWrite == 1" here vs bare truthiness below - presumably
+ // isWrite is only ever 0 or 1; confirm callers never pass other values.
+ MemoryExceptionType t = isWrite == 1 ? MemoryExceptionType::WRITE_WORD : MemoryExceptionType::READ_WORD;
+ if (alignment > 4)
+ t = isWrite ? MemoryExceptionType::WRITE_BLOCK : MemoryExceptionType::READ_BLOCK;
+ return toss(t);
+ } else if (alignment > 1 && (addr & (alignment - 1)) != 0) {
+ // The range itself is valid, but the address is misaligned for this size.
+ return toss(MemoryExceptionType::ALIGNMENT);
+ }
+ return 0;
+}
+
IRNativeBackend::IRNativeBackend(IRBlockCache &blocks) : blocks_(blocks) {}
+// Stops and joins the profiler sampling thread, if one was started.
+// NOTE(review): debugProfilerThreadStatus is also read in a spin loop on the
+// sampling thread - confirm it is std::atomic (or otherwise safely shared).
+IRNativeBackend::~IRNativeBackend() {
+ if (debugProfilerThreadStatus) {
+ debugProfilerThreadStatus = false;
+ debugProfilerThread.join();
+ }
+}
+
void IRNativeBackend::CompileIRInst(IRInst inst) {
switch (inst.op) {
case IROp::Nop:
@@ -401,6 +489,20 @@ void IRNativeJit::Init(IRNativeBackend &backend) {
// Wanted this to be a reference, but vtbls get in the way. Shouldn't change.
hooks_ = backend.GetNativeHooks();
+
+ if (enableDebugProfiler && hooks_.profilerPC) {
+ debugProfilerThreadStatus = true;
+ debugProfilerThread = std::thread([&] {
+ // Spin, spin spin... maybe could at least hook into sleeps.
+ while (debugProfilerThreadStatus) {
+ IRProfilerStatus stat = *hooks_.profilerStatus;
+ uint32_t pc = *hooks_.profilerPC;
+ if (stat != IRProfilerStatus::NOT_RUNNING && stat != IRProfilerStatus::SYSCALL) {
+ debugSeenPCUsage[std::make_pair(pc, stat)]++;
+ }
+ }
+ });
+ }
}
bool IRNativeJit::CompileTargetBlock(IRBlock *block, int block_num, bool preload) {
@@ -412,7 +514,7 @@ void IRNativeJit::FinalizeTargetBlock(IRBlock *block, int block_num) {
}
void IRNativeJit::RunLoopUntil(u64 globalticks) {
- if constexpr (enableDebugStats) {
+ if constexpr (enableDebugStats || enableDebugProfiler) {
LogDebugStats();
}
@@ -443,13 +545,27 @@ bool IRNativeJit::DescribeCodePtr(const u8 *ptr, std::string &name) {
return false;
int block_num = -1;
+ int block_offset = INT_MAX;
for (int i = 0; i < blocks_.GetNumBlocks(); ++i) {
const auto &b = blocks_.GetBlock(i);
- // We allocate linearly.
- if (b->GetTargetOffset() <= offset)
+ int b_start = b->GetTargetOffset();
+ if (b_start > offset)
+ continue;
+
+ int b_end = backend_->GetNativeBlock(i)->checkedOffset;
+ int b_offset = offset - b_start;
+ if (b_end > b_start && b_end >= offset) {
+ // For sure within the block.
block_num = i;
- if (b->GetTargetOffset() > offset)
+ block_offset = b_offset;
break;
+ }
+
+ if (b_offset < block_offset) {
+ // Possibly within the block, unless in some other block...
+ block_num = i;
+ block_offset = b_offset;
+ }
}
// Used by profiling tools that don't like spaces.
@@ -466,9 +582,9 @@ bool IRNativeJit::DescribeCodePtr(const u8 *ptr, std::string &name) {
// It helps to know which func this block is inside.
const std::string label = g_symbolMap ? g_symbolMap->GetDescription(start) : "";
if (!label.empty())
- name = StringFromFormat("block%d_%08x_%s", block_num, start, label.c_str());
+ name = StringFromFormat("block%d_%08x_%s_0x%x", block_num, start, label.c_str(), block_offset);
else
- name = StringFromFormat("block%d_%08x", block_num, start);
+ name = StringFromFormat("block%d_%08x_0x%x", block_num, start, block_offset);
return true;
}
return false;
diff --git a/Core/MIPS/IR/IRNativeCommon.h b/Core/MIPS/IR/IRNativeCommon.h
index 7da5d3a8318d..4afc50369891 100644
--- a/Core/MIPS/IR/IRNativeCommon.h
+++ b/Core/MIPS/IR/IRNativeCommon.h
@@ -25,12 +25,27 @@ namespace MIPSComp {
typedef void (*IRNativeFuncNoArg)();
+// What the JIT is currently doing, written from emitted code via a single
+// 32-bit store (hence the fixed int32_t underlying type) and sampled
+// asynchronously by the profiler thread together with the current PC.
+enum class IRProfilerStatus : int32_t {
+ NOT_RUNNING,
+ IN_JIT,
+ TIMER_ADVANCE,
+ COMPILING,
+ MATH_HELPER,
+ REPLACEMENT,
+ SYSCALL,
+ INTERPRET,
+ IR_INTERPRET,
+};
+
struct IRNativeHooks {
IRNativeFuncNoArg enterDispatcher = nullptr;
const uint8_t *dispatcher = nullptr;
const uint8_t *dispatchFetch = nullptr;
const uint8_t *crashHandler = nullptr;
+
+ uint32_t *profilerPC = nullptr;
+ IRProfilerStatus *profilerStatus = nullptr;
};
struct IRNativeBlockExit {
@@ -47,7 +62,7 @@ struct IRNativeBlock {
class IRNativeBackend {
public:
IRNativeBackend(IRBlockCache &blocks);
- virtual ~IRNativeBackend() {}
+ virtual ~IRNativeBackend();
void CompileIRInst(IRInst inst);
@@ -120,6 +135,7 @@ class IRNativeBackend {
// Returns true when debugging statistics should be compiled in.
bool DebugStatsEnabled() const;
+ bool DebugProfilerEnabled() const;
// Callback (compile when DebugStatsEnabled()) to log a base interpreter hit.
// Call the func returned by MIPSGetInterpretFunc(op) directly for interpret.
@@ -131,6 +147,8 @@ class IRNativeBackend {
// Callback to log AND perform an IR interpreter inst. Returns 0 or a PC to jump to.
static uint32_t DoIRInst(uint64_t inst);
+ static int ReportBadAddress(uint32_t addr, uint32_t alignment, uint32_t isWrite);
+
void AddLinkableExit(int block_num, uint32_t pc, int exitStartOffset, int exitLen);
void EraseAllLinks(int block_num);
diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp
index 87aa27687d39..2079b799d6a3 100644
--- a/Core/MIPS/IR/IRPassSimplify.cpp
+++ b/Core/MIPS/IR/IRPassSimplify.cpp
@@ -1794,7 +1794,8 @@ bool ApplyMemoryValidation(const IRWriter &in, IRWriter &out, const IROptions &o
bool spModified = false;
for (IRInst inst : in.GetInstructions()) {
IRMemoryOpInfo info = IROpMemoryAccessSize(inst.op);
- if (info.size != 0 && inst.src1 == MIPS_REG_SP) {
+ // Note: we only combine word aligned accesses.
+ if (info.size != 0 && inst.src1 == MIPS_REG_SP && info.size == 4) {
if (spModified) {
// No good, it was modified and then we did more accesses. Can't combine.
spUpper = -1;
@@ -1805,11 +1806,6 @@ bool ApplyMemoryValidation(const IRWriter &in, IRWriter &out, const IROptions &o
spUpper = -1;
break;
}
- if (info.size == 16 && (inst.constant & 0xF) != 0) {
- // Shouldn't happen, sp should always be aligned.
- spUpper = -1;
- break;
- }
spLower = std::min(spLower, (int)inst.constant);
spUpper = std::max(spUpper, (int)inst.constant + info.size);
@@ -1828,7 +1824,7 @@ bool ApplyMemoryValidation(const IRWriter &in, IRWriter &out, const IROptions &o
std::map checks;
const auto addValidate = [&](IROp validate, uint8_t sz, const IRInst &inst, bool isStore) {
- if (inst.src1 == MIPS_REG_SP && skipSP) {
+ if (inst.src1 == MIPS_REG_SP && skipSP && validate == IROp::ValidateAddress32) {
if (!flushedSP) {
out.Write(IROp::ValidateAddress32, 0, MIPS_REG_SP, spWrite ? 1U : 0U, spLower);
if (spUpper > spLower + 4)
diff --git a/Core/MIPS/IR/IRRegCache.cpp b/Core/MIPS/IR/IRRegCache.cpp
index 698929a21ad8..38c2fe2bd535 100644
--- a/Core/MIPS/IR/IRRegCache.cpp
+++ b/Core/MIPS/IR/IRRegCache.cpp
@@ -160,7 +160,7 @@ bool IRNativeRegCacheBase::IsFPRMapped(IRReg fpr) {
}
int IRNativeRegCacheBase::GetFPRLaneCount(IRReg fpr) {
- if (!IsFPRMapped(fpr) || mr[fpr + 32].lane > 0)
+ if (!IsFPRMapped(fpr))
return 0;
if (mr[fpr + 32].lane == -1)
return 1;
@@ -406,12 +406,12 @@ IRNativeReg IRNativeRegCacheBase::FindFreeReg(MIPSLoc type, MIPSMap flags) const
bool IRNativeRegCacheBase::IsGPRClobbered(IRReg gpr) const {
_dbg_assert_(IsValidGPR(gpr));
- return IsRegClobbered(MIPSLoc::REG, MIPSMap::INIT, gpr);
+ return IsRegClobbered(MIPSLoc::REG, gpr);
}
bool IRNativeRegCacheBase::IsFPRClobbered(IRReg fpr) const {
_dbg_assert_(IsValidFPR(fpr));
- return IsRegClobbered(MIPSLoc::FREG, MIPSMap::INIT, fpr + 32);
+ return IsRegClobbered(MIPSLoc::FREG, fpr + 32);
}
IRUsage IRNativeRegCacheBase::GetNextRegUsage(const IRSituation &info, MIPSLoc type, IRReg r) const {
@@ -423,7 +423,7 @@ IRUsage IRNativeRegCacheBase::GetNextRegUsage(const IRSituation &info, MIPSLoc t
return IRUsage::UNKNOWN;
}
-bool IRNativeRegCacheBase::IsRegClobbered(MIPSLoc type, MIPSMap flags, IRReg r) const {
+bool IRNativeRegCacheBase::IsRegClobbered(MIPSLoc type, IRReg r) const {
static const int UNUSED_LOOKAHEAD_OPS = 30;
IRSituation info;
@@ -450,6 +450,21 @@ bool IRNativeRegCacheBase::IsRegClobbered(MIPSLoc type, MIPSMap flags, IRReg r)
return false;
}
+// Returns true when the next use of this reg within the lookahead window is a
+// read. Unlike the spill heuristics, scanning starts one instruction AFTER
+// the current one, so the instruction being compiled doesn't count.
+bool IRNativeRegCacheBase::IsRegRead(MIPSLoc type, IRReg first) const {
+ static const int UNUSED_LOOKAHEAD_OPS = 30;
+
+ IRSituation info;
+ info.lookaheadCount = UNUSED_LOOKAHEAD_OPS;
+ // We look starting one ahead, unlike spilling.
+ info.currentIndex = irIndex_ + 1;
+ info.instructions = irBlock_->GetInstructions();
+ info.numInstructions = irBlock_->GetNumInstructions();
+
+ // Note: this intentionally doesn't look at the full reg, only the lane.
+ IRUsage usage = GetNextRegUsage(info, type, first);
+ return usage == IRUsage::READ;
+}
+
IRNativeReg IRNativeRegCacheBase::FindBestToSpill(MIPSLoc type, MIPSMap flags, bool unusedOnly, bool *clobbered) const {
int allocCount = 0, base = 0;
const int *allocOrder = GetAllocationOrder(type, flags, allocCount, base);
@@ -501,7 +516,7 @@ IRNativeReg IRNativeRegCacheBase::FindBestToSpill(MIPSLoc type, MIPSMap flags, b
return -1;
}
-bool IRNativeRegCacheBase::IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags) {
+bool IRNativeRegCacheBase::IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags, int lanes) {
int allocCount = 0, base = 0;
const int *allocOrder = GetAllocationOrder(type, flags, allocCount, base);
@@ -514,6 +529,11 @@ bool IRNativeRegCacheBase::IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type,
return false;
}
+// Base implementation: reg-to-reg transfers are unsupported, so callers fall
+// back to FlushNativeReg + reallocate. Backends that can move values between
+// native registers directly override this and return true on success.
+bool IRNativeRegCacheBase::TransferNativeReg(IRNativeReg nreg, IRNativeReg dest, MIPSLoc type, IRReg first, int lanes, MIPSMap flags) {
+ // To be overridden if the backend supports transfers.
+ return false;
+}
+
void IRNativeRegCacheBase::DiscardNativeReg(IRNativeReg nreg) {
_assert_msg_(nreg >= 0 && nreg < config_.totalNativeRegs, "DiscardNativeReg on invalid register %d", nreg);
if (nr[nreg].mipsReg != IRREG_INVALID) {
@@ -930,11 +950,14 @@ IRNativeReg IRNativeRegCacheBase::MapNativeReg(MIPSLoc type, IRReg first, int la
case MIPSLoc::REG:
if (type != MIPSLoc::REG) {
nreg = AllocateReg(type, flags);
- } else if (!IsNativeRegCompatible(nreg, type, flags)) {
+ } else if (!IsNativeRegCompatible(nreg, type, flags, lanes)) {
// If it's not compatible, we'll need to reallocate.
- // TODO: Could do a transfer and avoid memory flush.
- FlushNativeReg(nreg);
- nreg = AllocateReg(type, flags);
+ if (TransferNativeReg(nreg, -1, type, first, lanes, flags)) {
+ nreg = mr[first].nReg;
+ } else {
+ FlushNativeReg(nreg);
+ nreg = AllocateReg(type, flags);
+ }
}
break;
@@ -942,9 +965,13 @@ IRNativeReg IRNativeRegCacheBase::MapNativeReg(MIPSLoc type, IRReg first, int la
case MIPSLoc::VREG:
if (type != mr[first].loc) {
nreg = AllocateReg(type, flags);
- } else if (!IsNativeRegCompatible(nreg, type, flags)) {
- FlushNativeReg(nreg);
- nreg = AllocateReg(type, flags);
+ } else if (!IsNativeRegCompatible(nreg, type, flags, lanes)) {
+ if (TransferNativeReg(nreg, -1, type, first, lanes, flags)) {
+ nreg = mr[first].nReg;
+ } else {
+ FlushNativeReg(nreg);
+ nreg = AllocateReg(type, flags);
+ }
}
break;
@@ -981,10 +1008,13 @@ void IRNativeRegCacheBase::MapNativeReg(MIPSLoc type, IRNativeReg nreg, IRReg fi
_assert_msg_(!mreg.isStatic, "Cannot MapNativeReg a static reg mismatch");
if ((flags & MIPSMap::NOINIT) != MIPSMap::NOINIT) {
// If we need init, we have to flush mismatches.
- // TODO: Do a shuffle if interior only?
- // TODO: We may also be motivated to have multiple read-only "views" or an IRReg.
- // For example Vec4Scale v0..v3, v0..v3, v3
- FlushNativeReg(mreg.nReg);
+ if (!TransferNativeReg(mreg.nReg, nreg, type, first, lanes, flags)) {
+ // TODO: We may also be motivated to have multiple read-only "views" of an IRReg.
+ // For example Vec4Scale v0..v3, v0..v3, v3
+ FlushNativeReg(mreg.nReg);
+ }
+ // The mismatch has been "resolved" now.
+ mismatch = false;
} else if (oldlanes != 1) {
// Even if we don't care about the current contents, we can't discard outside.
bool extendsBefore = oldlane > i;
@@ -1017,6 +1047,9 @@ void IRNativeRegCacheBase::MapNativeReg(MIPSLoc type, IRNativeReg nreg, IRReg fi
DiscardNativeReg(mreg.nReg);
else
FlushNativeReg(mreg.nReg);
+
+ // That took care of the mismatch, either by clobber or flush.
+ mismatch = false;
}
}
}
@@ -1027,8 +1060,8 @@ void IRNativeRegCacheBase::MapNativeReg(MIPSLoc type, IRNativeReg nreg, IRReg fi
if ((flags & MIPSMap::NOINIT) != MIPSMap::NOINIT) {
// We better not be trying to map to a different nreg if it's in one now.
// This might happen on some sort of transfer...
- // TODO: Make a direct transfer, i.e. FREG -> VREG?
- FlushNativeReg(mreg.nReg);
+ if (!TransferNativeReg(mreg.nReg, nreg, type, first, lanes, flags))
+ FlushNativeReg(mreg.nReg);
} else {
DiscardNativeReg(mreg.nReg);
}
diff --git a/Core/MIPS/IR/IRRegCache.h b/Core/MIPS/IR/IRRegCache.h
index c85bb41848ee..4301886b4414 100644
--- a/Core/MIPS/IR/IRRegCache.h
+++ b/Core/MIPS/IR/IRRegCache.h
@@ -209,13 +209,14 @@ class IRNativeRegCacheBase {
IRNativeReg AllocateReg(MIPSLoc type, MIPSMap flags);
IRNativeReg FindFreeReg(MIPSLoc type, MIPSMap flags) const;
IRNativeReg FindBestToSpill(MIPSLoc type, MIPSMap flags, bool unusedOnly, bool *clobbered) const;
- virtual bool IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags);
+ virtual bool IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags, int lanes);
virtual void DiscardNativeReg(IRNativeReg nreg);
virtual void FlushNativeReg(IRNativeReg nreg);
virtual void DiscardReg(IRReg mreg);
virtual void FlushReg(IRReg mreg);
virtual void AdjustNativeRegAsPtr(IRNativeReg nreg, bool state);
virtual void MapNativeReg(MIPSLoc type, IRNativeReg nreg, IRReg first, int lanes, MIPSMap flags);
+ virtual bool TransferNativeReg(IRNativeReg nreg, IRNativeReg dest, MIPSLoc type, IRReg first, int lanes, MIPSMap flags);
virtual IRNativeReg MapNativeReg(MIPSLoc type, IRReg first, int lanes, MIPSMap flags);
IRNativeReg MapNativeRegAsPointer(IRReg gpr);
@@ -238,7 +239,8 @@ class IRNativeRegCacheBase {
void SetSpillLockIRIndex(IRReg reg, int index);
int GetMipsRegOffset(IRReg r);
- bool IsRegClobbered(MIPSLoc type, MIPSMap flags, IRReg r) const;
+ bool IsRegClobbered(MIPSLoc type, IRReg r) const;
+ bool IsRegRead(MIPSLoc type, IRReg r) const;
IRUsage GetNextRegUsage(const IRSituation &info, MIPSLoc type, IRReg r) const;
bool IsValidGPR(IRReg r) const;
diff --git a/Core/MIPS/JitCommon/JitBlockCache.cpp b/Core/MIPS/JitCommon/JitBlockCache.cpp
index 1e503da62176..f8ba2436c6e2 100644
--- a/Core/MIPS/JitCommon/JitBlockCache.cpp
+++ b/Core/MIPS/JitCommon/JitBlockCache.cpp
@@ -31,6 +31,7 @@
#include "Core/MemMap.h"
#include "Core/CoreTiming.h"
#include "Core/Reporting.h"
+#include "Core/Config.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSTables.h"
@@ -246,8 +247,7 @@ static void ExpandRange(std::pair &range, u32 newStart, u32 newEnd) {
void JitBlockCache::FinalizeBlock(int block_num, bool block_link) {
JitBlock &b = blocks_[block_num];
-
- _assert_msg_(Memory::IsValidAddress(b.originalAddress), "FinalizeBlock: Bad originalAddress %08x in block %d", b.originalAddress, block_num);
+ _assert_msg_(Memory::IsValidAddress(b.originalAddress), "FinalizeBlock: Bad originalAddress %08x in block %d (b.num: %d) proxy: %s sz: %d", b.originalAddress, block_num, b.blockNum, b.proxyFor ? "y" : "n", b.codeSize);
b.originalFirstOpcode = Memory::Read_Opcode_JIT(b.originalAddress);
MIPSOpcode opcode = GetEmuHackOpForBlock(block_num);
@@ -462,6 +462,11 @@ void JitBlockCache::UnlinkBlock(int i) {
if (ppp.first == ppp.second)
return;
for (auto iter = ppp.first; iter != ppp.second; ++iter) {
+ if ((size_t)iter->second >= num_blocks_) {
+ // Something probably went very wrong. Try to stumble along nevertheless.
+ ERROR_LOG(JIT, "UnlinkBlock: Invalid block number %d", iter->second);
+ continue;
+ }
JitBlock &sourceBlock = blocks_[iter->second];
for (int e = 0; e < MAX_JIT_BLOCK_EXITS; e++) {
if (sourceBlock.exitAddress[e] == b.originalAddress)
diff --git a/Core/MIPS/JitCommon/JitBlockCache.h b/Core/MIPS/JitCommon/JitBlockCache.h
index 3049300f9aa7..09eae3cc2138 100644
--- a/Core/MIPS/JitCommon/JitBlockCache.h
+++ b/Core/MIPS/JitCommon/JitBlockCache.h
@@ -29,7 +29,7 @@
#include "Core/MIPS/MIPS.h"
#if PPSSPP_ARCH(ARM) || PPSSPP_ARCH(ARM64)
-const int MAX_JIT_BLOCK_EXITS = 2;
+const int MAX_JIT_BLOCK_EXITS = 4;
#else
const int MAX_JIT_BLOCK_EXITS = 8;
#endif
diff --git a/Core/MIPS/MIPSIntVFPU.cpp b/Core/MIPS/MIPSIntVFPU.cpp
index 35d3caa56a78..9fcc409419cc 100644
--- a/Core/MIPS/MIPSIntVFPU.cpp
+++ b/Core/MIPS/MIPSIntVFPU.cpp
@@ -1446,7 +1446,7 @@ namespace MIPSInt
d[0] += s[2] * t[2] + s[3] * t[3];
}
- ApplyPrefixD(d, sz);
+ ApplyPrefixD(d, V_Single);
WriteVector(d, V_Single, vd);
PC += 4;
EatPrefixes();
diff --git a/Core/MIPS/RiscV/RiscVAsm.cpp b/Core/MIPS/RiscV/RiscVAsm.cpp
index 135e0604e8a3..730a6d9dcc5a 100644
--- a/Core/MIPS/RiscV/RiscVAsm.cpp
+++ b/Core/MIPS/RiscV/RiscVAsm.cpp
@@ -45,8 +45,19 @@ static void ShowPC(u32 downcount, void *membase, void *jitbase) {
}
void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) {
- BeginWrite(GetMemoryProtectPageSize());
+ // This will be used as a writable scratch area, always 32-bit accessible.
const u8 *start = AlignCodePage();
+ if (DebugProfilerEnabled()) {
+ ProtectMemoryPages(start, GetMemoryProtectPageSize(), MEM_PROT_READ | MEM_PROT_WRITE);
+ hooks_.profilerPC = (uint32_t *)GetWritableCodePtr();
+ *hooks_.profilerPC = 0;
+ hooks_.profilerStatus = (IRProfilerStatus *)GetWritableCodePtr() + 1;
+ *hooks_.profilerStatus = IRProfilerStatus::NOT_RUNNING;
+ SetCodePointer(GetCodePtr() + sizeof(uint32_t) * 2, GetWritableCodePtr() + sizeof(uint32_t) * 2);
+ }
+
+ const u8 *disasmStart = AlignCodePage();
+ BeginWrite(GetMemoryProtectPageSize());
if (jo.useStaticAlloc) {
saveStaticRegisters_ = AlignCode16();
@@ -58,8 +69,6 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) {
regs_.EmitLoadStaticRegisters();
LW(DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
RET();
-
- start = saveStaticRegisters_;
} else {
saveStaticRegisters_ = nullptr;
loadStaticRegisters_ = nullptr;
@@ -124,14 +133,18 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) {
LI(JITBASEREG, GetBasePtr() - MIPS_EMUHACK_OPCODE, SCRATCH1);
LoadStaticRegisters();
+ WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
MovFromPC(SCRATCH1);
+ WriteDebugPC(SCRATCH1);
outerLoopPCInSCRATCH1_ = GetCodePtr();
MovToPC(SCRATCH1);
outerLoop_ = GetCodePtr();
// Advance can change the downcount (or thread), so must save/restore around it.
SaveStaticRegisters();
RestoreRoundingMode(true);
+ WriteDebugProfilerStatus(IRProfilerStatus::TIMER_ADVANCE);
QuickCallFunction(&CoreTiming::Advance, X7);
+ WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
ApplyRoundingMode(true);
LoadStaticRegisters();
@@ -162,6 +175,7 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) {
}
LWU(SCRATCH1, CTXREG, offsetof(MIPSState, pc));
+ WriteDebugPC(SCRATCH1);
#ifdef MASKED_PSP_MEMORY
LI(SCRATCH2, 0x3FFFFFFF);
AND(SCRATCH1, SCRATCH1, SCRATCH2);
@@ -180,7 +194,9 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) {
// No block found, let's jit. We don't need to save static regs, they're all callee saved.
RestoreRoundingMode(true);
+ WriteDebugProfilerStatus(IRProfilerStatus::COMPILING);
QuickCallFunction(&MIPSComp::JitAt, X7);
+ WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
ApplyRoundingMode(true);
// Try again, the block index should be set now.
@@ -195,6 +211,7 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) {
const uint8_t *quitLoop = GetCodePtr();
SetJumpTarget(badCoreState);
+ WriteDebugProfilerStatus(IRProfilerStatus::NOT_RUNNING);
SaveStaticRegisters();
RestoreRoundingMode(true);
diff --git a/Core/MIPS/RiscV/RiscVCompFPU.cpp b/Core/MIPS/RiscV/RiscVCompFPU.cpp
index 3836ae77e4e8..132ef8e58c1c 100644
--- a/Core/MIPS/RiscV/RiscVCompFPU.cpp
+++ b/Core/MIPS/RiscV/RiscVCompFPU.cpp
@@ -520,20 +520,32 @@ void RiscVJitBackend::CompIR_FCompare(IRInst inst) {
case IROp::FCmpVfpuAggregate:
regs_.MapGPR(IRREG_VFPU_CC, MIPSMap::DIRTY);
- ANDI(SCRATCH1, regs_.R(IRREG_VFPU_CC), inst.dest);
- // This is the "any bit", easy.
- SNEZ(SCRATCH2, SCRATCH1);
- // To compare to inst.dest for "all", let's simply subtract it and compare to zero.
- ADDI(SCRATCH1, SCRATCH1, -inst.dest);
- SEQZ(SCRATCH1, SCRATCH1);
- // Now we combine those together.
- SLLI(SCRATCH1, SCRATCH1, 5);
- SLLI(SCRATCH2, SCRATCH2, 4);
- OR(SCRATCH1, SCRATCH1, SCRATCH2);
+ if (inst.dest == 1) {
+ ANDI(SCRATCH1, regs_.R(IRREG_VFPU_CC), inst.dest);
+ // Negate so 1 becomes all bits set and zero stays zero, then mask to 0x30.
+ NEG(SCRATCH1, SCRATCH1);
+ ANDI(SCRATCH1, SCRATCH1, 0x30);
+
+ // Reject the old any/all bits and replace them with our own.
+ ANDI(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), ~0x30);
+ OR(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), SCRATCH1);
+ } else {
+ ANDI(SCRATCH1, regs_.R(IRREG_VFPU_CC), inst.dest);
+ FixupBranch skipZero = BEQ(SCRATCH1, R_ZERO);
- // Reject those any/all bits and replace them with our own.
- ANDI(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), ~0x30);
- OR(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), SCRATCH1);
+ // To compare to inst.dest for "all", let's simply subtract it and compare to zero.
+ ADDI(SCRATCH1, SCRATCH1, -inst.dest);
+ SEQZ(SCRATCH1, SCRATCH1);
+ // Now we combine with the "any" bit.
+ SLLI(SCRATCH1, SCRATCH1, 5);
+ ORI(SCRATCH1, SCRATCH1, 0x10);
+
+ SetJumpTarget(skipZero);
+
+ // Reject the old any/all bits and replace them with our own.
+ ANDI(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), ~0x30);
+ OR(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), SCRATCH1);
+ }
break;
default:
@@ -573,6 +585,8 @@ void RiscVJitBackend::CompIR_FSpecial(IRInst inst) {
auto callFuncF_F = [&](float (*func)(float)) {
regs_.FlushBeforeCall();
+ WriteDebugProfilerStatus(IRProfilerStatus::MATH_HELPER);
+
// It might be in a non-volatile register.
// TODO: May have to handle a transfer if SIMD here.
if (regs_.IsFPRMapped(inst.src1)) {
@@ -588,6 +602,8 @@ void RiscVJitBackend::CompIR_FSpecial(IRInst inst) {
if (regs_.F(inst.dest) != F10) {
FMV(32, regs_.F(inst.dest), F10);
}
+
+ WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
};
RiscVReg tempReg = INVALID_REG;
diff --git a/Core/MIPS/RiscV/RiscVCompLoadStore.cpp b/Core/MIPS/RiscV/RiscVCompLoadStore.cpp
index 80b149ca02f3..9db1ebb657c6 100644
--- a/Core/MIPS/RiscV/RiscVCompLoadStore.cpp
+++ b/Core/MIPS/RiscV/RiscVCompLoadStore.cpp
@@ -59,8 +59,19 @@ int32_t RiscVJitBackend::AdjustForAddressOffset(RiscVGen::RiscVReg *reg, int32_t
if (constant > 0)
constant &= Memory::MEMVIEW32_MASK;
#endif
- LI(SCRATCH2, constant);
- ADD(SCRATCH1, *reg, SCRATCH2);
+ // It can't be this negative, must be a constant with top bit set.
+ if ((constant & 0xC0000000) == 0x80000000) {
+ if (cpu_info.RiscV_Zba) {
+ LI(SCRATCH2, constant);
+ ADD_UW(SCRATCH1, SCRATCH2, *reg);
+ } else {
+ LI(SCRATCH2, (uint32_t)constant);
+ ADD(SCRATCH1, *reg, SCRATCH2);
+ }
+ } else {
+ LI(SCRATCH2, constant);
+ ADD(SCRATCH1, *reg, SCRATCH2);
+ }
*reg = SCRATCH1;
return 0;
}
diff --git a/Core/MIPS/RiscV/RiscVCompSystem.cpp b/Core/MIPS/RiscV/RiscVCompSystem.cpp
index 878687e64d94..4605648ed8e2 100644
--- a/Core/MIPS/RiscV/RiscVCompSystem.cpp
+++ b/Core/MIPS/RiscV/RiscVCompSystem.cpp
@@ -188,6 +188,7 @@ void RiscVJitBackend::CompIR_System(IRInst inst) {
FlushAll();
SaveStaticRegisters();
+ WriteDebugProfilerStatus(IRProfilerStatus::SYSCALL);
#ifdef USE_PROFILER
// When profiling, we can't skip CallSyscall, since it times syscalls.
LI(X10, (int32_t)inst.constant);
@@ -207,6 +208,7 @@ void RiscVJitBackend::CompIR_System(IRInst inst) {
}
#endif
+ WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
// This is always followed by an ExitToPC, where we check coreState.
break;
@@ -214,7 +216,9 @@ void RiscVJitBackend::CompIR_System(IRInst inst) {
case IROp::CallReplacement:
FlushAll();
SaveStaticRegisters();
+ WriteDebugProfilerStatus(IRProfilerStatus::REPLACEMENT);
QuickCallFunction(GetReplacementFunc(inst.constant)->replaceFunc, SCRATCH2);
+ WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
SUB(DOWNCOUNTREG, DOWNCOUNTREG, X10);
break;
diff --git a/Core/MIPS/RiscV/RiscVJit.cpp b/Core/MIPS/RiscV/RiscVJit.cpp
index 8d3f0155c32e..be4a4534827c 100644
--- a/Core/MIPS/RiscV/RiscVJit.cpp
+++ b/Core/MIPS/RiscV/RiscVJit.cpp
@@ -67,6 +67,8 @@ bool RiscVJitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer()));
wroteCheckedOffset = true;
+ WriteDebugPC(startPC);
+
FixupBranch normalEntry = BGE(DOWNCOUNTREG, R_ZERO);
LI(SCRATCH1, startPC);
QuickJ(R_RA, outerLoopPCInSCRATCH1_);
@@ -118,6 +120,8 @@ bool RiscVJitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
}
if (jo.enableBlocklink && jo.useBackJump) {
+ WriteDebugPC(startPC);
+
// Most blocks shouldn't be >= 4KB, so usually we can just BGE.
if (BInRange(blockStart)) {
BGE(DOWNCOUNTREG, R_ZERO, blockStart);
@@ -218,7 +222,9 @@ void RiscVJitBackend::CompIR_Generic(IRInst inst) {
FlushAll();
LI(X10, value, SCRATCH2);
SaveStaticRegisters();
+ WriteDebugProfilerStatus(IRProfilerStatus::IR_INTERPRET);
QuickCallFunction(&DoIRInst, SCRATCH2);
+ WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
// We only need to check the return value if it's a potential exit.
@@ -241,12 +247,14 @@ void RiscVJitBackend::CompIR_Interpret(IRInst inst) {
// IR protects us against this being a branching instruction (well, hopefully.)
FlushAll();
SaveStaticRegisters();
+ WriteDebugProfilerStatus(IRProfilerStatus::INTERPRET);
if (DebugStatsEnabled()) {
LI(X10, MIPSGetName(op));
QuickCallFunction(&NotifyMIPSInterpret, SCRATCH2);
}
LI(X10, (int32_t)inst.constant);
QuickCallFunction((const u8 *)MIPSGetInterpretFunc(op), SCRATCH2);
+ WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
}
@@ -329,6 +337,32 @@ void RiscVJitBackend::MovToPC(RiscVReg r) {
SW(r, CTXREG, offsetof(MIPSState, pc));
}
+// Emits code storing this compile-time-known PC into the profiler's writable
+// scratch slot, for the sampling thread to read. Clobbers SCRATCH2 and R_RA.
+// No-op unless the profiler is enabled (hooks_.profilerPC set).
+void RiscVJitBackend::WriteDebugPC(uint32_t pc) {
+ if (hooks_.profilerPC) {
+ LI(SCRATCH2, hooks_.profilerPC);
+ LI(R_RA, (int32_t)pc);
+ SW(R_RA, SCRATCH2, 0);
+ }
+}
+
+// Emits code storing a PC held in register r into the profiler slot.
+// Clobbers SCRATCH2, so r must not be SCRATCH2 (it's loaded first).
+// No-op unless the profiler is enabled.
+void RiscVJitBackend::WriteDebugPC(RiscVReg r) {
+ if (hooks_.profilerPC) {
+ LI(SCRATCH2, hooks_.profilerPC);
+ SW(r, SCRATCH2, 0);
+ }
+}
+
+// Emits code storing the current profiler status into its writable slot.
+// Clobbers SCRATCH2 and R_RA. Guarded on profilerPC since both profiler
+// pointers are assigned together during GenerateFixedCode.
+void RiscVJitBackend::WriteDebugProfilerStatus(IRProfilerStatus status) {
+ if (hooks_.profilerPC) {
+ LI(SCRATCH2, hooks_.profilerStatus);
+ LI(R_RA, (int)status);
+ SW(R_RA, SCRATCH2, 0);
+ }
+}
+
void RiscVJitBackend::SaveStaticRegisters() {
if (jo.useStaticAlloc) {
QuickCallFunction(saveStaticRegisters_);
diff --git a/Core/MIPS/RiscV/RiscVJit.h b/Core/MIPS/RiscV/RiscVJit.h
index 46448c4c716b..7ccbcce90b36 100644
--- a/Core/MIPS/RiscV/RiscVJit.h
+++ b/Core/MIPS/RiscV/RiscVJit.h
@@ -50,6 +50,9 @@ class RiscVJitBackend : public RiscVGen::RiscVCodeBlock, public IRNativeBackend
void ApplyRoundingMode(bool force = false);
void MovFromPC(RiscVGen::RiscVReg r);
void MovToPC(RiscVGen::RiscVReg r);
+ void WriteDebugPC(uint32_t pc);
+ void WriteDebugPC(RiscVGen::RiscVReg r);
+ void WriteDebugProfilerStatus(IRProfilerStatus status);
void SaveStaticRegisters();
void LoadStaticRegisters();
diff --git a/Core/MIPS/RiscV/RiscVRegCache.cpp b/Core/MIPS/RiscV/RiscVRegCache.cpp
index 7a3e6505cb3e..25528aa3aacb 100644
--- a/Core/MIPS/RiscV/RiscVRegCache.cpp
+++ b/Core/MIPS/RiscV/RiscVRegCache.cpp
@@ -303,11 +303,11 @@ void RiscVRegCache::AdjustNativeRegAsPtr(IRNativeReg nreg, bool state) {
}
}
-bool RiscVRegCache::IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags) {
+bool RiscVRegCache::IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags, int lanes) {
// No special flags except VREG, skip the check for a little speed.
if (type != MIPSLoc::VREG)
return true;
- return IRNativeRegCacheBase::IsNativeRegCompatible(nreg, type, flags);
+ return IRNativeRegCacheBase::IsNativeRegCompatible(nreg, type, flags, lanes);
}
void RiscVRegCache::LoadNativeReg(IRNativeReg nreg, IRReg first, int lanes) {
diff --git a/Core/MIPS/RiscV/RiscVRegCache.h b/Core/MIPS/RiscV/RiscVRegCache.h
index facfa5219570..e0075f2c619e 100644
--- a/Core/MIPS/RiscV/RiscVRegCache.h
+++ b/Core/MIPS/RiscV/RiscVRegCache.h
@@ -76,7 +76,7 @@ class RiscVRegCache : public IRNativeRegCacheBase {
const int *GetAllocationOrder(MIPSLoc type, MIPSMap flags, int &count, int &base) const override;
void AdjustNativeRegAsPtr(IRNativeReg nreg, bool state) override;
- bool IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags) override;
+ bool IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags, int lanes) override;
void LoadNativeReg(IRNativeReg nreg, IRReg first, int lanes) override;
void StoreNativeReg(IRNativeReg nreg, IRReg first, int lanes) override;
void SetNativeRegValue(IRNativeReg nreg, uint32_t imm) override;
diff --git a/Core/MIPS/x86/Jit.cpp b/Core/MIPS/x86/Jit.cpp
index c7e2e2fed802..2f561a2cd05a 100644
--- a/Core/MIPS/x86/Jit.cpp
+++ b/Core/MIPS/x86/Jit.cpp
@@ -605,7 +605,7 @@ void Jit::Comp_ReplacementFunc(MIPSOpcode op) {
const ReplacementTableEntry *entry = GetReplacementFunc(index);
if (!entry) {
- ERROR_LOG(HLE, "Invalid replacement op %08x", op.encoding);
+ ERROR_LOG_REPORT_ONCE(replFunc, HLE, "Invalid replacement op %08x at %08x", op.encoding, js.compilerPC);
return;
}
@@ -708,7 +708,7 @@ static void HitInvalidBranch(uint32_t dest) {
}
void Jit::WriteExit(u32 destination, int exit_num) {
- _dbg_assert_msg_(exit_num < MAX_JIT_BLOCK_EXITS, "Expected a valid exit_num");
+ _assert_msg_(exit_num < MAX_JIT_BLOCK_EXITS, "Expected a valid exit_num. dest=%08x", destination);
if (!Memory::IsValidAddress(destination) || (destination & 3) != 0) {
ERROR_LOG_REPORT(JIT, "Trying to write block exit to illegal destination %08x: pc = %08x", destination, currentMIPS->pc);
diff --git a/Core/MIPS/x86/X64IRAsm.cpp b/Core/MIPS/x86/X64IRAsm.cpp
index fc763bd07c7d..2e095c4c1288 100644
--- a/Core/MIPS/x86/X64IRAsm.cpp
+++ b/Core/MIPS/x86/X64IRAsm.cpp
@@ -49,8 +49,21 @@ static void ShowPC(void *membase, void *jitbase) {
}
void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
- BeginWrite(GetMemoryProtectPageSize());
+ // This will be used as a writable scratch area, always 32-bit accessible.
const u8 *start = AlignCodePage();
+ if (DebugProfilerEnabled()) {
+ ProtectMemoryPages(start, GetMemoryProtectPageSize(), MEM_PROT_READ | MEM_PROT_WRITE);
+ hooks_.profilerPC = (uint32_t *)GetWritableCodePtr();
+ Write32(0);
+ hooks_.profilerStatus = (IRProfilerStatus *)GetWritableCodePtr();
+ Write32(0);
+ }
+
+ EmitFPUConstants();
+ EmitVecConstants();
+
+ const u8 *disasmStart = AlignCodePage();
+ BeginWrite(GetMemoryProtectPageSize());
jo.downcountInRegister = false;
#if PPSSPP_ARCH(AMD64)
@@ -58,7 +71,7 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
int jitbaseCtxDisp = 0;
// We pre-bake the MIPS_EMUHACK_OPCODE subtraction into our jitbase value.
intptr_t jitbase = (intptr_t)GetBasePtr() - MIPS_EMUHACK_OPCODE;
- if ((jitbase < -0x80000000LL || jitbase > 0x7FFFFFFFLL) && !Accessible((const u8 *)&mipsState->f[0], GetBasePtr())) {
+ if ((jitbase < -0x80000000LL || jitbase > 0x7FFFFFFFLL) && !Accessible((const u8 *)&mipsState->f[0], (const u8 *)jitbase)) {
jo.reserveR15ForAsm = true;
jitbaseInR15 = true;
} else {
@@ -83,8 +96,6 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
if (jo.downcountInRegister)
MOV(32, R(DOWNCOUNTREG), MDisp(CTXREG, downcountOffset));
RET();
-
- start = saveStaticRegisters_;
} else {
saveStaticRegisters_ = nullptr;
loadStaticRegisters_ = nullptr;
@@ -146,14 +157,18 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
MOV(PTRBITS, R(CTXREG), ImmPtr(&mipsState->f[0]));
LoadStaticRegisters();
+ WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
MovFromPC(SCRATCH1);
+ WriteDebugPC(SCRATCH1);
outerLoopPCInSCRATCH1_ = GetCodePtr();
MovToPC(SCRATCH1);
outerLoop_ = GetCodePtr();
// Advance can change the downcount (or thread), so must save/restore around it.
SaveStaticRegisters();
RestoreRoundingMode(true);
+ WriteDebugProfilerStatus(IRProfilerStatus::TIMER_ADVANCE);
ABI_CallFunction(reinterpret_cast<const void *>(&CoreTiming::Advance));
+ WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
ApplyRoundingMode(true);
LoadStaticRegisters();
@@ -209,6 +224,7 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
}
MovFromPC(SCRATCH1);
+ WriteDebugPC(SCRATCH1);
#ifdef MASKED_PSP_MEMORY
AND(32, R(SCRATCH1), Imm32(Memory::MEMVIEW32_MASK));
#endif
@@ -247,7 +263,9 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
// No block found, let's jit. We don't need to save static regs, they're all callee saved.
RestoreRoundingMode(true);
+ WriteDebugProfilerStatus(IRProfilerStatus::COMPILING);
ABI_CallFunction(&MIPSComp::JitAt);
+ WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
ApplyRoundingMode(true);
// Let's just dispatch again, we'll enter the block since we know it's there.
JMP(dispatcherNoCheck_, true);
@@ -265,6 +283,7 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
const uint8_t *quitLoop = GetCodePtr();
SetJumpTarget(badCoreState);
+ WriteDebugProfilerStatus(IRProfilerStatus::NOT_RUNNING);
SaveStaticRegisters();
RestoreRoundingMode(true);
ABI_PopAllCalleeSavedRegsAndAdjustStack();
@@ -283,16 +302,13 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
// Leave this at the end, add more stuff above.
if (enableDisasm) {
#if PPSSPP_ARCH(AMD64)
- std::vector<std::string> lines = DisassembleX86(start, (int)(GetCodePtr() - start));
+ std::vector<std::string> lines = DisassembleX86(disasmStart, (int)(GetCodePtr() - disasmStart));
for (auto s : lines) {
INFO_LOG(JIT, "%s", s.c_str());
}
#endif
}
- EmitFPUConstants();
- EmitVecConstants();
-
// Let's spare the pre-generated code from unprotect-reprotect.
AlignCodePage();
jitStartOffset_ = (int)(GetCodePtr() - start);
diff --git a/Core/MIPS/x86/X64IRCompALU.cpp b/Core/MIPS/x86/X64IRCompALU.cpp
index fc8d7c9b140b..66fe205b2057 100644
--- a/Core/MIPS/x86/X64IRCompALU.cpp
+++ b/Core/MIPS/x86/X64IRCompALU.cpp
@@ -151,8 +151,52 @@ void X64JitBackend::CompIR_Bits(IRInst inst) {
break;
case IROp::ReverseBits:
+ regs_.Map(inst);
+ if (inst.src1 != inst.dest) {
+ MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));
+ }
+
+ // Swap even/odd bits (in bits: 0123 -> 1032.)
+ LEA(32, SCRATCH1, MScaled(regs_.RX(inst.dest), 2, 0));
+ SHR(32, regs_.R(inst.dest), Imm8(1));
+ XOR(32, regs_.R(inst.dest), R(SCRATCH1));
+ AND(32, regs_.R(inst.dest), Imm32(0x55555555));
+ XOR(32, regs_.R(inst.dest), R(SCRATCH1));
+
+ // Swap pairs of bits (in bits: 10325476 -> 32107654.)
+ LEA(32, SCRATCH1, MScaled(regs_.RX(inst.dest), 4, 0));
+ SHR(32, regs_.R(inst.dest), Imm8(2));
+ XOR(32, regs_.R(inst.dest), R(SCRATCH1));
+ AND(32, regs_.R(inst.dest), Imm32(0x33333333));
+ XOR(32, regs_.R(inst.dest), R(SCRATCH1));
+
+ // Swap nibbles (in nibbles: ABCD -> BADC.)
+ MOV(32, R(SCRATCH1), regs_.R(inst.dest));
+ SHL(32, R(SCRATCH1), Imm8(4));
+ SHR(32, regs_.R(inst.dest), Imm8(4));
+ XOR(32, regs_.R(inst.dest), R(SCRATCH1));
+ AND(32, regs_.R(inst.dest), Imm32(0x0F0F0F0F));
+ XOR(32, regs_.R(inst.dest), R(SCRATCH1));
+
+ // Finally, swap the bytes to drop everything into place (nibbles: BADCFEHG -> HGFEDCBA.)
+ BSWAP(32, regs_.RX(inst.dest));
+ break;
+
case IROp::BSwap16:
- CompIR_Generic(inst);
+ regs_.Map(inst);
+ if (cpu_info.bBMI2) {
+ // Rotate to put it into the correct register, then swap.
+ if (inst.dest != inst.src1)
+ RORX(32, regs_.RX(inst.dest), regs_.R(inst.src1), 16);
+ else
+ ROR(32, regs_.R(inst.dest), Imm8(16));
+ BSWAP(32, regs_.RX(inst.dest));
+ } else {
+ if (inst.dest != inst.src1)
+ MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));
+ BSWAP(32, regs_.RX(inst.dest));
+ ROR(32, regs_.R(inst.dest), Imm8(16));
+ }
break;
case IROp::Clz:
@@ -220,8 +264,24 @@ void X64JitBackend::CompIR_Compare(IRInst inst) {
break;
case IROp::SltU:
- regs_.Map(inst);
- setCC(regs_.R(inst.src2), CC_B);
+ if (regs_.IsGPRImm(inst.src1) && regs_.GetGPRImm(inst.src1) == 0) {
+ // This is kinda common, same as != 0. Avoid flushing src1.
+ regs_.SpillLockGPR(inst.src2, inst.dest);
+ regs_.MapGPR(inst.src2);
+ regs_.MapGPR(inst.dest, MIPSMap::NOINIT);
+ if (inst.dest != inst.src2 && regs_.HasLowSubregister(regs_.RX(inst.dest))) {
+ XOR(32, regs_.R(inst.dest), regs_.R(inst.dest));
+ TEST(32, regs_.R(inst.src2), regs_.R(inst.src2));
+ SETcc(CC_NE, regs_.R(inst.dest));
+ } else {
+ CMP(32, regs_.R(inst.src2), Imm8(0));
+ SETcc(CC_NE, R(SCRATCH1));
+ MOVZX(32, 8, regs_.RX(inst.dest), R(SCRATCH1));
+ }
+ } else {
+ regs_.Map(inst);
+ setCC(regs_.R(inst.src2), CC_B);
+ }
break;
case IROp::SltUConst:
diff --git a/Core/MIPS/x86/X64IRCompFPU.cpp b/Core/MIPS/x86/X64IRCompFPU.cpp
index 644dff7138ba..0085cadb5460 100644
--- a/Core/MIPS/x86/X64IRCompFPU.cpp
+++ b/Core/MIPS/x86/X64IRCompFPU.cpp
@@ -43,10 +43,12 @@ using namespace X64IRJitConstants;
void X64JitBackend::EmitFPUConstants() {
EmitConst4x32(&constants.noSignMask, 0x7FFFFFFF);
EmitConst4x32(&constants.signBitAll, 0x80000000);
+ EmitConst4x32(&constants.positiveZeroes, 0x00000000);
EmitConst4x32(&constants.positiveInfinity, 0x7F800000);
EmitConst4x32(&constants.qNAN, 0x7FC00000);
EmitConst4x32(&constants.positiveOnes, 0x3F800000);
EmitConst4x32(&constants.negativeOnes, 0xBF800000);
+ EmitConst4x32(&constants.maxIntBelowAsFloat, 0x4EFFFFFF);
constants.mulTableVi2f = (const float *)GetCodePointer();
for (uint8_t i = 0; i < 32; ++i) {
@@ -57,20 +59,14 @@ void X64JitBackend::EmitFPUConstants() {
Write32(val);
}
- constants.mulTableVf2i = (const double *)GetCodePointer();
+ constants.mulTableVf2i = (const float *)GetCodePointer();
for (uint8_t i = 0; i < 32; ++i) {
- double fval = (1UL << i);
- uint64_t val;
+ float fval = (float)(1ULL << i);
+ uint32_t val;
memcpy(&val, &fval, sizeof(val));
- Write64(val);
+ Write32(val);
}
-
- // Note: this first one is (double)(int)0x80000000, sign extended.
- constants.minIntAsDouble = (const double *)GetCodePointer();
- Write64(0xC1E0000000000000ULL);
- constants.maxIntAsDouble = (const double *)GetCodePointer();
- Write64(0x41DFFFFFFFC00000ULL);
}
void X64JitBackend::CopyVec4ToFPRLane0(Gen::X64Reg dest, Gen::X64Reg src, int lane) {
@@ -210,9 +206,9 @@ void X64JitBackend::CompIR_FAssign(IRInst inst) {
// Just to make sure we don't generate bad code.
if (inst.dest == inst.src1)
break;
- if (regs_.IsFPRMapped(inst.src1 & 3) && regs_.GetFPRLaneCount(inst.src1 & ~3) == 4 && (inst.dest & ~3) != (inst.src1 & ~3)) {
+ if (regs_.IsFPRMapped(inst.src1 & 3) && regs_.GetFPRLaneCount(inst.src1) == 4 && (inst.dest & ~3) != (inst.src1 & ~3)) {
// Okay, this is an extract. Avoid unvec4ing src1.
- regs_.SpillLockFPR(inst.src1);
+ regs_.SpillLockFPR(inst.src1 & ~3);
regs_.MapFPR(inst.dest, MIPSMap::NOINIT);
CopyVec4ToFPRLane0(regs_.FX(inst.dest), regs_.FX(inst.src1 & ~3), inst.src1 & 3);
} else {
@@ -233,8 +229,30 @@ void X64JitBackend::CompIR_FAssign(IRInst inst) {
break;
case IROp::FSign:
- CompIR_Generic(inst);
+ {
+ X64Reg tempReg = regs_.MapWithFPRTemp(inst);
+
+ // Set tempReg to +1.0 or -1.0 per sign bit.
+ if (cpu_info.bAVX) {
+ VANDPS(128, tempReg, regs_.FX(inst.src1), M(constants.signBitAll)); // rip accessible
+ } else {
+ MOVAPS(tempReg, regs_.F(inst.src1));
+ ANDPS(tempReg, M(constants.signBitAll)); // rip accessible
+ }
+ ORPS(tempReg, M(constants.positiveOnes)); // rip accessible
+
+ // Set dest = 0xFFFFFFFF if +0.0 or -0.0.
+ if (inst.dest != inst.src1) {
+ XORPS(regs_.FX(inst.dest), regs_.F(inst.dest));
+ CMPPS(regs_.FX(inst.dest), regs_.F(inst.src1), CMP_EQ);
+ } else {
+ CMPPS(regs_.FX(inst.dest), M(constants.positiveZeroes), CMP_EQ); // rip accessible
+ }
+
+ // Now not the mask to keep zero if it was zero.
+ ANDNPS(regs_.FX(inst.dest), R(tempReg));
break;
+ }
default:
INVALIDOP;
@@ -273,25 +291,22 @@ void X64JitBackend::CompIR_FCompare(IRInst inst) {
break;
case IRFpCompareMode::EqualOrdered:
+ {
+ // Since UCOMISS doesn't give us ordered == directly, CMPSS is better.
+ regs_.SpillLockFPR(inst.src1, inst.src2);
+ X64Reg tempReg = regs_.GetAndLockTempFPR();
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
- // Clear the upper bits of SCRATCH1 so we can AND later.
- // We don't have a single flag we can check, unfortunately.
- XOR(32, R(SCRATCH1), R(SCRATCH1));
- UCOMISS(regs_.FX(inst.src1), regs_.F(inst.src2));
- // E/ZF = EQUAL or UNORDERED (not exactly what we want.)
- SETcc(CC_E, R(SCRATCH1));
- if (regs_.HasLowSubregister(regs_.RX(IRREG_FPCOND))) {
- // NP/!PF = ORDERED.
- SETcc(CC_NP, regs_.R(IRREG_FPCOND));
- AND(32, regs_.R(IRREG_FPCOND), R(SCRATCH1));
+
+ if (cpu_info.bAVX) {
+ VCMPSS(tempReg, regs_.FX(inst.src1), regs_.F(inst.src2), CMP_EQ);
} else {
- MOVZX(32, 8, regs_.RX(IRREG_FPCOND), R(SCRATCH1));
- // Neither of those affected flags, luckily.
- // NP/!PF = ORDERED.
- SETcc(CC_NP, R(SCRATCH1));
- AND(32, regs_.R(IRREG_FPCOND), R(SCRATCH1));
+ MOVAPS(tempReg, regs_.F(inst.src1));
+ CMPSS(tempReg, regs_.F(inst.src2), CMP_EQ);
}
+ MOVD_xmm(regs_.R(IRREG_FPCOND), tempReg);
+ AND(32, regs_.R(IRREG_FPCOND), Imm32(1));
break;
+ }
case IRFpCompareMode::EqualUnordered:
regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
@@ -458,23 +473,69 @@ void X64JitBackend::CompIR_FCompare(IRInst inst) {
case IROp::FCmpVfpuAggregate:
regs_.MapGPR(IRREG_VFPU_CC, MIPSMap::DIRTY);
- // First, clear out the bits we're aggregating.
- // The register refuses writes to bits outside 0x3F, and we're setting 0x30.
- AND(32, regs_.R(IRREG_VFPU_CC), Imm8(0xF));
+ if (inst.dest == 1) {
+ // Special case 1, which is not uncommon.
+ AND(32, regs_.R(IRREG_VFPU_CC), Imm8(0xF));
+ BT(32, regs_.R(IRREG_VFPU_CC), Imm8(0));
+ FixupBranch skip = J_CC(CC_NC);
+ OR(32, regs_.R(IRREG_VFPU_CC), Imm8(0x30));
+ SetJumpTarget(skip);
+ } else if (inst.dest == 3) {
+ AND(32, regs_.R(IRREG_VFPU_CC), Imm8(0xF));
+ MOV(32, R(SCRATCH1), regs_.R(IRREG_VFPU_CC));
+ AND(32, R(SCRATCH1), Imm8(3));
+ // 0, 1, and 3 are already correct for the any and all bits.
+ CMP(32, R(SCRATCH1), Imm8(2));
+
+ FixupBranch skip = J_CC(CC_NE);
+ SUB(32, R(SCRATCH1), Imm8(1));
+ SetJumpTarget(skip);
- // Set the any bit.
- TEST(32, regs_.R(IRREG_VFPU_CC), Imm32(inst.dest));
- SETcc(CC_NZ, R(SCRATCH1));
- SHL(32, R(SCRATCH1), Imm8(4));
- OR(32, regs_.R(IRREG_VFPU_CC), R(SCRATCH1));
+ SHL(32, R(SCRATCH1), Imm8(4));
+ OR(32, regs_.R(IRREG_VFPU_CC), R(SCRATCH1));
+ } else if (inst.dest == 0xF) {
+ XOR(32, R(SCRATCH1), R(SCRATCH1));
- // Next up, the "all" bit. A bit annoying...
- MOV(32, R(SCRATCH1), regs_.R(IRREG_VFPU_CC));
- AND(32, R(SCRATCH1), Imm8(inst.dest));
- CMP(32, R(SCRATCH1), Imm8(inst.dest));
- SETcc(CC_E, R(SCRATCH1));
- SHL(32, R(SCRATCH1), Imm8(5));
- OR(32, regs_.R(IRREG_VFPU_CC), R(SCRATCH1));
+ // Clear out the bits we're aggregating.
+ // The register refuses writes to bits outside 0x3F, and we're setting 0x30.
+ AND(32, regs_.R(IRREG_VFPU_CC), Imm8(0xF));
+
+ // Set the any bit, just using the AND above.
+ FixupBranch noneSet = J_CC(CC_Z);
+ OR(32, regs_.R(IRREG_VFPU_CC), Imm8(0x10));
+
+ // Next up, the "all" bit.
+ CMP(32, regs_.R(IRREG_VFPU_CC), Imm8(0xF));
+ SETcc(CC_E, R(SCRATCH1));
+ SHL(32, R(SCRATCH1), Imm8(5));
+ OR(32, regs_.R(IRREG_VFPU_CC), R(SCRATCH1));
+
+ SetJumpTarget(noneSet);
+ } else {
+ XOR(32, R(SCRATCH1), R(SCRATCH1));
+
+ // Clear out the bits we're aggregating.
+ // The register refuses writes to bits outside 0x3F, and we're setting 0x30.
+ AND(32, regs_.R(IRREG_VFPU_CC), Imm8(0xF));
+
+ // Set the any bit.
+ if (regs_.HasLowSubregister(regs_.RX(IRREG_VFPU_CC)))
+ TEST(8, regs_.R(IRREG_VFPU_CC), Imm8(inst.dest));
+ else
+ TEST(32, regs_.R(IRREG_VFPU_CC), Imm32(inst.dest));
+ FixupBranch noneSet = J_CC(CC_Z);
+ OR(32, regs_.R(IRREG_VFPU_CC), Imm8(0x10));
+
+ // Next up, the "all" bit. A bit annoying...
+ MOV(32, R(SCRATCH1), regs_.R(IRREG_VFPU_CC));
+ AND(32, R(SCRATCH1), Imm8(inst.dest));
+ CMP(32, R(SCRATCH1), Imm8(inst.dest));
+ SETcc(CC_E, R(SCRATCH1));
+ SHL(32, R(SCRATCH1), Imm8(5));
+ OR(32, regs_.R(IRREG_VFPU_CC), R(SCRATCH1));
+
+ SetJumpTarget(noneSet);
+ }
break;
default:
@@ -579,11 +640,14 @@ void X64JitBackend::CompIR_FCvt(IRInst inst) {
case IROp::FCvtWS:
{
regs_.Map(inst);
- UCOMISS(regs_.FX(inst.src1), M(constants.positiveInfinity)); // rip accessible
+ UCOMISS(regs_.FX(inst.src1), M(constants.maxIntBelowAsFloat)); // rip accessible
CVTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.src1));
- // UCOMISS set ZF if EQUAL (to infinity) or UNORDERED.
- FixupBranch skip = J_CC(CC_NZ);
+ // UCOMISS set CF if LESS and ZF if EQUAL to maxIntBelowAsFloat.
+ // We want noSignMask otherwise, GREATER or UNORDERED.
+ FixupBranch isNAN = J_CC(CC_P);
+ FixupBranch skip = J_CC(CC_BE);
+ SetJumpTarget(isNAN);
MOVAPS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible
SetJumpTarget(skip);
@@ -599,54 +663,65 @@ void X64JitBackend::CompIR_FCvt(IRInst inst) {
regs_.Map(inst);
if (cpu_info.bSSE4_1) {
int scale = inst.src2 & 0x1F;
- int rmode = inst.src2 >> 6;
+ IRRoundMode rmode = (IRRoundMode)(inst.src2 >> 6);
- CVTSS2SD(regs_.FX(inst.dest), regs_.F(inst.src1));
- if (scale != 0)
- MULSD(regs_.FX(inst.dest), M(&constants.mulTableVf2i[scale])); // rip accessible
+ if (scale != 0 && cpu_info.bAVX) {
+ VMULSS(regs_.FX(inst.dest), regs_.FX(inst.src1), M(&constants.mulTableVf2i[scale])); // rip accessible
+ } else {
+ if (inst.dest != inst.src1)
+ MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
+ if (scale != 0)
+ MULSS(regs_.FX(inst.dest), M(&constants.mulTableVf2i[scale])); // rip accessible
+ }
- // On NAN, we want maxInt anyway, so let's let it be the second param.
- MAXSD(regs_.FX(inst.dest), M(constants.minIntAsDouble)); // rip accessible
- MINSD(regs_.FX(inst.dest), M(constants.maxIntAsDouble)); // rip accessible
+ UCOMISS(regs_.FX(inst.dest), M(constants.maxIntBelowAsFloat)); // rip accessible
switch (rmode) {
- case 0:
- ROUNDNEARPD(regs_.FX(inst.dest), regs_.F(inst.dest));
- CVTPD2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
+ case IRRoundMode::RINT_0:
+ ROUNDNEARPS(regs_.FX(inst.dest), regs_.F(inst.dest));
+ CVTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
break;
- case 1:
- CVTTPD2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
+ case IRRoundMode::CAST_1:
+ CVTTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
break;
- case 2:
- ROUNDCEILPD(regs_.FX(inst.dest), regs_.F(inst.dest));
- CVTPD2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
+ case IRRoundMode::CEIL_2:
+ ROUNDCEILPS(regs_.FX(inst.dest), regs_.F(inst.dest));
+ CVTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
break;
- case 3:
- ROUNDFLOORPD(regs_.FX(inst.dest), regs_.F(inst.dest));
- CVTPD2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
+ case IRRoundMode::FLOOR_3:
+ ROUNDFLOORPS(regs_.FX(inst.dest), regs_.F(inst.dest));
+ CVTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
break;
}
+
+ // UCOMISS set CF if LESS and ZF if EQUAL to maxIntBelowAsFloat.
+ // We want noSignMask otherwise, GREATER or UNORDERED.
+ FixupBranch isNAN = J_CC(CC_P);
+ FixupBranch skip = J_CC(CC_BE);
+ SetJumpTarget(isNAN);
+ MOVAPS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible
+ SetJumpTarget(skip);
} else {
int scale = inst.src2 & 0x1F;
- int rmode = inst.src2 >> 6;
+ IRRoundMode rmode = (IRRoundMode)(inst.src2 >> 6);
int setMXCSR = -1;
bool useTrunc = false;
switch (rmode) {
- case 0:
+ case IRRoundMode::RINT_0:
// TODO: Could skip if hasSetRounding, but we don't have the flag.
setMXCSR = 0;
break;
- case 1:
+ case IRRoundMode::CAST_1:
useTrunc = true;
break;
- case 2:
+ case IRRoundMode::CEIL_2:
setMXCSR = 2;
break;
- case 3:
+ case IRRoundMode::FLOOR_3:
setMXCSR = 1;
break;
}
@@ -665,21 +740,26 @@ void X64JitBackend::CompIR_FCvt(IRInst inst) {
LDMXCSR(MDisp(CTXREG, tempOffset));
}
- CVTSS2SD(regs_.FX(inst.dest), regs_.F(inst.src1));
+ if (inst.dest != inst.src1)
+ MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
if (scale != 0)
- MULSD(regs_.FX(inst.dest), M(&constants.mulTableVf2i[scale]));
+ MULSS(regs_.FX(inst.dest), M(&constants.mulTableVf2i[scale])); // rip accessible
- // On NAN, we want maxInt anyway, so let's let it be the second param.
- MAXSD(regs_.FX(inst.dest), M(constants.minIntAsDouble));
- MINSD(regs_.FX(inst.dest), M(constants.maxIntAsDouble));
+ UCOMISS(regs_.FX(inst.dest), M(constants.maxIntBelowAsFloat)); // rip accessible
if (useTrunc) {
- CVTTSD2SI(SCRATCH1, regs_.F(inst.dest));
+ CVTTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
} else {
- CVTSD2SI(SCRATCH1, regs_.F(inst.dest));
+ CVTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
}
- MOVD_xmm(regs_.FX(inst.dest), R(SCRATCH1));
+ // UCOMISS set CF if LESS and ZF if EQUAL to maxIntBelowAsFloat.
+ // We want noSignMask otherwise, GREATER or UNORDERED.
+ FixupBranch isNAN = J_CC(CC_P);
+ FixupBranch skip = J_CC(CC_BE);
+ SetJumpTarget(isNAN);
+ MOVAPS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible
+ SetJumpTarget(skip);
// Return MXCSR to its previous value.
if (setMXCSR != -1) {
@@ -704,46 +784,105 @@ void X64JitBackend::CompIR_FRound(IRInst inst) {
CONDITIONAL_DISABLE;
switch (inst.op) {
+ case IROp::FCeil:
+ case IROp::FFloor:
case IROp::FRound:
- CompIR_Generic(inst);
- break;
+ if (cpu_info.bSSE4_1) {
+ regs_.Map(inst);
+ UCOMISS(regs_.FX(inst.src1), M(constants.maxIntBelowAsFloat)); // rip accessible
- case IROp::FTrunc:
- {
- regs_.SpillLockFPR(inst.dest, inst.src1);
- X64Reg tempZero = regs_.GetAndLockTempFPR();
- regs_.Map(inst);
+ switch (inst.op) {
+ case IROp::FCeil:
+ ROUNDCEILPS(regs_.FX(inst.dest), regs_.F(inst.src1));
+ break;
- CVTTSS2SI(SCRATCH1, regs_.F(inst.src1));
+ case IROp::FFloor:
+ ROUNDFLOORPS(regs_.FX(inst.dest), regs_.F(inst.src1));
+ break;
- // Did we get an indefinite integer value?
- CMP(32, R(SCRATCH1), Imm32(0x80000000));
- FixupBranch wasExact = J_CC(CC_NE);
+ case IROp::FRound:
+ ROUNDNEARPS(regs_.FX(inst.dest), regs_.F(inst.src1));
+ break;
- XORPS(tempZero, R(tempZero));
- if (inst.dest == inst.src1) {
- CMPSS(regs_.FX(inst.dest), R(tempZero), CMP_LT);
- } else if (cpu_info.bAVX) {
- VCMPSS(regs_.FX(inst.dest), regs_.FX(inst.src1), R(tempZero), CMP_LT);
+ default:
+ INVALIDOP;
+ }
+ CVTTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
+ // UCOMISS set CF if LESS and ZF if EQUAL to maxIntBelowAsFloat.
+ // We want noSignMask otherwise, GREATER or UNORDERED.
+ FixupBranch isNAN = J_CC(CC_P);
+ FixupBranch skip = J_CC(CC_BE);
+ SetJumpTarget(isNAN);
+ MOVAPS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible
+
+ SetJumpTarget(skip);
} else {
- MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
- CMPSS(regs_.FX(inst.dest), R(tempZero), CMP_LT);
- }
+ regs_.Map(inst);
- // At this point, -inf = 0xffffffff, inf/nan = 0x00000000.
- // We want -inf to be 0x80000000 inf/nan to be 0x7fffffff, so we flip those bits.
- MOVD_xmm(R(SCRATCH1), regs_.FX(inst.dest));
- XOR(32, R(SCRATCH1), Imm32(0x7fffffff));
+ int setMXCSR = -1;
+ switch (inst.op) {
+ case IROp::FRound:
+ // TODO: Could skip if hasSetRounding, but we don't have the flag.
+ setMXCSR = 0;
+ break;
+ case IROp::FCeil:
+ setMXCSR = 2;
+ break;
+ case IROp::FFloor:
+ setMXCSR = 1;
+ break;
+ default:
+ INVALIDOP;
+ }
- SetJumpTarget(wasExact);
- MOVD_xmm(regs_.FX(inst.dest), R(SCRATCH1));
+ // TODO: Might be possible to cache this and update between instructions?
+ // Probably kinda expensive to switch each time...
+ if (setMXCSR != -1) {
+ STMXCSR(MDisp(CTXREG, mxcsrTempOffset));
+ MOV(32, R(SCRATCH1), MDisp(CTXREG, mxcsrTempOffset));
+ AND(32, R(SCRATCH1), Imm32(~(3 << 13)));
+ if (setMXCSR != 0) {
+ OR(32, R(SCRATCH1), Imm32(setMXCSR << 13));
+ }
+ MOV(32, MDisp(CTXREG, tempOffset), R(SCRATCH1));
+ LDMXCSR(MDisp(CTXREG, tempOffset));
+ }
+
+ UCOMISS(regs_.FX(inst.src1), M(constants.maxIntBelowAsFloat)); // rip accessible
+
+ CVTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.src1));
+ // UCOMISS set CF if LESS and ZF if EQUAL to maxIntBelowAsFloat.
+ // We want noSignMask otherwise, GREATER or UNORDERED.
+ FixupBranch isNAN = J_CC(CC_P);
+ FixupBranch skip = J_CC(CC_BE);
+ SetJumpTarget(isNAN);
+ MOVAPS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible
+
+ SetJumpTarget(skip);
+
+ // Return MXCSR to its previous value.
+ if (setMXCSR != -1) {
+ LDMXCSR(MDisp(CTXREG, mxcsrTempOffset));
+ }
+ }
break;
- }
- case IROp::FCeil:
- case IROp::FFloor:
- CompIR_Generic(inst);
+ case IROp::FTrunc:
+ {
+ regs_.Map(inst);
+ UCOMISS(regs_.FX(inst.src1), M(constants.maxIntBelowAsFloat)); // rip accessible
+
+ CVTTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.src1));
+ // UCOMISS set CF if LESS and ZF if EQUAL to maxIntBelowAsFloat.
+ // We want noSignMask otherwise, GREATER or UNORDERED.
+ FixupBranch isNAN = J_CC(CC_P);
+ FixupBranch skip = J_CC(CC_BE);
+ SetJumpTarget(isNAN);
+ MOVAPS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible
+
+ SetJumpTarget(skip);
break;
+ }
default:
INVALIDOP;
@@ -833,6 +972,7 @@ void X64JitBackend::CompIR_FSpecial(IRInst inst) {
auto callFuncF_F = [&](const void *func) {
regs_.FlushBeforeCall();
+ WriteDebugProfilerStatus(IRProfilerStatus::MATH_HELPER);
#if X64JIT_USE_XMM_CALL
if (regs_.IsFPRMapped(inst.src1)) {
@@ -865,6 +1005,8 @@ void X64JitBackend::CompIR_FSpecial(IRInst inst) {
regs_.MapFPR(inst.dest, MIPSMap::NOINIT);
MOVD_xmm(regs_.FX(inst.dest), R(SCRATCH1));
#endif
+
+ WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
};
switch (inst.op) {
diff --git a/Core/MIPS/x86/X64IRCompLoadStore.cpp b/Core/MIPS/x86/X64IRCompLoadStore.cpp
index d033832bf3f0..9b3eea1341d4 100644
--- a/Core/MIPS/x86/X64IRCompLoadStore.cpp
+++ b/Core/MIPS/x86/X64IRCompLoadStore.cpp
@@ -45,35 +45,41 @@ Gen::OpArg X64JitBackend::PrepareSrc1Address(IRInst inst) {
// If it's about to be clobbered, don't waste time pointerifying. Use displacement.
bool clobbersSrc1 = !readsFromSrc1 && regs_.IsGPRClobbered(inst.src1);
+ int32_t disp = (int32_t)inst.constant;
+ // It can't be this negative, must be a constant address with the top bit set.
+ if ((disp & 0xC0000000) == 0x80000000) {
+ disp = inst.constant & 0x7FFFFFFF;
+ }
+
#ifdef MASKED_PSP_MEMORY
- if (inst.constant > 0)
- inst.constant &= Memory::MEMVIEW32_MASK;
+ if (disp > 0)
+ disp &= Memory::MEMVIEW32_MASK;
#endif
OpArg addrArg;
if (inst.src1 == MIPS_REG_ZERO) {
#ifdef MASKED_PSP_MEMORY
- inst.constant &= Memory::MEMVIEW32_MASK;
+ disp &= Memory::MEMVIEW32_MASK;
#endif
#if PPSSPP_ARCH(AMD64)
- addrArg = MDisp(MEMBASEREG, inst.constant & 0x7FFFFFFF);
+ addrArg = MDisp(MEMBASEREG, disp & 0x7FFFFFFF);
#else
- addrArg = M(Memory::base + inst.constant);
+ addrArg = M(Memory::base + disp);
#endif
} else if ((jo.cachePointers || src1IsPointer) && !readsFromSrc1 && (!clobbersSrc1 || src1IsPointer)) {
X64Reg src1 = regs_.MapGPRAsPointer(inst.src1);
- addrArg = MDisp(src1, (int)inst.constant);
+ addrArg = MDisp(src1, disp);
} else {
regs_.MapGPR(inst.src1);
#ifdef MASKED_PSP_MEMORY
- LEA(PTRBITS, SCRATCH1, MDisp(regs_.RX(inst.src1), (int)inst.constant));
+ LEA(PTRBITS, SCRATCH1, MDisp(regs_.RX(inst.src1), disp));
AND(PTRBITS, R(SCRATCH1), Imm32(Memory::MEMVIEW32_MASK));
addrArg = MDisp(SCRATCH1, (intptr_t)Memory::base);
#else
#if PPSSPP_ARCH(AMD64)
- addrArg = MComplex(MEMBASEREG, regs_.RX(inst.src1), SCALE_1, (int)inst.constant);
+ addrArg = MComplex(MEMBASEREG, regs_.RX(inst.src1), SCALE_1, disp);
#else
- addrArg = MDisp(regs_.RX(inst.src1), Memory::base + inst.constant);
+ addrArg = MDisp(regs_.RX(inst.src1), Memory::base + disp);
#endif
#endif
}
diff --git a/Core/MIPS/x86/X64IRCompSystem.cpp b/Core/MIPS/x86/X64IRCompSystem.cpp
index b310aade78d0..9d1723aef552 100644
--- a/Core/MIPS/x86/X64IRCompSystem.cpp
+++ b/Core/MIPS/x86/X64IRCompSystem.cpp
@@ -20,9 +20,11 @@
#include "Common/Profiler/Profiler.h"
#include "Core/Core.h"
+#include "Core/Debugger/Breakpoints.h"
#include "Core/HLE/HLE.h"
#include "Core/HLE/ReplaceTables.h"
#include "Core/MemMap.h"
+#include "Core/MIPS/MIPSAnalyst.h"
#include "Core/MIPS/IR/IRInterpreter.h"
#include "Core/MIPS/x86/X64IRJit.h"
#include "Core/MIPS/x86/X64IRRegCache.h"
@@ -62,6 +64,20 @@ void X64JitBackend::CompIR_Basic(IRInst inst) {
regs_.Map(inst);
if (inst.constant == 0) {
XORPS(regs_.FX(inst.dest), regs_.F(inst.dest));
+ } else if (inst.constant == 0x7FFFFFFF) {
+ MOVSS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible
+ } else if (inst.constant == 0x80000000) {
+ MOVSS(regs_.FX(inst.dest), M(constants.signBitAll)); // rip accessible
+ } else if (inst.constant == 0x7F800000) {
+ MOVSS(regs_.FX(inst.dest), M(constants.positiveInfinity)); // rip accessible
+ } else if (inst.constant == 0x7FC00000) {
+ MOVSS(regs_.FX(inst.dest), M(constants.qNAN)); // rip accessible
+ } else if (inst.constant == 0x3F800000) {
+ MOVSS(regs_.FX(inst.dest), M(constants.positiveOnes)); // rip accessible
+ } else if (inst.constant == 0xBF800000) {
+ MOVSS(regs_.FX(inst.dest), M(constants.negativeOnes)); // rip accessible
+ } else if (inst.constant == 0x4EFFFFFF) {
+ MOVSS(regs_.FX(inst.dest), M(constants.maxIntBelowAsFloat)); // rip accessible
} else {
MOV(32, R(SCRATCH1), Imm32(inst.constant));
MOVD_xmm(regs_.FX(inst.dest), R(SCRATCH1));
@@ -74,6 +90,7 @@ void X64JitBackend::CompIR_Basic(IRInst inst) {
break;
case IROp::SetPCConst:
+ lastConstPC_ = inst.constant;
MOV(32, R(SCRATCH1), Imm32(inst.constant));
MovToPC(SCRATCH1);
break;
@@ -97,17 +114,80 @@ void X64JitBackend::CompIR_Breakpoint(IRInst inst) {
break;
case IROp::MemoryCheck:
- {
- X64Reg addrBase = regs_.MapGPR(inst.src1);
- FlushAll();
- LEA(32, addrBase, MDisp(addrBase, inst.constant));
- MovFromPC(SCRATCH1);
- LEA(32, SCRATCH1, MDisp(SCRATCH1, inst.dest));
- ABI_CallFunctionRR((const void *)&IRRunMemCheck, SCRATCH1, addrBase);
- TEST(32, R(EAX), R(EAX));
- J_CC(CC_NZ, dispatcherCheckCoreState_, true);
+ if (regs_.IsGPRImm(inst.src1)) {
+ uint32_t iaddr = regs_.GetGPRImm(inst.src1) + inst.constant;
+ uint32_t checkedPC = lastConstPC_ + inst.dest;
+ int size = MIPSAnalyst::OpMemoryAccessSize(checkedPC);
+ if (size == 0) {
+ checkedPC += 4;
+ size = MIPSAnalyst::OpMemoryAccessSize(checkedPC);
+ }
+ bool isWrite = MIPSAnalyst::IsOpMemoryWrite(checkedPC);
+
+ MemCheck check;
+ if (CBreakPoints::GetMemCheckInRange(iaddr, size, &check)) {
+ if (!(check.cond & MEMCHECK_READ) && !isWrite)
+ break;
+ if (!(check.cond & (MEMCHECK_WRITE | MEMCHECK_WRITE_ONCHANGE)) && isWrite)
+ break;
+
+ // We need to flush, or conditions and log expressions will see old register values.
+ FlushAll();
+
+ ABI_CallFunctionCC((const void *)&IRRunMemCheck, checkedPC, iaddr);
+ TEST(32, R(EAX), R(EAX));
+ J_CC(CC_NZ, dispatcherCheckCoreState_, true);
+ }
+ } else {
+ uint32_t checkedPC = lastConstPC_ + inst.dest;
+ int size = MIPSAnalyst::OpMemoryAccessSize(checkedPC);
+ if (size == 0) {
+ checkedPC += 4;
+ size = MIPSAnalyst::OpMemoryAccessSize(checkedPC);
+ }
+ bool isWrite = MIPSAnalyst::IsOpMemoryWrite(checkedPC);
+
+ const auto memchecks = CBreakPoints::GetMemCheckRanges(isWrite);
+ // We can trivially skip if there are no checks for this type (i.e. read vs write.)
+ if (memchecks.empty())
+ break;
+
+ X64Reg addrBase = regs_.MapGPR(inst.src1);
+ LEA(32, SCRATCH1, MDisp(addrBase, inst.constant));
+
+ // We need to flush, or conditions and log expressions will see old register values.
+ FlushAll();
+
+ std::vector<FixupBranch> hitChecks;
+ for (auto it : memchecks) {
+ if (it.end != 0) {
+ CMP(32, R(SCRATCH1), Imm32(it.start - size));
+ FixupBranch skipNext = J_CC(CC_BE);
+
+ CMP(32, R(SCRATCH1), Imm32(it.end));
+ hitChecks.push_back(J_CC(CC_B, true));
+
+ SetJumpTarget(skipNext);
+ } else {
+ CMP(32, R(SCRATCH1), Imm32(it.start));
+ hitChecks.push_back(J_CC(CC_E, true));
+ }
+ }
+
+ FixupBranch noHits = J(true);
+
+ // Okay, now land any hit here.
+ for (auto &fixup : hitChecks)
+ SetJumpTarget(fixup);
+ hitChecks.clear();
+
+ ABI_CallFunctionAA((const void *)&IRRunMemCheck, Imm32(checkedPC), R(SCRATCH1));
+ TEST(32, R(EAX), R(EAX));
+ J_CC(CC_NZ, dispatcherCheckCoreState_, true);
+
+ SetJumpTarget(noHits);
+ }
break;
- }
default:
INVALIDOP;
@@ -123,6 +203,7 @@ void X64JitBackend::CompIR_System(IRInst inst) {
FlushAll();
SaveStaticRegisters();
+ WriteDebugProfilerStatus(IRProfilerStatus::SYSCALL);
#ifdef USE_PROFILER
// When profiling, we can't skip CallSyscall, since it times syscalls.
ABI_CallFunctionC((const u8 *)&CallSyscall, inst.constant);
@@ -139,6 +220,7 @@ void X64JitBackend::CompIR_System(IRInst inst) {
}
#endif
+ WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
// This is always followed by an ExitToPC, where we check coreState.
break;
@@ -146,14 +228,26 @@ void X64JitBackend::CompIR_System(IRInst inst) {
case IROp::CallReplacement:
FlushAll();
SaveStaticRegisters();
+ WriteDebugProfilerStatus(IRProfilerStatus::REPLACEMENT);
ABI_CallFunction(GetReplacementFunc(inst.constant)->replaceFunc);
+ WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
//SUB(32, R(DOWNCOUNTREG), R(DOWNCOUNTREG), R(EAX));
SUB(32, MDisp(CTXREG, downcountOffset), R(EAX));
break;
case IROp::Break:
- CompIR_Generic(inst);
+ FlushAll();
+ // This doesn't naturally have restore/apply around it.
+ RestoreRoundingMode(true);
+ SaveStaticRegisters();
+ MovFromPC(SCRATCH1);
+ ABI_CallFunctionR((const void *)&Core_Break, SCRATCH1);
+ LoadStaticRegisters();
+ ApplyRoundingMode(true);
+ MovFromPC(SCRATCH1);
+ LEA(32, SCRATCH1, MDisp(SCRATCH1, 4));
+ JMP(dispatcherPCInSCRATCH1_, true);
break;
default:
@@ -191,8 +285,34 @@ void X64JitBackend::CompIR_Transfer(IRInst inst) {
break;
case IROp::FpCtrlFromReg:
+ regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
+ // Mask out the unused bits, and store fcr31 (using fpcond as a temp.)
+ MOV(32, regs_.R(IRREG_FPCOND), Imm32(0x0181FFFF));
+ AND(32, regs_.R(IRREG_FPCOND), regs_.R(inst.src1));
+ MOV(32, MDisp(CTXREG, fcr31Offset), regs_.R(IRREG_FPCOND));
+
+ // With that done, grab bit 23, the actual fpcond.
+ SHR(32, regs_.R(IRREG_FPCOND), Imm8(23));
+ AND(32, regs_.R(IRREG_FPCOND), Imm32(1));
+ break;
+
case IROp::FpCtrlToReg:
- CompIR_Generic(inst);
+ regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::INIT } });
+ // Start by clearing the fpcond bit (might as well mask while we're here.)
+ MOV(32, regs_.R(inst.dest), Imm32(0x0101FFFF));
+ AND(32, regs_.R(inst.dest), MDisp(CTXREG, fcr31Offset));
+
+ AND(32, regs_.R(IRREG_FPCOND), Imm32(1));
+ if (cpu_info.bBMI2) {
+ RORX(32, SCRATCH1, regs_.R(IRREG_FPCOND), 32 - 23);
+ } else {
+ MOV(32, R(SCRATCH1), regs_.R(IRREG_FPCOND));
+ SHL(32, R(SCRATCH1), Imm8(23));
+ }
+ OR(32, regs_.R(inst.dest), R(SCRATCH1));
+
+ // Update fcr31 while we were here, for consistency.
+ MOV(32, MDisp(CTXREG, fcr31Offset), regs_.R(inst.dest));
break;
case IROp::VfpuCtrlToReg:
@@ -221,23 +341,6 @@ void X64JitBackend::CompIR_Transfer(IRInst inst) {
}
}
-int ReportBadAddress(uint32_t addr, uint32_t alignment, uint32_t isWrite) {
- const auto toss = [&](MemoryExceptionType t) {
- Core_MemoryException(addr, alignment, currentMIPS->pc, t);
- return coreState != CORE_RUNNING ? 1 : 0;
- };
-
- if (!Memory::IsValidRange(addr, alignment)) {
- MemoryExceptionType t = isWrite == 1 ? MemoryExceptionType::WRITE_WORD : MemoryExceptionType::READ_WORD;
- if (alignment > 4)
- t = isWrite ? MemoryExceptionType::WRITE_BLOCK : MemoryExceptionType::READ_BLOCK;
- return toss(t);
- } else if (alignment > 1 && (addr & (alignment - 1)) != 0) {
- return toss(MemoryExceptionType::ALIGNMENT);
- }
- return 0;
-}
-
void X64JitBackend::CompIR_ValidateAddress(IRInst inst) {
CONDITIONAL_DISABLE;
@@ -265,10 +368,17 @@ void X64JitBackend::CompIR_ValidateAddress(IRInst inst) {
break;
}
- // This is unfortunate...
- FlushAll();
- regs_.Map(inst);
- LEA(PTRBITS, SCRATCH1, MDisp(regs_.RX(inst.src1), inst.constant));
+ if (regs_.IsGPRMappedAsPointer(inst.src1)) {
+ LEA(PTRBITS, SCRATCH1, MDisp(regs_.RXPtr(inst.src1), inst.constant));
+#if defined(MASKED_PSP_MEMORY)
+ SUB(PTRBITS, R(SCRATCH1), ImmPtr(Memory::base));
+#else
+ SUB(PTRBITS, R(SCRATCH1), R(MEMBASEREG));
+#endif
+ } else {
+ regs_.Map(inst);
+ LEA(PTRBITS, SCRATCH1, MDisp(regs_.RX(inst.src1), inst.constant));
+ }
AND(32, R(SCRATCH1), Imm32(0x3FFFFFFF));
std::vector validJumps;
@@ -282,25 +392,32 @@ void X64JitBackend::CompIR_ValidateAddress(IRInst inst) {
CMP(32, R(SCRATCH1), Imm32(PSP_GetUserMemoryEnd() - alignment));
FixupBranch tooHighRAM = J_CC(CC_A);
CMP(32, R(SCRATCH1), Imm32(PSP_GetKernelMemoryBase()));
- validJumps.push_back(J_CC(CC_AE));
+ validJumps.push_back(J_CC(CC_AE, true));
CMP(32, R(SCRATCH1), Imm32(PSP_GetVidMemEnd() - alignment));
FixupBranch tooHighVid = J_CC(CC_A);
CMP(32, R(SCRATCH1), Imm32(PSP_GetVidMemBase()));
- validJumps.push_back(J_CC(CC_AE));
+ validJumps.push_back(J_CC(CC_AE, true));
CMP(32, R(SCRATCH1), Imm32(PSP_GetScratchpadMemoryEnd() - alignment));
FixupBranch tooHighScratch = J_CC(CC_A);
CMP(32, R(SCRATCH1), Imm32(PSP_GetScratchpadMemoryBase()));
- validJumps.push_back(J_CC(CC_AE));
+ validJumps.push_back(J_CC(CC_AE, true));
+ if (alignment != 1)
+ SetJumpTarget(unaligned);
SetJumpTarget(tooHighRAM);
SetJumpTarget(tooHighVid);
SetJumpTarget(tooHighScratch);
+ // If we got here, something unusual and bad happened, so we'll always go back to the dispatcher.
+ // Because of that, we can avoid flushing outside this case.
+ auto regsCopy = regs_;
+ regsCopy.FlushAll();
+
+ // Ignores the return value, always returns to the dispatcher.
+ // Otherwise would need a thunk to restore regs.
ABI_CallFunctionACC((const void *)&ReportBadAddress, R(SCRATCH1), alignment, isWrite);
- TEST(32, R(EAX), R(EAX));
- validJumps.push_back(J_CC(CC_Z));
JMP(dispatcherCheckCoreState_, true);
for (FixupBranch &b : validJumps)
diff --git a/Core/MIPS/x86/X64IRJit.cpp b/Core/MIPS/x86/X64IRJit.cpp
index f70901eba8c3..98279e39895d 100644
--- a/Core/MIPS/x86/X64IRJit.cpp
+++ b/Core/MIPS/x86/X64IRJit.cpp
@@ -19,6 +19,7 @@
#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
#include
+#include "Common/StringUtils.h"
#include "Core/MemMap.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/MIPS/x86/X64IRJit.h"
@@ -63,6 +64,8 @@ bool X64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) {
SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer()));
wroteCheckedOffset = true;
+ WriteDebugPC(startPC);
+
// TODO: See if we can get flags to always have the downcount compare.
if (jo.downcountInRegister) {
TEST(32, R(DOWNCOUNTREG), R(DOWNCOUNTREG));
@@ -79,6 +82,7 @@ bool X64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) {
const u8 *blockStart = GetCodePointer();
block->SetTargetOffset((int)GetOffset(blockStart));
compilingBlockNum_ = block_num;
+ lastConstPC_ = 0;
regs_.Start(block);
@@ -120,6 +124,8 @@ bool X64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) {
}
if (jo.enableBlocklink && jo.useBackJump) {
+ WriteDebugPC(startPC);
+
if (jo.downcountInRegister) {
TEST(32, R(DOWNCOUNTREG), R(DOWNCOUNTREG));
} else {
@@ -214,11 +220,13 @@ void X64JitBackend::CompIR_Generic(IRInst inst) {
FlushAll();
SaveStaticRegisters();
+ WriteDebugProfilerStatus(IRProfilerStatus::IR_INTERPRET);
#if PPSSPP_ARCH(AMD64)
ABI_CallFunctionP((const void *)&DoIRInst, (void *)value);
#else
ABI_CallFunctionCC((const void *)&DoIRInst, (u32)(value & 0xFFFFFFFF), (u32)(value >> 32));
#endif
+ WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
// We only need to check the return value if it's a potential exit.
@@ -236,10 +244,12 @@ void X64JitBackend::CompIR_Interpret(IRInst inst) {
// IR protects us against this being a branching instruction (well, hopefully.)
FlushAll();
SaveStaticRegisters();
+ WriteDebugProfilerStatus(IRProfilerStatus::INTERPRET);
if (DebugStatsEnabled()) {
ABI_CallFunctionP((const void *)&NotifyMIPSInterpret, (void *)MIPSGetName(op));
}
ABI_CallFunctionC((const void *)MIPSGetInterpretFunc(op), inst.constant);
+ WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
}
@@ -265,7 +275,31 @@ bool X64JitBackend::DescribeCodePtr(const u8 *ptr, std::string &name) const {
} else if (ptr == applyRoundingMode_) {
name = "applyRoundingMode";
} else if (ptr >= GetBasePtr() && ptr < GetBasePtr() + jitStartOffset_) {
- name = "fixedCode";
+ if (ptr == constants.noSignMask) {
+ name = "constants.noSignMask";
+ } else if (ptr == constants.signBitAll) {
+ name = "constants.signBitAll";
+ } else if (ptr == constants.positiveZeroes) {
+ name = "constants.positiveZeroes";
+ } else if (ptr == constants.positiveInfinity) {
+ name = "constants.positiveInfinity";
+ } else if (ptr == constants.positiveOnes) {
+ name = "constants.positiveOnes";
+ } else if (ptr == constants.negativeOnes) {
+ name = "constants.negativeOnes";
+ } else if (ptr == constants.qNAN) {
+ name = "constants.qNAN";
+ } else if (ptr == constants.maxIntBelowAsFloat) {
+ name = "constants.maxIntBelowAsFloat";
+ } else if ((const float *)ptr >= constants.mulTableVi2f && (const float *)ptr < constants.mulTableVi2f + 32) {
+ name = StringFromFormat("constants.mulTableVi2f[%d]", (int)((const float *)ptr - constants.mulTableVi2f));
+ } else if ((const float *)ptr >= constants.mulTableVf2i && (const float *)ptr < constants.mulTableVf2i + 32) {
+ name = StringFromFormat("constants.mulTableVf2i[%d]", (int)((const float *)ptr - constants.mulTableVf2i));
+ } else if ((const Float4Constant *)ptr >= constants.vec4InitValues && (const Float4Constant *)ptr < constants.vec4InitValues + 8) {
+ name = StringFromFormat("constants.vec4InitValues[%d]", (int)((const Float4Constant *)ptr - constants.vec4InitValues));
+ } else {
+ name = "fixedCode";
+ }
} else {
return IRNativeBackend::DescribeCodePtr(ptr, name);
}
@@ -320,6 +354,21 @@ void X64JitBackend::MovToPC(X64Reg r) {
MOV(32, MDisp(CTXREG, pcOffset), R(r));
}
+void X64JitBackend::WriteDebugPC(uint32_t pc) {
+ if (hooks_.profilerPC)
+ MOV(32, M(hooks_.profilerPC), Imm32(pc));
+}
+
+void X64JitBackend::WriteDebugPC(Gen::X64Reg r) {
+ if (hooks_.profilerPC)
+ MOV(32, M(hooks_.profilerPC), R(r));
+}
+
+void X64JitBackend::WriteDebugProfilerStatus(IRProfilerStatus status) {
+ if (hooks_.profilerPC)
+ MOV(32, M(hooks_.profilerStatus), Imm32((int32_t)status));
+}
+
void X64JitBackend::SaveStaticRegisters() {
if (jo.useStaticAlloc) {
//CALL(saveStaticRegisters_);
diff --git a/Core/MIPS/x86/X64IRJit.h b/Core/MIPS/x86/X64IRJit.h
index 6a2c09aef5d2..15a2fb9b449c 100644
--- a/Core/MIPS/x86/X64IRJit.h
+++ b/Core/MIPS/x86/X64IRJit.h
@@ -66,6 +66,9 @@ class X64JitBackend : public Gen::XCodeBlock, public IRNativeBackend {
void ApplyRoundingMode(bool force = false);
void MovFromPC(Gen::X64Reg r);
void MovToPC(Gen::X64Reg r);
+ void WriteDebugPC(uint32_t pc);
+ void WriteDebugPC(Gen::X64Reg r);
+ void WriteDebugProfilerStatus(IRProfilerStatus status);
void SaveStaticRegisters();
void LoadStaticRegisters();
@@ -144,14 +147,14 @@ class X64JitBackend : public Gen::XCodeBlock, public IRNativeBackend {
struct Constants {
const void *noSignMask;
const void *signBitAll;
+ const void *positiveZeroes;
const void *positiveInfinity;
const void *positiveOnes;
const void *negativeOnes;
const void *qNAN;
+ const void *maxIntBelowAsFloat;
const float *mulTableVi2f;
- const double *mulTableVf2i;
- const double *minIntAsDouble;
- const double *maxIntAsDouble;
+ const float *mulTableVf2i;
const Float4Constant *vec4InitValues;
};
Constants constants;
@@ -159,6 +162,8 @@ class X64JitBackend : public Gen::XCodeBlock, public IRNativeBackend {
int jitStartOffset_ = 0;
int compilingBlockNum_ = -1;
int logBlocks_ = 0;
+ // Only useful in breakpoints, where it's set immediately prior.
+ uint32_t lastConstPC_ = 0;
};
class X64IRJit : public IRNativeJit {
diff --git a/Core/MIPS/x86/X64IRRegCache.cpp b/Core/MIPS/x86/X64IRRegCache.cpp
index a169a43791c8..cfbb57712e26 100644
--- a/Core/MIPS/x86/X64IRRegCache.cpp
+++ b/Core/MIPS/x86/X64IRRegCache.cpp
@@ -147,6 +147,67 @@ void X64IRRegCache::FlushBeforeCall() {
#endif
}
+void X64IRRegCache::FlushAll(bool gprs, bool fprs) {
+ // Note: make sure not to change the registers when flushing:
+ // Branching code may expect the x64reg to retain its value.
+
+ auto needsFlush = [&](IRReg i) {
+ if (mr[i].loc != MIPSLoc::MEM || mr[i].isStatic)
+ return false;
+ if (mr[i].nReg == -1 || !nr[mr[i].nReg].isDirty)
+ return false;
+ return true;
+ };
+
+ auto isSingleFloat = [&](IRReg i) {
+ if (mr[i].lane != -1 || mr[i].loc != MIPSLoc::FREG)
+ return false;
+ return true;
+ };
+
+ // Sometimes, float/vector regs may be in separate regs in a sequence.
+ // It's worth combining and flushing together.
+ for (int i = 1; i < TOTAL_MAPPABLE_IRREGS - 1; ++i) {
+ if (!needsFlush(i) || !needsFlush(i + 1))
+ continue;
+ // GPRs are probably not worth it. Merging Vec2s might be, but pretty uncommon.
+ if (!isSingleFloat(i) || !isSingleFloat(i + 1))
+ continue;
+
+ X64Reg regs[4]{ INVALID_REG, INVALID_REG, INVALID_REG, INVALID_REG };
+ regs[0] = FromNativeReg(mr[i + 0].nReg);
+ regs[1] = FromNativeReg(mr[i + 1].nReg);
+
+ bool flushVec4 = i + 3 < TOTAL_MAPPABLE_IRREGS && needsFlush(i + 2) && needsFlush(i + 3);
+ if (flushVec4 && isSingleFloat(i + 2) && isSingleFloat(i + 3) && (i & 3) == 0) {
+ regs[2] = FromNativeReg(mr[i + 2].nReg);
+ regs[3] = FromNativeReg(mr[i + 3].nReg);
+
+ // Note that this doesn't change the low lane of any of these regs.
+ emit_->UNPCKLPS(regs[1], ::R(regs[3]));
+ emit_->UNPCKLPS(regs[0], ::R(regs[2]));
+ emit_->UNPCKLPS(regs[0], ::R(regs[1]));
+ emit_->MOVAPS(MDisp(CTXREG, -128 + GetMipsRegOffset(i)), regs[0]);
+
+ for (int j = 0; j < 4; ++j)
+ DiscardReg(i + j);
+ i += 3;
+ continue;
+ }
+
+ // TODO: Maybe this isn't always worth doing.
+ emit_->UNPCKLPS(regs[0], ::R(regs[1]));
+ emit_->MOVLPS(MDisp(CTXREG, -128 + GetMipsRegOffset(i)), regs[0]);
+
+ DiscardReg(i);
+ DiscardReg(i + 1);
+ ++i;
+ continue;
+ }
+
+ IRNativeRegCacheBase::FlushAll(gprs, fprs);
+}
+
X64Reg X64IRRegCache::TryMapTempImm(IRReg r, X64Map flags) {
_dbg_assert_(IsValidGPR(r));
@@ -353,6 +414,8 @@ void X64IRRegCache::LoadNativeReg(IRNativeReg nreg, IRReg first, int lanes) {
emit_->MOVSS(r, MDisp(CTXREG, -128 + GetMipsRegOffset(first)));
else if (lanes == 2)
emit_->MOVLPS(r, MDisp(CTXREG, -128 + GetMipsRegOffset(first)));
+ else if (lanes == 4 && (first & 3) == 0)
+ emit_->MOVAPS(r, MDisp(CTXREG, -128 + GetMipsRegOffset(first)));
else if (lanes == 4)
emit_->MOVUPS(r, MDisp(CTXREG, -128 + GetMipsRegOffset(first)));
else
@@ -381,6 +444,8 @@ void X64IRRegCache::StoreNativeReg(IRNativeReg nreg, IRReg first, int lanes) {
emit_->MOVSS(MDisp(CTXREG, -128 + GetMipsRegOffset(first)), r);
else if (lanes == 2)
emit_->MOVLPS(MDisp(CTXREG, -128 + GetMipsRegOffset(first)), r);
+ else if (lanes == 4 && (first & 3) == 0)
+ emit_->MOVAPS(MDisp(CTXREG, -128 + GetMipsRegOffset(first)), r);
else if (lanes == 4)
emit_->MOVUPS(MDisp(CTXREG, -128 + GetMipsRegOffset(first)), r);
else
@@ -388,6 +453,275 @@ void X64IRRegCache::StoreNativeReg(IRNativeReg nreg, IRReg first, int lanes) {
}
}
+bool X64IRRegCache::TransferNativeReg(IRNativeReg nreg, IRNativeReg dest, MIPSLoc type, IRReg first, int lanes, MIPSMap flags) {
+ bool allowed = !mr[nr[nreg].mipsReg].isStatic;
+ // There's currently no support for non-XMMs here.
+ allowed = allowed && type == MIPSLoc::FREG;
+
+ if (dest == -1)
+ dest = nreg;
+
+ if (allowed && (flags == MIPSMap::INIT || flags == MIPSMap::DIRTY)) {
+ // Alright, changing lane count (possibly including lane position.)
+ IRReg oldfirst = nr[nreg].mipsReg;
+ int oldlanes = 0;
+ while (mr[oldfirst + oldlanes].nReg == nreg)
+ oldlanes++;
+ _assert_msg_(oldlanes != 0, "TransferNativeReg encountered nreg mismatch");
+ _assert_msg_(oldlanes != lanes, "TransferNativeReg transfer to same lanecount, misaligned?");
+
+ if (lanes == 1 && TransferVecTo1(nreg, dest, first, oldlanes))
+ return true;
+ if (oldlanes == 1 && Transfer1ToVec(nreg, dest, first, lanes))
+ return true;
+ }
+
+ return IRNativeRegCacheBase::TransferNativeReg(nreg, dest, type, first, lanes, flags);
+}
+
+bool X64IRRegCache::TransferVecTo1(IRNativeReg nreg, IRNativeReg dest, IRReg first, int oldlanes) {
+ IRReg oldfirst = nr[nreg].mipsReg;
+
+ // Is it worth preserving any of the old regs?
+ int numKept = 0;
+ for (int i = 0; i < oldlanes; ++i) {
+ // Skip whichever one this is extracting.
+ if (oldfirst + i == first)
+ continue;
+ // If 0 isn't being transfered, easy to keep in its original reg.
+ if (i == 0 && dest != nreg) {
+ numKept++;
+ continue;
+ }
+
+ IRNativeReg freeReg = FindFreeReg(MIPSLoc::FREG, MIPSMap::INIT);
+ if (freeReg != -1 && IsRegRead(MIPSLoc::FREG, oldfirst + i)) {
+ // If there's one free, use it. Don't modify nreg, though.
+ u8 shuf = VFPU_SWIZZLE(i, i, i, i);
+ if (i == 0) {
+ emit_->MOVAPS(FromNativeReg(freeReg), ::R(FromNativeReg(nreg)));
+ } else if (cpu_info.bAVX) {
+ emit_->VPERMILPS(128, FromNativeReg(freeReg), ::R(FromNativeReg(nreg)), shuf);
+ } else if (i == 2) {
+ emit_->MOVHLPS(FromNativeReg(freeReg), FromNativeReg(nreg));
+ } else {
+ emit_->MOVAPS(FromNativeReg(freeReg), ::R(FromNativeReg(nreg)));
+ emit_->SHUFPS(FromNativeReg(freeReg), ::R(FromNativeReg(freeReg)), shuf);
+ }
+
+ // Update accounting.
+ nr[freeReg].isDirty = nr[nreg].isDirty;
+ nr[freeReg].mipsReg = oldfirst + i;
+ mr[oldfirst + i].lane = -1;
+ mr[oldfirst + i].nReg = freeReg;
+ numKept++;
+ }
+ }
+
+ // Unless all other lanes were kept, store.
+ if (nr[nreg].isDirty && numKept < oldlanes - 1) {
+ StoreNativeReg(nreg, oldfirst, oldlanes);
+ // Set false even for regs that were split out, since they were flushed too.
+ for (int i = 0; i < oldlanes; ++i) {
+ if (mr[oldfirst + i].nReg != -1)
+ nr[mr[oldfirst + i].nReg].isDirty = false;
+ }
+ }
+
+ // Next, shuffle the desired element into first place.
+ u8 shuf = VFPU_SWIZZLE(mr[first].lane, mr[first].lane, mr[first].lane, mr[first].lane);
+ if (mr[first].lane > 0 && cpu_info.bAVX && dest != nreg) {
+ emit_->VPERMILPS(128, FromNativeReg(dest), ::R(FromNativeReg(nreg)), shuf);
+ } else if (mr[first].lane <= 0 && dest != nreg) {
+ emit_->MOVAPS(FromNativeReg(dest), ::R(FromNativeReg(nreg)));
+ } else if (mr[first].lane == 2) {
+ emit_->MOVHLPS(FromNativeReg(dest), FromNativeReg(nreg));
+ } else if (mr[first].lane > 0) {
+ if (dest != nreg)
+ emit_->MOVAPS(FromNativeReg(dest), ::R(FromNativeReg(nreg)));
+ emit_->SHUFPS(FromNativeReg(dest), ::R(FromNativeReg(dest)), shuf);
+ }
+
+ // Now update accounting.
+ for (int i = 0; i < oldlanes; ++i) {
+ auto &mreg = mr[oldfirst + i];
+ if (oldfirst + i == first) {
+ mreg.lane = -1;
+ mreg.nReg = dest;
+ } else if (mreg.nReg == nreg && i == 0 && nreg != dest) {
+ // Still in the same register, but no longer a vec.
+ mreg.lane = -1;
+ } else if (mreg.nReg == nreg) {
+ // No longer in a register.
+ mreg.nReg = -1;
+ mreg.lane = -1;
+ mreg.loc = MIPSLoc::MEM;
+ }
+ }
+
+ if (dest != nreg) {
+ nr[dest].isDirty = nr[nreg].isDirty;
+ if (oldfirst == first) {
+ nr[nreg].mipsReg = -1;
+ nr[nreg].isDirty = false;
+ }
+ }
+ nr[dest].mipsReg = first;
+
+ return true;
+}
+
+bool X64IRRegCache::Transfer1ToVec(IRNativeReg nreg, IRNativeReg dest, IRReg first, int lanes) {
+ X64Reg cur[4]{};
+ int numInRegs = 0;
+ u8 blendMask = 0;
+ for (int i = 0; i < lanes; ++i) {
+ if (mr[first + i].lane != -1 || (i != 0 && mr[first + i].spillLockIRIndex >= irIndex_)) {
+ // Can't do it, either double mapped or overlapping vec.
+ return false;
+ }
+
+ if (mr[first + i].nReg == -1) {
+ cur[i] = INVALID_REG;
+ blendMask |= 1 << i;
+ } else {
+ cur[i] = FromNativeReg(mr[first + i].nReg);
+ numInRegs++;
+ }
+ }
+
+ // Shouldn't happen, this should only get called to transfer one in a reg.
+ if (numInRegs == 0)
+ return false;
+
+ // Move things together into a reg.
+ if (lanes == 4 && cpu_info.bSSE4_1 && numInRegs == 1 && (first & 3) == 0) {
+ // Use a blend to grab the rest. BLENDPS is pretty good.
+ if (cpu_info.bAVX && nreg != dest) {
+ if (cur[0] == INVALID_REG) {
+ // Broadcast to all lanes, then blend from memory to replace.
+ emit_->VPERMILPS(128, FromNativeReg(dest), ::R(FromNativeReg(nreg)), 0);
+ emit_->BLENDPS(FromNativeReg(dest), MDisp(CTXREG, -128 + GetMipsRegOffset(first)), blendMask);
+ } else {
+ emit_->VBLENDPS(128, FromNativeReg(dest), FromNativeReg(nreg), MDisp(CTXREG, -128 + GetMipsRegOffset(first)), blendMask);
+ }
+ cur[0] = FromNativeReg(dest);
+ } else {
+ if (cur[0] == INVALID_REG)
+ emit_->SHUFPS(FromNativeReg(nreg), ::R(FromNativeReg(nreg)), 0);
+ emit_->BLENDPS(FromNativeReg(nreg), MDisp(CTXREG, -128 + GetMipsRegOffset(first)), blendMask);
+ // If this is not dest, it'll get moved there later.
+ cur[0] = FromNativeReg(nreg);
+ }
+ } else if (lanes == 4) {
+ if (blendMask == 0) {
+ // y = yw##, x = xz##, x = xyzw.
+ emit_->UNPCKLPS(cur[1], ::R(cur[3]));
+ emit_->UNPCKLPS(cur[0], ::R(cur[2]));
+ emit_->UNPCKLPS(cur[0], ::R(cur[1]));
+ } else if (blendMask == 0b1100) {
+ // x = xy##, then load zw.
+ emit_->UNPCKLPS(cur[0], ::R(cur[1]));
+ emit_->MOVHPS(cur[0], MDisp(CTXREG, -128 + GetMipsRegOffset(first + 2)));
+ } else if (blendMask == 0b1010 && cpu_info.bSSE4_1 && (first & 3) == 0) {
+ // x = x#z#, x = xyzw.
+ emit_->SHUFPS(cur[0], ::R(cur[2]), VFPU_SWIZZLE(0, 0, 0, 0));
+ emit_->BLENDPS(cur[0], MDisp(CTXREG, -128 + GetMipsRegOffset(first)), blendMask);
+ } else if (blendMask == 0b0110 && cpu_info.bSSE4_1 && (first & 3) == 0) {
+ // x = x##w, x = xyzw.
+ emit_->SHUFPS(cur[0], ::R(cur[3]), VFPU_SWIZZLE(0, 0, 0, 0));
+ emit_->BLENDPS(cur[0], MDisp(CTXREG, -128 + GetMipsRegOffset(first)), blendMask);
+ } else if (blendMask == 0b1001 && cpu_info.bSSE4_1 && (first & 3) == 0) {
+ // y = #yz#, y = xyzw.
+ emit_->SHUFPS(cur[1], ::R(cur[2]), VFPU_SWIZZLE(0, 0, 0, 0));
+ emit_->BLENDPS(cur[1], MDisp(CTXREG, -128 + GetMipsRegOffset(first)), blendMask);
+ // Will be moved to dest as needed.
+ cur[0] = cur[1];
+ } else if (blendMask == 0b0101 && cpu_info.bSSE4_1 && (first & 3) == 0) {
+ // y = #y#w, y = xyzw.
+ emit_->SHUFPS(cur[1], ::R(cur[3]), VFPU_SWIZZLE(0, 0, 0, 0));
+ emit_->BLENDPS(cur[1], MDisp(CTXREG, -128 + GetMipsRegOffset(first)), blendMask);
+ // Will be moved to dest as needed.
+ cur[0] = cur[1];
+ } else if (blendMask == 0b1000) {
+ // x = xz##, z = w###, y = yw##, x = xyzw.
+ emit_->UNPCKLPS(cur[0], ::R(cur[2]));
+ emit_->MOVSS(cur[2], MDisp(CTXREG, -128 + GetMipsRegOffset(first + 3)));
+ emit_->UNPCKLPS(cur[1], ::R(cur[2]));
+ emit_->UNPCKLPS(cur[0], ::R(cur[1]));
+ } else if (blendMask == 0b0100) {
+ // y = yw##, w = z###, x = xz##, x = xyzw.
+ emit_->UNPCKLPS(cur[1], ::R(cur[3]));
+ emit_->MOVSS(cur[3], MDisp(CTXREG, -128 + GetMipsRegOffset(first + 2)));
+ emit_->UNPCKLPS(cur[0], ::R(cur[3]));
+ emit_->UNPCKLPS(cur[0], ::R(cur[1]));
+ } else if (blendMask == 0b0010) {
+ // z = zw##, w = y###, x = xy##, x = xyzw.
+ emit_->UNPCKLPS(cur[2], ::R(cur[3]));
+ emit_->MOVSS(cur[3], MDisp(CTXREG, -128 + GetMipsRegOffset(first + 1)));
+ emit_->UNPCKLPS(cur[0], ::R(cur[3]));
+ emit_->MOVLHPS(cur[0], cur[2]);
+ } else if (blendMask == 0b0001) {
+ // y = yw##, w = x###, w = xz##, w = xyzw.
+ emit_->UNPCKLPS(cur[1], ::R(cur[3]));
+ emit_->MOVSS(cur[3], MDisp(CTXREG, -128 + GetMipsRegOffset(first + 0)));
+ emit_->UNPCKLPS(cur[3], ::R(cur[2]));
+ emit_->UNPCKLPS(cur[3], ::R(cur[1]));
+ // Will be moved to dest as needed.
+ cur[0] = cur[3];
+ } else if (blendMask == 0b0011) {
+ // z = zw##, w = xy##, w = xyzw.
+ emit_->UNPCKLPS(cur[2], ::R(cur[3]));
+ emit_->MOVLPS(cur[3], MDisp(CTXREG, -128 + GetMipsRegOffset(first + 0)));
+ emit_->MOVLHPS(cur[3], cur[2]);
+ // Will be moved to dest as needed.
+ cur[0] = cur[3];
+ } else {
+ // This must mean no SSE4, and numInRegs <= 2 in trickier cases.
+ return false;
+ }
+ } else if (lanes == 2) {
+ if (cur[0] != INVALID_REG && cur[1] != INVALID_REG) {
+ emit_->UNPCKLPS(cur[0], ::R(cur[1]));
+ } else if (cur[0] != INVALID_REG && cpu_info.bSSE4_1) {
+ emit_->INSERTPS(cur[0], MDisp(CTXREG, -128 + GetMipsRegOffset(first + 1)), 1);
+ } else {
+ return false;
+ }
+ } else {
+ return false;
+ }
+
+ mr[first].lane = 0;
+ for (int i = 0; i < lanes; ++i) {
+ if (mr[first + i].nReg != -1) {
+ // If this was dirty, the combined reg is now dirty.
+ if (nr[mr[first + i].nReg].isDirty)
+ nr[dest].isDirty = true;
+
+ // Throw away the other register we're no longer using.
+ if (i != 0)
+ DiscardNativeReg(mr[first + i].nReg);
+ }
+
+ // And set it as using the new one.
+ mr[first + i].lane = i;
+ mr[first + i].loc = MIPSLoc::FREG;
+ mr[first + i].nReg = dest;
+ }
+
+ if (cur[0] != FromNativeReg(dest))
+ emit_->MOVAPS(FromNativeReg(dest), ::R(cur[0]));
+
+ if (dest != nreg) {
+ nr[dest].mipsReg = first;
+ nr[nreg].mipsReg = -1;
+ nr[nreg].isDirty = false;
+ }
+
+ return true;
+}
+
void X64IRRegCache::SetNativeRegValue(IRNativeReg nreg, uint32_t imm) {
X64Reg r = FromNativeReg(nreg);
_dbg_assert_(nreg >= 0 && nreg < NUM_X_REGS);
diff --git a/Core/MIPS/x86/X64IRRegCache.h b/Core/MIPS/x86/X64IRRegCache.h
index 90e0259914cd..8a21f563d8f2 100644
--- a/Core/MIPS/x86/X64IRRegCache.h
+++ b/Core/MIPS/x86/X64IRRegCache.h
@@ -92,6 +92,8 @@ class X64IRRegCache : public IRNativeRegCacheBase {
void MapWithFlags(IRInst inst, X64IRJitConstants::X64Map destFlags, X64IRJitConstants::X64Map src1Flags = X64IRJitConstants::X64Map::NONE, X64IRJitConstants::X64Map src2Flags = X64IRJitConstants::X64Map::NONE);
+ // Note: may change the high lanes of single-register XMMs.
+ void FlushAll(bool gprs = true, bool fprs = true) override;
void FlushBeforeCall();
Gen::X64Reg GetAndLockTempGPR();
@@ -115,8 +117,12 @@ class X64IRRegCache : public IRNativeRegCacheBase {
void StoreNativeReg(IRNativeReg nreg, IRReg first, int lanes) override;
void SetNativeRegValue(IRNativeReg nreg, uint32_t imm) override;
void StoreRegValue(IRReg mreg, uint32_t imm) override;
+ bool TransferNativeReg(IRNativeReg nreg, IRNativeReg dest, MIPSLoc type, IRReg first, int lanes, MIPSMap flags) override;
private:
+ bool TransferVecTo1(IRNativeReg nreg, IRNativeReg dest, IRReg first, int oldlanes);
+ bool Transfer1ToVec(IRNativeReg nreg, IRNativeReg dest, IRReg first, int lanes);
+
IRNativeReg GPRToNativeReg(Gen::X64Reg r) {
return (IRNativeReg)r;
}
diff --git a/Core/MemMapHelpers.h b/Core/MemMapHelpers.h
index 6f2ceaca637f..5f89f60312ff 100644
--- a/Core/MemMapHelpers.h
+++ b/Core/MemMapHelpers.h
@@ -69,13 +69,12 @@ inline void Memcpy(const u32 to_address, const u32 from_address, const u32 len,
memcpy(to, from, len);
if (MemBlockInfoDetailed(len)) {
- char tagData[128];
if (!tag) {
- tagLen = FormatMemWriteTagAt(tagData, sizeof(tagData), "Memcpy/", from_address, len);
- tag = tagData;
+ NotifyMemInfoCopy(to_address, from_address, len, "Memcpy/");
+ } else {
+ NotifyMemInfo(MemBlockFlags::READ, from_address, len, tag, tagLen);
+ NotifyMemInfo(MemBlockFlags::WRITE, to_address, len, tag, tagLen);
}
- NotifyMemInfo(MemBlockFlags::READ, from_address, len, tag, tagLen);
- NotifyMemInfo(MemBlockFlags::WRITE, to_address, len, tag, tagLen);
}
}
diff --git a/Core/System.cpp b/Core/System.cpp
index 3158d6a5bafe..0c7bf42fb4c9 100644
--- a/Core/System.cpp
+++ b/Core/System.cpp
@@ -91,7 +91,7 @@ MetaFileSystem pspFileSystem;
ParamSFOData g_paramSFO;
static GlobalUIState globalUIState;
CoreParameter g_CoreParameter;
-static FileLoader *loadedFile;
+static FileLoader *g_loadedFile;
// For background loading thread.
static std::mutex loadingLock;
// For loadingReason updates.
@@ -324,6 +324,7 @@ bool CPU_Init(std::string *errorString, FileLoader *loadedFile) {
// If they shut down early, we'll catch it when load completes.
// Note: this may return before init is complete, which is checked if CPU_IsReady().
+ g_loadedFile = loadedFile;
if (!LoadFile(&loadedFile, &g_CoreParameter.errorString)) {
CPU_Shutdown();
g_CoreParameter.fileToStart.clear();
@@ -368,8 +369,8 @@ void CPU_Shutdown() {
Memory::Shutdown();
HLEPlugins::Shutdown();
- delete loadedFile;
- loadedFile = nullptr;
+ delete g_loadedFile;
+ g_loadedFile = nullptr;
delete g_CoreParameter.mountIsoLoader;
delete g_symbolMap;
@@ -380,8 +381,8 @@ void CPU_Shutdown() {
// TODO: Maybe loadedFile doesn't even belong here...
void UpdateLoadedFile(FileLoader *fileLoader) {
- delete loadedFile;
- loadedFile = fileLoader;
+ delete g_loadedFile;
+ g_loadedFile = fileLoader;
}
void Core_UpdateState(CoreState newState) {
diff --git a/Core/TiltEventProcessor.cpp b/Core/TiltEventProcessor.cpp
index 14486e84bd64..571f58f84f55 100644
--- a/Core/TiltEventProcessor.cpp
+++ b/Core/TiltEventProcessor.cpp
@@ -19,6 +19,12 @@ static u32 tiltButtonsDown = 0;
float rawTiltAnalogX;
float rawTiltAnalogY;
+float g_currentYAngle = 0.0f;
+
+float GetCurrentYAngle() {
+ return g_currentYAngle;
+}
+
// These functions generate tilt events given the current Tilt amount,
// and the deadzone radius.
void GenerateAnalogStickEvent(float analogX, float analogY);
@@ -73,6 +79,7 @@ void ProcessTilt(bool landscape, float calibrationAngle, float x, float y, float
Lin::Vec3 down = Lin::Vec3(x, y, z).normalized();
float angleAroundX = atan2(down.z, down.y);
+ g_currentYAngle = angleAroundX; // TODO: Should smooth this out over time a bit.
float yAngle = angleAroundX - calibrationAngle;
float xAngle = asinf(down.x);
diff --git a/Core/TiltEventProcessor.h b/Core/TiltEventProcessor.h
index 3eda969e17c4..d16f0020d6f4 100644
--- a/Core/TiltEventProcessor.h
+++ b/Core/TiltEventProcessor.h
@@ -1,5 +1,7 @@
#pragma once
+#include "Common/Math/lin/vec3.h"
+
namespace TiltEventProcessor {
// generates a tilt in the correct coordinate system based on
@@ -7,6 +9,8 @@ namespace TiltEventProcessor {
void ProcessTilt(bool landscape, const float calibrationAngle, float x, float y, float z, bool invertX, bool invertY, float xSensitivity, float ySensitivity);
void ResetTiltEvents();
+float GetCurrentYAngle();
+
// Lets you preview the amount of tilt in TiltAnalogSettingsScreen.
extern float rawTiltAnalogX;
extern float rawTiltAnalogY;
diff --git a/Core/Util/PPGeDraw.cpp b/Core/Util/PPGeDraw.cpp
index 1f09bb6e60c9..050551717663 100644
--- a/Core/Util/PPGeDraw.cpp
+++ b/Core/Util/PPGeDraw.cpp
@@ -827,7 +827,7 @@ static void PPGeResetCurrentText() {
// Draws some text using the one font we have in the atlas.
void PPGeDrawCurrentText(u32 color) {
// If the atlas is larger than 512x512, need to use windows into it.
- bool useTextureWindow = g_Config.bSoftwareRendering && atlasWidth > 512 || atlasHeight > 512;
+ bool useTextureWindow = g_Config.bSoftwareRendering && (atlasWidth > 512 || atlasHeight > 512);
uint32_t texturePosX = 0;
uint32_t texturePosY = 0;
@@ -855,7 +855,7 @@ void PPGeDrawCurrentText(u32 color) {
int wantedPosX = (int)floorf(c.sx * textureMaxPosX);
int wantedPosY = (int)floorf(c.sy * textureMaxPosY);
- if (useTextureWindow && wantedPosX != texturePosX || wantedPosY != texturePosY) {
+ if (useTextureWindow && (wantedPosX != texturePosX || wantedPosY != texturePosY)) {
EndVertexDataAndDraw(GE_PRIM_RECTANGLES);
uint32_t offset = atlasWidth * wantedPosY * 256 + wantedPosX * 256;
diff --git a/GPU/Common/GPUStateUtils.cpp b/GPU/Common/GPUStateUtils.cpp
index bf881694541a..2177bf31dba6 100644
--- a/GPU/Common/GPUStateUtils.cpp
+++ b/GPU/Common/GPUStateUtils.cpp
@@ -290,8 +290,15 @@ ReplaceBlendType ReplaceBlendWithShader(GEBufferFormat bufferFormat) {
return REPLACE_BLEND_READ_FRAMEBUFFER;
}
- default:
+ case GE_BLENDMODE_MUL_AND_ADD:
+ case GE_BLENDMODE_MUL_AND_SUBTRACT:
+ case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE:
+ // Handled below.
break;
+
+ default:
+ // Other blend equations simply don't blend on hardware.
+ return REPLACE_BLEND_NO;
}
GEBlendSrcFactor funcA = gstate.getBlendFuncA();
diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp
index 3e9b77111177..a23c37cd0539 100644
--- a/GPU/Common/ShaderId.cpp
+++ b/GPU/Common/ShaderId.cpp
@@ -275,21 +275,6 @@ bool FragmentIdNeedsFramebufferRead(const FShaderID &id) {
(ReplaceBlendType)id.Bits(FS_BIT_REPLACE_BLEND, 3) == REPLACE_BLEND_READ_FRAMEBUFFER;
}
-static GEBlendMode SanitizeBlendEq(GEBlendMode beq) {
- switch (beq) {
- case GE_BLENDMODE_MUL_AND_ADD:
- case GE_BLENDMODE_MUL_AND_SUBTRACT:
- case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE:
- case GE_BLENDMODE_MIN:
- case GE_BLENDMODE_MAX:
- case GE_BLENDMODE_ABSDIFF:
- return beq;
- default:
- // Just return something that won't cause a shader gen failure.
- return GE_BLENDMODE_MUL_AND_ADD;
- }
-}
-
// Here we must take all the bits of the gstate that determine what the fragment shader will
// look like, and concatenate them together into an ID.
void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pipelineState, const Draw::Bugs &bugs) {
@@ -384,7 +369,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip
// 3 bits.
id.SetBits(FS_BIT_REPLACE_BLEND, 3, replaceBlend);
// 11 bits total.
- id.SetBits(FS_BIT_BLENDEQ, 3, SanitizeBlendEq(gstate.getBlendEq()));
+ id.SetBits(FS_BIT_BLENDEQ, 3, gstate.getBlendEq());
id.SetBits(FS_BIT_BLENDFUNC_A, 4, gstate.getBlendFuncA());
id.SetBits(FS_BIT_BLENDFUNC_B, 4, gstate.getBlendFuncB());
}
diff --git a/GPU/Common/SoftwareTransformCommon.cpp b/GPU/Common/SoftwareTransformCommon.cpp
index 2d9f2719bced..15ae074d1c60 100644
--- a/GPU/Common/SoftwareTransformCommon.cpp
+++ b/GPU/Common/SoftwareTransformCommon.cpp
@@ -90,19 +90,22 @@ static void RotateUVThrough(TransformedVertex v[4]) {
// Clears on the PSP are best done by drawing a series of vertical strips
// in clear mode. This tries to detect that.
static bool IsReallyAClear(const TransformedVertex *transformed, int numVerts, float x2, float y2) {
- if (transformed[0].x != 0.0f || transformed[0].y != 0.0f)
+ if (transformed[0].x < 0.0f || transformed[0].y < 0.0f || transformed[0].x > 0.5f || transformed[0].y > 0.5f)
return false;
+ const float originY = transformed[0].y;
+
// Color and Z are decided by the second vertex, so only need to check those for matching color.
- u32 matchcolor = transformed[1].color0_32;
- float matchz = transformed[1].z;
+ const u32 matchcolor = transformed[1].color0_32;
+ const float matchz = transformed[1].z;
for (int i = 1; i < numVerts; i++) {
if ((i & 1) == 0) {
// Top left of a rectangle
- if (transformed[i].y != 0.0f)
+ if (transformed[i].y != originY)
return false;
- if (i > 0 && transformed[i].x != transformed[i - 1].x)
+ float gap = fabsf(transformed[i].x - transformed[i - 1].x); // Should probably do some smarter check.
+ if (i > 0 && gap > 0.0625)
return false;
} else {
if (transformed[i].color0_32 != matchcolor || transformed[i].z != matchz)
@@ -547,7 +550,7 @@ void SoftwareTransform::DetectOffsetTexture(int maxIndex) {
}
// NOTE: The viewport must be up to date!
-void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertType, u16 *inds, int &indsOffset, int indexBufferSize, int &maxIndex, SoftwareTransformResult *result) {
+void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertType, u16 *&inds, int &maxIndex, SoftwareTransformResult *result) {
TransformedVertex *transformed = params_.transformed;
TransformedVertex *transformedExpanded = params_.transformedExpanded;
bool throughmode = (vertType & GE_VTYPE_THROUGH_MASK) != 0;
@@ -560,11 +563,7 @@ void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertTy
bool useBufferedRendering = fbman->UseBufferedRendering();
if (prim == GE_PRIM_RECTANGLES) {
- if (!ExpandRectangles(vertexCount, maxIndex, inds, indsOffset, indexBufferSize, transformed, transformedExpanded, numTrans, throughmode)) {
- result->drawIndexed = false;
- result->drawNumTrans = 0;
- return;
- }
+ ExpandRectangles(vertexCount, maxIndex, inds, transformed, transformedExpanded, numTrans, throughmode);
result->drawBuffer = transformedExpanded;
result->drawIndexed = true;
@@ -582,19 +581,11 @@ void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertTy
}
}
} else if (prim == GE_PRIM_POINTS) {
- if (!ExpandPoints(vertexCount, maxIndex, inds, indsOffset, indexBufferSize, transformed, transformedExpanded, numTrans, throughmode)) {
- result->drawIndexed = false;
- result->drawNumTrans = 0;
- return;
- }
+ ExpandPoints(vertexCount, maxIndex, inds, transformed, transformedExpanded, numTrans, throughmode);
result->drawBuffer = transformedExpanded;
result->drawIndexed = true;
} else if (prim == GE_PRIM_LINES) {
- if (!ExpandLines(vertexCount, maxIndex, inds, indsOffset, indexBufferSize, transformed, transformedExpanded, numTrans, throughmode)) {
- result->drawIndexed = false;
- result->drawNumTrans = 0;
- return;
- }
+ ExpandLines(vertexCount, maxIndex, inds, transformed, transformedExpanded, numTrans, throughmode);
result->drawBuffer = transformedExpanded;
result->drawIndexed = true;
} else {
@@ -686,21 +677,15 @@ void SoftwareTransform::CalcCullParams(float &minZValue, float &maxZValue) {
std::swap(minZValue, maxZValue);
}
-bool SoftwareTransform::ExpandRectangles(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) {
- // Before we start, do a sanity check - does the output fit?
- if ((vertexCount / 2) * 6 > indexBufferSize - indsOffset) {
- // Won't fit, kill the draw.
- return false;
- }
-
+void SoftwareTransform::ExpandRectangles(int vertexCount, int &maxIndex, u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) {
// Rectangles always need 2 vertices, disregard the last one if there's an odd number.
vertexCount = vertexCount & ~1;
numTrans = 0;
TransformedVertex *trans = &transformedExpanded[0];
- const u16 *indsIn = (const u16 *)(inds + indsOffset);
- int newIndsOffset = indsOffset + vertexCount;
- u16 *indsOut = inds + newIndsOffset;
+ const u16 *indsIn = (const u16 *)inds;
+ u16 *newInds = inds + vertexCount;
+ u16 *indsOut = newInds;
maxIndex = 4 * (vertexCount / 2);
for (int i = 0; i < vertexCount; i += 2) {
@@ -745,33 +730,23 @@ bool SoftwareTransform::ExpandRectangles(int vertexCount, int &maxIndex, u16 *in
indsOut[3] = i * 2 + 3;
indsOut[4] = i * 2 + 0;
indsOut[5] = i * 2 + 2;
-
trans += 4;
indsOut += 6;
numTrans += 6;
}
-
- indsOffset = newIndsOffset;
- return true;
+ inds = newInds;
}
-bool SoftwareTransform::ExpandLines(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) {
- // Before we start, do a sanity check - does the output fit?
- if ((vertexCount / 2) * 6 > indexBufferSize - indsOffset) {
- // Won't fit, kill the draw.
- return false;
- }
-
+void SoftwareTransform::ExpandLines(int vertexCount, int &maxIndex, u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) {
// Lines always need 2 vertices, disregard the last one if there's an odd number.
vertexCount = vertexCount & ~1;
numTrans = 0;
TransformedVertex *trans = &transformedExpanded[0];
-
- const u16 *indsIn = (const u16 *)(inds + indsOffset);
- int newIndsOffset = indsOffset + vertexCount;
- u16 *indsOut = inds + newIndsOffset;
+ const u16 *indsIn = (const u16 *)inds;
+ u16 *newInds = inds + vertexCount;
+ u16 *indsOut = newInds;
float dx = 1.0f * gstate_c.vpWidthScale * (1.0f / fabsf(gstate.getViewportXScale()));
float dy = 1.0f * gstate_c.vpHeightScale * (1.0f / fabsf(gstate.getViewportYScale()));
@@ -884,23 +859,17 @@ bool SoftwareTransform::ExpandLines(int vertexCount, int &maxIndex, u16 *inds, i
}
}
- indsOffset = newIndsOffset;
- return true;
+ inds = newInds;
}
-bool SoftwareTransform::ExpandPoints(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) {
- // Before we start, do a sanity check - does the output fit?
- if (vertexCount * 6 > indexBufferSize - indsOffset) {
- // Won't fit, kill the draw.
- return false;
- }
+void SoftwareTransform::ExpandPoints(int vertexCount, int &maxIndex, u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) {
numTrans = 0;
TransformedVertex *trans = &transformedExpanded[0];
- const u16 *indsIn = (const u16 *)(inds + indsOffset);
- int newIndsOffset = indsOffset + vertexCount;
- u16 *indsOut = inds + newIndsOffset;
+ const u16 *indsIn = (const u16 *)inds;
+ u16 *newInds = inds + vertexCount;
+ u16 *indsOut = newInds;
float dx = 1.0f * gstate_c.vpWidthScale * (1.0f / gstate.getViewportXScale());
float dy = 1.0f * gstate_c.vpHeightScale * (1.0f / gstate.getViewportYScale());
@@ -959,7 +928,5 @@ bool SoftwareTransform::ExpandPoints(int vertexCount, int &maxIndex, u16 *inds,
numTrans += 6;
}
-
- indsOffset = newIndsOffset;
- return true;
+ inds = newInds;
}
diff --git a/GPU/Common/SoftwareTransformCommon.h b/GPU/Common/SoftwareTransformCommon.h
index da15ffad9305..480bd18e519e 100644
--- a/GPU/Common/SoftwareTransformCommon.h
+++ b/GPU/Common/SoftwareTransformCommon.h
@@ -62,18 +62,19 @@ struct SoftwareTransformParams {
class SoftwareTransform {
public:
- SoftwareTransform(SoftwareTransformParams ¶ms) : params_(params) {}
+ SoftwareTransform(SoftwareTransformParams ¶ms) : params_(params) {
+ }
void SetProjMatrix(const float mtx[14], bool invertedX, bool invertedY, const Lin::Vec3 &trans, const Lin::Vec3 &scale);
void Decode(int prim, u32 vertexType, const DecVtxFormat &decVtxFormat, int maxIndex, SoftwareTransformResult *result);
void DetectOffsetTexture(int maxIndex);
- void BuildDrawingParams(int prim, int vertexCount, u32 vertType, u16 *inds, int &indsOffset, int indexBufferSize, int &maxIndex, SoftwareTransformResult *result);
+ void BuildDrawingParams(int prim, int vertexCount, u32 vertType, u16 *&inds, int &maxIndex, SoftwareTransformResult *result);
protected:
void CalcCullParams(float &minZValue, float &maxZValue);
- bool ExpandRectangles(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode);
- bool ExpandLines(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode);
- bool ExpandPoints(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode);
+ void ExpandRectangles(int vertexCount, int &maxIndex, u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode);
+ void ExpandLines(int vertexCount, int &maxIndex, u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode);
+ void ExpandPoints(int vertexCount, int &maxIndex, u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode);
const SoftwareTransformParams ¶ms_;
Lin::Matrix4x4 projMatrix_;
diff --git a/GPU/Common/VertexDecoderArm64.cpp b/GPU/Common/VertexDecoderArm64.cpp
index 83948a16adee..719dfa329d7f 100644
--- a/GPU/Common/VertexDecoderArm64.cpp
+++ b/GPU/Common/VertexDecoderArm64.cpp
@@ -27,10 +27,6 @@
#include "GPU/Common/VertexDecoderCommon.h"
alignas(16) static float bones[16 * 8]; // First four are kept in registers
-alignas(16) static float boneMask[4] = {1.0f, 1.0f, 1.0f, 0.0f};
-
-static const float by128 = 1.0f / 128.0f;
-static const float by32768 = 1.0f / 32768.0f;
using namespace Arm64Gen;
@@ -50,7 +46,7 @@ static const ARM64Reg scratchReg = W6;
static const ARM64Reg scratchReg64 = X6;
static const ARM64Reg scratchReg2 = W7;
static const ARM64Reg scratchReg3 = W8;
-static const ARM64Reg fullAlphaReg = W12;
+static const ARM64Reg alphaNonFullReg = W12;
static const ARM64Reg boundsMinUReg = W13;
static const ARM64Reg boundsMinVReg = W14;
static const ARM64Reg boundsMaxUReg = W15;
@@ -63,6 +59,8 @@ static const ARM64Reg fpScratchReg4 = S7;
static const ARM64Reg neonScratchRegD = D2;
static const ARM64Reg neonScratchRegQ = Q2;
+static const ARM64Reg neonScratchReg2D = D3;
+static const ARM64Reg neonScratchReg2Q = Q3;
static const ARM64Reg neonUVScaleReg = D0;
static const ARM64Reg neonUVOffsetReg = D1;
@@ -150,6 +148,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
bool prescaleStep = false;
bool skinning = false;
+ bool updateTexBounds = false;
bool log = false;
@@ -165,6 +164,9 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
dec.steps_[i] == &VertexDecoder::Step_WeightsFloatSkin) {
skinning = true;
}
+ if (dec.steps_[i] == &VertexDecoder::Step_TcU16ThroughToFloat) {
+ updateTexBounds = true;
+ }
}
// Not used below, but useful for logging.
@@ -172,24 +174,22 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
// if (skinning) log = true;
+ bool updateFullAlpha = dec.col;
+ if (updateFullAlpha && (dec.VertexType() & GE_VTYPE_COL_MASK) == GE_VTYPE_COL_565)
+ updateFullAlpha = false;
+
// GPRs 0-15 do not need to be saved.
// We don't use any higher GPRs than 16. So:
- uint64_t regs_to_save = 1 << 16; // Arm64Gen::ALL_CALLEE_SAVED;
+ uint64_t regs_to_save = updateTexBounds ? 1 << 16 : 0;
// We only need to save Q8-Q15 if skinning is used.
uint64_t regs_to_save_fp = dec.skinInDecode ? Arm64Gen::ALL_CALLEE_SAVED_FP : 0;
- fp.ABI_PushRegisters(regs_to_save, regs_to_save_fp);
+ // Only bother making stack space and setting up FP if there are saved regs.
+ if (regs_to_save || regs_to_save_fp)
+ fp.ABI_PushRegisters(regs_to_save, regs_to_save_fp);
// Keep the scale/offset in a few fp registers if we need it.
if (prescaleStep) {
- fp.LDR(64, INDEX_UNSIGNED, neonUVScaleReg, X3, 0);
- fp.LDR(64, INDEX_UNSIGNED, neonUVOffsetReg, X3, 8);
- if ((dec.VertexType() & GE_VTYPE_TC_MASK) == GE_VTYPE_TC_8BIT) {
- fp.MOVI2FDUP(neonScratchRegD, by128, scratchReg);
- fp.FMUL(32, neonUVScaleReg, neonUVScaleReg, neonScratchRegD);
- } else if ((dec.VertexType() & GE_VTYPE_TC_MASK) == GE_VTYPE_TC_16BIT) {
- fp.MOVI2FDUP(neonScratchRegD, by32768, scratchReg);
- fp.FMUL(32, neonUVScaleReg, neonUVScaleReg, neonScratchRegD);
- }
+ fp.LDP(64, INDEX_SIGNED, neonUVScaleReg, neonUVOffsetReg, X3, 0);
}
// Add code to convert matrices to 4x4.
@@ -197,43 +197,48 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
if (dec.skinInDecode) {
// Copying from R3 to R4
MOVP2R(X3, gstate.boneMatrix);
- MOVP2R(X4, bones);
- MOVP2R(X5, boneMask);
- fp.LDR(128, INDEX_UNSIGNED, Q3, X5, 0);
+ // This is only used with more than 4 weights, and points to the first of them.
+ if (dec.nweights > 4)
+ MOVP2R(X4, &bones[16 * 4]);
+
+ // Construct a mask to zero out the top lane with.
+ fp.MVNI(32, Q3, 0);
+ fp.MOVI(32, Q4, 0);
+ fp.EXT(Q3, Q3, Q4, 4);
+
for (int i = 0; i < dec.nweights; i++) {
- // Note that INDEX_UNSIGNED does not support offsets not aligned to the data size so we must use POST.
- fp.LDR(128, INDEX_POST, Q4, X3, 12); // Load 128 bits even though we just want 96
- fp.LDR(128, INDEX_POST, Q5, X3, 12);
- fp.LDR(128, INDEX_POST, Q6, X3, 12);
- fp.LDR(128, INDEX_POST, Q7, X3, 12);
+ // This loads Q4,Q5,Q6 with 12 floats and increases X3, all in one go.
+ fp.LD1(32, 3, INDEX_POST, Q4, X3);
+ // Now sort those floats into 4 regs: ABCD EFGH IJKL -> ABC0 DEF0 GHI0 JKL0.
+ // Go backwards to avoid overwriting.
+ fp.EXT(Q7, Q6, Q6, 4); // I[JKLI]JKL
+ fp.EXT(Q6, Q5, Q6, 8); // EF[GHIJ]KL
+ fp.EXT(Q5, Q4, Q5, 12); // ABC[DEFG]H
+
+ ARM64Reg matrixRow[4]{ Q4, Q5, Q6, Q7 };
// First four matrices are in registers Q16+.
if (i < 4) {
- fp.FMUL(32, (ARM64Reg)(Q16 + i * 4), Q4, Q3);
- fp.FMUL(32, (ARM64Reg)(Q17 + i * 4), Q5, Q3);
- fp.FMUL(32, (ARM64Reg)(Q18 + i * 4), Q6, Q3);
- fp.FMUL(32, (ARM64Reg)(Q19 + i * 4), Q7, Q3);
- ADDI2R(X4, X4, 16 * 4);
- } else {
- fp.FMUL(32, Q4, Q4, Q3);
- fp.FMUL(32, Q5, Q5, Q3);
- fp.FMUL(32, Q6, Q6, Q3);
- fp.FMUL(32, Q7, Q7, Q3);
- fp.STR(128, INDEX_UNSIGNED, Q4, X4, 0);
- fp.STR(128, INDEX_UNSIGNED, Q5, X4, 16);
- fp.STR(128, INDEX_UNSIGNED, Q6, X4, 32);
- fp.STR(128, INDEX_UNSIGNED, Q7, X4, 48);
- ADDI2R(X4, X4, 16 * 4);
+ for (int w = 0; w < 4; ++w)
+ matrixRow[w] = (ARM64Reg)(Q16 + i * 4 + w);
}
+ // Zero out the top lane of each one with the mask created above.
+ fp.AND(matrixRow[0], Q4, Q3);
+ fp.AND(matrixRow[1], Q5, Q3);
+ fp.AND(matrixRow[2], Q6, Q3);
+ fp.AND(matrixRow[3], Q7, Q3);
+
+ if (i >= 4)
+ fp.ST1(32, 4, INDEX_POST, matrixRow[0], X4);
}
}
- if (dec.col) {
- // Or LDB and skip the conditional? This is probably cheaper.
- MOVI2R(fullAlphaReg, 0xFF);
+ if (updateFullAlpha) {
+ // This ends up non-zero if alpha is not full.
+ // Often we just ORN into it.
+ MOVI2R(alphaNonFullReg, 0);
}
- if (dec.tc && dec.throughmode) {
- // TODO: Smarter, only when doing bounds.
+ if (updateTexBounds) {
MOVP2R(scratchReg64, &gstate_c.vertBounds.minU);
LDRH(INDEX_UNSIGNED, boundsMinUReg, scratchReg64, offsetof(KnownVertexBounds, minU));
LDRH(INDEX_UNSIGNED, boundsMaxUReg, scratchReg64, offsetof(KnownVertexBounds, maxU));
@@ -259,16 +264,14 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
SUBS(counterReg, counterReg, 1);
B(CC_NEQ, loopStart);
- if (dec.col) {
+ if (updateFullAlpha) {
+ FixupBranch skip = CBZ(alphaNonFullReg);
MOVP2R(tempRegPtr, &gstate_c.vertexFullAlpha);
- CMP(fullAlphaReg, 0);
- FixupBranch skip = B(CC_NEQ);
- STRB(INDEX_UNSIGNED, fullAlphaReg, tempRegPtr, 0);
+ STRB(INDEX_UNSIGNED, WZR, tempRegPtr, 0);
SetJumpTarget(skip);
}
- if (dec.tc && dec.throughmode) {
- // TODO: Smarter, only when doing bounds.
+ if (updateTexBounds) {
MOVP2R(scratchReg64, &gstate_c.vertBounds.minU);
STRH(INDEX_UNSIGNED, boundsMinUReg, scratchReg64, offsetof(KnownVertexBounds, minU));
STRH(INDEX_UNSIGNED, boundsMaxUReg, scratchReg64, offsetof(KnownVertexBounds, maxU));
@@ -276,7 +279,8 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
STRH(INDEX_UNSIGNED, boundsMaxVReg, scratchReg64, offsetof(KnownVertexBounds, maxV));
}
- fp.ABI_PopRegisters(regs_to_save, regs_to_save_fp);
+ if (regs_to_save || regs_to_save_fp)
+ fp.ABI_PopRegisters(regs_to_save, regs_to_save_fp);
RET();
@@ -342,13 +346,11 @@ void VertexDecoderJitCache::Jit_ApplyWeights() {
break;
default:
// Matrices 4+ need to be loaded from memory.
- fp.LDP(128, INDEX_SIGNED, Q8, Q9, scratchReg64, 0);
- fp.LDP(128, INDEX_SIGNED, Q10, Q11, scratchReg64, 2 * 16);
+ fp.LD1(32, 4, INDEX_POST, Q8, scratchReg64);
fp.FMLA(32, Q4, Q8, neonWeightRegsQ[i >> 2], i & 3);
fp.FMLA(32, Q5, Q9, neonWeightRegsQ[i >> 2], i & 3);
fp.FMLA(32, Q6, Q10, neonWeightRegsQ[i >> 2], i & 3);
fp.FMLA(32, Q7, Q11, neonWeightRegsQ[i >> 2], i & 3);
- ADDI2R(scratchReg64, scratchReg64, 4 * 16);
break;
}
}
@@ -482,13 +484,8 @@ void VertexDecoderJitCache::Jit_WeightsFloatSkin() {
void VertexDecoderJitCache::Jit_Color8888() {
LDR(INDEX_UNSIGNED, tempReg1, srcReg, dec_->coloff);
- // Set flags to determine if alpha != 0xFF.
- ORN(tempReg2, WZR, tempReg1, ArithOption(tempReg1, ST_ASR, 24));
- CMP(tempReg2, 0);
-
- // Clear fullAlphaReg when the inverse was not 0.
- // fullAlphaReg = tempReg2 == 0 ? fullAlphaReg : 0 + 1;
- CSEL(fullAlphaReg, fullAlphaReg, WZR, CC_EQ);
+ // Or any non-set bits into alphaNonFullReg. This way it's non-zero if not full.
+ ORN(alphaNonFullReg, alphaNonFullReg, tempReg1, ArithOption(tempReg1, ST_ASR, 24));
STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.c0off);
}
@@ -508,15 +505,10 @@ void VertexDecoderJitCache::Jit_Color4444() {
// And expand to 8 bits.
ORR(tempReg1, tempReg2, tempReg2, ArithOption(tempReg2, ST_LSL, 4));
- STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.c0off);
-
- // Set flags to determine if alpha != 0xFF.
- ORN(tempReg2, WZR, tempReg1, ArithOption(tempReg1, ST_ASR, 24));
- CMP(tempReg2, 0);
+ // Or any non-set bits into alphaNonFullReg. This way it's non-zero if not full.
+ ORN(alphaNonFullReg, alphaNonFullReg, tempReg1, ArithOption(tempReg1, ST_ASR, 24));
- // Clear fullAlphaReg when the inverse was not 0.
- // fullAlphaReg = tempReg2 == 0 ? fullAlphaReg : 0 + 1;
- CSEL(fullAlphaReg, fullAlphaReg, WZR, CC_EQ);
+ STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.c0off);
}
void VertexDecoderJitCache::Jit_Color565() {
@@ -540,7 +532,7 @@ void VertexDecoderJitCache::Jit_Color565() {
ORR(tempReg3, tempReg3, tempReg1, ArithOption(tempReg1, ST_LSR, 4));
ORR(tempReg2, tempReg2, tempReg3, ArithOption(tempReg3, ST_LSL, 8));
- // Add in full alpha. No need to update fullAlphaReg.
+ // Add in full alpha. No need to update alphaNonFullReg.
ORRI2R(tempReg1, tempReg2, 0xFF000000, scratchReg);
STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.c0off);
@@ -566,15 +558,10 @@ void VertexDecoderJitCache::Jit_Color5551() {
ANDI2R(tempReg1, tempReg1, 0xFF000000, scratchReg);
ORR(tempReg2, tempReg2, tempReg1);
- // Set flags to determine if alpha != 0xFF.
- ORN(tempReg3, WZR, tempReg1, ArithOption(tempReg1, ST_ASR, 24));
- CMP(tempReg3, 0);
+ // Or any non-set bits into alphaNonFullReg. This way it's non-zero if not full.
+ ORN(alphaNonFullReg, alphaNonFullReg, tempReg1, ArithOption(tempReg1, ST_ASR, 24));
STR(INDEX_UNSIGNED, tempReg2, dstReg, dec_->decFmt.c0off);
-
- // Clear fullAlphaReg when the inverse was not 0.
- // fullAlphaReg = tempReg3 == 0 ? fullAlphaReg : 0 + 1;
- CSEL(fullAlphaReg, fullAlphaReg, WZR, CC_EQ);
}
void VertexDecoderJitCache::Jit_TcU16ThroughToFloat() {
@@ -608,12 +595,12 @@ void VertexDecoderJitCache::Jit_TcFloat() {
}
void VertexDecoderJitCache::Jit_TcU8Prescale() {
- fp.LDUR(16, neonScratchRegD, srcReg, dec_->tcoff);
- fp.UXTL(8, neonScratchRegQ, neonScratchRegD); // Widen to 16-bit
- fp.UXTL(16, neonScratchRegQ, neonScratchRegD); // Widen to 32-bit
- fp.UCVTF(32, neonScratchRegD, neonScratchRegD);
- fp.FMUL(32, neonScratchRegD, neonScratchRegD, neonUVScaleReg); // TODO: FMLA
- fp.FADD(32, neonScratchRegD, neonScratchRegD, neonUVOffsetReg);
+ fp.LDUR(16, neonScratchReg2D, srcReg, dec_->tcoff);
+ fp.UXTL(8, neonScratchReg2Q, neonScratchReg2D); // Widen to 16-bit
+ fp.UXTL(16, neonScratchReg2Q, neonScratchReg2D); // Widen to 32-bit
+ fp.UCVTF(32, neonScratchReg2D, neonScratchReg2D, 7);
+ fp.MOV(neonScratchRegD, neonUVOffsetReg);
+ fp.FMLA(32, neonScratchRegD, neonScratchReg2D, neonUVScaleReg);
fp.STUR(64, neonScratchRegD, dstReg, dec_->decFmt.uvoff);
}
@@ -626,11 +613,11 @@ void VertexDecoderJitCache::Jit_TcU8ToFloat() {
}
void VertexDecoderJitCache::Jit_TcU16Prescale() {
- fp.LDUR(32, neonScratchRegD, srcReg, dec_->tcoff);
- fp.UXTL(16, neonScratchRegQ, neonScratchRegD); // Widen to 32-bit
- fp.UCVTF(32, neonScratchRegD, neonScratchRegD);
- fp.FMUL(32, neonScratchRegD, neonScratchRegD, neonUVScaleReg); // TODO: FMLA
- fp.FADD(32, neonScratchRegD, neonScratchRegD, neonUVOffsetReg);
+ fp.LDUR(32, neonScratchReg2D, srcReg, dec_->tcoff);
+ fp.UXTL(16, neonScratchReg2Q, neonScratchReg2D); // Widen to 32-bit
+ fp.UCVTF(32, neonScratchReg2D, neonScratchReg2D, 15);
+ fp.MOV(neonScratchRegD, neonUVOffsetReg);
+ fp.FMLA(32, neonScratchRegD, neonScratchReg2D, neonUVScaleReg);
fp.STUR(64, neonScratchRegD, dstReg, dec_->decFmt.uvoff);
}
@@ -642,9 +629,9 @@ void VertexDecoderJitCache::Jit_TcU16ToFloat() {
}
void VertexDecoderJitCache::Jit_TcFloatPrescale() {
- fp.LDUR(64, neonScratchRegD, srcReg, dec_->tcoff);
- fp.FMUL(32, neonScratchRegD, neonScratchRegD, neonUVScaleReg); // TODO: FMLA
- fp.FADD(32, neonScratchRegD, neonScratchRegD, neonUVOffsetReg);
+ fp.LDUR(64, neonScratchReg2D, srcReg, dec_->tcoff);
+ fp.MOV(neonScratchRegD, neonUVOffsetReg);
+ fp.FMLA(32, neonScratchRegD, neonScratchReg2D, neonUVScaleReg);
fp.STUR(64, neonScratchRegD, dstReg, dec_->decFmt.uvoff);
}
diff --git a/GPU/Common/VertexDecoderCommon.cpp b/GPU/Common/VertexDecoderCommon.cpp
index d6e37cf0ad1a..c31a5f1d581b 100644
--- a/GPU/Common/VertexDecoderCommon.cpp
+++ b/GPU/Common/VertexDecoderCommon.cpp
@@ -108,28 +108,36 @@ void DecVtxFormat::InitializeFromID(uint32_t id) {
void GetIndexBounds(const void *inds, int count, u32 vertType, u16 *indexLowerBound, u16 *indexUpperBound) {
// Find index bounds. Could cache this in display lists.
// Also, this could be greatly sped up with SSE2/NEON, although rarely a bottleneck.
- int lowerBound = 0x7FFFFFFF;
- int upperBound = 0;
u32 idx = vertType & GE_VTYPE_IDX_MASK;
- if (idx == GE_VTYPE_IDX_8BIT) {
- const u8 *ind8 = (const u8 *)inds;
+ if (idx == GE_VTYPE_IDX_16BIT) {
+ uint16_t upperBound = 0;
+ uint16_t lowerBound = 0xFFFF;
+ const u16_le *ind16 = (const u16_le *)inds;
for (int i = 0; i < count; i++) {
- u8 value = ind8[i];
+ u16 value = ind16[i];
if (value > upperBound)
upperBound = value;
if (value < lowerBound)
lowerBound = value;
}
- } else if (idx == GE_VTYPE_IDX_16BIT) {
- const u16_le *ind16 = (const u16_le *)inds;
+ *indexLowerBound = lowerBound;
+ *indexUpperBound = upperBound;
+ } else if (idx == GE_VTYPE_IDX_8BIT) {
+ uint8_t upperBound = 0;
+ uint8_t lowerBound = 0xFF;
+ const u8 *ind8 = (const u8 *)inds;
for (int i = 0; i < count; i++) {
- u16 value = ind16[i];
+ u8 value = ind8[i];
if (value > upperBound)
upperBound = value;
if (value < lowerBound)
lowerBound = value;
}
+ *indexLowerBound = lowerBound;
+ *indexUpperBound = upperBound;
} else if (idx == GE_VTYPE_IDX_32BIT) {
+ int lowerBound = 0x7FFFFFFF;
+ int upperBound = 0;
WARN_LOG_REPORT_ONCE(indexBounds32, G3D, "GetIndexBounds: Decoding 32-bit indexes");
const u32_le *ind32 = (const u32_le *)inds;
for (int i = 0; i < count; i++) {
@@ -143,12 +151,12 @@ void GetIndexBounds(const void *inds, int count, u32 vertType, u16 *indexLowerBo
if (value < lowerBound)
lowerBound = value;
}
+ *indexLowerBound = (u16)lowerBound;
+ *indexUpperBound = (u16)upperBound;
} else {
- lowerBound = 0;
- upperBound = count - 1;
+ *indexLowerBound = 0;
+ *indexUpperBound = count - 1;
}
- *indexLowerBound = (u16)lowerBound;
- *indexUpperBound = (u16)upperBound;
}
void PrintDecodedVertex(const VertexReader &vtx) {
diff --git a/GPU/D3D11/DrawEngineD3D11.cpp b/GPU/D3D11/DrawEngineD3D11.cpp
index f018980f4a7b..7780bfa28a26 100644
--- a/GPU/D3D11/DrawEngineD3D11.cpp
+++ b/GPU/D3D11/DrawEngineD3D11.cpp
@@ -598,7 +598,7 @@ void DrawEngineD3D11::DoFlush() {
prim = GE_PRIM_TRIANGLES;
VERBOSE_LOG(G3D, "Flush prim %i SW! %i verts in one go", prim, indexGen.VertexCount());
- u16 *const inds = decIndex_;
+ u16 *inds = decIndex_;
SoftwareTransformResult result{};
SoftwareTransformParams params{};
params.decoded = decoded_;
@@ -644,9 +644,8 @@ void DrawEngineD3D11::DoFlush() {
// Need to ApplyDrawState after ApplyTexture because depal can launch a render pass and that wrecks the state.
ApplyDrawState(prim);
- int indsOffset = 0;
if (result.action == SW_NOT_READY)
- swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, indsOffset, DECODED_INDEX_BUFFER_SIZE / sizeof(uint16_t), maxIndex, &result);
+ swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, maxIndex, &result);
if (result.setSafeSize)
framebufferManager_->SetSafeSize(result.safeWidth, result.safeHeight);
@@ -684,11 +683,11 @@ void DrawEngineD3D11::DoFlush() {
UINT iOffset;
int iSize = sizeof(uint16_t) * result.drawNumTrans;
uint8_t *iptr = pushInds_->BeginPush(context_, &iOffset, iSize);
- memcpy(iptr, inds + indsOffset, iSize);
+ memcpy(iptr, inds, iSize);
pushInds_->EndPush(context_);
context_->IASetIndexBuffer(pushInds_->Buf(), DXGI_FORMAT_R16_UINT, iOffset);
context_->DrawIndexed(result.drawNumTrans, 0, 0);
- } else if (result.drawNumTrans > 0) {
+ } else {
context_->Draw(result.drawNumTrans, 0);
}
} else if (result.action == SW_CLEAR) {
diff --git a/GPU/Directx9/DrawEngineDX9.cpp b/GPU/Directx9/DrawEngineDX9.cpp
index 11323f374743..9efa233dd0b8 100644
--- a/GPU/Directx9/DrawEngineDX9.cpp
+++ b/GPU/Directx9/DrawEngineDX9.cpp
@@ -558,7 +558,7 @@ void DrawEngineDX9::DoFlush() {
prim = GE_PRIM_TRIANGLES;
VERBOSE_LOG(G3D, "Flush prim %i SW! %i verts in one go", prim, indexGen.VertexCount());
- u16 *const inds = decIndex_;
+ u16 *inds = decIndex_;
SoftwareTransformResult result{};
SoftwareTransformParams params{};
params.decoded = decoded_;
@@ -607,9 +607,8 @@ void DrawEngineDX9::DoFlush() {
ApplyDrawState(prim);
- int indsOffset = 0;
if (result.action == SW_NOT_READY)
- swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, indsOffset, DECODED_INDEX_BUFFER_SIZE / sizeof(uint16_t), maxIndex, &result);
+ swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, maxIndex, &result);
if (result.setSafeSize)
framebufferManager_->SetSafeSize(result.safeWidth, result.safeHeight);
@@ -629,8 +628,8 @@ void DrawEngineDX9::DoFlush() {
device_->SetVertexDeclaration(transformedVertexDecl_);
if (result.drawIndexed) {
- device_->DrawIndexedPrimitiveUP(d3d_prim[prim], 0, maxIndex, D3DPrimCount(d3d_prim[prim], result.drawNumTrans), inds + indsOffset, D3DFMT_INDEX16, result.drawBuffer, sizeof(TransformedVertex));
- } else if (result.drawNumTrans > 0) {
+ device_->DrawIndexedPrimitiveUP(d3d_prim[prim], 0, maxIndex, D3DPrimCount(d3d_prim[prim], result.drawNumTrans), inds, D3DFMT_INDEX16, result.drawBuffer, sizeof(TransformedVertex));
+ } else {
device_->DrawPrimitiveUP(d3d_prim[prim], D3DPrimCount(d3d_prim[prim], result.drawNumTrans), result.drawBuffer, sizeof(TransformedVertex));
}
} else if (result.action == SW_CLEAR) {
diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp
index 6d5bb85d2c73..b8cca50d99c2 100644
--- a/GPU/Directx9/GPU_DX9.cpp
+++ b/GPU/Directx9/GPU_DX9.cpp
@@ -123,7 +123,7 @@ void GPU_DX9::BeginFrame() {
drawEngine_.BeginFrame();
GPUCommonHW::BeginFrame();
- shaderManagerDX9_->DirtyShader();
+ shaderManagerDX9_->DirtyLastShader();
framebufferManager_->BeginFrame();
diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp
index 83a449888c56..21dccf7479bc 100644
--- a/GPU/Directx9/ShaderManagerDX9.cpp
+++ b/GPU/Directx9/ShaderManagerDX9.cpp
@@ -535,27 +535,23 @@ void ShaderManagerDX9::Clear() {
}
fsCache_.clear();
vsCache_.clear();
- DirtyShader();
+ DirtyLastShader();
}
void ShaderManagerDX9::ClearShaders() {
Clear();
}
-void ShaderManagerDX9::DirtyShader() {
+void ShaderManagerDX9::DirtyLastShader() {
// Forget the last shader ID
lastFSID_.set_invalid();
lastVSID_.set_invalid();
lastVShader_ = nullptr;
lastPShader_ = nullptr;
+ // TODO: Probably not necessary to dirty uniforms here on DX9.
gstate_c.Dirty(DIRTY_ALL_UNIFORMS | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE);
}
-void ShaderManagerDX9::DirtyLastShader() {
- lastVShader_ = nullptr;
- lastPShader_ = nullptr;
-}
-
VSShader *ShaderManagerDX9::ApplyShader(bool useHWTransform, bool useHWTessellation, VertexDecoder *decoder, bool weightsAsFloat, bool useSkinInDecode, const ComputedPipelineState &pipelineState) {
VShaderID VSID;
if (gstate_c.IsDirty(DIRTY_VERTEXSHADER_STATE)) {
diff --git a/GPU/Directx9/ShaderManagerDX9.h b/GPU/Directx9/ShaderManagerDX9.h
index 68337503b40b..5ce429f6f9d2 100644
--- a/GPU/Directx9/ShaderManagerDX9.h
+++ b/GPU/Directx9/ShaderManagerDX9.h
@@ -79,7 +79,6 @@ class ShaderManagerDX9 : public ShaderManagerCommon {
void ClearShaders() override;
VSShader *ApplyShader(bool useHWTransform, bool useHWTessellation, VertexDecoder *decoder, bool weightsAsFloat, bool useSkinInDecode, const ComputedPipelineState &pipelineState);
- void DirtyShader();
void DirtyLastShader() override;
int GetNumVertexShaders() const { return (int)vsCache_.size(); }
diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp
index e4d5bbac31be..d1b957ac548a 100644
--- a/GPU/GLES/DrawEngineGLES.cpp
+++ b/GPU/GLES/DrawEngineGLES.cpp
@@ -357,7 +357,7 @@ void DrawEngineGLES::DoFlush() {
if (prim == GE_PRIM_TRIANGLE_STRIP)
prim = GE_PRIM_TRIANGLES;
- u16 *const inds = decIndex_;
+ u16 *inds = decIndex_;
SoftwareTransformResult result{};
// TODO: Keep this static? Faster than repopulating?
SoftwareTransformParams params{};
@@ -414,9 +414,8 @@ void DrawEngineGLES::DoFlush() {
// Need to ApplyDrawState after ApplyTexture because depal can launch a render pass and that wrecks the state.
ApplyDrawState(prim);
- int indsOffset = 0;
if (result.action == SW_NOT_READY)
- swTransform.BuildDrawingParams(prim, vertexCount, dec_->VertexType(), inds, indsOffset, DECODED_INDEX_BUFFER_SIZE / sizeof(uint16_t), maxIndex, &result);
+ swTransform.BuildDrawingParams(prim, vertexCount, dec_->VertexType(), inds, maxIndex, &result);
if (result.setSafeSize)
framebufferManager_->SetSafeSize(result.safeWidth, result.safeHeight);
@@ -431,11 +430,11 @@ void DrawEngineGLES::DoFlush() {
if (result.action == SW_DRAW_PRIMITIVES) {
if (result.drawIndexed) {
vertexBufferOffset = (uint32_t)frameData.pushVertex->Push(result.drawBuffer, maxIndex * sizeof(TransformedVertex), 4, &vertexBuffer);
- indexBufferOffset = (uint32_t)frameData.pushIndex->Push(inds + indsOffset, sizeof(uint16_t) * result.drawNumTrans, 2, &indexBuffer);
+ indexBufferOffset = (uint32_t)frameData.pushIndex->Push(inds, sizeof(uint16_t) * result.drawNumTrans, 2, &indexBuffer);
render_->DrawIndexed(
softwareInputLayout_, vertexBuffer, vertexBufferOffset, indexBuffer, indexBufferOffset,
glprim[prim], result.drawNumTrans, GL_UNSIGNED_SHORT);
- } else if (result.drawNumTrans > 0) {
+ } else {
vertexBufferOffset = (uint32_t)frameData.pushVertex->Push(result.drawBuffer, result.drawNumTrans * sizeof(TransformedVertex), 4, &vertexBuffer);
render_->Draw(
softwareInputLayout_, vertexBuffer, vertexBufferOffset, glprim[prim], 0, result.drawNumTrans);
@@ -521,7 +520,7 @@ void TessellationDataTransferGLES::SendDataToShader(const SimpleVertex *const *p
if (prevSizeU < size_u || prevSizeV < size_v) {
prevSizeU = size_u;
prevSizeV = size_v;
- if (!data_tex[0])
+ if (data_tex[0])
renderManager_->DeleteTexture(data_tex[0]);
data_tex[0] = renderManager_->CreateTexture(GL_TEXTURE_2D, size_u * 3, size_v, 1, 1);
renderManager_->TextureImage(data_tex[0], 0, size_u * 3, size_v, 1, Draw::DataFormat::R32G32B32A32_FLOAT, nullptr, GLRAllocType::NONE, false);
@@ -540,7 +539,7 @@ void TessellationDataTransferGLES::SendDataToShader(const SimpleVertex *const *p
// Weight U
if (prevSizeWU < weights.size_u) {
prevSizeWU = weights.size_u;
- if (!data_tex[1])
+ if (data_tex[1])
renderManager_->DeleteTexture(data_tex[1]);
data_tex[1] = renderManager_->CreateTexture(GL_TEXTURE_2D, weights.size_u * 2, 1, 1, 1);
renderManager_->TextureImage(data_tex[1], 0, weights.size_u * 2, 1, 1, Draw::DataFormat::R32G32B32A32_FLOAT, nullptr, GLRAllocType::NONE, false);
@@ -552,7 +551,7 @@ void TessellationDataTransferGLES::SendDataToShader(const SimpleVertex *const *p
// Weight V
if (prevSizeWV < weights.size_v) {
prevSizeWV = weights.size_v;
- if (!data_tex[2])
+ if (data_tex[2])
renderManager_->DeleteTexture(data_tex[2]);
data_tex[2] = renderManager_->CreateTexture(GL_TEXTURE_2D, weights.size_v * 2, 1, 1, 1);
renderManager_->TextureImage(data_tex[2], 0, weights.size_v * 2, 1, 1, Draw::DataFormat::R32G32B32A32_FLOAT, nullptr, GLRAllocType::NONE, false);
diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp
index 8c4e354a706a..293954271514 100644
--- a/GPU/GLES/GPU_GLES.cpp
+++ b/GPU/GLES/GPU_GLES.cpp
@@ -277,7 +277,7 @@ void GPU_GLES::BeginFrame() {
if (shaderCachePath_.Valid() && (gpuStats.numFlips & 4095) == 0) {
shaderManagerGL_->SaveCache(shaderCachePath_, &drawEngine_);
}
- shaderManagerGL_->DirtyShader();
+ shaderManagerGL_->DirtyLastShader();
// Not sure if this is really needed.
gstate_c.Dirty(DIRTY_ALL_UNIFORMS);
diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp
index 4c28ef04e8c1..54b3b4edaceb 100644
--- a/GPU/GLES/ShaderManagerGLES.cpp
+++ b/GPU/GLES/ShaderManagerGLES.cpp
@@ -76,6 +76,7 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs,
: render_(render), useHWTransform_(useHWTransform) {
PROFILE_THIS_SCOPE("shaderlink");
+ _assert_(render);
_assert_(vs);
_assert_(fs);
@@ -715,7 +716,7 @@ void ShaderManagerGLES::Clear() {
linkedShaderCache_.clear();
fsCache_.Clear();
vsCache_.Clear();
- DirtyShader();
+ DirtyLastShader();
}
void ShaderManagerGLES::ClearShaders() {
@@ -734,16 +735,12 @@ void ShaderManagerGLES::DeviceRestore(Draw::DrawContext *draw) {
draw_ = draw;
}
-void ShaderManagerGLES::DirtyShader() {
+void ShaderManagerGLES::DirtyLastShader() {
// Forget the last shader ID
lastFSID_.set_invalid();
lastVSID_.set_invalid();
- DirtyLastShader();
gstate_c.Dirty(DIRTY_ALL_UNIFORMS | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE);
shaderSwitchDirtyUniforms_ = 0;
-}
-
-void ShaderManagerGLES::DirtyLastShader() {
lastShader_ = nullptr;
lastVShaderSame_ = false;
}
@@ -986,7 +983,7 @@ enum class CacheDetectFlags {
};
#define CACHE_HEADER_MAGIC 0x83277592
-#define CACHE_VERSION 32
+#define CACHE_VERSION 33
struct CacheHeader {
uint32_t magic;
diff --git a/GPU/GLES/ShaderManagerGLES.h b/GPU/GLES/ShaderManagerGLES.h
index db834dce3735..3ae0aaa60ae7 100644
--- a/GPU/GLES/ShaderManagerGLES.h
+++ b/GPU/GLES/ShaderManagerGLES.h
@@ -172,7 +172,6 @@ class ShaderManagerGLES : public ShaderManagerCommon {
void DeviceLost() override;
void DeviceRestore(Draw::DrawContext *draw) override;
- void DirtyShader();
void DirtyLastShader() override;
int GetNumVertexShaders() const { return (int)vsCache_.size(); }
diff --git a/GPU/GLES/TextureCacheGLES.cpp b/GPU/GLES/TextureCacheGLES.cpp
index 8bbb390bc896..94dcf0db04a9 100644
--- a/GPU/GLES/TextureCacheGLES.cpp
+++ b/GPU/GLES/TextureCacheGLES.cpp
@@ -304,7 +304,7 @@ void TextureCacheGLES::BuildTexture(TexCacheEntry *const entry) {
bc = true;
} else {
int bpp = (int)Draw::DataFormatSizeInBytes(plan.replaced->Format());
- stride = std::max(mipWidth * bpp, 16);
+ stride = mipWidth * bpp;
dataSize = stride * mipHeight;
}
} else {
@@ -314,7 +314,7 @@ void TextureCacheGLES::BuildTexture(TexCacheEntry *const entry) {
} else {
bpp = (int)Draw::DataFormatSizeInBytes(dstFmt);
}
- stride = std::max(mipWidth * bpp, 16);
+ stride = mipWidth * bpp;
dataSize = stride * mipHeight;
}
diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp
index cd8824f2e333..9dfd2cdbbf8f 100644
--- a/GPU/GPUCommon.cpp
+++ b/GPU/GPUCommon.cpp
@@ -1704,9 +1704,7 @@ void GPUCommon::DoBlockTransfer(u32 skipDrawReason) {
memcpy(dstp, srcp, bytesToCopy);
if (MemBlockInfoDetailed(bytesToCopy)) {
- tagSize = FormatMemWriteTagAt(tag, sizeof(tag), "GPUBlockTransfer/", src, bytesToCopy);
- NotifyMemInfo(MemBlockFlags::READ, src, bytesToCopy, tag, tagSize);
- NotifyMemInfo(MemBlockFlags::WRITE, dst, bytesToCopy, tag, tagSize);
+ NotifyMemInfoCopy(dst, src, bytesToCopy, "GPUBlockTransfer/");
}
} else if ((srcDstOverlap || srcWraps || dstWraps) && (srcValid || srcWraps) && (dstValid || dstWraps)) {
// This path means we have either src/dst overlap, OR one or both of src and dst wrap.
@@ -1862,12 +1860,11 @@ bool GPUCommon::PerformMemoryCopy(u32 dest, u32 src, int size, GPUCopyFlag flags
// We use matching values in PerformReadbackToMemory/PerformWriteColorFromMemory.
// Since they're identical we don't need to copy.
if (dest != src) {
+ if (Memory::IsValidRange(dest, size) && Memory::IsValidRange(src, size)) {
+ memcpy(Memory::GetPointerWriteUnchecked(dest), Memory::GetPointerUnchecked(src), size);
+ }
if (MemBlockInfoDetailed(size)) {
- char tag[128];
- size_t tagSize = FormatMemWriteTagAt(tag, sizeof(tag), "GPUMemcpy/", src, size);
- Memory::Memcpy(dest, src, size, tag, tagSize);
- } else {
- Memory::Memcpy(dest, src, size, "GPUMemcpy");
+ NotifyMemInfoCopy(dest, src, size, "GPUMemcpy/");
}
}
}
@@ -1876,10 +1873,7 @@ bool GPUCommon::PerformMemoryCopy(u32 dest, u32 src, int size, GPUCopyFlag flags
}
if (MemBlockInfoDetailed(size)) {
- char tag[128];
- size_t tagSize = FormatMemWriteTagAt(tag, sizeof(tag), "GPUMemcpy/", src, size);
- NotifyMemInfo(MemBlockFlags::READ, src, size, tag, tagSize);
- NotifyMemInfo(MemBlockFlags::WRITE, dest, size, tag, tagSize);
+ NotifyMemInfoCopy(dest, src, size, "GPUMemcpy/");
}
InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
if (!(flags & GPUCopyFlag::DEBUG_NOTIFIED))
diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp
index 5d53a1e32dff..bacff2f4d5d6 100644
--- a/GPU/Vulkan/DrawEngineVulkan.cpp
+++ b/GPU/Vulkan/DrawEngineVulkan.cpp
@@ -99,8 +99,8 @@ void DrawEngineVulkan::InitDeviceObjects() {
bindings[3].descriptorCount = 1;
bindings[3].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
bindings[3].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;
- if (gstate_c.Use(GPU_USE_GS_CULLING))
- bindings[3].stageFlags |= VK_SHADER_STAGE_GEOMETRY_BIT;
+ if (draw_->GetDeviceCaps().geometryShaderSupported)
+ bindings[3].stageFlags |= VK_SHADER_STAGE_GEOMETRY_BIT; // unlikely to have a penalty. if we check GPU_USE_GS_CULLING, we have problems on runtime toggle.
bindings[3].binding = DRAW_BINDING_DYNUBO_BASE;
bindings[4].descriptorCount = 1;
bindings[4].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
@@ -858,7 +858,7 @@ void DrawEngineVulkan::DoFlush() {
if (prim == GE_PRIM_TRIANGLE_STRIP)
prim = GE_PRIM_TRIANGLES;
- u16 *const inds = decIndex_;
+ u16 *inds = decIndex_;
SoftwareTransformResult result{};
SoftwareTransformParams params{};
params.decoded = decoded_;
@@ -898,10 +898,9 @@ void DrawEngineVulkan::DoFlush() {
// Games sometimes expect exact matches (see #12626, for example) for equal comparisons.
if (result.action == SW_CLEAR && everUsedEqualDepth_ && gstate.isClearModeDepthMask() && result.depth > 0.0f && result.depth < 1.0f)
result.action = SW_NOT_READY;
- int indsOffset = 0;
if (result.action == SW_NOT_READY) {
swTransform.DetectOffsetTexture(maxIndex);
- swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, indsOffset, DECODED_INDEX_BUFFER_SIZE / sizeof(uint16_t), maxIndex, &result);
+ swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, maxIndex, &result);
}
if (result.setSafeSize)
@@ -972,9 +971,9 @@ void DrawEngineVulkan::DoFlush() {
if (result.drawIndexed) {
VkBuffer vbuf, ibuf;
vbOffset = (uint32_t)pushVertex_->Push(result.drawBuffer, maxIndex * sizeof(TransformedVertex), 4, &vbuf);
- ibOffset = (uint32_t)pushIndex_->Push(inds + indsOffset, sizeof(short) * result.drawNumTrans, 4, &ibuf);
+ ibOffset = (uint32_t)pushIndex_->Push(inds, sizeof(short) * result.drawNumTrans, 4, &ibuf);
renderManager->DrawIndexed(ds, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, ibuf, ibOffset, result.drawNumTrans, 1);
- } else if (result.drawNumTrans > 0) {
+ } else {
VkBuffer vbuf;
vbOffset = (uint32_t)pushVertex_->Push(result.drawBuffer, result.drawNumTrans * sizeof(TransformedVertex), 4, &vbuf);
renderManager->Draw(ds, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, result.drawNumTrans);
diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp
index ae3f4d9d137b..10a46ca9ee64 100644
--- a/GPU/Vulkan/GPU_Vulkan.cpp
+++ b/GPU/Vulkan/GPU_Vulkan.cpp
@@ -25,6 +25,7 @@
#include "Common/GraphicsContext.h"
#include "Common/Serialize/Serializer.h"
#include "Common/TimeUtil.h"
+#include "Common/Thread/ThreadUtil.h"
#include "Core/Config.h"
#include "Core/Debugger/Breakpoints.h"
@@ -92,26 +93,10 @@ GPU_Vulkan::GPU_Vulkan(GraphicsContext *gfxCtx, Draw::DrawContext *draw)
if (discID.size()) {
File::CreateFullPath(GetSysDirectory(DIRECTORY_APP_CACHE));
shaderCachePath_ = GetSysDirectory(DIRECTORY_APP_CACHE) / (discID + ".vkshadercache");
- shaderCacheLoaded_ = false;
-
- std::thread th([&] {
- LoadCache(shaderCachePath_);
- shaderCacheLoaded_ = true;
- });
- th.detach();
- } else {
- shaderCacheLoaded_ = true;
+ LoadCache(shaderCachePath_);
}
}
-bool GPU_Vulkan::IsReady() {
- return shaderCacheLoaded_;
-}
-
-void GPU_Vulkan::CancelReady() {
- pipelineManager_->CancelCache();
-}
-
void GPU_Vulkan::LoadCache(const Path &filename) {
if (!g_Config.bShaderCache) {
WARN_LOG(G3D, "Shader cache disabled. Not loading.");
@@ -197,7 +182,6 @@ GPU_Vulkan::~GPU_Vulkan() {
shaderManager_->ClearShaders();
// other managers are deleted in ~GPUCommonHW.
-
if (draw_) {
VulkanRenderManager *rm = (VulkanRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
rm->ReleaseCompileQueue();
@@ -314,7 +298,7 @@ void GPU_Vulkan::BeginHostFrame() {
framebufferManager_->BeginFrame();
- shaderManagerVulkan_->DirtyShader();
+ shaderManagerVulkan_->DirtyLastShader();
gstate_c.Dirty(DIRTY_ALL);
if (gstate_c.useFlagsChanged) {
@@ -442,6 +426,13 @@ void GPU_Vulkan::DeviceLost() {
while (!IsReady()) {
sleep_ms(10);
}
+ // draw_ is normally actually still valid here in Vulkan. But we null it out in GPUCommonHW::DeviceLost so we don't try to use it again.
+ Draw::DrawContext *draw = draw_;
+ if (draw) {
+ VulkanRenderManager *rm = (VulkanRenderManager *)draw->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
+ rm->DrainAndBlockCompileQueue();
+ }
+
if (shaderCachePath_.Valid()) {
SaveCache(shaderCachePath_);
}
@@ -449,6 +440,11 @@ void GPU_Vulkan::DeviceLost() {
pipelineManager_->DeviceLost();
GPUCommonHW::DeviceLost();
+
+ if (draw) {
+ VulkanRenderManager *rm = (VulkanRenderManager *)draw->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
+ rm->ReleaseCompileQueue();
+ }
}
void GPU_Vulkan::DeviceRestore(Draw::DrawContext *draw) {
diff --git a/GPU/Vulkan/GPU_Vulkan.h b/GPU/Vulkan/GPU_Vulkan.h
index d5bae9798800..3d943de1cf86 100644
--- a/GPU/Vulkan/GPU_Vulkan.h
+++ b/GPU/Vulkan/GPU_Vulkan.h
@@ -19,6 +19,7 @@
#include
#include
+#include
#include "Common/File/Path.h"
@@ -40,9 +41,6 @@ class GPU_Vulkan : public GPUCommonHW {
// This gets called on startup and when we get back from settings.
u32 CheckGPUFeatures() const override;
- bool IsReady() override;
- void CancelReady() override;
-
// These are where we can reset command buffers etc.
void BeginHostFrame() override;
void EndHostFrame() override;
@@ -83,5 +81,4 @@ class GPU_Vulkan : public GPUCommonHW {
PipelineManagerVulkan *pipelineManager_;
Path shaderCachePath_;
- std::atomic shaderCacheLoaded_{};
};
diff --git a/GPU/Vulkan/PipelineManagerVulkan.cpp b/GPU/Vulkan/PipelineManagerVulkan.cpp
index ad978c524f7a..b2ad9d0739a6 100644
--- a/GPU/Vulkan/PipelineManagerVulkan.cpp
+++ b/GPU/Vulkan/PipelineManagerVulkan.cpp
@@ -719,8 +719,6 @@ bool PipelineManagerVulkan::LoadPipelineCache(FILE *file, bool loadRawPipelineCa
VulkanRenderManager *rm = (VulkanRenderManager *)drawContext->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
VulkanQueueRunner *queueRunner = rm->GetQueueRunner();
- cancelCache_ = false;
-
uint32_t size = 0;
if (loadRawPipelineCache) {
NOTICE_LOG(G3D, "WARNING: Using the badly tested raw pipeline cache path!!!!");
@@ -779,7 +777,7 @@ bool PipelineManagerVulkan::LoadPipelineCache(FILE *file, bool loadRawPipelineCa
int pipelineCreateFailCount = 0;
int shaderFailCount = 0;
for (uint32_t i = 0; i < size; i++) {
- if (failed || cancelCache_) {
+ if (failed) {
break;
}
StoredVulkanPipelineKey key;
@@ -824,7 +822,3 @@ bool PipelineManagerVulkan::LoadPipelineCache(FILE *file, bool loadRawPipelineCa
// We just ignore any failures.
return true;
}
-
-void PipelineManagerVulkan::CancelCache() {
- cancelCache_ = true;
-}
diff --git a/GPU/Vulkan/PipelineManagerVulkan.h b/GPU/Vulkan/PipelineManagerVulkan.h
index 0876c4cc2dcc..b61a3faa107b 100644
--- a/GPU/Vulkan/PipelineManagerVulkan.h
+++ b/GPU/Vulkan/PipelineManagerVulkan.h
@@ -101,11 +101,9 @@ class PipelineManagerVulkan {
// Saves data for faster creation next time.
void SavePipelineCache(FILE *file, bool saveRawPipelineCache, ShaderManagerVulkan *shaderManager, Draw::DrawContext *drawContext);
bool LoadPipelineCache(FILE *file, bool loadRawPipelineCache, ShaderManagerVulkan *shaderManager, Draw::DrawContext *drawContext, VkPipelineLayout layout, int multiSampleLevel);
- void CancelCache();
private:
DenseHashMap pipelines_;
VkPipelineCache pipelineCache_ = VK_NULL_HANDLE;
VulkanContext *vulkan_;
- bool cancelCache_ = false;
};
diff --git a/GPU/Vulkan/ShaderManagerVulkan.cpp b/GPU/Vulkan/ShaderManagerVulkan.cpp
index 452395a00f2f..5a36f2d540e3 100644
--- a/GPU/Vulkan/ShaderManagerVulkan.cpp
+++ b/GPU/Vulkan/ShaderManagerVulkan.cpp
@@ -47,6 +47,7 @@
// Most drivers treat vkCreateShaderModule as pretty much a memcpy. What actually
// takes time here, and makes this worthy of parallelization, is GLSLtoSPV.
// Takes ownership over tag.
+// This always returns something, checking the return value for null is not meaningful.
static Promise *CompileShaderModuleAsync(VulkanContext *vulkan, VkShaderStageFlagBits stage, const char *code, std::string *tag) {
auto compile = [=] {
PROFILE_THIS_SCOPE("shadercomp");
@@ -112,13 +113,10 @@ static Promise *CompileShaderModuleAsync(VulkanContext *vulkan,
VulkanFragmentShader::VulkanFragmentShader(VulkanContext *vulkan, FShaderID id, FragmentShaderFlags flags, const char *code)
: vulkan_(vulkan), id_(id), flags_(flags) {
+ _assert_(!id.is_invalid());
source_ = code;
module_ = CompileShaderModuleAsync(vulkan, VK_SHADER_STAGE_FRAGMENT_BIT, source_.c_str(), new std::string(FragmentShaderDesc(id)));
- if (!module_) {
- failed_ = true;
- } else {
- VERBOSE_LOG(G3D, "Compiled fragment shader:\n%s\n", (const char *)code);
- }
+ VERBOSE_LOG(G3D, "Compiled fragment shader:\n%s\n", (const char *)code);
}
VulkanFragmentShader::~VulkanFragmentShader() {
@@ -147,13 +145,10 @@ std::string VulkanFragmentShader::GetShaderString(DebugShaderStringType type) co
VulkanVertexShader::VulkanVertexShader(VulkanContext *vulkan, VShaderID id, VertexShaderFlags flags, const char *code, bool useHWTransform)
: vulkan_(vulkan), useHWTransform_(useHWTransform), flags_(flags), id_(id) {
+ _assert_(!id.is_invalid());
source_ = code;
module_ = CompileShaderModuleAsync(vulkan, VK_SHADER_STAGE_VERTEX_BIT, source_.c_str(), new std::string(VertexShaderDesc(id)));
- if (!module_) {
- failed_ = true;
- } else {
- VERBOSE_LOG(G3D, "Compiled vertex shader:\n%s\n", (const char *)code);
- }
+ VERBOSE_LOG(G3D, "Compiled vertex shader:\n%s\n", (const char *)code);
}
VulkanVertexShader::~VulkanVertexShader() {
@@ -182,13 +177,10 @@ std::string VulkanVertexShader::GetShaderString(DebugShaderStringType type) cons
VulkanGeometryShader::VulkanGeometryShader(VulkanContext *vulkan, GShaderID id, const char *code)
: vulkan_(vulkan), id_(id) {
+ _assert_(!id.is_invalid());
source_ = code;
module_ = CompileShaderModuleAsync(vulkan, VK_SHADER_STAGE_GEOMETRY_BIT, source_.c_str(), new std::string(GeometryShaderDesc(id).c_str()));
- if (!module_) {
- failed_ = true;
- } else {
- VERBOSE_LOG(G3D, "Compiled geometry shader:\n%s\n", (const char *)code);
- }
+ VERBOSE_LOG(G3D, "Compiled geometry shader:\n%s\n", (const char *)code);
}
VulkanGeometryShader::~VulkanGeometryShader() {
@@ -232,11 +224,12 @@ ShaderManagerVulkan::ShaderManagerVulkan(Draw::DrawContext *draw)
}
ShaderManagerVulkan::~ShaderManagerVulkan() {
- ClearShaders();
+ Clear();
delete[] codeBuffer_;
}
void ShaderManagerVulkan::DeviceLost() {
+ Clear();
draw_ = nullptr;
}
@@ -267,19 +260,15 @@ void ShaderManagerVulkan::Clear() {
void ShaderManagerVulkan::ClearShaders() {
Clear();
- DirtyShader();
+ DirtyLastShader();
gstate_c.Dirty(DIRTY_ALL_UNIFORMS | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE);
}
-void ShaderManagerVulkan::DirtyShader() {
+void ShaderManagerVulkan::DirtyLastShader() {
// Forget the last shader ID
lastFSID_.set_invalid();
lastVSID_.set_invalid();
lastGSID_.set_invalid();
- DirtyLastShader();
-}
-
-void ShaderManagerVulkan::DirtyLastShader() {
lastVShader_ = nullptr;
lastFShader_ = nullptr;
lastGShader_ = nullptr;
@@ -301,29 +290,95 @@ uint64_t ShaderManagerVulkan::UpdateUniforms(bool useBufferedRendering) {
}
void ShaderManagerVulkan::GetShaders(int prim, VertexDecoder *decoder, VulkanVertexShader **vshader, VulkanFragmentShader **fshader, VulkanGeometryShader **gshader, const ComputedPipelineState &pipelineState, bool useHWTransform, bool useHWTessellation, bool weightsAsFloat, bool useSkinInDecode) {
+ VulkanContext *vulkan = (VulkanContext *)draw_->GetNativeObject(Draw::NativeObject::CONTEXT);
+
VShaderID VSID;
+ VulkanVertexShader *vs = nullptr;
if (gstate_c.IsDirty(DIRTY_VERTEXSHADER_STATE)) {
gstate_c.Clean(DIRTY_VERTEXSHADER_STATE);
ComputeVertexShaderID(&VSID, decoder, useHWTransform, useHWTessellation, weightsAsFloat, useSkinInDecode);
+ if (VSID == lastVSID_) {
+ _dbg_assert_(lastVShader_ != nullptr);
+ vs = lastVShader_;
+ } else if (!vsCache_.Get(VSID, &vs)) {
+ // Vertex shader not in cache. Let's compile it.
+ std::string genErrorString;
+ uint64_t uniformMask = 0; // Not used
+ uint32_t attributeMask = 0; // Not used
+ VertexShaderFlags flags{};
+ bool success = GenerateVertexShader(VSID, codeBuffer_, compat_, draw_->GetBugs(), &attributeMask, &uniformMask, &flags, &genErrorString);
+ _assert_msg_(success, "VS gen error: %s", genErrorString.c_str());
+ _assert_msg_(strlen(codeBuffer_) < CODE_BUFFER_SIZE, "VS length error: %d", (int)strlen(codeBuffer_));
+
+ // Don't need to re-lookup anymore, now that we lock wider.
+ vs = new VulkanVertexShader(vulkan, VSID, flags, codeBuffer_, useHWTransform);
+ vsCache_.Insert(VSID, vs);
+ }
+ lastVShader_ = vs;
+ lastVSID_ = VSID;
} else {
VSID = lastVSID_;
+ vs = lastVShader_;
}
+ *vshader = vs;
FShaderID FSID;
+ VulkanFragmentShader *fs = nullptr;
if (gstate_c.IsDirty(DIRTY_FRAGMENTSHADER_STATE)) {
gstate_c.Clean(DIRTY_FRAGMENTSHADER_STATE);
ComputeFragmentShaderID(&FSID, pipelineState, draw_->GetBugs());
+ if (FSID == lastFSID_) {
+ _dbg_assert_(lastFShader_ != nullptr);
+ fs = lastFShader_;
+ } else if (!fsCache_.Get(FSID, &fs)) {
+ // Fragment shader not in cache. Let's compile it.
+ std::string genErrorString;
+ uint64_t uniformMask = 0; // Not used
+ FragmentShaderFlags flags{};
+ bool success = GenerateFragmentShader(FSID, codeBuffer_, compat_, draw_->GetBugs(), &uniformMask, &flags, &genErrorString);
+ _assert_msg_(success, "FS gen error: %s", genErrorString.c_str());
+ _assert_msg_(strlen(codeBuffer_) < CODE_BUFFER_SIZE, "FS length error: %d", (int)strlen(codeBuffer_));
+
+ fs = new VulkanFragmentShader(vulkan, FSID, flags, codeBuffer_);
+ fsCache_.Insert(FSID, fs);
+ }
+ lastFShader_ = fs;
+ lastFSID_ = FSID;
} else {
FSID = lastFSID_;
+ fs = lastFShader_;
}
+ *fshader = fs;
GShaderID GSID;
+ VulkanGeometryShader *gs = nullptr;
if (gstate_c.IsDirty(DIRTY_GEOMETRYSHADER_STATE)) {
gstate_c.Clean(DIRTY_GEOMETRYSHADER_STATE);
ComputeGeometryShaderID(&GSID, draw_->GetBugs(), prim);
+ if (GSID == lastGSID_) {
+ // it's ok for this to be null.
+ gs = lastGShader_;
+ } else if (GSID.Bit(GS_BIT_ENABLED)) {
+ if (!gsCache_.Get(GSID, &gs)) {
+ // Geometry shader not in cache. Let's compile it.
+ std::string genErrorString;
+ bool success = GenerateGeometryShader(GSID, codeBuffer_, compat_, draw_->GetBugs(), &genErrorString);
+ _assert_msg_(success, "GS gen error: %s", genErrorString.c_str());
+ _assert_msg_(strlen(codeBuffer_) < CODE_BUFFER_SIZE, "GS length error: %d", (int)strlen(codeBuffer_));
+
+ gs = new VulkanGeometryShader(vulkan, GSID, codeBuffer_);
+ gsCache_.Insert(GSID, gs);
+ }
+ } else {
+ gs = nullptr;
+ }
+ lastGShader_ = gs;
+ lastGSID_ = GSID;
} else {
GSID = lastGSID_;
+ gs = lastGShader_;
}
+ *gshader = gs;
_dbg_assert_(FSID.Bit(FS_BIT_FLATSHADE) == VSID.Bit(VS_BIT_FLATSHADE));
_dbg_assert_(FSID.Bit(FS_BIT_LMODE) == VSID.Bit(VS_BIT_LMODE));
@@ -331,82 +386,6 @@ void ShaderManagerVulkan::GetShaders(int prim, VertexDecoder *decoder, VulkanVer
_dbg_assert_(GSID.Bit(GS_BIT_LMODE) == VSID.Bit(VS_BIT_LMODE));
}
- // Just update uniforms if this is the same shader as last time.
- if (lastVShader_ != nullptr && lastFShader_ != nullptr && VSID == lastVSID_ && FSID == lastFSID_ && GSID == lastGSID_) {
- *vshader = lastVShader_;
- *fshader = lastFShader_;
- *gshader = lastGShader_;
- _dbg_assert_msg_((*vshader)->UseHWTransform() == useHWTransform, "Bad vshader was cached");
- // Already all set, no need to look up in shader maps.
- return;
- }
-
- VulkanContext *vulkan = (VulkanContext *)draw_->GetNativeObject(Draw::NativeObject::CONTEXT);
- VulkanVertexShader *vs = nullptr;
- if (!vsCache_.Get(VSID, &vs)) {
- // Vertex shader not in cache. Let's compile it.
- std::string genErrorString;
- uint64_t uniformMask = 0; // Not used
- uint32_t attributeMask = 0; // Not used
- VertexShaderFlags flags{};
- bool success = GenerateVertexShader(VSID, codeBuffer_, compat_, draw_->GetBugs(), &attributeMask, &uniformMask, &flags, &genErrorString);
- _assert_msg_(success, "VS gen error: %s", genErrorString.c_str());
- _assert_msg_(strlen(codeBuffer_) < CODE_BUFFER_SIZE, "VS length error: %d", (int)strlen(codeBuffer_));
-
- std::lock_guard guard(cacheLock_);
- if (!vsCache_.Get(VSID, &vs)) {
- vs = new VulkanVertexShader(vulkan, VSID, flags, codeBuffer_, useHWTransform);
- vsCache_.Insert(VSID, vs);
- }
- }
-
- VulkanFragmentShader *fs;
- if (!fsCache_.Get(FSID, &fs)) {
- // Fragment shader not in cache. Let's compile it.
- std::string genErrorString;
- uint64_t uniformMask = 0; // Not used
- FragmentShaderFlags flags{};
- bool success = GenerateFragmentShader(FSID, codeBuffer_, compat_, draw_->GetBugs(), &uniformMask, &flags, &genErrorString);
- _assert_msg_(success, "FS gen error: %s", genErrorString.c_str());
- _assert_msg_(strlen(codeBuffer_) < CODE_BUFFER_SIZE, "FS length error: %d", (int)strlen(codeBuffer_));
-
- std::lock_guard guard(cacheLock_);
- if (!fsCache_.Get(FSID, &fs)) {
- fs = new VulkanFragmentShader(vulkan, FSID, flags, codeBuffer_);
- fsCache_.Insert(FSID, fs);
- }
- }
-
- VulkanGeometryShader *gs;
- if (GSID.Bit(GS_BIT_ENABLED)) {
- if (!gsCache_.Get(GSID, &gs)) {
- // Geometry shader not in cache. Let's compile it.
- std::string genErrorString;
- bool success = GenerateGeometryShader(GSID, codeBuffer_, compat_, draw_->GetBugs(), &genErrorString);
- _assert_msg_(success, "GS gen error: %s", genErrorString.c_str());
- _assert_msg_(strlen(codeBuffer_) < CODE_BUFFER_SIZE, "GS length error: %d", (int)strlen(codeBuffer_));
-
- std::lock_guard guard(cacheLock_);
- if (!gsCache_.Get(GSID, &gs)) {
- gs = new VulkanGeometryShader(vulkan, GSID, codeBuffer_);
- gsCache_.Insert(GSID, gs);
- }
- }
- } else {
- gs = nullptr;
- }
-
- lastVSID_ = VSID;
- lastFSID_ = FSID;
- lastGSID_ = GSID;
-
- lastVShader_ = vs;
- lastFShader_ = fs;
- lastGShader_ = gs;
-
- *vshader = vs;
- *fshader = fs;
- *gshader = gs;
_dbg_assert_msg_((*vshader)->UseHWTransform() == useHWTransform, "Bad vshader was computed");
}
@@ -528,7 +507,7 @@ enum class VulkanCacheDetectFlags {
};
#define CACHE_HEADER_MAGIC 0xff51f420
-#define CACHE_VERSION 47
+#define CACHE_VERSION 48
struct VulkanCacheHeader {
uint32_t magic;
@@ -597,8 +576,7 @@ bool ShaderManagerVulkan::LoadCache(FILE *f) {
continue;
}
_assert_msg_(strlen(codeBuffer_) < CODE_BUFFER_SIZE, "VS length error: %d", (int)strlen(codeBuffer_));
- // Don't add the new shader if already compiled (can happen since this is a background thread).
- std::lock_guard guard(cacheLock_);
+ // Don't add the new shader if already compiled - though this should no longer happen.
if (!vsCache_.ContainsKey(id)) {
VulkanVertexShader *vs = new VulkanVertexShader(vulkan, id, flags, codeBuffer_, useHWTransform);
vsCache_.Insert(id, vs);
@@ -622,31 +600,32 @@ bool ShaderManagerVulkan::LoadCache(FILE *f) {
continue;
}
_assert_msg_(strlen(codeBuffer_) < CODE_BUFFER_SIZE, "FS length error: %d", (int)strlen(codeBuffer_));
- std::lock_guard guard(cacheLock_);
if (!fsCache_.ContainsKey(id)) {
VulkanFragmentShader *fs = new VulkanFragmentShader(vulkan, id, flags, codeBuffer_);
fsCache_.Insert(id, fs);
}
}
- for (int i = 0; i < header.numGeometryShaders; i++) {
- GShaderID id;
- if (fread(&id, sizeof(id), 1, f) != 1) {
- ERROR_LOG(G3D, "Vulkan shader cache truncated (in GeometryShaders)");
- return false;
- }
- std::string genErrorString;
- if (!GenerateGeometryShader(id, codeBuffer_, compat_, draw_->GetBugs(), &genErrorString)) {
- ERROR_LOG(G3D, "Failed to generate geometry shader during cache load");
- // We just ignore this one and carry on.
- failCount++;
- continue;
- }
- _assert_msg_(strlen(codeBuffer_) < CODE_BUFFER_SIZE, "GS length error: %d", (int)strlen(codeBuffer_));
- std::lock_guard guard(cacheLock_);
- if (!gsCache_.ContainsKey(id)) {
- VulkanGeometryShader *gs = new VulkanGeometryShader(vulkan, id, codeBuffer_);
- gsCache_.Insert(id, gs);
+ // If it's not enabled, don't create shaders cached from earlier runs - creation will likely fail.
+ if (gstate_c.Use(GPU_USE_GS_CULLING)) {
+ for (int i = 0; i < header.numGeometryShaders; i++) {
+ GShaderID id;
+ if (fread(&id, sizeof(id), 1, f) != 1) {
+ ERROR_LOG(G3D, "Vulkan shader cache truncated (in GeometryShaders)");
+ return false;
+ }
+ std::string genErrorString;
+ if (!GenerateGeometryShader(id, codeBuffer_, compat_, draw_->GetBugs(), &genErrorString)) {
+ ERROR_LOG(G3D, "Failed to generate geometry shader during cache load");
+ // We just ignore this one and carry on.
+ failCount++;
+ continue;
+ }
+ _assert_msg_(strlen(codeBuffer_) < CODE_BUFFER_SIZE, "GS length error: %d", (int)strlen(codeBuffer_));
+ if (!gsCache_.ContainsKey(id)) {
+ VulkanGeometryShader *gs = new VulkanGeometryShader(vulkan, id, codeBuffer_);
+ gsCache_.Insert(id, gs);
+ }
}
}
diff --git a/GPU/Vulkan/ShaderManagerVulkan.h b/GPU/Vulkan/ShaderManagerVulkan.h
index 80c1f39a131f..dc566a426eea 100644
--- a/GPU/Vulkan/ShaderManagerVulkan.h
+++ b/GPU/Vulkan/ShaderManagerVulkan.h
@@ -43,8 +43,6 @@ class VulkanFragmentShader {
const std::string &source() const { return source_; }
- bool Failed() const { return failed_; }
-
std::string GetShaderString(DebugShaderStringType type) const;
Promise *GetModule() { return module_; }
const FShaderID &GetID() const { return id_; }
@@ -68,7 +66,6 @@ class VulkanVertexShader {
const std::string &source() const { return source_; }
- bool Failed() const { return failed_; }
bool UseHWTransform() const { return useHWTransform_; } // TODO: Roll into flags
VertexShaderFlags Flags() const { return flags_; }
@@ -81,7 +78,6 @@ class VulkanVertexShader {
VulkanContext *vulkan_;
std::string source_;
- bool failed_ = false;
bool useHWTransform_;
VShaderID id_;
VertexShaderFlags flags_;
@@ -94,9 +90,8 @@ class VulkanGeometryShader {
const std::string &source() const { return source_; }
- bool Failed() const { return failed_; }
-
std::string GetShaderString(DebugShaderStringType type) const;
+
Promise *GetModule() const { return module_; }
const GShaderID &GetID() { return id_; }
@@ -105,7 +100,6 @@ class VulkanGeometryShader {
VulkanContext *vulkan_;
std::string source_;
- bool failed_ = false;
GShaderID id_;
};
@@ -119,7 +113,6 @@ class ShaderManagerVulkan : public ShaderManagerCommon {
void GetShaders(int prim, VertexDecoder *decoder, VulkanVertexShader **vshader, VulkanFragmentShader **fshader, VulkanGeometryShader **gshader, const ComputedPipelineState &pipelineState, bool useHWTransform, bool useHWTessellation, bool weightsAsFloat, bool useSkinInDecode);
void ClearShaders() override;
- void DirtyShader();
void DirtyLastShader() override;
int GetNumVertexShaders() const { return (int)vsCache_.size(); }
@@ -130,6 +123,7 @@ class ShaderManagerVulkan : public ShaderManagerCommon {
VulkanVertexShader *GetVertexShaderFromID(VShaderID id) { return vsCache_.GetOrNull(id); }
VulkanFragmentShader *GetFragmentShaderFromID(FShaderID id) { return fsCache_.GetOrNull(id); }
VulkanGeometryShader *GetGeometryShaderFromID(GShaderID id) { return gsCache_.GetOrNull(id); }
+
VulkanVertexShader *GetVertexShaderFromModule(VkShaderModule module);
VulkanFragmentShader *GetFragmentShaderFromModule(VkShaderModule module);
VulkanGeometryShader *GetGeometryShaderFromModule(VkShaderModule module);
@@ -175,7 +169,6 @@ class ShaderManagerVulkan : public ShaderManagerCommon {
GSCache gsCache_;
char *codeBuffer_;
- std::mutex cacheLock_;
uint64_t uboAlignment_;
// Uniform block scratchpad. These (the relevant ones) are copied to the current pushbuffer at draw time.
diff --git a/Qt/QtMain.cpp b/Qt/QtMain.cpp
index ea99d78ff5a8..7e7d57f480af 100644
--- a/Qt/QtMain.cpp
+++ b/Qt/QtMain.cpp
@@ -304,7 +304,7 @@ bool MainUI::HandleCustomEvent(QEvent *e) {
const char *filter = "All files (*.*)";
switch (fileType) {
case BrowseFileType::BOOTABLE:
- filter = "PSP ROMs (*.iso *.cso *.pbp *.elf *.zip *.ppdmp)";
+ filter = "PSP ROMs (*.iso *.cso *.chd *.pbp *.elf *.zip *.ppdmp)";
break;
case BrowseFileType::IMAGE:
filter = "Pictures (*.jpg *.png)";
@@ -731,18 +731,7 @@ void MainUI::updateAccelerometer() {
// TODO: Toggle it depending on whether it is enabled
QAccelerometerReading *reading = acc->reading();
if (reading) {
- AxisInput axis[3];
- for (int i = 0; i < 3; i++) {
- axis[i].deviceId = DEVICE_ID_ACCELEROMETER;
- }
-
- axis[0].axisId = JOYSTICK_AXIS_ACCELEROMETER_X;
- axis[0].value = reading->x();
- axis[1].axisId = JOYSTICK_AXIS_ACCELEROMETER_Y;
- axis[1].value = reading->y();
- axis[2].axisId = JOYSTICK_AXIS_ACCELEROMETER_Z;
- axis[2].value = reading->z();
- NativeAxis(axis, 3);
+ NativeAccelerometer(reading->x(), reading->y(), reading->z());
}
#endif
}
diff --git a/Qt/mainwindow.cpp b/Qt/mainwindow.cpp
index 6f9722d4963b..1c3401ec0f64 100644
--- a/Qt/mainwindow.cpp
+++ b/Qt/mainwindow.cpp
@@ -125,7 +125,7 @@ void MainWindow::bootDone()
/* SIGNALS */
void MainWindow::loadAct()
{
- QString filename = QFileDialog::getOpenFileName(NULL, "Load File", g_Config.currentDirectory.c_str(), "PSP ROMs (*.pbp *.elf *.iso *.cso *.prx)");
+ QString filename = QFileDialog::getOpenFileName(NULL, "Load File", g_Config.currentDirectory.c_str(), "PSP ROMs (*.pbp *.elf *.iso *.cso *.chd *.prx)");
if (QFile::exists(filename))
{
QFileInfo info(filename);
@@ -255,7 +255,7 @@ void MainWindow::resetAct()
void MainWindow::switchUMDAct()
{
- QString filename = QFileDialog::getOpenFileName(NULL, "Switch UMD", g_Config.currentDirectory.c_str(), "PSP ROMs (*.pbp *.elf *.iso *.cso *.prx)");
+ QString filename = QFileDialog::getOpenFileName(NULL, "Switch UMD", g_Config.currentDirectory.c_str(), "PSP ROMs (*.pbp *.elf *.iso *.cso *.chd *.prx)");
if (QFile::exists(filename))
{
QFileInfo info(filename);
diff --git a/README.md b/README.md
index 9a18bcc14b6f..92a73befab20 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,30 @@ If you want to download regularly updated builds for Android, Windows x86 and x6
For game compatibility, see [community compatibility feedback](https://report.ppsspp.org/games).
+What's new in 1.16.5
+====================
+- Additional crash and memory-leak fixes ([#18243], [#18244], [#18247])
+- Revert bad change that broke things with hardware transform turned off ([#18261])
+- Fix menu texture problem in Naruto Kizuna Drive in OpenGL ([#18255])
+- Apply flicker fix to WWE SvR 2007
+- More efficient handling of accelerometer events ([#18250])
+- Fix for jumpy graphics in Harvest Moon ([#18249])
+
+What's new in 1.16.4
+====================
+- Simplify shader cache lookups on Vulkan, hopefully fixing some crashes ([#18218])
+- Assorted improvements to the IR JITs ([#18228], [#18235], [#18211], more)
+- Other crash and stability fixes ([#18221], [#18220], [#18230], [#18216])
+- Some translation updates ([#18237], more)
+- Cleanups and assert fixes ([#18205], [#18201], [#18206])
+
+What's new in 1.16.3
+====================
+- Fix crash bug and performance issue in Vulkan shader cache ([#18183], [#18189])
+- Fix crash in icon loading in homebrew store ([#18185])
+- Add some memory safety check ([#18184], [#18194])
+- Fix problem when changing backend from the Windows menu ([#18182])
+
What's new in 1.16.2
====================
- Fix for HTTP support on Linux on networks with shaky or incomplete IPv6 support
@@ -400,4 +424,29 @@ Credit goes to:
[#18169]: https://github.com/hrydgard/ppsspp/issues/18169 "Better handling of shadergen failures, other minor things"
[#18151]: https://github.com/hrydgard/ppsspp/issues/18151 "GPU, VFS, UI: Fixed minor memleaks"
[#18165]: https://github.com/hrydgard/ppsspp/issues/18165 "x86jit: Fix flush for special-purpose reg"
-[#18158]: https://github.com/hrydgard/ppsspp/issues/18158 "Add -fno-math-errno"
\ No newline at end of file
+[#18158]: https://github.com/hrydgard/ppsspp/issues/18158 "Add -fno-math-errno"
+[#18183]: https://github.com/hrydgard/ppsspp/issues/18183 "Pipeline/shader race-condition-during-shutdown crash fix"
+[#18189]: https://github.com/hrydgard/ppsspp/issues/18189 "Be a bit smarter when loading the shader cache, avoid duplicating work"
+[#18185]: https://github.com/hrydgard/ppsspp/issues/18185 "Store: Fix race condition causing crashes if looking at another game before an icon finishes downloading"
+[#18184]: https://github.com/hrydgard/ppsspp/issues/18184 "Add memory bounds-check when expanding points, rects and lines to triangles"
+[#18194]: https://github.com/hrydgard/ppsspp/issues/18194 "Cleanups and comment clarifications"
+[#18182]: https://github.com/hrydgard/ppsspp/issues/18182 "Backend change from Win32 menu: Add quick workaround for instance counter misbehavior"
+[#18218]: https://github.com/hrydgard/ppsspp/issues/18218 "Vulkan: Simplify GetShaders and DirtyLastShader, making them internally consistent."
+[#18228]: https://github.com/hrydgard/ppsspp/issues/18228 "unittest: Add jit compare for jit IR"
+[#18235]: https://github.com/hrydgard/ppsspp/issues/18235 "irjit: Handle VDet"
+[#18211]: https://github.com/hrydgard/ppsspp/issues/18211 "More crash fix attempts"
+[#18221]: https://github.com/hrydgard/ppsspp/issues/18221 "Some cleanups and fixes to obscure crashes"
+[#18220]: https://github.com/hrydgard/ppsspp/issues/18220 "Add some missing locking in KeyMap.cpp."
+[#18230]: https://github.com/hrydgard/ppsspp/issues/18230 "Android: Minor activity lifecycle stuff"
+[#18216]: https://github.com/hrydgard/ppsspp/issues/18216 "Don't load the shader cache on a separate thread - all it does is already async"
+[#18237]: https://github.com/hrydgard/ppsspp/issues/18237 "UI/localization: Italian translation update"
+[#18205]: https://github.com/hrydgard/ppsspp/issues/18205 "http: Fix errors on connect"
+[#18201]: https://github.com/hrydgard/ppsspp/issues/18201 "Asserts and checks"
+[#18206]: https://github.com/hrydgard/ppsspp/issues/18206 "GPU: Handle invalid blendeq more accurately"
+[#18243]: https://github.com/hrydgard/ppsspp/issues/18243 "More crashfix/leakfix attempts"
+[#18244]: https://github.com/hrydgard/ppsspp/issues/18244 "Core: Stop leaking file loaders"
+[#18247]: https://github.com/hrydgard/ppsspp/issues/18247 "Jit: Assert on bad exit numbers, allow two more exits per block"
+[#18261]: https://github.com/hrydgard/ppsspp/issues/18261 "Revert \"Merge pull request #18184 from hrydgard/expand-lines-mem-fix\""
+[#18255]: https://github.com/hrydgard/ppsspp/issues/18255 "Fix issue uploading narrow textures in OpenGL."
+[#18250]: https://github.com/hrydgard/ppsspp/issues/18250 "Separate out accelerometer events from joystick axis events"
+[#18249]: https://github.com/hrydgard/ppsspp/issues/18249 "arm64jit: Avoid fused multiplies in vcrsp.t"
\ No newline at end of file
diff --git a/Tools/tag_release.sh b/Tools/tag_release.sh
new file mode 100755
index 000000000000..3d05e6b05aac
--- /dev/null
+++ b/Tools/tag_release.sh
@@ -0,0 +1,14 @@
+# Only for use during the process of making official releases
+
+if [ -z "$1" ]; then
+ echo "No argument supplied"
+ exit 1
+fi
+
+VER=$1
+
+git tag -a ${VER} -m '${VER}'; git push --tags origin ${VER}; git push origin master
+
+echo Now run the internal tool:
+echo ppsspp-build --commit ${VER} --gold --sign-code
+
diff --git a/UI/BackgroundAudio.cpp b/UI/BackgroundAudio.cpp
index 8e7ecc9fde03..c990da8a68bc 100644
--- a/UI/BackgroundAudio.cpp
+++ b/UI/BackgroundAudio.cpp
@@ -148,6 +148,7 @@ void WavData::Read(RIFFReader &file_) {
raw_data = (uint8_t *)malloc(numBytes);
raw_data_size = numBytes;
+
if (num_channels == 1 || num_channels == 2) {
file_.ReadData(raw_data, numBytes);
} else {
@@ -410,7 +411,11 @@ Sample *Sample::Load(const std::string &path) {
samples[i] = ConvertU8ToI16(wave.raw_data[i]);
}
}
- return new Sample(samples, wave.num_channels, wave.numFrames, wave.sample_rate);
+
+ // Protect against bad metadata.
+ int actualFrames = std::min(wave.numFrames, wave.raw_data_size / wave.raw_bytes_per_frame);
+
+ return new Sample(samples, wave.num_channels, actualFrames, wave.sample_rate);
}
static inline int16_t Clamp16(int32_t sample) {
diff --git a/UI/GameSettingsScreen.cpp b/UI/GameSettingsScreen.cpp
index b150c85f2e1d..9628dfdc3d8c 100644
--- a/UI/GameSettingsScreen.cpp
+++ b/UI/GameSettingsScreen.cpp
@@ -1668,7 +1668,7 @@ void DeveloperToolsScreen::CreateViews() {
core->HideChoice(3);
}
// TODO: Enable on more architectures.
-#if !PPSSPP_ARCH(X86) && !PPSSPP_ARCH(AMD64)
+#if !PPSSPP_ARCH(X86) && !PPSSPP_ARCH(AMD64) && !PPSSPP_ARCH(ARM64)
core->HideChoice(3);
#endif
diff --git a/UI/MainScreen.cpp b/UI/MainScreen.cpp
index a90fef737473..6349daad3384 100644
--- a/UI/MainScreen.cpp
+++ b/UI/MainScreen.cpp
@@ -846,7 +846,7 @@ void GameBrowser::Refresh() {
}
} else if (!listingPending_) {
std::vector fileInfo;
- path_.GetListing(fileInfo, "iso:cso:pbp:elf:prx:ppdmp:");
+ path_.GetListing(fileInfo, "iso:cso:chd:pbp:elf:prx:ppdmp:");
for (size_t i = 0; i < fileInfo.size(); i++) {
bool isGame = !fileInfo[i].isDirectory;
bool isSaveData = false;
diff --git a/UI/MiscScreens.cpp b/UI/MiscScreens.cpp
index 1d462f6065ce..754c8f620616 100644
--- a/UI/MiscScreens.cpp
+++ b/UI/MiscScreens.cpp
@@ -136,21 +136,28 @@ class WaveAnimation : public Animation {
// 500 is enough for any resolution really. 24 * 500 = 12000 which fits handily in our UI vertex buffer (max 65536 per flush).
const int steps = std::max(20, std::min((int)g_display.dp_xres, 500));
- float step = (float)g_display.dp_xres / (float)steps;
+ float stepSize = (float)g_display.dp_xres / (float)steps;
t *= speed;
+ float stepx = x;
for (int n = 0; n < steps; n++) {
- float x = (float)n * step;
- float i = x * 1280 / bounds.w;
+ float nextx = stepx + stepSize;
+ // Round actual x and width to prevent gaps between waves.
+ float roundedx = floorf(stepx);
+ float w = floorf(nextx) - roundedx;
+ float i = stepx * 1280 / bounds.w;
float wave0 = sin(i*0.005+t*0.8)*0.05 + sin(i*0.002+t*0.25)*0.02 + sin(i*0.001+t*0.3)*0.03 + 0.625;
float wave1 = sin(i*0.0044+t*0.4)*0.07 + sin(i*0.003+t*0.1)*0.02 + sin(i*0.001+t*0.3)*0.01 + 0.625;
- dc.Draw()->RectVGradient(x, wave0*bounds.h, step, (1.0-wave0)*bounds.h, color, 0x00000000);
- dc.Draw()->RectVGradient(x, wave1*bounds.h, step, (1.0-wave1)*bounds.h, color, 0x00000000);
+
+ dc.Draw()->RectVGradient(roundedx, wave0*bounds.h, w, (1.0-wave0)*bounds.h, color, 0x00000000);
+ dc.Draw()->RectVGradient(roundedx, wave1*bounds.h, w, (1.0-wave1)*bounds.h, color, 0x00000000);
// Add some "antialiasing"
- dc.Draw()->RectVGradient(x, wave0*bounds.h-3.0f * g_display.pixel_in_dps_y, step, 3.0f * g_display.pixel_in_dps_y, 0x00000000, color);
- dc.Draw()->RectVGradient(x, wave1*bounds.h-3.0f * g_display.pixel_in_dps_y, step, 3.0f * g_display.pixel_in_dps_y, 0x00000000, color);
+ dc.Draw()->RectVGradient(roundedx, wave0*bounds.h-3.0f * g_display.pixel_in_dps_y, w, 3.0f * g_display.pixel_in_dps_y, 0x00000000, color);
+ dc.Draw()->RectVGradient(roundedx, wave1*bounds.h-3.0f * g_display.pixel_in_dps_y, w, 3.0f * g_display.pixel_in_dps_y, 0x00000000, color);
+
+ stepx = nextx;
}
dc.Flush();
diff --git a/UI/NativeApp.cpp b/UI/NativeApp.cpp
index 618c430d7571..f4056c10b32b 100644
--- a/UI/NativeApp.cpp
+++ b/UI/NativeApp.cpp
@@ -828,7 +828,7 @@ bool CreateGlobalPipelines();
bool NativeInitGraphics(GraphicsContext *graphicsContext) {
INFO_LOG(SYSTEM, "NativeInitGraphics");
- _assert_(g_screenManager);
+ _assert_msg_(g_screenManager, "No screenmanager, bad init order. Backend = %d", g_Config.iGPUBackend);
// We set this now so any resize during init is processed later.
resized = false;
@@ -1336,22 +1336,12 @@ static void ProcessOneAxisEvent(const AxisInput &axis) {
}
void NativeAxis(const AxisInput *axes, size_t count) {
- // figure out what the current tilt orientation is by checking the axis event
- // This is static, since we need to remember where we last were (in terms of orientation)
- static float tiltX;
- static float tiltY;
- static float tiltZ;
-
for (size_t i = 0; i < count; i++) {
ProcessOneAxisEvent(axes[i]);
- switch (axes[i].axisId) {
- case JOYSTICK_AXIS_ACCELEROMETER_X: tiltX = axes[i].value; break;
- case JOYSTICK_AXIS_ACCELEROMETER_Y: tiltY = axes[i].value; break;
- case JOYSTICK_AXIS_ACCELEROMETER_Z: tiltZ = axes[i].value; break;
- default: break;
- }
}
+}
+void NativeAccelerometer(float tiltX, float tiltY, float tiltZ) {
if (g_Config.iTiltInputType == TILT_NULL) {
// if tilt events are disabled, don't do anything special.
return;
@@ -1377,6 +1367,10 @@ void NativeAxis(const AxisInput *axes, size_t count) {
TiltEventProcessor::ProcessTilt(landscape, tiltBaseAngleY, tiltX, tiltY, tiltZ,
g_Config.bInvertTiltX, g_Config.bInvertTiltY,
xSensitivity, ySensitivity);
+
+ HLEPlugins::PluginDataAxis[JOYSTICK_AXIS_ACCELEROMETER_X] = tiltX;
+ HLEPlugins::PluginDataAxis[JOYSTICK_AXIS_ACCELEROMETER_Y] = tiltY;
+ HLEPlugins::PluginDataAxis[JOYSTICK_AXIS_ACCELEROMETER_Z] = tiltZ;
}
void System_PostUIMessage(const std::string &message, const std::string &value) {
diff --git a/UI/OnScreenDisplay.cpp b/UI/OnScreenDisplay.cpp
index ad8eac4ffd6e..5209673883e6 100644
--- a/UI/OnScreenDisplay.cpp
+++ b/UI/OnScreenDisplay.cpp
@@ -459,6 +459,8 @@ void OnScreenMessagesView::Draw(UIContext &dc) {
// Save the location of the popup, for easy dismissal.
dismissZones.push_back(ClickZone{ (int)j, b });
break;
+ default:
+ break;
}
break;
}
diff --git a/UI/RemoteISOScreen.cpp b/UI/RemoteISOScreen.cpp
index b80df3002142..9ade1fd80397 100644
--- a/UI/RemoteISOScreen.cpp
+++ b/UI/RemoteISOScreen.cpp
@@ -503,8 +503,8 @@ void RemoteISOConnectScreen::ExecuteLoad() {
class RemoteGameBrowser : public GameBrowser {
public:
- RemoteGameBrowser(const Path &url, BrowseFlags browseFlags, bool *gridStyle_, ScreenManager *screenManager, std::string lastText, std::string lastLink, UI::LayoutParams *layoutParams = nullptr)
- : GameBrowser(url, browseFlags, gridStyle_, screenManager, lastText, lastLink, layoutParams) {
+ RemoteGameBrowser(const Path &url, BrowseFlags browseFlags, bool *gridStyle, ScreenManager *screenManager, std::string lastText, std::string lastLink, UI::LayoutParams *layoutParams = nullptr)
+ : GameBrowser(url, browseFlags, gridStyle, screenManager, lastText, lastLink, layoutParams) {
initialPath_ = url;
}
diff --git a/UI/RetroAchievementScreens.cpp b/UI/RetroAchievementScreens.cpp
index d8ebceb9ffc9..a92663d3f53a 100644
--- a/UI/RetroAchievementScreens.cpp
+++ b/UI/RetroAchievementScreens.cpp
@@ -516,7 +516,7 @@ void RenderAchievement(UIContext &dc, const rc_client_achievement_t *achievement
char cacheKey[256];
snprintf(cacheKey, sizeof(cacheKey), "ai:%s:%s", achievement->badge_name, iconState == RC_CLIENT_ACHIEVEMENT_STATE_UNLOCKED ? "unlocked" : "locked");
if (RC_OK == rc_client_achievement_get_image_url(achievement, iconState, temp, sizeof(temp))) {
- Achievements::DownloadImageIfMissing(cacheKey, std::move(std::string(temp)));
+ Achievements::DownloadImageIfMissing(cacheKey, std::string(temp));
if (g_iconCache.BindIconTexture(&dc, cacheKey)) {
dc.Draw()->DrawTexRect(Bounds(bounds.x + padding, bounds.y + padding, iconSpace, iconSpace), 0.0f, 0.0f, 1.0f, 1.0f, whiteAlpha(alpha));
}
@@ -560,7 +560,7 @@ void RenderGameAchievementSummary(UIContext &dc, const Bounds &bounds, float alp
char cacheKey[256];
snprintf(cacheKey, sizeof(cacheKey), "gi:%s", gameInfo->badge_name);
if (RC_OK == rc_client_game_get_image_url(gameInfo, url, sizeof(url))) {
- Achievements::DownloadImageIfMissing(cacheKey, std::move(std::string(url)));
+ Achievements::DownloadImageIfMissing(cacheKey, std::string(url));
if (g_iconCache.BindIconTexture(&dc, cacheKey)) {
dc.Draw()->DrawTexRect(Bounds(bounds.x, bounds.y, iconSpace, iconSpace), 0.0f, 0.0f, 1.0f, 1.0f, whiteAlpha(alpha));
}
@@ -660,7 +660,7 @@ void RenderLeaderboardEntry(UIContext &dc, const rc_client_leaderboard_entry_t *
snprintf(cacheKey, sizeof(cacheKey), "lbe:%s", entry->user);
char temp[512];
if (RC_OK == rc_client_leaderboard_entry_get_user_image_url(entry, temp, sizeof(temp))) {
- Achievements::DownloadImageIfMissing(cacheKey, std::move(std::string(temp)));
+ Achievements::DownloadImageIfMissing(cacheKey, std::string(temp));
if (g_iconCache.BindIconTexture(&dc, cacheKey)) {
dc.Draw()->DrawTexRect(Bounds(bounds.x + iconLeft, bounds.y + 4.0f, 64.0f, 64.0f), 0.0f, 0.0f, 1.0f, 1.0f, whiteAlpha(alpha));
}
diff --git a/UI/TiltAnalogSettingsScreen.cpp b/UI/TiltAnalogSettingsScreen.cpp
index 716baf528995..af4456a54599 100644
--- a/UI/TiltAnalogSettingsScreen.cpp
+++ b/UI/TiltAnalogSettingsScreen.cpp
@@ -137,22 +137,8 @@ void TiltAnalogSettingsScreen::CreateViews() {
settings->Add(new Choice(di->T("Back")))->OnClick.Handle(this, &UIScreen::OnBack);
}
-void TiltAnalogSettingsScreen::axis(const AxisInput &axis) {
- UIDialogScreenWithGameBackground::axis(axis);
-
- if (axis.deviceId == DEVICE_ID_ACCELEROMETER) {
- switch (axis.axisId) {
- case JOYSTICK_AXIS_ACCELEROMETER_X: down_.x = axis.value; break;
- case JOYSTICK_AXIS_ACCELEROMETER_Y: down_.y = axis.value; break;
- case JOYSTICK_AXIS_ACCELEROMETER_Z: down_.z = axis.value; break;
- default: break;
- }
- }
-}
-
UI::EventReturn TiltAnalogSettingsScreen::OnCalibrate(UI::EventParams &e) {
- Lin::Vec3 down = down_.normalized();
- g_Config.fTiltBaseAngleY = atan2(down.z, down.x);
+ g_Config.fTiltBaseAngleY = TiltEventProcessor::GetCurrentYAngle();
return UI::EVENT_DONE;
}
diff --git a/UI/TiltAnalogSettingsScreen.h b/UI/TiltAnalogSettingsScreen.h
index 1c338e5e944c..b1416cfb69ea 100644
--- a/UI/TiltAnalogSettingsScreen.h
+++ b/UI/TiltAnalogSettingsScreen.h
@@ -29,7 +29,6 @@ class TiltAnalogSettingsScreen : public UIDialogScreenWithGameBackground {
TiltAnalogSettingsScreen(const Path &gamePath) : UIDialogScreenWithGameBackground(gamePath) {}
void CreateViews() override;
- void axis(const AxisInput &axis) override;
void update() override;
const char *tag() const override { return "TiltAnalogSettings"; }
diff --git a/UWP/CoreUWP/CoreUWP.vcxproj b/UWP/CoreUWP/CoreUWP.vcxproj
index d74fdbbb8d2f..4ea24c0ce377 100644
--- a/UWP/CoreUWP/CoreUWP.vcxproj
+++ b/UWP/CoreUWP/CoreUWP.vcxproj
@@ -66,7 +66,7 @@
false
false
pch.h
- ../../ffmpeg/Windows10/ARM/include;../../ffmpeg/WindowsInclude;../..;../../ext/native;../../ext/snappy;../../ext/libpng17;../../Common;../../ext/zlib;../../ext/zstd/lib;../../ext;../../ext/armips/;../../ext/armips/ext/filesystem/include/;../../ext/armips/ext/tinyformat/;$(ProjectDir);$(GeneratedFilesDir);$(IntDir);%(AdditionalIncludeDirectories)
+ ../../ext/libchdr/include;../../ffmpeg/Windows10/ARM/include;../../ffmpeg/WindowsInclude;../..;../../ext/native;../../ext/snappy;../../ext/libpng17;../../Common;../../ext/zlib;../../ext/zstd/lib;../../ext;../../ext/armips/;../../ext/armips/ext/filesystem/include/;../../ext/armips/ext/tinyformat/;$(ProjectDir);$(GeneratedFilesDir);$(IntDir);%(AdditionalIncludeDirectories)
USE_FFMPEG;WITH_UPNP;_CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;NOMINMAX;_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE=1;ARMIPS_USE_STD_FILESYSTEM;%(ClCompile.PreprocessorDefinitions)
stdcpp17
@@ -82,7 +82,7 @@
false
false
pch.h
- ../../ffmpeg/Windows10/ARM64/include;../../ffmpeg/WindowsInclude;../..;../../ext/native;../../ext/snappy;../../ext/libpng17;../../Common;../../ext/zlib;../../ext/zstd/lib;../../ext;../../ext/armips/;../../ext/armips/ext/filesystem/include/;../../ext/armips/ext/tinyformat/;$(ProjectDir);$(GeneratedFilesDir);$(IntDir);%(AdditionalIncludeDirectories)
+ ../../ext/libchdr/include;../../ffmpeg/Windows10/ARM64/include;../../ffmpeg/WindowsInclude;../..;../../ext/native;../../ext/snappy;../../ext/libpng17;../../Common;../../ext/zlib;../../ext/zstd/lib;../../ext;../../ext/armips/;../../ext/armips/ext/filesystem/include/;../../ext/armips/ext/tinyformat/;$(ProjectDir);$(GeneratedFilesDir);$(IntDir);%(AdditionalIncludeDirectories)
USE_FFMPEG;WITH_UPNP;_CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;NOMINMAX;_ARM64_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE=1;ARMIPS_USE_STD_FILESYSTEM;%(ClCompile.PreprocessorDefinitions)
stdcpp17
@@ -98,7 +98,7 @@
false
false
pch.h
- ../../ffmpeg/Windows10/x64/include;../../ffmpeg/WindowsInclude;../..;../../ext/native;../../ext/snappy;../../ext/libpng17;../../Common;../../ext/zlib;../../ext/zstd/lib;../../ext;../../ext/armips/;../../ext/armips/ext/filesystem/include/;../../ext/armips/ext/tinyformat/;$(ProjectDir);$(GeneratedFilesDir);$(IntDir);%(AdditionalIncludeDirectories)
+ ../../ext/libchdr/include;../../ffmpeg/Windows10/x64/include;../../ffmpeg/WindowsInclude;../..;../../ext/native;../../ext/snappy;../../ext/libpng17;../../Common;../../ext/zlib;../../ext/zstd/lib;../../ext;../../ext/armips/;../../ext/armips/ext/filesystem/include/;../../ext/armips/ext/tinyformat/;$(ProjectDir);$(GeneratedFilesDir);$(IntDir);%(AdditionalIncludeDirectories)
USE_FFMPEG;WITH_UPNP;_CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;NOMINMAX;_UNICODE;UNICODE;ARMIPS_USE_STD_FILESYSTEM;%(PreprocessorDefinitions)
stdcpp17
@@ -1110,6 +1110,9 @@
{acb316ca-3ecb-48e5-be0a-91e72d5b0f12}
+
+ {191b6f52-ad66-4172-bd20-733eeeceef8c}
+
{d31fd4f0-53eb-477c-9dc7-149796f628e2}
diff --git a/UWP/PPSSPP_UWP.sln b/UWP/PPSSPP_UWP.sln
index 8571149bc01a..8f1c7d1ae8f7 100644
--- a/UWP/PPSSPP_UWP.sln
+++ b/UWP/PPSSPP_UWP.sln
@@ -36,6 +36,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cpu_features_UWP", "cpu_fea
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "rcheevos_UWP", "rcheevos_UWP\rcheevos_UWP.vcxproj", "{4C9D52D0-310A-4347-8991-E3788CB22169}"
EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libchdr_UWP", "libchdr_UWP\libchdr_UWP.vcxproj", "{191B6F52-AD66-4172-BD20-733EEECEEF8C}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|ARM = Debug|ARM
@@ -310,6 +312,24 @@ Global
{4C9D52D0-310A-4347-8991-E3788CB22169}.UWP Gold|ARM64.Build.0 = Release|ARM64
{4C9D52D0-310A-4347-8991-E3788CB22169}.UWP Gold|x64.ActiveCfg = Release|x64
{4C9D52D0-310A-4347-8991-E3788CB22169}.UWP Gold|x64.Build.0 = Release|x64
+ {191B6F52-AD66-4172-BD20-733EEECEEF8C}.Debug|ARM.ActiveCfg = Debug|ARM
+ {191B6F52-AD66-4172-BD20-733EEECEEF8C}.Debug|ARM.Build.0 = Debug|ARM
+ {191B6F52-AD66-4172-BD20-733EEECEEF8C}.Debug|ARM64.ActiveCfg = Debug|ARM64
+ {191B6F52-AD66-4172-BD20-733EEECEEF8C}.Debug|ARM64.Build.0 = Debug|ARM64
+ {191B6F52-AD66-4172-BD20-733EEECEEF8C}.Debug|x64.ActiveCfg = Debug|x64
+ {191B6F52-AD66-4172-BD20-733EEECEEF8C}.Debug|x64.Build.0 = Debug|x64
+ {191B6F52-AD66-4172-BD20-733EEECEEF8C}.Release|ARM.ActiveCfg = Release|ARM
+ {191B6F52-AD66-4172-BD20-733EEECEEF8C}.Release|ARM.Build.0 = Release|ARM
+ {191B6F52-AD66-4172-BD20-733EEECEEF8C}.Release|ARM64.ActiveCfg = Release|ARM64
+ {191B6F52-AD66-4172-BD20-733EEECEEF8C}.Release|ARM64.Build.0 = Release|ARM64
+ {191B6F52-AD66-4172-BD20-733EEECEEF8C}.Release|x64.ActiveCfg = Release|x64
+ {191B6F52-AD66-4172-BD20-733EEECEEF8C}.Release|x64.Build.0 = Release|x64
+ {191B6F52-AD66-4172-BD20-733EEECEEF8C}.UWP Gold|ARM.ActiveCfg = Release|ARM
+ {191B6F52-AD66-4172-BD20-733EEECEEF8C}.UWP Gold|ARM.Build.0 = Release|ARM
+ {191B6F52-AD66-4172-BD20-733EEECEEF8C}.UWP Gold|ARM64.ActiveCfg = Release|ARM64
+ {191B6F52-AD66-4172-BD20-733EEECEEF8C}.UWP Gold|ARM64.Build.0 = Release|ARM64
+ {191B6F52-AD66-4172-BD20-733EEECEEF8C}.UWP Gold|x64.ActiveCfg = Release|x64
+ {191B6F52-AD66-4172-BD20-733EEECEEF8C}.UWP Gold|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
diff --git a/UWP/libchdr_UWP/libchdr_UWP.vcxproj b/UWP/libchdr_UWP/libchdr_UWP.vcxproj
new file mode 100644
index 000000000000..afd6fb6a3194
--- /dev/null
+++ b/UWP/libchdr_UWP/libchdr_UWP.vcxproj
@@ -0,0 +1,104 @@
+
+
+
+
+ Debug
+ ARM
+
+
+ Debug
+ ARM64
+
+
+ Debug
+ x64
+
+
+ Release
+ ARM
+
+
+ Release
+ ARM64
+
+
+ Release
+ x64
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ {191b6f52-ad66-4172-bd20-733eeeceef8c}
+ StaticLibrary
+ libchdr
+ en-US
+ 14.0
+ true
+ Windows Store
+ 10.0
+ 10.0.19041.0
+ 10.0
+ libchdr_UWP
+
+
+
+ StaticLibrary
+ true
+
+
+ StaticLibrary
+ false
+ false
+
+
+
+
+
+
+
+
+
+
+
+ false
+
+
+
+ false
+ false
+ ..\..\ext\zlib;..\..\ext\libchdr\include;..\..\ext\libchdr\deps\lzma-22.01\include;$(ProjectDir);$(GeneratedFilesDir);$(IntDir);%(AdditionalIncludeDirectories)
+ NotUsing
+ _7ZIP_ST;_CRT_SECURE_NO_WARNINGS;_UNICODE;UNICODE;%(PreprocessorDefinitions)
+
+
+ Console
+ false
+ false
+
+
+
+
+
+
\ No newline at end of file
diff --git a/UWP/libchdr_UWP/libchdr_UWP.vcxproj.filters b/UWP/libchdr_UWP/libchdr_UWP.vcxproj.filters
new file mode 100644
index 000000000000..7a49828f1d8b
--- /dev/null
+++ b/UWP/libchdr_UWP/libchdr_UWP.vcxproj.filters
@@ -0,0 +1,67 @@
+
+
+
+
+ {67DA6AB6-F800-4c08-8B7A-83BB121AAD01}
+ rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tga;tiff;tif;png;wav;mfcribbon-ms
+
+
+ {20de3681-6341-427f-a3a7-4ae259deb0b9}
+
+
+
+
+
+
+
+
+
+ lzma
+
+
+ lzma
+
+
+ lzma
+
+
+ lzma
+
+
+ lzma
+
+
+ lzma
+
+
+ lzma
+
+
+ lzma
+
+
+ lzma
+
+
+ lzma
+
+
+ lzma
+
+
+ lzma
+
+
+ lzma
+
+
+ lzma
+
+
+ lzma
+
+
+ lzma
+
+
+
\ No newline at end of file
diff --git a/Windows/DinputDevice.cpp b/Windows/DinputDevice.cpp
index 2115d857f7d0..0581f522e31b 100644
--- a/Windows/DinputDevice.cpp
+++ b/Windows/DinputDevice.cpp
@@ -145,7 +145,7 @@ DinputDevice::DinputDevice(int devnum) {
return;
}
- getDevices(false);
+ getDevices(needsCheck_);
if ( (devnum >= (int)devices.size()) || FAILED(getPDI()->CreateDevice(devices.at(devnum).guidInstance, &pJoystick, NULL)))
{
return;
diff --git a/Windows/PPSSPP.sln b/Windows/PPSSPP.sln
index de5d07692648..93a9d22c626a 100644
--- a/Windows/PPSSPP.sln
+++ b/Windows/PPSSPP.sln
@@ -93,6 +93,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cpu_features", "..\ext\cpu_
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "rcheevos", "..\ext\rcheevos-build\rcheevos.vcxproj", "{31694510-A8C0-40F6-B09B-E8DF825ADEFA}"
EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libchdr", "..\ext\libchdr.vcxproj", "{956F1F48-B612-46D8-89EE-96996DCD9383}"
+EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|ARM = Debug|ARM
@@ -395,6 +397,22 @@ Global
{31694510-A8C0-40F6-B09B-E8DF825ADEFA}.Release|Win32.Build.0 = Release|Win32
{31694510-A8C0-40F6-B09B-E8DF825ADEFA}.Release|x64.ActiveCfg = Release|x64
{31694510-A8C0-40F6-B09B-E8DF825ADEFA}.Release|x64.Build.0 = Release|x64
+ {956F1F48-B612-46D8-89EE-96996DCD9383}.Debug|ARM.ActiveCfg = Debug|ARM
+ {956F1F48-B612-46D8-89EE-96996DCD9383}.Debug|ARM.Build.0 = Debug|ARM
+ {956F1F48-B612-46D8-89EE-96996DCD9383}.Debug|ARM64.ActiveCfg = Debug|ARM64
+ {956F1F48-B612-46D8-89EE-96996DCD9383}.Debug|ARM64.Build.0 = Debug|ARM64
+ {956F1F48-B612-46D8-89EE-96996DCD9383}.Debug|Win32.ActiveCfg = Debug|Win32
+ {956F1F48-B612-46D8-89EE-96996DCD9383}.Debug|Win32.Build.0 = Debug|Win32
+ {956F1F48-B612-46D8-89EE-96996DCD9383}.Debug|x64.ActiveCfg = Debug|x64
+ {956F1F48-B612-46D8-89EE-96996DCD9383}.Debug|x64.Build.0 = Debug|x64
+ {956F1F48-B612-46D8-89EE-96996DCD9383}.Release|ARM.ActiveCfg = Release|ARM
+ {956F1F48-B612-46D8-89EE-96996DCD9383}.Release|ARM.Build.0 = Release|ARM
+ {956F1F48-B612-46D8-89EE-96996DCD9383}.Release|ARM64.ActiveCfg = Release|ARM64
+ {956F1F48-B612-46D8-89EE-96996DCD9383}.Release|ARM64.Build.0 = Release|ARM64
+ {956F1F48-B612-46D8-89EE-96996DCD9383}.Release|Win32.ActiveCfg = Release|Win32
+ {956F1F48-B612-46D8-89EE-96996DCD9383}.Release|Win32.Build.0 = Release|Win32
+ {956F1F48-B612-46D8-89EE-96996DCD9383}.Release|x64.ActiveCfg = Release|x64
+ {956F1F48-B612-46D8-89EE-96996DCD9383}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
@@ -412,6 +430,7 @@ Global
{8BFD8150-94D5-4BF9-8A50-7BD9929A0850} = {39FCACF8-10D9-4D8D-97AA-7507436AD932}
{C249F016-7F82-45CF-BB6E-0642A988C4D3} = {39FCACF8-10D9-4D8D-97AA-7507436AD932}
{31694510-A8C0-40F6-B09B-E8DF825ADEFA} = {39FCACF8-10D9-4D8D-97AA-7507436AD932}
+ {956F1F48-B612-46D8-89EE-96996DCD9383} = {39FCACF8-10D9-4D8D-97AA-7507436AD932}
EndGlobalSection
GlobalSection(ExtensibilityGlobals) = postSolution
SolutionGuid = {2FD47774-A031-48F4-B645-A49A3140A29B}
diff --git a/Windows/PPSSPP.vcxproj b/Windows/PPSSPP.vcxproj
index 881a31cab516..01128f75fbac 100644
--- a/Windows/PPSSPP.vcxproj
+++ b/Windows/PPSSPP.vcxproj
@@ -1721,6 +1721,9 @@
{edfa2e87-8ac1-4853-95d4-d7594ff81947}
+
+ {956f1f48-b612-46d8-89ee-96996dcd9383}
+
{3baae095-e0ab-4b0e-b5df-ce39c8ae31de}
diff --git a/Windows/main.cpp b/Windows/main.cpp
index 5149ebcc5370..04b89c7372c0 100644
--- a/Windows/main.cpp
+++ b/Windows/main.cpp
@@ -554,7 +554,7 @@ bool System_MakeRequest(SystemRequestType type, int requestId, const std::string
std::wstring filter;
switch (type) {
case BrowseFileType::BOOTABLE:
- filter = MakeFilter(L"All supported file types (*.iso *.cso *.pbp *.elf *.prx *.zip *.ppdmp)|*.pbp;*.elf;*.iso;*.cso;*.prx;*.zip;*.ppdmp|PSP ROMs (*.iso *.cso *.pbp *.elf *.prx)|*.pbp;*.elf;*.iso;*.cso;*.prx|Homebrew/Demos installers (*.zip)|*.zip|All files (*.*)|*.*||");
+ filter = MakeFilter(L"All supported file types (*.iso *.cso *.chd *.pbp *.elf *.prx *.zip *.ppdmp)|*.pbp;*.elf;*.iso;*.cso;*.chd;*.prx;*.zip;*.ppdmp|PSP ROMs (*.iso *.cso *.chd *.pbp *.elf *.prx)|*.pbp;*.elf;*.iso;*.cso;*.chd;*.prx|Homebrew/Demos installers (*.zip)|*.zip|All files (*.*)|*.*||");
break;
case BrowseFileType::INI:
filter = MakeFilter(L"Ini files (*.ini)|*.ini|All files (*.*)|*.*||");
diff --git a/android/ab.cmd b/android/ab.cmd
index f205269c8b95..412ba8263e09 100644
--- a/android/ab.cmd
+++ b/android/ab.cmd
@@ -11,5 +11,5 @@ copy ..\assets\*.meta assets\
copy ..\assets\*.wav assets\
SET NDK=C:\Android\sdk\ndk\21.3.6528147
REM SET NDK=C:\Android\ndk
-SET NDK_MODULE_PATH=..\ext;..\ext\native\ext
+SET NDK_MODULE_PATH=..\ext
%NDK%/ndk-build -j32 %*
diff --git a/android/jni/Android.mk b/android/jni/Android.mk
index 338d52baa0b2..c70bba4eb4ce 100644
--- a/android/jni/Android.mk
+++ b/android/jni/Android.mk
@@ -4,11 +4,45 @@ SRC := ../..
include $(CLEAR_VARS)
include $(LOCAL_PATH)/Locals.mk
+LOCAL_CFLAGS += -D_7ZIP_ST -D__SWITCH__
+
+LZMA_FILES := \
+ $(SRC)/ext/libchdr/deps/lzma-22.01/src/Alloc.c \
+ $(SRC)/ext/libchdr/deps/lzma-22.01/src/Bcj2.c \
+ $(SRC)/ext/libchdr/deps/lzma-22.01/src/Bcj2Enc.c \
+ $(SRC)/ext/libchdr/deps/lzma-22.01/src/Bra.c \
+ $(SRC)/ext/libchdr/deps/lzma-22.01/src/Bra86.c \
+ $(SRC)/ext/libchdr/deps/lzma-22.01/src/CpuArch.c \
+ $(SRC)/ext/libchdr/deps/lzma-22.01/src/Delta.c \
+ $(SRC)/ext/libchdr/deps/lzma-22.01/src/LzFind.c \
+ $(SRC)/ext/libchdr/deps/lzma-22.01/src/LzFindOpt.c \
+ $(SRC)/ext/libchdr/deps/lzma-22.01/src/LzmaDec.c \
+ $(SRC)/ext/libchdr/deps/lzma-22.01/src/LzmaEnc.c \
+ $(SRC)/ext/libchdr/deps/lzma-22.01/src/Lzma86Dec.c \
+ $(SRC)/ext/libchdr/deps/lzma-22.01/src/Lzma86Enc.c \
+ $(SRC)/ext/libchdr/deps/lzma-22.01/src/LzmaLib.c \
+ $(SRC)/ext/libchdr/deps/lzma-22.01/src/Sort.c
+
+CHDR_FILES := \
+ ${LZMA_FILES} \
+ $(SRC)/ext/libchdr/src/libchdr_bitstream.c \
+ $(SRC)/ext/libchdr/src/libchdr_cdrom.c \
+ $(SRC)/ext/libchdr/src/libchdr_chd.c \
+ $(SRC)/ext/libchdr/src/libchdr_flac.c \
+ $(SRC)/ext/libchdr/src/libchdr_huffman.c
+
+LOCAL_MODULE := libchdr
+LOCAL_SRC_FILES := $(CHDR_FILES)
+include $(BUILD_STATIC_LIBRARY)
+
+include $(CLEAR_VARS)
+include $(LOCAL_PATH)/Locals.mk
+
LOCAL_C_INCLUDES += \
$(LOCAL_PATH)/../../ext/cpu_features/include \
$(LOCAL_PATH)/../../ext/rcheevos/include
-LOCAL_CFLAGS += -DSTACK_LINE_READER_BUFFER_SIZE=1024 -DHAVE_DLFCN_H -DRC_DISABLE_LUA
+LOCAL_CFLAGS += -DSTACK_LINE_READER_BUFFER_SIZE=1024 -DHAVE_DLFCN_H -DRC_DISABLE_LUA -D_7ZIP_ST
# http://software.intel.com/en-us/articles/getting-started-on-optimizing-ndk-project-for-multiple-cpu-architectures
@@ -288,7 +322,7 @@ include $(BUILD_STATIC_LIBRARY)
# Next up, Core, GPU, and other core parts shared by headless.
include $(CLEAR_VARS)
include $(LOCAL_PATH)/Locals.mk
-LOCAL_WHOLE_STATIC_LIBRARIES += ppsspp_common
+LOCAL_WHOLE_STATIC_LIBRARIES += ppsspp_common libchdr
ifeq ($(TARGET_ARCH_ABI),x86_64)
ARCH_FILES := \
@@ -357,7 +391,7 @@ ARCH_FILES := \
Arm64EmitterTest.cpp
endif
-VULKAN_FILES := \
+GPU_VULKAN_FILES := \
$(SRC)/GPU/Vulkan/DrawEngineVulkan.cpp \
$(SRC)/GPU/Vulkan/FramebufferManagerVulkan.cpp \
$(SRC)/GPU/Vulkan/GPU_Vulkan.cpp \
@@ -370,7 +404,7 @@ VULKAN_FILES := \
EXEC_AND_LIB_FILES := \
$(ARCH_FILES) \
- $(VULKAN_FILES) \
+ $(GPU_VULKAN_FILES) \
$(SRC)/ext/xxhash.c \
TestRunner.cpp \
$(SRC)/Core/MIPS/MIPS.cpp.arm \
diff --git a/android/jni/Locals.mk b/android/jni/Locals.mk
index aa71119f3b71..e176a2bdc747 100644
--- a/android/jni/Locals.mk
+++ b/android/jni/Locals.mk
@@ -20,6 +20,8 @@ LOCAL_C_INCLUDES := \
$(LOCAL_PATH)/../../ext/armips \
$(LOCAL_PATH)/../../ext/armips/ext/filesystem/include \
$(LOCAL_PATH)/../../ext/armips/ext/tinyformat \
+ $(LOCAL_PATH)/../../ext/libchdr/deps/lzma-22.01/include \
+ $(LOCAL_PATH)/../../ext/libchdr/include \
$(LOCAL_PATH)
LOCAL_STATIC_LIBRARIES := libzip glslang-build miniupnp-build
diff --git a/android/jni/app-android.cpp b/android/jni/app-android.cpp
index 1d667d72ebcb..79048d959508 100644
--- a/android/jni/app-android.cpp
+++ b/android/jni/app-android.cpp
@@ -688,8 +688,9 @@ extern "C" void Java_org_ppsspp_ppsspp_NativeApp_init
EARLY_LOG("NativeApp.init() -- begin");
PROFILE_INIT();
- std::lock_guard guard(renderLock); // Note: This is held for the rest of this function - intended?
+ std::lock_guard guard(renderLock);
renderer_inited = false;
+ exitRenderLoop = false;
androidVersion = jAndroidVersion;
deviceType = jdeviceType;
@@ -872,8 +873,14 @@ extern "C" void Java_org_ppsspp_ppsspp_NativeApp_pause(JNIEnv *, jclass) {
}
extern "C" void Java_org_ppsspp_ppsspp_NativeApp_shutdown(JNIEnv *, jclass) {
+ INFO_LOG(SYSTEM, "NativeApp.shutdown() -- begin");
+
if (renderer_inited && useCPUThread && graphicsContext) {
// Only used in Java EGL path.
+
+ // We can't lock renderLock here because the emu thread will be in NativeFrame
+ // which locks renderLock already, and only gets out once we call ThreadFrame()
+ // in a loop before, to empty the queue.
EmuThreadStop("shutdown");
INFO_LOG(SYSTEM, "BeginAndroidShutdown");
graphicsContext->BeginAndroidShutdown();
@@ -891,19 +898,19 @@ extern "C" void Java_org_ppsspp_ppsspp_NativeApp_shutdown(JNIEnv *, jclass) {
EmuThreadJoin();
}
- INFO_LOG(SYSTEM, "NativeApp.shutdown() -- begin");
- if (renderer_inited) {
- INFO_LOG(G3D, "Shutting down renderer");
- graphicsContext->Shutdown();
- delete graphicsContext;
- graphicsContext = nullptr;
- renderer_inited = false;
- } else {
- INFO_LOG(G3D, "Not shutting down renderer - not initialized");
- }
-
{
std::lock_guard guard(renderLock);
+
+ if (graphicsContext) {
+ INFO_LOG(G3D, "Shutting down renderer");
+ graphicsContext->Shutdown();
+ delete graphicsContext;
+ graphicsContext = nullptr;
+ renderer_inited = false;
+ } else {
+ INFO_LOG(G3D, "Not shutting down renderer - not initialized");
+ }
+
NativeShutdown();
g_VFS.Clear();
}
@@ -1135,6 +1142,9 @@ void UpdateRunLoopAndroid(JNIEnv *env) {
}
extern "C" void Java_org_ppsspp_ppsspp_NativeRenderer_displayRender(JNIEnv *env, jobject obj) {
+ // This doesn't get called on the Vulkan path.
+ _assert_(useCPUThread);
+
static bool hasSetThreadName = false;
if (!hasSetThreadName) {
hasSetThreadName = true;
@@ -1144,13 +1154,9 @@ extern "C" void Java_org_ppsspp_ppsspp_NativeRenderer_displayRender(JNIEnv *env,
if (IsVREnabled() && !StartVRRender())
return;
- if (useCPUThread) {
- // This is the "GPU thread". Call ThreadFrame.
- if (!graphicsContext || !graphicsContext->ThreadFrame()) {
- return;
- }
- } else {
- UpdateRunLoopAndroid(env);
+ // This is the "GPU thread". Call ThreadFrame.
+ if (!graphicsContext || !graphicsContext->ThreadFrame()) {
+ return;
}
if (IsVREnabled()) {
@@ -1234,18 +1240,7 @@ extern "C" jboolean Java_org_ppsspp_ppsspp_NativeApp_mouseWheelEvent(
extern "C" void JNICALL Java_org_ppsspp_ppsspp_NativeApp_accelerometer(JNIEnv *, jclass, float x, float y, float z) {
if (!renderer_inited)
return;
-
- AxisInput axis[3];
- for (int i = 0; i < 3; i++) {
- axis[i].deviceId = DEVICE_ID_ACCELEROMETER;
- }
- axis[0].axisId = JOYSTICK_AXIS_ACCELEROMETER_X;
- axis[0].value = x;
- axis[1].axisId = JOYSTICK_AXIS_ACCELEROMETER_Y;
- axis[1].value = y;
- axis[2].axisId = JOYSTICK_AXIS_ACCELEROMETER_Z;
- axis[2].value = z;
- NativeAxis(axis, 3);
+ NativeAccelerometer(x, y, z);
}
extern "C" void JNICALL Java_org_ppsspp_ppsspp_NativeApp_sendMessageFromJava(JNIEnv *env, jclass, jstring message, jstring param) {
@@ -1304,9 +1299,7 @@ extern "C" void JNICALL Java_org_ppsspp_ppsspp_NativeActivity_requestExitVulkanR
return;
}
exitRenderLoop = true;
- while (renderLoopRunning) {
- sleep_ms(5);
- }
+ // The caller joins the thread anyway, so no point in doing a wait loop here, only leads to misleading hang diagnostics.
}
void correctRatio(int &sz_x, int &sz_y, float scale) {
@@ -1457,15 +1450,24 @@ static void ProcessFrameCommands(JNIEnv *env) {
}
// This runs in Vulkan mode only.
+// This handles the entire lifecycle of the Vulkan context, init and exit.
extern "C" bool JNICALL Java_org_ppsspp_ppsspp_NativeActivity_runVulkanRenderLoop(JNIEnv *env, jobject obj, jobject _surf) {
_assert_(!useCPUThread);
if (!graphicsContext) {
ERROR_LOG(G3D, "runVulkanRenderLoop: Tried to enter without a created graphics context.");
+ renderLoopRunning = false;
+ exitRenderLoop = false;
return false;
}
- exitRenderLoop = false;
+ if (exitRenderLoop) {
+ WARN_LOG(G3D, "runVulkanRenderLoop: ExitRenderLoop requested at start, skipping the whole thing.");
+ renderLoopRunning = false;
+ exitRenderLoop = false;
+ return true;
+ }
+
// This is up here to prevent race conditions, in case we pause during init.
renderLoopRunning = true;
@@ -1507,11 +1509,11 @@ extern "C" bool JNICALL Java_org_ppsspp_ppsspp_NativeActivity_runVulkanRenderLoo
hasSetThreadName = true;
SetCurrentThreadName("AndroidRender");
}
- }
- while (!exitRenderLoop) {
- LockedNativeUpdateRender();
- ProcessFrameCommands(env);
+ while (!exitRenderLoop) {
+ LockedNativeUpdateRender();
+ ProcessFrameCommands(env);
+ }
}
INFO_LOG(G3D, "Leaving EGL/Vulkan render loop.");
@@ -1525,6 +1527,7 @@ extern "C" bool JNICALL Java_org_ppsspp_ppsspp_NativeActivity_runVulkanRenderLoo
INFO_LOG(G3D, "Shutting down graphics context from render thread...");
graphicsContext->ShutdownFromRenderThread();
renderLoopRunning = false;
+ exitRenderLoop = false;
WARN_LOG(G3D, "Render loop function exited.");
return true;
diff --git a/android/src/org/ppsspp/ppsspp/NativeActivity.java b/android/src/org/ppsspp/ppsspp/NativeActivity.java
index f9521d14eb2b..21ac0553e7c1 100644
--- a/android/src/org/ppsspp/ppsspp/NativeActivity.java
+++ b/android/src/org/ppsspp/ppsspp/NativeActivity.java
@@ -62,10 +62,10 @@ public abstract class NativeActivity extends Activity {
// Allows us to skip a lot of initialization on secondary calls to onCreate.
private static boolean initialized = false;
- // False to use C++ EGL, queried from C++ after NativeApp.init.
+ // False to use Vulkan, queried from C++ after NativeApp.init.
private static boolean javaGL = true;
- // Graphics and audio interfaces for EGL (javaGL = false)
+ // Graphics and audio interfaces for Vulkan (javaGL = false)
private NativeSurfaceView mSurfaceView;
private Surface mSurface;
private Thread mRenderLoopThread = null;
@@ -662,7 +662,7 @@ public void onCreate(Bundle savedInstanceState) {
Log.i(TAG, "setcontentview before");
setContentView(mSurfaceView);
Log.i(TAG, "setcontentview after");
- ensureRenderLoop();
+ startRenderLoopThread();
}
}
@@ -677,12 +677,18 @@ public void onWindowFocusChanged(boolean hasFocus) {
public void notifySurface(Surface surface) {
mSurface = surface;
+
+ if (!initialized) {
+ Log.e(TAG, "Can't deal with surfaces while not initialized");
+ return;
+ }
+
if (!javaGL) {
// If we got a surface, this starts the thread. If not, it doesn't.
if (mSurface == null) {
joinRenderLoopThread();
} else {
- ensureRenderLoop();
+ startRenderLoopThread();
}
}
updateSustainedPerformanceMode();
@@ -690,7 +696,7 @@ public void notifySurface(Surface surface) {
// Invariants: After this, mRenderLoopThread will be set, and the thread will be running,
// if in Vulkan mode.
- protected synchronized void ensureRenderLoop() {
+ protected synchronized void startRenderLoopThread() {
if (javaGL) {
Log.e(TAG, "JavaGL mode - should not get into startRenderLoopThread.");
return;
@@ -724,6 +730,7 @@ private synchronized void joinRenderLoopThread() {
mRenderLoopThread = null;
} catch (InterruptedException e) {
e.printStackTrace();
+ mRenderLoopThread = null;
}
}
}
@@ -739,33 +746,37 @@ void setupSystemUiCallback() {
navigationCallbackView = decorView;
}
- @Override
- protected void onStop() {
- super.onStop();
- Log.i(TAG, "onStop - do nothing special");
- }
-
@Override
protected void onDestroy() {
super.onDestroy();
Log.i(TAG, "onDestroy");
if (javaGL) {
- if (nativeRenderer != null && nativeRenderer.isRenderingFrame()) {
- Log.i(TAG, "Waiting for renderer to finish.");
- int tries = 200;
- do {
- try {
- Thread.sleep(10);
- } catch (InterruptedException e) {
- }
- tries--;
- } while (nativeRenderer.isRenderingFrame() && tries > 0);
+ if (nativeRenderer != null) {
+ if (nativeRenderer.isRenderingFrame()) {
+ Log.i(TAG, "Waiting for renderer to finish.");
+ int tries = 200;
+ do {
+ try {
+ Thread.sleep(10);
+ } catch (InterruptedException e) {
+ }
+ tries--;
+ } while (nativeRenderer.isRenderingFrame() && tries > 0);
+ } else {
+ Log.i(TAG, "nativerenderer done.");
+ nativeRenderer = null;
+ }
+ }
+ if (mGLSurfaceView != null) {
+ mGLSurfaceView.onDestroy();
+ mGLSurfaceView = null;
}
- mGLSurfaceView.onDestroy();
- mGLSurfaceView = null;
} else {
- mSurfaceView.onDestroy();
- mSurfaceView = null;
+ if (mSurfaceView != null) {
+ mSurfaceView.onDestroy();
+ mSurfaceView = null;
+ }
+ mSurface = null;
}
// Probably vain attempt to help the garbage collector...
@@ -781,7 +792,7 @@ protected void onDestroy() {
// TODO: Can we ensure that the GL thread has stopped rendering here?
// I've seen crashes that seem to indicate that sometimes it hasn't...
NativeApp.audioShutdown();
- if (shuttingDown || isFinishing()) {
+ if (shuttingDown) {
NativeApp.shutdown();
unregisterCallbacks();
initialized = false;
@@ -799,6 +810,7 @@ protected void onPause() {
super.onPause();
Log.i(TAG, "onPause");
loseAudioFocus(this.audioManager, this.audioFocusChangeListener);
+ sizeManager.setPaused(true);
NativeApp.pause();
if (!javaGL) {
mSurfaceView.onPause();
@@ -834,6 +846,7 @@ private boolean detectOpenGLES30() {
protected void onResume() {
super.onResume();
updateSustainedPerformanceMode();
+ sizeManager.setPaused(false);
if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.ICE_CREAM_SANDWICH) {
updateSystemUiVisibility();
}
@@ -862,7 +875,7 @@ protected void onResume() {
if (!javaGL) {
// Restart the render loop.
- ensureRenderLoop();
+ startRenderLoopThread();
}
}
diff --git a/android/src/org/ppsspp/ppsspp/SizeManager.java b/android/src/org/ppsspp/ppsspp/SizeManager.java
index 25509769eb4d..af81f640ddc3 100644
--- a/android/src/org/ppsspp/ppsspp/SizeManager.java
+++ b/android/src/org/ppsspp/ppsspp/SizeManager.java
@@ -39,10 +39,18 @@ public class SizeManager implements SurfaceHolder.Callback {
private Point desiredSize = new Point();
private int badOrientationCount = 0;
+
+ private boolean paused = false;
+
public SizeManager(final NativeActivity a) {
activity = a;
}
+
+ public void setPaused(boolean p) {
+ paused = p;
+ }
+
@TargetApi(Build.VERSION_CODES.P)
public void setSurfaceView(SurfaceView view) {
surfaceView = view;
@@ -107,7 +115,11 @@ public void surfaceChanged(SurfaceHolder holder, int format, int width, int heig
NativeApp.backbufferResize(width, height, format);
updateDisplayMeasurements();
- activity.notifySurface(holder.getSurface());
+ if (!paused) {
+ activity.notifySurface(holder.getSurface());
+ } else {
+ Log.i(TAG, "Skipping notifySurface while paused");
+ }
}
@Override
diff --git a/assets/compat.ini b/assets/compat.ini
index 3198f7450148..1e2bb0011cdb 100644
--- a/assets/compat.ini
+++ b/assets/compat.ini
@@ -1508,6 +1508,11 @@ ULES00227 = true
ULKS46057 = true
ULUS10050 = true
+# WWE Smackdown vs RAW 2007
+ULUS10199 = true
+ULES00631 = true
+ULJM05233 = true
+
# Rainbow Six: Vegas: See #9324
# Replaces a heuristic we added in #16971, which broke Castlevania - Symphony of the Night.
ULES00584 = true
diff --git a/assets/lang/en_US.ini b/assets/lang/en_US.ini
index 1a04c485edb5..c0a97a9c1263 100644
--- a/assets/lang/en_US.ini
+++ b/assets/lang/en_US.ini
@@ -40,7 +40,7 @@ Challenge Mode = Challenge Mode
Challenge Mode (no savestates) = Challenge Mode (no savestates)
Contacting RetroAchievements server... = Contacting RetroAchievements server...
Customize = Customize
-Earned = You have earned %d of %d achievements, and %d of %d points
+Earned = You have unlocked %d of %d achievements, and earned %d of %d points
Encore Mode = Encore Mode
Failed logging in to RetroAchievements = Failed logging in to RetroAchievements
Failed to connect to RetroAchievements. Achievements will not unlock. = Failed to connect to RetroAchievements. Achievements will not unlock.
diff --git a/assets/lang/it_IT.ini b/assets/lang/it_IT.ini
index aae480c9021f..20562bbce434 100644
--- a/assets/lang/it_IT.ini
+++ b/assets/lang/it_IT.ini
@@ -1,66 +1,66 @@
-[Achievements]
-%1: Attempt failed = %1: Attempt failed
-%1: Attempt started = %1: Attempt started
-%d achievements, %d points = %d achievements, %d points
+[Achievements]
+%d achievements, %d points = %d obiettivi, %d punti
+%1: Attempt failed = %1: Tentativo fallito
+%1: Attempt started = %1: Tentativo iniziato
Account = Account
-Achievement progress = Achievement progress
-Achievement unlocked = Achievement unlocked
-Achievements = Achievements
-Achievements are disabled = Achievements are disabled
-Achievements enabled = Achievements enabled
-Achievements with active challenges = Achievements with active challenges
-Almost completed achievements = Almost completed achievements
-Around me = Around me
-Can't log in to RetroAchievements right now = Can't log in to RetroAchievements right now
-Challenge indicator = Challenge indicator
-Challenge Mode = Challenge Mode
-Challenge Mode (no savestates) = Challenge Mode (no savestates)
-Contacting RetroAchievements server... = Contacting RetroAchievements server...
-Customize = Customize
-Earned = You have earned %d of %d achievements, and %d of %d points
-Encore Mode = Encore Mode
-Failed logging in to RetroAchievements = Failed logging in to RetroAchievements
-Failed to connect to RetroAchievements. Achievements will not unlock. = Failed to connect to RetroAchievements. Achievements will not unlock.
-Failed to identify game. Achievements will not unlock. = Failed to identify game. Achievements will not unlock.
-How to use RetroAchievements = How to use RetroAchievements
-In Encore mode - listings may be wrong below = In Encore mode - listings may be wrong below
-Leaderboard attempt started or failed = Leaderboard attempt started or failed
-Leaderboard result submitted = Leaderboard result submitted
-Leaderboard score submission = Leaderboard score submission
-Leaderboard submission is enabled = Leaderboard submission is enabled
-Leaderboard tracker = Leaderboard tracker
-Leaderboards = Leaderboards
-Links = Links
-Locked achievements = Locked achievements
-Log bad memory accesses = Log bad memory accesses
-Mastered %1 = Mastered %1
-Notifications = Notifications
-Recently unlocked achievements = Recently unlocked achievements
-Reconnected to RetroAchievements. = Reconnected to RetroAchievements.
-Register on www.retroachievements.org = Register on www.retroachievements.org
-RetroAchievements are not available for this game = RetroAchievements are not available for this game
-RetroAchievements website = RetroAchievements website
+Achievement unlocked = Obiettivo sbloccato
+Achievement progress = Progresso dell'obiettivo
+Achievements = Obiettivi
+Achievements enabled = Obiettivi abilitati
+Achievements are disabled = Gli obiettivi sono disabilitati
+Achievements with active challenges = Obiettivi con sfide attive
+Almost completed achievements = Obiettivi quasi completati
+Can't log in to RetroAchievements right now = Impossibile accedere a RetroAchievements in questo momento
+Challenge indicator = Indicatore sfida
+Challenge Mode = Modalità Sfida
+Challenge Mode (no savestates) = Modalità Sfida (senza stati salvati)
+Contacting RetroAchievements server... = Contatto con il server di RetroAchievements in corso...
+Customize = Personalizza
+Earned = Hai sbloccato %d su %d obiettivi, e guadagnato %d su %d punti
+Encore Mode = Modalità Encore
+Failed logging in to RetroAchievements = Accesso a RetroAchievements non riuscito
+Failed to connect to RetroAchievements. Achievements will not unlock. = Connessione a RetroAchievements non riuscita. Gli obiettivi non verranno sbloccati.
+Failed to identify game. Achievements will not unlock. = Identificazione del gioco non riuscita. Gli obiettivi non verranno sbloccati.
+How to use RetroAchievements = Come usare RetroAchievements
+In Encore mode - listings may be wrong below = In modalità Encore - le voci qui sotto potrebbero essere errate
+Leaderboard attempt started or failed = Tentativo di classifica iniziato o fallito
+Leaderboard result submitted = Risultato di classifica inviato
+Leaderboard score submission = Invio punteggio in classifica
+Leaderboard submission is enabled = L'invio in classifica è abilitato
+Leaderboards = Classifiche
+Leaderboard tracker = Tracciatore classifica
+Links = Link
+Locked achievements = Obiettivi bloccati
+Log bad memory accesses = Registra gli accessi errati alla memoria
+Mastered %1 = Padroneggiato %1
+Around me = Intorno a me
+Notifications = Notifiche
+Recently unlocked achievements = Obiettivi recentemente sbloccati
+Reconnected to RetroAchievements. = Riconnesso a RetroAchievements.
+Register on www.retroachievements.org = Registrati su www.retroachievements.org
+RetroAchievements are not available for this game = I RetroAchievements non sono disponibili per questo gioco
+RetroAchievements website = Sito web di RetroAchievements
Rich Presence = Rich Presence
-Save state loaded without achievement data = Save state loaded without achievement data
-Save states not available in Challenge Mode = Save states not available in Challenge Mode
-Sound Effects = Sound Effects
-Statistics = Statistics
-Submitted %1 for %2 = Submitted %1 for %2
-Syncing achievements data... = Syncing achievements data...
-Test Mode = Test Mode
-This feature is not available in Challenge Mode = This feature is not available in Challenge Mode
-This game has no achievements = This game has no achievements
-Top players = Top players
-Unlocked achievements = Unlocked achievements
-Unofficial achievements = Unofficial achievements
-Unsupported achievements = Unsupported achievements
+Save states not available in Challenge Mode = Stati salvati non disponibili in Modalità Sfida
+Save state loaded without achievement data = Stato salvato caricato senza dati degli obiettivi
+Sound Effects = Effetti Sonori
+Statistics = Statistiche
+Submitted %1 for %2 = Inviato %1 per %2
+Syncing achievements data... = Sincronizzazione dati obiettivi in corso...
+Test Mode = Modalità Test
+This feature is not available in Challenge Mode = Questa funzione non è disponibile in Modalità Sfida
+This game has no achievements = Questo gioco non ha obiettivi
+Top players = Migliori giocatori
+Unlocked achievements = Obiettivi sbloccati
+Unsupported achievements = Obiettivi non supportati
+Unofficial achievements = Obiettivi non ufficiali
[Audio]
-Alternate speed volume = Volume velocizzato alternativo
-Audio backend = Renderer Audio (necessita il riavvio)
+Alternate speed volume = Volume a velocità alternativa
+Audio backend = Renderer Audio (riavvio necessario)
Audio Error = Errore Audio
-Audio file format not supported. Must be WAV. = Audio file format not supported. Must be WAV.
-AudioBufferingForBluetooth = Buffer per Bluetooth (lento)
+Audio file format not supported. Must be WAV. = Formato audio non supportato. Deve essere WAV.
+AudioBufferingForBluetooth = Buffer compatibile con Bluetooth (più lento)
Auto = Automatico
Device = Dispositivo
Disabled = Disabilitato
@@ -71,7 +71,7 @@ Microphone = Microfono
Microphone Device = Periferica Microfono
Mute = Muto
Reverb volume = Riverb. volume
-Use new audio devices automatically = Usa il nuovo dispositivo audio automatic.
+Use new audio devices automatically = Usa nuovi dispositivi audio automaticamente.
Use global volume = Usa volume globale
WASAPI (fast) = WASAPI (veloce)
@@ -94,8 +94,8 @@ Calibrate Analog Stick = Calibra Stick Analogico
Calibrate = Calibra
Calibrated = Calibrato
Calibration = Calibratura
-Circular low end radius = Circular low end radius
-Circular stick input = Input stick circolare
+Circular low end radius = Raggio circolare di base
+Circular stick input = Input circolare levetta
Classic = Classico
Confine Mouse = Confina il mouse all'interno dell'area della finestra
Control Mapping = Mappatura dei Controlli
@@ -110,14 +110,14 @@ Double tap = Doppio tocco
Enable analog stick gesture = Enable analog stick gesture
Enable gesture control = Abilita controllo gesti
Enable standard shortcut keys = Abilita scorciatoie standard
-frames = frames
-Gesture = Gesti
+frames = frame
+Gesture = Gesto
Gesture mapping = Mappatura gesti
Glowing borders = Bordi luminosi
HapticFeedback = Risposta Tattile (vibrazione)
-Hide touch analog stick background circle = Nascondi cerchio dello stick analogico
+Hide touch analog stick background circle = Nascondi cerchio di background della levetta analogica touch
Icon = Icona
-Ignore gamepads when not focused = Ignora i gamepad se non attivi
+Ignore gamepads when not focused = Ignora i controller se non attiva
Ignore Windows Key = Ignora il tasto "Windows"
Invert Axes = Inverti gli Assi
Invert Tilt along X axis = Inverti Inclinazione sull'asse X
@@ -139,14 +139,14 @@ OnScreen = Controlli Touchscreen
Portrait = Ritratto
Portrait Reversed = Ritratto invertito
PSP Action Buttons = Pulsanti Azione PSP (△◯✕☐)
-Rapid fire interval = Rapid fire interval
+Rapid fire interval = Intervallo fuoco rapido
Raw input = Input grezzo
-Reset to defaults = Reset ai predefiniti
+Reset to defaults = Reimposta su valori predefiniti
Screen Rotation = Rotazione Schermo
Sensitivity (scale) = Sensibilità (scala)
Sensitivity = Sensibilità
Shape = Forma
-Show Touch Pause Menu Button = Mostra Tasto di Pausa
+Show Touch Pause Menu Button = Mostra Tasto Menu di Pausa
Sticky D-Pad (easier sweeping movements) = Sticky D-Pad (easier sweeping movements)
Swipe = Scorrimento
Swipe sensitivity = Sensibilità Scorrimento
@@ -159,7 +159,7 @@ Tilt Sensitivity along Y axis = Inverti Sensibilità sull'asse Y
To Calibrate = Tieni il dispositivo all'angolazione che preferisci e premi "Calibra".
Toggle mode = Scambia modalità
Repeat mode = Modalità ripeti
-Touch Control Visibility = Visibilità controlli Touch
+Touch Control Visibility = Visibilità Controlli Touch
Use custom right analog = Usa analogico destro personalizzato
Use Mouse Control = Usa il controllo con il mouse
Visibility = Visibilità
@@ -282,32 +282,32 @@ Backspace = Backspace
Block address = Blocca indirizzo
By Address = Per indirizzo
Copy savestates to memstick root = Copia gli stati salvati nella root della Memory Stick
-Create frame dump = Create frame dump
+Create frame dump = Crea dump frame
Create/Open textures.ini file for current game = Crea/Apri il file textures.ini per il gioco corrente
Current = Corrente
-Debug overlay = Debug overlay
-Debug stats = Debug stats
+Debug overlay = Overlay debug
+Debug stats = Statistiche debug
Dev Tools = Strumenti di sviluppo
-DevMenu = DevMenu
-Disabled JIT functionality = Disattivata Funzionalità JIT
-Draw Frametimes Graph = Disegna grafica dei frametimes
+DevMenu = MenuSvil
+Disabled JIT functionality = Funzionalità JIT Disattivata
+Draw Frametimes Graph = Disegna Grafico dei Frametime
Dump Decrypted Eboot = Crea EBOOT.BIN decriptato all'avvio del gioco
Dump next frame to log = Crea Log del Frame Successivo
Enable driver bug workarounds = Abilita espediente per superare i bug dei driver
Enable Logging = Attiva Log del Debug
Enter address = Inserire indirizzo
FPU = FPU
-Fragment = Fragment
+Fragment = Frammento
Frame timing = Frame timing
Framedump tests = Test del framedump
-Frame Profiler = Profilo Frame
-GPU Allocator Viewer = GPU Allocator Viewer
+Frame Profiler = Profilatore di Frame
+GPU Allocator Viewer = Visualizzatore dell'Allocatore GPU
GPU Driver Test = Test dei driver GPU
-GPU log profiler = GPU log profiler
+GPU log profiler = Profilatore dei registri GPU
GPU Profile = Profilo GPU
Jit Compare = Confronto Jit
JIT debug tools = Strumenti di debug JIT
-Log Dropped Frame Statistics = Statistiche dei Frame persi
+Log Dropped Frame Statistics = Statistiche dei Frame Persi
Log Level = Livello del Log
Log View = Visualizza Log
Logging Channels = Registra Canali
@@ -320,7 +320,7 @@ Random = Casuale
Replace textures = Sostituisci texture
Reset = Reset
Reset limited logging = Reset del logging limitato
-RestoreDefaultSettings = Si desidera davvero ripristinare le impostazioni?\n\n\nQuest'azione non può essere annullata.\nRiavviare PPSSPP per caricare i cambiamenti.
+RestoreDefaultSettings = Si desidera davvero ripristinare le impostazioni?\nQuest'azione non può essere annullata.\nRiavviare PPSSPP per caricare i cambiamenti.
RestoreGameDefaultSettings = Si desidera davvero ripristinare le impostazioni specifiche per il gioco\nai valori predefiniti?
Resume = Ripristina
Run CPU Tests = Fai Test CPU
@@ -332,10 +332,10 @@ Stats = Statistiche
System Information = Informazioni Sistema
Texture ini file created = Creato file ini delle texture
Texture Replacement = Sostituzione Texture
-Audio Debug = Audio debug
-Control Debug = Controllo debug
-Toggle Freeze = Attiva/Disattiva congelamento
-Touchscreen Test = Test del touchscreen
+Audio Debug = Debug Audio
+Control Debug = Debug Controlli
+Toggle Freeze = Attiva/Disattiva Congelamento
+Touchscreen Test = Test del Touchscreen
Ubershaders = Ubershaders
Vertex = Vertex
VFPU = VFPU
@@ -392,17 +392,17 @@ Load = Carica
Load completed = Caricamento completato.
Loading = Caricamento in corso.\nAttendere, prego...
LoadingFailed = Impossibile caricare i dati.
-Log in = Log in
-Log out = Log out
-Logged in! = Logged in!
-Logging in... = Logging in...
+Log in = Accedi
+Log out = Esci
+Logged in! = Accesso eseguito!
+Logging in... = Accesso in corso...
Move = Sposta
Move Down = Sposta giù
Move Up = Sposta su
Network Connection = Connessione di Rete
NEW DATA = NUOVI DATI
No = No
-None = None
+None = Nessuno
ObtainingIP = Cerco di ottenere l'indirizzo IP.\nAttendere, prego...
OK = OK
Old savedata detected = Rilevati vecchi dati salvati
@@ -473,14 +473,14 @@ MsgErrorSavedataDataBroken = Dati del salvataggio corrotti.
MsgErrorSavedataMSFull = Memory Stick piena. Controllare lo spazio a disposizione.
MsgErrorSavedataNoData = Attenzione: dati del salvataggio non trovati.
MsgErrorSavedataNoMS = Memory Stick non inserita.
-No EBOOT.PBP, misidentified game = No EBOOT.PBP, gioco identificato erroneamente.
+No EBOOT.PBP, misidentified game = Nessun EBOOT.PBP, gioco identificato erroneamente.
Not a valid disc image. = Immagine disco non valida.
OpenGLDriverError = Errore Driver OpenGL
PPSSPP doesn't support UMD Music. = PPSSPP non supporta Musica UMD.
PPSSPP doesn't support UMD Video. = PPSSPP non supporta Video UMD.
-PPSSPP plays PSP games, not PlayStation 1 or 2 games. = PPSSPP per giocare ai giochi PSP, non ai giochi PlayStation 1 o 2.
+PPSSPP plays PSP games, not PlayStation 1 or 2 games. = PPSSPP esegue giochi PSP, non giochi PlayStation 1 o 2.
PPSSPPDoesNotSupportInternet = PPSSPP attualmente non supporta connessioni Internet per DLC, PSN o aggiornamenti.
-PS1 EBOOTs are not supported by PPSSPP. = PS1 EBOOT non supportati da PPSSPP.
+PS1 EBOOTs are not supported by PPSSPP. = Gli EBOOT PS1 non sono supportati da PPSSPP.
PSX game image detected. = Il file è un'immagine MODE2. PPSSPP non supporta giochi per PS1.
RAR file detected (Require UnRAR) = Il file è compresso (RAR).\nPrima si deve decomprimere (prova UnRAR).
RAR file detected (Require WINRAR) = Il file è compresso (RAR).\nPrima si deve decomprimere (prova WinRAR).
@@ -488,11 +488,11 @@ Running slow: try frameskip, sound is choppy when slow = Emulazione lenta: prova
Running slow: Try turning off Software Rendering = Emulazione lenta: prova a disattivare "rendering software"
Save encryption failed. This save won't work on real PSP = Criptazione salvataggio fallita. Questo salvataggio non funzionerà su una PSP reale
textures.ini filenames may not be cross-platform = I nomi dei file "textures.ini" potrebbero non essere multi-piattaforma.
-This is a saved state, not a game. = Questo è un salvataggio di stato, non un gioco.
-This is save data, not a game. = Questi sono dei dati salvati, non un gioco.
+This is a saved state, not a game. = Questo è uno stato salvato, non un gioco.
+This is save data, not a game. = Questi sono dati salvati, non un gioco.
Unable to create cheat file, disk may be full = Impossibile creare il file cheat, il disco potrebbe essere pieno.
-Unable to initialize rendering engine. = Impossibile inizializzare il renderer.
-Unable to write savedata, disk may be full = Impossibile sovrascrivere i dati del salvataggio, il disco potrebbe essere pieno.
+Unable to initialize rendering engine. = Impossibile inizializzare il motore di rendering.
+Unable to write savedata, disk may be full = Impossibile scrivere i dati del salvataggio, il disco potrebbe essere pieno.
Warning: Video memory FULL, reducing upscaling and switching to slow caching mode = Attenzione: Memoria Video PIENA, ridurre l'upscaling e passare in modalità caching lenta.
Warning: Video memory FULL, switching to slow caching mode = Attenzione: Memoria Video PIENA, passare in modalità caching lenta.
ZIP file detected (Require UnRAR) = Il file è compresso (ZIP).\nPrima si deve decomprimere (prova UnRAR).
@@ -502,11 +502,11 @@ ZIP file detected (Require WINRAR) = Il file è compresso (ZIP).\nPrima si deve
Asia = Asia
Calculate CRC = Calcola CRC
ConfirmDelete = Elimina
-Create Game Config = Crea game config
+Create Game Config = Crea Configurazione di Gioco
Create Shortcut = Crea Scorciatoia
Delete Game = Elimina Gioco
-Delete Game Config = Elimina game config
-Delete Save Data = Elimina dati salvataggio
+Delete Game Config = Elimina Configurazione di Gioco
+Delete Save Data = Elimina Dati Salvataggio
Europe = Europa
Game = Gioco
Game Settings = Impostazioni Gioco
@@ -680,23 +680,23 @@ Zip archive corrupt = Archivio ZIP corrotto
Zip file does not contain PSP software = Il file ZIP non contiene software PSP
[KeyMapping]
-Allow combo mappings = Allow combo mappings
+Allow combo mappings = Consenti mappature combinate
Autoconfigure = Configurazione automatica
Autoconfigure for device = Configurazione Automatica per il Dispositivo
-Bind All = Mappa tutto
-Clear All = Pulisci Tutto
-Combo mappings are not enabled = Combo mappings are not enabled
-Default All = Ripristina Tutto
-Map a new key for = Seleziona Tasto di Controllo per:
-Map Key = Mappatura tasto
-Map Mouse = Mappatura mouse
+Bind All = Associa Tutti
+Clear All = Pulisci Tutti
+Combo mappings are not enabled = Le mappature combinate non sono abilitate
+Default All = Ripristina Tutti a Valori Predefiniti
+Map a new key for = Mappa un nuovo tasto per:
+Map Key = Mappa Tasto
+Map Mouse = Mappa Mouse
Replace = Sostituisci
Show PSP = Mostra PSP
You can press ESC to cancel. = Puoi premere Esc per annullare.
[MainMenu]
Browse = Sfoglia...
-Buy PPSSPP Gold = Acquista PPSSPP Gold
+Buy PPSSPP Gold = Compra PPSSPP Gold
Choose folder = Scegli cartella
Credits = Riconoscimenti
Exit = Esci
@@ -708,14 +708,14 @@ How to get games = Come ottenere giochi?
How to get homebrew & demos = Come ottenere homebrew o demo?
Load = Carica...
Loading... = Caricamento in corso...
-PPSSPP Homebrew Store = PPSSPP Homebrew Store
-PinPath = Pin
-PPSSPP can't load games or save right now = PPSSPP non può caricare i giochi o salvarli, in questo momento
+PPSSPP Homebrew Store = Store degli Homebrew di PPSSPP
+PinPath = Fissa
+PPSSPP can't load games or save right now = PPSSPP non può caricare i giochi o salvare in questo momento
Recent = Recenti
SavesAreTemporary = PPSSPP sta salvando in una cartella temporanea
-SavesAreTemporaryGuidance = Estrae PPSSPP da qualche parte per salvarlo permanentemente
+SavesAreTemporaryGuidance = Estrai PPSSPP da qualche parte per salvare permanentemente
SavesAreTemporaryIgnore = Ignora l'avviso
-UnpinPath = Stacca
+UnpinPath = Rimuovi dai fissati
UseBrowseOrLoad = Usa Sfoglia per scegliere una cartella, oppure Carica per scegliere un file.
www.ppsspp.org = www.ppsspp.org
@@ -872,22 +872,22 @@ Chinese (traditional) = Chinese (traditional)
Dutch = Dutch
English = English
French = French
-Game language = Game language
+Game language = Lingua del gioco
German = German
Italian = Italian
Japanese = Japanese
Korean = Korean
-Games often don't support all languages = Games often don't support all languages
+Games often don't support all languages = I giochi spesso non supportano tutte le lingue
Portuguese = Portuguese
Russian = Russian
Spanish = Spanish
[Pause]
-Cheats = Cheat
+Cheats = Trucchi
Continue = Continua
-Create Game Config = Crea game config
-Delete Game Config = Elimina game config
-Exit to menu = Vai al Menu
+Create Game Config = Crea Configurazione di Gioco
+Delete Game Config = Elimina Configurazione di Gioco
+Exit to menu = Vai al menu
Game Settings = Impostazioni Gioco
Load State = Carica Stato
Rewind = Riavvolgimento
@@ -898,7 +898,7 @@ Undo last load = Annulla ultimo caricamento
Undo last save = Annulla ultimo salvataggio
[PostShaders]
-(duplicated setting, previous slider will be used) = (parametri duplicati, verrà usata la precedente regolazione)
+(duplicated setting, previous slider will be used) = (parametro duplicato, verrà usata la precedente regolazione)
4xHqGLSL = 4xHQ GLSL
5xBR = Upscaler 5xBR in pixel art
5xBR-lv2 = Upscaler 5xBR-lv2 in pixel art
@@ -908,7 +908,7 @@ Animation speed (0 -> disable) = Velocità animazione (0 -> disabilita)
Aspect = Aspetto
Black border = Bordo nero
Bloom = Sfocatura luminosa
-BloomNoBlur = Bloom (no blur)
+BloomNoBlur = Bloom (senza sfocatura)
Brightness = Luminosità
Cartoon = Disegno
CatmullRom = Upscaler bicubico (Catmull-Rom)
@@ -916,7 +916,7 @@ ColorCorrection = Correzione dei colori
ColorPreservation = Preservazione colore
Contrast = Contrasto
CRT = Linee di scansione CRT
-FakeReflections = FakeReflections
+FakeReflections = RiflessiFinti
FXAA = Antialiasing FXAA
Gamma = Gamma
GreenLevel = Livello del verde
@@ -945,16 +945,16 @@ Vignette = Miniatura
[PSPCredits]
all the forum mods = tutte le mod del forum
build server = crea server
-Buy Gold = Versione Gold
-check = Prova anche il Dolphin, il miglior emulatore per Wii/GC:
-CheckOutPPSSPP = Da' un'occhiata a PPSSPP, il magnifico emulatore PSP: http://www.ppsspp.org/
+Buy Gold = Compra Gold
+check = Da' anche un'occhiata a Dolphin, il miglior emulatore per Wii/GC sulla piazza:
+CheckOutPPSSPP = Da' un'occhiata a PPSSPP, il meraviglioso emulatore PSP: http://www.ppsspp.org/
contributors = Collaboratori:
created = Realizzato da
Discord = Discord
info1 = PPSSPP è realizzato esclusivamente a scopo didattico.
info2 = Assicurarsi di avere i diritti di utilizzo dei propri giochi
info3 = possedendo una copia UMD autentica o acquistando una copia
-info4 = digitale dal negozio di PlayStation Network sulla propria PSP.
+info4 = digitale dal PlayStation Store sulla propria PSP.
info5 = PSP è un marchio di Sony, Inc.
iOS builds = versione iOS
license = Software Libero GPL 2.0+
@@ -979,12 +979,12 @@ translators4 = papel
translators5 =
translators6 =
Twitter @PPSSPP_emu = Twitter
-website = Visita il sito web:
+website = Da' un'occhiata al sito web:
written = Scritto in C++ per velocità e portabilità
[MemStick]
Already contains PSP data = Contiene già dati PSP
-Cancelled - try again = Cancelled - try again
+Cancelled - try again = Annullato - prova di nuovo
Create or Choose a PSP folder = Scegli o crea una cartella PSP
Current = Corrente
DataCanBeShared = I dati possono essere condivisi tra PPSSPP normale/Gold
@@ -1061,7 +1061,7 @@ Perfect Description = Emulazione perfetta per tutto il gioco - magnifico!
Plays = Giocabile
Plays Description = Completamente giocabile ma sono presenti dei glitch
ReportButton = Rapporto feedback
-Show disc CRC = Show disc CRC
+Show disc CRC = Mostra CRC disco
Speed = Velocità
Submit Feedback = Invia feedback
SuggestionConfig = Guarda i rapporti sul sito web per le impostazioni migliori.
@@ -1073,7 +1073,7 @@ SuggestionsWaiting = Invia e consulta i feedback degli altri utenti...
SuggestionUpgrade = Aggiorna alla nuova build del PPSSPP.
SuggestionVerifyDisc = Verifica se l'ISO in tuo possesso è una copia funzionante del tuo disco.
Unselected Overall Description = Quanto è stata precisa l'emulazione del gioco?
-View Feedback = Mostra i Feedback
+View Feedback = Visualizza tutti i Feedback
[Savedata]
Date = Data
@@ -1341,6 +1341,7 @@ Choices: = Scelte:
List: = Lista:
Progress: %1% = Avanzamento: %1%
Screen representation = Rappresentazione su schermo
+
[Upgrade]
Details = Dettagli
Dismiss = Ignora
diff --git a/assets/lang/ru_RU.ini b/assets/lang/ru_RU.ini
index 72ad38dd65c6..9db13ad47333 100644
--- a/assets/lang/ru_RU.ini
+++ b/assets/lang/ru_RU.ini
@@ -17,7 +17,7 @@ Challenge Mode = Режим испытания
Challenge Mode (no savestates) = Режим испытания (без сохранений состояния)
Contacting RetroAchievements server... = Подключение к серверу RetroAchievements...
Customize = Настроить
-Earned = Вы открыли %d из %d достижений и %d из %d очков
+Earned = Вы разблокировали %d из %d достижений и получили %d из %d очков
Encore Mode = Режим повтора
Failed logging in to RetroAchievements = Не удалось войти в RetroAchievements
Failed to connect to RetroAchievements. Achievements will not unlock. = Не удалось подключиться к RetroAchievements. Достижения не будут разблокированы.
@@ -510,7 +510,7 @@ Delete Save Data = Удалить сохранения
Europe = Европа
Game = Игра
Game Settings = Настройки игры
-Homebrew = Хоумбрю
+Homebrew = Homebrew
Hong Kong = Гонконг
InstallData = Установить данные
Japan = Япония
@@ -698,14 +698,14 @@ Browse = Обзор...
Buy PPSSPP Gold = PPSSPP Gold
Choose folder = Выберите папку
Credits = Авторы
-PPSSPP Homebrew Store = Магазин хоумбрю PPSSPP
+PPSSPP Homebrew Store = Магазин homebrew для PPSSPP
Exit = Выход
Game Settings = Настройки
Games = Игры
Give PPSSPP permission to access storage = Дать доступ к хранилищу данных
-Homebrew & Demos = Хоумбрю и демо
+Homebrew & Demos = Homebrew и демо
How to get games = Как получить игры?
-How to get homebrew & demos = Как получить хоумбрю и демо?
+How to get homebrew & demos = Как получить homebrew и демо?
Load = Открыть...
Loading... = Загрузка...
PinPath = Закрепить
@@ -1233,10 +1233,10 @@ Day Light Saving = Летнее время
DDMMYYYY = ДДММГГГГ
Decrease size = Уменьшить размер
Developer Tools = Инструменты разработчика
-Display Extra Info = Отображать дополнительную ниформацию
-Display Games on a grid = Отображать "Игры" в сетке
-Display Homebrew on a grid = Отображать "Хоумбрю и демо" в сетке
-Display Recent on a grid = Отображать "Недавние" в сетке
+Display Extra Info = Показывать дополнительную информацию
+Display Games on a grid = Показывать "Игры" в виде сетки
+Display Homebrew on a grid = Показывать "Homebrew и демо" в виде сетки
+Display Recent on a grid = Показывать "Недавние" в виде сетки
Dynarec (JIT) = Динамическая рекомпиляция (JIT)
Emulation = Эмуляция
Enable Cheats = Включить коды
@@ -1251,7 +1251,7 @@ Floating symbols = Парящие символы
Force real clock sync (slower, less lag) = Принудительная синхронизация реальной частоты ЦП (медленнее, меньше лагов)
Games list settings = Настройки списка игр
General = Основные
-Grid icon size = Размер ярлыков в режиме сетки
+Grid icon size = Размер ярлыков в виде сетки
Help the PPSSPP team = Помочь команде PPSSPP
Host (bugs, less lag) = Хост (возможны баги, меньше лагов)
Ignore bad memory accesses = Игнорировать ошибки доступа к памяти
diff --git a/ext/CMakeLists.txt b/ext/CMakeLists.txt
index 9bbb9b7a7452..1031f6edf91c 100644
--- a/ext/CMakeLists.txt
+++ b/ext/CMakeLists.txt
@@ -37,3 +37,5 @@ endif()
if(USE_DISCORD AND NOT IOS AND NOT LIBRETRO)
add_subdirectory(discord-rpc-build)
endif()
+
+add_subdirectory(libchdr-build)
diff --git a/ext/libchdr b/ext/libchdr
new file mode 160000
index 000000000000..9108f34a8922
--- /dev/null
+++ b/ext/libchdr
@@ -0,0 +1 @@
+Subproject commit 9108f34a892272f61c3ed3bff4bee728d4c1dd57
diff --git a/ext/libchdr-build/CMakeLists.txt b/ext/libchdr-build/CMakeLists.txt
new file mode 100644
index 000000000000..8b93aa42fe4d
--- /dev/null
+++ b/ext/libchdr-build/CMakeLists.txt
@@ -0,0 +1,38 @@
+cmake_minimum_required (VERSION 3.2.0)
+project (chdr)
+
+set(LZMA_DIR ../libchdr/deps/lzma-22.01/src)
+set(SRC_DIR ../libchdr/src)
+
+include_directories(../libchdr/deps/lzma-22.01/include)
+include_directories(../libchdr/include)
+
+add_definitions(-D_7ZIP_ST)
+
+# Hack - CpuArch.c has a workaround that we reuse.
+add_definitions(-D__SWITCH__)
+
+set(ALL_SOURCE_FILES
+ ${LZMA_DIR}/Alloc.c
+ ${LZMA_DIR}/Bcj2.c
+ ${LZMA_DIR}/Bcj2Enc.c
+ ${LZMA_DIR}/Bra.c
+ ${LZMA_DIR}/Bra86.c
+ ${LZMA_DIR}/CpuArch.c
+ ${LZMA_DIR}/Delta.c
+ ${LZMA_DIR}/LzFind.c
+ ${LZMA_DIR}/LzFindOpt.c
+ ${LZMA_DIR}/LzmaDec.c
+ ${LZMA_DIR}/LzmaEnc.c
+ ${LZMA_DIR}/Lzma86Dec.c
+ ${LZMA_DIR}/Lzma86Enc.c
+ ${LZMA_DIR}/LzmaLib.c
+ ${LZMA_DIR}/Sort.c
+ ${SRC_DIR}/libchdr_bitstream.c
+ ${SRC_DIR}/libchdr_cdrom.c
+ ${SRC_DIR}/libchdr_chd.c
+ ${SRC_DIR}/libchdr_flac.c
+ ${SRC_DIR}/libchdr_huffman.c
+ )
+
+add_library(chdr STATIC ${ALL_SOURCE_FILES})
diff --git a/ext/libchdr.vcxproj b/ext/libchdr.vcxproj
new file mode 100644
index 000000000000..89d56f3fbe2c
--- /dev/null
+++ b/ext/libchdr.vcxproj
@@ -0,0 +1,374 @@
+
+
+
+
+ Debug
+ ARM
+
+
+ Debug
+ ARM64
+
+
+ Debug
+ Win32
+
+
+ Debug
+ x64
+
+
+ Release
+ ARM
+
+
+ Release
+ ARM64
+
+
+ Release
+ Win32
+
+
+ Release
+ x64
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ {956F1F48-B612-46D8-89EE-96996DCD9383}
+ Win32Proj
+ libchdr
+ libchdr_static
+ Unicode
+ StaticLibrary
+ $(SolutionDir)..\ext\libchdr\bin\$(Platform)_$(Configuration)\
+ $(SolutionDir)..\ext\libchdr\bin\obj\$(RootNamespace)_$(Platform)_$(Configuration)\
+ 10.0
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ $(DefaultPlatformToolset)
+
+
+ true
+ $(DefaultPlatformToolset)
+
+
+ true
+ $(DefaultPlatformToolset)
+
+
+ true
+ $(DefaultPlatformToolset)
+
+
+ false
+ true
+ $(DefaultPlatformToolset)
+
+
+ false
+ true
+ $(DefaultPlatformToolset)
+
+
+ false
+ true
+ $(DefaultPlatformToolset)
+
+
+ false
+ true
+ $(DefaultPlatformToolset)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ $(IncludePath);$(SolutionDir)..\ext\libchdr\include;$(UniversalCRT_IncludePath);
+ false
+
+
+ true
+ $(IncludePath);$(SolutionDir)..\ext\libchdr\include;$(UniversalCRT_IncludePath);
+ false
+
+
+ false
+ $(IncludePath);$(SolutionDir)..\ext\libchdr\include;$(UniversalCRT_IncludePath);
+ false
+
+
+ false
+ $(IncludePath);$(SolutionDir)..\ext\libchdr\include;$(UniversalCRT_IncludePath);
+ false
+
+
+
+ $(OutDir)$(TargetName).pdb
+
+
+
+
+
+
+ Level4
+ _7ZIP_ST;WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)
+ true
+ EnableFastChecks
+ MultiThreadedDebug
+ true
+ ProgramDatabase
+ false
+ libchdr\include;libchdr\deps\lzma-22.01\include;zlib;
+
+
+ Windows
+ true
+ MachineX86
+
+
+
+
+
+
+ Level4
+ _7ZIP_ST;WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)
+ EnableFastChecks
+ MultiThreadedDebug
+ true
+ ProgramDatabase
+ false
+ libchdr\include;libchdr\deps\lzma-22.01\include;zlib;
+
+
+ Windows
+ true
+
+
+
+
+
+
+ Level4
+ _7ZIP_ST;WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)
+ EnableFastChecks
+ MultiThreadedDebug
+ true
+ ProgramDatabase
+ false
+ libchdr\include;libchdr\deps\lzma-22.01\include;zlib;
+
+
+ Windows
+ true
+
+
+
+
+
+
+ Level4
+ _7ZIP_ST;WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)
+ EnableFastChecks
+ MultiThreadedDebug
+ true
+ ProgramDatabase
+ false
+ libchdr\include;libchdr\deps\lzma-22.01\include;zlib;
+
+
+ Windows
+ true
+
+
+
+
+ Level4
+
+
+ MaxSpeed
+ true
+ true
+ _7ZIP_ST;WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)
+ false
+ MultiThreaded
+ true
+ ProgramDatabase
+ AnySuitable
+ Speed
+ true
+ false
+ StreamingSIMDExtensions2
+ libchdr\include;libchdr\deps\lzma-22.01\include;zlib;
+
+
+ Windows
+ true
+ true
+ true
+ MachineX86
+
+
+
+
+ Level4
+
+
+ MaxSpeed
+ true
+ true
+ _7ZIP_ST;WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)
+ false
+ false
+ MultiThreaded
+ true
+ ProgramDatabase
+ false
+ true
+ AnySuitable
+ Speed
+ libchdr\include;libchdr\deps\lzma-22.01\include;zlib;
+
+
+ Windows
+ true
+ true
+ true
+
+
+
+
+ Level4
+
+
+ MaxSpeed
+ true
+ true
+ _7ZIP_ST;WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)
+ false
+ false
+ MultiThreaded
+ true
+ ProgramDatabase
+ false
+ true
+ AnySuitable
+ Speed
+ libchdr\include;libchdr\deps\lzma-22.01\include;zlib;
+
+
+ Windows
+ true
+ true
+ true
+
+
+
+
+ Level4
+
+
+ MaxSpeed
+ true
+ true
+ _7ZIP_ST;WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)
+ false
+ false
+ MultiThreaded
+ true
+ ProgramDatabase
+ false
+ true
+ AnySuitable
+ Speed
+ libchdr\include;libchdr\deps\lzma-22.01\include;zlib;
+
+
+ Windows
+ true
+ true
+ true
+
+
+
+
+
+
\ No newline at end of file
diff --git a/ext/libchdr.vcxproj.filters b/ext/libchdr.vcxproj.filters
new file mode 100644
index 000000000000..d35dd7e48fb3
--- /dev/null
+++ b/ext/libchdr.vcxproj.filters
@@ -0,0 +1,72 @@
+
+
+
+
+
+
+
+
+
+ LZMA
+
+
+ LZMA
+
+
+ LZMA
+
+
+ LZMA
+
+
+ LZMA
+
+
+ LZMA
+
+
+ LZMA
+
+
+ LZMA
+
+
+ LZMA
+
+
+ LZMA
+
+
+ LZMA
+
+
+ LZMA
+
+
+ LZMA
+
+
+ LZMA
+
+
+ LZMA
+
+
+ LZMA
+
+
+
+
+
+
+
+
+
+
+
+
+
+ {ff31581b-3ff9-4e39-bb96-2f662896cf70}
+
+
+
\ No newline at end of file
diff --git a/headless/Headless.vcxproj b/headless/Headless.vcxproj
index 65cc85d5b46c..b500d26de4eb 100644
--- a/headless/Headless.vcxproj
+++ b/headless/Headless.vcxproj
@@ -502,6 +502,9 @@
{edfa2e87-8ac1-4853-95d4-d7594ff81947}
+
+ {956f1f48-b612-46d8-89ee-96996dcd9383}
+
{3baae095-e0ab-4b0e-b5df-ce39c8ae31de}
diff --git a/libretro/Makefile.common b/libretro/Makefile.common
index 0168cd21e510..e8c6ed2d4113 100644
--- a/libretro/Makefile.common
+++ b/libretro/Makefile.common
@@ -226,6 +226,35 @@ SOURCES_C += \
COREFLAGS += -DSTACK_LINE_READER_BUFFER_SIZE=1024
COREFLAGS += -DHTTPS_NOT_AVAILABLE
+COREFLAGS += -D_7ZIP_ST
+INCFLAGS += -I$(EXTDIR)/libchdr/deps/lzma-22.01/include
+
+SOURCES_C += \
+ $(EXTDIR)/libchdr/deps/lzma-22.01/src/Alloc.c \
+ $(EXTDIR)/libchdr/deps/lzma-22.01/src/Bcj2.c \
+ $(EXTDIR)/libchdr/deps/lzma-22.01/src/Bcj2Enc.c \
+ $(EXTDIR)/libchdr/deps/lzma-22.01/src/Bra.c \
+ $(EXTDIR)/libchdr/deps/lzma-22.01/src/Bra86.c \
+ $(EXTDIR)/libchdr/deps/lzma-22.01/src/CpuArch.c \
+ $(EXTDIR)/libchdr/deps/lzma-22.01/src/Delta.c \
+ $(EXTDIR)/libchdr/deps/lzma-22.01/src/LzFind.c \
+ $(EXTDIR)/libchdr/deps/lzma-22.01/src/LzFindOpt.c \
+ $(EXTDIR)/libchdr/deps/lzma-22.01/src/LzmaDec.c \
+ $(EXTDIR)/libchdr/deps/lzma-22.01/src/LzmaEnc.c \
+ $(EXTDIR)/libchdr/deps/lzma-22.01/src/Lzma86Dec.c \
+ $(EXTDIR)/libchdr/deps/lzma-22.01/src/Lzma86Enc.c \
+ $(EXTDIR)/libchdr/deps/lzma-22.01/src/LzmaLib.c \
+ $(EXTDIR)/libchdr/deps/lzma-22.01/src/Sort.c
+
+INCFLAGS += -I$(EXTDIR)/libchdr/include
+
+SOURCES_C += \
+ $(EXTDIR)/libchdr/src/libchdr_bitstream.c \
+ $(EXTDIR)/libchdr/src/libchdr_cdrom.c \
+ $(EXTDIR)/libchdr/src/libchdr_chd.c \
+ $(EXTDIR)/libchdr/src/libchdr_flac.c \
+ $(EXTDIR)/libchdr/src/libchdr_huffman.c
+
ifeq ($(PLATFORM_EXT), android)
COREFLAGS += -DHAVE_DLFCN_H
else ifneq ($(PLATFORM_EXT), win32)
diff --git a/unittest/JitHarness.cpp b/unittest/JitHarness.cpp
index 4ad53fc65c1a..088416e2bcd8 100644
--- a/unittest/JitHarness.cpp
+++ b/unittest/JitHarness.cpp
@@ -34,6 +34,7 @@
#include "Core/MemMap.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
+#include "Core/Config.h"
#include "Core/HLE/HLE.h"
// Temporary hacks around annoying linking errors. Copied from Headless.
@@ -55,9 +56,15 @@ HLEFunction UnitTestFakeSyscalls[] = {
{0x1234BEEF, &UnitTestTerminator, "UnitTestTerminator"},
};
-double ExecCPUTest() {
+double ExecCPUTest(bool clearCache = true) {
int blockTicks = 1000000;
int total = 0;
+
+ if (MIPSComp::jit) {
+ currentMIPS->pc = PSP_GetUserMemoryBase();
+ MIPSComp::JitAt();
+ }
+
double st = time_now_d();
do {
for (int j = 0; j < 1000; ++j) {
@@ -73,6 +80,17 @@ double ExecCPUTest() {
while (time_now_d() - st < 0.5);
double elapsed = time_now_d() - st;
+ if (MIPSComp::jit) {
+ JitBlockCacheDebugInterface *cache = MIPSComp::jit->GetBlockCacheDebugInterface();
+ if (cache) {
+ JitBlockDebugInfo block = cache->GetBlockDebugInfo(0);
+ WARN_LOG(JIT, "Executed %d target instrs, %d IR, for %d orig", (int)block.targetDisasm.size(), (int)block.irDisasm.size(), (int)block.origDisasm.size());
+ }
+
+ if (clearCache)
+ MIPSComp::jit->ClearCache();
+ }
+
return total / elapsed;
}
@@ -108,6 +126,7 @@ static void DestroyJitHarness() {
bool TestJit() {
SetupJitHarness();
+ g_Config.bFastMemory = true;
currentMIPS->pc = PSP_GetUserMemoryBase();
u32 *p = (u32 *)Memory::GetPointer(currentMIPS->pc);
@@ -158,6 +177,7 @@ bool TestJit() {
*p++ = MIPS_MAKE_SYSCALL("UnitTestFakeSyscalls", "UnitTestTerminator");
*p++ = MIPS_MAKE_BREAK(1);
+ *p++ = MIPS_MAKE_JR_RA();
// Dogfood.
addr = currentMIPS->pc;
@@ -170,11 +190,15 @@ bool TestJit() {
printf("\n");
- double jit_speed = 0.0, interp_speed = 0.0;
+ double jit_speed = 0.0, jit_ir_speed = 0.0, ir_speed = 0.0, interp_speed = 0.0;
if (compileSuccess) {
interp_speed = ExecCPUTest();
+ mipsr4k.UpdateCore(CPUCore::IR_INTERPRETER);
+ ir_speed = ExecCPUTest();
mipsr4k.UpdateCore(CPUCore::JIT);
jit_speed = ExecCPUTest();
+ mipsr4k.UpdateCore(CPUCore::JIT_IR);
+ jit_ir_speed = ExecCPUTest(false);
// Disassemble
JitBlockCacheDebugInterface *cache = MIPSComp::jit->GetBlockCacheDebugInterface();
@@ -182,14 +206,14 @@ bool TestJit() {
JitBlockDebugInfo block = cache->GetBlockDebugInfo(0); // Should only be one block.
std::vector &lines = block.targetDisasm;
// Cut off at 25 due to the repetition above. Might need tweaking for large instructions.
- const int cutoff = 25;
+ const int cutoff = 50;
for (int i = 0; i < std::min((int)lines.size(), cutoff); i++) {
printf("%s\n", lines[i].c_str());
}
if (lines.size() > cutoff)
printf("...\n");
}
- printf("Jit was %fx faster than interp.\n\n", jit_speed / interp_speed);
+ printf("Jit was %fx faster than interp, IR was %fx faster, JIT IR %fx.\n\n", jit_speed / interp_speed, ir_speed / interp_speed, jit_ir_speed / interp_speed);
}
printf("\n");
diff --git a/unittest/UnitTest.cpp b/unittest/UnitTest.cpp
index ac7b5ba76339..92ed78760e4b 100644
--- a/unittest/UnitTest.cpp
+++ b/unittest/UnitTest.cpp
@@ -58,6 +58,7 @@
#include "Common/Render/DrawBuffer.h"
#include "Common/System/NativeApp.h"
#include "Common/System/System.h"
+#include "Common/Thread/ThreadUtil.h"
#include "Common/ArmEmitter.h"
#include "Common/BitScan.h"
@@ -1038,6 +1039,8 @@ TestItem availableTests[] = {
};
int main(int argc, const char *argv[]) {
+ SetCurrentThreadName("UnitTest");
+
cpu_info.bNEON = true;
cpu_info.bVFP = true;
cpu_info.bVFPv3 = true;
diff --git a/unittest/UnitTests.vcxproj b/unittest/UnitTests.vcxproj
index 7417f4f38b63..76ce39678097 100644
--- a/unittest/UnitTests.vcxproj
+++ b/unittest/UnitTests.vcxproj
@@ -420,6 +420,9 @@
{edfa2e87-8ac1-4853-95d4-d7594ff81947}
+
+ {956f1f48-b612-46d8-89ee-96996dcd9383}
+
{3baae095-e0ab-4b0e-b5df-ce39c8ae31de}