diff --git a/.gitmodules b/.gitmodules index 2dd1e55e8095..82a4f4bf24c6 100644 --- a/.gitmodules +++ b/.gitmodules @@ -50,3 +50,6 @@ [submodule "ext/naett"] path = ext/naett url = https://github.com/erkkah/naett.git +[submodule "ext/libchdr"] + path = ext/libchdr + url = https://github.com/rtissera/libchdr.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 8b455b21bc47..af616d3e2a9f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1343,17 +1343,20 @@ else() SDL/SDLVulkanGraphicsContext.cpp ) endif() - if(SDL2_ttf_FOUND OR SDL2_ttf_PKGCONFIG_FOUND) + if(SDL2_ttf_FOUND OR + (SDL2_ttf_PKGCONFIG_FOUND AND + SDL2_ttf_PKGCONFIG_VERSION VERSION_GREATER_EQUAL "2.0.18")) add_definitions(-DUSE_SDL2_TTF) if(FONTCONFIG_FOUND) add_definitions(-DUSE_SDL2_TTF_FONTCONFIG) set(nativeExtraLibs ${nativeExtraLibs} Fontconfig::Fontconfig) endif() + elseif(SDL2_ttf_PKGCONFIG_FOUND) + message(WARNING "Found SDL2_ttf <2.0.18 - this is too old, falling back to atlas") endif() if(SDL2_ttf_FOUND) set(nativeExtraLibs ${nativeExtraLibs} SDL2_ttf::SDL2_ttf) elseif(SDL2_ttf_PKGCONFIG_FOUND) - add_definitions(-DUSE_SDL2_TTF_PKGCONFIG) set(nativeExtraLibs ${nativeExtraLibs} PkgConfig::SDL2_ttf_PKGCONFIG) endif() if(APPLE) @@ -2314,7 +2317,9 @@ else() include_directories(ext/zstd/lib) endif() -target_link_libraries(${CoreLibName} Common native kirk cityhash sfmt19937 xbrz xxhash rcheevos ${GlslangLibs} +include_directories(ext/libchdr/include) + +target_link_libraries(${CoreLibName} Common native chdr kirk cityhash sfmt19937 xbrz xxhash rcheevos ${GlslangLibs} ${CoreExtraLibs} ${OPENGL_LIBRARIES} ${X11_LIBRARIES} ${CMAKE_DL_LIBS}) if(NOT HTTPS_NOT_AVAILABLE) diff --git a/Common/Arm64Emitter.cpp b/Common/Arm64Emitter.cpp index a5d87c5a11fc..1d2c8b0438b6 100644 --- a/Common/Arm64Emitter.cpp +++ b/Common/Arm64Emitter.cpp @@ -4204,6 +4204,14 @@ void ARM64FloatEmitter::MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch, bo if (negate) { FNEG(32, Rd, Rd); } + } else if (TryAnyMOVI(32, Rd, 
ival)) { + if (negate) { + FNEG(32, Rd, Rd); + } + } else if (TryAnyMOVI(32, Rd, ival ^ 0x80000000)) { + if (!negate) { + FNEG(32, Rd, Rd); + } } else { _assert_msg_(scratch != INVALID_REG, "Failed to find a way to generate FP immediate %f without scratch", value); if (negate) { @@ -4214,6 +4222,96 @@ void ARM64FloatEmitter::MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch, bo } } +bool ARM64FloatEmitter::TryMOVI(u8 size, ARM64Reg Rd, uint64_t elementValue) { + if (size == 8) { + // Can always do 8. + MOVI(size, Rd, elementValue & 0xFF); + return true; + } else if (size == 16) { + if ((elementValue & 0xFF00) == 0) { + MOVI(size, Rd, elementValue & 0xFF, 0); + return true; + } else if ((elementValue & 0x00FF) == 0) { + MOVI(size, Rd, (elementValue >> 8) & 0xFF, 8); + return true; + } else if ((elementValue & 0xFF00) == 0xFF00) { + MVNI(size, Rd, ~elementValue & 0xFF, 0); + return true; + } else if ((elementValue & 0x00FF) == 0x00FF) { + MVNI(size, Rd, (~elementValue >> 8) & 0xFF, 8); + return true; + } + + return false; + } else if (size == 32) { + for (int shift = 0; shift < 32; shift += 8) { + uint32_t mask = 0xFFFFFFFF &~ (0xFF << shift); + if ((elementValue & mask) == 0) { + MOVI(size, Rd, (elementValue >> shift) & 0xFF, shift); + return true; + } else if ((elementValue & mask) == mask) { + MVNI(size, Rd, (~elementValue >> shift) & 0xFF, shift); + return true; + } + } + + // Maybe an MSL shift will work? 
+ for (int shift = 8; shift <= 16; shift += 8) { + uint32_t mask = 0xFFFFFFFF & ~(0xFF << shift); + uint32_t ones = (1 << shift) - 1; + uint32_t notOnes = 0xFFFFFF00 << shift; + if ((elementValue & mask) == ones) { + MOVI(size, Rd, (elementValue >> shift) & 0xFF, shift, true); + return true; + } else if ((elementValue & mask) == notOnes) { + MVNI(size, Rd, (elementValue >> shift) & 0xFF, shift, true); + return true; + } + } + + return false; + } else if (size == 64) { + uint8_t imm8 = 0; + for (int i = 0; i < 8; ++i) { + uint8_t byte = (elementValue >> (i * 8)) & 0xFF; + if (byte != 0 && byte != 0xFF) + return false; + + if (byte == 0xFF) + imm8 |= 1 << i; + } + + // Didn't run into any partial bytes, so size 64 is doable. + MOVI(size, Rd, imm8); + return true; + } + return false; +} + +bool ARM64FloatEmitter::TryAnyMOVI(u8 size, ARM64Reg Rd, uint64_t elementValue) { + // Try the original size first in case that's more optimal. + if (TryMOVI(size, Rd, elementValue)) + return true; + + uint64_t value = elementValue; + if (size != 64) { + uint64_t masked = elementValue & ((1 << size) - 1); + for (int i = size; i < 64; ++i) { + value |= masked << i; + } + } + + for (int attempt = 8; attempt <= 64; attempt += attempt) { + // Original size was already attempted above. + if (attempt != size) { + if (TryMOVI(attempt, Rd, value)) + return true; + } + } + + return false; +} + void ARM64XEmitter::SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) { u32 val; bool shift; diff --git a/Common/Arm64Emitter.h b/Common/Arm64Emitter.h index cd4a54cb73e9..0c3603d1bf9e 100644 --- a/Common/Arm64Emitter.h +++ b/Common/Arm64Emitter.h @@ -925,6 +925,10 @@ class ARM64FloatEmitter void ORR(u8 size, ARM64Reg Rd, u8 imm8, u8 shift = 0); void BIC(u8 size, ARM64Reg Rd, u8 imm8, u8 shift = 0); + bool TryMOVI(u8 size, ARM64Reg Rd, uint64_t value); + // Allow using a different size. Unclear if there's a penalty. 
+ bool TryAnyMOVI(u8 size, ARM64Reg Rd, uint64_t value); + // One source void FCVT(u8 size_to, u8 size_from, ARM64Reg Rd, ARM64Reg Rn); diff --git a/Common/Common.vcxproj.filters b/Common/Common.vcxproj.filters index 288b7dc289e5..a0852244bacc 100644 --- a/Common/Common.vcxproj.filters +++ b/Common/Common.vcxproj.filters @@ -1073,6 +1073,9 @@ {34f45db9-5c08-49cb-b349-b9e760ce3213} + + {b681797d-7747-487f-b448-5ef5b2d2805b} + diff --git a/Common/Data/Collections/Hashmaps.h b/Common/Data/Collections/Hashmaps.h index 939cb8eda994..a9e5dece99e6 100644 --- a/Common/Data/Collections/Hashmaps.h +++ b/Common/Data/Collections/Hashmaps.h @@ -72,7 +72,7 @@ class DenseHashMap { } bool ContainsKey(const Key &key) const { - // Slightly wasteful. + // Slightly wasteful, though compiler might optimize it. Value value; return Get(key, &value); } @@ -135,6 +135,7 @@ class DenseHashMap { return false; } + // This will never crash if you call it without locking - but, the value might not be right. size_t size() const { return count_; } diff --git a/Common/Data/Format/IniFile.cpp b/Common/Data/Format/IniFile.cpp index f3b35cd60958..4e23bfda9f51 100644 --- a/Common/Data/Format/IniFile.cpp +++ b/Common/Data/Format/IniFile.cpp @@ -173,7 +173,7 @@ std::string* Section::GetLine(const char* key, std::string* valueOut, std::strin if (!strcasecmp(lineKey.c_str(), key)) return &line; } - return 0; + return nullptr; } const std::string* Section::GetLine(const char* key, std::string* valueOut, std::string* commentOut) const @@ -186,7 +186,7 @@ const std::string* Section::GetLine(const char* key, std::string* valueOut, std: if (!strcasecmp(lineKey.c_str(), key)) return &line; } - return 0; + return nullptr; } void Section::Set(const char* key, uint32_t newValue) { @@ -423,14 +423,14 @@ const Section* IniFile::GetSection(const char* sectionName) const { for (const auto &iter : sections) if (!strcasecmp(iter->name().c_str(), sectionName)) return iter.get(); - return nullptr ; + return nullptr; } 
Section* IniFile::GetSection(const char* sectionName) { for (const auto &iter : sections) if (!strcasecmp(iter->name().c_str(), sectionName)) return iter.get(); - return 0; + return nullptr; } Section* IniFile::GetOrCreateSection(const char* sectionName) { diff --git a/Common/Data/Text/I18n.h b/Common/Data/Text/I18n.h index dba943fdbc9c..65baa2aec419 100644 --- a/Common/Data/Text/I18n.h +++ b/Common/Data/Text/I18n.h @@ -116,8 +116,9 @@ class I18NRepo { std::string LanguageID(); std::shared_ptr GetCategory(I18NCat category); - std::shared_ptr GetCategoryByName(const char *name); + // Translate the string, by looking up "key" in the file, and falling back to either def or key, in that order, if the lookup fails. + // def can (and usually is) set to nullptr. const char *T(I18NCat category, const char *key, const char *def = nullptr) { if (category == I18NCat::NONE) return def ? def : key; diff --git a/Common/File/DirListing.cpp b/Common/File/DirListing.cpp index 009c9b944829..0ee76098a16d 100644 --- a/Common/File/DirListing.cpp +++ b/Common/File/DirListing.cpp @@ -184,7 +184,7 @@ bool GetFilesInDir(const Path &directory, std::vector *files, const ch std::string tmp; while (*filter) { if (*filter == ':') { - filters.insert(std::move(tmp)); + filters.insert(tmp); tmp.clear(); } else { tmp.push_back(*filter); @@ -192,7 +192,7 @@ bool GetFilesInDir(const Path &directory, std::vector *files, const ch filter++; } if (!tmp.empty()) - filters.insert(std::move(tmp)); + filters.insert(tmp); } #if PPSSPP_PLATFORM(WINDOWS) diff --git a/Common/GPU/OpenGL/GLFrameData.cpp b/Common/GPU/OpenGL/GLFrameData.cpp index fa5a051d3055..a82669dc8ad5 100644 --- a/Common/GPU/OpenGL/GLFrameData.cpp +++ b/Common/GPU/OpenGL/GLFrameData.cpp @@ -32,25 +32,25 @@ void GLDeleter::Perform(GLRenderManager *renderManager, bool skipGLCalls) { } pushBuffers.clear(); for (auto shader : shaders) { - if (skipGLCalls) + if (skipGLCalls && shader) shader->shader = 0; // prevent the glDeleteShader delete shader; 
} shaders.clear(); for (auto program : programs) { - if (skipGLCalls) + if (skipGLCalls && program) program->program = 0; // prevent the glDeleteProgram delete program; } programs.clear(); for (auto buffer : buffers) { - if (skipGLCalls) + if (skipGLCalls && buffer) buffer->buffer_ = 0; delete buffer; } buffers.clear(); for (auto texture : textures) { - if (skipGLCalls) + if (skipGLCalls && texture) texture->texture = 0; delete texture; } diff --git a/Common/GPU/OpenGL/GLRenderManager.h b/Common/GPU/OpenGL/GLRenderManager.h index 1fb5c8247390..8aed292758cf 100644 --- a/Common/GPU/OpenGL/GLRenderManager.h +++ b/Common/GPU/OpenGL/GLRenderManager.h @@ -349,24 +349,31 @@ class GLRenderManager { } void DeleteShader(GLRShader *shader) { + _dbg_assert_(shader != nullptr); deleter_.shaders.push_back(shader); } void DeleteProgram(GLRProgram *program) { + _dbg_assert_(program != nullptr); deleter_.programs.push_back(program); } void DeleteBuffer(GLRBuffer *buffer) { + _dbg_assert_(buffer != nullptr); deleter_.buffers.push_back(buffer); } void DeleteTexture(GLRTexture *texture) { + _dbg_assert_(texture != nullptr); deleter_.textures.push_back(texture); } void DeleteInputLayout(GLRInputLayout *inputLayout) { + _dbg_assert_(inputLayout != nullptr); deleter_.inputLayouts.push_back(inputLayout); } void DeleteFramebuffer(GLRFramebuffer *framebuffer) { + _dbg_assert_(framebuffer != nullptr); deleter_.framebuffers.push_back(framebuffer); } void DeletePushBuffer(GLPushBuffer *pushbuffer) { + _dbg_assert_(pushbuffer != nullptr); deleter_.pushBuffers.push_back(pushbuffer); } diff --git a/Common/GPU/OpenGL/thin3d_gl.cpp b/Common/GPU/OpenGL/thin3d_gl.cpp index 66c408f95375..59cd3eba3c6a 100644 --- a/Common/GPU/OpenGL/thin3d_gl.cpp +++ b/Common/GPU/OpenGL/thin3d_gl.cpp @@ -934,7 +934,7 @@ void OpenGLTexture::UpdateTextureLevels(GLRenderManager *render, const uint8_t * OpenGLTexture::~OpenGLTexture() { if (tex_) { render_->DeleteTexture(tex_); - tex_ = 0; + tex_ = nullptr; generatedMips_ = 
false; } } diff --git a/Common/GPU/Vulkan/VulkanDebug.cpp b/Common/GPU/Vulkan/VulkanDebug.cpp index 022093e217c2..c0bf23567771 100644 --- a/Common/GPU/Vulkan/VulkanDebug.cpp +++ b/Common/GPU/Vulkan/VulkanDebug.cpp @@ -90,6 +90,19 @@ VKAPI_ATTR VkBool32 VKAPI_CALL VulkanDebugUtilsCallback( break; } + /* + // Can be used to temporarily turn errors into info for easier debugging. + switch (messageCode) { + case 1544472022: + if (messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) { + messageSeverity = (VkDebugUtilsMessageSeverityFlagBitsEXT)((messageSeverity & ~VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) | VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT); + } + break; + default: + break; + } + */ + int count; { std::lock_guard lock(g_errorCountMutex); diff --git a/Common/GPU/Vulkan/VulkanFramebuffer.cpp b/Common/GPU/Vulkan/VulkanFramebuffer.cpp index 975dab04bd57..19de9b67c458 100644 --- a/Common/GPU/Vulkan/VulkanFramebuffer.cpp +++ b/Common/GPU/Vulkan/VulkanFramebuffer.cpp @@ -2,6 +2,35 @@ #include "Common/GPU/Vulkan/VulkanFramebuffer.h" #include "Common/GPU/Vulkan/VulkanQueueRunner.h" +static const char *rpTypeDebugNames[] = { + "RENDER", + "RENDER_DEPTH", + "RENDER_INPUT", + "RENDER_DEPTH_INPUT", + "MV_RENDER", + "MV_RENDER_DEPTH", + "MV_RENDER_INPUT", + "MV_RENDER_DEPTH_INPUT", + "MS_RENDER", + "MS_RENDER_DEPTH", + "MS_RENDER_INPUT", + "MS_RENDER_DEPTH_INPUT", + "MS_MV_RENDER", + "MS_MV_RENDER_DEPTH", + "MS_MV_RENDER_INPUT", + "MS_MV_RENDER_DEPTH_INPUT", + "BACKBUF", +}; + +const char *GetRPTypeName(RenderPassType rpType) { + uint32_t index = (uint32_t)rpType; + if (index < ARRAY_SIZE(rpTypeDebugNames)) { + return rpTypeDebugNames[index]; + } else { + return "N/A"; + } +} + VkSampleCountFlagBits MultiSampleLevelToFlagBits(int count) { // TODO: Check hardware support here, or elsewhere? // Some hardware only supports 4x. 
@@ -387,12 +416,25 @@ VkRenderPass CreateRenderPass(VulkanContext *vulkan, const RPKey &key, RenderPas } if (isBackbuffer) { + // We don't specify any explicit transitions for these, so let's use subpass dependencies. + // This makes sure that writes to the depth image are done before we try to write to it again. + // From Sascha's examples. deps[numDeps].srcSubpass = VK_SUBPASS_EXTERNAL; deps[numDeps].dstSubpass = 0; - deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + deps[numDeps].dstStageMask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + deps[numDeps].srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + deps[numDeps].dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + deps[numDeps].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT; + numDeps++; + // Dependencies for the color image. 
+ deps[numDeps].srcSubpass = VK_SUBPASS_EXTERNAL; + deps[numDeps].dstSubpass = 0; + deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; deps[numDeps].dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; - deps[numDeps].srcAccessMask = 0; + deps[numDeps].srcAccessMask = VK_ACCESS_MEMORY_READ_BIT; deps[numDeps].dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + deps[numDeps].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT; numDeps++; } @@ -494,6 +536,10 @@ VkRenderPass CreateRenderPass(VulkanContext *vulkan, const RPKey &key, RenderPas res = vkCreateRenderPass(vulkan->GetDevice(), &rp, nullptr, &pass); } + if (pass) { + vulkan->SetDebugName(pass, VK_OBJECT_TYPE_RENDER_PASS, GetRPTypeName(rpType)); + } + _assert_(res == VK_SUCCESS); _assert_(pass != VK_NULL_HANDLE); return pass; diff --git a/Common/GPU/Vulkan/VulkanFramebuffer.h b/Common/GPU/Vulkan/VulkanFramebuffer.h index 465983efaa7c..97ff9e367a56 100644 --- a/Common/GPU/Vulkan/VulkanFramebuffer.h +++ b/Common/GPU/Vulkan/VulkanFramebuffer.h @@ -157,3 +157,5 @@ class VKRRenderPass { VkSampleCountFlagBits sampleCounts[(size_t)RenderPassType::TYPE_COUNT]; RPKey key_; }; + +const char *GetRPTypeName(RenderPassType rpType); diff --git a/Common/GPU/Vulkan/VulkanLoader.cpp b/Common/GPU/Vulkan/VulkanLoader.cpp index f30092ab1bff..3c7069c4a1cc 100644 --- a/Common/GPU/Vulkan/VulkanLoader.cpp +++ b/Common/GPU/Vulkan/VulkanLoader.cpp @@ -314,7 +314,7 @@ static void VulkanFreeLibrary(VulkanLibraryHandle &h) { } void VulkanSetAvailable(bool available) { - INFO_LOG(G3D, "Forcing Vulkan availability to true"); + INFO_LOG(G3D, "Setting Vulkan availability to true"); g_vulkanAvailabilityChecked = true; g_vulkanMayBeAvailable = available; } diff --git a/Common/GPU/Vulkan/VulkanMemory.cpp b/Common/GPU/Vulkan/VulkanMemory.cpp index 457d75d51f15..f29fc33d0b18 100644 --- a/Common/GPU/Vulkan/VulkanMemory.cpp +++ b/Common/GPU/Vulkan/VulkanMemory.cpp @@ -291,7 +291,7 @@ 
VulkanPushPool::Block VulkanPushPool::CreateBlock(size_t size) { _assert_(result == VK_SUCCESS); result = vmaMapMemory(vulkan_->Allocator(), block.allocation, (void **)(&block.writePtr)); - _assert_msg_(result == VK_SUCCESS, "VulkanPushPool: Failed to map memory (result = %08x)", result); + _assert_msg_(result == VK_SUCCESS, "VulkanPushPool: Failed to map memory (result = %s)", VulkanResultToString(result)); _assert_msg_(block.writePtr != nullptr, "VulkanPushPool: Failed to map memory on block of size %d", (int)block.size); return block; diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.cpp b/Common/GPU/Vulkan/VulkanQueueRunner.cpp index c4a04f90bc98..e6bb324f76c7 100644 --- a/Common/GPU/Vulkan/VulkanQueueRunner.cpp +++ b/Common/GPU/Vulkan/VulkanQueueRunner.cpp @@ -674,26 +674,6 @@ const char *AspectToString(VkImageAspectFlags aspect) { } } -static const char *rpTypeDebugNames[] = { - "RENDER", - "RENDER_DEPTH", - "RENDER_INPUT", - "RENDER_DEPTH_INPUT", - "MV_RENDER", - "MV_RENDER_DEPTH", - "MV_RENDER_INPUT", - "MV_RENDER_DEPTH_INPUT", - "MS_RENDER", - "MS_RENDER_DEPTH", - "MS_RENDER_INPUT", - "MS_RENDER_DEPTH_INPUT", - "MS_MV_RENDER", - "MS_MV_RENDER_DEPTH", - "MS_MV_RENDER_INPUT", - "MS_MV_RENDER_DEPTH_INPUT", - "BACKBUF", -}; - std::string VulkanQueueRunner::StepToString(VulkanContext *vulkan, const VKRStep &step) { char buffer[256]; switch (step.stepType) { @@ -703,7 +683,7 @@ std::string VulkanQueueRunner::StepToString(VulkanContext *vulkan, const VKRStep int h = step.render.framebuffer ? step.render.framebuffer->height : vulkan->GetBackbufferHeight(); int actual_w = step.render.renderArea.extent.width; int actual_h = step.render.renderArea.extent.height; - const char *renderCmd = rpTypeDebugNames[(size_t)step.render.renderPassType]; + const char *renderCmd = GetRPTypeName(step.render.renderPassType); snprintf(buffer, sizeof(buffer), "%s %s %s (draws: %d, %dx%d/%dx%d)", renderCmd, step.tag, step.render.framebuffer ? 
step.render.framebuffer->Tag() : "", step.render.numDraws, actual_w, actual_h, w, h); break; } diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp index fff194adc256..4414af64f9c0 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.cpp +++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp @@ -288,7 +288,6 @@ bool VulkanRenderManager::CreateBackbuffers() { return false; } - VkCommandBuffer cmdInit = GetInitCmd(); if (!queueRunner_.CreateSwapchain(cmdInit)) { @@ -310,6 +309,11 @@ bool VulkanRenderManager::CreateBackbuffers() { outOfDateFrames_ = 0; + for (int i = 0; i < vulkan_->GetInflightFrames(); i++) { + auto &frameData = frameData_[i]; + frameData.readyForFence = true; // Just in case. + } + // Start the thread(s). if (HasBackbuffers()) { run_ = true; // For controlling the compiler thread's exit diff --git a/Common/GPU/Vulkan/thin3d_vulkan.cpp b/Common/GPU/Vulkan/thin3d_vulkan.cpp index dc9aaf60ef0e..c5b4d8787625 100644 --- a/Common/GPU/Vulkan/thin3d_vulkan.cpp +++ b/Common/GPU/Vulkan/thin3d_vulkan.cpp @@ -874,8 +874,11 @@ VKContext::VKContext(VulkanContext *vulkan, bool useRenderThread) caps_.tesselationShaderSupported = vulkan->GetDeviceFeatures().enabled.standard.tessellationShader != 0; caps_.dualSourceBlend = vulkan->GetDeviceFeatures().enabled.standard.dualSrcBlend != 0; caps_.depthClampSupported = vulkan->GetDeviceFeatures().enabled.standard.depthClamp != 0; + + // Comment out these two to test geometry shader culling on any geometry shader-supporting hardware. 
caps_.clipDistanceSupported = vulkan->GetDeviceFeatures().enabled.standard.shaderClipDistance != 0; caps_.cullDistanceSupported = vulkan->GetDeviceFeatures().enabled.standard.shaderCullDistance != 0; + caps_.framebufferBlitSupported = true; caps_.framebufferCopySupported = true; caps_.framebufferDepthBlitSupported = vulkan->GetDeviceInfo().canBlitToPreferredDepthStencilFormat; diff --git a/Common/Input/InputState.h b/Common/Input/InputState.h index c01680a0ae64..a94547443d23 100644 --- a/Common/Input/InputState.h +++ b/Common/Input/InputState.h @@ -31,7 +31,7 @@ enum InputDeviceID { DEVICE_ID_XINPUT_1 = 21, DEVICE_ID_XINPUT_2 = 22, DEVICE_ID_XINPUT_3 = 23, - DEVICE_ID_ACCELEROMETER = 30, + DEVICE_ID_ACCELEROMETER = 30, // no longer used DEVICE_ID_XR_HMD = 39, DEVICE_ID_XR_CONTROLLER_LEFT = 40, DEVICE_ID_XR_CONTROLLER_RIGHT = 41, diff --git a/Common/Input/KeyCodes.h b/Common/Input/KeyCodes.h index dc2b9897ce37..8614e5a2d287 100644 --- a/Common/Input/KeyCodes.h +++ b/Common/Input/KeyCodes.h @@ -305,7 +305,7 @@ enum InputAxis { JOYSTICK_AXIS_MOUSE_REL_X = 26, JOYSTICK_AXIS_MOUSE_REL_Y = 27, - // Mobile device accelerometer/gyro + // Mobile device accelerometer/gyro. NOTE: These are no longer passed around internally, only used for the plugin API. JOYSTICK_AXIS_ACCELEROMETER_X = 40, JOYSTICK_AXIS_ACCELEROMETER_Y = 41, JOYSTICK_AXIS_ACCELEROMETER_Z = 42, diff --git a/Common/Log.cpp b/Common/Log.cpp index 28c2f821d005..988e51f439fb 100644 --- a/Common/Log.cpp +++ b/Common/Log.cpp @@ -25,6 +25,7 @@ #include "StringUtils.h" #include "Common/Data/Encoding/Utf8.h" #include "Common/Thread/ThreadUtil.h" +#include "Common/TimeUtil.h" #if PPSSPP_PLATFORM(ANDROID) #include @@ -38,10 +39,12 @@ static bool hitAnyAsserts = false; std::mutex g_extraAssertInfoMutex; std::string g_extraAssertInfo = "menu"; +double g_assertInfoTime = 0.0; void SetExtraAssertInfo(const char *info) { std::lock_guard guard(g_extraAssertInfoMutex); g_extraAssertInfo = info ? 
info : "menu"; + g_assertInfoTime = time_now_d(); } bool HandleAssert(const char *function, const char *file, int line, const char *expression, const char* format, ...) { @@ -57,7 +60,8 @@ bool HandleAssert(const char *function, const char *file, int line, const char * char formatted[LOG_BUF_SIZE + 128]; { std::lock_guard guard(g_extraAssertInfoMutex); - snprintf(formatted, sizeof(formatted), "(%s:%s:%d): [%s] (%s) %s", file, function, line, expression, g_extraAssertInfo.c_str(), text); + double delta = time_now_d() - g_assertInfoTime; + snprintf(formatted, sizeof(formatted), "(%s:%s:%d): [%s] (%s, %0.1fs) %s", file, function, line, expression, g_extraAssertInfo.c_str(), delta, text); } // Normal logging (will also log to Android log) diff --git a/Common/Net/HTTPClient.cpp b/Common/Net/HTTPClient.cpp index b68ad6bfe961..c6366af8262d 100644 --- a/Common/Net/HTTPClient.cpp +++ b/Common/Net/HTTPClient.cpp @@ -30,6 +30,7 @@ #include "Common/Net/URL.h" #include "Common/File/FileDescriptor.h" +#include "Common/SysError.h" #include "Common/Thread/ThreadUtil.h" #include "Common/Data/Encoding/Compression.h" #include "Common/Net/NetBuffer.h" @@ -97,7 +98,7 @@ static void FormatAddr(char *addrbuf, size_t bufsize, const addrinfo *info) { switch (info->ai_family) { case AF_INET: case AF_INET6: - inet_ntop(info->ai_family, info->ai_addr, addrbuf, bufsize); + inet_ntop(info->ai_family, &((sockaddr_in *)info->ai_addr)->sin_addr, addrbuf, bufsize); break; default: snprintf(addrbuf, bufsize, "(Unknown AF %d)", info->ai_family); @@ -131,11 +132,22 @@ bool Connection::Connect(int maxTries, double timeout, bool *cancelConnect) { // Start trying to connect (async with timeout.) 
errno = 0; if (connect(sock, possible->ai_addr, (int)possible->ai_addrlen) < 0) { - if (errno != 0 && errno != EINPROGRESS) { - char addrStr[128]; +#if PPSSPP_PLATFORM(WINDOWS) + int errorCode = WSAGetLastError(); + std::string errorString = GetStringErrorMsg(errorCode); + bool unreachable = errorCode == WSAENETUNREACH; + bool inProgress = errorCode == WSAEINPROGRESS || errorCode == WSAEWOULDBLOCK; +#else + int errorCode = errno; + std::string errorString = strerror(errno); + bool unreachable = errorCode == ENETUNREACH; + bool inProgress = errorCode == EINPROGRESS || errorCode == EWOULDBLOCK; +#endif + if (!inProgress) { + char addrStr[128]{}; FormatAddr(addrStr, sizeof(addrStr), possible); - if (errno != ENETUNREACH) { - ERROR_LOG(HTTP, "connect(%d) call to %s failed (%d: %s)", sock, addrStr, errno, strerror(errno)); + if (!unreachable) { + ERROR_LOG(HTTP, "connect(%d) call to %s failed (%d: %s)", sock, addrStr, errorCode, errorString.c_str()); } else { INFO_LOG(HTTP, "connect(%d): Ignoring unreachable resolved address %s", sock, addrStr); } @@ -207,9 +219,9 @@ namespace http { // TODO: do something sane here constexpr const char *DEFAULT_USERAGENT = "PPSSPP"; +constexpr const char *HTTP_VERSION = "1.1"; Client::Client() { - httpVersion_ = "1.1"; userAgent_ = DEFAULT_USERAGENT; } @@ -341,7 +353,7 @@ int Client::SendRequestWithData(const char *method, const RequestParams &req, co "\r\n"; buffer.Printf(tpl, - method, req.resource.c_str(), httpVersion_, + method, req.resource.c_str(), HTTP_VERSION, host_.c_str(), userAgent_.c_str(), req.acceptMime, diff --git a/Common/Net/HTTPClient.h b/Common/Net/HTTPClient.h index dd104e2fa603..619ab80423b6 100644 --- a/Common/Net/HTTPClient.h +++ b/Common/Net/HTTPClient.h @@ -86,7 +86,6 @@ class Client : public net::Connection { protected: std::string userAgent_; - const char *httpVersion_; double dataTimeout_ = 900.0; }; diff --git a/Common/Render/Text/draw_text_sdl.cpp b/Common/Render/Text/draw_text_sdl.cpp index 
8a4178d71a1d..494858355648 100644 --- a/Common/Render/Text/draw_text_sdl.cpp +++ b/Common/Render/Text/draw_text_sdl.cpp @@ -378,7 +378,7 @@ void TextDrawerSDL::DrawStringBitmap(std::vector &bitmapData, TextStrin font = fallbackFonts_[0]; } -#ifndef USE_SDL2_TTF_PKGCONFIG +#if SDL_TTF_VERSION_ATLEAST(2, 20, 0) if (align & ALIGN_HCENTER) TTF_SetFontWrappedAlign(font, TTF_WRAPPED_ALIGN_CENTER); else if (align & ALIGN_RIGHT) diff --git a/Common/System/NativeApp.h b/Common/System/NativeApp.h index 94a67c9eb9c8..4799fa6f11a8 100644 --- a/Common/System/NativeApp.h +++ b/Common/System/NativeApp.h @@ -55,6 +55,7 @@ bool NativeIsRestarting(); void NativeTouch(const TouchInput &touch); bool NativeKey(const KeyInput &key); void NativeAxis(const AxisInput *axis, size_t count); +void NativeAccelerometer(float tiltX, float tiltY, float tiltZ); // Called when it's process a frame, including rendering. If the device can keep up, this // will be called sixty times per second. Main thread. diff --git a/Common/Thread/Promise.h b/Common/Thread/Promise.h index f8dbaf9e6a1e..93e4dfd98507 100644 --- a/Common/Thread/Promise.h +++ b/Common/Thread/Promise.h @@ -45,6 +45,7 @@ class PromiseTask : public Task { template class Promise { public: + // Never fails. static Promise *Spawn(ThreadManager *threadman, std::function fun, TaskType taskType, TaskPriority taskPriority = TaskPriority::NORMAL) { Mailbox *mailbox = new Mailbox(); diff --git a/Common/UI/PopupScreens.cpp b/Common/UI/PopupScreens.cpp index 95714b9de76f..1d470d1242da 100644 --- a/Common/UI/PopupScreens.cpp +++ b/Common/UI/PopupScreens.cpp @@ -122,7 +122,11 @@ void PopupMultiChoice::UpdateText() { if (index < 0 || index >= numChoices_) { valueText_ = "(invalid choice)"; // Shouldn't happen. Should be no need to translate this. 
} else { - valueText_ = T(category_, choices_[index]); + if (choices_[index]) { + valueText_ = T(category_, choices_[index]); + } else { + valueText_ = ""; + } } } diff --git a/Common/UI/Screen.cpp b/Common/UI/Screen.cpp index 80b8fced14ff..bd8c713da2dc 100644 --- a/Common/UI/Screen.cpp +++ b/Common/UI/Screen.cpp @@ -227,9 +227,11 @@ void ScreenManager::getFocusPosition(float &x, float &y, float &z) { } void ScreenManager::sendMessage(const char *msg, const char *value) { - if (!strcmp(msg, "recreateviews")) + if (!msg) { + _dbg_assert_msg_(false, "Empty msg in ScreenManager::sendMessage"); + } else if (!strcmp(msg, "recreateviews")) { RecreateAllViews(); - if (!strcmp(msg, "lost_focus")) { + } else if (!strcmp(msg, "lost_focus")) { TouchInput input{}; input.x = -50000.0f; input.y = -50000.0f; @@ -238,6 +240,7 @@ void ScreenManager::sendMessage(const char *msg, const char *value) { input.id = 0; touch(input); } + if (!stack_.empty()) stack_.back().screen->sendMessage(msg, value); } diff --git a/Common/x64Emitter.cpp b/Common/x64Emitter.cpp index c2a5ba8c4d49..814fc7e0d631 100644 --- a/Common/x64Emitter.cpp +++ b/Common/x64Emitter.cpp @@ -1697,7 +1697,6 @@ void XEmitter::MOVMSKPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x50, dest, ar void XEmitter::LDDQU(X64Reg dest, OpArg arg) {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only -// THESE TWO ARE UNTESTED. 
void XEmitter::UNPCKLPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x14, dest, arg);} void XEmitter::UNPCKHPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x15, dest, arg);} @@ -1892,6 +1891,9 @@ void XEmitter::PTEST(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3817, dest void XEmitter::PACKUSDW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x382b, dest, arg);} void XEmitter::DPPS(X64Reg dest, OpArg arg, u8 mask) {WriteSSE41Op(0x66, 0x3A40, dest, arg, 1); Write8(mask);} +void XEmitter::INSERTPS(X64Reg dest, OpArg arg, u8 dstsubreg, u8 srcsubreg, u8 zmask) { WriteSSE41Op(0x66, 0x3A21, dest, arg, 1); Write8((srcsubreg << 6) | (dstsubreg << 4) | zmask); } +void XEmitter::EXTRACTPS(OpArg dest, X64Reg arg, u8 subreg) { WriteSSE41Op(0x66, 0x3A17, arg, dest, 1); Write8(subreg); } + void XEmitter::PMINSB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3838, dest, arg);} void XEmitter::PMINSD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3839, dest, arg);} void XEmitter::PMINUW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383a, dest, arg);} @@ -2084,7 +2086,7 @@ void XEmitter::VCVTTPD2DQ(int bits, X64Reg regOp1, OpArg arg) { WriteAVXOp(bits, void XEmitter::VCVTTSS2SI(int bits, X64Reg regOp1, OpArg arg) { WriteAVXOp(0, 0xF3, 0x2C, regOp1, arg, 0, bits == 64 ? 1 : 0); } void XEmitter::VCVTTSD2SI(int bits, X64Reg regOp1, OpArg arg) { WriteAVXOp(0, 0xF2, 0x2C, regOp1, arg, 0, bits == 64 ? 
1 : 0); } void XEmitter::VEXTRACTPS(OpArg arg, X64Reg regOp1, u8 subreg) { WriteAVXOp(0, 0x66, 0x3A17, regOp1, arg, 1); Write8(subreg); } -void XEmitter::VINSERTPS(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 subreg) { WriteAVXOp(0, 0x66, 0x3A21, regOp1, regOp2, arg, 1); Write8(subreg); } +void XEmitter::VINSERTPS(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 dstsubreg, u8 srcsubreg, u8 zmask) { WriteAVXOp(0, 0x66, 0x3A21, regOp1, regOp2, arg, 1); Write8((srcsubreg << 6) | (dstsubreg << 4) | zmask); } void XEmitter::VLDDQU(int bits, X64Reg regOp1, OpArg arg) { WriteAVXOp(bits, 0xF2, sseLDDQU, regOp1, arg); } void XEmitter::VMOVAPS(int bits, X64Reg regOp1, OpArg arg) { WriteAVXOp(bits, 0x00, sseMOVAPfromRM, regOp1, arg); } void XEmitter::VMOVAPD(int bits, X64Reg regOp1, OpArg arg) { WriteAVXOp(bits, 0x66, sseMOVAPfromRM, regOp1, arg); } diff --git a/Common/x64Emitter.h b/Common/x64Emitter.h index 16f30a35b0f2..832ed767cbdd 100644 --- a/Common/x64Emitter.h +++ b/Common/x64Emitter.h @@ -684,12 +684,14 @@ class XEmitter // SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask". void DPPD(X64Reg dest, OpArg src, u8 arg); - - // These are probably useful for VFPU emulation. - void INSERTPS(X64Reg dest, OpArg src, u8 arg); - void EXTRACTPS(OpArg dest, X64Reg src, u8 arg); #endif + // SSE4: Insert and extract for floats. + // Note: insert from memory or an XMM. + void INSERTPS(X64Reg dest, OpArg arg, u8 dstsubreg, u8 srcsubreg = 0, u8 zmask = 0); + // Extract to memory or GPR. + void EXTRACTPS(OpArg dest, X64Reg arg, u8 subreg); + // SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily on Ivy. void HADDPS(X64Reg dest, OpArg src); @@ -1040,7 +1042,7 @@ class XEmitter // Can only extract from the low 128 bits. void VEXTRACTPS(OpArg arg, X64Reg regOp1, u8 subreg); // Can only insert into the low 128 bits, zeros upper bits. Inserts from XMM. 
- void VINSERTPS(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 subreg); + void VINSERTPS(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 dstsubreg, u8 srcsubreg = 0, u8 zmask = 0); void VLDDQU(int bits, X64Reg regOp1, OpArg arg); void VMOVAPS(int bits, X64Reg regOp1, OpArg arg); void VMOVAPD(int bits, X64Reg regOp1, OpArg arg); diff --git a/Core/ControlMapper.cpp b/Core/ControlMapper.cpp index 5fd5c50ee0fe..b27a619cfa07 100644 --- a/Core/ControlMapper.cpp +++ b/Core/ControlMapper.cpp @@ -480,8 +480,9 @@ void ControlMapper::Axis(const AxisInput &axis) { double now = time_now_d(); std::lock_guard guard(mutex_); - if (axis.deviceId < DEVICE_ID_COUNT) { - deviceTimestamps_[(int)axis.deviceId] = now; + size_t deviceIndex = (size_t)axis.deviceId; // this'll wrap around ANY (-1) to max, which will eliminate it on the next line, if such an event appears by mistake. + if (deviceIndex < (size_t)DEVICE_ID_COUNT) { + deviceTimestamps_[deviceIndex] = now; } if (axis.value >= 0.0f) { InputMapping mapping(axis.deviceId, axis.axisId, 1); diff --git a/Core/ControlMapper.h b/Core/ControlMapper.h index c90ae68daea3..c2d6c4cd1669 100644 --- a/Core/ControlMapper.h +++ b/Core/ControlMapper.h @@ -62,7 +62,7 @@ class ControlMapper { float virtKeys_[VIRTKEY_COUNT]{}; bool virtKeyOn_[VIRTKEY_COUNT]{}; // Track boolean output separaately since thresholds may differ. 
- double deviceTimestamps_[42]{}; + double deviceTimestamps_[(size_t)DEVICE_ID_COUNT]{}; int lastNonDeadzoneDeviceID_[2]{}; diff --git a/Core/Core.vcxproj b/Core/Core.vcxproj index cbba66cf140f..4b037a33843a 100644 --- a/Core/Core.vcxproj +++ b/Core/Core.vcxproj @@ -138,7 +138,7 @@ Level3 - ..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib + ..\ffmpeg\WindowsInclude;..\ext\libchdr\include;..\ffmpeg\Windows\x86\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib _CRTDBG_MAP_ALLOC;USING_WIN_UI;_CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;USE_FFMPEG;WITH_UPNP;WIN32;_ARCH_32=1;_M_IX86=1;_DEBUG;_LIB;_UNICODE;UNICODE;MINIUPNP_STATICLIB;ARMIPS_USE_STD_FILESYSTEM;%(PreprocessorDefinitions) StreamingSIMDExtensions2 Precise @@ -165,7 +165,7 @@ Level3 - ..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib;../ext/zstd/lib + ..\ffmpeg\WindowsInclude;..\ext\libchdr\include;..\ffmpeg\Windows\x86_64\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib;../ext/zstd/lib _CRTDBG_MAP_ALLOC;USING_WIN_UI;_CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;USE_FFMPEG;WITH_UPNP;WIN32;_ARCH_64=1;_M_X64=1;_DEBUG;_LIB;_UNICODE;UNICODE;MINIUPNP_STATICLIB;ARMIPS_USE_STD_FILESYSTEM;%(PreprocessorDefinitions) NotSet Precise @@ -193,7 +193,7 @@ Level3 - ..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\aarch64\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib + ..\ffmpeg\WindowsInclude;..\ext\libchdr\include;..\ffmpeg\Windows\aarch64\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib 
_CRTDBG_MAP_ALLOC;USING_WIN_UI;_CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;USE_FFMPEG;WITH_UPNP;WIN32;_ARCH_64=1;_DEBUG;_LIB;_UNICODE;UNICODE;ARMIPS_USE_STD_FILESYSTEM;%(PreprocessorDefinitions) NotSet Precise @@ -221,7 +221,7 @@ Level3 - ..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\arm\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib + ..\ffmpeg\WindowsInclude;..\ext\libchdr\include;..\ffmpeg\Windows\arm\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib _CRTDBG_MAP_ALLOC;USING_WIN_UI;_CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;USE_FFMPEG;WITH_UPNP;WIN32;_ARCH_32=1;_DEBUG;_LIB;_UNICODE;UNICODE;ARMIPS_USE_STD_FILESYSTEM;%(PreprocessorDefinitions) NotSet Precise @@ -253,7 +253,7 @@ MaxSpeed true true - ..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib + ..\ffmpeg\WindowsInclude;..\ext\libchdr\include;..\ffmpeg\Windows\x86\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib false StreamingSIMDExtensions2 Precise @@ -286,7 +286,7 @@ MaxSpeed true true - ..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib;../ext/zstd/lib + ..\ffmpeg\WindowsInclude;..\ext\libchdr\include;..\ffmpeg\Windows\x86_64\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib;../ext/zstd/lib NotSet Precise false @@ -321,7 +321,7 @@ MaxSpeed true true - ..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\aarch64\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib + ..\ffmpeg\WindowsInclude;..\ext\libchdr\include;..\ffmpeg\Windows\aarch64\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib NotSet Precise false @@ -356,7 +356,7 @@ MaxSpeed 
true true - ..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\arm\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib + ..\ffmpeg\WindowsInclude;..\ext\libchdr\include;..\ffmpeg\Windows\arm\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib NotSet Precise false @@ -1466,6 +1466,9 @@ {129e5e2b-39c1-4d84-96fe-dfd22dbb4a25} + + {956f1f48-b612-46d8-89ee-96996dcd9383} + {d8a71225-178b-424e-96c1-cc3be2c1b047} diff --git a/Core/Debugger/MemBlockInfo.cpp b/Core/Debugger/MemBlockInfo.cpp index 93f44f41d0ad..9daa39d4faee 100644 --- a/Core/Debugger/MemBlockInfo.cpp +++ b/Core/Debugger/MemBlockInfo.cpp @@ -17,8 +17,10 @@ #include #include +#include #include #include +#include #include "Common/Log.h" #include "Common/Serialize/Serializer.h" @@ -78,12 +80,15 @@ struct PendingNotifyMem { MemBlockFlags flags; uint32_t start; uint32_t size; + uint32_t copySrc; uint64_t ticks; uint32_t pc; char tag[128]; }; -static constexpr size_t MAX_PENDING_NOTIFIES = 512; +// 160 KB. +static constexpr size_t MAX_PENDING_NOTIFIES = 1024; +static constexpr size_t MAX_PENDING_NOTIFIES_THREAD = 1000; static MemSlabMap allocMap; static MemSlabMap suballocMap; static MemSlabMap writeMap; @@ -93,9 +98,17 @@ static std::atomic pendingNotifyMinAddr1; static std::atomic pendingNotifyMaxAddr1; static std::atomic pendingNotifyMinAddr2; static std::atomic pendingNotifyMaxAddr2; -static std::mutex pendingMutex; +// To prevent deadlocks, acquire Read before Write if you're going to acquire both. 
+static std::mutex pendingWriteMutex; +static std::mutex pendingReadMutex; static int detailedOverride; +static std::thread flushThread; +static std::atomic flushThreadRunning; +static std::atomic flushThreadPending; +static std::mutex flushLock; +static std::condition_variable flushCond; + MemSlabMap::MemSlabMap() { Reset(); } @@ -369,9 +382,32 @@ void MemSlabMap::FillHeads(Slab *slab) { } } +size_t FormatMemWriteTagAtNoFlush(char *buf, size_t sz, const char *prefix, uint32_t start, uint32_t size); + void FlushPendingMemInfo() { - std::lock_guard guard(pendingMutex); - for (const auto &info : pendingNotifies) { + // This lock prevents us from another thread reading while we're busy flushing. + std::lock_guard guard(pendingReadMutex); + std::vector thisBatch; + { + std::lock_guard guard(pendingWriteMutex); + thisBatch = std::move(pendingNotifies); + pendingNotifies.clear(); + pendingNotifies.reserve(MAX_PENDING_NOTIFIES); + + pendingNotifyMinAddr1 = 0xFFFFFFFF; + pendingNotifyMaxAddr1 = 0; + pendingNotifyMinAddr2 = 0xFFFFFFFF; + pendingNotifyMaxAddr2 = 0; + } + + for (const auto &info : thisBatch) { + if (info.copySrc != 0) { + char tagData[128]; + size_t tagSize = FormatMemWriteTagAtNoFlush(tagData, sizeof(tagData), info.tag, info.copySrc, info.size); + writeMap.Mark(info.start, info.size, info.ticks, info.pc, true, tagData); + continue; + } + if (info.flags & MemBlockFlags::ALLOC) { allocMap.Mark(info.start, info.size, info.ticks, info.pc, true, info.tag); } else if (info.flags & MemBlockFlags::FREE) { @@ -392,11 +428,6 @@ void FlushPendingMemInfo() { writeMap.Mark(info.start, info.size, info.ticks, info.pc, true, info.tag); } } - pendingNotifies.clear(); - pendingNotifyMinAddr1 = 0xFFFFFFFF; - pendingNotifyMaxAddr1 = 0; - pendingNotifyMinAddr2 = 0xFFFFFFFF; - pendingNotifyMaxAddr2 = 0; } static inline uint32_t NormalizeAddress(uint32_t addr) { @@ -411,6 +442,9 @@ static inline bool MergeRecentMemInfo(const PendingNotifyMem &info, size_t copyL for (size_t i = 1; 
i <= 4; ++i) { auto &prev = pendingNotifies[pendingNotifies.size() - i]; + if (prev.copySrc != 0) + return false; + if (prev.flags != info.flags) continue; @@ -440,7 +474,7 @@ void NotifyMemInfoPC(MemBlockFlags flags, uint32_t start, uint32_t size, uint32_ bool needFlush = false; // When the setting is off, we skip smaller info to keep things fast. - if (MemBlockInfoDetailed(size)) { + if (MemBlockInfoDetailed(size) && flags != MemBlockFlags::READ) { PendingNotifyMem info{ flags, start, size }; info.ticks = CoreTiming::GetTicks(); info.pc = pc; @@ -452,7 +486,7 @@ void NotifyMemInfoPC(MemBlockFlags flags, uint32_t start, uint32_t size, uint32_ memcpy(info.tag, tagStr, copyLength); info.tag[copyLength] = 0; - std::lock_guard guard(pendingMutex); + std::lock_guard guard(pendingWriteMutex); // Sometimes we get duplicates, quickly check. if (!MergeRecentMemInfo(info, copyLength)) { if (start < 0x08000000) { @@ -464,11 +498,15 @@ void NotifyMemInfoPC(MemBlockFlags flags, uint32_t start, uint32_t size, uint32_ } pendingNotifies.push_back(info); } - needFlush = pendingNotifies.size() > MAX_PENDING_NOTIFIES; + needFlush = pendingNotifies.size() > MAX_PENDING_NOTIFIES_THREAD; } if (needFlush) { - FlushPendingMemInfo(); + { + std::lock_guard guard(flushLock); + flushThreadPending = true; + } + flushCond.notify_one(); } if (!(flags & MemBlockFlags::SKIP_MEMCHECK)) { @@ -484,6 +522,50 @@ void NotifyMemInfo(MemBlockFlags flags, uint32_t start, uint32_t size, const cha NotifyMemInfoPC(flags, start, size, currentMIPS->pc, str, strLength); } +void NotifyMemInfoCopy(uint32_t destPtr, uint32_t srcPtr, uint32_t size, const char *prefix) { + if (size == 0) + return; + + bool needsFlush = false; + if (CBreakPoints::HasMemChecks()) { + // This will cause a flush, but it's needed to trigger memchecks with proper data. 
+ char tagData[128]; + size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), prefix, srcPtr, size); + NotifyMemInfo(MemBlockFlags::READ, srcPtr, size, tagData, tagSize); + NotifyMemInfo(MemBlockFlags::WRITE, destPtr, size, tagData, tagSize); + } else if (MemBlockInfoDetailed(size)) { + srcPtr = NormalizeAddress(srcPtr); + destPtr = NormalizeAddress(destPtr); + + PendingNotifyMem info{ MemBlockFlags::WRITE, destPtr, size }; + info.copySrc = srcPtr; + info.ticks = CoreTiming::GetTicks(); + info.pc = currentMIPS->pc; + + // Store the prefix for now. The correct tag will be calculated on flush. + truncate_cpy(info.tag, prefix); + + std::lock_guard guard(pendingWriteMutex); + if (destPtr < 0x08000000) { + pendingNotifyMinAddr1 = std::min(pendingNotifyMinAddr1.load(), destPtr); + pendingNotifyMaxAddr1 = std::max(pendingNotifyMaxAddr1.load(), destPtr + size); + } else { + pendingNotifyMinAddr2 = std::min(pendingNotifyMinAddr2.load(), destPtr); + pendingNotifyMaxAddr2 = std::max(pendingNotifyMaxAddr2.load(), destPtr + size); + } + pendingNotifies.push_back(info); + needsFlush = pendingNotifies.size() > MAX_PENDING_NOTIFIES_THREAD; + } + + if (needsFlush) { + { + std::lock_guard guard(flushLock); + flushThreadPending = true; + } + flushCond.notify_one(); + } +} + std::vector FindMemInfo(uint32_t start, uint32_t size) { start = NormalizeAddress(start); @@ -520,13 +602,15 @@ std::vector FindMemInfoByFlag(MemBlockFlags flags, uint32_t start, return results; } -static const char *FindWriteTagByFlag(MemBlockFlags flags, uint32_t start, uint32_t size) { +static const char *FindWriteTagByFlag(MemBlockFlags flags, uint32_t start, uint32_t size, bool flush = true) { start = NormalizeAddress(start); - if (pendingNotifyMinAddr1 < start + size && pendingNotifyMaxAddr1 >= start) - FlushPendingMemInfo(); - if (pendingNotifyMinAddr2 < start + size && pendingNotifyMaxAddr2 >= start) - FlushPendingMemInfo(); + if (flush) { + if (pendingNotifyMinAddr1 < start + size && 
pendingNotifyMaxAddr1 >= start) + FlushPendingMemInfo(); + if (pendingNotifyMinAddr2 < start + size && pendingNotifyMaxAddr2 >= start) + FlushPendingMemInfo(); + } if (flags & MemBlockFlags::ALLOC) { const char *tag = allocMap.FastFindWriteTag(MemBlockFlags::ALLOC, start, size); @@ -564,22 +648,63 @@ size_t FormatMemWriteTagAt(char *buf, size_t sz, const char *prefix, uint32_t st return snprintf(buf, sz, "%s%08x_size_%08x", prefix, start, size); } +size_t FormatMemWriteTagAtNoFlush(char *buf, size_t sz, const char *prefix, uint32_t start, uint32_t size) { + const char *tag = FindWriteTagByFlag(MemBlockFlags::WRITE, start, size, false); + if (tag && strcmp(tag, "MemInit") != 0) { + return snprintf(buf, sz, "%s%s", prefix, tag); + } + // Fall back to alloc and texture, especially for VRAM. We prefer write above. + tag = FindWriteTagByFlag(MemBlockFlags::ALLOC | MemBlockFlags::TEXTURE, start, size, false); + if (tag) { + return snprintf(buf, sz, "%s%s", prefix, tag); + } + return snprintf(buf, sz, "%s%08x_size_%08x", prefix, start, size); +} + +static void FlushMemInfoThread() { + while (flushThreadRunning.load()) { + flushThreadPending = false; + FlushPendingMemInfo(); + + std::unique_lock guard(flushLock); + flushCond.wait(guard, [] { + return flushThreadPending.load(); + }); + } +} + void MemBlockInfoInit() { - std::lock_guard guard(pendingMutex); + std::lock_guard guard(pendingReadMutex); + std::lock_guard guardW(pendingWriteMutex); pendingNotifies.reserve(MAX_PENDING_NOTIFIES); pendingNotifyMinAddr1 = 0xFFFFFFFF; pendingNotifyMaxAddr1 = 0; pendingNotifyMinAddr2 = 0xFFFFFFFF; pendingNotifyMaxAddr2 = 0; + + flushThreadRunning = true; + flushThreadPending = false; + flushThread = std::thread(&FlushMemInfoThread); } void MemBlockInfoShutdown() { - std::lock_guard guard(pendingMutex); - allocMap.Reset(); - suballocMap.Reset(); - writeMap.Reset(); - textureMap.Reset(); - pendingNotifies.clear(); + { + std::lock_guard guard(pendingReadMutex); + std::lock_guard 
guardW(pendingWriteMutex); + allocMap.Reset(); + suballocMap.Reset(); + writeMap.Reset(); + textureMap.Reset(); + pendingNotifies.clear(); + } + + if (flushThreadRunning.load()) { + std::lock_guard guard(flushLock); + flushThreadRunning = false; + flushThreadPending = true; + } + flushCond.notify_one(); + flushThread.join(); } void MemBlockInfoDoState(PointerWrap &p) { diff --git a/Core/Debugger/MemBlockInfo.h b/Core/Debugger/MemBlockInfo.h index 108423d53f4b..b07c326f82b0 100644 --- a/Core/Debugger/MemBlockInfo.h +++ b/Core/Debugger/MemBlockInfo.h @@ -53,6 +53,7 @@ struct MemBlockInfo { void NotifyMemInfo(MemBlockFlags flags, uint32_t start, uint32_t size, const char *tag, size_t tagLength); void NotifyMemInfoPC(MemBlockFlags flags, uint32_t start, uint32_t size, uint32_t pc, const char *tag, size_t tagLength); +void NotifyMemInfoCopy(uint32_t destPtr, uint32_t srcPtr, uint32_t size, const char *prefix); // This lets us avoid calling strlen on string constants, instead the string length (including null, // so we have to subtract 1) is computed at compile time. 
diff --git a/Core/FileSystems/BlockDevices.cpp b/Core/FileSystems/BlockDevices.cpp index c1c930f0c610..eaeb4c6261ee 100644 --- a/Core/FileSystems/BlockDevices.cpp +++ b/Core/FileSystems/BlockDevices.cpp @@ -24,8 +24,11 @@ #include "Common/System/OSD.h" #include "Common/Log.h" #include "Common/Swap.h" +#include "Common/File/FileUtil.h" +#include "Common/File/DirListing.h" #include "Core/Loaders.h" #include "Core/FileSystems/BlockDevices.h" +#include "libchdr/chd.h" extern "C" { @@ -37,19 +40,28 @@ extern "C" std::mutex NPDRMDemoBlockDevice::mutex_; BlockDevice *constructBlockDevice(FileLoader *fileLoader) { - // Check for CISO if (!fileLoader->Exists()) return nullptr; - char buffer[4]{}; - size_t size = fileLoader->ReadAt(0, 1, 4, buffer); - if (size == 4 && !memcmp(buffer, "CISO", 4)) + char buffer[8]{}; + size_t size = fileLoader->ReadAt(0, 1, 8, buffer); + if (size != 8) { + // Bad or empty file + return nullptr; + } + + // Check for CISO + if (!memcmp(buffer, "CISO", 4)) { return new CISOFileBlockDevice(fileLoader); - if (size == 4 && !memcmp(buffer, "\x00PBP", 4)) { + } else if (!memcmp(buffer, "\x00PBP", 4)) { uint32_t psarOffset = 0; size = fileLoader->ReadAt(0x24, 1, 4, &psarOffset); if (size == 4 && psarOffset < fileLoader->FileSize()) return new NPDRMDemoBlockDevice(fileLoader); + } else if (!memcmp(buffer, "MComprHD", 8)) { + return new CHDFileBlockDevice(fileLoader); } + + // Should be just a regular ISO. Let's open it as a plain block device and let the other systems take over. 
return new FileBlockDevice(fileLoader); } @@ -393,7 +405,7 @@ NPDRMDemoBlockDevice::NPDRMDemoBlockDevice(FileLoader *fileLoader) fileLoader_->ReadAt(0x24, 1, 4, &psarOffset); size_t readSize = fileLoader_->ReadAt(psarOffset, 1, 256, &np_header); - if(readSize!=256){ + if (readSize != 256){ ERROR_LOG(LOADER, "Invalid NPUMDIMG header!"); } @@ -445,7 +457,6 @@ NPDRMDemoBlockDevice::NPDRMDemoBlockDevice(FileLoader *fileLoader) } currentBlock = -1; - } NPDRMDemoBlockDevice::~NPDRMDemoBlockDevice() @@ -520,3 +531,150 @@ bool NPDRMDemoBlockDevice::ReadBlock(int blockNumber, u8 *outPtr, bool uncached) return true; } + +/* + * CHD file + */ +static const UINT8 nullsha1[CHD_SHA1_BYTES] = { 0 }; + +struct CHDImpl { + chd_file *chd = nullptr; + const chd_header *header = nullptr; +}; + +CHDFileBlockDevice::CHDFileBlockDevice(FileLoader *fileLoader) + : BlockDevice(fileLoader), impl_(new CHDImpl()) +{ + Path paths[8]; + paths[0] = fileLoader->GetPath(); + int depth = 0; + + /* + // TODO: Support parent/child CHD files. 
+ + // Default, in case of failure + numBlocks = 0; + + chd_header childHeader; + + chd_error err = chd_read_header(paths[0].c_str(), &childHeader); + if (err != CHDERR_NONE) { + ERROR_LOG(LOADER, "Error loading CHD header for '%s': %s", paths[0].c_str(), chd_error_string(err)); + NotifyReadError(); + return; + } + + if (memcmp(nullsha1, childHeader.parentsha1, sizeof(childHeader.sha1)) != 0) { + chd_header parentHeader; + + // Look for parent CHD in current directory + Path chdDir = paths[0].NavigateUp(); + + std::vector files; + if (File::GetFilesInDir(chdDir, &files)) { + parentHeader.length = 0; + + for (const auto &file : files) { + std::string extension = file.fullName.GetFileExtension(); + if (extension != ".chd") { + continue; + } + + if (chd_read_header(filepath.c_str(), &parentHeader) == CHDERR_NONE && + memcmp(parentHeader.sha1, childHeader.parentsha1, sizeof(parentHeader.sha1)) == 0) { + // ERROR_LOG(LOADER, "Checking '%s'", filepath.c_str()); + paths[++depth] = filepath; + break; + } + } + + // Check if parentHeader was opened + if (parentHeader.length == 0) { + ERROR_LOG(LOADER, "Error loading CHD '%s': parents not found", fileLoader->GetPath().c_str()); + NotifyReadError(); + return; + } + memcpy(childHeader.parentsha1, parentHeader.parentsha1, sizeof(childHeader.parentsha1)); + } while (memcmp(nullsha1, childHeader.parentsha1, sizeof(childHeader.sha1)) != 0); + } + */ + + chd_file *parent = NULL; + chd_file *child = NULL; + + FILE *file = File::OpenCFile(paths[depth], "rb"); + if (!file) { + ERROR_LOG(LOADER, "Error opening CHD file '%s'", paths[depth].c_str()); + NotifyReadError(); + return; + } + chd_error err = chd_open_file(file, CHD_OPEN_READ, NULL, &child); + if (err != CHDERR_NONE) { + ERROR_LOG(LOADER, "Error loading CHD '%s': %s", paths[depth].c_str(), chd_error_string(err)); + NotifyReadError(); + return; + } + + // We won't enter this loop until we enable the parent/child stuff above. 
+ for (int d = depth - 1; d >= 0; d--) { + parent = child; + child = NULL; + // TODO: Use chd_open_file + err = chd_open(paths[d].c_str(), CHD_OPEN_READ, parent, &child); + if (err != CHDERR_NONE) { + ERROR_LOG(LOADER, "Error loading CHD '%s': %s", paths[d].c_str(), chd_error_string(err)); + NotifyReadError(); + return; + } + } + impl_->chd = child; + + impl_->header = chd_get_header(impl_->chd); + readBuffer = new u8[impl_->header->hunkbytes]; + currentHunk = -1; + blocksPerHunk = impl_->header->hunkbytes / impl_->header->unitbytes; + numBlocks = impl_->header->unitcount; +} + +CHDFileBlockDevice::~CHDFileBlockDevice() +{ + if (numBlocks > 0) { + chd_close(impl_->chd); + delete[] readBuffer; + } +} + +bool CHDFileBlockDevice::ReadBlock(int blockNumber, u8 *outPtr, bool uncached) +{ + if ((u32)blockNumber >= numBlocks) { + memset(outPtr, 0, GetBlockSize()); + return false; + } + u32 hunk = blockNumber / blocksPerHunk; + u32 blockInHunk = blockNumber % blocksPerHunk; + + if (currentHunk != hunk) { + chd_error err = chd_read(impl_->chd, hunk, readBuffer); + if (err != CHDERR_NONE) { + ERROR_LOG(LOADER, "CHD read failed: %d %d %s", blockNumber, hunk, chd_error_string(err)); + NotifyReadError(); + } + } + memcpy(outPtr, readBuffer + blockInHunk * impl_->header->unitbytes, GetBlockSize()); + + return true; +} + +bool CHDFileBlockDevice::ReadBlocks(u32 minBlock, int count, u8 *outPtr) { + if (minBlock >= numBlocks) { + memset(outPtr, 0, GetBlockSize() * count); + return false; + } + + for (int i = 0; i < count; i++) { + if (!ReadBlock(minBlock + i, outPtr + i * GetBlockSize())) { + return false; + } + } + return true; +} diff --git a/Core/FileSystems/BlockDevices.h b/Core/FileSystems/BlockDevices.h index 3575d8cded8a..fd27f667fb91 100644 --- a/Core/FileSystems/BlockDevices.h +++ b/Core/FileSystems/BlockDevices.h @@ -130,5 +130,23 @@ class NPDRMDemoBlockDevice : public BlockDevice { u8 *tempBuf; }; +struct CHDImpl; + +class CHDFileBlockDevice : public BlockDevice { 
+public: + CHDFileBlockDevice(FileLoader *fileLoader); + ~CHDFileBlockDevice(); + bool ReadBlock(int blockNumber, u8 *outPtr, bool uncached = false) override; + bool ReadBlocks(u32 minBlock, int count, u8 *outPtr) override; + u32 GetNumBlocks() override { return numBlocks; } + bool IsDisc() override { return true; } + +private: + std::unique_ptr impl_; + u8 *readBuffer; + u32 currentHunk; + u32 blocksPerHunk; + u32 numBlocks; +}; BlockDevice *constructBlockDevice(FileLoader *fileLoader); diff --git a/Core/HLE/ReplaceTables.cpp b/Core/HLE/ReplaceTables.cpp index 30afa6e26b2c..4695d13926aa 100644 --- a/Core/HLE/ReplaceTables.cpp +++ b/Core/HLE/ReplaceTables.cpp @@ -159,16 +159,19 @@ static int Replace_memcpy() { RETURN(destPtr); if (MemBlockInfoDetailed(bytes)) { - char tagData[128]; - size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "ReplaceMemcpy/", srcPtr, bytes); - NotifyMemInfo(MemBlockFlags::READ, srcPtr, bytes, tagData, tagSize); - NotifyMemInfo(MemBlockFlags::WRITE, destPtr, bytes, tagData, tagSize); - // It's pretty common that games will copy video data. - if (!strcmp(tagData, "ReplaceMemcpy/VideoDecode") || !strcmp(tagData, "ReplaceMemcpy/VideoDecodeRange")) { - if (bytes == 512 * 272 * 4) { + // Detect that by manually reading the tag when the size looks right. 
+ if (bytes == 512 * 272 * 4) { + char tagData[128]; + size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "ReplaceMemcpy/", srcPtr, bytes); + NotifyMemInfo(MemBlockFlags::READ, srcPtr, bytes, tagData, tagSize); + NotifyMemInfo(MemBlockFlags::WRITE, destPtr, bytes, tagData, tagSize); + + if (!strcmp(tagData, "ReplaceMemcpy/VideoDecode") || !strcmp(tagData, "ReplaceMemcpy/VideoDecodeRange")) { gpu->PerformWriteFormattedFromMemory(destPtr, bytes, 512, GE_FORMAT_8888); } + } else { + NotifyMemInfoCopy(destPtr, srcPtr, bytes, "ReplaceMemcpy/"); } } @@ -212,16 +215,19 @@ static int Replace_memcpy_jak() { RETURN(destPtr); if (MemBlockInfoDetailed(bytes)) { - char tagData[128]; - size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "ReplaceMemcpy/", srcPtr, bytes); - NotifyMemInfo(MemBlockFlags::READ, srcPtr, bytes, tagData, tagSize); - NotifyMemInfo(MemBlockFlags::WRITE, destPtr, bytes, tagData, tagSize); - // It's pretty common that games will copy video data. - if (!strcmp(tagData, "ReplaceMemcpy/VideoDecode") || !strcmp(tagData, "ReplaceMemcpy/VideoDecodeRange")) { - if (bytes == 512 * 272 * 4) { + // Detect that by manually reading the tag when the size looks right. 
+ if (bytes == 512 * 272 * 4) { + char tagData[128]; + size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "ReplaceMemcpy/", srcPtr, bytes); + NotifyMemInfo(MemBlockFlags::READ, srcPtr, bytes, tagData, tagSize); + NotifyMemInfo(MemBlockFlags::WRITE, destPtr, bytes, tagData, tagSize); + + if (!strcmp(tagData, "ReplaceMemcpy/VideoDecode") || !strcmp(tagData, "ReplaceMemcpy/VideoDecodeRange")) { gpu->PerformWriteFormattedFromMemory(destPtr, bytes, 512, GE_FORMAT_8888); } + } else { + NotifyMemInfoCopy(destPtr, srcPtr, bytes, "ReplaceMemcpy/"); } } @@ -252,10 +258,7 @@ static int Replace_memcpy16() { RETURN(destPtr); if (MemBlockInfoDetailed(bytes)) { - char tagData[128]; - size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "ReplaceMemcpy16/", srcPtr, bytes); - NotifyMemInfo(MemBlockFlags::READ, srcPtr, bytes, tagData, tagSize); - NotifyMemInfo(MemBlockFlags::WRITE, destPtr, bytes, tagData, tagSize); + NotifyMemInfoCopy(destPtr, srcPtr, bytes, "ReplaceMemcpy16/"); } return 10 + bytes / 4; // approximation @@ -294,10 +297,7 @@ static int Replace_memcpy_swizzled() { RETURN(0); if (MemBlockInfoDetailed(pitch * h)) { - char tagData[128]; - size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "ReplaceMemcpySwizzle/", srcPtr, pitch * h); - NotifyMemInfo(MemBlockFlags::READ, srcPtr, pitch * h, tagData, tagSize); - NotifyMemInfo(MemBlockFlags::WRITE, destPtr, pitch * h, tagData, tagSize); + NotifyMemInfoCopy(destPtr, srcPtr, pitch * h, "ReplaceMemcpySwizzle/"); } return 10 + (pitch * h) / 4; // approximation @@ -326,10 +326,7 @@ static int Replace_memmove() { RETURN(destPtr); if (MemBlockInfoDetailed(bytes)) { - char tagData[128]; - size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "ReplaceMemmove/", srcPtr, bytes); - NotifyMemInfo(MemBlockFlags::READ, srcPtr, bytes, tagData, tagSize); - NotifyMemInfo(MemBlockFlags::WRITE, destPtr, bytes, tagData, tagSize); + NotifyMemInfoCopy(destPtr, srcPtr, bytes, "ReplaceMemmove/"); } return 10 
+ bytes / 4; // approximation @@ -1590,7 +1587,10 @@ std::vector GetReplacementFuncIndexes(u64 hash, int funcSize) { return emptyResult; } -const ReplacementTableEntry *GetReplacementFunc(int i) { +const ReplacementTableEntry *GetReplacementFunc(size_t i) { + if (i >= ARRAY_SIZE(entries)) { + return nullptr; + } return &entries[i]; } diff --git a/Core/HLE/ReplaceTables.h b/Core/HLE/ReplaceTables.h index 94ee26d69ea1..980f506b6af1 100644 --- a/Core/HLE/ReplaceTables.h +++ b/Core/HLE/ReplaceTables.h @@ -64,7 +64,7 @@ void Replacement_Shutdown(); int GetNumReplacementFuncs(); std::vector GetReplacementFuncIndexes(u64 hash, int funcSize); -const ReplacementTableEntry *GetReplacementFunc(int index); +const ReplacementTableEntry *GetReplacementFunc(size_t index); void WriteReplaceInstructions(u32 address, u64 hash, int size); void RestoreReplacedInstruction(u32 address); diff --git a/Core/HLE/sceDmac.cpp b/Core/HLE/sceDmac.cpp index f7bcf0d0f6f5..8feb1fc89e74 100644 --- a/Core/HLE/sceDmac.cpp +++ b/Core/HLE/sceDmac.cpp @@ -51,12 +51,11 @@ static int __DmacMemcpy(u32 dst, u32 src, u32 size) { } if (!skip && size != 0) { currentMIPS->InvalidateICache(src, size); + if (Memory::IsValidRange(dst, size) && Memory::IsValidRange(src, size)) { + memcpy(Memory::GetPointerWriteUnchecked(dst), Memory::GetPointerUnchecked(src), size); + } if (MemBlockInfoDetailed(size)) { - char tagData[128]; - size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "DmacMemcpy/", src, size); - Memory::Memcpy(dst, src, size, tagData, tagSize); - } else { - Memory::Memcpy(dst, src, size, "DmacMemcpy"); + NotifyMemInfoCopy(dst, src, size, "DmacMemcpy/"); } currentMIPS->InvalidateICache(dst, size); } diff --git a/Core/HLE/sceIo.cpp b/Core/HLE/sceIo.cpp index b554d34e806b..d8599f98650a 100644 --- a/Core/HLE/sceIo.cpp +++ b/Core/HLE/sceIo.cpp @@ -1486,6 +1486,12 @@ static u32 sceIoLseek32Async(int id, int offset, int whence) { } static FileNode *__IoOpen(int &error, const char *filename, int flags, 
int mode) { + if (!filename) { + // To prevent crashes. Not sure about the correct value. + error = SCE_KERNEL_ERROR_ERRNO_FILE_NOT_FOUND; + return nullptr; + } + int access = FILEACCESS_NONE; if (flags & PSP_O_RDONLY) access |= FILEACCESS_READ; diff --git a/Core/HLE/sceKernelInterrupt.cpp b/Core/HLE/sceKernelInterrupt.cpp index ec4b452a6402..76e1788e397e 100644 --- a/Core/HLE/sceKernelInterrupt.cpp +++ b/Core/HLE/sceKernelInterrupt.cpp @@ -657,10 +657,7 @@ static u32 sceKernelMemcpy(u32 dst, u32 src, u32 size) } if (MemBlockInfoDetailed(size)) { - char tagData[128]; - size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "KernelMemcpy/", src, size); - NotifyMemInfo(MemBlockFlags::READ, src, size, tagData, tagSize); - NotifyMemInfo(MemBlockFlags::WRITE, dst, size, tagData, tagSize); + NotifyMemInfoCopy(dst, src, size, "KernelMemcpy/"); } return dst; @@ -693,10 +690,7 @@ static u32 sysclib_memcpy(u32 dst, u32 src, u32 size) { memcpy(Memory::GetPointerWriteUnchecked(dst), Memory::GetPointerUnchecked(src), size); } if (MemBlockInfoDetailed(size)) { - char tagData[128]; - size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "KernelMemcpy/", src, size); - NotifyMemInfo(MemBlockFlags::READ, src, size, tagData, tagSize); - NotifyMemInfo(MemBlockFlags::WRITE, dst, size, tagData, tagSize); + NotifyMemInfoCopy(dst, src, size, "KernelMemcpy/"); } return dst; } @@ -797,10 +791,7 @@ static u32 sysclib_memmove(u32 dst, u32 src, u32 size) { memmove(Memory::GetPointerWriteUnchecked(dst), Memory::GetPointerUnchecked(src), size); } if (MemBlockInfoDetailed(size)) { - char tagData[128]; - size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "KernelMemmove/", src, size); - NotifyMemInfo(MemBlockFlags::READ, src, size, tagData, tagSize); - NotifyMemInfo(MemBlockFlags::WRITE, dst, size, tagData, tagSize); + NotifyMemInfoCopy(dst, src, size, "KernelMemmove/"); } return 0; } diff --git a/Core/KeyMap.cpp b/Core/KeyMap.cpp index 0b9a18dff118..fae0a0665fea 100644 
--- a/Core/KeyMap.cpp +++ b/Core/KeyMap.cpp @@ -516,11 +516,11 @@ bool InputMappingsFromPspButton(int btn, std::vector *mapping return false; } bool mapped = false; - for (auto iter2 = iter->second.begin(); iter2 != iter->second.end(); ++iter2) { - bool ignore = ignoreMouse && iter2->HasMouse(); + for (auto &iter2 : iter->second) { + bool ignore = ignoreMouse && iter2.HasMouse(); if (mappings && !ignore) { mapped = true; - mappings->push_back(*iter2); + mappings->push_back(iter2); } } return mapped; @@ -536,8 +536,6 @@ bool PspButtonHasMappings(int btn) { } MappedAnalogAxes MappedAxesForDevice(InputDeviceID deviceId) { - MappedAnalogAxes result{}; - // Find the axisId mapped for a specific virtual button. auto findAxisId = [&](int btn) -> MappedAnalogAxis { MappedAnalogAxis info{ -1 }; @@ -563,6 +561,7 @@ MappedAnalogAxes MappedAxesForDevice(InputDeviceID deviceId) { return MappedAnalogAxis{ -1 }; }; + MappedAnalogAxes result; std::lock_guard guard(g_controllerMapLock); result.leftX = findAxisIdPair(VIRTKEY_AXIS_X_MIN, VIRTKEY_AXIS_X_MAX); result.leftY = findAxisIdPair(VIRTKEY_AXIS_Y_MIN, VIRTKEY_AXIS_Y_MAX); @@ -621,6 +620,7 @@ bool ReplaceSingleKeyMapping(int btn, int index, MultiInputMapping key) { } void DeleteNthMapping(int key, int number) { + std::lock_guard guard(g_controllerMapLock); auto iter = g_controllerMap.find(key); if (iter != g_controllerMap.end()) { if (number < iter->second.size()) { @@ -699,6 +699,8 @@ void LoadFromIni(IniFile &file) { return; } + std::lock_guard guard(g_controllerMapLock); + Section *controls = file.GetOrCreateSection("ControlMapping"); for (size_t i = 0; i < ARRAY_SIZE(psp_button_names); i++) { std::string value; @@ -730,6 +732,8 @@ void LoadFromIni(IniFile &file) { void SaveToIni(IniFile &file) { Section *controls = file.GetOrCreateSection("ControlMapping"); + std::lock_guard guard(g_controllerMapLock); + for (size_t i = 0; i < ARRAY_SIZE(psp_button_names); i++) { std::vector keys; 
InputMappingsFromPspButton(psp_button_names[i].key, &keys, false); diff --git a/Core/Loaders.cpp b/Core/Loaders.cpp index 28190edd8080..6bb5f5d34365 100644 --- a/Core/Loaders.cpp +++ b/Core/Loaders.cpp @@ -94,6 +94,8 @@ IdentifiedFileType Identify_File(FileLoader *fileLoader, std::string *errorStrin return IdentifiedFileType::PSP_ISO; } else if (extension == ".cso") { return IdentifiedFileType::PSP_ISO; + } else if (extension == ".chd") { + return IdentifiedFileType::PSP_ISO; } else if (extension == ".ppst") { return IdentifiedFileType::PPSSPP_SAVESTATE; } else if (extension == ".ppdmp") { diff --git a/Core/MIPS/ARM/ArmJit.cpp b/Core/MIPS/ARM/ArmJit.cpp index c0ae1071a5ac..4cfbc2512449 100644 --- a/Core/MIPS/ARM/ArmJit.cpp +++ b/Core/MIPS/ARM/ArmJit.cpp @@ -561,7 +561,7 @@ void ArmJit::Comp_ReplacementFunc(MIPSOpcode op) const ReplacementTableEntry *entry = GetReplacementFunc(index); if (!entry) { - ERROR_LOG(HLE, "Invalid replacement op %08x", op.encoding); + ERROR_LOG_REPORT_ONCE(replFunc, HLE, "Invalid replacement op %08x at %08x", op.encoding, js.compilerPC); return; } @@ -745,7 +745,9 @@ void ArmJit::UpdateRoundingMode(u32 fcr31) { // I don't think this gives us that much benefit. void ArmJit::WriteExit(u32 destination, int exit_num) { - // TODO: Check destination is valid and trigger exception. + // NOTE: Can't blindly check for bad destination addresses here, sometimes exits with bad destinations are written intentionally (like breaks). + _assert_msg_(exit_num < MAX_JIT_BLOCK_EXITS, "Expected a valid exit_num. 
dest=%08x", destination); + WriteDownCount(); //If nobody has taken care of this yet (this can be removed when all branches are done) JitBlock *b = js.curBlock; diff --git a/Core/MIPS/ARM64/Arm64CompVFPU.cpp b/Core/MIPS/ARM64/Arm64CompVFPU.cpp index 775b95df1527..d5dd416d6f52 100644 --- a/Core/MIPS/ARM64/Arm64CompVFPU.cpp +++ b/Core/MIPS/ARM64/Arm64CompVFPU.cpp @@ -1504,7 +1504,7 @@ namespace MIPSComp { void Arm64Jit::Comp_VCrossQuat(MIPSOpcode op) { // This op does not support prefixes anyway. CONDITIONAL_DISABLE(VFPU_VEC); - if (js.HasUnknownPrefix()) + if (!js.HasNoPrefix()) DISABLE; VectorSize sz = GetVecSize(op); @@ -1521,20 +1521,26 @@ namespace MIPSComp { if (sz == V_Triple) { MIPSReg temp3 = fpr.GetTempV(); + MIPSReg temp4 = fpr.GetTempV(); fpr.MapRegV(temp3, MAP_DIRTY | MAP_NOINIT); + fpr.MapRegV(temp4, MAP_DIRTY | MAP_NOINIT); // Cross product vcrsp.t - // Compute X - fp.FMUL(S0, fpr.V(sregs[1]), fpr.V(tregs[2])); - fp.FMSUB(S0, fpr.V(sregs[2]), fpr.V(tregs[1]), S0); + // Note: using FMSUB here causes accuracy issues, see #18203. 
+ // Compute X: s[1] * t[2] - s[2] * t[1] + fp.FMUL(fpr.V(temp3), fpr.V(sregs[1]), fpr.V(tregs[2])); + fp.FMUL(fpr.V(temp4), fpr.V(sregs[2]), fpr.V(tregs[1])); + fp.FSUB(S0, fpr.V(temp3), fpr.V(temp4)); - // Compute Y - fp.FMUL(S1, fpr.V(sregs[2]), fpr.V(tregs[0])); - fp.FMSUB(S1, fpr.V(sregs[0]), fpr.V(tregs[2]), S1); + // Compute Y: s[2] * t[0] - s[0] * t[2] + fp.FMUL(fpr.V(temp3), fpr.V(sregs[2]), fpr.V(tregs[0])); + fp.FMUL(fpr.V(temp4), fpr.V(sregs[0]), fpr.V(tregs[2])); + fp.FSUB(S1, fpr.V(temp3), fpr.V(temp4)); - // Compute Z + // Compute Z: s[0] * t[1] - s[1] * t[0] fp.FMUL(fpr.V(temp3), fpr.V(sregs[0]), fpr.V(tregs[1])); - fp.FMSUB(fpr.V(temp3), fpr.V(sregs[1]), fpr.V(tregs[0]), fpr.V(temp3)); + fp.FMUL(fpr.V(temp4), fpr.V(sregs[1]), fpr.V(tregs[0])); + fp.FSUB(fpr.V(temp3), fpr.V(temp3), fpr.V(temp4)); fpr.MapRegsAndSpillLockV(dregs, sz, MAP_NOINIT); fp.FMOV(fpr.V(dregs[0]), S0); diff --git a/Core/MIPS/ARM64/Arm64IRAsm.cpp b/Core/MIPS/ARM64/Arm64IRAsm.cpp index 42bee863a22a..d623c6cd58c1 100644 --- a/Core/MIPS/ARM64/Arm64IRAsm.cpp +++ b/Core/MIPS/ARM64/Arm64IRAsm.cpp @@ -50,8 +50,18 @@ static void ShowPC(void *membase, void *jitbase) { } void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) { - BeginWrite(GetMemoryProtectPageSize()); + // This will be used as a writable scratch area, always 32-bit accessible. 
const u8 *start = AlignCodePage(); + if (DebugProfilerEnabled()) { + ProtectMemoryPages(start, GetMemoryProtectPageSize(), MEM_PROT_READ | MEM_PROT_WRITE); + hooks_.profilerPC = (uint32_t *)GetWritableCodePtr(); + Write32(0); + hooks_.profilerStatus = (IRProfilerStatus *)GetWritableCodePtr(); + Write32(0); + } + + const u8 *disasmStart = AlignCodePage(); + BeginWrite(GetMemoryProtectPageSize()); if (jo.useStaticAlloc) { saveStaticRegisters_ = AlignCode16(); @@ -63,8 +73,6 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) { regs_.EmitLoadStaticRegisters(); LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount)); RET(); - - start = saveStaticRegisters_; } else { saveStaticRegisters_ = nullptr; loadStaticRegisters_ = nullptr; @@ -152,13 +160,17 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) { MOVI2R(JITBASEREG, (intptr_t)GetBasePtr() - MIPS_EMUHACK_OPCODE); LoadStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); MovFromPC(SCRATCH1); + WriteDebugPC(SCRATCH1); outerLoopPCInSCRATCH1_ = GetCodePtr(); MovToPC(SCRATCH1); outerLoop_ = GetCodePtr(); SaveStaticRegisters(); // Advance can change the downcount, so must save/restore RestoreRoundingMode(true); + WriteDebugProfilerStatus(IRProfilerStatus::TIMER_ADVANCE); QuickCallFunction(SCRATCH1_64, &CoreTiming::Advance); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); ApplyRoundingMode(true); LoadStaticRegisters(); @@ -191,6 +203,7 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) { } MovFromPC(SCRATCH1); + WriteDebugPC(SCRATCH1); #ifdef MASKED_PSP_MEMORY ANDI2R(SCRATCH1, SCRATCH1, Memory::MEMVIEW32_MASK); #endif @@ -206,7 +219,9 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) { // No block found, let's jit. We don't need to save static regs, they're all callee saved. 
RestoreRoundingMode(true); + WriteDebugProfilerStatus(IRProfilerStatus::COMPILING); QuickCallFunction(SCRATCH1_64, &MIPSComp::JitAt); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); ApplyRoundingMode(true); // Let's just dispatch again, we'll enter the block since we know it's there. @@ -221,6 +236,7 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) { const uint8_t *quitLoop = GetCodePtr(); SetJumpTarget(badCoreState); + WriteDebugProfilerStatus(IRProfilerStatus::NOT_RUNNING); SaveStaticRegisters(); RestoreRoundingMode(true); @@ -251,7 +267,7 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) { // Leave this at the end, add more stuff above. if (enableDisasm) { - std::vector lines = DisassembleArm64(start, (int)(GetCodePtr() - start)); + std::vector lines = DisassembleArm64(disasmStart, (int)(GetCodePtr() - disasmStart)); for (auto s : lines) { INFO_LOG(JIT, "%s", s.c_str()); } diff --git a/Core/MIPS/ARM64/Arm64IRCompALU.cpp b/Core/MIPS/ARM64/Arm64IRCompALU.cpp index e83fabd6b902..4aeb2ceeb200 100644 --- a/Core/MIPS/ARM64/Arm64IRCompALU.cpp +++ b/Core/MIPS/ARM64/Arm64IRCompALU.cpp @@ -170,9 +170,18 @@ void Arm64JitBackend::CompIR_Compare(IRInst inst) { break; case IROp::SltU: - regs_.Map(inst); - CMP(regs_.R(inst.src1), regs_.R(inst.src2)); - CSET(regs_.R(inst.dest), CC_LO); + if (regs_.IsGPRImm(inst.src1) && regs_.GetGPRImm(inst.src1) == 0) { + // This is kinda common, same as != 0. Avoid flushing src1. 
+ regs_.SpillLockGPR(inst.src2, inst.dest); + regs_.MapGPR(inst.src2); + regs_.MapGPR(inst.dest, MIPSMap::NOINIT); + CMP(regs_.R(inst.src2), 0); + CSET(regs_.R(inst.dest), CC_NEQ); + } else { + regs_.Map(inst); + CMP(regs_.R(inst.src1), regs_.R(inst.src2)); + CSET(regs_.R(inst.dest), CC_LO); + } break; case IROp::SltUConst: diff --git a/Core/MIPS/ARM64/Arm64IRCompFPU.cpp b/Core/MIPS/ARM64/Arm64IRCompFPU.cpp index 74f62da5aa26..99b502c74b3c 100644 --- a/Core/MIPS/ARM64/Arm64IRCompFPU.cpp +++ b/Core/MIPS/ARM64/Arm64IRCompFPU.cpp @@ -298,17 +298,23 @@ void Arm64JitBackend::CompIR_FCompare(IRInst inst) { case IROp::FCmpVfpuAggregate: regs_.MapGPR(IRREG_VFPU_CC, MIPSMap::DIRTY); - MOVI2R(SCRATCH1, inst.dest); - // Grab the any bit. - TST(regs_.R(IRREG_VFPU_CC), SCRATCH1); - CSET(SCRATCH2, CC_NEQ); - // Now the all bit, by clearing our mask to zero. - BICS(WZR, SCRATCH1, regs_.R(IRREG_VFPU_CC)); - CSET(SCRATCH1, CC_EQ); + if (inst.dest == 1) { + // Just replicate the lowest bit to the others. + BFI(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), 4, 1); + BFI(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), 5, 1); + } else { + MOVI2R(SCRATCH1, inst.dest); + // Grab the any bit. + TST(regs_.R(IRREG_VFPU_CC), SCRATCH1); + CSET(SCRATCH2, CC_NEQ); + // Now the all bit, by clearing our mask to zero. + BICS(WZR, SCRATCH1, regs_.R(IRREG_VFPU_CC)); + CSET(SCRATCH1, CC_EQ); - // Insert the bits into place. - BFI(regs_.R(IRREG_VFPU_CC), SCRATCH2, 4, 1); - BFI(regs_.R(IRREG_VFPU_CC), SCRATCH1, 5, 1); + // Insert the bits into place. + BFI(regs_.R(IRREG_VFPU_CC), SCRATCH2, 4, 1); + BFI(regs_.R(IRREG_VFPU_CC), SCRATCH1, 5, 1); + } break; default: @@ -502,6 +508,8 @@ void Arm64JitBackend::CompIR_FSpecial(IRInst inst) { auto callFuncF_F = [&](float (*func)(float)) { regs_.FlushBeforeCall(); + WriteDebugProfilerStatus(IRProfilerStatus::MATH_HELPER); + // It might be in a non-volatile register. // TODO: May have to handle a transfer if SIMD here. 
if (regs_.IsFPRMapped(inst.src1)) { @@ -521,6 +529,8 @@ void Arm64JitBackend::CompIR_FSpecial(IRInst inst) { if (regs_.F(inst.dest) != S0) { fp_.FMOV(regs_.F(inst.dest), S0); } + + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); }; switch (inst.op) { diff --git a/Core/MIPS/ARM64/Arm64IRCompLoadStore.cpp b/Core/MIPS/ARM64/Arm64IRCompLoadStore.cpp index 42a966d4371a..d0fde9f6f2fc 100644 --- a/Core/MIPS/ARM64/Arm64IRCompLoadStore.cpp +++ b/Core/MIPS/ARM64/Arm64IRCompLoadStore.cpp @@ -80,7 +80,12 @@ Arm64JitBackend::LoadStoreArg Arm64JitBackend::PrepareSrc1Address(IRInst inst) { // If it's about to be clobbered, don't waste time pointerifying. Use displacement. bool clobbersSrc1 = !readsFromSrc1 && regs_.IsGPRClobbered(inst.src1); - int32_t imm = (int32_t)inst.constant; + int64_t imm = (int32_t)inst.constant; + // It can't be this negative, must be a constant address with the top bit set. + if ((imm & 0xC0000000) == 0x80000000) { + imm = (uint64_t)(uint32_t)inst.constant; + } + LoadStoreArg addrArg; if (inst.src1 == MIPS_REG_ZERO) { // The constant gets applied later. @@ -100,7 +105,7 @@ Arm64JitBackend::LoadStoreArg Arm64JitBackend::PrepareSrc1Address(IRInst inst) { // Since we can't modify src1, let's just use a temp reg while copying. if (!addrArg.useRegisterOffset) { - ADDI2R(SCRATCH1, regs_.MapGPR(inst.src1), (s64)imm, SCRATCH2); + ADDI2R(SCRATCH1, regs_.MapGPR(inst.src1), imm, SCRATCH2); #ifdef MASKED_PSP_MEMORY ANDI2R(SCRATCH1, SCRATCH1, Memory::MEMVIEW32_MASK, SCRATCH2); #endif @@ -114,7 +119,7 @@ Arm64JitBackend::LoadStoreArg Arm64JitBackend::PrepareSrc1Address(IRInst inst) { // The offset gets set later. 
addrArg.base = regs_.MapGPRAsPointer(inst.src1); } else { - ADDI2R(SCRATCH1, regs_.MapGPR(inst.src1), (s64)imm, SCRATCH2); + ADDI2R(SCRATCH1, regs_.MapGPR(inst.src1), imm, SCRATCH2); #ifdef MASKED_PSP_MEMORY ANDI2R(SCRATCH1, SCRATCH1, Memory::MEMVIEW32_MASK, SCRATCH2); #endif @@ -137,15 +142,15 @@ Arm64JitBackend::LoadStoreArg Arm64JitBackend::PrepareSrc1Address(IRInst inst) { int scale = IROpToByteWidth(inst.op); if (imm > 0 && (imm & (scale - 1)) == 0 && imm <= 0xFFF * scale) { // Okay great, use the LDR/STR form. - addrArg.immOffset = imm; + addrArg.immOffset = (int)imm; addrArg.useUnscaled = false; } else if (imm >= -256 && imm < 256) { // An unscaled offset (LDUR/STUR) should work fine for this range. - addrArg.immOffset = imm; + addrArg.immOffset = (int)imm; addrArg.useUnscaled = true; } else { // No luck, we'll need to load into a register. - MOVI2R(SCRATCH1, (s64)imm); + MOVI2R(SCRATCH1, imm); addrArg.regOffset = SCRATCH1; addrArg.useRegisterOffset = true; addrArg.signExtendRegOffset = true; diff --git a/Core/MIPS/ARM64/Arm64IRCompSystem.cpp b/Core/MIPS/ARM64/Arm64IRCompSystem.cpp index 282d4fd2ef86..8fba3c320525 100644 --- a/Core/MIPS/ARM64/Arm64IRCompSystem.cpp +++ b/Core/MIPS/ARM64/Arm64IRCompSystem.cpp @@ -21,9 +21,11 @@ #include "Common/Profiler/Profiler.h" #include "Core/Core.h" +#include "Core/Debugger/Breakpoints.h" #include "Core/HLE/HLE.h" #include "Core/HLE/ReplaceTables.h" #include "Core/MemMap.h" +#include "Core/MIPS/MIPSAnalyst.h" #include "Core/MIPS/IR/IRInterpreter.h" #include "Core/MIPS/ARM64/Arm64IRJit.h" #include "Core/MIPS/ARM64/Arm64IRRegCache.h" @@ -70,6 +72,7 @@ void Arm64JitBackend::CompIR_Basic(IRInst inst) { break; case IROp::SetPCConst: + lastConstPC_ = inst.constant; MOVI2R(SCRATCH1, inst.constant); MovToPC(SCRATCH1); break; @@ -85,37 +88,118 @@ void Arm64JitBackend::CompIR_Breakpoint(IRInst inst) { switch (inst.op) { case IROp::Breakpoint: + { FlushAll(); // Note: the constant could be a delay slot. 
MOVI2R(W0, inst.constant); QuickCallFunction(SCRATCH2_64, &IRRunBreakpoint); + + ptrdiff_t distance = dispatcherCheckCoreState_ - GetCodePointer(); + if (distance >= -0x100000 && distance < 0x100000) { + CBNZ(W0, dispatcherCheckCoreState_); + } else { + FixupBranch keepOnKeepingOn = CBZ(W0); + B(dispatcherCheckCoreState_); + SetJumpTarget(keepOnKeepingOn); + } break; + } case IROp::MemoryCheck: - { - ARM64Reg addrBase = regs_.MapGPR(inst.src1); - FlushAll(); - ADDI2R(W1, addrBase, inst.constant, SCRATCH1); - MovFromPC(W0); - ADDI2R(W0, W0, inst.dest, SCRATCH1); - QuickCallFunction(SCRATCH2_64, &IRRunMemCheck); + if (regs_.IsGPRImm(inst.src1)) { + uint32_t iaddr = regs_.GetGPRImm(inst.src1) + inst.constant; + uint32_t checkedPC = lastConstPC_ + inst.dest; + int size = MIPSAnalyst::OpMemoryAccessSize(checkedPC); + if (size == 0) { + checkedPC += 4; + size = MIPSAnalyst::OpMemoryAccessSize(checkedPC); + } + bool isWrite = MIPSAnalyst::IsOpMemoryWrite(checkedPC); + + MemCheck check; + if (CBreakPoints::GetMemCheckInRange(iaddr, size, &check)) { + if (!(check.cond & MEMCHECK_READ) && !isWrite) + break; + if (!(check.cond & (MEMCHECK_WRITE | MEMCHECK_WRITE_ONCHANGE)) && isWrite) + break; + + // We need to flush, or conditions and log expressions will see old register values. 
+ FlushAll(); + + MOVI2R(W0, checkedPC); + MOVI2R(W1, iaddr); + QuickCallFunction(SCRATCH2_64, &IRRunMemCheck); + + ptrdiff_t distance = dispatcherCheckCoreState_ - GetCodePointer(); + if (distance >= -0x100000 && distance < 0x100000) { + CBNZ(W0, dispatcherCheckCoreState_); + } else { + FixupBranch keepOnKeepingOn = CBZ(W0); + B(dispatcherCheckCoreState_); + SetJumpTarget(keepOnKeepingOn); + } + } + } else { + uint32_t checkedPC = lastConstPC_ + inst.dest; + int size = MIPSAnalyst::OpMemoryAccessSize(checkedPC); + if (size == 0) { + checkedPC += 4; + size = MIPSAnalyst::OpMemoryAccessSize(checkedPC); + } + bool isWrite = MIPSAnalyst::IsOpMemoryWrite(checkedPC); + + const auto memchecks = CBreakPoints::GetMemCheckRanges(isWrite); + // We can trivially skip if there are no checks for this type (i.e. read vs write.) + if (memchecks.empty()) + break; + + ARM64Reg addrBase = regs_.MapGPR(inst.src1); + ADDI2R(SCRATCH1, addrBase, inst.constant, SCRATCH2); + + // We need to flush, or conditions and log expressions will see old register values. + FlushAll(); + + std::vector hitChecks; + for (auto it : memchecks) { + if (it.end != 0) { + CMPI2R(SCRATCH1, it.start - size, SCRATCH2); + MOVI2R(SCRATCH2, it.end); + CCMP(SCRATCH1, SCRATCH2, 0xF, CC_HI); + hitChecks.push_back(B(CC_LO)); + } else { + CMPI2R(SCRATCH1, it.start, SCRATCH2); + hitChecks.push_back(B(CC_EQ)); + } + } + + FixupBranch noHits = B(); + + // Okay, now land any hit here. 
+ for (auto &fixup : hitChecks) + SetJumpTarget(fixup); + hitChecks.clear(); + + MOVI2R(W0, checkedPC); + MOV(W1, SCRATCH1); + QuickCallFunction(SCRATCH2_64, &IRRunMemCheck); + + ptrdiff_t distance = dispatcherCheckCoreState_ - GetCodePointer(); + if (distance >= -0x100000 && distance < 0x100000) { + CBNZ(W0, dispatcherCheckCoreState_); + } else { + FixupBranch keepOnKeepingOn = CBZ(W0); + B(dispatcherCheckCoreState_); + SetJumpTarget(keepOnKeepingOn); + } + + SetJumpTarget(noHits); + } break; - } default: INVALIDOP; break; } - - // Both return a flag on whether to bail out. - ptrdiff_t distance = dispatcherCheckCoreState_ - GetCodePointer(); - if (distance >= -0x100000 && distance < 0x100000) { - CBNZ(W0, dispatcherCheckCoreState_); - } else { - FixupBranch keepOnKeepingOn = CBZ(W0); - B(dispatcherCheckCoreState_); - SetJumpTarget(keepOnKeepingOn); - } } void Arm64JitBackend::CompIR_System(IRInst inst) { @@ -126,6 +210,7 @@ void Arm64JitBackend::CompIR_System(IRInst inst) { FlushAll(); SaveStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::SYSCALL); #ifdef USE_PROFILER // When profiling, we can't skip CallSyscall, since it times syscalls. MOVI2R(W0, inst.constant); @@ -145,6 +230,7 @@ void Arm64JitBackend::CompIR_System(IRInst inst) { } #endif + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); LoadStaticRegisters(); // This is always followed by an ExitToPC, where we check coreState. 
break; @@ -152,7 +238,9 @@ void Arm64JitBackend::CompIR_System(IRInst inst) { case IROp::CallReplacement: FlushAll(); SaveStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::REPLACEMENT); QuickCallFunction(SCRATCH2_64, GetReplacementFunc(inst.constant)->replaceFunc); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); LoadStaticRegisters(); SUB(DOWNCOUNTREG, DOWNCOUNTREG, W0); break; @@ -274,6 +362,66 @@ void Arm64JitBackend::CompIR_ValidateAddress(IRInst inst) { INVALIDOP; break; } + + if (regs_.IsGPRMappedAsPointer(inst.src1)) { + if (!jo.enablePointerify) { + SUB(SCRATCH1_64, regs_.RPtr(inst.src1), MEMBASEREG); + ADDI2R(SCRATCH1, SCRATCH1, inst.constant, SCRATCH2); + } else { + ADDI2R(SCRATCH1, regs_.R(inst.src1), inst.constant, SCRATCH2); + } + } else { + regs_.Map(inst); + ADDI2R(SCRATCH1, regs_.R(inst.src1), inst.constant, SCRATCH2); + } + ANDI2R(SCRATCH1, SCRATCH1, 0x3FFFFFFF, SCRATCH2); + + std::vector validJumps; + + FixupBranch unaligned; + if (alignment == 2) { + unaligned = TBNZ(SCRATCH1, 0); + } else if (alignment != 1) { + TSTI2R(SCRATCH1, alignment - 1, SCRATCH2); + unaligned = B(CC_NEQ); + } + + CMPI2R(SCRATCH1, PSP_GetUserMemoryEnd() - alignment, SCRATCH2); + FixupBranch tooHighRAM = B(CC_HI); + CMPI2R(SCRATCH1, PSP_GetKernelMemoryBase(), SCRATCH2); + validJumps.push_back(B(CC_HS)); + + CMPI2R(SCRATCH1, PSP_GetVidMemEnd() - alignment, SCRATCH2); + FixupBranch tooHighVid = B(CC_HI); + CMPI2R(SCRATCH1, PSP_GetVidMemBase(), SCRATCH2); + validJumps.push_back(B(CC_HS)); + + CMPI2R(SCRATCH1, PSP_GetScratchpadMemoryEnd() - alignment, SCRATCH2); + FixupBranch tooHighScratch = B(CC_HI); + CMPI2R(SCRATCH1, PSP_GetScratchpadMemoryBase(), SCRATCH2); + validJumps.push_back(B(CC_HS)); + + if (alignment != 1) + SetJumpTarget(unaligned); + SetJumpTarget(tooHighRAM); + SetJumpTarget(tooHighVid); + SetJumpTarget(tooHighScratch); + + // If we got here, something unusual and bad happened, so we'll always go back to the dispatcher. 
+ // Because of that, we can avoid flushing outside this case. + auto regsCopy = regs_; + regsCopy.FlushAll(); + + // Ignores the return value, always returns to the dispatcher. + // Otherwise would need a thunk to restore regs. + MOV(W0, SCRATCH1); + MOVI2R(W1, alignment); + MOVI2R(W2, isWrite ? 1 : 0); + QuickCallFunction(SCRATCH2, &ReportBadAddress); + B(dispatcherCheckCoreState_); + + for (FixupBranch &b : validJumps) + SetJumpTarget(b); } } // namespace MIPSComp diff --git a/Core/MIPS/ARM64/Arm64IRJit.cpp b/Core/MIPS/ARM64/Arm64IRJit.cpp index b99e11674438..ab7692dcc536 100644 --- a/Core/MIPS/ARM64/Arm64IRJit.cpp +++ b/Core/MIPS/ARM64/Arm64IRJit.cpp @@ -76,6 +76,8 @@ bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer())); wroteCheckedOffset = true; + WriteDebugPC(startPC); + // Check the sign bit to check if negative. FixupBranch normalEntry = TBZ(DOWNCOUNTREG, 31); MOVI2R(SCRATCH1, startPC); @@ -87,6 +89,7 @@ bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) const u8 *blockStart = GetCodePointer(); block->SetTargetOffset((int)GetOffset(blockStart)); compilingBlockNum_ = block_num; + lastConstPC_ = 0; regs_.Start(block); @@ -128,6 +131,8 @@ bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) } if (jo.enableBlocklink && jo.useBackJump) { + WriteDebugPC(startPC); + // Small blocks are common, check if it's < 32KB long. ptrdiff_t distance = blockStart - GetCodePointer(); if (distance >= -0x8000 && distance < 0x8000) { @@ -228,8 +233,10 @@ void Arm64JitBackend::CompIR_Generic(IRInst inst) { FlushAll(); SaveStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::IR_INTERPRET); MOVI2R(X0, value); QuickCallFunction(SCRATCH2_64, &DoIRInst); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); LoadStaticRegisters(); // We only need to check the return value if it's a potential exit. 
@@ -255,12 +262,14 @@ void Arm64JitBackend::CompIR_Interpret(IRInst inst) { // IR protects us against this being a branching instruction (well, hopefully.) FlushAll(); SaveStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::INTERPRET); if (DebugStatsEnabled()) { MOVP2R(X0, MIPSGetName(op)); QuickCallFunction(SCRATCH2_64, &NotifyMIPSInterpret); } MOVI2R(X0, inst.constant); QuickCallFunction(SCRATCH2_64, MIPSGetInterpretFunc(op)); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); LoadStaticRegisters(); } @@ -353,6 +362,32 @@ void Arm64JitBackend::MovToPC(ARM64Reg r) { STR(INDEX_UNSIGNED, r, CTXREG, offsetof(MIPSState, pc)); } +void Arm64JitBackend::WriteDebugPC(uint32_t pc) { + if (hooks_.profilerPC) { + int offset = (int)((const u8 *)hooks_.profilerPC - GetBasePtr()); + MOVI2R(SCRATCH2, MIPS_EMUHACK_OPCODE + offset); + MOVI2R(SCRATCH1, pc); + STR(SCRATCH1, JITBASEREG, SCRATCH2); + } +} + +void Arm64JitBackend::WriteDebugPC(ARM64Reg r) { + if (hooks_.profilerPC) { + int offset = (int)((const u8 *)hooks_.profilerPC - GetBasePtr()); + MOVI2R(SCRATCH2, MIPS_EMUHACK_OPCODE + offset); + STR(r, JITBASEREG, SCRATCH2); + } +} + +void Arm64JitBackend::WriteDebugProfilerStatus(IRProfilerStatus status) { + if (hooks_.profilerPC) { + int offset = (int)((const u8 *)hooks_.profilerStatus - GetBasePtr()); + MOVI2R(SCRATCH2, MIPS_EMUHACK_OPCODE + offset); + MOVI2R(SCRATCH1, (int)status); + STR(SCRATCH1, JITBASEREG, SCRATCH2); + } +} + void Arm64JitBackend::SaveStaticRegisters() { if (jo.useStaticAlloc) { QuickCallFunction(SCRATCH2_64, saveStaticRegisters_); diff --git a/Core/MIPS/ARM64/Arm64IRJit.h b/Core/MIPS/ARM64/Arm64IRJit.h index fa2428504c84..055e525565f8 100644 --- a/Core/MIPS/ARM64/Arm64IRJit.h +++ b/Core/MIPS/ARM64/Arm64IRJit.h @@ -57,6 +57,11 @@ class Arm64JitBackend : public Arm64Gen::ARM64CodeBlock, public IRNativeBackend void UpdateRoundingMode(bool force = false); void MovFromPC(Arm64Gen::ARM64Reg r); void MovToPC(Arm64Gen::ARM64Reg r); + // Destroys 
SCRATCH2. + void WriteDebugPC(uint32_t pc); + void WriteDebugPC(Arm64Gen::ARM64Reg r); + // Destroys SCRATCH2. + void WriteDebugProfilerStatus(IRProfilerStatus status); void SaveStaticRegisters(); void LoadStaticRegisters(); @@ -145,6 +150,8 @@ class Arm64JitBackend : public Arm64Gen::ARM64CodeBlock, public IRNativeBackend int jitStartOffset_ = 0; int compilingBlockNum_ = -1; int logBlocks_ = 0; + // Only useful in breakpoints, where it's set immediately prior. + uint32_t lastConstPC_ = 0; }; class Arm64IRJit : public IRNativeJit { diff --git a/Core/MIPS/ARM64/Arm64IRRegCache.cpp b/Core/MIPS/ARM64/Arm64IRRegCache.cpp index 0ce5422fd5ce..f48207fa5fcd 100644 --- a/Core/MIPS/ARM64/Arm64IRRegCache.cpp +++ b/Core/MIPS/ARM64/Arm64IRRegCache.cpp @@ -347,7 +347,7 @@ void Arm64IRRegCache::AdjustNativeRegAsPtr(IRNativeReg nreg, bool state) { } } -bool Arm64IRRegCache::IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags) { +bool Arm64IRRegCache::IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags, int lanes) { // No special flags, skip the check for a little speed. return true; } @@ -437,19 +437,21 @@ void Arm64IRRegCache::FlushAll(bool gprs, bool fprs) { // Note: make sure not to change the registers when flushing: // Branching code may expect the armreg to retain its value. + auto needsFlush = [&](IRReg i) { + if (mr[i].loc != MIPSLoc::MEM || mr[i].isStatic) + return false; + if (mr[i].nReg == -1 || !nr[mr[i].nReg].isDirty) + return false; + return true; + }; + // Try to flush in pairs when possible. for (int i = 1; i < TOTAL_MAPPABLE_IRREGS - 1; ++i) { - if (mr[i].loc == MIPSLoc::MEM || mr[i].loc == MIPSLoc::MEM || mr[i].isStatic || mr[i + 1].isStatic) + if (!needsFlush(i) || !needsFlush(i + 1)) continue; // Ignore multilane regs. Could handle with more smartness... 
if (mr[i].lane != -1 || mr[i + 1].lane != -1) continue; - if (mr[i].nReg != -1 && !nr[mr[i].nReg].isDirty) - continue; - if (mr[i + 1].nReg != -1 && !nr[mr[i + 1].nReg].isDirty) - continue; - if (mr[i].loc == MIPSLoc::MEM || mr[i + 1].loc == MIPSLoc::MEM) - continue; int offset = GetMipsRegOffset(i); diff --git a/Core/MIPS/ARM64/Arm64IRRegCache.h b/Core/MIPS/ARM64/Arm64IRRegCache.h index 3a9bf77ab9f4..9f0b0cbbac02 100644 --- a/Core/MIPS/ARM64/Arm64IRRegCache.h +++ b/Core/MIPS/ARM64/Arm64IRRegCache.h @@ -86,7 +86,7 @@ class Arm64IRRegCache : public IRNativeRegCacheBase { const int *GetAllocationOrder(MIPSLoc type, MIPSMap flags, int &count, int &base) const override; void AdjustNativeRegAsPtr(IRNativeReg nreg, bool state) override; - bool IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags) override; + bool IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags, int lanes) override; void LoadNativeReg(IRNativeReg nreg, IRReg first, int lanes) override; void StoreNativeReg(IRNativeReg nreg, IRReg first, int lanes) override; void SetNativeRegValue(IRNativeReg nreg, uint32_t imm) override; diff --git a/Core/MIPS/ARM64/Arm64Jit.cpp b/Core/MIPS/ARM64/Arm64Jit.cpp index d1f1062f1ef3..9abb69920bfc 100644 --- a/Core/MIPS/ARM64/Arm64Jit.cpp +++ b/Core/MIPS/ARM64/Arm64Jit.cpp @@ -562,7 +562,8 @@ void Arm64Jit::Comp_ReplacementFunc(MIPSOpcode op) const ReplacementTableEntry *entry = GetReplacementFunc(index); if (!entry) { - ERROR_LOG(HLE, "Invalid replacement op %08x", op.encoding); + ERROR_LOG_REPORT_ONCE(replFunc, HLE, "Invalid replacement op %08x at %08x", op.encoding, js.compilerPC); + // TODO: What should we do here? We're way off in the weeds probably. return; } @@ -724,8 +725,11 @@ void Arm64Jit::UpdateRoundingMode(u32 fcr31) { // though, as we need to have the SUBS flag set in the end. So with block linking in the mix, // I don't think this gives us that much benefit. 
void Arm64Jit::WriteExit(u32 destination, int exit_num) { - // TODO: Check destination is valid and trigger exception. - WriteDownCount(); + // NOTE: Can't blindly check for bad destination addresses here, sometimes exits with bad destinations are written intentionally (like breaks). + _assert_msg_(exit_num < MAX_JIT_BLOCK_EXITS, "Expected a valid exit_num. dest=%08x", destination); + + // NOTE: Can't blindly check for bad destination addresses here, sometimes exits with bad destinations are written intentionally (like breaks). + WriteDownCount(); //If nobody has taken care of this yet (this can be removed when all branches are done) JitBlock *b = js.curBlock; b->exitAddress[exit_num] = destination; diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index e42ac78d2588..6f44c2ccdd84 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -1675,7 +1675,7 @@ namespace MIPSComp { if (homogenous) { // This is probably even what the hardware basically does, wiring t[3] to 1.0f. ir.Write(IROp::Vec4Init, IRVTEMP_PFX_T, (int)Vec4Init::AllONE); - ir.Write(IROp::Vec4Blend, IRVTEMP_PFX_T, t, IRVTEMP_PFX_T, 0x7); + ir.Write(IROp::Vec4Blend, IRVTEMP_PFX_T, IRVTEMP_PFX_T, t, 0x7); t = IRVTEMP_PFX_T; } for (int i = 0; i < 4; i++) @@ -1771,7 +1771,20 @@ namespace MIPSComp { // d[0] = s[0]*t[1] - s[1]*t[0] // Note: this operates on two vectors, not a 2x2 matrix. 
- DISABLE; + VectorSize sz = GetVecSize(op); + if (sz != V_Pair) + DISABLE; + + u8 sregs[4], dregs[4], tregs[4]; + GetVectorRegsPrefixS(sregs, sz, _VS); + GetVectorRegsPrefixT(tregs, sz, _VT); + GetVectorRegsPrefixD(dregs, V_Single, _VD); + + ir.Write(IROp::FMul, IRVTEMP_0, sregs[1], tregs[0]); + ir.Write(IROp::FMul, dregs[0], sregs[0], tregs[1]); + ir.Write(IROp::FSub, dregs[0], dregs[0], IRVTEMP_0); + + ApplyPrefixD(dregs, V_Single, _VD); } void IRFrontend::Comp_Vi2x(MIPSOpcode op) { diff --git a/Core/MIPS/IR/IRNativeCommon.cpp b/Core/MIPS/IR/IRNativeCommon.cpp index 6ce1e0f0d614..784d0c7ae23c 100644 --- a/Core/MIPS/IR/IRNativeCommon.cpp +++ b/Core/MIPS/IR/IRNativeCommon.cpp @@ -15,10 +15,15 @@ // Official git repository and contact information can be found at // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. +#include +#include +#include #include "Common/Profiler/Profiler.h" #include "Common/StringUtils.h" #include "Common/TimeUtil.h" +#include "Core/Core.h" #include "Core/Debugger/SymbolMap.h" +#include "Core/MemMap.h" #include "Core/MIPS/MIPSTables.h" #include "Core/MIPS/IR/IRNativeCommon.h" @@ -28,18 +33,57 @@ namespace MIPSComp { // Compile time flag to enable debug stats for not compiled ops. static constexpr bool enableDebugStats = false; +// Compile time flag for enabling the simple IR jit profiler. +static constexpr bool enableDebugProfiler = false; // Used only for debugging when enableDebug is true above. 
static std::map debugSeenNotCompiledIR; static std::map debugSeenNotCompiled; +static std::map, int> debugSeenPCUsage; static double lastDebugStatsLog = 0.0; +static constexpr double debugStatsFrequency = 5.0; + +static std::thread debugProfilerThread; +std::atomic debugProfilerThreadStatus = false; + +template +class IRProfilerTopValues { +public: + void Add(const std::pair &v, int c) { + for (int i = 0; i < N; ++i) { + if (c > counts[i]) { + counts[i] = c; + values[i] = v; + return; + } + } + } + + int counts[N]{}; + std::pair values[N]{}; +}; + +const char *IRProfilerStatusToString(IRProfilerStatus s) { + switch (s) { + case IRProfilerStatus::NOT_RUNNING: return "NOT_RUNNING"; + case IRProfilerStatus::IN_JIT: return "IN_JIT"; + case IRProfilerStatus::TIMER_ADVANCE: return "TIMER_ADVANCE"; + case IRProfilerStatus::COMPILING: return "COMPILING"; + case IRProfilerStatus::MATH_HELPER: return "MATH_HELPER"; + case IRProfilerStatus::REPLACEMENT: return "REPLACEMENT"; + case IRProfilerStatus::SYSCALL: return "SYSCALL"; + case IRProfilerStatus::INTERPRET: return "INTERPRET"; + case IRProfilerStatus::IR_INTERPRET: return "IR_INTERPRET"; + } + return "INVALID"; +} static void LogDebugStats() { - if (!enableDebugStats) + if (!enableDebugStats && !enableDebugProfiler) return; double now = time_now_d(); - if (now < lastDebugStatsLog + 1.0) + if (now < lastDebugStatsLog + debugStatsFrequency) return; lastDebugStatsLog = now; @@ -63,16 +107,36 @@ static void LogDebugStats() { } debugSeenNotCompiled.clear(); + IRProfilerTopValues<4> slowestPCs; + int64_t totalCount = 0; + for (auto it : debugSeenPCUsage) { + slowestPCs.Add(it.first, it.second); + totalCount += it.second; + } + debugSeenPCUsage.clear(); + if (worstIROp != -1) WARN_LOG(JIT, "Most not compiled IR op: %s (%d)", GetIRMeta((IROp)worstIROp)->name, worstIRVal); if (worstName != nullptr) WARN_LOG(JIT, "Most not compiled op: %s (%d)", worstName, worstVal); + if (slowestPCs.counts[0] != 0) { + for (int i = 0; i < 4; ++i) 
{ + uint32_t pc = slowestPCs.values[i].first; + const char *status = IRProfilerStatusToString(slowestPCs.values[i].second); + const std::string label = g_symbolMap ? g_symbolMap->GetDescription(pc) : ""; + WARN_LOG(JIT, "Slowest sampled PC #%d: %08x (%s)/%s (%f%%)", i, pc, label.c_str(), status, 100.0 * (double)slowestPCs.counts[i] / (double)totalCount); + } + } } bool IRNativeBackend::DebugStatsEnabled() const { return enableDebugStats; } +bool IRNativeBackend::DebugProfilerEnabled() const { + return enableDebugProfiler; +} + void IRNativeBackend::NotifyMIPSInterpret(const char *name) { _assert_(enableDebugStats); debugSeenNotCompiled[name]++; @@ -98,8 +162,32 @@ uint32_t IRNativeBackend::DoIRInst(uint64_t value) { return IRInterpret(currentMIPS, &inst, 1); } +int IRNativeBackend::ReportBadAddress(uint32_t addr, uint32_t alignment, uint32_t isWrite) { + const auto toss = [&](MemoryExceptionType t) { + Core_MemoryException(addr, alignment, currentMIPS->pc, t); + return coreState != CORE_RUNNING ? 1 : 0; + }; + + if (!Memory::IsValidRange(addr, alignment)) { + MemoryExceptionType t = isWrite == 1 ? MemoryExceptionType::WRITE_WORD : MemoryExceptionType::READ_WORD; + if (alignment > 4) + t = isWrite ? MemoryExceptionType::WRITE_BLOCK : MemoryExceptionType::READ_BLOCK; + return toss(t); + } else if (alignment > 1 && (addr & (alignment - 1)) != 0) { + return toss(MemoryExceptionType::ALIGNMENT); + } + return 0; +} + IRNativeBackend::IRNativeBackend(IRBlockCache &blocks) : blocks_(blocks) {} +IRNativeBackend::~IRNativeBackend() { + if (debugProfilerThreadStatus) { + debugProfilerThreadStatus = false; + debugProfilerThread.join(); + } +} + void IRNativeBackend::CompileIRInst(IRInst inst) { switch (inst.op) { case IROp::Nop: @@ -401,6 +489,20 @@ void IRNativeJit::Init(IRNativeBackend &backend) { // Wanted this to be a reference, but vtbls get in the way. Shouldn't change. 
hooks_ = backend.GetNativeHooks(); + + if (enableDebugProfiler && hooks_.profilerPC) { + debugProfilerThreadStatus = true; + debugProfilerThread = std::thread([&] { + // Spin, spin spin... maybe could at least hook into sleeps. + while (debugProfilerThreadStatus) { + IRProfilerStatus stat = *hooks_.profilerStatus; + uint32_t pc = *hooks_.profilerPC; + if (stat != IRProfilerStatus::NOT_RUNNING && stat != IRProfilerStatus::SYSCALL) { + debugSeenPCUsage[std::make_pair(pc, stat)]++; + } + } + }); + } } bool IRNativeJit::CompileTargetBlock(IRBlock *block, int block_num, bool preload) { @@ -412,7 +514,7 @@ void IRNativeJit::FinalizeTargetBlock(IRBlock *block, int block_num) { } void IRNativeJit::RunLoopUntil(u64 globalticks) { - if constexpr (enableDebugStats) { + if constexpr (enableDebugStats || enableDebugProfiler) { LogDebugStats(); } @@ -443,13 +545,27 @@ bool IRNativeJit::DescribeCodePtr(const u8 *ptr, std::string &name) { return false; int block_num = -1; + int block_offset = INT_MAX; for (int i = 0; i < blocks_.GetNumBlocks(); ++i) { const auto &b = blocks_.GetBlock(i); - // We allocate linearly. - if (b->GetTargetOffset() <= offset) + int b_start = b->GetTargetOffset(); + if (b_start > offset) + continue; + + int b_end = backend_->GetNativeBlock(i)->checkedOffset; + int b_offset = offset - b_start; + if (b_end > b_start && b_end >= offset) { + // For sure within the block. block_num = i; - if (b->GetTargetOffset() > offset) + block_offset = b_offset; break; + } + + if (b_offset < block_offset) { + // Possibly within the block, unless in some other block... + block_num = i; + block_offset = b_offset; + } } // Used by profiling tools that don't like spaces. @@ -466,9 +582,9 @@ bool IRNativeJit::DescribeCodePtr(const u8 *ptr, std::string &name) { // It helps to know which func this block is inside. const std::string label = g_symbolMap ? 
g_symbolMap->GetDescription(start) : ""; if (!label.empty()) - name = StringFromFormat("block%d_%08x_%s", block_num, start, label.c_str()); + name = StringFromFormat("block%d_%08x_%s_0x%x", block_num, start, label.c_str(), block_offset); else - name = StringFromFormat("block%d_%08x", block_num, start); + name = StringFromFormat("block%d_%08x_0x%x", block_num, start, block_offset); return true; } return false; diff --git a/Core/MIPS/IR/IRNativeCommon.h b/Core/MIPS/IR/IRNativeCommon.h index 7da5d3a8318d..4afc50369891 100644 --- a/Core/MIPS/IR/IRNativeCommon.h +++ b/Core/MIPS/IR/IRNativeCommon.h @@ -25,12 +25,27 @@ namespace MIPSComp { typedef void (*IRNativeFuncNoArg)(); +enum class IRProfilerStatus : int32_t { + NOT_RUNNING, + IN_JIT, + TIMER_ADVANCE, + COMPILING, + MATH_HELPER, + REPLACEMENT, + SYSCALL, + INTERPRET, + IR_INTERPRET, +}; + struct IRNativeHooks { IRNativeFuncNoArg enterDispatcher = nullptr; const uint8_t *dispatcher = nullptr; const uint8_t *dispatchFetch = nullptr; const uint8_t *crashHandler = nullptr; + + uint32_t *profilerPC = nullptr; + IRProfilerStatus *profilerStatus = nullptr; }; struct IRNativeBlockExit { @@ -47,7 +62,7 @@ struct IRNativeBlock { class IRNativeBackend { public: IRNativeBackend(IRBlockCache &blocks); - virtual ~IRNativeBackend() {} + virtual ~IRNativeBackend(); void CompileIRInst(IRInst inst); @@ -120,6 +135,7 @@ class IRNativeBackend { // Returns true when debugging statistics should be compiled in. bool DebugStatsEnabled() const; + bool DebugProfilerEnabled() const; // Callback (compile when DebugStatsEnabled()) to log a base interpreter hit. // Call the func returned by MIPSGetInterpretFunc(op) directly for interpret. @@ -131,6 +147,8 @@ class IRNativeBackend { // Callback to log AND perform an IR interpreter inst. Returns 0 or a PC to jump to. 
static uint32_t DoIRInst(uint64_t inst); + static int ReportBadAddress(uint32_t addr, uint32_t alignment, uint32_t isWrite); + void AddLinkableExit(int block_num, uint32_t pc, int exitStartOffset, int exitLen); void EraseAllLinks(int block_num); diff --git a/Core/MIPS/IR/IRPassSimplify.cpp b/Core/MIPS/IR/IRPassSimplify.cpp index 87aa27687d39..2079b799d6a3 100644 --- a/Core/MIPS/IR/IRPassSimplify.cpp +++ b/Core/MIPS/IR/IRPassSimplify.cpp @@ -1794,7 +1794,8 @@ bool ApplyMemoryValidation(const IRWriter &in, IRWriter &out, const IROptions &o bool spModified = false; for (IRInst inst : in.GetInstructions()) { IRMemoryOpInfo info = IROpMemoryAccessSize(inst.op); - if (info.size != 0 && inst.src1 == MIPS_REG_SP) { + // Note: we only combine word aligned accesses. + if (info.size != 0 && inst.src1 == MIPS_REG_SP && info.size == 4) { if (spModified) { // No good, it was modified and then we did more accesses. Can't combine. spUpper = -1; @@ -1805,11 +1806,6 @@ bool ApplyMemoryValidation(const IRWriter &in, IRWriter &out, const IROptions &o spUpper = -1; break; } - if (info.size == 16 && (inst.constant & 0xF) != 0) { - // Shouldn't happen, sp should always be aligned. - spUpper = -1; - break; - } spLower = std::min(spLower, (int)inst.constant); spUpper = std::max(spUpper, (int)inst.constant + info.size); @@ -1828,7 +1824,7 @@ bool ApplyMemoryValidation(const IRWriter &in, IRWriter &out, const IROptions &o std::map checks; const auto addValidate = [&](IROp validate, uint8_t sz, const IRInst &inst, bool isStore) { - if (inst.src1 == MIPS_REG_SP && skipSP) { + if (inst.src1 == MIPS_REG_SP && skipSP && validate == IROp::ValidateAddress32) { if (!flushedSP) { out.Write(IROp::ValidateAddress32, 0, MIPS_REG_SP, spWrite ? 
1U : 0U, spLower); if (spUpper > spLower + 4) diff --git a/Core/MIPS/IR/IRRegCache.cpp b/Core/MIPS/IR/IRRegCache.cpp index 698929a21ad8..38c2fe2bd535 100644 --- a/Core/MIPS/IR/IRRegCache.cpp +++ b/Core/MIPS/IR/IRRegCache.cpp @@ -160,7 +160,7 @@ bool IRNativeRegCacheBase::IsFPRMapped(IRReg fpr) { } int IRNativeRegCacheBase::GetFPRLaneCount(IRReg fpr) { - if (!IsFPRMapped(fpr) || mr[fpr + 32].lane > 0) + if (!IsFPRMapped(fpr)) return 0; if (mr[fpr + 32].lane == -1) return 1; @@ -406,12 +406,12 @@ IRNativeReg IRNativeRegCacheBase::FindFreeReg(MIPSLoc type, MIPSMap flags) const bool IRNativeRegCacheBase::IsGPRClobbered(IRReg gpr) const { _dbg_assert_(IsValidGPR(gpr)); - return IsRegClobbered(MIPSLoc::REG, MIPSMap::INIT, gpr); + return IsRegClobbered(MIPSLoc::REG, gpr); } bool IRNativeRegCacheBase::IsFPRClobbered(IRReg fpr) const { _dbg_assert_(IsValidFPR(fpr)); - return IsRegClobbered(MIPSLoc::FREG, MIPSMap::INIT, fpr + 32); + return IsRegClobbered(MIPSLoc::FREG, fpr + 32); } IRUsage IRNativeRegCacheBase::GetNextRegUsage(const IRSituation &info, MIPSLoc type, IRReg r) const { @@ -423,7 +423,7 @@ IRUsage IRNativeRegCacheBase::GetNextRegUsage(const IRSituation &info, MIPSLoc t return IRUsage::UNKNOWN; } -bool IRNativeRegCacheBase::IsRegClobbered(MIPSLoc type, MIPSMap flags, IRReg r) const { +bool IRNativeRegCacheBase::IsRegClobbered(MIPSLoc type, IRReg r) const { static const int UNUSED_LOOKAHEAD_OPS = 30; IRSituation info; @@ -450,6 +450,21 @@ bool IRNativeRegCacheBase::IsRegClobbered(MIPSLoc type, MIPSMap flags, IRReg r) return false; } +bool IRNativeRegCacheBase::IsRegRead(MIPSLoc type, IRReg first) const { + static const int UNUSED_LOOKAHEAD_OPS = 30; + + IRSituation info; + info.lookaheadCount = UNUSED_LOOKAHEAD_OPS; + // We look starting one ahead, unlike spilling. 
+ info.currentIndex = irIndex_ + 1; + info.instructions = irBlock_->GetInstructions(); + info.numInstructions = irBlock_->GetNumInstructions(); + + // Note: this intentionally doesn't look at the full reg, only the lane. + IRUsage usage = GetNextRegUsage(info, type, first); + return usage == IRUsage::READ; +} + IRNativeReg IRNativeRegCacheBase::FindBestToSpill(MIPSLoc type, MIPSMap flags, bool unusedOnly, bool *clobbered) const { int allocCount = 0, base = 0; const int *allocOrder = GetAllocationOrder(type, flags, allocCount, base); @@ -501,7 +516,7 @@ IRNativeReg IRNativeRegCacheBase::FindBestToSpill(MIPSLoc type, MIPSMap flags, b return -1; } -bool IRNativeRegCacheBase::IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags) { +bool IRNativeRegCacheBase::IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags, int lanes) { int allocCount = 0, base = 0; const int *allocOrder = GetAllocationOrder(type, flags, allocCount, base); @@ -514,6 +529,11 @@ bool IRNativeRegCacheBase::IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, return false; } +bool IRNativeRegCacheBase::TransferNativeReg(IRNativeReg nreg, IRNativeReg dest, MIPSLoc type, IRReg first, int lanes, MIPSMap flags) { + // To be overridden if the backend supports transfers. + return false; +} + void IRNativeRegCacheBase::DiscardNativeReg(IRNativeReg nreg) { _assert_msg_(nreg >= 0 && nreg < config_.totalNativeRegs, "DiscardNativeReg on invalid register %d", nreg); if (nr[nreg].mipsReg != IRREG_INVALID) { @@ -930,11 +950,14 @@ IRNativeReg IRNativeRegCacheBase::MapNativeReg(MIPSLoc type, IRReg first, int la case MIPSLoc::REG: if (type != MIPSLoc::REG) { nreg = AllocateReg(type, flags); - } else if (!IsNativeRegCompatible(nreg, type, flags)) { + } else if (!IsNativeRegCompatible(nreg, type, flags, lanes)) { // If it's not compatible, we'll need to reallocate. - // TODO: Could do a transfer and avoid memory flush. 
- FlushNativeReg(nreg); - nreg = AllocateReg(type, flags); + if (TransferNativeReg(nreg, -1, type, first, lanes, flags)) { + nreg = mr[first].nReg; + } else { + FlushNativeReg(nreg); + nreg = AllocateReg(type, flags); + } } break; @@ -942,9 +965,13 @@ IRNativeReg IRNativeRegCacheBase::MapNativeReg(MIPSLoc type, IRReg first, int la case MIPSLoc::VREG: if (type != mr[first].loc) { nreg = AllocateReg(type, flags); - } else if (!IsNativeRegCompatible(nreg, type, flags)) { - FlushNativeReg(nreg); - nreg = AllocateReg(type, flags); + } else if (!IsNativeRegCompatible(nreg, type, flags, lanes)) { + if (TransferNativeReg(nreg, -1, type, first, lanes, flags)) { + nreg = mr[first].nReg; + } else { + FlushNativeReg(nreg); + nreg = AllocateReg(type, flags); + } } break; @@ -981,10 +1008,13 @@ void IRNativeRegCacheBase::MapNativeReg(MIPSLoc type, IRNativeReg nreg, IRReg fi _assert_msg_(!mreg.isStatic, "Cannot MapNativeReg a static reg mismatch"); if ((flags & MIPSMap::NOINIT) != MIPSMap::NOINIT) { // If we need init, we have to flush mismatches. - // TODO: Do a shuffle if interior only? - // TODO: We may also be motivated to have multiple read-only "views" or an IRReg. - // For example Vec4Scale v0..v3, v0..v3, v3 - FlushNativeReg(mreg.nReg); + if (!TransferNativeReg(mreg.nReg, nreg, type, first, lanes, flags)) { + // TODO: We may also be motivated to have multiple read-only "views" or an IRReg. + // For example Vec4Scale v0..v3, v0..v3, v3 + FlushNativeReg(mreg.nReg); + } + // The mismatch has been "resolved" now. + mismatch = false; } else if (oldlanes != 1) { // Even if we don't care about the current contents, we can't discard outside. bool extendsBefore = oldlane > i; @@ -1017,6 +1047,9 @@ void IRNativeRegCacheBase::MapNativeReg(MIPSLoc type, IRNativeReg nreg, IRReg fi DiscardNativeReg(mreg.nReg); else FlushNativeReg(mreg.nReg); + + // That took care of the mismatch, either by clobber or flush. 
+ mismatch = false; } } } @@ -1027,8 +1060,8 @@ void IRNativeRegCacheBase::MapNativeReg(MIPSLoc type, IRNativeReg nreg, IRReg fi if ((flags & MIPSMap::NOINIT) != MIPSMap::NOINIT) { // We better not be trying to map to a different nreg if it's in one now. // This might happen on some sort of transfer... - // TODO: Make a direct transfer, i.e. FREG -> VREG? - FlushNativeReg(mreg.nReg); + if (!TransferNativeReg(mreg.nReg, nreg, type, first, lanes, flags)) + FlushNativeReg(mreg.nReg); } else { DiscardNativeReg(mreg.nReg); } diff --git a/Core/MIPS/IR/IRRegCache.h b/Core/MIPS/IR/IRRegCache.h index c85bb41848ee..4301886b4414 100644 --- a/Core/MIPS/IR/IRRegCache.h +++ b/Core/MIPS/IR/IRRegCache.h @@ -209,13 +209,14 @@ class IRNativeRegCacheBase { IRNativeReg AllocateReg(MIPSLoc type, MIPSMap flags); IRNativeReg FindFreeReg(MIPSLoc type, MIPSMap flags) const; IRNativeReg FindBestToSpill(MIPSLoc type, MIPSMap flags, bool unusedOnly, bool *clobbered) const; - virtual bool IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags); + virtual bool IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags, int lanes); virtual void DiscardNativeReg(IRNativeReg nreg); virtual void FlushNativeReg(IRNativeReg nreg); virtual void DiscardReg(IRReg mreg); virtual void FlushReg(IRReg mreg); virtual void AdjustNativeRegAsPtr(IRNativeReg nreg, bool state); virtual void MapNativeReg(MIPSLoc type, IRNativeReg nreg, IRReg first, int lanes, MIPSMap flags); + virtual bool TransferNativeReg(IRNativeReg nreg, IRNativeReg dest, MIPSLoc type, IRReg first, int lanes, MIPSMap flags); virtual IRNativeReg MapNativeReg(MIPSLoc type, IRReg first, int lanes, MIPSMap flags); IRNativeReg MapNativeRegAsPointer(IRReg gpr); @@ -238,7 +239,8 @@ class IRNativeRegCacheBase { void SetSpillLockIRIndex(IRReg reg, int index); int GetMipsRegOffset(IRReg r); - bool IsRegClobbered(MIPSLoc type, MIPSMap flags, IRReg r) const; + bool IsRegClobbered(MIPSLoc type, IRReg r) const; + bool IsRegRead(MIPSLoc 
type, IRReg r) const; IRUsage GetNextRegUsage(const IRSituation &info, MIPSLoc type, IRReg r) const; bool IsValidGPR(IRReg r) const; diff --git a/Core/MIPS/JitCommon/JitBlockCache.cpp b/Core/MIPS/JitCommon/JitBlockCache.cpp index 1e503da62176..f8ba2436c6e2 100644 --- a/Core/MIPS/JitCommon/JitBlockCache.cpp +++ b/Core/MIPS/JitCommon/JitBlockCache.cpp @@ -31,6 +31,7 @@ #include "Core/MemMap.h" #include "Core/CoreTiming.h" #include "Core/Reporting.h" +#include "Core/Config.h" #include "Core/MIPS/MIPS.h" #include "Core/MIPS/MIPSTables.h" @@ -246,8 +247,7 @@ static void ExpandRange(std::pair &range, u32 newStart, u32 newEnd) { void JitBlockCache::FinalizeBlock(int block_num, bool block_link) { JitBlock &b = blocks_[block_num]; - - _assert_msg_(Memory::IsValidAddress(b.originalAddress), "FinalizeBlock: Bad originalAddress %08x in block %d", b.originalAddress, block_num); + _assert_msg_(Memory::IsValidAddress(b.originalAddress), "FinalizeBlock: Bad originalAddress %08x in block %d (b.num: %d) proxy: %s sz: %d", b.originalAddress, block_num, b.blockNum, b.proxyFor ? "y" : "n", b.codeSize); b.originalFirstOpcode = Memory::Read_Opcode_JIT(b.originalAddress); MIPSOpcode opcode = GetEmuHackOpForBlock(block_num); @@ -462,6 +462,11 @@ void JitBlockCache::UnlinkBlock(int i) { if (ppp.first == ppp.second) return; for (auto iter = ppp.first; iter != ppp.second; ++iter) { + if ((size_t)iter->second >= num_blocks_) { + // Something probably went very wrong. Try to stumble along nevertheless. 
+ ERROR_LOG(JIT, "UnlinkBlock: Invalid block number %d", iter->second); + continue; + } JitBlock &sourceBlock = blocks_[iter->second]; for (int e = 0; e < MAX_JIT_BLOCK_EXITS; e++) { if (sourceBlock.exitAddress[e] == b.originalAddress) diff --git a/Core/MIPS/JitCommon/JitBlockCache.h b/Core/MIPS/JitCommon/JitBlockCache.h index 3049300f9aa7..09eae3cc2138 100644 --- a/Core/MIPS/JitCommon/JitBlockCache.h +++ b/Core/MIPS/JitCommon/JitBlockCache.h @@ -29,7 +29,7 @@ #include "Core/MIPS/MIPS.h" #if PPSSPP_ARCH(ARM) || PPSSPP_ARCH(ARM64) -const int MAX_JIT_BLOCK_EXITS = 2; +const int MAX_JIT_BLOCK_EXITS = 4; #else const int MAX_JIT_BLOCK_EXITS = 8; #endif diff --git a/Core/MIPS/MIPSIntVFPU.cpp b/Core/MIPS/MIPSIntVFPU.cpp index 35d3caa56a78..9fcc409419cc 100644 --- a/Core/MIPS/MIPSIntVFPU.cpp +++ b/Core/MIPS/MIPSIntVFPU.cpp @@ -1446,7 +1446,7 @@ namespace MIPSInt d[0] += s[2] * t[2] + s[3] * t[3]; } - ApplyPrefixD(d, sz); + ApplyPrefixD(d, V_Single); WriteVector(d, V_Single, vd); PC += 4; EatPrefixes(); diff --git a/Core/MIPS/RiscV/RiscVAsm.cpp b/Core/MIPS/RiscV/RiscVAsm.cpp index 135e0604e8a3..730a6d9dcc5a 100644 --- a/Core/MIPS/RiscV/RiscVAsm.cpp +++ b/Core/MIPS/RiscV/RiscVAsm.cpp @@ -45,8 +45,19 @@ static void ShowPC(u32 downcount, void *membase, void *jitbase) { } void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) { - BeginWrite(GetMemoryProtectPageSize()); + // This will be used as a writable scratch area, always 32-bit accessible. 
const u8 *start = AlignCodePage(); + if (DebugProfilerEnabled()) { + ProtectMemoryPages(start, GetMemoryProtectPageSize(), MEM_PROT_READ | MEM_PROT_WRITE); + hooks_.profilerPC = (uint32_t *)GetWritableCodePtr(); + *hooks_.profilerPC = 0; + hooks_.profilerStatus = (IRProfilerStatus *)GetWritableCodePtr() + 1; + *hooks_.profilerStatus = IRProfilerStatus::NOT_RUNNING; + SetCodePointer(GetCodePtr() + sizeof(uint32_t) * 2, GetWritableCodePtr() + sizeof(uint32_t) * 2); + } + + const u8 *disasmStart = AlignCodePage(); + BeginWrite(GetMemoryProtectPageSize()); if (jo.useStaticAlloc) { saveStaticRegisters_ = AlignCode16(); @@ -58,8 +69,6 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) { regs_.EmitLoadStaticRegisters(); LW(DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount)); RET(); - - start = saveStaticRegisters_; } else { saveStaticRegisters_ = nullptr; loadStaticRegisters_ = nullptr; @@ -124,14 +133,18 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) { LI(JITBASEREG, GetBasePtr() - MIPS_EMUHACK_OPCODE, SCRATCH1); LoadStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); MovFromPC(SCRATCH1); + WriteDebugPC(SCRATCH1); outerLoopPCInSCRATCH1_ = GetCodePtr(); MovToPC(SCRATCH1); outerLoop_ = GetCodePtr(); // Advance can change the downcount (or thread), so must save/restore around it. SaveStaticRegisters(); RestoreRoundingMode(true); + WriteDebugProfilerStatus(IRProfilerStatus::TIMER_ADVANCE); QuickCallFunction(&CoreTiming::Advance, X7); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); ApplyRoundingMode(true); LoadStaticRegisters(); @@ -162,6 +175,7 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) { } LWU(SCRATCH1, CTXREG, offsetof(MIPSState, pc)); + WriteDebugPC(SCRATCH1); #ifdef MASKED_PSP_MEMORY LI(SCRATCH2, 0x3FFFFFFF); AND(SCRATCH1, SCRATCH1, SCRATCH2); @@ -180,7 +194,9 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) { // No block found, let's jit. 
We don't need to save static regs, they're all callee saved. RestoreRoundingMode(true); + WriteDebugProfilerStatus(IRProfilerStatus::COMPILING); QuickCallFunction(&MIPSComp::JitAt, X7); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); ApplyRoundingMode(true); // Try again, the block index should be set now. @@ -195,6 +211,7 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) { const uint8_t *quitLoop = GetCodePtr(); SetJumpTarget(badCoreState); + WriteDebugProfilerStatus(IRProfilerStatus::NOT_RUNNING); SaveStaticRegisters(); RestoreRoundingMode(true); diff --git a/Core/MIPS/RiscV/RiscVCompFPU.cpp b/Core/MIPS/RiscV/RiscVCompFPU.cpp index 3836ae77e4e8..132ef8e58c1c 100644 --- a/Core/MIPS/RiscV/RiscVCompFPU.cpp +++ b/Core/MIPS/RiscV/RiscVCompFPU.cpp @@ -520,20 +520,32 @@ void RiscVJitBackend::CompIR_FCompare(IRInst inst) { case IROp::FCmpVfpuAggregate: regs_.MapGPR(IRREG_VFPU_CC, MIPSMap::DIRTY); - ANDI(SCRATCH1, regs_.R(IRREG_VFPU_CC), inst.dest); - // This is the "any bit", easy. - SNEZ(SCRATCH2, SCRATCH1); - // To compare to inst.dest for "all", let's simply subtract it and compare to zero. - ADDI(SCRATCH1, SCRATCH1, -inst.dest); - SEQZ(SCRATCH1, SCRATCH1); - // Now we combine those together. - SLLI(SCRATCH1, SCRATCH1, 5); - SLLI(SCRATCH2, SCRATCH2, 4); - OR(SCRATCH1, SCRATCH1, SCRATCH2); + if (inst.dest == 1) { + ANDI(SCRATCH1, regs_.R(IRREG_VFPU_CC), inst.dest); + // Negate so 1 becomes all bits set and zero stays zero, then mask to 0x30. + NEG(SCRATCH1, SCRATCH1); + ANDI(SCRATCH1, SCRATCH1, 0x30); + + // Reject the old any/all bits and replace them with our own. + ANDI(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), ~0x30); + OR(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), SCRATCH1); + } else { + ANDI(SCRATCH1, regs_.R(IRREG_VFPU_CC), inst.dest); + FixupBranch skipZero = BEQ(SCRATCH1, R_ZERO); - // Reject those any/all bits and replace them with our own. 
- ANDI(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), ~0x30); - OR(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), SCRATCH1); + // To compare to inst.dest for "all", let's simply subtract it and compare to zero. + ADDI(SCRATCH1, SCRATCH1, -inst.dest); + SEQZ(SCRATCH1, SCRATCH1); + // Now we combine with the "any" bit. + SLLI(SCRATCH1, SCRATCH1, 5); + ORI(SCRATCH1, SCRATCH1, 0x10); + + SetJumpTarget(skipZero); + + // Reject the old any/all bits and replace them with our own. + ANDI(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), ~0x30); + OR(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), SCRATCH1); + } break; default: @@ -573,6 +585,8 @@ void RiscVJitBackend::CompIR_FSpecial(IRInst inst) { auto callFuncF_F = [&](float (*func)(float)) { regs_.FlushBeforeCall(); + WriteDebugProfilerStatus(IRProfilerStatus::MATH_HELPER); + // It might be in a non-volatile register. // TODO: May have to handle a transfer if SIMD here. if (regs_.IsFPRMapped(inst.src1)) { @@ -588,6 +602,8 @@ void RiscVJitBackend::CompIR_FSpecial(IRInst inst) { if (regs_.F(inst.dest) != F10) { FMV(32, regs_.F(inst.dest), F10); } + + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); }; RiscVReg tempReg = INVALID_REG; diff --git a/Core/MIPS/RiscV/RiscVCompLoadStore.cpp b/Core/MIPS/RiscV/RiscVCompLoadStore.cpp index 80b149ca02f3..9db1ebb657c6 100644 --- a/Core/MIPS/RiscV/RiscVCompLoadStore.cpp +++ b/Core/MIPS/RiscV/RiscVCompLoadStore.cpp @@ -59,8 +59,19 @@ int32_t RiscVJitBackend::AdjustForAddressOffset(RiscVGen::RiscVReg *reg, int32_t if (constant > 0) constant &= Memory::MEMVIEW32_MASK; #endif - LI(SCRATCH2, constant); - ADD(SCRATCH1, *reg, SCRATCH2); + // It can't be this negative, must be a constant with top bit set. 
+ if ((constant & 0xC0000000) == 0x80000000) { + if (cpu_info.RiscV_Zba) { + LI(SCRATCH2, constant); + ADD_UW(SCRATCH1, SCRATCH2, *reg); + } else { + LI(SCRATCH2, (uint32_t)constant); + ADD(SCRATCH1, *reg, SCRATCH2); + } + } else { + LI(SCRATCH2, constant); + ADD(SCRATCH1, *reg, SCRATCH2); + } *reg = SCRATCH1; return 0; } diff --git a/Core/MIPS/RiscV/RiscVCompSystem.cpp b/Core/MIPS/RiscV/RiscVCompSystem.cpp index 878687e64d94..4605648ed8e2 100644 --- a/Core/MIPS/RiscV/RiscVCompSystem.cpp +++ b/Core/MIPS/RiscV/RiscVCompSystem.cpp @@ -188,6 +188,7 @@ void RiscVJitBackend::CompIR_System(IRInst inst) { FlushAll(); SaveStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::SYSCALL); #ifdef USE_PROFILER // When profiling, we can't skip CallSyscall, since it times syscalls. LI(X10, (int32_t)inst.constant); @@ -207,6 +208,7 @@ void RiscVJitBackend::CompIR_System(IRInst inst) { } #endif + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); LoadStaticRegisters(); // This is always followed by an ExitToPC, where we check coreState. 
break; @@ -214,7 +216,9 @@ void RiscVJitBackend::CompIR_System(IRInst inst) { case IROp::CallReplacement: FlushAll(); SaveStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::REPLACEMENT); QuickCallFunction(GetReplacementFunc(inst.constant)->replaceFunc, SCRATCH2); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); LoadStaticRegisters(); SUB(DOWNCOUNTREG, DOWNCOUNTREG, X10); break; diff --git a/Core/MIPS/RiscV/RiscVJit.cpp b/Core/MIPS/RiscV/RiscVJit.cpp index 8d3f0155c32e..be4a4534827c 100644 --- a/Core/MIPS/RiscV/RiscVJit.cpp +++ b/Core/MIPS/RiscV/RiscVJit.cpp @@ -67,6 +67,8 @@ bool RiscVJitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer())); wroteCheckedOffset = true; + WriteDebugPC(startPC); + FixupBranch normalEntry = BGE(DOWNCOUNTREG, R_ZERO); LI(SCRATCH1, startPC); QuickJ(R_RA, outerLoopPCInSCRATCH1_); @@ -118,6 +120,8 @@ bool RiscVJitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) } if (jo.enableBlocklink && jo.useBackJump) { + WriteDebugPC(startPC); + // Most blocks shouldn't be >= 4KB, so usually we can just BGE. if (BInRange(blockStart)) { BGE(DOWNCOUNTREG, R_ZERO, blockStart); @@ -218,7 +222,9 @@ void RiscVJitBackend::CompIR_Generic(IRInst inst) { FlushAll(); LI(X10, value, SCRATCH2); SaveStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::IR_INTERPRET); QuickCallFunction(&DoIRInst, SCRATCH2); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); LoadStaticRegisters(); // We only need to check the return value if it's a potential exit. @@ -241,12 +247,14 @@ void RiscVJitBackend::CompIR_Interpret(IRInst inst) { // IR protects us against this being a branching instruction (well, hopefully.) 
FlushAll(); SaveStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::INTERPRET); if (DebugStatsEnabled()) { LI(X10, MIPSGetName(op)); QuickCallFunction(&NotifyMIPSInterpret, SCRATCH2); } LI(X10, (int32_t)inst.constant); QuickCallFunction((const u8 *)MIPSGetInterpretFunc(op), SCRATCH2); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); LoadStaticRegisters(); } @@ -329,6 +337,32 @@ void RiscVJitBackend::MovToPC(RiscVReg r) { SW(r, CTXREG, offsetof(MIPSState, pc)); } +void RiscVJitBackend::WriteDebugPC(uint32_t pc) { + if (hooks_.profilerPC) { + int offset = (const u8 *)hooks_.profilerPC - GetBasePtr(); + LI(SCRATCH2, hooks_.profilerPC); + LI(R_RA, (int32_t)pc); + SW(R_RA, SCRATCH2, 0); + } +} + +void RiscVJitBackend::WriteDebugPC(RiscVReg r) { + if (hooks_.profilerPC) { + int offset = (const u8 *)hooks_.profilerPC - GetBasePtr(); + LI(SCRATCH2, hooks_.profilerPC); + SW(r, SCRATCH2, 0); + } +} + +void RiscVJitBackend::WriteDebugProfilerStatus(IRProfilerStatus status) { + if (hooks_.profilerPC) { + int offset = (const u8 *)hooks_.profilerStatus - GetBasePtr(); + LI(SCRATCH2, hooks_.profilerStatus); + LI(R_RA, (int)status); + SW(R_RA, SCRATCH2, 0); + } +} + void RiscVJitBackend::SaveStaticRegisters() { if (jo.useStaticAlloc) { QuickCallFunction(saveStaticRegisters_); diff --git a/Core/MIPS/RiscV/RiscVJit.h b/Core/MIPS/RiscV/RiscVJit.h index 46448c4c716b..7ccbcce90b36 100644 --- a/Core/MIPS/RiscV/RiscVJit.h +++ b/Core/MIPS/RiscV/RiscVJit.h @@ -50,6 +50,9 @@ class RiscVJitBackend : public RiscVGen::RiscVCodeBlock, public IRNativeBackend void ApplyRoundingMode(bool force = false); void MovFromPC(RiscVGen::RiscVReg r); void MovToPC(RiscVGen::RiscVReg r); + void WriteDebugPC(uint32_t pc); + void WriteDebugPC(RiscVGen::RiscVReg r); + void WriteDebugProfilerStatus(IRProfilerStatus status); void SaveStaticRegisters(); void LoadStaticRegisters(); diff --git a/Core/MIPS/RiscV/RiscVRegCache.cpp b/Core/MIPS/RiscV/RiscVRegCache.cpp index 7a3e6505cb3e..25528aa3aacb 
100644 --- a/Core/MIPS/RiscV/RiscVRegCache.cpp +++ b/Core/MIPS/RiscV/RiscVRegCache.cpp @@ -303,11 +303,11 @@ void RiscVRegCache::AdjustNativeRegAsPtr(IRNativeReg nreg, bool state) { } } -bool RiscVRegCache::IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags) { +bool RiscVRegCache::IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags, int lanes) { // No special flags except VREG, skip the check for a little speed. if (type != MIPSLoc::VREG) return true; - return IRNativeRegCacheBase::IsNativeRegCompatible(nreg, type, flags); + return IRNativeRegCacheBase::IsNativeRegCompatible(nreg, type, flags, lanes); } void RiscVRegCache::LoadNativeReg(IRNativeReg nreg, IRReg first, int lanes) { diff --git a/Core/MIPS/RiscV/RiscVRegCache.h b/Core/MIPS/RiscV/RiscVRegCache.h index facfa5219570..e0075f2c619e 100644 --- a/Core/MIPS/RiscV/RiscVRegCache.h +++ b/Core/MIPS/RiscV/RiscVRegCache.h @@ -76,7 +76,7 @@ class RiscVRegCache : public IRNativeRegCacheBase { const int *GetAllocationOrder(MIPSLoc type, MIPSMap flags, int &count, int &base) const override; void AdjustNativeRegAsPtr(IRNativeReg nreg, bool state) override; - bool IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags) override; + bool IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags, int lanes) override; void LoadNativeReg(IRNativeReg nreg, IRReg first, int lanes) override; void StoreNativeReg(IRNativeReg nreg, IRReg first, int lanes) override; void SetNativeRegValue(IRNativeReg nreg, uint32_t imm) override; diff --git a/Core/MIPS/x86/Jit.cpp b/Core/MIPS/x86/Jit.cpp index c7e2e2fed802..2f561a2cd05a 100644 --- a/Core/MIPS/x86/Jit.cpp +++ b/Core/MIPS/x86/Jit.cpp @@ -605,7 +605,7 @@ void Jit::Comp_ReplacementFunc(MIPSOpcode op) { const ReplacementTableEntry *entry = GetReplacementFunc(index); if (!entry) { - ERROR_LOG(HLE, "Invalid replacement op %08x", op.encoding); + ERROR_LOG_REPORT_ONCE(replFunc, HLE, "Invalid replacement op %08x at %08x", op.encoding, 
js.compilerPC); return; } @@ -708,7 +708,7 @@ static void HitInvalidBranch(uint32_t dest) { } void Jit::WriteExit(u32 destination, int exit_num) { - _dbg_assert_msg_(exit_num < MAX_JIT_BLOCK_EXITS, "Expected a valid exit_num"); + _assert_msg_(exit_num < MAX_JIT_BLOCK_EXITS, "Expected a valid exit_num. dest=%08x", destination); if (!Memory::IsValidAddress(destination) || (destination & 3) != 0) { ERROR_LOG_REPORT(JIT, "Trying to write block exit to illegal destination %08x: pc = %08x", destination, currentMIPS->pc); diff --git a/Core/MIPS/x86/X64IRAsm.cpp b/Core/MIPS/x86/X64IRAsm.cpp index fc763bd07c7d..2e095c4c1288 100644 --- a/Core/MIPS/x86/X64IRAsm.cpp +++ b/Core/MIPS/x86/X64IRAsm.cpp @@ -49,8 +49,21 @@ static void ShowPC(void *membase, void *jitbase) { } void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) { - BeginWrite(GetMemoryProtectPageSize()); + // This will be used as a writable scratch area, always 32-bit accessible. const u8 *start = AlignCodePage(); + if (DebugProfilerEnabled()) { + ProtectMemoryPages(start, GetMemoryProtectPageSize(), MEM_PROT_READ | MEM_PROT_WRITE); + hooks_.profilerPC = (uint32_t *)GetWritableCodePtr(); + Write32(0); + hooks_.profilerStatus = (IRProfilerStatus *)GetWritableCodePtr(); + Write32(0); + } + + EmitFPUConstants(); + EmitVecConstants(); + + const u8 *disasmStart = AlignCodePage(); + BeginWrite(GetMemoryProtectPageSize()); jo.downcountInRegister = false; #if PPSSPP_ARCH(AMD64) @@ -58,7 +71,7 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) { int jitbaseCtxDisp = 0; // We pre-bake the MIPS_EMUHACK_OPCODE subtraction into our jitbase value. 
intptr_t jitbase = (intptr_t)GetBasePtr() - MIPS_EMUHACK_OPCODE; - if ((jitbase < -0x80000000LL || jitbase > 0x7FFFFFFFLL) && !Accessible((const u8 *)&mipsState->f[0], GetBasePtr())) { + if ((jitbase < -0x80000000LL || jitbase > 0x7FFFFFFFLL) && !Accessible((const u8 *)&mipsState->f[0], (const u8 *)jitbase)) { jo.reserveR15ForAsm = true; jitbaseInR15 = true; } else { @@ -83,8 +96,6 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) { if (jo.downcountInRegister) MOV(32, R(DOWNCOUNTREG), MDisp(CTXREG, downcountOffset)); RET(); - - start = saveStaticRegisters_; } else { saveStaticRegisters_ = nullptr; loadStaticRegisters_ = nullptr; @@ -146,14 +157,18 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) { MOV(PTRBITS, R(CTXREG), ImmPtr(&mipsState->f[0])); LoadStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); MovFromPC(SCRATCH1); + WriteDebugPC(SCRATCH1); outerLoopPCInSCRATCH1_ = GetCodePtr(); MovToPC(SCRATCH1); outerLoop_ = GetCodePtr(); // Advance can change the downcount (or thread), so must save/restore around it. SaveStaticRegisters(); RestoreRoundingMode(true); + WriteDebugProfilerStatus(IRProfilerStatus::TIMER_ADVANCE); ABI_CallFunction(reinterpret_cast(&CoreTiming::Advance)); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); ApplyRoundingMode(true); LoadStaticRegisters(); @@ -209,6 +224,7 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) { } MovFromPC(SCRATCH1); + WriteDebugPC(SCRATCH1); #ifdef MASKED_PSP_MEMORY AND(32, R(SCRATCH1), Imm32(Memory::MEMVIEW32_MASK)); #endif @@ -247,7 +263,9 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) { // No block found, let's jit. We don't need to save static regs, they're all callee saved. 
RestoreRoundingMode(true); + WriteDebugProfilerStatus(IRProfilerStatus::COMPILING); ABI_CallFunction(&MIPSComp::JitAt); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); ApplyRoundingMode(true); // Let's just dispatch again, we'll enter the block since we know it's there. JMP(dispatcherNoCheck_, true); @@ -265,6 +283,7 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) { const uint8_t *quitLoop = GetCodePtr(); SetJumpTarget(badCoreState); + WriteDebugProfilerStatus(IRProfilerStatus::NOT_RUNNING); SaveStaticRegisters(); RestoreRoundingMode(true); ABI_PopAllCalleeSavedRegsAndAdjustStack(); @@ -283,16 +302,13 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) { // Leave this at the end, add more stuff above. if (enableDisasm) { #if PPSSPP_ARCH(AMD64) - std::vector lines = DisassembleX86(start, (int)(GetCodePtr() - start)); + std::vector lines = DisassembleX86(disasmStart, (int)(GetCodePtr() - disasmStart)); for (auto s : lines) { INFO_LOG(JIT, "%s", s.c_str()); } #endif } - EmitFPUConstants(); - EmitVecConstants(); - // Let's spare the pre-generated code from unprotect-reprotect. AlignCodePage(); jitStartOffset_ = (int)(GetCodePtr() - start); diff --git a/Core/MIPS/x86/X64IRCompALU.cpp b/Core/MIPS/x86/X64IRCompALU.cpp index fc8d7c9b140b..66fe205b2057 100644 --- a/Core/MIPS/x86/X64IRCompALU.cpp +++ b/Core/MIPS/x86/X64IRCompALU.cpp @@ -151,8 +151,52 @@ void X64JitBackend::CompIR_Bits(IRInst inst) { break; case IROp::ReverseBits: + regs_.Map(inst); + if (inst.src1 != inst.dest) { + MOV(32, regs_.R(inst.dest), regs_.R(inst.src1)); + } + + // Swap even/odd bits (in bits: 0123 -> 1032.) + LEA(32, SCRATCH1, MScaled(regs_.RX(inst.dest), 2, 0)); + SHR(32, regs_.R(inst.dest), Imm8(1)); + XOR(32, regs_.R(inst.dest), R(SCRATCH1)); + AND(32, regs_.R(inst.dest), Imm32(0x55555555)); + XOR(32, regs_.R(inst.dest), R(SCRATCH1)); + + // Swap pairs of bits (in bits: 10325476 -> 32107654.) 
+ LEA(32, SCRATCH1, MScaled(regs_.RX(inst.dest), 4, 0)); + SHR(32, regs_.R(inst.dest), Imm8(2)); + XOR(32, regs_.R(inst.dest), R(SCRATCH1)); + AND(32, regs_.R(inst.dest), Imm32(0x33333333)); + XOR(32, regs_.R(inst.dest), R(SCRATCH1)); + + // Swap nibbles (in nibbles: ABCD -> BADC.) + MOV(32, R(SCRATCH1), regs_.R(inst.dest)); + SHL(32, R(SCRATCH1), Imm8(4)); + SHR(32, regs_.R(inst.dest), Imm8(4)); + XOR(32, regs_.R(inst.dest), R(SCRATCH1)); + AND(32, regs_.R(inst.dest), Imm32(0x0F0F0F0F)); + XOR(32, regs_.R(inst.dest), R(SCRATCH1)); + + // Finally, swap the bytes to drop everything into place (nibbles: BADCFEHG -> HGFEDCBA.) + BSWAP(32, regs_.RX(inst.dest)); + break; + case IROp::BSwap16: - CompIR_Generic(inst); + regs_.Map(inst); + if (cpu_info.bBMI2) { + // Rotate to put it into the correct register, then swap. + if (inst.dest != inst.src1) + RORX(32, regs_.RX(inst.dest), regs_.R(inst.src1), 16); + else + ROR(32, regs_.R(inst.dest), Imm8(16)); + BSWAP(32, regs_.RX(inst.dest)); + } else { + if (inst.dest != inst.src1) + MOV(32, regs_.R(inst.dest), regs_.R(inst.src1)); + BSWAP(32, regs_.RX(inst.dest)); + ROR(32, regs_.R(inst.dest), Imm8(16)); + } break; case IROp::Clz: @@ -220,8 +264,24 @@ void X64JitBackend::CompIR_Compare(IRInst inst) { break; case IROp::SltU: - regs_.Map(inst); - setCC(regs_.R(inst.src2), CC_B); + if (regs_.IsGPRImm(inst.src1) && regs_.GetGPRImm(inst.src1) == 0) { + // This is kinda common, same as != 0. Avoid flushing src1. 
+ regs_.SpillLockGPR(inst.src2, inst.dest); + regs_.MapGPR(inst.src2); + regs_.MapGPR(inst.dest, MIPSMap::NOINIT); + if (inst.dest != inst.src2 && regs_.HasLowSubregister(regs_.RX(inst.dest))) { + XOR(32, regs_.R(inst.dest), regs_.R(inst.dest)); + TEST(32, regs_.R(inst.src2), regs_.R(inst.src2)); + SETcc(CC_NE, regs_.R(inst.dest)); + } else { + CMP(32, regs_.R(inst.src2), Imm8(0)); + SETcc(CC_NE, R(SCRATCH1)); + MOVZX(32, 8, regs_.RX(inst.dest), R(SCRATCH1)); + } + } else { + regs_.Map(inst); + setCC(regs_.R(inst.src2), CC_B); + } break; case IROp::SltUConst: diff --git a/Core/MIPS/x86/X64IRCompFPU.cpp b/Core/MIPS/x86/X64IRCompFPU.cpp index 644dff7138ba..0085cadb5460 100644 --- a/Core/MIPS/x86/X64IRCompFPU.cpp +++ b/Core/MIPS/x86/X64IRCompFPU.cpp @@ -43,10 +43,12 @@ using namespace X64IRJitConstants; void X64JitBackend::EmitFPUConstants() { EmitConst4x32(&constants.noSignMask, 0x7FFFFFFF); EmitConst4x32(&constants.signBitAll, 0x80000000); + EmitConst4x32(&constants.positiveZeroes, 0x00000000); EmitConst4x32(&constants.positiveInfinity, 0x7F800000); EmitConst4x32(&constants.qNAN, 0x7FC00000); EmitConst4x32(&constants.positiveOnes, 0x3F800000); EmitConst4x32(&constants.negativeOnes, 0xBF800000); + EmitConst4x32(&constants.maxIntBelowAsFloat, 0x4EFFFFFF); constants.mulTableVi2f = (const float *)GetCodePointer(); for (uint8_t i = 0; i < 32; ++i) { @@ -57,20 +59,14 @@ void X64JitBackend::EmitFPUConstants() { Write32(val); } - constants.mulTableVf2i = (const double *)GetCodePointer(); + constants.mulTableVf2i = (const float *)GetCodePointer(); for (uint8_t i = 0; i < 32; ++i) { - double fval = (1UL << i); - uint64_t val; + float fval = (float)(1ULL << i); + uint32_t val; memcpy(&val, &fval, sizeof(val)); - Write64(val); + Write32(val); } - - // Note: this first one is (double)(int)0x80000000, sign extended. 
- constants.minIntAsDouble = (const double *)GetCodePointer(); - Write64(0xC1E0000000000000ULL); - constants.maxIntAsDouble = (const double *)GetCodePointer(); - Write64(0x41DFFFFFFFC00000ULL); } void X64JitBackend::CopyVec4ToFPRLane0(Gen::X64Reg dest, Gen::X64Reg src, int lane) { @@ -210,9 +206,9 @@ void X64JitBackend::CompIR_FAssign(IRInst inst) { // Just to make sure we don't generate bad code. if (inst.dest == inst.src1) break; - if (regs_.IsFPRMapped(inst.src1 & 3) && regs_.GetFPRLaneCount(inst.src1 & ~3) == 4 && (inst.dest & ~3) != (inst.src1 & ~3)) { + if (regs_.IsFPRMapped(inst.src1 & 3) && regs_.GetFPRLaneCount(inst.src1) == 4 && (inst.dest & ~3) != (inst.src1 & ~3)) { // Okay, this is an extract. Avoid unvec4ing src1. - regs_.SpillLockFPR(inst.src1); + regs_.SpillLockFPR(inst.src1 & ~3); regs_.MapFPR(inst.dest, MIPSMap::NOINIT); CopyVec4ToFPRLane0(regs_.FX(inst.dest), regs_.FX(inst.src1 & ~3), inst.src1 & 3); } else { @@ -233,8 +229,30 @@ void X64JitBackend::CompIR_FAssign(IRInst inst) { break; case IROp::FSign: - CompIR_Generic(inst); + { + X64Reg tempReg = regs_.MapWithFPRTemp(inst); + + // Set tempReg to +1.0 or -1.0 per sign bit. + if (cpu_info.bAVX) { + VANDPS(128, tempReg, regs_.FX(inst.src1), M(constants.signBitAll)); // rip accessible + } else { + MOVAPS(tempReg, regs_.F(inst.src1)); + ANDPS(tempReg, M(constants.signBitAll)); // rip accessible + } + ORPS(tempReg, M(constants.positiveOnes)); // rip accessible + + // Set dest = 0xFFFFFFFF if +0.0 or -0.0. + if (inst.dest != inst.src1) { + XORPS(regs_.FX(inst.dest), regs_.F(inst.dest)); + CMPPS(regs_.FX(inst.dest), regs_.F(inst.src1), CMP_EQ); + } else { + CMPPS(regs_.FX(inst.dest), M(constants.positiveZeroes), CMP_EQ); // rip accessible + } + + // Now not the mask to keep zero if it was zero. 
+ ANDNPS(regs_.FX(inst.dest), R(tempReg)); break; + } default: INVALIDOP; @@ -273,25 +291,22 @@ void X64JitBackend::CompIR_FCompare(IRInst inst) { break; case IRFpCompareMode::EqualOrdered: + { + // Since UCOMISS doesn't give us ordered == directly, CMPSS is better. + regs_.SpillLockFPR(inst.src1, inst.src2); + X64Reg tempReg = regs_.GetAndLockTempFPR(); regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } }); - // Clear the upper bits of SCRATCH1 so we can AND later. - // We don't have a single flag we can check, unfortunately. - XOR(32, R(SCRATCH1), R(SCRATCH1)); - UCOMISS(regs_.FX(inst.src1), regs_.F(inst.src2)); - // E/ZF = EQUAL or UNORDERED (not exactly what we want.) - SETcc(CC_E, R(SCRATCH1)); - if (regs_.HasLowSubregister(regs_.RX(IRREG_FPCOND))) { - // NP/!PF = ORDERED. - SETcc(CC_NP, regs_.R(IRREG_FPCOND)); - AND(32, regs_.R(IRREG_FPCOND), R(SCRATCH1)); + + if (cpu_info.bAVX) { + VCMPSS(tempReg, regs_.FX(inst.src1), regs_.F(inst.src2), CMP_EQ); } else { - MOVZX(32, 8, regs_.RX(IRREG_FPCOND), R(SCRATCH1)); - // Neither of those affected flags, luckily. - // NP/!PF = ORDERED. - SETcc(CC_NP, R(SCRATCH1)); - AND(32, regs_.R(IRREG_FPCOND), R(SCRATCH1)); + MOVAPS(tempReg, regs_.F(inst.src1)); + CMPSS(tempReg, regs_.F(inst.src2), CMP_EQ); } + MOVD_xmm(regs_.R(IRREG_FPCOND), tempReg); + AND(32, regs_.R(IRREG_FPCOND), Imm32(1)); break; + } case IRFpCompareMode::EqualUnordered: regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } }); @@ -458,23 +473,69 @@ void X64JitBackend::CompIR_FCompare(IRInst inst) { case IROp::FCmpVfpuAggregate: regs_.MapGPR(IRREG_VFPU_CC, MIPSMap::DIRTY); - // First, clear out the bits we're aggregating. - // The register refuses writes to bits outside 0x3F, and we're setting 0x30. - AND(32, regs_.R(IRREG_VFPU_CC), Imm8(0xF)); + if (inst.dest == 1) { + // Special case 1, which is not uncommon. 
+ AND(32, regs_.R(IRREG_VFPU_CC), Imm8(0xF)); + BT(32, regs_.R(IRREG_VFPU_CC), Imm8(0)); + FixupBranch skip = J_CC(CC_NC); + OR(32, regs_.R(IRREG_VFPU_CC), Imm8(0x30)); + SetJumpTarget(skip); + } else if (inst.dest == 3) { + AND(32, regs_.R(IRREG_VFPU_CC), Imm8(0xF)); + MOV(32, R(SCRATCH1), regs_.R(IRREG_VFPU_CC)); + AND(32, R(SCRATCH1), Imm8(3)); + // 0, 1, and 3 are already correct for the any and all bits. + CMP(32, R(SCRATCH1), Imm8(2)); + + FixupBranch skip = J_CC(CC_NE); + SUB(32, R(SCRATCH1), Imm8(1)); + SetJumpTarget(skip); - // Set the any bit. - TEST(32, regs_.R(IRREG_VFPU_CC), Imm32(inst.dest)); - SETcc(CC_NZ, R(SCRATCH1)); - SHL(32, R(SCRATCH1), Imm8(4)); - OR(32, regs_.R(IRREG_VFPU_CC), R(SCRATCH1)); + SHL(32, R(SCRATCH1), Imm8(4)); + OR(32, regs_.R(IRREG_VFPU_CC), R(SCRATCH1)); + } else if (inst.dest == 0xF) { + XOR(32, R(SCRATCH1), R(SCRATCH1)); - // Next up, the "all" bit. A bit annoying... - MOV(32, R(SCRATCH1), regs_.R(IRREG_VFPU_CC)); - AND(32, R(SCRATCH1), Imm8(inst.dest)); - CMP(32, R(SCRATCH1), Imm8(inst.dest)); - SETcc(CC_E, R(SCRATCH1)); - SHL(32, R(SCRATCH1), Imm8(5)); - OR(32, regs_.R(IRREG_VFPU_CC), R(SCRATCH1)); + // Clear out the bits we're aggregating. + // The register refuses writes to bits outside 0x3F, and we're setting 0x30. + AND(32, regs_.R(IRREG_VFPU_CC), Imm8(0xF)); + + // Set the any bit, just using the AND above. + FixupBranch noneSet = J_CC(CC_Z); + OR(32, regs_.R(IRREG_VFPU_CC), Imm8(0x10)); + + // Next up, the "all" bit. + CMP(32, regs_.R(IRREG_VFPU_CC), Imm8(0xF)); + SETcc(CC_E, R(SCRATCH1)); + SHL(32, R(SCRATCH1), Imm8(5)); + OR(32, regs_.R(IRREG_VFPU_CC), R(SCRATCH1)); + + SetJumpTarget(noneSet); + } else { + XOR(32, R(SCRATCH1), R(SCRATCH1)); + + // Clear out the bits we're aggregating. + // The register refuses writes to bits outside 0x3F, and we're setting 0x30. + AND(32, regs_.R(IRREG_VFPU_CC), Imm8(0xF)); + + // Set the any bit. 
+ if (regs_.HasLowSubregister(regs_.RX(IRREG_VFPU_CC))) + TEST(8, regs_.R(IRREG_VFPU_CC), Imm8(inst.dest)); + else + TEST(32, regs_.R(IRREG_VFPU_CC), Imm32(inst.dest)); + FixupBranch noneSet = J_CC(CC_Z); + OR(32, regs_.R(IRREG_VFPU_CC), Imm8(0x10)); + + // Next up, the "all" bit. A bit annoying... + MOV(32, R(SCRATCH1), regs_.R(IRREG_VFPU_CC)); + AND(32, R(SCRATCH1), Imm8(inst.dest)); + CMP(32, R(SCRATCH1), Imm8(inst.dest)); + SETcc(CC_E, R(SCRATCH1)); + SHL(32, R(SCRATCH1), Imm8(5)); + OR(32, regs_.R(IRREG_VFPU_CC), R(SCRATCH1)); + + SetJumpTarget(noneSet); + } break; default: @@ -579,11 +640,14 @@ void X64JitBackend::CompIR_FCvt(IRInst inst) { case IROp::FCvtWS: { regs_.Map(inst); - UCOMISS(regs_.FX(inst.src1), M(constants.positiveInfinity)); // rip accessible + UCOMISS(regs_.FX(inst.src1), M(constants.maxIntBelowAsFloat)); // rip accessible CVTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.src1)); - // UCOMISS set ZF if EQUAL (to infinity) or UNORDERED. - FixupBranch skip = J_CC(CC_NZ); + // UCOMISS set CF if LESS and ZF if EQUAL to maxIntBelowAsFloat. + // We want noSignMask otherwise, GREATER or UNORDERED. 
+ FixupBranch isNAN = J_CC(CC_P); + FixupBranch skip = J_CC(CC_BE); + SetJumpTarget(isNAN); MOVAPS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible SetJumpTarget(skip); @@ -599,54 +663,65 @@ void X64JitBackend::CompIR_FCvt(IRInst inst) { regs_.Map(inst); if (cpu_info.bSSE4_1) { int scale = inst.src2 & 0x1F; - int rmode = inst.src2 >> 6; + IRRoundMode rmode = (IRRoundMode)(inst.src2 >> 6); - CVTSS2SD(regs_.FX(inst.dest), regs_.F(inst.src1)); - if (scale != 0) - MULSD(regs_.FX(inst.dest), M(&constants.mulTableVf2i[scale])); // rip accessible + if (scale != 0 && cpu_info.bAVX) { + VMULSS(regs_.FX(inst.dest), regs_.FX(inst.src1), M(&constants.mulTableVf2i[scale])); // rip accessible + } else { + if (inst.dest != inst.src1) + MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1)); + if (scale != 0) + MULSS(regs_.FX(inst.dest), M(&constants.mulTableVf2i[scale])); // rip accessible + } - // On NAN, we want maxInt anyway, so let's let it be the second param. - MAXSD(regs_.FX(inst.dest), M(constants.minIntAsDouble)); // rip accessible - MINSD(regs_.FX(inst.dest), M(constants.maxIntAsDouble)); // rip accessible + UCOMISS(regs_.FX(inst.dest), M(constants.maxIntBelowAsFloat)); // rip accessible switch (rmode) { - case 0: - ROUNDNEARPD(regs_.FX(inst.dest), regs_.F(inst.dest)); - CVTPD2DQ(regs_.FX(inst.dest), regs_.F(inst.dest)); + case IRRoundMode::RINT_0: + ROUNDNEARPS(regs_.FX(inst.dest), regs_.F(inst.dest)); + CVTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.dest)); break; - case 1: - CVTTPD2DQ(regs_.FX(inst.dest), regs_.F(inst.dest)); + case IRRoundMode::CAST_1: + CVTTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.dest)); break; - case 2: - ROUNDCEILPD(regs_.FX(inst.dest), regs_.F(inst.dest)); - CVTPD2DQ(regs_.FX(inst.dest), regs_.F(inst.dest)); + case IRRoundMode::CEIL_2: + ROUNDCEILPS(regs_.FX(inst.dest), regs_.F(inst.dest)); + CVTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.dest)); break; - case 3: - ROUNDFLOORPD(regs_.FX(inst.dest), regs_.F(inst.dest)); - 
CVTPD2DQ(regs_.FX(inst.dest), regs_.F(inst.dest)); + case IRRoundMode::FLOOR_3: + ROUNDFLOORPS(regs_.FX(inst.dest), regs_.F(inst.dest)); + CVTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.dest)); break; } + + // UCOMISS set CF if LESS and ZF if EQUAL to maxIntBelowAsFloat. + // We want noSignMask otherwise, GREATER or UNORDERED. + FixupBranch isNAN = J_CC(CC_P); + FixupBranch skip = J_CC(CC_BE); + SetJumpTarget(isNAN); + MOVAPS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible + SetJumpTarget(skip); } else { int scale = inst.src2 & 0x1F; - int rmode = inst.src2 >> 6; + IRRoundMode rmode = (IRRoundMode)(inst.src2 >> 6); int setMXCSR = -1; bool useTrunc = false; switch (rmode) { - case 0: + case IRRoundMode::RINT_0: // TODO: Could skip if hasSetRounding, but we don't have the flag. setMXCSR = 0; break; - case 1: + case IRRoundMode::CAST_1: useTrunc = true; break; - case 2: + case IRRoundMode::CEIL_2: setMXCSR = 2; break; - case 3: + case IRRoundMode::FLOOR_3: setMXCSR = 1; break; } @@ -665,21 +740,26 @@ void X64JitBackend::CompIR_FCvt(IRInst inst) { LDMXCSR(MDisp(CTXREG, tempOffset)); } - CVTSS2SD(regs_.FX(inst.dest), regs_.F(inst.src1)); + if (inst.dest != inst.src1) + MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1)); if (scale != 0) - MULSD(regs_.FX(inst.dest), M(&constants.mulTableVf2i[scale])); + MULSS(regs_.FX(inst.dest), M(&constants.mulTableVf2i[scale])); // rip accessible - // On NAN, we want maxInt anyway, so let's let it be the second param. 
- MAXSD(regs_.FX(inst.dest), M(constants.minIntAsDouble)); - MINSD(regs_.FX(inst.dest), M(constants.maxIntAsDouble)); + UCOMISS(regs_.FX(inst.dest), M(constants.maxIntBelowAsFloat)); // rip accessible if (useTrunc) { - CVTTSD2SI(SCRATCH1, regs_.F(inst.dest)); + CVTTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.dest)); } else { - CVTSD2SI(SCRATCH1, regs_.F(inst.dest)); + CVTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.dest)); } - MOVD_xmm(regs_.FX(inst.dest), R(SCRATCH1)); + // UCOMISS set CF if LESS and ZF if EQUAL to maxIntBelowAsFloat. + // We want noSignMask otherwise, GREATER or UNORDERED. + FixupBranch isNAN = J_CC(CC_P); + FixupBranch skip = J_CC(CC_BE); + SetJumpTarget(isNAN); + MOVAPS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible + SetJumpTarget(skip); // Return MXCSR to its previous value. if (setMXCSR != -1) { @@ -704,46 +784,105 @@ void X64JitBackend::CompIR_FRound(IRInst inst) { CONDITIONAL_DISABLE; switch (inst.op) { + case IROp::FCeil: + case IROp::FFloor: case IROp::FRound: - CompIR_Generic(inst); - break; + if (cpu_info.bSSE4_1) { + regs_.Map(inst); + UCOMISS(regs_.FX(inst.src1), M(constants.maxIntBelowAsFloat)); // rip accessible - case IROp::FTrunc: - { - regs_.SpillLockFPR(inst.dest, inst.src1); - X64Reg tempZero = regs_.GetAndLockTempFPR(); - regs_.Map(inst); + switch (inst.op) { + case IROp::FCeil: + ROUNDCEILPS(regs_.FX(inst.dest), regs_.F(inst.src1)); + break; - CVTTSS2SI(SCRATCH1, regs_.F(inst.src1)); + case IROp::FFloor: + ROUNDFLOORPS(regs_.FX(inst.dest), regs_.F(inst.src1)); + break; - // Did we get an indefinite integer value? 
- CMP(32, R(SCRATCH1), Imm32(0x80000000)); - FixupBranch wasExact = J_CC(CC_NE); + case IROp::FRound: + ROUNDNEARPS(regs_.FX(inst.dest), regs_.F(inst.src1)); + break; - XORPS(tempZero, R(tempZero)); - if (inst.dest == inst.src1) { - CMPSS(regs_.FX(inst.dest), R(tempZero), CMP_LT); - } else if (cpu_info.bAVX) { - VCMPSS(regs_.FX(inst.dest), regs_.FX(inst.src1), R(tempZero), CMP_LT); + default: + INVALIDOP; + } + CVTTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.dest)); + // UCOMISS set CF if LESS and ZF if EQUAL to maxIntBelowAsFloat. + // We want noSignMask otherwise, GREATER or UNORDERED. + FixupBranch isNAN = J_CC(CC_P); + FixupBranch skip = J_CC(CC_BE); + SetJumpTarget(isNAN); + MOVAPS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible + + SetJumpTarget(skip); } else { - MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1)); - CMPSS(regs_.FX(inst.dest), R(tempZero), CMP_LT); - } + regs_.Map(inst); - // At this point, -inf = 0xffffffff, inf/nan = 0x00000000. - // We want -inf to be 0x80000000 inf/nan to be 0x7fffffff, so we flip those bits. - MOVD_xmm(R(SCRATCH1), regs_.FX(inst.dest)); - XOR(32, R(SCRATCH1), Imm32(0x7fffffff)); + int setMXCSR = -1; + switch (inst.op) { + case IROp::FRound: + // TODO: Could skip if hasSetRounding, but we don't have the flag. + setMXCSR = 0; + break; + case IROp::FCeil: + setMXCSR = 2; + break; + case IROp::FFloor: + setMXCSR = 1; + break; + default: + INVALIDOP; + } - SetJumpTarget(wasExact); - MOVD_xmm(regs_.FX(inst.dest), R(SCRATCH1)); + // TODO: Might be possible to cache this and update between instructions? + // Probably kinda expensive to switch each time... 
+ if (setMXCSR != -1) { + STMXCSR(MDisp(CTXREG, mxcsrTempOffset)); + MOV(32, R(SCRATCH1), MDisp(CTXREG, mxcsrTempOffset)); + AND(32, R(SCRATCH1), Imm32(~(3 << 13))); + if (setMXCSR != 0) { + OR(32, R(SCRATCH1), Imm32(setMXCSR << 13)); + } + MOV(32, MDisp(CTXREG, tempOffset), R(SCRATCH1)); + LDMXCSR(MDisp(CTXREG, tempOffset)); + } + + UCOMISS(regs_.FX(inst.src1), M(constants.maxIntBelowAsFloat)); // rip accessible + + CVTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.src1)); + // UCOMISS set CF if LESS and ZF if EQUAL to maxIntBelowAsFloat. + // We want noSignMask otherwise, GREATER or UNORDERED. + FixupBranch isNAN = J_CC(CC_P); + FixupBranch skip = J_CC(CC_BE); + SetJumpTarget(isNAN); + MOVAPS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible + + SetJumpTarget(skip); + + // Return MXCSR to its previous value. + if (setMXCSR != -1) { + LDMXCSR(MDisp(CTXREG, mxcsrTempOffset)); + } + } break; - } - case IROp::FCeil: - case IROp::FFloor: - CompIR_Generic(inst); + case IROp::FTrunc: + { + regs_.Map(inst); + UCOMISS(regs_.FX(inst.src1), M(constants.maxIntBelowAsFloat)); // rip accessible + + CVTTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.src1)); + // UCOMISS set CF if LESS and ZF if EQUAL to maxIntBelowAsFloat. + // We want noSignMask otherwise, GREATER or UNORDERED. 
+ FixupBranch isNAN = J_CC(CC_P); + FixupBranch skip = J_CC(CC_BE); + SetJumpTarget(isNAN); + MOVAPS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible + + SetJumpTarget(skip); break; + } default: INVALIDOP; @@ -833,6 +972,7 @@ void X64JitBackend::CompIR_FSpecial(IRInst inst) { auto callFuncF_F = [&](const void *func) { regs_.FlushBeforeCall(); + WriteDebugProfilerStatus(IRProfilerStatus::MATH_HELPER); #if X64JIT_USE_XMM_CALL if (regs_.IsFPRMapped(inst.src1)) { @@ -865,6 +1005,8 @@ void X64JitBackend::CompIR_FSpecial(IRInst inst) { regs_.MapFPR(inst.dest, MIPSMap::NOINIT); MOVD_xmm(regs_.FX(inst.dest), R(SCRATCH1)); #endif + + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); }; switch (inst.op) { diff --git a/Core/MIPS/x86/X64IRCompLoadStore.cpp b/Core/MIPS/x86/X64IRCompLoadStore.cpp index d033832bf3f0..9b3eea1341d4 100644 --- a/Core/MIPS/x86/X64IRCompLoadStore.cpp +++ b/Core/MIPS/x86/X64IRCompLoadStore.cpp @@ -45,35 +45,41 @@ Gen::OpArg X64JitBackend::PrepareSrc1Address(IRInst inst) { // If it's about to be clobbered, don't waste time pointerifying. Use displacement. bool clobbersSrc1 = !readsFromSrc1 && regs_.IsGPRClobbered(inst.src1); + int32_t disp = (int32_t)inst.constant; + // It can't be this negative, must be a constant address with the top bit set. 
+ if ((disp & 0xC0000000) == 0x80000000) { + disp = inst.constant & 0x7FFFFFFF; + } + #ifdef MASKED_PSP_MEMORY - if (inst.constant > 0) - inst.constant &= Memory::MEMVIEW32_MASK; + if (disp > 0) + disp &= Memory::MEMVIEW32_MASK; #endif OpArg addrArg; if (inst.src1 == MIPS_REG_ZERO) { #ifdef MASKED_PSP_MEMORY - inst.constant &= Memory::MEMVIEW32_MASK; + disp &= Memory::MEMVIEW32_MASK; #endif #if PPSSPP_ARCH(AMD64) - addrArg = MDisp(MEMBASEREG, inst.constant & 0x7FFFFFFF); + addrArg = MDisp(MEMBASEREG, disp & 0x7FFFFFFF); #else - addrArg = M(Memory::base + inst.constant); + addrArg = M(Memory::base + disp); #endif } else if ((jo.cachePointers || src1IsPointer) && !readsFromSrc1 && (!clobbersSrc1 || src1IsPointer)) { X64Reg src1 = regs_.MapGPRAsPointer(inst.src1); - addrArg = MDisp(src1, (int)inst.constant); + addrArg = MDisp(src1, disp); } else { regs_.MapGPR(inst.src1); #ifdef MASKED_PSP_MEMORY - LEA(PTRBITS, SCRATCH1, MDisp(regs_.RX(inst.src1), (int)inst.constant)); + LEA(PTRBITS, SCRATCH1, MDisp(regs_.RX(inst.src1), disp)); AND(PTRBITS, R(SCRATCH1), Imm32(Memory::MEMVIEW32_MASK)); addrArg = MDisp(SCRATCH1, (intptr_t)Memory::base); #else #if PPSSPP_ARCH(AMD64) - addrArg = MComplex(MEMBASEREG, regs_.RX(inst.src1), SCALE_1, (int)inst.constant); + addrArg = MComplex(MEMBASEREG, regs_.RX(inst.src1), SCALE_1, disp); #else - addrArg = MDisp(regs_.RX(inst.src1), Memory::base + inst.constant); + addrArg = MDisp(regs_.RX(inst.src1), Memory::base + disp); #endif #endif } diff --git a/Core/MIPS/x86/X64IRCompSystem.cpp b/Core/MIPS/x86/X64IRCompSystem.cpp index b310aade78d0..9d1723aef552 100644 --- a/Core/MIPS/x86/X64IRCompSystem.cpp +++ b/Core/MIPS/x86/X64IRCompSystem.cpp @@ -20,9 +20,11 @@ #include "Common/Profiler/Profiler.h" #include "Core/Core.h" +#include "Core/Debugger/Breakpoints.h" #include "Core/HLE/HLE.h" #include "Core/HLE/ReplaceTables.h" #include "Core/MemMap.h" +#include "Core/MIPS/MIPSAnalyst.h" #include "Core/MIPS/IR/IRInterpreter.h" #include 
"Core/MIPS/x86/X64IRJit.h" #include "Core/MIPS/x86/X64IRRegCache.h" @@ -62,6 +64,20 @@ void X64JitBackend::CompIR_Basic(IRInst inst) { regs_.Map(inst); if (inst.constant == 0) { XORPS(regs_.FX(inst.dest), regs_.F(inst.dest)); + } else if (inst.constant == 0x7FFFFFFF) { + MOVSS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible + } else if (inst.constant == 0x80000000) { + MOVSS(regs_.FX(inst.dest), M(constants.signBitAll)); // rip accessible + } else if (inst.constant == 0x7F800000) { + MOVSS(regs_.FX(inst.dest), M(constants.positiveInfinity)); // rip accessible + } else if (inst.constant == 0x7FC00000) { + MOVSS(regs_.FX(inst.dest), M(constants.qNAN)); // rip accessible + } else if (inst.constant == 0x3F800000) { + MOVSS(regs_.FX(inst.dest), M(constants.positiveOnes)); // rip accessible + } else if (inst.constant == 0xBF800000) { + MOVSS(regs_.FX(inst.dest), M(constants.negativeOnes)); // rip accessible + } else if (inst.constant == 0x4EFFFFFF) { + MOVSS(regs_.FX(inst.dest), M(constants.maxIntBelowAsFloat)); // rip accessible } else { MOV(32, R(SCRATCH1), Imm32(inst.constant)); MOVD_xmm(regs_.FX(inst.dest), R(SCRATCH1)); @@ -74,6 +90,7 @@ void X64JitBackend::CompIR_Basic(IRInst inst) { break; case IROp::SetPCConst: + lastConstPC_ = inst.constant; MOV(32, R(SCRATCH1), Imm32(inst.constant)); MovToPC(SCRATCH1); break; @@ -97,17 +114,80 @@ void X64JitBackend::CompIR_Breakpoint(IRInst inst) { break; case IROp::MemoryCheck: - { - X64Reg addrBase = regs_.MapGPR(inst.src1); - FlushAll(); - LEA(32, addrBase, MDisp(addrBase, inst.constant)); - MovFromPC(SCRATCH1); - LEA(32, SCRATCH1, MDisp(SCRATCH1, inst.dest)); - ABI_CallFunctionRR((const void *)&IRRunMemCheck, SCRATCH1, addrBase); - TEST(32, R(EAX), R(EAX)); - J_CC(CC_NZ, dispatcherCheckCoreState_, true); + if (regs_.IsGPRImm(inst.src1)) { + uint32_t iaddr = regs_.GetGPRImm(inst.src1) + inst.constant; + uint32_t checkedPC = lastConstPC_ + inst.dest; + int size = MIPSAnalyst::OpMemoryAccessSize(checkedPC); + 
if (size == 0) { + checkedPC += 4; + size = MIPSAnalyst::OpMemoryAccessSize(checkedPC); + } + bool isWrite = MIPSAnalyst::IsOpMemoryWrite(checkedPC); + + MemCheck check; + if (CBreakPoints::GetMemCheckInRange(iaddr, size, &check)) { + if (!(check.cond & MEMCHECK_READ) && !isWrite) + break; + if (!(check.cond & (MEMCHECK_WRITE | MEMCHECK_WRITE_ONCHANGE)) && isWrite) + break; + + // We need to flush, or conditions and log expressions will see old register values. + FlushAll(); + + ABI_CallFunctionCC((const void *)&IRRunMemCheck, checkedPC, iaddr); + TEST(32, R(EAX), R(EAX)); + J_CC(CC_NZ, dispatcherCheckCoreState_, true); + } + } else { + uint32_t checkedPC = lastConstPC_ + inst.dest; + int size = MIPSAnalyst::OpMemoryAccessSize(checkedPC); + if (size == 0) { + checkedPC += 4; + size = MIPSAnalyst::OpMemoryAccessSize(checkedPC); + } + bool isWrite = MIPSAnalyst::IsOpMemoryWrite(checkedPC); + + const auto memchecks = CBreakPoints::GetMemCheckRanges(isWrite); + // We can trivially skip if there are no checks for this type (i.e. read vs write.) + if (memchecks.empty()) + break; + + X64Reg addrBase = regs_.MapGPR(inst.src1); + LEA(32, SCRATCH1, MDisp(addrBase, inst.constant)); + + // We need to flush, or conditions and log expressions will see old register values. + FlushAll(); + + std::vector hitChecks; + for (auto it : memchecks) { + if (it.end != 0) { + CMP(32, R(SCRATCH1), Imm32(it.start - size)); + FixupBranch skipNext = J_CC(CC_BE); + + CMP(32, R(SCRATCH1), Imm32(it.end)); + hitChecks.push_back(J_CC(CC_B, true)); + + SetJumpTarget(skipNext); + } else { + CMP(32, R(SCRATCH1), Imm32(it.start)); + hitChecks.push_back(J_CC(CC_E, true)); + } + } + + FixupBranch noHits = J(true); + + // Okay, now land any hit here. 
+ for (auto &fixup : hitChecks) + SetJumpTarget(fixup); + hitChecks.clear(); + + ABI_CallFunctionAA((const void *)&IRRunMemCheck, Imm32(checkedPC), R(SCRATCH1)); + TEST(32, R(EAX), R(EAX)); + J_CC(CC_NZ, dispatcherCheckCoreState_, true); + + SetJumpTarget(noHits); + } break; - } default: INVALIDOP; @@ -123,6 +203,7 @@ void X64JitBackend::CompIR_System(IRInst inst) { FlushAll(); SaveStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::SYSCALL); #ifdef USE_PROFILER // When profiling, we can't skip CallSyscall, since it times syscalls. ABI_CallFunctionC((const u8 *)&CallSyscall, inst.constant); @@ -139,6 +220,7 @@ void X64JitBackend::CompIR_System(IRInst inst) { } #endif + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); LoadStaticRegisters(); // This is always followed by an ExitToPC, where we check coreState. break; @@ -146,14 +228,26 @@ void X64JitBackend::CompIR_System(IRInst inst) { case IROp::CallReplacement: FlushAll(); SaveStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::REPLACEMENT); ABI_CallFunction(GetReplacementFunc(inst.constant)->replaceFunc); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); LoadStaticRegisters(); //SUB(32, R(DOWNCOUNTREG), R(DOWNCOUNTREG), R(EAX)); SUB(32, MDisp(CTXREG, downcountOffset), R(EAX)); break; case IROp::Break: - CompIR_Generic(inst); + FlushAll(); + // This doesn't naturally have restore/apply around it. + RestoreRoundingMode(true); + SaveStaticRegisters(); + MovFromPC(SCRATCH1); + ABI_CallFunctionR((const void *)&Core_Break, SCRATCH1); + LoadStaticRegisters(); + ApplyRoundingMode(true); + MovFromPC(SCRATCH1); + LEA(32, SCRATCH1, MDisp(SCRATCH1, 4)); + JMP(dispatcherPCInSCRATCH1_, true); break; default: @@ -191,8 +285,34 @@ void X64JitBackend::CompIR_Transfer(IRInst inst) { break; case IROp::FpCtrlFromReg: + regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } }); + // Mask out the unused bits, and store fcr31 (using fpcond as a temp.) 
+ MOV(32, regs_.R(IRREG_FPCOND), Imm32(0x0181FFFF)); + AND(32, regs_.R(IRREG_FPCOND), regs_.R(inst.src1)); + MOV(32, MDisp(CTXREG, fcr31Offset), regs_.R(IRREG_FPCOND)); + + // With that done, grab bit 23, the actual fpcond. + SHR(32, regs_.R(IRREG_FPCOND), Imm8(23)); + AND(32, regs_.R(IRREG_FPCOND), Imm32(1)); + break; + case IROp::FpCtrlToReg: - CompIR_Generic(inst); + regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::INIT } }); + // Start by clearing the fpcond bit (might as well mask while we're here.) + MOV(32, regs_.R(inst.dest), Imm32(0x0101FFFF)); + AND(32, regs_.R(inst.dest), MDisp(CTXREG, fcr31Offset)); + + AND(32, regs_.R(IRREG_FPCOND), Imm32(1)); + if (cpu_info.bBMI2) { + RORX(32, SCRATCH1, regs_.R(IRREG_FPCOND), 32 - 23); + } else { + MOV(32, R(SCRATCH1), regs_.R(IRREG_FPCOND)); + SHL(32, R(SCRATCH1), Imm8(23)); + } + OR(32, regs_.R(inst.dest), R(SCRATCH1)); + + // Update fcr31 while we were here, for consistency. + MOV(32, MDisp(CTXREG, fcr31Offset), regs_.R(inst.dest)); break; case IROp::VfpuCtrlToReg: @@ -221,23 +341,6 @@ void X64JitBackend::CompIR_Transfer(IRInst inst) { } } -int ReportBadAddress(uint32_t addr, uint32_t alignment, uint32_t isWrite) { - const auto toss = [&](MemoryExceptionType t) { - Core_MemoryException(addr, alignment, currentMIPS->pc, t); - return coreState != CORE_RUNNING ? 1 : 0; - }; - - if (!Memory::IsValidRange(addr, alignment)) { - MemoryExceptionType t = isWrite == 1 ? MemoryExceptionType::WRITE_WORD : MemoryExceptionType::READ_WORD; - if (alignment > 4) - t = isWrite ? MemoryExceptionType::WRITE_BLOCK : MemoryExceptionType::READ_BLOCK; - return toss(t); - } else if (alignment > 1 && (addr & (alignment - 1)) != 0) { - return toss(MemoryExceptionType::ALIGNMENT); - } - return 0; -} - void X64JitBackend::CompIR_ValidateAddress(IRInst inst) { CONDITIONAL_DISABLE; @@ -265,10 +368,17 @@ void X64JitBackend::CompIR_ValidateAddress(IRInst inst) { break; } - // This is unfortunate... 
- FlushAll(); - regs_.Map(inst); - LEA(PTRBITS, SCRATCH1, MDisp(regs_.RX(inst.src1), inst.constant)); + if (regs_.IsGPRMappedAsPointer(inst.src1)) { + LEA(PTRBITS, SCRATCH1, MDisp(regs_.RXPtr(inst.src1), inst.constant)); +#if defined(MASKED_PSP_MEMORY) + SUB(PTRBITS, R(SCRATCH1), ImmPtr(Memory::base)); +#else + SUB(PTRBITS, R(SCRATCH1), R(MEMBASEREG)); +#endif + } else { + regs_.Map(inst); + LEA(PTRBITS, SCRATCH1, MDisp(regs_.RX(inst.src1), inst.constant)); + } AND(32, R(SCRATCH1), Imm32(0x3FFFFFFF)); std::vector validJumps; @@ -282,25 +392,32 @@ void X64JitBackend::CompIR_ValidateAddress(IRInst inst) { CMP(32, R(SCRATCH1), Imm32(PSP_GetUserMemoryEnd() - alignment)); FixupBranch tooHighRAM = J_CC(CC_A); CMP(32, R(SCRATCH1), Imm32(PSP_GetKernelMemoryBase())); - validJumps.push_back(J_CC(CC_AE)); + validJumps.push_back(J_CC(CC_AE, true)); CMP(32, R(SCRATCH1), Imm32(PSP_GetVidMemEnd() - alignment)); FixupBranch tooHighVid = J_CC(CC_A); CMP(32, R(SCRATCH1), Imm32(PSP_GetVidMemBase())); - validJumps.push_back(J_CC(CC_AE)); + validJumps.push_back(J_CC(CC_AE, true)); CMP(32, R(SCRATCH1), Imm32(PSP_GetScratchpadMemoryEnd() - alignment)); FixupBranch tooHighScratch = J_CC(CC_A); CMP(32, R(SCRATCH1), Imm32(PSP_GetScratchpadMemoryBase())); - validJumps.push_back(J_CC(CC_AE)); + validJumps.push_back(J_CC(CC_AE, true)); + if (alignment != 1) + SetJumpTarget(unaligned); SetJumpTarget(tooHighRAM); SetJumpTarget(tooHighVid); SetJumpTarget(tooHighScratch); + // If we got here, something unusual and bad happened, so we'll always go back to the dispatcher. + // Because of that, we can avoid flushing outside this case. + auto regsCopy = regs_; + regsCopy.FlushAll(); + + // Ignores the return value, always returns to the dispatcher. + // Otherwise would need a thunk to restore regs. 
ABI_CallFunctionACC((const void *)&ReportBadAddress, R(SCRATCH1), alignment, isWrite); - TEST(32, R(EAX), R(EAX)); - validJumps.push_back(J_CC(CC_Z)); JMP(dispatcherCheckCoreState_, true); for (FixupBranch &b : validJumps) diff --git a/Core/MIPS/x86/X64IRJit.cpp b/Core/MIPS/x86/X64IRJit.cpp index f70901eba8c3..98279e39895d 100644 --- a/Core/MIPS/x86/X64IRJit.cpp +++ b/Core/MIPS/x86/X64IRJit.cpp @@ -19,6 +19,7 @@ #if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64) #include +#include "Common/StringUtils.h" #include "Core/MemMap.h" #include "Core/MIPS/MIPSTables.h" #include "Core/MIPS/x86/X64IRJit.h" @@ -63,6 +64,8 @@ bool X64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) { SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer())); wroteCheckedOffset = true; + WriteDebugPC(startPC); + // TODO: See if we can get flags to always have the downcount compare. if (jo.downcountInRegister) { TEST(32, R(DOWNCOUNTREG), R(DOWNCOUNTREG)); @@ -79,6 +82,7 @@ bool X64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) { const u8 *blockStart = GetCodePointer(); block->SetTargetOffset((int)GetOffset(blockStart)); compilingBlockNum_ = block_num; + lastConstPC_ = 0; regs_.Start(block); @@ -120,6 +124,8 @@ bool X64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) { } if (jo.enableBlocklink && jo.useBackJump) { + WriteDebugPC(startPC); + if (jo.downcountInRegister) { TEST(32, R(DOWNCOUNTREG), R(DOWNCOUNTREG)); } else { @@ -214,11 +220,13 @@ void X64JitBackend::CompIR_Generic(IRInst inst) { FlushAll(); SaveStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::IR_INTERPRET); #if PPSSPP_ARCH(AMD64) ABI_CallFunctionP((const void *)&DoIRInst, (void *)value); #else ABI_CallFunctionCC((const void *)&DoIRInst, (u32)(value & 0xFFFFFFFF), (u32)(value >> 32)); #endif + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); LoadStaticRegisters(); // We only need to check the return value if it's a potential exit. 
@@ -236,10 +244,12 @@ void X64JitBackend::CompIR_Interpret(IRInst inst) { // IR protects us against this being a branching instruction (well, hopefully.) FlushAll(); SaveStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::INTERPRET); if (DebugStatsEnabled()) { ABI_CallFunctionP((const void *)&NotifyMIPSInterpret, (void *)MIPSGetName(op)); } ABI_CallFunctionC((const void *)MIPSGetInterpretFunc(op), inst.constant); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); LoadStaticRegisters(); } @@ -265,7 +275,31 @@ bool X64JitBackend::DescribeCodePtr(const u8 *ptr, std::string &name) const { } else if (ptr == applyRoundingMode_) { name = "applyRoundingMode"; } else if (ptr >= GetBasePtr() && ptr < GetBasePtr() + jitStartOffset_) { - name = "fixedCode"; + if (ptr == constants.noSignMask) { + name = "constants.noSignMask"; + } else if (ptr == constants.signBitAll) { + name = "constants.signBitAll"; + } else if (ptr == constants.positiveZeroes) { + name = "constants.positiveZeroes"; + } else if (ptr == constants.positiveInfinity) { + name = "constants.positiveInfinity"; + } else if (ptr == constants.positiveOnes) { + name = "constants.positiveOnes"; + } else if (ptr == constants.negativeOnes) { + name = "constants.negativeOnes"; + } else if (ptr == constants.qNAN) { + name = "constants.qNAN"; + } else if (ptr == constants.maxIntBelowAsFloat) { + name = "constants.maxIntBelowAsFloat"; + } else if ((const float *)ptr >= constants.mulTableVi2f && (const float *)ptr < constants.mulTableVi2f + 32) { + name = StringFromFormat("constants.mulTableVi2f[%d]", (int)((const float *)ptr - constants.mulTableVi2f)); + } else if ((const float *)ptr >= constants.mulTableVf2i && (const float *)ptr < constants.mulTableVf2i + 32) { + name = StringFromFormat("constants.mulTableVf2i[%d]", (int)((const float *)ptr - constants.mulTableVf2i)); + } else if ((const Float4Constant *)ptr >= constants.vec4InitValues && (const Float4Constant *)ptr < constants.vec4InitValues + 8) { + name = 
StringFromFormat("constants.vec4InitValues[%d]", (int)((const Float4Constant *)ptr - constants.vec4InitValues)); + } else { + name = "fixedCode"; + } } else { return IRNativeBackend::DescribeCodePtr(ptr, name); } @@ -320,6 +354,21 @@ void X64JitBackend::MovToPC(X64Reg r) { MOV(32, MDisp(CTXREG, pcOffset), R(r)); } +void X64JitBackend::WriteDebugPC(uint32_t pc) { + if (hooks_.profilerPC) + MOV(32, M(hooks_.profilerPC), Imm32(pc)); +} + +void X64JitBackend::WriteDebugPC(Gen::X64Reg r) { + if (hooks_.profilerPC) + MOV(32, M(hooks_.profilerPC), R(r)); +} + +void X64JitBackend::WriteDebugProfilerStatus(IRProfilerStatus status) { + if (hooks_.profilerPC) + MOV(32, M(hooks_.profilerStatus), Imm32((int32_t)status)); +} + void X64JitBackend::SaveStaticRegisters() { if (jo.useStaticAlloc) { //CALL(saveStaticRegisters_); diff --git a/Core/MIPS/x86/X64IRJit.h b/Core/MIPS/x86/X64IRJit.h index 6a2c09aef5d2..15a2fb9b449c 100644 --- a/Core/MIPS/x86/X64IRJit.h +++ b/Core/MIPS/x86/X64IRJit.h @@ -66,6 +66,9 @@ class X64JitBackend : public Gen::XCodeBlock, public IRNativeBackend { void ApplyRoundingMode(bool force = false); void MovFromPC(Gen::X64Reg r); void MovToPC(Gen::X64Reg r); + void WriteDebugPC(uint32_t pc); + void WriteDebugPC(Gen::X64Reg r); + void WriteDebugProfilerStatus(IRProfilerStatus status); void SaveStaticRegisters(); void LoadStaticRegisters(); @@ -144,14 +147,14 @@ class X64JitBackend : public Gen::XCodeBlock, public IRNativeBackend { struct Constants { const void *noSignMask; const void *signBitAll; + const void *positiveZeroes; const void *positiveInfinity; const void *positiveOnes; const void *negativeOnes; const void *qNAN; + const void *maxIntBelowAsFloat; const float *mulTableVi2f; - const double *mulTableVf2i; - const double *minIntAsDouble; - const double *maxIntAsDouble; + const float *mulTableVf2i; const Float4Constant *vec4InitValues; }; Constants constants; @@ -159,6 +162,8 @@ class X64JitBackend : public Gen::XCodeBlock, public IRNativeBackend { int 
jitStartOffset_ = 0; int compilingBlockNum_ = -1; int logBlocks_ = 0; + // Only useful in breakpoints, where it's set immediately prior. + uint32_t lastConstPC_ = 0; }; class X64IRJit : public IRNativeJit { diff --git a/Core/MIPS/x86/X64IRRegCache.cpp b/Core/MIPS/x86/X64IRRegCache.cpp index a169a43791c8..cfbb57712e26 100644 --- a/Core/MIPS/x86/X64IRRegCache.cpp +++ b/Core/MIPS/x86/X64IRRegCache.cpp @@ -147,6 +147,67 @@ void X64IRRegCache::FlushBeforeCall() { #endif } +void X64IRRegCache::FlushAll(bool gprs, bool fprs) { + // Note: make sure not to change the registers when flushing: + // Branching code may expect the x64reg to retain its value. + + auto needsFlush = [&](IRReg i) { + if (mr[i].loc != MIPSLoc::MEM || mr[i].isStatic) + return false; + if (mr[i].nReg == -1 || !nr[mr[i].nReg].isDirty) + return false; + return true; + }; + + auto isSingleFloat = [&](IRReg i) { + if (mr[i].lane != -1 || mr[i].loc != MIPSLoc::FREG) + return false; + return true; + }; + + // Sometimes, float/vector regs may be in separate regs in a sequence. + // It's worth combining and flushing together. + for (int i = 1; i < TOTAL_MAPPABLE_IRREGS - 1; ++i) { + if (!needsFlush(i) || !needsFlush(i + 1)) + continue; + // GPRs are probably not worth it. Merging Vec2s might be, but pretty uncommon. + if (!isSingleFloat(i) || !isSingleFloat(i + 1)) + continue; + + X64Reg regs[4]{ INVALID_REG, INVALID_REG, INVALID_REG, INVALID_REG }; + regs[0] = FromNativeReg(mr[i + 0].nReg); + regs[1] = FromNativeReg(mr[i + 1].nReg); + + bool flushVec4 = i + 3 < TOTAL_MAPPABLE_IRREGS && needsFlush(i + 2) && needsFlush(i + 3); + if (flushVec4 && isSingleFloat(i + 2) && isSingleFloat(i + 3) && (i & 3) == 0) { + regs[2] = FromNativeReg(mr[i + 2].nReg); + regs[3] = FromNativeReg(mr[i + 3].nReg); + + // Note that this doesn't change the low lane of any of these regs. 
+ emit_->UNPCKLPS(regs[1], ::R(regs[3])); + emit_->UNPCKLPS(regs[0], ::R(regs[2])); + emit_->UNPCKLPS(regs[0], ::R(regs[1])); + emit_->MOVAPS(MDisp(CTXREG, -128 + GetMipsRegOffset(i)), regs[0]); + + for (int j = 0; j < 4; ++j) + DiscardReg(i + j); + i += 3; + continue; + } + + // TODO: Maybe this isn't always worth doing. + emit_->UNPCKLPS(regs[0], ::R(regs[1])); + emit_->MOVLPS(MDisp(CTXREG, -128 + GetMipsRegOffset(i)), regs[0]); + + DiscardReg(i); + DiscardReg(i + 1); + ++i; + continue; + } + + IRNativeRegCacheBase::FlushAll(gprs, fprs); +} + X64Reg X64IRRegCache::TryMapTempImm(IRReg r, X64Map flags) { _dbg_assert_(IsValidGPR(r)); @@ -353,6 +414,8 @@ void X64IRRegCache::LoadNativeReg(IRNativeReg nreg, IRReg first, int lanes) { emit_->MOVSS(r, MDisp(CTXREG, -128 + GetMipsRegOffset(first))); else if (lanes == 2) emit_->MOVLPS(r, MDisp(CTXREG, -128 + GetMipsRegOffset(first))); + else if (lanes == 4 && (first & 3) == 0) + emit_->MOVAPS(r, MDisp(CTXREG, -128 + GetMipsRegOffset(first))); else if (lanes == 4) emit_->MOVUPS(r, MDisp(CTXREG, -128 + GetMipsRegOffset(first))); else @@ -381,6 +444,8 @@ void X64IRRegCache::StoreNativeReg(IRNativeReg nreg, IRReg first, int lanes) { emit_->MOVSS(MDisp(CTXREG, -128 + GetMipsRegOffset(first)), r); else if (lanes == 2) emit_->MOVLPS(MDisp(CTXREG, -128 + GetMipsRegOffset(first)), r); + else if (lanes == 4 && (first & 3) == 0) + emit_->MOVAPS(MDisp(CTXREG, -128 + GetMipsRegOffset(first)), r); else if (lanes == 4) emit_->MOVUPS(MDisp(CTXREG, -128 + GetMipsRegOffset(first)), r); else @@ -388,6 +453,275 @@ void X64IRRegCache::StoreNativeReg(IRNativeReg nreg, IRReg first, int lanes) { } } +bool X64IRRegCache::TransferNativeReg(IRNativeReg nreg, IRNativeReg dest, MIPSLoc type, IRReg first, int lanes, MIPSMap flags) { + bool allowed = !mr[nr[nreg].mipsReg].isStatic; + // There's currently no support for non-XMMs here. 
+ allowed = allowed && type == MIPSLoc::FREG; + + if (dest == -1) + dest = nreg; + + if (allowed && (flags == MIPSMap::INIT || flags == MIPSMap::DIRTY)) { + // Alright, changing lane count (possibly including lane position.) + IRReg oldfirst = nr[nreg].mipsReg; + int oldlanes = 0; + while (mr[oldfirst + oldlanes].nReg == nreg) + oldlanes++; + _assert_msg_(oldlanes != 0, "TransferNativeReg encountered nreg mismatch"); + _assert_msg_(oldlanes != lanes, "TransferNativeReg transfer to same lanecount, misaligned?"); + + if (lanes == 1 && TransferVecTo1(nreg, dest, first, oldlanes)) + return true; + if (oldlanes == 1 && Transfer1ToVec(nreg, dest, first, lanes)) + return true; + } + + return IRNativeRegCacheBase::TransferNativeReg(nreg, dest, type, first, lanes, flags); +} + +bool X64IRRegCache::TransferVecTo1(IRNativeReg nreg, IRNativeReg dest, IRReg first, int oldlanes) { + IRReg oldfirst = nr[nreg].mipsReg; + + // Is it worth preserving any of the old regs? + int numKept = 0; + for (int i = 0; i < oldlanes; ++i) { + // Skip whichever one this is extracting. + if (oldfirst + i == first) + continue; + // If 0 isn't being transfered, easy to keep in its original reg. + if (i == 0 && dest != nreg) { + numKept++; + continue; + } + + IRNativeReg freeReg = FindFreeReg(MIPSLoc::FREG, MIPSMap::INIT); + if (freeReg != -1 && IsRegRead(MIPSLoc::FREG, oldfirst + i)) { + // If there's one free, use it. Don't modify nreg, though. + u8 shuf = VFPU_SWIZZLE(i, i, i, i); + if (i == 0) { + emit_->MOVAPS(FromNativeReg(freeReg), ::R(FromNativeReg(nreg))); + } else if (cpu_info.bAVX) { + emit_->VPERMILPS(128, FromNativeReg(freeReg), ::R(FromNativeReg(nreg)), shuf); + } else if (i == 2) { + emit_->MOVHLPS(FromNativeReg(freeReg), FromNativeReg(nreg)); + } else { + emit_->MOVAPS(FromNativeReg(freeReg), ::R(FromNativeReg(nreg))); + emit_->SHUFPS(FromNativeReg(freeReg), ::R(FromNativeReg(freeReg)), shuf); + } + + // Update accounting. 
+ nr[freeReg].isDirty = nr[nreg].isDirty; + nr[freeReg].mipsReg = oldfirst + i; + mr[oldfirst + i].lane = -1; + mr[oldfirst + i].nReg = freeReg; + numKept++; + } + } + + // Unless all other lanes were kept, store. + if (nr[nreg].isDirty && numKept < oldlanes - 1) { + StoreNativeReg(nreg, oldfirst, oldlanes); + // Set false even for regs that were split out, since they were flushed too. + for (int i = 0; i < oldlanes; ++i) { + if (mr[oldfirst + i].nReg != -1) + nr[mr[oldfirst + i].nReg].isDirty = false; + } + } + + // Next, shuffle the desired element into first place. + u8 shuf = VFPU_SWIZZLE(mr[first].lane, mr[first].lane, mr[first].lane, mr[first].lane); + if (mr[first].lane > 0 && cpu_info.bAVX && dest != nreg) { + emit_->VPERMILPS(128, FromNativeReg(dest), ::R(FromNativeReg(nreg)), shuf); + } else if (mr[first].lane <= 0 && dest != nreg) { + emit_->MOVAPS(FromNativeReg(dest), ::R(FromNativeReg(nreg))); + } else if (mr[first].lane == 2) { + emit_->MOVHLPS(FromNativeReg(dest), FromNativeReg(nreg)); + } else if (mr[first].lane > 0) { + if (dest != nreg) + emit_->MOVAPS(FromNativeReg(dest), ::R(FromNativeReg(nreg))); + emit_->SHUFPS(FromNativeReg(dest), ::R(FromNativeReg(dest)), shuf); + } + + // Now update accounting. + for (int i = 0; i < oldlanes; ++i) { + auto &mreg = mr[oldfirst + i]; + if (oldfirst + i == first) { + mreg.lane = -1; + mreg.nReg = dest; + } else if (mreg.nReg == nreg && i == 0 && nreg != dest) { + // Still in the same register, but no longer a vec. + mreg.lane = -1; + } else if (mreg.nReg == nreg) { + // No longer in a register. 
+ mreg.nReg = -1; + mreg.lane = -1; + mreg.loc = MIPSLoc::MEM; + } + } + + if (dest != nreg) { + nr[dest].isDirty = nr[nreg].isDirty; + if (oldfirst == first) { + nr[nreg].mipsReg = -1; + nr[nreg].isDirty = false; + } + } + nr[dest].mipsReg = first; + + return true; +} + +bool X64IRRegCache::Transfer1ToVec(IRNativeReg nreg, IRNativeReg dest, IRReg first, int lanes) { + X64Reg cur[4]{}; + int numInRegs = 0; + u8 blendMask = 0; + for (int i = 0; i < lanes; ++i) { + if (mr[first + i].lane != -1 || (i != 0 && mr[first + i].spillLockIRIndex >= irIndex_)) { + // Can't do it, either double mapped or overlapping vec. + return false; + } + + if (mr[first + i].nReg == -1) { + cur[i] = INVALID_REG; + blendMask |= 1 << i; + } else { + cur[i] = FromNativeReg(mr[first + i].nReg); + numInRegs++; + } + } + + // Shouldn't happen, this should only get called to transfer one in a reg. + if (numInRegs == 0) + return false; + + // Move things together into a reg. + if (lanes == 4 && cpu_info.bSSE4_1 && numInRegs == 1 && (first & 3) == 0) { + // Use a blend to grab the rest. BLENDPS is pretty good. + if (cpu_info.bAVX && nreg != dest) { + if (cur[0] == INVALID_REG) { + // Broadcast to all lanes, then blend from memory to replace. + emit_->VPERMILPS(128, FromNativeReg(dest), ::R(FromNativeReg(nreg)), 0); + emit_->BLENDPS(FromNativeReg(dest), MDisp(CTXREG, -128 + GetMipsRegOffset(first)), blendMask); + } else { + emit_->VBLENDPS(128, FromNativeReg(dest), FromNativeReg(nreg), MDisp(CTXREG, -128 + GetMipsRegOffset(first)), blendMask); + } + cur[0] = FromNativeReg(dest); + } else { + if (cur[0] == INVALID_REG) + emit_->SHUFPS(FromNativeReg(nreg), ::R(FromNativeReg(nreg)), 0); + emit_->BLENDPS(FromNativeReg(nreg), MDisp(CTXREG, -128 + GetMipsRegOffset(first)), blendMask); + // If this is not dest, it'll get moved there later. + cur[0] = FromNativeReg(nreg); + } + } else if (lanes == 4) { + if (blendMask == 0) { + // y = yw##, x = xz##, x = xyzw. 
+ emit_->UNPCKLPS(cur[1], ::R(cur[3])); + emit_->UNPCKLPS(cur[0], ::R(cur[2])); + emit_->UNPCKLPS(cur[0], ::R(cur[1])); + } else if (blendMask == 0b1100) { + // x = xy##, then load zw. + emit_->UNPCKLPS(cur[0], ::R(cur[1])); + emit_->MOVHPS(cur[0], MDisp(CTXREG, -128 + GetMipsRegOffset(first + 2))); + } else if (blendMask == 0b1010 && cpu_info.bSSE4_1 && (first & 3) == 0) { + // x = x#z#, x = xyzw. + emit_->SHUFPS(cur[0], ::R(cur[2]), VFPU_SWIZZLE(0, 0, 0, 0)); + emit_->BLENDPS(cur[0], MDisp(CTXREG, -128 + GetMipsRegOffset(first)), blendMask); + } else if (blendMask == 0b0110 && cpu_info.bSSE4_1 && (first & 3) == 0) { + // x = x##w, x = xyzw. + emit_->SHUFPS(cur[0], ::R(cur[3]), VFPU_SWIZZLE(0, 0, 0, 0)); + emit_->BLENDPS(cur[0], MDisp(CTXREG, -128 + GetMipsRegOffset(first)), blendMask); + } else if (blendMask == 0b1001 && cpu_info.bSSE4_1 && (first & 3) == 0) { + // y = #yz#, y = xyzw. + emit_->SHUFPS(cur[1], ::R(cur[2]), VFPU_SWIZZLE(0, 0, 0, 0)); + emit_->BLENDPS(cur[1], MDisp(CTXREG, -128 + GetMipsRegOffset(first)), blendMask); + // Will be moved to dest as needed. + cur[0] = cur[1]; + } else if (blendMask == 0b0101 && cpu_info.bSSE4_1 && (first & 3) == 0) { + // y = #y#w, y = xyzw. + emit_->SHUFPS(cur[1], ::R(cur[3]), VFPU_SWIZZLE(0, 0, 0, 0)); + emit_->BLENDPS(cur[1], MDisp(CTXREG, -128 + GetMipsRegOffset(first)), blendMask); + // Will be moved to dest as needed. + cur[0] = cur[1]; + } else if (blendMask == 0b1000) { + // x = xz##, z = w###, y = yw##, x = xyzw. + emit_->UNPCKLPS(cur[0], ::R(cur[2])); + emit_->MOVSS(cur[2], MDisp(CTXREG, -128 + GetMipsRegOffset(first + 3))); + emit_->UNPCKLPS(cur[1], ::R(cur[2])); + emit_->UNPCKLPS(cur[0], ::R(cur[1])); + } else if (blendMask == 0b0100) { + // y = yw##, w = z###, x = xz##, x = xyzw. 
+ emit_->UNPCKLPS(cur[1], ::R(cur[3])); + emit_->MOVSS(cur[3], MDisp(CTXREG, -128 + GetMipsRegOffset(first + 2))); + emit_->UNPCKLPS(cur[0], ::R(cur[3])); + emit_->UNPCKLPS(cur[0], ::R(cur[1])); + } else if (blendMask == 0b0010) { + // z = zw##, w = y###, x = xy##, x = xyzw. + emit_->UNPCKLPS(cur[2], ::R(cur[3])); + emit_->MOVSS(cur[3], MDisp(CTXREG, -128 + GetMipsRegOffset(first + 1))); + emit_->UNPCKLPS(cur[0], ::R(cur[3])); + emit_->MOVLHPS(cur[0], cur[2]); + } else if (blendMask == 0b0001) { + // y = yw##, w = x###, w = xz##, w = xyzw. + emit_->UNPCKLPS(cur[1], ::R(cur[3])); + emit_->MOVSS(cur[3], MDisp(CTXREG, -128 + GetMipsRegOffset(first + 0))); + emit_->UNPCKLPS(cur[3], ::R(cur[2])); + emit_->UNPCKLPS(cur[3], ::R(cur[1])); + // Will be moved to dest as needed. + cur[0] = cur[3]; + } else if (blendMask == 0b0011) { + // z = zw##, w = xy##, w = xyzw. + emit_->UNPCKLPS(cur[2], ::R(cur[3])); + emit_->MOVLPS(cur[3], MDisp(CTXREG, -128 + GetMipsRegOffset(first + 0))); + emit_->MOVLHPS(cur[3], cur[2]); + // Will be moved to dest as needed. + cur[0] = cur[3]; + } else { + // This must mean no SSE4, and numInRegs <= 2 in trickier cases. + return false; + } + } else if (lanes == 2) { + if (cur[0] != INVALID_REG && cur[1] != INVALID_REG) { + emit_->UNPCKLPS(cur[0], ::R(cur[1])); + } else if (cur[0] != INVALID_REG && cpu_info.bSSE4_1) { + emit_->INSERTPS(cur[0], MDisp(CTXREG, -128 + GetMipsRegOffset(first + 1)), 1); + } else { + return false; + } + } else { + return false; + } + + mr[first].lane = 0; + for (int i = 0; i < lanes; ++i) { + if (mr[first + i].nReg != -1) { + // If this was dirty, the combined reg is now dirty. + if (nr[mr[first + i].nReg].isDirty) + nr[dest].isDirty = true; + + // Throw away the other register we're no longer using. + if (i != 0) + DiscardNativeReg(mr[first + i].nReg); + } + + // And set it as using the new one. 
+ mr[first + i].lane = i; + mr[first + i].loc = MIPSLoc::FREG; + mr[first + i].nReg = dest; + } + + if (cur[0] != FromNativeReg(dest)) + emit_->MOVAPS(FromNativeReg(dest), ::R(cur[0])); + + if (dest != nreg) { + nr[dest].mipsReg = first; + nr[nreg].mipsReg = -1; + nr[nreg].isDirty = false; + } + + return true; +} + void X64IRRegCache::SetNativeRegValue(IRNativeReg nreg, uint32_t imm) { X64Reg r = FromNativeReg(nreg); _dbg_assert_(nreg >= 0 && nreg < NUM_X_REGS); diff --git a/Core/MIPS/x86/X64IRRegCache.h b/Core/MIPS/x86/X64IRRegCache.h index 90e0259914cd..8a21f563d8f2 100644 --- a/Core/MIPS/x86/X64IRRegCache.h +++ b/Core/MIPS/x86/X64IRRegCache.h @@ -92,6 +92,8 @@ class X64IRRegCache : public IRNativeRegCacheBase { void MapWithFlags(IRInst inst, X64IRJitConstants::X64Map destFlags, X64IRJitConstants::X64Map src1Flags = X64IRJitConstants::X64Map::NONE, X64IRJitConstants::X64Map src2Flags = X64IRJitConstants::X64Map::NONE); + // Note: may change the high lanes of single-register XMMs. + void FlushAll(bool gprs = true, bool fprs = true) override; void FlushBeforeCall(); Gen::X64Reg GetAndLockTempGPR(); @@ -115,8 +117,12 @@ class X64IRRegCache : public IRNativeRegCacheBase { void StoreNativeReg(IRNativeReg nreg, IRReg first, int lanes) override; void SetNativeRegValue(IRNativeReg nreg, uint32_t imm) override; void StoreRegValue(IRReg mreg, uint32_t imm) override; + bool TransferNativeReg(IRNativeReg nreg, IRNativeReg dest, MIPSLoc type, IRReg first, int lanes, MIPSMap flags) override; private: + bool TransferVecTo1(IRNativeReg nreg, IRNativeReg dest, IRReg first, int oldlanes); + bool Transfer1ToVec(IRNativeReg nreg, IRNativeReg dest, IRReg first, int lanes); + IRNativeReg GPRToNativeReg(Gen::X64Reg r) { return (IRNativeReg)r; } diff --git a/Core/MemMapHelpers.h b/Core/MemMapHelpers.h index 6f2ceaca637f..5f89f60312ff 100644 --- a/Core/MemMapHelpers.h +++ b/Core/MemMapHelpers.h @@ -69,13 +69,12 @@ inline void Memcpy(const u32 to_address, const u32 from_address, const u32 
len, memcpy(to, from, len); if (MemBlockInfoDetailed(len)) { - char tagData[128]; if (!tag) { - tagLen = FormatMemWriteTagAt(tagData, sizeof(tagData), "Memcpy/", from_address, len); - tag = tagData; + NotifyMemInfoCopy(to_address, from_address, len, "Memcpy/"); + } else { + NotifyMemInfo(MemBlockFlags::READ, from_address, len, tag, tagLen); + NotifyMemInfo(MemBlockFlags::WRITE, to_address, len, tag, tagLen); } - NotifyMemInfo(MemBlockFlags::READ, from_address, len, tag, tagLen); - NotifyMemInfo(MemBlockFlags::WRITE, to_address, len, tag, tagLen); } } diff --git a/Core/System.cpp b/Core/System.cpp index 3158d6a5bafe..0c7bf42fb4c9 100644 --- a/Core/System.cpp +++ b/Core/System.cpp @@ -91,7 +91,7 @@ MetaFileSystem pspFileSystem; ParamSFOData g_paramSFO; static GlobalUIState globalUIState; CoreParameter g_CoreParameter; -static FileLoader *loadedFile; +static FileLoader *g_loadedFile; // For background loading thread. static std::mutex loadingLock; // For loadingReason updates. @@ -324,6 +324,7 @@ bool CPU_Init(std::string *errorString, FileLoader *loadedFile) { // If they shut down early, we'll catch it when load completes. // Note: this may return before init is complete, which is checked if CPU_IsReady(). + g_loadedFile = loadedFile; if (!LoadFile(&loadedFile, &g_CoreParameter.errorString)) { CPU_Shutdown(); g_CoreParameter.fileToStart.clear(); @@ -368,8 +369,8 @@ void CPU_Shutdown() { Memory::Shutdown(); HLEPlugins::Shutdown(); - delete loadedFile; - loadedFile = nullptr; + delete g_loadedFile; + g_loadedFile = nullptr; delete g_CoreParameter.mountIsoLoader; delete g_symbolMap; @@ -380,8 +381,8 @@ void CPU_Shutdown() { // TODO: Maybe loadedFile doesn't even belong here... 
void UpdateLoadedFile(FileLoader *fileLoader) { - delete loadedFile; - loadedFile = fileLoader; + delete g_loadedFile; + g_loadedFile = fileLoader; } void Core_UpdateState(CoreState newState) { diff --git a/Core/TiltEventProcessor.cpp b/Core/TiltEventProcessor.cpp index 14486e84bd64..571f58f84f55 100644 --- a/Core/TiltEventProcessor.cpp +++ b/Core/TiltEventProcessor.cpp @@ -19,6 +19,12 @@ static u32 tiltButtonsDown = 0; float rawTiltAnalogX; float rawTiltAnalogY; +float g_currentYAngle = 0.0f; + +float GetCurrentYAngle() { + return g_currentYAngle; +} + // These functions generate tilt events given the current Tilt amount, // and the deadzone radius. void GenerateAnalogStickEvent(float analogX, float analogY); @@ -73,6 +79,7 @@ void ProcessTilt(bool landscape, float calibrationAngle, float x, float y, float Lin::Vec3 down = Lin::Vec3(x, y, z).normalized(); float angleAroundX = atan2(down.z, down.y); + g_currentYAngle = angleAroundX; // TODO: Should smooth this out over time a bit. float yAngle = angleAroundX - calibrationAngle; float xAngle = asinf(down.x); diff --git a/Core/TiltEventProcessor.h b/Core/TiltEventProcessor.h index 3eda969e17c4..d16f0020d6f4 100644 --- a/Core/TiltEventProcessor.h +++ b/Core/TiltEventProcessor.h @@ -1,5 +1,7 @@ #pragma once +#include "Common/Math/lin/vec3.h" + namespace TiltEventProcessor { // generates a tilt in the correct coordinate system based on @@ -7,6 +9,8 @@ namespace TiltEventProcessor { void ProcessTilt(bool landscape, const float calibrationAngle, float x, float y, float z, bool invertX, bool invertY, float xSensitivity, float ySensitivity); void ResetTiltEvents(); +float GetCurrentYAngle(); + // Lets you preview the amount of tilt in TiltAnalogSettingsScreen. 
extern float rawTiltAnalogX; extern float rawTiltAnalogY; diff --git a/Core/Util/PPGeDraw.cpp b/Core/Util/PPGeDraw.cpp index 1f09bb6e60c9..050551717663 100644 --- a/Core/Util/PPGeDraw.cpp +++ b/Core/Util/PPGeDraw.cpp @@ -827,7 +827,7 @@ static void PPGeResetCurrentText() { // Draws some text using the one font we have in the atlas. void PPGeDrawCurrentText(u32 color) { // If the atlas is larger than 512x512, need to use windows into it. - bool useTextureWindow = g_Config.bSoftwareRendering && atlasWidth > 512 || atlasHeight > 512; + bool useTextureWindow = g_Config.bSoftwareRendering && (atlasWidth > 512 || atlasHeight > 512); uint32_t texturePosX = 0; uint32_t texturePosY = 0; @@ -855,7 +855,7 @@ void PPGeDrawCurrentText(u32 color) { int wantedPosX = (int)floorf(c.sx * textureMaxPosX); int wantedPosY = (int)floorf(c.sy * textureMaxPosY); - if (useTextureWindow && wantedPosX != texturePosX || wantedPosY != texturePosY) { + if (useTextureWindow && (wantedPosX != texturePosX || wantedPosY != texturePosY)) { EndVertexDataAndDraw(GE_PRIM_RECTANGLES); uint32_t offset = atlasWidth * wantedPosY * 256 + wantedPosX * 256; diff --git a/GPU/Common/GPUStateUtils.cpp b/GPU/Common/GPUStateUtils.cpp index bf881694541a..2177bf31dba6 100644 --- a/GPU/Common/GPUStateUtils.cpp +++ b/GPU/Common/GPUStateUtils.cpp @@ -290,8 +290,15 @@ ReplaceBlendType ReplaceBlendWithShader(GEBufferFormat bufferFormat) { return REPLACE_BLEND_READ_FRAMEBUFFER; } - default: + case GE_BLENDMODE_MUL_AND_ADD: + case GE_BLENDMODE_MUL_AND_SUBTRACT: + case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE: + // Handled below. break; + + default: + // Other blend equations simply don't blend on hardware. 
+ return REPLACE_BLEND_NO; } GEBlendSrcFactor funcA = gstate.getBlendFuncA(); diff --git a/GPU/Common/ShaderId.cpp b/GPU/Common/ShaderId.cpp index 3e9b77111177..a23c37cd0539 100644 --- a/GPU/Common/ShaderId.cpp +++ b/GPU/Common/ShaderId.cpp @@ -275,21 +275,6 @@ bool FragmentIdNeedsFramebufferRead(const FShaderID &id) { (ReplaceBlendType)id.Bits(FS_BIT_REPLACE_BLEND, 3) == REPLACE_BLEND_READ_FRAMEBUFFER; } -static GEBlendMode SanitizeBlendEq(GEBlendMode beq) { - switch (beq) { - case GE_BLENDMODE_MUL_AND_ADD: - case GE_BLENDMODE_MUL_AND_SUBTRACT: - case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE: - case GE_BLENDMODE_MIN: - case GE_BLENDMODE_MAX: - case GE_BLENDMODE_ABSDIFF: - return beq; - default: - // Just return something that won't cause a shader gen failure. - return GE_BLENDMODE_MUL_AND_ADD; - } -} - // Here we must take all the bits of the gstate that determine what the fragment shader will // look like, and concatenate them together into an ID. void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pipelineState, const Draw::Bugs &bugs) { @@ -384,7 +369,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip // 3 bits. id.SetBits(FS_BIT_REPLACE_BLEND, 3, replaceBlend); // 11 bits total. - id.SetBits(FS_BIT_BLENDEQ, 3, SanitizeBlendEq(gstate.getBlendEq())); + id.SetBits(FS_BIT_BLENDEQ, 3, gstate.getBlendEq()); id.SetBits(FS_BIT_BLENDFUNC_A, 4, gstate.getBlendFuncA()); id.SetBits(FS_BIT_BLENDFUNC_B, 4, gstate.getBlendFuncB()); } diff --git a/GPU/Common/SoftwareTransformCommon.cpp b/GPU/Common/SoftwareTransformCommon.cpp index 2d9f2719bced..15ae074d1c60 100644 --- a/GPU/Common/SoftwareTransformCommon.cpp +++ b/GPU/Common/SoftwareTransformCommon.cpp @@ -90,19 +90,22 @@ static void RotateUVThrough(TransformedVertex v[4]) { // Clears on the PSP are best done by drawing a series of vertical strips // in clear mode. This tries to detect that. 
static bool IsReallyAClear(const TransformedVertex *transformed, int numVerts, float x2, float y2) { - if (transformed[0].x != 0.0f || transformed[0].y != 0.0f) + if (transformed[0].x < 0.0f || transformed[0].y < 0.0f || transformed[0].x > 0.5f || transformed[0].y > 0.5f) return false; + const float originY = transformed[0].y; + // Color and Z are decided by the second vertex, so only need to check those for matching color. - u32 matchcolor = transformed[1].color0_32; - float matchz = transformed[1].z; + const u32 matchcolor = transformed[1].color0_32; + const float matchz = transformed[1].z; for (int i = 1; i < numVerts; i++) { if ((i & 1) == 0) { // Top left of a rectangle - if (transformed[i].y != 0.0f) + if (transformed[i].y != originY) return false; - if (i > 0 && transformed[i].x != transformed[i - 1].x) + float gap = fabsf(transformed[i].x - transformed[i - 1].x); // Should probably do some smarter check. + if (i > 0 && gap > 0.0625) return false; } else { if (transformed[i].color0_32 != matchcolor || transformed[i].z != matchz) @@ -547,7 +550,7 @@ void SoftwareTransform::DetectOffsetTexture(int maxIndex) { } // NOTE: The viewport must be up to date! 
-void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertType, u16 *inds, int &indsOffset, int indexBufferSize, int &maxIndex, SoftwareTransformResult *result) { +void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertType, u16 *&inds, int &maxIndex, SoftwareTransformResult *result) { TransformedVertex *transformed = params_.transformed; TransformedVertex *transformedExpanded = params_.transformedExpanded; bool throughmode = (vertType & GE_VTYPE_THROUGH_MASK) != 0; @@ -560,11 +563,7 @@ void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertTy bool useBufferedRendering = fbman->UseBufferedRendering(); if (prim == GE_PRIM_RECTANGLES) { - if (!ExpandRectangles(vertexCount, maxIndex, inds, indsOffset, indexBufferSize, transformed, transformedExpanded, numTrans, throughmode)) { - result->drawIndexed = false; - result->drawNumTrans = 0; - return; - } + ExpandRectangles(vertexCount, maxIndex, inds, transformed, transformedExpanded, numTrans, throughmode); result->drawBuffer = transformedExpanded; result->drawIndexed = true; @@ -582,19 +581,11 @@ void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertTy } } } else if (prim == GE_PRIM_POINTS) { - if (!ExpandPoints(vertexCount, maxIndex, inds, indsOffset, indexBufferSize, transformed, transformedExpanded, numTrans, throughmode)) { - result->drawIndexed = false; - result->drawNumTrans = 0; - return; - } + ExpandPoints(vertexCount, maxIndex, inds, transformed, transformedExpanded, numTrans, throughmode); result->drawBuffer = transformedExpanded; result->drawIndexed = true; } else if (prim == GE_PRIM_LINES) { - if (!ExpandLines(vertexCount, maxIndex, inds, indsOffset, indexBufferSize, transformed, transformedExpanded, numTrans, throughmode)) { - result->drawIndexed = false; - result->drawNumTrans = 0; - return; - } + ExpandLines(vertexCount, maxIndex, inds, transformed, transformedExpanded, numTrans, throughmode); result->drawBuffer = 
transformedExpanded; result->drawIndexed = true; } else { @@ -686,21 +677,15 @@ void SoftwareTransform::CalcCullParams(float &minZValue, float &maxZValue) { std::swap(minZValue, maxZValue); } -bool SoftwareTransform::ExpandRectangles(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) { - // Before we start, do a sanity check - does the output fit? - if ((vertexCount / 2) * 6 > indexBufferSize - indsOffset) { - // Won't fit, kill the draw. - return false; - } - +void SoftwareTransform::ExpandRectangles(int vertexCount, int &maxIndex, u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) { // Rectangles always need 2 vertices, disregard the last one if there's an odd number. vertexCount = vertexCount & ~1; numTrans = 0; TransformedVertex *trans = &transformedExpanded[0]; - const u16 *indsIn = (const u16 *)(inds + indsOffset); - int newIndsOffset = indsOffset + vertexCount; - u16 *indsOut = inds + newIndsOffset; + const u16 *indsIn = (const u16 *)inds; + u16 *newInds = inds + vertexCount; + u16 *indsOut = newInds; maxIndex = 4 * (vertexCount / 2); for (int i = 0; i < vertexCount; i += 2) { @@ -745,33 +730,23 @@ bool SoftwareTransform::ExpandRectangles(int vertexCount, int &maxIndex, u16 *in indsOut[3] = i * 2 + 3; indsOut[4] = i * 2 + 0; indsOut[5] = i * 2 + 2; - trans += 4; indsOut += 6; numTrans += 6; } - - indsOffset = newIndsOffset; - return true; + inds = newInds; } -bool SoftwareTransform::ExpandLines(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) { - // Before we start, do a sanity check - does the output fit? - if ((vertexCount / 2) * 6 > indexBufferSize - indsOffset) { - // Won't fit, kill the draw. 
- return false; - } - +void SoftwareTransform::ExpandLines(int vertexCount, int &maxIndex, u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) { // Lines always need 2 vertices, disregard the last one if there's an odd number. vertexCount = vertexCount & ~1; numTrans = 0; TransformedVertex *trans = &transformedExpanded[0]; - - const u16 *indsIn = (const u16 *)(inds + indsOffset); - int newIndsOffset = indsOffset + vertexCount; - u16 *indsOut = inds + newIndsOffset; + const u16 *indsIn = (const u16 *)inds; + u16 *newInds = inds + vertexCount; + u16 *indsOut = newInds; float dx = 1.0f * gstate_c.vpWidthScale * (1.0f / fabsf(gstate.getViewportXScale())); float dy = 1.0f * gstate_c.vpHeightScale * (1.0f / fabsf(gstate.getViewportYScale())); @@ -884,23 +859,17 @@ bool SoftwareTransform::ExpandLines(int vertexCount, int &maxIndex, u16 *inds, i } } - indsOffset = newIndsOffset; - return true; + inds = newInds; } -bool SoftwareTransform::ExpandPoints(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) { - // Before we start, do a sanity check - does the output fit? - if (vertexCount * 6 > indexBufferSize - indsOffset) { - // Won't fit, kill the draw. 
- return false; - } +void SoftwareTransform::ExpandPoints(int vertexCount, int &maxIndex, u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) { numTrans = 0; TransformedVertex *trans = &transformedExpanded[0]; - const u16 *indsIn = (const u16 *)(inds + indsOffset); - int newIndsOffset = indsOffset + vertexCount; - u16 *indsOut = inds + newIndsOffset; + const u16 *indsIn = (const u16 *)inds; + u16 *newInds = inds + vertexCount; + u16 *indsOut = newInds; float dx = 1.0f * gstate_c.vpWidthScale * (1.0f / gstate.getViewportXScale()); float dy = 1.0f * gstate_c.vpHeightScale * (1.0f / gstate.getViewportYScale()); @@ -959,7 +928,5 @@ bool SoftwareTransform::ExpandPoints(int vertexCount, int &maxIndex, u16 *inds, numTrans += 6; } - - indsOffset = newIndsOffset; - return true; + inds = newInds; } diff --git a/GPU/Common/SoftwareTransformCommon.h b/GPU/Common/SoftwareTransformCommon.h index da15ffad9305..480bd18e519e 100644 --- a/GPU/Common/SoftwareTransformCommon.h +++ b/GPU/Common/SoftwareTransformCommon.h @@ -62,18 +62,19 @@ struct SoftwareTransformParams { class SoftwareTransform { public: - SoftwareTransform(SoftwareTransformParams ¶ms) : params_(params) {} + SoftwareTransform(SoftwareTransformParams ¶ms) : params_(params) { + } void SetProjMatrix(const float mtx[14], bool invertedX, bool invertedY, const Lin::Vec3 &trans, const Lin::Vec3 &scale); void Decode(int prim, u32 vertexType, const DecVtxFormat &decVtxFormat, int maxIndex, SoftwareTransformResult *result); void DetectOffsetTexture(int maxIndex); - void BuildDrawingParams(int prim, int vertexCount, u32 vertType, u16 *inds, int &indsOffset, int indexBufferSize, int &maxIndex, SoftwareTransformResult *result); + void BuildDrawingParams(int prim, int vertexCount, u32 vertType, u16 *&inds, int &maxIndex, SoftwareTransformResult *result); protected: void CalcCullParams(float &minZValue, float &maxZValue); - bool ExpandRectangles(int vertexCount, 
int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode); - bool ExpandLines(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode); - bool ExpandPoints(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode); + void ExpandRectangles(int vertexCount, int &maxIndex, u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode); + void ExpandLines(int vertexCount, int &maxIndex, u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode); + void ExpandPoints(int vertexCount, int &maxIndex, u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode); const SoftwareTransformParams ¶ms_; Lin::Matrix4x4 projMatrix_; diff --git a/GPU/Common/VertexDecoderArm64.cpp b/GPU/Common/VertexDecoderArm64.cpp index 83948a16adee..719dfa329d7f 100644 --- a/GPU/Common/VertexDecoderArm64.cpp +++ b/GPU/Common/VertexDecoderArm64.cpp @@ -27,10 +27,6 @@ #include "GPU/Common/VertexDecoderCommon.h" alignas(16) static float bones[16 * 8]; // First four are kept in registers -alignas(16) static float boneMask[4] = {1.0f, 1.0f, 1.0f, 0.0f}; - -static const float by128 = 1.0f / 128.0f; -static const float by32768 = 1.0f / 32768.0f; using namespace Arm64Gen; @@ -50,7 +46,7 @@ static const ARM64Reg scratchReg = W6; static const ARM64Reg scratchReg64 = X6; static const ARM64Reg scratchReg2 = W7; static const ARM64Reg scratchReg3 = W8; -static const ARM64Reg fullAlphaReg = W12; +static const ARM64Reg alphaNonFullReg = W12; static 
const ARM64Reg boundsMinUReg = W13; static const ARM64Reg boundsMinVReg = W14; static const ARM64Reg boundsMaxUReg = W15; @@ -63,6 +59,8 @@ static const ARM64Reg fpScratchReg4 = S7; static const ARM64Reg neonScratchRegD = D2; static const ARM64Reg neonScratchRegQ = Q2; +static const ARM64Reg neonScratchReg2D = D3; +static const ARM64Reg neonScratchReg2Q = Q3; static const ARM64Reg neonUVScaleReg = D0; static const ARM64Reg neonUVOffsetReg = D1; @@ -150,6 +148,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int bool prescaleStep = false; bool skinning = false; + bool updateTexBounds = false; bool log = false; @@ -165,6 +164,9 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int dec.steps_[i] == &VertexDecoder::Step_WeightsFloatSkin) { skinning = true; } + if (dec.steps_[i] == &VertexDecoder::Step_TcU16ThroughToFloat) { + updateTexBounds = true; + } } // Not used below, but useful for logging. @@ -172,24 +174,22 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int // if (skinning) log = true; + bool updateFullAlpha = dec.col; + if (updateFullAlpha && (dec.VertexType() & GE_VTYPE_COL_MASK) == GE_VTYPE_COL_565) + updateFullAlpha = false; + // GPRs 0-15 do not need to be saved. // We don't use any higher GPRs than 16. So: - uint64_t regs_to_save = 1 << 16; // Arm64Gen::ALL_CALLEE_SAVED; + uint64_t regs_to_save = updateTexBounds ? 1 << 16 : 0; // We only need to save Q8-Q15 if skinning is used. uint64_t regs_to_save_fp = dec.skinInDecode ? Arm64Gen::ALL_CALLEE_SAVED_FP : 0; - fp.ABI_PushRegisters(regs_to_save, regs_to_save_fp); + // Only bother making stack space and setting up FP if there are saved regs. + if (regs_to_save || regs_to_save_fp) + fp.ABI_PushRegisters(regs_to_save, regs_to_save_fp); // Keep the scale/offset in a few fp registers if we need it. 
if (prescaleStep) { - fp.LDR(64, INDEX_UNSIGNED, neonUVScaleReg, X3, 0); - fp.LDR(64, INDEX_UNSIGNED, neonUVOffsetReg, X3, 8); - if ((dec.VertexType() & GE_VTYPE_TC_MASK) == GE_VTYPE_TC_8BIT) { - fp.MOVI2FDUP(neonScratchRegD, by128, scratchReg); - fp.FMUL(32, neonUVScaleReg, neonUVScaleReg, neonScratchRegD); - } else if ((dec.VertexType() & GE_VTYPE_TC_MASK) == GE_VTYPE_TC_16BIT) { - fp.MOVI2FDUP(neonScratchRegD, by32768, scratchReg); - fp.FMUL(32, neonUVScaleReg, neonUVScaleReg, neonScratchRegD); - } + fp.LDP(64, INDEX_SIGNED, neonUVScaleReg, neonUVOffsetReg, X3, 0); } // Add code to convert matrices to 4x4. @@ -197,43 +197,48 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int if (dec.skinInDecode) { // Copying from R3 to R4 MOVP2R(X3, gstate.boneMatrix); - MOVP2R(X4, bones); - MOVP2R(X5, boneMask); - fp.LDR(128, INDEX_UNSIGNED, Q3, X5, 0); + // This is only used with more than 4 weights, and points to the first of them. + if (dec.nweights > 4) + MOVP2R(X4, &bones[16 * 4]); + + // Construct a mask to zero out the top lane with. + fp.MVNI(32, Q3, 0); + fp.MOVI(32, Q4, 0); + fp.EXT(Q3, Q3, Q4, 4); + for (int i = 0; i < dec.nweights; i++) { - // Note that INDEX_UNSIGNED does not support offsets not aligned to the data size so we must use POST. - fp.LDR(128, INDEX_POST, Q4, X3, 12); // Load 128 bits even though we just want 96 - fp.LDR(128, INDEX_POST, Q5, X3, 12); - fp.LDR(128, INDEX_POST, Q6, X3, 12); - fp.LDR(128, INDEX_POST, Q7, X3, 12); + // This loads Q4,Q5,Q6 with 12 floats and increases X3, all in one go. + fp.LD1(32, 3, INDEX_POST, Q4, X3); + // Now sort those floats into 4 regs: ABCD EFGH IJKL -> ABC0 DEF0 GHI0 JKL0. + // Go backwards to avoid overwriting. + fp.EXT(Q7, Q6, Q6, 4); // I[JKLI]JKL + fp.EXT(Q6, Q5, Q6, 8); // EF[GHIJ]KL + fp.EXT(Q5, Q4, Q5, 12); // ABC[DEFG]H + + ARM64Reg matrixRow[4]{ Q4, Q5, Q6, Q7 }; // First four matrices are in registers Q16+. 
if (i < 4) { - fp.FMUL(32, (ARM64Reg)(Q16 + i * 4), Q4, Q3); - fp.FMUL(32, (ARM64Reg)(Q17 + i * 4), Q5, Q3); - fp.FMUL(32, (ARM64Reg)(Q18 + i * 4), Q6, Q3); - fp.FMUL(32, (ARM64Reg)(Q19 + i * 4), Q7, Q3); - ADDI2R(X4, X4, 16 * 4); - } else { - fp.FMUL(32, Q4, Q4, Q3); - fp.FMUL(32, Q5, Q5, Q3); - fp.FMUL(32, Q6, Q6, Q3); - fp.FMUL(32, Q7, Q7, Q3); - fp.STR(128, INDEX_UNSIGNED, Q4, X4, 0); - fp.STR(128, INDEX_UNSIGNED, Q5, X4, 16); - fp.STR(128, INDEX_UNSIGNED, Q6, X4, 32); - fp.STR(128, INDEX_UNSIGNED, Q7, X4, 48); - ADDI2R(X4, X4, 16 * 4); + for (int w = 0; w < 4; ++w) + matrixRow[w] = (ARM64Reg)(Q16 + i * 4 + w); } + // Zero out the top lane of each one with the mask created above. + fp.AND(matrixRow[0], Q4, Q3); + fp.AND(matrixRow[1], Q5, Q3); + fp.AND(matrixRow[2], Q6, Q3); + fp.AND(matrixRow[3], Q7, Q3); + + if (i >= 4) + fp.ST1(32, 4, INDEX_POST, matrixRow[0], X4); } } - if (dec.col) { - // Or LDB and skip the conditional? This is probably cheaper. - MOVI2R(fullAlphaReg, 0xFF); + if (updateFullAlpha) { + // This ends up non-zero if alpha is not full. + // Often we just ORN into it. + MOVI2R(alphaNonFullReg, 0); } - if (dec.tc && dec.throughmode) { - // TODO: Smarter, only when doing bounds. 
+ if (updateTexBounds) { MOVP2R(scratchReg64, &gstate_c.vertBounds.minU); LDRH(INDEX_UNSIGNED, boundsMinUReg, scratchReg64, offsetof(KnownVertexBounds, minU)); LDRH(INDEX_UNSIGNED, boundsMaxUReg, scratchReg64, offsetof(KnownVertexBounds, maxU)); @@ -259,16 +264,14 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int SUBS(counterReg, counterReg, 1); B(CC_NEQ, loopStart); - if (dec.col) { + if (updateFullAlpha) { + FixupBranch skip = CBZ(alphaNonFullReg); MOVP2R(tempRegPtr, &gstate_c.vertexFullAlpha); - CMP(fullAlphaReg, 0); - FixupBranch skip = B(CC_NEQ); - STRB(INDEX_UNSIGNED, fullAlphaReg, tempRegPtr, 0); + STRB(INDEX_UNSIGNED, WZR, tempRegPtr, 0); SetJumpTarget(skip); } - if (dec.tc && dec.throughmode) { - // TODO: Smarter, only when doing bounds. + if (updateTexBounds) { MOVP2R(scratchReg64, &gstate_c.vertBounds.minU); STRH(INDEX_UNSIGNED, boundsMinUReg, scratchReg64, offsetof(KnownVertexBounds, minU)); STRH(INDEX_UNSIGNED, boundsMaxUReg, scratchReg64, offsetof(KnownVertexBounds, maxU)); @@ -276,7 +279,8 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int STRH(INDEX_UNSIGNED, boundsMaxVReg, scratchReg64, offsetof(KnownVertexBounds, maxV)); } - fp.ABI_PopRegisters(regs_to_save, regs_to_save_fp); + if (regs_to_save || regs_to_save_fp) + fp.ABI_PopRegisters(regs_to_save, regs_to_save_fp); RET(); @@ -342,13 +346,11 @@ void VertexDecoderJitCache::Jit_ApplyWeights() { break; default: // Matrices 4+ need to be loaded from memory. 
- fp.LDP(128, INDEX_SIGNED, Q8, Q9, scratchReg64, 0); - fp.LDP(128, INDEX_SIGNED, Q10, Q11, scratchReg64, 2 * 16); + fp.LD1(32, 4, INDEX_POST, Q8, scratchReg64); fp.FMLA(32, Q4, Q8, neonWeightRegsQ[i >> 2], i & 3); fp.FMLA(32, Q5, Q9, neonWeightRegsQ[i >> 2], i & 3); fp.FMLA(32, Q6, Q10, neonWeightRegsQ[i >> 2], i & 3); fp.FMLA(32, Q7, Q11, neonWeightRegsQ[i >> 2], i & 3); - ADDI2R(scratchReg64, scratchReg64, 4 * 16); break; } } @@ -482,13 +484,8 @@ void VertexDecoderJitCache::Jit_WeightsFloatSkin() { void VertexDecoderJitCache::Jit_Color8888() { LDR(INDEX_UNSIGNED, tempReg1, srcReg, dec_->coloff); - // Set flags to determine if alpha != 0xFF. - ORN(tempReg2, WZR, tempReg1, ArithOption(tempReg1, ST_ASR, 24)); - CMP(tempReg2, 0); - - // Clear fullAlphaReg when the inverse was not 0. - // fullAlphaReg = tempReg2 == 0 ? fullAlphaReg : 0 + 1; - CSEL(fullAlphaReg, fullAlphaReg, WZR, CC_EQ); + // Or any non-set bits into alphaNonFullReg. This way it's non-zero if not full. + ORN(alphaNonFullReg, alphaNonFullReg, tempReg1, ArithOption(tempReg1, ST_ASR, 24)); STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.c0off); } @@ -508,15 +505,10 @@ void VertexDecoderJitCache::Jit_Color4444() { // And expand to 8 bits. ORR(tempReg1, tempReg2, tempReg2, ArithOption(tempReg2, ST_LSL, 4)); - STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.c0off); - - // Set flags to determine if alpha != 0xFF. - ORN(tempReg2, WZR, tempReg1, ArithOption(tempReg1, ST_ASR, 24)); - CMP(tempReg2, 0); + // Or any non-set bits into alphaNonFullReg. This way it's non-zero if not full. + ORN(alphaNonFullReg, alphaNonFullReg, tempReg1, ArithOption(tempReg1, ST_ASR, 24)); - // Clear fullAlphaReg when the inverse was not 0. - // fullAlphaReg = tempReg2 == 0 ? 
fullAlphaReg : 0 + 1; - CSEL(fullAlphaReg, fullAlphaReg, WZR, CC_EQ); + STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.c0off); } void VertexDecoderJitCache::Jit_Color565() { @@ -540,7 +532,7 @@ void VertexDecoderJitCache::Jit_Color565() { ORR(tempReg3, tempReg3, tempReg1, ArithOption(tempReg1, ST_LSR, 4)); ORR(tempReg2, tempReg2, tempReg3, ArithOption(tempReg3, ST_LSL, 8)); - // Add in full alpha. No need to update fullAlphaReg. + // Add in full alpha. No need to update alphaNonFullReg. ORRI2R(tempReg1, tempReg2, 0xFF000000, scratchReg); STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.c0off); @@ -566,15 +558,10 @@ void VertexDecoderJitCache::Jit_Color5551() { ANDI2R(tempReg1, tempReg1, 0xFF000000, scratchReg); ORR(tempReg2, tempReg2, tempReg1); - // Set flags to determine if alpha != 0xFF. - ORN(tempReg3, WZR, tempReg1, ArithOption(tempReg1, ST_ASR, 24)); - CMP(tempReg3, 0); + // Or any non-set bits into alphaNonFullReg. This way it's non-zero if not full. + ORN(alphaNonFullReg, alphaNonFullReg, tempReg1, ArithOption(tempReg1, ST_ASR, 24)); STR(INDEX_UNSIGNED, tempReg2, dstReg, dec_->decFmt.c0off); - - // Clear fullAlphaReg when the inverse was not 0. - // fullAlphaReg = tempReg3 == 0 ? 
fullAlphaReg : 0 + 1; - CSEL(fullAlphaReg, fullAlphaReg, WZR, CC_EQ); } void VertexDecoderJitCache::Jit_TcU16ThroughToFloat() { @@ -608,12 +595,12 @@ void VertexDecoderJitCache::Jit_TcFloat() { } void VertexDecoderJitCache::Jit_TcU8Prescale() { - fp.LDUR(16, neonScratchRegD, srcReg, dec_->tcoff); - fp.UXTL(8, neonScratchRegQ, neonScratchRegD); // Widen to 16-bit - fp.UXTL(16, neonScratchRegQ, neonScratchRegD); // Widen to 32-bit - fp.UCVTF(32, neonScratchRegD, neonScratchRegD); - fp.FMUL(32, neonScratchRegD, neonScratchRegD, neonUVScaleReg); // TODO: FMLA - fp.FADD(32, neonScratchRegD, neonScratchRegD, neonUVOffsetReg); + fp.LDUR(16, neonScratchReg2D, srcReg, dec_->tcoff); + fp.UXTL(8, neonScratchReg2Q, neonScratchReg2D); // Widen to 16-bit + fp.UXTL(16, neonScratchReg2Q, neonScratchReg2D); // Widen to 32-bit + fp.UCVTF(32, neonScratchReg2D, neonScratchReg2D, 7); + fp.MOV(neonScratchRegD, neonUVOffsetReg); + fp.FMLA(32, neonScratchRegD, neonScratchReg2D, neonUVScaleReg); fp.STUR(64, neonScratchRegD, dstReg, dec_->decFmt.uvoff); } @@ -626,11 +613,11 @@ void VertexDecoderJitCache::Jit_TcU8ToFloat() { } void VertexDecoderJitCache::Jit_TcU16Prescale() { - fp.LDUR(32, neonScratchRegD, srcReg, dec_->tcoff); - fp.UXTL(16, neonScratchRegQ, neonScratchRegD); // Widen to 32-bit - fp.UCVTF(32, neonScratchRegD, neonScratchRegD); - fp.FMUL(32, neonScratchRegD, neonScratchRegD, neonUVScaleReg); // TODO: FMLA - fp.FADD(32, neonScratchRegD, neonScratchRegD, neonUVOffsetReg); + fp.LDUR(32, neonScratchReg2D, srcReg, dec_->tcoff); + fp.UXTL(16, neonScratchReg2Q, neonScratchReg2D); // Widen to 32-bit + fp.UCVTF(32, neonScratchReg2D, neonScratchReg2D, 15); + fp.MOV(neonScratchRegD, neonUVOffsetReg); + fp.FMLA(32, neonScratchRegD, neonScratchReg2D, neonUVScaleReg); fp.STUR(64, neonScratchRegD, dstReg, dec_->decFmt.uvoff); } @@ -642,9 +629,9 @@ void VertexDecoderJitCache::Jit_TcU16ToFloat() { } void VertexDecoderJitCache::Jit_TcFloatPrescale() { - fp.LDUR(64, neonScratchRegD, srcReg, 
dec_->tcoff); - fp.FMUL(32, neonScratchRegD, neonScratchRegD, neonUVScaleReg); // TODO: FMLA - fp.FADD(32, neonScratchRegD, neonScratchRegD, neonUVOffsetReg); + fp.LDUR(64, neonScratchReg2D, srcReg, dec_->tcoff); + fp.MOV(neonScratchRegD, neonUVOffsetReg); + fp.FMLA(32, neonScratchRegD, neonScratchReg2D, neonUVScaleReg); fp.STUR(64, neonScratchRegD, dstReg, dec_->decFmt.uvoff); } diff --git a/GPU/Common/VertexDecoderCommon.cpp b/GPU/Common/VertexDecoderCommon.cpp index d6e37cf0ad1a..c31a5f1d581b 100644 --- a/GPU/Common/VertexDecoderCommon.cpp +++ b/GPU/Common/VertexDecoderCommon.cpp @@ -108,28 +108,36 @@ void DecVtxFormat::InitializeFromID(uint32_t id) { void GetIndexBounds(const void *inds, int count, u32 vertType, u16 *indexLowerBound, u16 *indexUpperBound) { // Find index bounds. Could cache this in display lists. // Also, this could be greatly sped up with SSE2/NEON, although rarely a bottleneck. - int lowerBound = 0x7FFFFFFF; - int upperBound = 0; u32 idx = vertType & GE_VTYPE_IDX_MASK; - if (idx == GE_VTYPE_IDX_8BIT) { - const u8 *ind8 = (const u8 *)inds; + if (idx == GE_VTYPE_IDX_16BIT) { + uint16_t upperBound = 0; + uint16_t lowerBound = 0xFFFF; + const u16_le *ind16 = (const u16_le *)inds; for (int i = 0; i < count; i++) { - u8 value = ind8[i]; + u16 value = ind16[i]; if (value > upperBound) upperBound = value; if (value < lowerBound) lowerBound = value; } - } else if (idx == GE_VTYPE_IDX_16BIT) { - const u16_le *ind16 = (const u16_le *)inds; + *indexLowerBound = lowerBound; + *indexUpperBound = upperBound; + } else if (idx == GE_VTYPE_IDX_8BIT) { + uint8_t upperBound = 0; + uint8_t lowerBound = 0xFF; + const u8 *ind8 = (const u8 *)inds; for (int i = 0; i < count; i++) { - u16 value = ind16[i]; + u8 value = ind8[i]; if (value > upperBound) upperBound = value; if (value < lowerBound) lowerBound = value; } + *indexLowerBound = lowerBound; + *indexUpperBound = upperBound; } else if (idx == GE_VTYPE_IDX_32BIT) { + int lowerBound = 0x7FFFFFFF; + int upperBound 
= 0; WARN_LOG_REPORT_ONCE(indexBounds32, G3D, "GetIndexBounds: Decoding 32-bit indexes"); const u32_le *ind32 = (const u32_le *)inds; for (int i = 0; i < count; i++) { @@ -143,12 +151,12 @@ void GetIndexBounds(const void *inds, int count, u32 vertType, u16 *indexLowerBo if (value < lowerBound) lowerBound = value; } + *indexLowerBound = (u16)lowerBound; + *indexUpperBound = (u16)upperBound; } else { - lowerBound = 0; - upperBound = count - 1; + *indexLowerBound = 0; + *indexUpperBound = count - 1; } - *indexLowerBound = (u16)lowerBound; - *indexUpperBound = (u16)upperBound; } void PrintDecodedVertex(const VertexReader &vtx) { diff --git a/GPU/D3D11/DrawEngineD3D11.cpp b/GPU/D3D11/DrawEngineD3D11.cpp index f018980f4a7b..7780bfa28a26 100644 --- a/GPU/D3D11/DrawEngineD3D11.cpp +++ b/GPU/D3D11/DrawEngineD3D11.cpp @@ -598,7 +598,7 @@ void DrawEngineD3D11::DoFlush() { prim = GE_PRIM_TRIANGLES; VERBOSE_LOG(G3D, "Flush prim %i SW! %i verts in one go", prim, indexGen.VertexCount()); - u16 *const inds = decIndex_; + u16 *inds = decIndex_; SoftwareTransformResult result{}; SoftwareTransformParams params{}; params.decoded = decoded_; @@ -644,9 +644,8 @@ void DrawEngineD3D11::DoFlush() { // Need to ApplyDrawState after ApplyTexture because depal can launch a render pass and that wrecks the state. 
ApplyDrawState(prim); - int indsOffset = 0; if (result.action == SW_NOT_READY) - swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, indsOffset, DECODED_INDEX_BUFFER_SIZE / sizeof(uint16_t), maxIndex, &result); + swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, maxIndex, &result); if (result.setSafeSize) framebufferManager_->SetSafeSize(result.safeWidth, result.safeHeight); @@ -684,11 +683,11 @@ void DrawEngineD3D11::DoFlush() { UINT iOffset; int iSize = sizeof(uint16_t) * result.drawNumTrans; uint8_t *iptr = pushInds_->BeginPush(context_, &iOffset, iSize); - memcpy(iptr, inds + indsOffset, iSize); + memcpy(iptr, inds, iSize); pushInds_->EndPush(context_); context_->IASetIndexBuffer(pushInds_->Buf(), DXGI_FORMAT_R16_UINT, iOffset); context_->DrawIndexed(result.drawNumTrans, 0, 0); - } else if (result.drawNumTrans > 0) { + } else { context_->Draw(result.drawNumTrans, 0); } } else if (result.action == SW_CLEAR) { diff --git a/GPU/Directx9/DrawEngineDX9.cpp b/GPU/Directx9/DrawEngineDX9.cpp index 11323f374743..9efa233dd0b8 100644 --- a/GPU/Directx9/DrawEngineDX9.cpp +++ b/GPU/Directx9/DrawEngineDX9.cpp @@ -558,7 +558,7 @@ void DrawEngineDX9::DoFlush() { prim = GE_PRIM_TRIANGLES; VERBOSE_LOG(G3D, "Flush prim %i SW! 
%i verts in one go", prim, indexGen.VertexCount()); - u16 *const inds = decIndex_; + u16 *inds = decIndex_; SoftwareTransformResult result{}; SoftwareTransformParams params{}; params.decoded = decoded_; @@ -607,9 +607,8 @@ void DrawEngineDX9::DoFlush() { ApplyDrawState(prim); - int indsOffset = 0; if (result.action == SW_NOT_READY) - swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, indsOffset, DECODED_INDEX_BUFFER_SIZE / sizeof(uint16_t), maxIndex, &result); + swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, maxIndex, &result); if (result.setSafeSize) framebufferManager_->SetSafeSize(result.safeWidth, result.safeHeight); @@ -629,8 +628,8 @@ void DrawEngineDX9::DoFlush() { device_->SetVertexDeclaration(transformedVertexDecl_); if (result.drawIndexed) { - device_->DrawIndexedPrimitiveUP(d3d_prim[prim], 0, maxIndex, D3DPrimCount(d3d_prim[prim], result.drawNumTrans), inds + indsOffset, D3DFMT_INDEX16, result.drawBuffer, sizeof(TransformedVertex)); - } else if (result.drawNumTrans > 0) { + device_->DrawIndexedPrimitiveUP(d3d_prim[prim], 0, maxIndex, D3DPrimCount(d3d_prim[prim], result.drawNumTrans), inds, D3DFMT_INDEX16, result.drawBuffer, sizeof(TransformedVertex)); + } else { device_->DrawPrimitiveUP(d3d_prim[prim], D3DPrimCount(d3d_prim[prim], result.drawNumTrans), result.drawBuffer, sizeof(TransformedVertex)); } } else if (result.action == SW_CLEAR) { diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp index 6d5bb85d2c73..b8cca50d99c2 100644 --- a/GPU/Directx9/GPU_DX9.cpp +++ b/GPU/Directx9/GPU_DX9.cpp @@ -123,7 +123,7 @@ void GPU_DX9::BeginFrame() { drawEngine_.BeginFrame(); GPUCommonHW::BeginFrame(); - shaderManagerDX9_->DirtyShader(); + shaderManagerDX9_->DirtyLastShader(); framebufferManager_->BeginFrame(); diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp index 83a449888c56..21dccf7479bc 100644 --- a/GPU/Directx9/ShaderManagerDX9.cpp +++ 
b/GPU/Directx9/ShaderManagerDX9.cpp @@ -535,27 +535,23 @@ void ShaderManagerDX9::Clear() { } fsCache_.clear(); vsCache_.clear(); - DirtyShader(); + DirtyLastShader(); } void ShaderManagerDX9::ClearShaders() { Clear(); } -void ShaderManagerDX9::DirtyShader() { +void ShaderManagerDX9::DirtyLastShader() { // Forget the last shader ID lastFSID_.set_invalid(); lastVSID_.set_invalid(); lastVShader_ = nullptr; lastPShader_ = nullptr; + // TODO: Probably not necessary to dirty uniforms here on DX9. gstate_c.Dirty(DIRTY_ALL_UNIFORMS | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE); } -void ShaderManagerDX9::DirtyLastShader() { - lastVShader_ = nullptr; - lastPShader_ = nullptr; -} - VSShader *ShaderManagerDX9::ApplyShader(bool useHWTransform, bool useHWTessellation, VertexDecoder *decoder, bool weightsAsFloat, bool useSkinInDecode, const ComputedPipelineState &pipelineState) { VShaderID VSID; if (gstate_c.IsDirty(DIRTY_VERTEXSHADER_STATE)) { diff --git a/GPU/Directx9/ShaderManagerDX9.h b/GPU/Directx9/ShaderManagerDX9.h index 68337503b40b..5ce429f6f9d2 100644 --- a/GPU/Directx9/ShaderManagerDX9.h +++ b/GPU/Directx9/ShaderManagerDX9.h @@ -79,7 +79,6 @@ class ShaderManagerDX9 : public ShaderManagerCommon { void ClearShaders() override; VSShader *ApplyShader(bool useHWTransform, bool useHWTessellation, VertexDecoder *decoder, bool weightsAsFloat, bool useSkinInDecode, const ComputedPipelineState &pipelineState); - void DirtyShader(); void DirtyLastShader() override; int GetNumVertexShaders() const { return (int)vsCache_.size(); } diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp index e4d5bbac31be..d1b957ac548a 100644 --- a/GPU/GLES/DrawEngineGLES.cpp +++ b/GPU/GLES/DrawEngineGLES.cpp @@ -357,7 +357,7 @@ void DrawEngineGLES::DoFlush() { if (prim == GE_PRIM_TRIANGLE_STRIP) prim = GE_PRIM_TRIANGLES; - u16 *const inds = decIndex_; + u16 *inds = decIndex_; SoftwareTransformResult result{}; // TODO: Keep this static? Faster than repopulating? 
SoftwareTransformParams params{}; @@ -414,9 +414,8 @@ void DrawEngineGLES::DoFlush() { // Need to ApplyDrawState after ApplyTexture because depal can launch a render pass and that wrecks the state. ApplyDrawState(prim); - int indsOffset = 0; if (result.action == SW_NOT_READY) - swTransform.BuildDrawingParams(prim, vertexCount, dec_->VertexType(), inds, indsOffset, DECODED_INDEX_BUFFER_SIZE / sizeof(uint16_t), maxIndex, &result); + swTransform.BuildDrawingParams(prim, vertexCount, dec_->VertexType(), inds, maxIndex, &result); if (result.setSafeSize) framebufferManager_->SetSafeSize(result.safeWidth, result.safeHeight); @@ -431,11 +430,11 @@ void DrawEngineGLES::DoFlush() { if (result.action == SW_DRAW_PRIMITIVES) { if (result.drawIndexed) { vertexBufferOffset = (uint32_t)frameData.pushVertex->Push(result.drawBuffer, maxIndex * sizeof(TransformedVertex), 4, &vertexBuffer); - indexBufferOffset = (uint32_t)frameData.pushIndex->Push(inds + indsOffset, sizeof(uint16_t) * result.drawNumTrans, 2, &indexBuffer); + indexBufferOffset = (uint32_t)frameData.pushIndex->Push(inds, sizeof(uint16_t) * result.drawNumTrans, 2, &indexBuffer); render_->DrawIndexed( softwareInputLayout_, vertexBuffer, vertexBufferOffset, indexBuffer, indexBufferOffset, glprim[prim], result.drawNumTrans, GL_UNSIGNED_SHORT); - } else if (result.drawNumTrans > 0) { + } else { vertexBufferOffset = (uint32_t)frameData.pushVertex->Push(result.drawBuffer, result.drawNumTrans * sizeof(TransformedVertex), 4, &vertexBuffer); render_->Draw( softwareInputLayout_, vertexBuffer, vertexBufferOffset, glprim[prim], 0, result.drawNumTrans); @@ -521,7 +520,7 @@ void TessellationDataTransferGLES::SendDataToShader(const SimpleVertex *const *p if (prevSizeU < size_u || prevSizeV < size_v) { prevSizeU = size_u; prevSizeV = size_v; - if (!data_tex[0]) + if (data_tex[0]) renderManager_->DeleteTexture(data_tex[0]); data_tex[0] = renderManager_->CreateTexture(GL_TEXTURE_2D, size_u * 3, size_v, 1, 1); 
renderManager_->TextureImage(data_tex[0], 0, size_u * 3, size_v, 1, Draw::DataFormat::R32G32B32A32_FLOAT, nullptr, GLRAllocType::NONE, false); @@ -540,7 +539,7 @@ void TessellationDataTransferGLES::SendDataToShader(const SimpleVertex *const *p // Weight U if (prevSizeWU < weights.size_u) { prevSizeWU = weights.size_u; - if (!data_tex[1]) + if (data_tex[1]) renderManager_->DeleteTexture(data_tex[1]); data_tex[1] = renderManager_->CreateTexture(GL_TEXTURE_2D, weights.size_u * 2, 1, 1, 1); renderManager_->TextureImage(data_tex[1], 0, weights.size_u * 2, 1, 1, Draw::DataFormat::R32G32B32A32_FLOAT, nullptr, GLRAllocType::NONE, false); @@ -552,7 +551,7 @@ void TessellationDataTransferGLES::SendDataToShader(const SimpleVertex *const *p // Weight V if (prevSizeWV < weights.size_v) { prevSizeWV = weights.size_v; - if (!data_tex[2]) + if (data_tex[2]) renderManager_->DeleteTexture(data_tex[2]); data_tex[2] = renderManager_->CreateTexture(GL_TEXTURE_2D, weights.size_v * 2, 1, 1, 1); renderManager_->TextureImage(data_tex[2], 0, weights.size_v * 2, 1, 1, Draw::DataFormat::R32G32B32A32_FLOAT, nullptr, GLRAllocType::NONE, false); diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp index 8c4e354a706a..293954271514 100644 --- a/GPU/GLES/GPU_GLES.cpp +++ b/GPU/GLES/GPU_GLES.cpp @@ -277,7 +277,7 @@ void GPU_GLES::BeginFrame() { if (shaderCachePath_.Valid() && (gpuStats.numFlips & 4095) == 0) { shaderManagerGL_->SaveCache(shaderCachePath_, &drawEngine_); } - shaderManagerGL_->DirtyShader(); + shaderManagerGL_->DirtyLastShader(); // Not sure if this is really needed. 
gstate_c.Dirty(DIRTY_ALL_UNIFORMS); diff --git a/GPU/GLES/ShaderManagerGLES.cpp b/GPU/GLES/ShaderManagerGLES.cpp index 4c28ef04e8c1..54b3b4edaceb 100644 --- a/GPU/GLES/ShaderManagerGLES.cpp +++ b/GPU/GLES/ShaderManagerGLES.cpp @@ -76,6 +76,7 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs, : render_(render), useHWTransform_(useHWTransform) { PROFILE_THIS_SCOPE("shaderlink"); + _assert_(render); _assert_(vs); _assert_(fs); @@ -715,7 +716,7 @@ void ShaderManagerGLES::Clear() { linkedShaderCache_.clear(); fsCache_.Clear(); vsCache_.Clear(); - DirtyShader(); + DirtyLastShader(); } void ShaderManagerGLES::ClearShaders() { @@ -734,16 +735,12 @@ void ShaderManagerGLES::DeviceRestore(Draw::DrawContext *draw) { draw_ = draw; } -void ShaderManagerGLES::DirtyShader() { +void ShaderManagerGLES::DirtyLastShader() { // Forget the last shader ID lastFSID_.set_invalid(); lastVSID_.set_invalid(); - DirtyLastShader(); gstate_c.Dirty(DIRTY_ALL_UNIFORMS | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE); shaderSwitchDirtyUniforms_ = 0; -} - -void ShaderManagerGLES::DirtyLastShader() { lastShader_ = nullptr; lastVShaderSame_ = false; } @@ -986,7 +983,7 @@ enum class CacheDetectFlags { }; #define CACHE_HEADER_MAGIC 0x83277592 -#define CACHE_VERSION 32 +#define CACHE_VERSION 33 struct CacheHeader { uint32_t magic; diff --git a/GPU/GLES/ShaderManagerGLES.h b/GPU/GLES/ShaderManagerGLES.h index db834dce3735..3ae0aaa60ae7 100644 --- a/GPU/GLES/ShaderManagerGLES.h +++ b/GPU/GLES/ShaderManagerGLES.h @@ -172,7 +172,6 @@ class ShaderManagerGLES : public ShaderManagerCommon { void DeviceLost() override; void DeviceRestore(Draw::DrawContext *draw) override; - void DirtyShader(); void DirtyLastShader() override; int GetNumVertexShaders() const { return (int)vsCache_.size(); } diff --git a/GPU/GLES/TextureCacheGLES.cpp b/GPU/GLES/TextureCacheGLES.cpp index 8bbb390bc896..94dcf0db04a9 100644 --- a/GPU/GLES/TextureCacheGLES.cpp +++ b/GPU/GLES/TextureCacheGLES.cpp 
@@ -304,7 +304,7 @@ void TextureCacheGLES::BuildTexture(TexCacheEntry *const entry) { bc = true; } else { int bpp = (int)Draw::DataFormatSizeInBytes(plan.replaced->Format()); - stride = std::max(mipWidth * bpp, 16); + stride = mipWidth * bpp; dataSize = stride * mipHeight; } } else { @@ -314,7 +314,7 @@ void TextureCacheGLES::BuildTexture(TexCacheEntry *const entry) { } else { bpp = (int)Draw::DataFormatSizeInBytes(dstFmt); } - stride = std::max(mipWidth * bpp, 16); + stride = mipWidth * bpp; dataSize = stride * mipHeight; } diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index cd8824f2e333..9dfd2cdbbf8f 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -1704,9 +1704,7 @@ void GPUCommon::DoBlockTransfer(u32 skipDrawReason) { memcpy(dstp, srcp, bytesToCopy); if (MemBlockInfoDetailed(bytesToCopy)) { - tagSize = FormatMemWriteTagAt(tag, sizeof(tag), "GPUBlockTransfer/", src, bytesToCopy); - NotifyMemInfo(MemBlockFlags::READ, src, bytesToCopy, tag, tagSize); - NotifyMemInfo(MemBlockFlags::WRITE, dst, bytesToCopy, tag, tagSize); + NotifyMemInfoCopy(dst, src, bytesToCopy, "GPUBlockTransfer/"); } } else if ((srcDstOverlap || srcWraps || dstWraps) && (srcValid || srcWraps) && (dstValid || dstWraps)) { // This path means we have either src/dst overlap, OR one or both of src and dst wrap. @@ -1862,12 +1860,11 @@ bool GPUCommon::PerformMemoryCopy(u32 dest, u32 src, int size, GPUCopyFlag flags // We use matching values in PerformReadbackToMemory/PerformWriteColorFromMemory. // Since they're identical we don't need to copy. 
if (dest != src) { + if (Memory::IsValidRange(dest, size) && Memory::IsValidRange(src, size)) { + memcpy(Memory::GetPointerWriteUnchecked(dest), Memory::GetPointerUnchecked(src), size); + } if (MemBlockInfoDetailed(size)) { - char tag[128]; - size_t tagSize = FormatMemWriteTagAt(tag, sizeof(tag), "GPUMemcpy/", src, size); - Memory::Memcpy(dest, src, size, tag, tagSize); - } else { - Memory::Memcpy(dest, src, size, "GPUMemcpy"); + NotifyMemInfoCopy(dest, src, size, "GPUMemcpy/"); } } } @@ -1876,10 +1873,7 @@ bool GPUCommon::PerformMemoryCopy(u32 dest, u32 src, int size, GPUCopyFlag flags } if (MemBlockInfoDetailed(size)) { - char tag[128]; - size_t tagSize = FormatMemWriteTagAt(tag, sizeof(tag), "GPUMemcpy/", src, size); - NotifyMemInfo(MemBlockFlags::READ, src, size, tag, tagSize); - NotifyMemInfo(MemBlockFlags::WRITE, dest, size, tag, tagSize); + NotifyMemInfoCopy(dest, src, size, "GPUMemcpy/"); } InvalidateCache(dest, size, GPU_INVALIDATE_HINT); if (!(flags & GPUCopyFlag::DEBUG_NOTIFIED)) diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index 5d53a1e32dff..bacff2f4d5d6 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -99,8 +99,8 @@ void DrawEngineVulkan::InitDeviceObjects() { bindings[3].descriptorCount = 1; bindings[3].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; bindings[3].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT; - if (gstate_c.Use(GPU_USE_GS_CULLING)) - bindings[3].stageFlags |= VK_SHADER_STAGE_GEOMETRY_BIT; + if (draw_->GetDeviceCaps().geometryShaderSupported) + bindings[3].stageFlags |= VK_SHADER_STAGE_GEOMETRY_BIT; // unlikely to have a penalty. if we check GPU_USE_GS_CULLING, we have problems on runtime toggle. 
bindings[3].binding = DRAW_BINDING_DYNUBO_BASE; bindings[4].descriptorCount = 1; bindings[4].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC; @@ -858,7 +858,7 @@ void DrawEngineVulkan::DoFlush() { if (prim == GE_PRIM_TRIANGLE_STRIP) prim = GE_PRIM_TRIANGLES; - u16 *const inds = decIndex_; + u16 *inds = decIndex_; SoftwareTransformResult result{}; SoftwareTransformParams params{}; params.decoded = decoded_; @@ -898,10 +898,9 @@ void DrawEngineVulkan::DoFlush() { // Games sometimes expect exact matches (see #12626, for example) for equal comparisons. if (result.action == SW_CLEAR && everUsedEqualDepth_ && gstate.isClearModeDepthMask() && result.depth > 0.0f && result.depth < 1.0f) result.action = SW_NOT_READY; - int indsOffset = 0; if (result.action == SW_NOT_READY) { swTransform.DetectOffsetTexture(maxIndex); - swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, indsOffset, DECODED_INDEX_BUFFER_SIZE / sizeof(uint16_t), maxIndex, &result); + swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, maxIndex, &result); } if (result.setSafeSize) @@ -972,9 +971,9 @@ void DrawEngineVulkan::DoFlush() { if (result.drawIndexed) { VkBuffer vbuf, ibuf; vbOffset = (uint32_t)pushVertex_->Push(result.drawBuffer, maxIndex * sizeof(TransformedVertex), 4, &vbuf); - ibOffset = (uint32_t)pushIndex_->Push(inds + indsOffset, sizeof(short) * result.drawNumTrans, 4, &ibuf); + ibOffset = (uint32_t)pushIndex_->Push(inds, sizeof(short) * result.drawNumTrans, 4, &ibuf); renderManager->DrawIndexed(ds, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, ibuf, ibOffset, result.drawNumTrans, 1); - } else if (result.drawNumTrans > 0) { + } else { VkBuffer vbuf; vbOffset = (uint32_t)pushVertex_->Push(result.drawBuffer, result.drawNumTrans * sizeof(TransformedVertex), 4, &vbuf); renderManager->Draw(ds, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, result.drawNumTrans); diff --git 
a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index ae3f4d9d137b..10a46ca9ee64 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -25,6 +25,7 @@ #include "Common/GraphicsContext.h" #include "Common/Serialize/Serializer.h" #include "Common/TimeUtil.h" +#include "Common/Thread/ThreadUtil.h" #include "Core/Config.h" #include "Core/Debugger/Breakpoints.h" @@ -92,26 +93,10 @@ GPU_Vulkan::GPU_Vulkan(GraphicsContext *gfxCtx, Draw::DrawContext *draw) if (discID.size()) { File::CreateFullPath(GetSysDirectory(DIRECTORY_APP_CACHE)); shaderCachePath_ = GetSysDirectory(DIRECTORY_APP_CACHE) / (discID + ".vkshadercache"); - shaderCacheLoaded_ = false; - - std::thread th([&] { - LoadCache(shaderCachePath_); - shaderCacheLoaded_ = true; - }); - th.detach(); - } else { - shaderCacheLoaded_ = true; + LoadCache(shaderCachePath_); } } -bool GPU_Vulkan::IsReady() { - return shaderCacheLoaded_; -} - -void GPU_Vulkan::CancelReady() { - pipelineManager_->CancelCache(); -} - void GPU_Vulkan::LoadCache(const Path &filename) { if (!g_Config.bShaderCache) { WARN_LOG(G3D, "Shader cache disabled. Not loading."); @@ -197,7 +182,6 @@ GPU_Vulkan::~GPU_Vulkan() { shaderManager_->ClearShaders(); // other managers are deleted in ~GPUCommonHW. - if (draw_) { VulkanRenderManager *rm = (VulkanRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER); rm->ReleaseCompileQueue(); @@ -314,7 +298,7 @@ void GPU_Vulkan::BeginHostFrame() { framebufferManager_->BeginFrame(); - shaderManagerVulkan_->DirtyShader(); + shaderManagerVulkan_->DirtyLastShader(); gstate_c.Dirty(DIRTY_ALL); if (gstate_c.useFlagsChanged) { @@ -442,6 +426,13 @@ void GPU_Vulkan::DeviceLost() { while (!IsReady()) { sleep_ms(10); } + // draw_ is normally actually still valid here in Vulkan. But we null it out in GPUCommonHW::DeviceLost so we don't try to use it again. 
+ Draw::DrawContext *draw = draw_; + if (draw) { + VulkanRenderManager *rm = (VulkanRenderManager *)draw->GetNativeObject(Draw::NativeObject::RENDER_MANAGER); + rm->DrainAndBlockCompileQueue(); + } + if (shaderCachePath_.Valid()) { SaveCache(shaderCachePath_); } @@ -449,6 +440,11 @@ void GPU_Vulkan::DeviceLost() { pipelineManager_->DeviceLost(); GPUCommonHW::DeviceLost(); + + if (draw) { + VulkanRenderManager *rm = (VulkanRenderManager *)draw->GetNativeObject(Draw::NativeObject::RENDER_MANAGER); + rm->ReleaseCompileQueue(); + } } void GPU_Vulkan::DeviceRestore(Draw::DrawContext *draw) { diff --git a/GPU/Vulkan/GPU_Vulkan.h b/GPU/Vulkan/GPU_Vulkan.h index d5bae9798800..3d943de1cf86 100644 --- a/GPU/Vulkan/GPU_Vulkan.h +++ b/GPU/Vulkan/GPU_Vulkan.h @@ -19,6 +19,7 @@ #include #include +#include #include "Common/File/Path.h" @@ -40,9 +41,6 @@ class GPU_Vulkan : public GPUCommonHW { // This gets called on startup and when we get back from settings. u32 CheckGPUFeatures() const override; - bool IsReady() override; - void CancelReady() override; - // These are where we can reset command buffers etc. 
void BeginHostFrame() override; void EndHostFrame() override; @@ -83,5 +81,4 @@ class GPU_Vulkan : public GPUCommonHW { PipelineManagerVulkan *pipelineManager_; Path shaderCachePath_; - std::atomic shaderCacheLoaded_{}; }; diff --git a/GPU/Vulkan/PipelineManagerVulkan.cpp b/GPU/Vulkan/PipelineManagerVulkan.cpp index ad978c524f7a..b2ad9d0739a6 100644 --- a/GPU/Vulkan/PipelineManagerVulkan.cpp +++ b/GPU/Vulkan/PipelineManagerVulkan.cpp @@ -719,8 +719,6 @@ bool PipelineManagerVulkan::LoadPipelineCache(FILE *file, bool loadRawPipelineCa VulkanRenderManager *rm = (VulkanRenderManager *)drawContext->GetNativeObject(Draw::NativeObject::RENDER_MANAGER); VulkanQueueRunner *queueRunner = rm->GetQueueRunner(); - cancelCache_ = false; - uint32_t size = 0; if (loadRawPipelineCache) { NOTICE_LOG(G3D, "WARNING: Using the badly tested raw pipeline cache path!!!!"); @@ -779,7 +777,7 @@ bool PipelineManagerVulkan::LoadPipelineCache(FILE *file, bool loadRawPipelineCa int pipelineCreateFailCount = 0; int shaderFailCount = 0; for (uint32_t i = 0; i < size; i++) { - if (failed || cancelCache_) { + if (failed) { break; } StoredVulkanPipelineKey key; @@ -824,7 +822,3 @@ bool PipelineManagerVulkan::LoadPipelineCache(FILE *file, bool loadRawPipelineCa // We just ignore any failures. return true; } - -void PipelineManagerVulkan::CancelCache() { - cancelCache_ = true; -} diff --git a/GPU/Vulkan/PipelineManagerVulkan.h b/GPU/Vulkan/PipelineManagerVulkan.h index 0876c4cc2dcc..b61a3faa107b 100644 --- a/GPU/Vulkan/PipelineManagerVulkan.h +++ b/GPU/Vulkan/PipelineManagerVulkan.h @@ -101,11 +101,9 @@ class PipelineManagerVulkan { // Saves data for faster creation next time. 
void SavePipelineCache(FILE *file, bool saveRawPipelineCache, ShaderManagerVulkan *shaderManager, Draw::DrawContext *drawContext); bool LoadPipelineCache(FILE *file, bool loadRawPipelineCache, ShaderManagerVulkan *shaderManager, Draw::DrawContext *drawContext, VkPipelineLayout layout, int multiSampleLevel); - void CancelCache(); private: DenseHashMap pipelines_; VkPipelineCache pipelineCache_ = VK_NULL_HANDLE; VulkanContext *vulkan_; - bool cancelCache_ = false; }; diff --git a/GPU/Vulkan/ShaderManagerVulkan.cpp b/GPU/Vulkan/ShaderManagerVulkan.cpp index 452395a00f2f..5a36f2d540e3 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.cpp +++ b/GPU/Vulkan/ShaderManagerVulkan.cpp @@ -47,6 +47,7 @@ // Most drivers treat vkCreateShaderModule as pretty much a memcpy. What actually // takes time here, and makes this worthy of parallelization, is GLSLtoSPV. // Takes ownership over tag. +// This always returns something, checking the return value for null is not meaningful. static Promise *CompileShaderModuleAsync(VulkanContext *vulkan, VkShaderStageFlagBits stage, const char *code, std::string *tag) { auto compile = [=] { PROFILE_THIS_SCOPE("shadercomp"); @@ -112,13 +113,10 @@ static Promise *CompileShaderModuleAsync(VulkanContext *vulkan, VulkanFragmentShader::VulkanFragmentShader(VulkanContext *vulkan, FShaderID id, FragmentShaderFlags flags, const char *code) : vulkan_(vulkan), id_(id), flags_(flags) { + _assert_(!id.is_invalid()); source_ = code; module_ = CompileShaderModuleAsync(vulkan, VK_SHADER_STAGE_FRAGMENT_BIT, source_.c_str(), new std::string(FragmentShaderDesc(id))); - if (!module_) { - failed_ = true; - } else { - VERBOSE_LOG(G3D, "Compiled fragment shader:\n%s\n", (const char *)code); - } + VERBOSE_LOG(G3D, "Compiled fragment shader:\n%s\n", (const char *)code); } VulkanFragmentShader::~VulkanFragmentShader() { @@ -147,13 +145,10 @@ std::string VulkanFragmentShader::GetShaderString(DebugShaderStringType type) co VulkanVertexShader::VulkanVertexShader(VulkanContext 
*vulkan, VShaderID id, VertexShaderFlags flags, const char *code, bool useHWTransform) : vulkan_(vulkan), useHWTransform_(useHWTransform), flags_(flags), id_(id) { + _assert_(!id.is_invalid()); source_ = code; module_ = CompileShaderModuleAsync(vulkan, VK_SHADER_STAGE_VERTEX_BIT, source_.c_str(), new std::string(VertexShaderDesc(id))); - if (!module_) { - failed_ = true; - } else { - VERBOSE_LOG(G3D, "Compiled vertex shader:\n%s\n", (const char *)code); - } + VERBOSE_LOG(G3D, "Compiled vertex shader:\n%s\n", (const char *)code); } VulkanVertexShader::~VulkanVertexShader() { @@ -182,13 +177,10 @@ std::string VulkanVertexShader::GetShaderString(DebugShaderStringType type) cons VulkanGeometryShader::VulkanGeometryShader(VulkanContext *vulkan, GShaderID id, const char *code) : vulkan_(vulkan), id_(id) { + _assert_(!id.is_invalid()); source_ = code; module_ = CompileShaderModuleAsync(vulkan, VK_SHADER_STAGE_GEOMETRY_BIT, source_.c_str(), new std::string(GeometryShaderDesc(id).c_str())); - if (!module_) { - failed_ = true; - } else { - VERBOSE_LOG(G3D, "Compiled geometry shader:\n%s\n", (const char *)code); - } + VERBOSE_LOG(G3D, "Compiled geometry shader:\n%s\n", (const char *)code); } VulkanGeometryShader::~VulkanGeometryShader() { @@ -232,11 +224,12 @@ ShaderManagerVulkan::ShaderManagerVulkan(Draw::DrawContext *draw) } ShaderManagerVulkan::~ShaderManagerVulkan() { - ClearShaders(); + Clear(); delete[] codeBuffer_; } void ShaderManagerVulkan::DeviceLost() { + Clear(); draw_ = nullptr; } @@ -267,19 +260,15 @@ void ShaderManagerVulkan::Clear() { void ShaderManagerVulkan::ClearShaders() { Clear(); - DirtyShader(); + DirtyLastShader(); gstate_c.Dirty(DIRTY_ALL_UNIFORMS | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE); } -void ShaderManagerVulkan::DirtyShader() { +void ShaderManagerVulkan::DirtyLastShader() { // Forget the last shader ID lastFSID_.set_invalid(); lastVSID_.set_invalid(); lastGSID_.set_invalid(); - DirtyLastShader(); -} - 
-void ShaderManagerVulkan::DirtyLastShader() { lastVShader_ = nullptr; lastFShader_ = nullptr; lastGShader_ = nullptr; @@ -301,29 +290,95 @@ uint64_t ShaderManagerVulkan::UpdateUniforms(bool useBufferedRendering) { } void ShaderManagerVulkan::GetShaders(int prim, VertexDecoder *decoder, VulkanVertexShader **vshader, VulkanFragmentShader **fshader, VulkanGeometryShader **gshader, const ComputedPipelineState &pipelineState, bool useHWTransform, bool useHWTessellation, bool weightsAsFloat, bool useSkinInDecode) { + VulkanContext *vulkan = (VulkanContext *)draw_->GetNativeObject(Draw::NativeObject::CONTEXT); + VShaderID VSID; + VulkanVertexShader *vs = nullptr; if (gstate_c.IsDirty(DIRTY_VERTEXSHADER_STATE)) { gstate_c.Clean(DIRTY_VERTEXSHADER_STATE); ComputeVertexShaderID(&VSID, decoder, useHWTransform, useHWTessellation, weightsAsFloat, useSkinInDecode); + if (VSID == lastVSID_) { + _dbg_assert_(lastVShader_ != nullptr); + vs = lastVShader_; + } else if (!vsCache_.Get(VSID, &vs)) { + // Vertex shader not in cache. Let's compile it. + std::string genErrorString; + uint64_t uniformMask = 0; // Not used + uint32_t attributeMask = 0; // Not used + VertexShaderFlags flags{}; + bool success = GenerateVertexShader(VSID, codeBuffer_, compat_, draw_->GetBugs(), &attributeMask, &uniformMask, &flags, &genErrorString); + _assert_msg_(success, "VS gen error: %s", genErrorString.c_str()); + _assert_msg_(strlen(codeBuffer_) < CODE_BUFFER_SIZE, "VS length error: %d", (int)strlen(codeBuffer_)); + + // Don't need to re-lookup anymore, now that we lock wider. 
+ vs = new VulkanVertexShader(vulkan, VSID, flags, codeBuffer_, useHWTransform); + vsCache_.Insert(VSID, vs); + } + lastVShader_ = vs; + lastVSID_ = VSID; } else { VSID = lastVSID_; + vs = lastVShader_; } + *vshader = vs; FShaderID FSID; + VulkanFragmentShader *fs = nullptr; if (gstate_c.IsDirty(DIRTY_FRAGMENTSHADER_STATE)) { gstate_c.Clean(DIRTY_FRAGMENTSHADER_STATE); ComputeFragmentShaderID(&FSID, pipelineState, draw_->GetBugs()); + if (FSID == lastFSID_) { + _dbg_assert_(lastFShader_ != nullptr); + fs = lastFShader_; + } else if (!fsCache_.Get(FSID, &fs)) { + // Fragment shader not in cache. Let's compile it. + std::string genErrorString; + uint64_t uniformMask = 0; // Not used + FragmentShaderFlags flags{}; + bool success = GenerateFragmentShader(FSID, codeBuffer_, compat_, draw_->GetBugs(), &uniformMask, &flags, &genErrorString); + _assert_msg_(success, "FS gen error: %s", genErrorString.c_str()); + _assert_msg_(strlen(codeBuffer_) < CODE_BUFFER_SIZE, "FS length error: %d", (int)strlen(codeBuffer_)); + + fs = new VulkanFragmentShader(vulkan, FSID, flags, codeBuffer_); + fsCache_.Insert(FSID, fs); + } + lastFShader_ = fs; + lastFSID_ = FSID; } else { FSID = lastFSID_; + fs = lastFShader_; } + *fshader = fs; GShaderID GSID; + VulkanGeometryShader *gs = nullptr; if (gstate_c.IsDirty(DIRTY_GEOMETRYSHADER_STATE)) { gstate_c.Clean(DIRTY_GEOMETRYSHADER_STATE); ComputeGeometryShaderID(&GSID, draw_->GetBugs(), prim); + if (GSID == lastGSID_) { + // it's ok for this to be null. + gs = lastGShader_; + } else if (GSID.Bit(GS_BIT_ENABLED)) { + if (!gsCache_.Get(GSID, &gs)) { + // Geometry shader not in cache. Let's compile it. 
+ std::string genErrorString; + bool success = GenerateGeometryShader(GSID, codeBuffer_, compat_, draw_->GetBugs(), &genErrorString); + _assert_msg_(success, "GS gen error: %s", genErrorString.c_str()); + _assert_msg_(strlen(codeBuffer_) < CODE_BUFFER_SIZE, "GS length error: %d", (int)strlen(codeBuffer_)); + + gs = new VulkanGeometryShader(vulkan, GSID, codeBuffer_); + gsCache_.Insert(GSID, gs); + } + } else { + gs = nullptr; + } + lastGShader_ = gs; + lastGSID_ = GSID; } else { GSID = lastGSID_; + gs = lastGShader_; } + *gshader = gs; _dbg_assert_(FSID.Bit(FS_BIT_FLATSHADE) == VSID.Bit(VS_BIT_FLATSHADE)); _dbg_assert_(FSID.Bit(FS_BIT_LMODE) == VSID.Bit(VS_BIT_LMODE)); @@ -331,82 +386,6 @@ void ShaderManagerVulkan::GetShaders(int prim, VertexDecoder *decoder, VulkanVer _dbg_assert_(GSID.Bit(GS_BIT_LMODE) == VSID.Bit(VS_BIT_LMODE)); } - // Just update uniforms if this is the same shader as last time. - if (lastVShader_ != nullptr && lastFShader_ != nullptr && VSID == lastVSID_ && FSID == lastFSID_ && GSID == lastGSID_) { - *vshader = lastVShader_; - *fshader = lastFShader_; - *gshader = lastGShader_; - _dbg_assert_msg_((*vshader)->UseHWTransform() == useHWTransform, "Bad vshader was cached"); - // Already all set, no need to look up in shader maps. - return; - } - - VulkanContext *vulkan = (VulkanContext *)draw_->GetNativeObject(Draw::NativeObject::CONTEXT); - VulkanVertexShader *vs = nullptr; - if (!vsCache_.Get(VSID, &vs)) { - // Vertex shader not in cache. Let's compile it. 
- std::string genErrorString; - uint64_t uniformMask = 0; // Not used - uint32_t attributeMask = 0; // Not used - VertexShaderFlags flags{}; - bool success = GenerateVertexShader(VSID, codeBuffer_, compat_, draw_->GetBugs(), &attributeMask, &uniformMask, &flags, &genErrorString); - _assert_msg_(success, "VS gen error: %s", genErrorString.c_str()); - _assert_msg_(strlen(codeBuffer_) < CODE_BUFFER_SIZE, "VS length error: %d", (int)strlen(codeBuffer_)); - - std::lock_guard guard(cacheLock_); - if (!vsCache_.Get(VSID, &vs)) { - vs = new VulkanVertexShader(vulkan, VSID, flags, codeBuffer_, useHWTransform); - vsCache_.Insert(VSID, vs); - } - } - - VulkanFragmentShader *fs; - if (!fsCache_.Get(FSID, &fs)) { - // Fragment shader not in cache. Let's compile it. - std::string genErrorString; - uint64_t uniformMask = 0; // Not used - FragmentShaderFlags flags{}; - bool success = GenerateFragmentShader(FSID, codeBuffer_, compat_, draw_->GetBugs(), &uniformMask, &flags, &genErrorString); - _assert_msg_(success, "FS gen error: %s", genErrorString.c_str()); - _assert_msg_(strlen(codeBuffer_) < CODE_BUFFER_SIZE, "FS length error: %d", (int)strlen(codeBuffer_)); - - std::lock_guard guard(cacheLock_); - if (!fsCache_.Get(FSID, &fs)) { - fs = new VulkanFragmentShader(vulkan, FSID, flags, codeBuffer_); - fsCache_.Insert(FSID, fs); - } - } - - VulkanGeometryShader *gs; - if (GSID.Bit(GS_BIT_ENABLED)) { - if (!gsCache_.Get(GSID, &gs)) { - // Geometry shader not in cache. Let's compile it. 
- std::string genErrorString; - bool success = GenerateGeometryShader(GSID, codeBuffer_, compat_, draw_->GetBugs(), &genErrorString); - _assert_msg_(success, "GS gen error: %s", genErrorString.c_str()); - _assert_msg_(strlen(codeBuffer_) < CODE_BUFFER_SIZE, "GS length error: %d", (int)strlen(codeBuffer_)); - - std::lock_guard guard(cacheLock_); - if (!gsCache_.Get(GSID, &gs)) { - gs = new VulkanGeometryShader(vulkan, GSID, codeBuffer_); - gsCache_.Insert(GSID, gs); - } - } - } else { - gs = nullptr; - } - - lastVSID_ = VSID; - lastFSID_ = FSID; - lastGSID_ = GSID; - - lastVShader_ = vs; - lastFShader_ = fs; - lastGShader_ = gs; - - *vshader = vs; - *fshader = fs; - *gshader = gs; _dbg_assert_msg_((*vshader)->UseHWTransform() == useHWTransform, "Bad vshader was computed"); } @@ -528,7 +507,7 @@ enum class VulkanCacheDetectFlags { }; #define CACHE_HEADER_MAGIC 0xff51f420 -#define CACHE_VERSION 47 +#define CACHE_VERSION 48 struct VulkanCacheHeader { uint32_t magic; @@ -597,8 +576,7 @@ bool ShaderManagerVulkan::LoadCache(FILE *f) { continue; } _assert_msg_(strlen(codeBuffer_) < CODE_BUFFER_SIZE, "VS length error: %d", (int)strlen(codeBuffer_)); - // Don't add the new shader if already compiled (can happen since this is a background thread). - std::lock_guard guard(cacheLock_); + // Don't add the new shader if already compiled - though this should no longer happen. 
if (!vsCache_.ContainsKey(id)) { VulkanVertexShader *vs = new VulkanVertexShader(vulkan, id, flags, codeBuffer_, useHWTransform); vsCache_.Insert(id, vs); @@ -622,31 +600,32 @@ bool ShaderManagerVulkan::LoadCache(FILE *f) { continue; } _assert_msg_(strlen(codeBuffer_) < CODE_BUFFER_SIZE, "FS length error: %d", (int)strlen(codeBuffer_)); - std::lock_guard guard(cacheLock_); if (!fsCache_.ContainsKey(id)) { VulkanFragmentShader *fs = new VulkanFragmentShader(vulkan, id, flags, codeBuffer_); fsCache_.Insert(id, fs); } } - for (int i = 0; i < header.numGeometryShaders; i++) { - GShaderID id; - if (fread(&id, sizeof(id), 1, f) != 1) { - ERROR_LOG(G3D, "Vulkan shader cache truncated (in GeometryShaders)"); - return false; - } - std::string genErrorString; - if (!GenerateGeometryShader(id, codeBuffer_, compat_, draw_->GetBugs(), &genErrorString)) { - ERROR_LOG(G3D, "Failed to generate geometry shader during cache load"); - // We just ignore this one and carry on. - failCount++; - continue; - } - _assert_msg_(strlen(codeBuffer_) < CODE_BUFFER_SIZE, "GS length error: %d", (int)strlen(codeBuffer_)); - std::lock_guard guard(cacheLock_); - if (!gsCache_.ContainsKey(id)) { - VulkanGeometryShader *gs = new VulkanGeometryShader(vulkan, id, codeBuffer_); - gsCache_.Insert(id, gs); + // If it's not enabled, don't create shaders cached from earlier runs - creation will likely fail. + if (gstate_c.Use(GPU_USE_GS_CULLING)) { + for (int i = 0; i < header.numGeometryShaders; i++) { + GShaderID id; + if (fread(&id, sizeof(id), 1, f) != 1) { + ERROR_LOG(G3D, "Vulkan shader cache truncated (in GeometryShaders)"); + return false; + } + std::string genErrorString; + if (!GenerateGeometryShader(id, codeBuffer_, compat_, draw_->GetBugs(), &genErrorString)) { + ERROR_LOG(G3D, "Failed to generate geometry shader during cache load"); + // We just ignore this one and carry on. 
+ failCount++; + continue; + } + _assert_msg_(strlen(codeBuffer_) < CODE_BUFFER_SIZE, "GS length error: %d", (int)strlen(codeBuffer_)); + if (!gsCache_.ContainsKey(id)) { + VulkanGeometryShader *gs = new VulkanGeometryShader(vulkan, id, codeBuffer_); + gsCache_.Insert(id, gs); + } } } diff --git a/GPU/Vulkan/ShaderManagerVulkan.h b/GPU/Vulkan/ShaderManagerVulkan.h index 80c1f39a131f..dc566a426eea 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.h +++ b/GPU/Vulkan/ShaderManagerVulkan.h @@ -43,8 +43,6 @@ class VulkanFragmentShader { const std::string &source() const { return source_; } - bool Failed() const { return failed_; } - std::string GetShaderString(DebugShaderStringType type) const; Promise *GetModule() { return module_; } const FShaderID &GetID() const { return id_; } @@ -68,7 +66,6 @@ class VulkanVertexShader { const std::string &source() const { return source_; } - bool Failed() const { return failed_; } bool UseHWTransform() const { return useHWTransform_; } // TODO: Roll into flags VertexShaderFlags Flags() const { return flags_; } @@ -81,7 +78,6 @@ class VulkanVertexShader { VulkanContext *vulkan_; std::string source_; - bool failed_ = false; bool useHWTransform_; VShaderID id_; VertexShaderFlags flags_; @@ -94,9 +90,8 @@ class VulkanGeometryShader { const std::string &source() const { return source_; } - bool Failed() const { return failed_; } - std::string GetShaderString(DebugShaderStringType type) const; + Promise *GetModule() const { return module_; } const GShaderID &GetID() { return id_; } @@ -105,7 +100,6 @@ class VulkanGeometryShader { VulkanContext *vulkan_; std::string source_; - bool failed_ = false; GShaderID id_; }; @@ -119,7 +113,6 @@ class ShaderManagerVulkan : public ShaderManagerCommon { void GetShaders(int prim, VertexDecoder *decoder, VulkanVertexShader **vshader, VulkanFragmentShader **fshader, VulkanGeometryShader **gshader, const ComputedPipelineState &pipelineState, bool useHWTransform, bool useHWTessellation, bool weightsAsFloat, 
bool useSkinInDecode); void ClearShaders() override; - void DirtyShader(); void DirtyLastShader() override; int GetNumVertexShaders() const { return (int)vsCache_.size(); } @@ -130,6 +123,7 @@ class ShaderManagerVulkan : public ShaderManagerCommon { VulkanVertexShader *GetVertexShaderFromID(VShaderID id) { return vsCache_.GetOrNull(id); } VulkanFragmentShader *GetFragmentShaderFromID(FShaderID id) { return fsCache_.GetOrNull(id); } VulkanGeometryShader *GetGeometryShaderFromID(GShaderID id) { return gsCache_.GetOrNull(id); } + VulkanVertexShader *GetVertexShaderFromModule(VkShaderModule module); VulkanFragmentShader *GetFragmentShaderFromModule(VkShaderModule module); VulkanGeometryShader *GetGeometryShaderFromModule(VkShaderModule module); @@ -175,7 +169,6 @@ class ShaderManagerVulkan : public ShaderManagerCommon { GSCache gsCache_; char *codeBuffer_; - std::mutex cacheLock_; uint64_t uboAlignment_; // Uniform block scratchpad. These (the relevant ones) are copied to the current pushbuffer at draw time. 
diff --git a/Qt/QtMain.cpp b/Qt/QtMain.cpp index ea99d78ff5a8..7e7d57f480af 100644 --- a/Qt/QtMain.cpp +++ b/Qt/QtMain.cpp @@ -304,7 +304,7 @@ bool MainUI::HandleCustomEvent(QEvent *e) { const char *filter = "All files (*.*)"; switch (fileType) { case BrowseFileType::BOOTABLE: - filter = "PSP ROMs (*.iso *.cso *.pbp *.elf *.zip *.ppdmp)"; + filter = "PSP ROMs (*.iso *.cso *.chd *.pbp *.elf *.zip *.ppdmp)"; break; case BrowseFileType::IMAGE: filter = "Pictures (*.jpg *.png)"; @@ -731,18 +731,7 @@ void MainUI::updateAccelerometer() { // TODO: Toggle it depending on whether it is enabled QAccelerometerReading *reading = acc->reading(); if (reading) { - AxisInput axis[3]; - for (int i = 0; i < 3; i++) { - axis[i].deviceId = DEVICE_ID_ACCELEROMETER; - } - - axis[0].axisId = JOYSTICK_AXIS_ACCELEROMETER_X; - axis[0].value = reading->x(); - axis[1].axisId = JOYSTICK_AXIS_ACCELEROMETER_Y; - axis[1].value = reading->y(); - axis[2].axisId = JOYSTICK_AXIS_ACCELEROMETER_Z; - axis[2].value = reading->z(); - NativeAxis(axis, 3); + NativeAccelerometer(reading->x(), reading->y(), reading->z()); } #endif } diff --git a/Qt/mainwindow.cpp b/Qt/mainwindow.cpp index 6f9722d4963b..1c3401ec0f64 100644 --- a/Qt/mainwindow.cpp +++ b/Qt/mainwindow.cpp @@ -125,7 +125,7 @@ void MainWindow::bootDone() /* SIGNALS */ void MainWindow::loadAct() { - QString filename = QFileDialog::getOpenFileName(NULL, "Load File", g_Config.currentDirectory.c_str(), "PSP ROMs (*.pbp *.elf *.iso *.cso *.prx)"); + QString filename = QFileDialog::getOpenFileName(NULL, "Load File", g_Config.currentDirectory.c_str(), "PSP ROMs (*.pbp *.elf *.iso *.cso *.chd *.prx)"); if (QFile::exists(filename)) { QFileInfo info(filename); @@ -255,7 +255,7 @@ void MainWindow::resetAct() void MainWindow::switchUMDAct() { - QString filename = QFileDialog::getOpenFileName(NULL, "Switch UMD", g_Config.currentDirectory.c_str(), "PSP ROMs (*.pbp *.elf *.iso *.cso *.prx)"); + QString filename = QFileDialog::getOpenFileName(NULL, "Switch UMD", 
g_Config.currentDirectory.c_str(), "PSP ROMs (*.pbp *.elf *.iso *.cso *.chd *.prx)"); if (QFile::exists(filename)) { QFileInfo info(filename); diff --git a/README.md b/README.md index 9a18bcc14b6f..92a73befab20 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,30 @@ If you want to download regularly updated builds for Android, Windows x86 and x6 For game compatibility, see [community compatibility feedback](https://report.ppsspp.org/games). +What's new in 1.16.5 +==================== +- Additional crash and memory-leak fixes ([#18243], [#18244], [#18247]) +- Revert bad change that broke things with hardware transform turned off ([#18261]) +- Fix menu texture problem in Naruto Kizuna Drive in OpenGL ([#18255]) +- Apply flicker fix to WWE SvR 2007 +- More efficient handling of accelerometer events ([#18250]) +- Fix for jumpy graphics in Harvest Moon ([#18249]) + +What's new in 1.16.4 +==================== +- Simplify shader cache lookups on Vulkan, hopefully fixing some crashes ([#18218]) +- Assorted improvements to the IR JITs ([#18228], [#18235], [#18211], more) +- Other crash and stability fixes ([#18221], [#18220], [#18230], [#18216]) +- Some translation updates ([#18237], more) +- Cleanups and assert fixes ([#18205], [#18201], [#18206]) + +What's new in 1.16.3 +==================== +- Fix crash bug and performance issue in Vulkan shader cache ([#18183], [#18189]) +- Fix crash in icon loading in homebrew store ([#18185]) +- Add some memory safety check ([#18184], [#18194]) +- Fix problem when changing backend from the Windows menu ([#18182]) + What's new in 1.16.2 ==================== - Fix for HTTP support on Linux on networks with shaky or incomplete IPv6 support @@ -400,4 +424,29 @@ Credit goes to: [#18169]: https://github.com/hrydgard/ppsspp/issues/18169 "Better handling of shadergen failures, other minor things" [#18151]: https://github.com/hrydgard/ppsspp/issues/18151 "GPU, VFS, UI: Fixed minor memleaks" [#18165]: 
https://github.com/hrydgard/ppsspp/issues/18165 "x86jit: Fix flush for special-purpose reg" -[#18158]: https://github.com/hrydgard/ppsspp/issues/18158 "Add -fno-math-errno" \ No newline at end of file +[#18158]: https://github.com/hrydgard/ppsspp/issues/18158 "Add -fno-math-errno" +[#18183]: https://github.com/hrydgard/ppsspp/issues/18183 "Pipeline/shader race-condition-during-shutdown crash fix" +[#18189]: https://github.com/hrydgard/ppsspp/issues/18189 "Be a bit smarter when loading the shader cache, avoid duplicating work" +[#18185]: https://github.com/hrydgard/ppsspp/issues/18185 "Store: Fix race condition causing crashes if looking at another game before an icon finishes downloading" +[#18184]: https://github.com/hrydgard/ppsspp/issues/18184 "Add memory bounds-check when expanding points, rects and lines to triangles" +[#18194]: https://github.com/hrydgard/ppsspp/issues/18194 "Cleanups and comment clarifications" +[#18182]: https://github.com/hrydgard/ppsspp/issues/18182 "Backend change from Win32 menu: Add quick workaround for instance counter misbehavior" +[#18218]: https://github.com/hrydgard/ppsspp/issues/18218 "Vulkan: Simplify GetShaders and DirtyLastShader, making them internally consistent." +[#18228]: https://github.com/hrydgard/ppsspp/issues/18228 "unittest: Add jit compare for jit IR" +[#18235]: https://github.com/hrydgard/ppsspp/issues/18235 "irjit: Handle VDet" +[#18211]: https://github.com/hrydgard/ppsspp/issues/18211 "More crash fix attempts" +[#18221]: https://github.com/hrydgard/ppsspp/issues/18221 "Some cleanups and fixes to obscure crashes" +[#18220]: https://github.com/hrydgard/ppsspp/issues/18220 "Add some missing locking in KeyMap.cpp." 
+[#18230]: https://github.com/hrydgard/ppsspp/issues/18230 "Android: Minor activity lifecycle stuff" +[#18216]: https://github.com/hrydgard/ppsspp/issues/18216 "Don't load the shader cache on a separate thread - all it does is already async" +[#18237]: https://github.com/hrydgard/ppsspp/issues/18237 "UI/localization: Italian translation update" +[#18205]: https://github.com/hrydgard/ppsspp/issues/18205 "http: Fix errors on connect" +[#18201]: https://github.com/hrydgard/ppsspp/issues/18201 "Asserts and checks" +[#18206]: https://github.com/hrydgard/ppsspp/issues/18206 "GPU: Handle invalid blendeq more accurately" +[#18243]: https://github.com/hrydgard/ppsspp/issues/18243 "More crashfix/leakfix attempts" +[#18244]: https://github.com/hrydgard/ppsspp/issues/18244 "Core: Stop leaking file loaders" +[#18247]: https://github.com/hrydgard/ppsspp/issues/18247 "Jit: Assert on bad exit numbers, allow two more exits per block" +[#18261]: https://github.com/hrydgard/ppsspp/issues/18261 "Revert \"Merge pull request #18184 from hrydgard/expand-lines-mem-fix\"" +[#18255]: https://github.com/hrydgard/ppsspp/issues/18255 "Fix issue uploading narrow textures in OpenGL." 
+[#18250]: https://github.com/hrydgard/ppsspp/issues/18250 "Separate out accelerometer events from joystick axis events" +[#18249]: https://github.com/hrydgard/ppsspp/issues/18249 "arm64jit: Avoid fused multiplies in vcrsp.t" \ No newline at end of file diff --git a/Tools/tag_release.sh b/Tools/tag_release.sh new file mode 100755 index 000000000000..3d05e6b05aac --- /dev/null +++ b/Tools/tag_release.sh @@ -0,0 +1,14 @@ +# Only for use during the process of making official releases + +if [ -z "$1" ]; then + echo "No argument supplied" + exit 1 +fi + +VER=$1 + +git tag -a ${VER} -m '${VER}'; git push --tags origin ${VER}; git push origin master + +echo Now run the internal tool: +echo ppsspp-build --commit ${VER} --gold --sign-code + diff --git a/UI/BackgroundAudio.cpp b/UI/BackgroundAudio.cpp index 8e7ecc9fde03..c990da8a68bc 100644 --- a/UI/BackgroundAudio.cpp +++ b/UI/BackgroundAudio.cpp @@ -148,6 +148,7 @@ void WavData::Read(RIFFReader &file_) { raw_data = (uint8_t *)malloc(numBytes); raw_data_size = numBytes; + if (num_channels == 1 || num_channels == 2) { file_.ReadData(raw_data, numBytes); } else { @@ -410,7 +411,11 @@ Sample *Sample::Load(const std::string &path) { samples[i] = ConvertU8ToI16(wave.raw_data[i]); } } - return new Sample(samples, wave.num_channels, wave.numFrames, wave.sample_rate); + + // Protect against bad metadata. + int actualFrames = std::min(wave.numFrames, wave.raw_data_size / wave.raw_bytes_per_frame); + + return new Sample(samples, wave.num_channels, actualFrames, wave.sample_rate); } static inline int16_t Clamp16(int32_t sample) { diff --git a/UI/GameSettingsScreen.cpp b/UI/GameSettingsScreen.cpp index b150c85f2e1d..9628dfdc3d8c 100644 --- a/UI/GameSettingsScreen.cpp +++ b/UI/GameSettingsScreen.cpp @@ -1668,7 +1668,7 @@ void DeveloperToolsScreen::CreateViews() { core->HideChoice(3); } // TODO: Enable on more architectures. 
-#if !PPSSPP_ARCH(X86) && !PPSSPP_ARCH(AMD64) +#if !PPSSPP_ARCH(X86) && !PPSSPP_ARCH(AMD64) && !PPSSPP_ARCH(ARM64) core->HideChoice(3); #endif diff --git a/UI/MainScreen.cpp b/UI/MainScreen.cpp index a90fef737473..6349daad3384 100644 --- a/UI/MainScreen.cpp +++ b/UI/MainScreen.cpp @@ -846,7 +846,7 @@ void GameBrowser::Refresh() { } } else if (!listingPending_) { std::vector fileInfo; - path_.GetListing(fileInfo, "iso:cso:pbp:elf:prx:ppdmp:"); + path_.GetListing(fileInfo, "iso:cso:chd:pbp:elf:prx:ppdmp:"); for (size_t i = 0; i < fileInfo.size(); i++) { bool isGame = !fileInfo[i].isDirectory; bool isSaveData = false; diff --git a/UI/MiscScreens.cpp b/UI/MiscScreens.cpp index 1d462f6065ce..754c8f620616 100644 --- a/UI/MiscScreens.cpp +++ b/UI/MiscScreens.cpp @@ -136,21 +136,28 @@ class WaveAnimation : public Animation { // 500 is enough for any resolution really. 24 * 500 = 12000 which fits handily in our UI vertex buffer (max 65536 per flush). const int steps = std::max(20, std::min((int)g_display.dp_xres, 500)); - float step = (float)g_display.dp_xres / (float)steps; + float stepSize = (float)g_display.dp_xres / (float)steps; t *= speed; + float stepx = x; for (int n = 0; n < steps; n++) { - float x = (float)n * step; - float i = x * 1280 / bounds.w; + float nextx = stepx + stepSize; + // Round actual x and width to prevent gaps between waves. 
+ float roundedx = floorf(stepx); + float w = floorf(nextx) - roundedx; + float i = stepx * 1280 / bounds.w; float wave0 = sin(i*0.005+t*0.8)*0.05 + sin(i*0.002+t*0.25)*0.02 + sin(i*0.001+t*0.3)*0.03 + 0.625; float wave1 = sin(i*0.0044+t*0.4)*0.07 + sin(i*0.003+t*0.1)*0.02 + sin(i*0.001+t*0.3)*0.01 + 0.625; - dc.Draw()->RectVGradient(x, wave0*bounds.h, step, (1.0-wave0)*bounds.h, color, 0x00000000); - dc.Draw()->RectVGradient(x, wave1*bounds.h, step, (1.0-wave1)*bounds.h, color, 0x00000000); + + dc.Draw()->RectVGradient(roundedx, wave0*bounds.h, w, (1.0-wave0)*bounds.h, color, 0x00000000); + dc.Draw()->RectVGradient(roundedx, wave1*bounds.h, w, (1.0-wave1)*bounds.h, color, 0x00000000); // Add some "antialiasing" - dc.Draw()->RectVGradient(x, wave0*bounds.h-3.0f * g_display.pixel_in_dps_y, step, 3.0f * g_display.pixel_in_dps_y, 0x00000000, color); - dc.Draw()->RectVGradient(x, wave1*bounds.h-3.0f * g_display.pixel_in_dps_y, step, 3.0f * g_display.pixel_in_dps_y, 0x00000000, color); + dc.Draw()->RectVGradient(roundedx, wave0*bounds.h-3.0f * g_display.pixel_in_dps_y, w, 3.0f * g_display.pixel_in_dps_y, 0x00000000, color); + dc.Draw()->RectVGradient(roundedx, wave1*bounds.h-3.0f * g_display.pixel_in_dps_y, w, 3.0f * g_display.pixel_in_dps_y, 0x00000000, color); + + stepx = nextx; } dc.Flush(); diff --git a/UI/NativeApp.cpp b/UI/NativeApp.cpp index 618c430d7571..f4056c10b32b 100644 --- a/UI/NativeApp.cpp +++ b/UI/NativeApp.cpp @@ -828,7 +828,7 @@ bool CreateGlobalPipelines(); bool NativeInitGraphics(GraphicsContext *graphicsContext) { INFO_LOG(SYSTEM, "NativeInitGraphics"); - _assert_(g_screenManager); + _assert_msg_(g_screenManager, "No screenmanager, bad init order. Backend = %d", g_Config.iGPUBackend); // We set this now so any resize during init is processed later. 
resized = false; @@ -1336,22 +1336,12 @@ static void ProcessOneAxisEvent(const AxisInput &axis) { } void NativeAxis(const AxisInput *axes, size_t count) { - // figure out what the current tilt orientation is by checking the axis event - // This is static, since we need to remember where we last were (in terms of orientation) - static float tiltX; - static float tiltY; - static float tiltZ; - for (size_t i = 0; i < count; i++) { ProcessOneAxisEvent(axes[i]); - switch (axes[i].axisId) { - case JOYSTICK_AXIS_ACCELEROMETER_X: tiltX = axes[i].value; break; - case JOYSTICK_AXIS_ACCELEROMETER_Y: tiltY = axes[i].value; break; - case JOYSTICK_AXIS_ACCELEROMETER_Z: tiltZ = axes[i].value; break; - default: break; - } } +} +void NativeAccelerometer(float tiltX, float tiltY, float tiltZ) { if (g_Config.iTiltInputType == TILT_NULL) { // if tilt events are disabled, don't do anything special. return; @@ -1377,6 +1367,10 @@ void NativeAxis(const AxisInput *axes, size_t count) { TiltEventProcessor::ProcessTilt(landscape, tiltBaseAngleY, tiltX, tiltY, tiltZ, g_Config.bInvertTiltX, g_Config.bInvertTiltY, xSensitivity, ySensitivity); + + HLEPlugins::PluginDataAxis[JOYSTICK_AXIS_ACCELEROMETER_X] = tiltX; + HLEPlugins::PluginDataAxis[JOYSTICK_AXIS_ACCELEROMETER_Y] = tiltY; + HLEPlugins::PluginDataAxis[JOYSTICK_AXIS_ACCELEROMETER_Z] = tiltZ; } void System_PostUIMessage(const std::string &message, const std::string &value) { diff --git a/UI/OnScreenDisplay.cpp b/UI/OnScreenDisplay.cpp index ad8eac4ffd6e..5209673883e6 100644 --- a/UI/OnScreenDisplay.cpp +++ b/UI/OnScreenDisplay.cpp @@ -459,6 +459,8 @@ void OnScreenMessagesView::Draw(UIContext &dc) { // Save the location of the popup, for easy dismissal. 
dismissZones.push_back(ClickZone{ (int)j, b }); break; + default: + break; } break; } diff --git a/UI/RemoteISOScreen.cpp b/UI/RemoteISOScreen.cpp index b80df3002142..9ade1fd80397 100644 --- a/UI/RemoteISOScreen.cpp +++ b/UI/RemoteISOScreen.cpp @@ -503,8 +503,8 @@ void RemoteISOConnectScreen::ExecuteLoad() { class RemoteGameBrowser : public GameBrowser { public: - RemoteGameBrowser(const Path &url, BrowseFlags browseFlags, bool *gridStyle_, ScreenManager *screenManager, std::string lastText, std::string lastLink, UI::LayoutParams *layoutParams = nullptr) - : GameBrowser(url, browseFlags, gridStyle_, screenManager, lastText, lastLink, layoutParams) { + RemoteGameBrowser(const Path &url, BrowseFlags browseFlags, bool *gridStyle, ScreenManager *screenManager, std::string lastText, std::string lastLink, UI::LayoutParams *layoutParams = nullptr) + : GameBrowser(url, browseFlags, gridStyle, screenManager, lastText, lastLink, layoutParams) { initialPath_ = url; } diff --git a/UI/RetroAchievementScreens.cpp b/UI/RetroAchievementScreens.cpp index d8ebceb9ffc9..a92663d3f53a 100644 --- a/UI/RetroAchievementScreens.cpp +++ b/UI/RetroAchievementScreens.cpp @@ -516,7 +516,7 @@ void RenderAchievement(UIContext &dc, const rc_client_achievement_t *achievement char cacheKey[256]; snprintf(cacheKey, sizeof(cacheKey), "ai:%s:%s", achievement->badge_name, iconState == RC_CLIENT_ACHIEVEMENT_STATE_UNLOCKED ? 
"unlocked" : "locked"); if (RC_OK == rc_client_achievement_get_image_url(achievement, iconState, temp, sizeof(temp))) { - Achievements::DownloadImageIfMissing(cacheKey, std::move(std::string(temp))); + Achievements::DownloadImageIfMissing(cacheKey, std::string(temp)); if (g_iconCache.BindIconTexture(&dc, cacheKey)) { dc.Draw()->DrawTexRect(Bounds(bounds.x + padding, bounds.y + padding, iconSpace, iconSpace), 0.0f, 0.0f, 1.0f, 1.0f, whiteAlpha(alpha)); } @@ -560,7 +560,7 @@ void RenderGameAchievementSummary(UIContext &dc, const Bounds &bounds, float alp char cacheKey[256]; snprintf(cacheKey, sizeof(cacheKey), "gi:%s", gameInfo->badge_name); if (RC_OK == rc_client_game_get_image_url(gameInfo, url, sizeof(url))) { - Achievements::DownloadImageIfMissing(cacheKey, std::move(std::string(url))); + Achievements::DownloadImageIfMissing(cacheKey, std::string(url)); if (g_iconCache.BindIconTexture(&dc, cacheKey)) { dc.Draw()->DrawTexRect(Bounds(bounds.x, bounds.y, iconSpace, iconSpace), 0.0f, 0.0f, 1.0f, 1.0f, whiteAlpha(alpha)); } @@ -660,7 +660,7 @@ void RenderLeaderboardEntry(UIContext &dc, const rc_client_leaderboard_entry_t * snprintf(cacheKey, sizeof(cacheKey), "lbe:%s", entry->user); char temp[512]; if (RC_OK == rc_client_leaderboard_entry_get_user_image_url(entry, temp, sizeof(temp))) { - Achievements::DownloadImageIfMissing(cacheKey, std::move(std::string(temp))); + Achievements::DownloadImageIfMissing(cacheKey, std::string(temp)); if (g_iconCache.BindIconTexture(&dc, cacheKey)) { dc.Draw()->DrawTexRect(Bounds(bounds.x + iconLeft, bounds.y + 4.0f, 64.0f, 64.0f), 0.0f, 0.0f, 1.0f, 1.0f, whiteAlpha(alpha)); } diff --git a/UI/TiltAnalogSettingsScreen.cpp b/UI/TiltAnalogSettingsScreen.cpp index 716baf528995..af4456a54599 100644 --- a/UI/TiltAnalogSettingsScreen.cpp +++ b/UI/TiltAnalogSettingsScreen.cpp @@ -137,22 +137,8 @@ void TiltAnalogSettingsScreen::CreateViews() { settings->Add(new Choice(di->T("Back")))->OnClick.Handle(this, &UIScreen::OnBack); } -void 
TiltAnalogSettingsScreen::axis(const AxisInput &axis) { - UIDialogScreenWithGameBackground::axis(axis); - - if (axis.deviceId == DEVICE_ID_ACCELEROMETER) { - switch (axis.axisId) { - case JOYSTICK_AXIS_ACCELEROMETER_X: down_.x = axis.value; break; - case JOYSTICK_AXIS_ACCELEROMETER_Y: down_.y = axis.value; break; - case JOYSTICK_AXIS_ACCELEROMETER_Z: down_.z = axis.value; break; - default: break; - } - } -} - UI::EventReturn TiltAnalogSettingsScreen::OnCalibrate(UI::EventParams &e) { - Lin::Vec3 down = down_.normalized(); - g_Config.fTiltBaseAngleY = atan2(down.z, down.x); + g_Config.fTiltBaseAngleY = TiltEventProcessor::GetCurrentYAngle(); return UI::EVENT_DONE; } diff --git a/UI/TiltAnalogSettingsScreen.h b/UI/TiltAnalogSettingsScreen.h index 1c338e5e944c..b1416cfb69ea 100644 --- a/UI/TiltAnalogSettingsScreen.h +++ b/UI/TiltAnalogSettingsScreen.h @@ -29,7 +29,6 @@ class TiltAnalogSettingsScreen : public UIDialogScreenWithGameBackground { TiltAnalogSettingsScreen(const Path &gamePath) : UIDialogScreenWithGameBackground(gamePath) {} void CreateViews() override; - void axis(const AxisInput &axis) override; void update() override; const char *tag() const override { return "TiltAnalogSettings"; } diff --git a/UWP/CoreUWP/CoreUWP.vcxproj b/UWP/CoreUWP/CoreUWP.vcxproj index d74fdbbb8d2f..4ea24c0ce377 100644 --- a/UWP/CoreUWP/CoreUWP.vcxproj +++ b/UWP/CoreUWP/CoreUWP.vcxproj @@ -66,7 +66,7 @@ false false pch.h - ../../ffmpeg/Windows10/ARM/include;../../ffmpeg/WindowsInclude;../..;../../ext/native;../../ext/snappy;../../ext/libpng17;../../Common;../../ext/zlib;../../ext/zstd/lib;../../ext;../../ext/armips/;../../ext/armips/ext/filesystem/include/;../../ext/armips/ext/tinyformat/;$(ProjectDir);$(GeneratedFilesDir);$(IntDir);%(AdditionalIncludeDirectories) + 
../../ext/libchdr/include;../../ffmpeg/Windows10/ARM/include;../../ffmpeg/WindowsInclude;../..;../../ext/native;../../ext/snappy;../../ext/libpng17;../../Common;../../ext/zlib;../../ext/zstd/lib;../../ext;../../ext/armips/;../../ext/armips/ext/filesystem/include/;../../ext/armips/ext/tinyformat/;$(ProjectDir);$(GeneratedFilesDir);$(IntDir);%(AdditionalIncludeDirectories) USE_FFMPEG;WITH_UPNP;_CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;NOMINMAX;_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE=1;ARMIPS_USE_STD_FILESYSTEM;%(ClCompile.PreprocessorDefinitions) stdcpp17 @@ -82,7 +82,7 @@ false false pch.h - ../../ffmpeg/Windows10/ARM64/include;../../ffmpeg/WindowsInclude;../..;../../ext/native;../../ext/snappy;../../ext/libpng17;../../Common;../../ext/zlib;../../ext/zstd/lib;../../ext;../../ext/armips/;../../ext/armips/ext/filesystem/include/;../../ext/armips/ext/tinyformat/;$(ProjectDir);$(GeneratedFilesDir);$(IntDir);%(AdditionalIncludeDirectories) + ../../ext/libchdr/include;../../ffmpeg/Windows10/ARM64/include;../../ffmpeg/WindowsInclude;../..;../../ext/native;../../ext/snappy;../../ext/libpng17;../../Common;../../ext/zlib;../../ext/zstd/lib;../../ext;../../ext/armips/;../../ext/armips/ext/filesystem/include/;../../ext/armips/ext/tinyformat/;$(ProjectDir);$(GeneratedFilesDir);$(IntDir);%(AdditionalIncludeDirectories) USE_FFMPEG;WITH_UPNP;_CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;NOMINMAX;_ARM64_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE=1;ARMIPS_USE_STD_FILESYSTEM;%(ClCompile.PreprocessorDefinitions) stdcpp17 @@ -98,7 +98,7 @@ false false pch.h - ../../ffmpeg/Windows10/x64/include;../../ffmpeg/WindowsInclude;../..;../../ext/native;../../ext/snappy;../../ext/libpng17;../../Common;../../ext/zlib;../../ext/zstd/lib;../../ext;../../ext/armips/;../../ext/armips/ext/filesystem/include/;../../ext/armips/ext/tinyformat/;$(ProjectDir);$(GeneratedFilesDir);$(IntDir);%(AdditionalIncludeDirectories) + 
../../ext/libchdr/include;../../ffmpeg/Windows10/x64/include;../../ffmpeg/WindowsInclude;../..;../../ext/native;../../ext/snappy;../../ext/libpng17;../../Common;../../ext/zlib;../../ext/zstd/lib;../../ext;../../ext/armips/;../../ext/armips/ext/filesystem/include/;../../ext/armips/ext/tinyformat/;$(ProjectDir);$(GeneratedFilesDir);$(IntDir);%(AdditionalIncludeDirectories) USE_FFMPEG;WITH_UPNP;_CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;NOMINMAX;_UNICODE;UNICODE;ARMIPS_USE_STD_FILESYSTEM;%(PreprocessorDefinitions) stdcpp17 @@ -1110,6 +1110,9 @@ {acb316ca-3ecb-48e5-be0a-91e72d5b0f12} + + {191b6f52-ad66-4172-bd20-733eeeceef8c} + {d31fd4f0-53eb-477c-9dc7-149796f628e2} diff --git a/UWP/PPSSPP_UWP.sln b/UWP/PPSSPP_UWP.sln index 8571149bc01a..8f1c7d1ae8f7 100644 --- a/UWP/PPSSPP_UWP.sln +++ b/UWP/PPSSPP_UWP.sln @@ -36,6 +36,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cpu_features_UWP", "cpu_fea EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "rcheevos_UWP", "rcheevos_UWP\rcheevos_UWP.vcxproj", "{4C9D52D0-310A-4347-8991-E3788CB22169}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libchdr_UWP", "libchdr_UWP\libchdr_UWP.vcxproj", "{191B6F52-AD66-4172-BD20-733EEECEEF8C}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|ARM = Debug|ARM @@ -310,6 +312,24 @@ Global {4C9D52D0-310A-4347-8991-E3788CB22169}.UWP Gold|ARM64.Build.0 = Release|ARM64 {4C9D52D0-310A-4347-8991-E3788CB22169}.UWP Gold|x64.ActiveCfg = Release|x64 {4C9D52D0-310A-4347-8991-E3788CB22169}.UWP Gold|x64.Build.0 = Release|x64 + {191B6F52-AD66-4172-BD20-733EEECEEF8C}.Debug|ARM.ActiveCfg = Debug|ARM + {191B6F52-AD66-4172-BD20-733EEECEEF8C}.Debug|ARM.Build.0 = Debug|ARM + {191B6F52-AD66-4172-BD20-733EEECEEF8C}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {191B6F52-AD66-4172-BD20-733EEECEEF8C}.Debug|ARM64.Build.0 = Debug|ARM64 + {191B6F52-AD66-4172-BD20-733EEECEEF8C}.Debug|x64.ActiveCfg = Debug|x64 + 
{191B6F52-AD66-4172-BD20-733EEECEEF8C}.Debug|x64.Build.0 = Debug|x64 + {191B6F52-AD66-4172-BD20-733EEECEEF8C}.Release|ARM.ActiveCfg = Release|ARM + {191B6F52-AD66-4172-BD20-733EEECEEF8C}.Release|ARM.Build.0 = Release|ARM + {191B6F52-AD66-4172-BD20-733EEECEEF8C}.Release|ARM64.ActiveCfg = Release|ARM64 + {191B6F52-AD66-4172-BD20-733EEECEEF8C}.Release|ARM64.Build.0 = Release|ARM64 + {191B6F52-AD66-4172-BD20-733EEECEEF8C}.Release|x64.ActiveCfg = Release|x64 + {191B6F52-AD66-4172-BD20-733EEECEEF8C}.Release|x64.Build.0 = Release|x64 + {191B6F52-AD66-4172-BD20-733EEECEEF8C}.UWP Gold|ARM.ActiveCfg = Release|ARM + {191B6F52-AD66-4172-BD20-733EEECEEF8C}.UWP Gold|ARM.Build.0 = Release|ARM + {191B6F52-AD66-4172-BD20-733EEECEEF8C}.UWP Gold|ARM64.ActiveCfg = Release|ARM64 + {191B6F52-AD66-4172-BD20-733EEECEEF8C}.UWP Gold|ARM64.Build.0 = Release|ARM64 + {191B6F52-AD66-4172-BD20-733EEECEEF8C}.UWP Gold|x64.ActiveCfg = Release|x64 + {191B6F52-AD66-4172-BD20-733EEECEEF8C}.UWP Gold|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/UWP/libchdr_UWP/libchdr_UWP.vcxproj b/UWP/libchdr_UWP/libchdr_UWP.vcxproj new file mode 100644 index 000000000000..afd6fb6a3194 --- /dev/null +++ b/UWP/libchdr_UWP/libchdr_UWP.vcxproj @@ -0,0 +1,104 @@ + + + + + Debug + ARM + + + Debug + ARM64 + + + Debug + x64 + + + Release + ARM + + + Release + ARM64 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + {191b6f52-ad66-4172-bd20-733eeeceef8c} + StaticLibrary + libchdr + en-US + 14.0 + true + Windows Store + 10.0 + 10.0.19041.0 + 10.0 + libchdr_UWP + + + + StaticLibrary + true + + + StaticLibrary + false + false + + + + + + + + + + + + false + + + + false + false + ..\..\ext\zlib;..\..\ext\libchdr\include;..\..\ext\libchdr\deps\lzma-22.01\include;$(ProjectDir);$(GeneratedFilesDir);$(IntDir);%(AdditionalIncludeDirectories) + NotUsing + _7ZIP_ST;_CRT_SECURE_NO_WARNINGS;_UNICODE;UNICODE;%(PreprocessorDefinitions) 
+ + + Console + false + false + + + + + + \ No newline at end of file diff --git a/UWP/libchdr_UWP/libchdr_UWP.vcxproj.filters b/UWP/libchdr_UWP/libchdr_UWP.vcxproj.filters new file mode 100644 index 000000000000..7a49828f1d8b --- /dev/null +++ b/UWP/libchdr_UWP/libchdr_UWP.vcxproj.filters @@ -0,0 +1,67 @@ + + + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tga;tiff;tif;png;wav;mfcribbon-ms + + + {20de3681-6341-427f-a3a7-4ae259deb0b9} + + + + + + + + + + lzma + + + lzma + + + lzma + + + lzma + + + lzma + + + lzma + + + lzma + + + lzma + + + lzma + + + lzma + + + lzma + + + lzma + + + lzma + + + lzma + + + lzma + + + lzma + + + \ No newline at end of file diff --git a/Windows/DinputDevice.cpp b/Windows/DinputDevice.cpp index 2115d857f7d0..0581f522e31b 100644 --- a/Windows/DinputDevice.cpp +++ b/Windows/DinputDevice.cpp @@ -145,7 +145,7 @@ DinputDevice::DinputDevice(int devnum) { return; } - getDevices(false); + getDevices(needsCheck_); if ( (devnum >= (int)devices.size()) || FAILED(getPDI()->CreateDevice(devices.at(devnum).guidInstance, &pJoystick, NULL))) { return; diff --git a/Windows/PPSSPP.sln b/Windows/PPSSPP.sln index de5d07692648..93a9d22c626a 100644 --- a/Windows/PPSSPP.sln +++ b/Windows/PPSSPP.sln @@ -93,6 +93,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cpu_features", "..\ext\cpu_ EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "rcheevos", "..\ext\rcheevos-build\rcheevos.vcxproj", "{31694510-A8C0-40F6-B09B-E8DF825ADEFA}" EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libchdr", "..\ext\libchdr.vcxproj", "{956F1F48-B612-46D8-89EE-96996DCD9383}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|ARM = Debug|ARM @@ -395,6 +397,22 @@ Global {31694510-A8C0-40F6-B09B-E8DF825ADEFA}.Release|Win32.Build.0 = Release|Win32 {31694510-A8C0-40F6-B09B-E8DF825ADEFA}.Release|x64.ActiveCfg = Release|x64 
{31694510-A8C0-40F6-B09B-E8DF825ADEFA}.Release|x64.Build.0 = Release|x64 + {956F1F48-B612-46D8-89EE-96996DCD9383}.Debug|ARM.ActiveCfg = Debug|ARM + {956F1F48-B612-46D8-89EE-96996DCD9383}.Debug|ARM.Build.0 = Debug|ARM + {956F1F48-B612-46D8-89EE-96996DCD9383}.Debug|ARM64.ActiveCfg = Debug|ARM64 + {956F1F48-B612-46D8-89EE-96996DCD9383}.Debug|ARM64.Build.0 = Debug|ARM64 + {956F1F48-B612-46D8-89EE-96996DCD9383}.Debug|Win32.ActiveCfg = Debug|Win32 + {956F1F48-B612-46D8-89EE-96996DCD9383}.Debug|Win32.Build.0 = Debug|Win32 + {956F1F48-B612-46D8-89EE-96996DCD9383}.Debug|x64.ActiveCfg = Debug|x64 + {956F1F48-B612-46D8-89EE-96996DCD9383}.Debug|x64.Build.0 = Debug|x64 + {956F1F48-B612-46D8-89EE-96996DCD9383}.Release|ARM.ActiveCfg = Release|ARM + {956F1F48-B612-46D8-89EE-96996DCD9383}.Release|ARM.Build.0 = Release|ARM + {956F1F48-B612-46D8-89EE-96996DCD9383}.Release|ARM64.ActiveCfg = Release|ARM64 + {956F1F48-B612-46D8-89EE-96996DCD9383}.Release|ARM64.Build.0 = Release|ARM64 + {956F1F48-B612-46D8-89EE-96996DCD9383}.Release|Win32.ActiveCfg = Release|Win32 + {956F1F48-B612-46D8-89EE-96996DCD9383}.Release|Win32.Build.0 = Release|Win32 + {956F1F48-B612-46D8-89EE-96996DCD9383}.Release|x64.ActiveCfg = Release|x64 + {956F1F48-B612-46D8-89EE-96996DCD9383}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -412,6 +430,7 @@ Global {8BFD8150-94D5-4BF9-8A50-7BD9929A0850} = {39FCACF8-10D9-4D8D-97AA-7507436AD932} {C249F016-7F82-45CF-BB6E-0642A988C4D3} = {39FCACF8-10D9-4D8D-97AA-7507436AD932} {31694510-A8C0-40F6-B09B-E8DF825ADEFA} = {39FCACF8-10D9-4D8D-97AA-7507436AD932} + {956F1F48-B612-46D8-89EE-96996DCD9383} = {39FCACF8-10D9-4D8D-97AA-7507436AD932} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {2FD47774-A031-48F4-B645-A49A3140A29B} diff --git a/Windows/PPSSPP.vcxproj b/Windows/PPSSPP.vcxproj index 881a31cab516..01128f75fbac 100644 --- a/Windows/PPSSPP.vcxproj +++ 
b/Windows/PPSSPP.vcxproj @@ -1721,6 +1721,9 @@ {edfa2e87-8ac1-4853-95d4-d7594ff81947} + + {956f1f48-b612-46d8-89ee-96996dcd9383} + {3baae095-e0ab-4b0e-b5df-ce39c8ae31de} diff --git a/Windows/main.cpp b/Windows/main.cpp index 5149ebcc5370..04b89c7372c0 100644 --- a/Windows/main.cpp +++ b/Windows/main.cpp @@ -554,7 +554,7 @@ bool System_MakeRequest(SystemRequestType type, int requestId, const std::string std::wstring filter; switch (type) { case BrowseFileType::BOOTABLE: - filter = MakeFilter(L"All supported file types (*.iso *.cso *.pbp *.elf *.prx *.zip *.ppdmp)|*.pbp;*.elf;*.iso;*.cso;*.prx;*.zip;*.ppdmp|PSP ROMs (*.iso *.cso *.pbp *.elf *.prx)|*.pbp;*.elf;*.iso;*.cso;*.prx|Homebrew/Demos installers (*.zip)|*.zip|All files (*.*)|*.*||"); + filter = MakeFilter(L"All supported file types (*.iso *.cso *.chd *.pbp *.elf *.prx *.zip *.ppdmp)|*.pbp;*.elf;*.iso;*.cso;*.chd;*.prx;*.zip;*.ppdmp|PSP ROMs (*.iso *.cso *.chd *.pbp *.elf *.prx)|*.pbp;*.elf;*.iso;*.cso;*.chd;*.prx|Homebrew/Demos installers (*.zip)|*.zip|All files (*.*)|*.*||"); break; case BrowseFileType::INI: filter = MakeFilter(L"Ini files (*.ini)|*.ini|All files (*.*)|*.*||"); diff --git a/android/ab.cmd b/android/ab.cmd index f205269c8b95..412ba8263e09 100644 --- a/android/ab.cmd +++ b/android/ab.cmd @@ -11,5 +11,5 @@ copy ..\assets\*.meta assets\ copy ..\assets\*.wav assets\ SET NDK=C:\Android\sdk\ndk\21.3.6528147 REM SET NDK=C:\Android\ndk -SET NDK_MODULE_PATH=..\ext;..\ext\native\ext +SET NDK_MODULE_PATH=..\ext %NDK%/ndk-build -j32 %* diff --git a/android/jni/Android.mk b/android/jni/Android.mk index 338d52baa0b2..c70bba4eb4ce 100644 --- a/android/jni/Android.mk +++ b/android/jni/Android.mk @@ -4,11 +4,45 @@ SRC := ../.. 
include $(CLEAR_VARS) include $(LOCAL_PATH)/Locals.mk +LOCAL_CFLAGS += -D_7ZIP_ST -D__SWITCH__ + +LZMA_FILES := \ + $(SRC)/ext/libchdr/deps/lzma-22.01/src/Alloc.c \ + $(SRC)/ext/libchdr/deps/lzma-22.01/src/Bcj2.c \ + $(SRC)/ext/libchdr/deps/lzma-22.01/src/Bcj2Enc.c \ + $(SRC)/ext/libchdr/deps/lzma-22.01/src/Bra.c \ + $(SRC)/ext/libchdr/deps/lzma-22.01/src/Bra86.c \ + $(SRC)/ext/libchdr/deps/lzma-22.01/src/CpuArch.c \ + $(SRC)/ext/libchdr/deps/lzma-22.01/src/Delta.c \ + $(SRC)/ext/libchdr/deps/lzma-22.01/src/LzFind.c \ + $(SRC)/ext/libchdr/deps/lzma-22.01/src/LzFindOpt.c \ + $(SRC)/ext/libchdr/deps/lzma-22.01/src/LzmaDec.c \ + $(SRC)/ext/libchdr/deps/lzma-22.01/src/LzmaEnc.c \ + $(SRC)/ext/libchdr/deps/lzma-22.01/src/Lzma86Dec.c \ + $(SRC)/ext/libchdr/deps/lzma-22.01/src/Lzma86Enc.c \ + $(SRC)/ext/libchdr/deps/lzma-22.01/src/LzmaLib.c \ + $(SRC)/ext/libchdr/deps/lzma-22.01/src/Sort.c + +CHDR_FILES := \ + ${LZMA_FILES} \ + $(SRC)/ext/libchdr/src/libchdr_bitstream.c \ + $(SRC)/ext/libchdr/src/libchdr_cdrom.c \ + $(SRC)/ext/libchdr/src/libchdr_chd.c \ + $(SRC)/ext/libchdr/src/libchdr_flac.c \ + $(SRC)/ext/libchdr/src/libchdr_huffman.c + +LOCAL_MODULE := libchdr +LOCAL_SRC_FILES := $(CHDR_FILES) +include $(BUILD_STATIC_LIBRARY) + +include $(CLEAR_VARS) +include $(LOCAL_PATH)/Locals.mk + LOCAL_C_INCLUDES += \ $(LOCAL_PATH)/../../ext/cpu_features/include \ $(LOCAL_PATH)/../../ext/rcheevos/include -LOCAL_CFLAGS += -DSTACK_LINE_READER_BUFFER_SIZE=1024 -DHAVE_DLFCN_H -DRC_DISABLE_LUA +LOCAL_CFLAGS += -DSTACK_LINE_READER_BUFFER_SIZE=1024 -DHAVE_DLFCN_H -DRC_DISABLE_LUA -D_7ZIP_ST # http://software.intel.com/en-us/articles/getting-started-on-optimizing-ndk-project-for-multiple-cpu-architectures @@ -288,7 +322,7 @@ include $(BUILD_STATIC_LIBRARY) # Next up, Core, GPU, and other core parts shared by headless. 
include $(CLEAR_VARS) include $(LOCAL_PATH)/Locals.mk -LOCAL_WHOLE_STATIC_LIBRARIES += ppsspp_common +LOCAL_WHOLE_STATIC_LIBRARIES += ppsspp_common libchdr ifeq ($(TARGET_ARCH_ABI),x86_64) ARCH_FILES := \ @@ -357,7 +391,7 @@ ARCH_FILES := \ Arm64EmitterTest.cpp endif -VULKAN_FILES := \ +GPU_VULKAN_FILES := \ $(SRC)/GPU/Vulkan/DrawEngineVulkan.cpp \ $(SRC)/GPU/Vulkan/FramebufferManagerVulkan.cpp \ $(SRC)/GPU/Vulkan/GPU_Vulkan.cpp \ @@ -370,7 +404,7 @@ VULKAN_FILES := \ EXEC_AND_LIB_FILES := \ $(ARCH_FILES) \ - $(VULKAN_FILES) \ + $(GPU_VULKAN_FILES) \ $(SRC)/ext/xxhash.c \ TestRunner.cpp \ $(SRC)/Core/MIPS/MIPS.cpp.arm \ diff --git a/android/jni/Locals.mk b/android/jni/Locals.mk index aa71119f3b71..e176a2bdc747 100644 --- a/android/jni/Locals.mk +++ b/android/jni/Locals.mk @@ -20,6 +20,8 @@ LOCAL_C_INCLUDES := \ $(LOCAL_PATH)/../../ext/armips \ $(LOCAL_PATH)/../../ext/armips/ext/filesystem/include \ $(LOCAL_PATH)/../../ext/armips/ext/tinyformat \ + $(LOCAL_PATH)/../../ext/libchdr/deps/lzma-22.01/include \ + $(LOCAL_PATH)/../../ext/libchdr/include \ $(LOCAL_PATH) LOCAL_STATIC_LIBRARIES := libzip glslang-build miniupnp-build diff --git a/android/jni/app-android.cpp b/android/jni/app-android.cpp index 1d667d72ebcb..79048d959508 100644 --- a/android/jni/app-android.cpp +++ b/android/jni/app-android.cpp @@ -688,8 +688,9 @@ extern "C" void Java_org_ppsspp_ppsspp_NativeApp_init EARLY_LOG("NativeApp.init() -- begin"); PROFILE_INIT(); - std::lock_guard guard(renderLock); // Note: This is held for the rest of this function - intended? 
+ std::lock_guard guard(renderLock); renderer_inited = false; + exitRenderLoop = false; androidVersion = jAndroidVersion; deviceType = jdeviceType; @@ -872,8 +873,14 @@ extern "C" void Java_org_ppsspp_ppsspp_NativeApp_pause(JNIEnv *, jclass) { } extern "C" void Java_org_ppsspp_ppsspp_NativeApp_shutdown(JNIEnv *, jclass) { + INFO_LOG(SYSTEM, "NativeApp.shutdown() -- begin"); + if (renderer_inited && useCPUThread && graphicsContext) { // Only used in Java EGL path. + + // We can't lock renderLock here because the emu thread will be in NativeFrame + // which locks renderLock already, and only gets out once we call ThreadFrame() + // in a loop before, to empty the queue. EmuThreadStop("shutdown"); INFO_LOG(SYSTEM, "BeginAndroidShutdown"); graphicsContext->BeginAndroidShutdown(); @@ -891,19 +898,19 @@ extern "C" void Java_org_ppsspp_ppsspp_NativeApp_shutdown(JNIEnv *, jclass) { EmuThreadJoin(); } - INFO_LOG(SYSTEM, "NativeApp.shutdown() -- begin"); - if (renderer_inited) { - INFO_LOG(G3D, "Shutting down renderer"); - graphicsContext->Shutdown(); - delete graphicsContext; - graphicsContext = nullptr; - renderer_inited = false; - } else { - INFO_LOG(G3D, "Not shutting down renderer - not initialized"); - } - { std::lock_guard guard(renderLock); + + if (graphicsContext) { + INFO_LOG(G3D, "Shutting down renderer"); + graphicsContext->Shutdown(); + delete graphicsContext; + graphicsContext = nullptr; + renderer_inited = false; + } else { + INFO_LOG(G3D, "Not shutting down renderer - not initialized"); + } + NativeShutdown(); g_VFS.Clear(); } @@ -1135,6 +1142,9 @@ void UpdateRunLoopAndroid(JNIEnv *env) { } extern "C" void Java_org_ppsspp_ppsspp_NativeRenderer_displayRender(JNIEnv *env, jobject obj) { + // This doesn't get called on the Vulkan path. 
+ _assert_(useCPUThread); + static bool hasSetThreadName = false; if (!hasSetThreadName) { hasSetThreadName = true; @@ -1144,13 +1154,9 @@ extern "C" void Java_org_ppsspp_ppsspp_NativeRenderer_displayRender(JNIEnv *env, if (IsVREnabled() && !StartVRRender()) return; - if (useCPUThread) { - // This is the "GPU thread". Call ThreadFrame. - if (!graphicsContext || !graphicsContext->ThreadFrame()) { - return; - } - } else { - UpdateRunLoopAndroid(env); + // This is the "GPU thread". Call ThreadFrame. + if (!graphicsContext || !graphicsContext->ThreadFrame()) { + return; } if (IsVREnabled()) { @@ -1234,18 +1240,7 @@ extern "C" jboolean Java_org_ppsspp_ppsspp_NativeApp_mouseWheelEvent( extern "C" void JNICALL Java_org_ppsspp_ppsspp_NativeApp_accelerometer(JNIEnv *, jclass, float x, float y, float z) { if (!renderer_inited) return; - - AxisInput axis[3]; - for (int i = 0; i < 3; i++) { - axis[i].deviceId = DEVICE_ID_ACCELEROMETER; - } - axis[0].axisId = JOYSTICK_AXIS_ACCELEROMETER_X; - axis[0].value = x; - axis[1].axisId = JOYSTICK_AXIS_ACCELEROMETER_Y; - axis[1].value = y; - axis[2].axisId = JOYSTICK_AXIS_ACCELEROMETER_Z; - axis[2].value = z; - NativeAxis(axis, 3); + NativeAccelerometer(x, y, z); } extern "C" void JNICALL Java_org_ppsspp_ppsspp_NativeApp_sendMessageFromJava(JNIEnv *env, jclass, jstring message, jstring param) { @@ -1304,9 +1299,7 @@ extern "C" void JNICALL Java_org_ppsspp_ppsspp_NativeActivity_requestExitVulkanR return; } exitRenderLoop = true; - while (renderLoopRunning) { - sleep_ms(5); - } + // The caller joins the thread anyway, so no point in doing a wait loop here, only leads to misleading hang diagnostics. } void correctRatio(int &sz_x, int &sz_y, float scale) { @@ -1457,15 +1450,24 @@ static void ProcessFrameCommands(JNIEnv *env) { } // This runs in Vulkan mode only. +// This handles the entire lifecycle of the Vulkan context, init and exit. 
extern "C" bool JNICALL Java_org_ppsspp_ppsspp_NativeActivity_runVulkanRenderLoop(JNIEnv *env, jobject obj, jobject _surf) { _assert_(!useCPUThread); if (!graphicsContext) { ERROR_LOG(G3D, "runVulkanRenderLoop: Tried to enter without a created graphics context."); + renderLoopRunning = false; + exitRenderLoop = false; return false; } - exitRenderLoop = false; + if (exitRenderLoop) { + WARN_LOG(G3D, "runVulkanRenderLoop: ExitRenderLoop requested at start, skipping the whole thing."); + renderLoopRunning = false; + exitRenderLoop = false; + return true; + } + // This is up here to prevent race conditions, in case we pause during init. renderLoopRunning = true; @@ -1507,11 +1509,11 @@ extern "C" bool JNICALL Java_org_ppsspp_ppsspp_NativeActivity_runVulkanRenderLoo hasSetThreadName = true; SetCurrentThreadName("AndroidRender"); } - } - while (!exitRenderLoop) { - LockedNativeUpdateRender(); - ProcessFrameCommands(env); + while (!exitRenderLoop) { + LockedNativeUpdateRender(); + ProcessFrameCommands(env); + } } INFO_LOG(G3D, "Leaving EGL/Vulkan render loop."); @@ -1525,6 +1527,7 @@ extern "C" bool JNICALL Java_org_ppsspp_ppsspp_NativeActivity_runVulkanRenderLoo INFO_LOG(G3D, "Shutting down graphics context from render thread..."); graphicsContext->ShutdownFromRenderThread(); renderLoopRunning = false; + exitRenderLoop = false; WARN_LOG(G3D, "Render loop function exited."); return true; diff --git a/android/src/org/ppsspp/ppsspp/NativeActivity.java b/android/src/org/ppsspp/ppsspp/NativeActivity.java index f9521d14eb2b..21ac0553e7c1 100644 --- a/android/src/org/ppsspp/ppsspp/NativeActivity.java +++ b/android/src/org/ppsspp/ppsspp/NativeActivity.java @@ -62,10 +62,10 @@ public abstract class NativeActivity extends Activity { // Allows us to skip a lot of initialization on secondary calls to onCreate. private static boolean initialized = false; - // False to use C++ EGL, queried from C++ after NativeApp.init. + // False to use Vulkan, queried from C++ after NativeApp.init. 
private static boolean javaGL = true; - // Graphics and audio interfaces for EGL (javaGL = false) + // Graphics and audio interfaces for Vulkan (javaGL = false) private NativeSurfaceView mSurfaceView; private Surface mSurface; private Thread mRenderLoopThread = null; @@ -662,7 +662,7 @@ public void onCreate(Bundle savedInstanceState) { Log.i(TAG, "setcontentview before"); setContentView(mSurfaceView); Log.i(TAG, "setcontentview after"); - ensureRenderLoop(); + startRenderLoopThread(); } } @@ -677,12 +677,18 @@ public void onWindowFocusChanged(boolean hasFocus) { public void notifySurface(Surface surface) { mSurface = surface; + + if (!initialized) { + Log.e(TAG, "Can't deal with surfaces while not initialized"); + return; + } + if (!javaGL) { // If we got a surface, this starts the thread. If not, it doesn't. if (mSurface == null) { joinRenderLoopThread(); } else { - ensureRenderLoop(); + startRenderLoopThread(); } } updateSustainedPerformanceMode(); @@ -690,7 +696,7 @@ public void notifySurface(Surface surface) { // Invariants: After this, mRenderLoopThread will be set, and the thread will be running, // if in Vulkan mode. 
- protected synchronized void ensureRenderLoop() { + protected synchronized void startRenderLoopThread() { if (javaGL) { Log.e(TAG, "JavaGL mode - should not get into ensureRenderLoop."); return; @@ -724,6 +730,7 @@ private synchronized void joinRenderLoopThread() { mRenderLoopThread = null; } catch (InterruptedException e) { e.printStackTrace(); + mRenderLoopThread = null; } } } @@ -739,33 +746,37 @@ void setupSystemUiCallback() { navigationCallbackView = decorView; } - @Override - protected void onStop() { - super.onStop(); - Log.i(TAG, "onStop - do nothing special"); - } - @Override protected void onDestroy() { super.onDestroy(); Log.i(TAG, "onDestroy"); if (javaGL) { - if (nativeRenderer != null && nativeRenderer.isRenderingFrame()) { - Log.i(TAG, "Waiting for renderer to finish."); - int tries = 200; - do { - try { - Thread.sleep(10); - } catch (InterruptedException e) { - } - tries--; - } while (nativeRenderer.isRenderingFrame() && tries > 0); + if (nativeRenderer != null) { + if (nativeRenderer.isRenderingFrame()) { + Log.i(TAG, "Waiting for renderer to finish."); + int tries = 200; + do { + try { + Thread.sleep(10); + } catch (InterruptedException e) { + } + tries--; + } while (nativeRenderer.isRenderingFrame() && tries > 0); + } else { + Log.i(TAG, "nativerenderer done."); + nativeRenderer = null; + } + } + if (mGLSurfaceView != null) { + mGLSurfaceView.onDestroy(); + mGLSurfaceView = null; } - mGLSurfaceView.onDestroy(); - mGLSurfaceView = null; } else { - mSurfaceView.onDestroy(); - mSurfaceView = null; + if (mSurfaceView != null) { + mSurfaceView.onDestroy(); + mSurfaceView = null; + } + mSurface = null; } // Probably vain attempt to help the garbage collector... @@ -781,7 +792,7 @@ protected void onDestroy() { // TODO: Can we ensure that the GL thread has stopped rendering here? // I've seen crashes that seem to indicate that sometimes it hasn't... 
NativeApp.audioShutdown(); - if (shuttingDown || isFinishing()) { + if (shuttingDown) { NativeApp.shutdown(); unregisterCallbacks(); initialized = false; @@ -799,6 +810,7 @@ protected void onPause() { super.onPause(); Log.i(TAG, "onPause"); loseAudioFocus(this.audioManager, this.audioFocusChangeListener); + sizeManager.setPaused(true); NativeApp.pause(); if (!javaGL) { mSurfaceView.onPause(); @@ -834,6 +846,7 @@ private boolean detectOpenGLES30() { protected void onResume() { super.onResume(); updateSustainedPerformanceMode(); + sizeManager.setPaused(false); if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.ICE_CREAM_SANDWICH) { updateSystemUiVisibility(); } @@ -862,7 +875,7 @@ protected void onResume() { if (!javaGL) { // Restart the render loop. - ensureRenderLoop(); + startRenderLoopThread(); } } diff --git a/android/src/org/ppsspp/ppsspp/SizeManager.java b/android/src/org/ppsspp/ppsspp/SizeManager.java index 25509769eb4d..af81f640ddc3 100644 --- a/android/src/org/ppsspp/ppsspp/SizeManager.java +++ b/android/src/org/ppsspp/ppsspp/SizeManager.java @@ -39,10 +39,18 @@ public class SizeManager implements SurfaceHolder.Callback { private Point desiredSize = new Point(); private int badOrientationCount = 0; + + private boolean paused = false; + public SizeManager(final NativeActivity a) { activity = a; } + + public void setPaused(boolean p) { + paused = p; + } + @TargetApi(Build.VERSION_CODES.P) public void setSurfaceView(SurfaceView view) { surfaceView = view; @@ -107,7 +115,11 @@ public void surfaceChanged(SurfaceHolder holder, int format, int width, int heig NativeApp.backbufferResize(width, height, format); updateDisplayMeasurements(); - activity.notifySurface(holder.getSurface()); + if (!paused) { + activity.notifySurface(holder.getSurface()); + } else { + Log.i(TAG, "Skipping notifySurface while paused"); + } } @Override diff --git a/assets/compat.ini b/assets/compat.ini index 3198f7450148..1e2bb0011cdb 100644 --- a/assets/compat.ini +++ b/assets/compat.ini @@ 
-1508,6 +1508,11 @@ ULES00227 = true ULKS46057 = true ULUS10050 = true +# WWE Smackdown vs RAW 2007 +ULUS10199 = true +ULES00631 = true +ULJM05233 = true + # Rainbow Six: Vegas: See #9324 # Replaces a heuristic we added in #16971, which broke Castlevania - Symphony of the Night. ULES00584 = true diff --git a/assets/lang/en_US.ini b/assets/lang/en_US.ini index 1a04c485edb5..c0a97a9c1263 100644 --- a/assets/lang/en_US.ini +++ b/assets/lang/en_US.ini @@ -40,7 +40,7 @@ Challenge Mode = Challenge Mode Challenge Mode (no savestates) = Challenge Mode (no savestates) Contacting RetroAchievements server... = Contacting RetroAchievements server... Customize = Customize -Earned = You have earned %d of %d achievements, and %d of %d points +Earned = You have unlocked %d of %d achievements, and earned %d of %d points Encore Mode = Encore Mode Failed logging in to RetroAchievements = Failed logging in to RetroAchievements Failed to connect to RetroAchievements. Achievements will not unlock. = Failed to connect to RetroAchievements. Achievements will not unlock. 
diff --git a/assets/lang/it_IT.ini b/assets/lang/it_IT.ini index aae480c9021f..20562bbce434 100644 --- a/assets/lang/it_IT.ini +++ b/assets/lang/it_IT.ini @@ -1,66 +1,66 @@ -[Achievements] -%1: Attempt failed = %1: Attempt failed -%1: Attempt started = %1: Attempt started -%d achievements, %d points = %d achievements, %d points +[Achievements] +%d achievements, %d points = %d obiettivi, %d punti +%1: Attempt failed = %1: Tentativo fallito +%1: Attempt started = %1: Tentativo iniziato Account = Account -Achievement progress = Achievement progress -Achievement unlocked = Achievement unlocked -Achievements = Achievements -Achievements are disabled = Achievements are disabled -Achievements enabled = Achievements enabled -Achievements with active challenges = Achievements with active challenges -Almost completed achievements = Almost completed achievements -Around me = Around me -Can't log in to RetroAchievements right now = Can't log in to RetroAchievements right now -Challenge indicator = Challenge indicator -Challenge Mode = Challenge Mode -Challenge Mode (no savestates) = Challenge Mode (no savestates) -Contacting RetroAchievements server... = Contacting RetroAchievements server... -Customize = Customize -Earned = You have earned %d of %d achievements, and %d of %d points -Encore Mode = Encore Mode -Failed logging in to RetroAchievements = Failed logging in to RetroAchievements -Failed to connect to RetroAchievements. Achievements will not unlock. = Failed to connect to RetroAchievements. Achievements will not unlock. -Failed to identify game. Achievements will not unlock. = Failed to identify game. Achievements will not unlock. 
-How to use RetroAchievements = How to use RetroAchievements -In Encore mode - listings may be wrong below = In Encore mode - listings may be wrong below -Leaderboard attempt started or failed = Leaderboard attempt started or failed -Leaderboard result submitted = Leaderboard result submitted -Leaderboard score submission = Leaderboard score submission -Leaderboard submission is enabled = Leaderboard submission is enabled -Leaderboard tracker = Leaderboard tracker -Leaderboards = Leaderboards -Links = Links -Locked achievements = Locked achievements -Log bad memory accesses = Log bad memory accesses -Mastered %1 = Mastered %1 -Notifications = Notifications -Recently unlocked achievements = Recently unlocked achievements -Reconnected to RetroAchievements. = Reconnected to RetroAchievements. -Register on www.retroachievements.org = Register on www.retroachievements.org -RetroAchievements are not available for this game = RetroAchievements are not available for this game -RetroAchievements website = RetroAchievements website +Achievement unlocked = Obiettivo sbloccato +Achievement progress = Progresso dell'obiettivo +Achievements = Obiettivi +Achievements enabled = Obiettivi abilitati +Achievements are disabled = Gli obiettivi sono disabilitati +Achievements with active challenges = Obiettivi con sfide attive +Almost completed achievements = Obiettivi quasi completati +Can't log in to RetroAchievements right now = Impossibile accedere a RetroAchievements in questo momento +Challenge indicator = Indicatore sfida +Challenge Mode = Modalità Sfida +Challenge Mode (no savestates) = Modalità Sfida (senza stati salvati) +Contacting RetroAchievements server... = Contatto con il server di RetroAchievements in corso... +Customize = Personalizza +Earned = Hai sbloccato %d su %d obiettivi, e guadagnato %d su %d punti +Encore Mode = Modalità Encore +Failed logging in to RetroAchievements = Accesso a RetroAchievements non riuscito +Failed to connect to RetroAchievements. 
Achievements will not unlock. = Connessione a RetroAchievements non riuscita. Gli obiettivi non verranno sbloccati. +Failed to identify game. Achievements will not unlock. = Identificazione del gioco non riuscita. Gli obiettivi non verranno sbloccati. +How to use RetroAchievements = Come usare RetroAchievements +In Encore mode - listings may be wrong below = In modalità Encore - le voci qui sotto potrebbero essere errate +Leaderboard attempt started or failed = Tentativo di classifica iniziato o fallito +Leaderboard result submitted = Risultato di classifica inviato +Leaderboard score submission = Invio punteggio in classifica +Leaderboard submission is enabled = L'invio in classifica è abilitato +Leaderboards = Classifiche +Leaderboard tracker = Tracciatore classifica +Links = Link +Locked achievements = Obiettivi bloccati +Log bad memory accesses = Registra cattivi accessi alla memoria +Mastered %1 = Padroneggiato %1 +Around me = Intorno a me +Notifications = Notifiche +Recently unlocked achievements = Obiettivi recentemente sbloccati +Reconnected to RetroAchievements. = Riconnesso a RetroAchievements. +Register on www.retroachievements.org = Registrati su www.retroachievements.org +RetroAchievements are not available for this game = I RetroAchievements non sono disponibili per questo gioco +RetroAchievements website = Sito web di RetroAchievements Rich Presence = Rich Presence -Save state loaded without achievement data = Save state loaded without achievement data -Save states not available in Challenge Mode = Save states not available in Challenge Mode -Sound Effects = Sound Effects -Statistics = Statistics -Submitted %1 for %2 = Submitted %1 for %2 -Syncing achievements data... = Syncing achievements data... 
-Test Mode = Test Mode -This feature is not available in Challenge Mode = This feature is not available in Challenge Mode -This game has no achievements = This game has no achievements -Top players = Top players -Unlocked achievements = Unlocked achievements -Unofficial achievements = Unofficial achievements -Unsupported achievements = Unsupported achievements +Save states not available in Challenge Mode = Stati salvati non disponibili in Modalità Sfida +Save state loaded without achievement data = Stato salvato caricato senza dati degli obiettivi +Sound Effects = Effetti Sonori +Statistics = Statistiche +Submitted %1 for %2 = Inviato %1 per %2 +Syncing achievements data... = Sincronizzazione dati obiettivi in corso... +Test Mode = Modalità Test +This feature is not available in Challenge Mode = Questa funzione non è disponibile in Modalità Sfida +This game has no achievements = Questo gioco non ha obiettivi +Top players = Migliori giocatori +Unlocked achievements = Obiettivi sbloccati +Unsupported achievements = Obiettivi non supportati +Unofficial achievements = Obiettivi non ufficiali [Audio] -Alternate speed volume = Volume velocizzato alternativo -Audio backend = Renderer Audio (necessita il riavvio) +Alternate speed volume = Volume a velocità alternativa +Audio backend = Renderer Audio (riavvio necessario) Audio Error = Errore Audio -Audio file format not supported. Must be WAV. = Audio file format not supported. Must be WAV. -AudioBufferingForBluetooth = Buffer per Bluetooth (lento) +Audio file format not supported. Must be WAV. = Formato audio non supportato. Deve essere WAV. +AudioBufferingForBluetooth = Buffer compatibile con Bluetooth (più lento) Auto = Automatico Device = Dispositivo Disabled = Disabilitato @@ -71,7 +71,7 @@ Microphone = Microfono Microphone Device = Periferica Microfono Mute = Muto Reverb volume = Riverb. volume -Use new audio devices automatically = Usa il nuovo dispositivo audio automatic. 
+Use new audio devices automatically = Usa nuovi dispositivi audio automaticamente. Use global volume = Usa volume globale WASAPI (fast) = WASAPI (veloce) @@ -94,8 +94,8 @@ Calibrate Analog Stick = Calibra Stick Analogico Calibrate = Calibra Calibrated = Calibrato Calibration = Calibratura -Circular low end radius = Circular low end radius -Circular stick input = Input stick circolare +Circular low end radius = Raggio circolare di base +Circular stick input = Input circolare levetta Classic = Classico Confine Mouse = Confina il mouse all'interno dell'area della finestra Control Mapping = Mappatura dei Controlli @@ -110,14 +110,14 @@ Double tap = Doppio tocco Enable analog stick gesture = Enable analog stick gesture Enable gesture control = Abilita controllo gesti Enable standard shortcut keys = Abilita scorciatoie standard -frames = frames -Gesture = Gesti +frames = frame +Gesture = Gesto Gesture mapping = Mappatura gesti Glowing borders = Bordi luminosi HapticFeedback = Risposta Tattile (vibrazione) -Hide touch analog stick background circle = Nascondi cerchio dello stick analogico +Hide touch analog stick background circle = Nascondi cerchio di background della levetta analogica touch Icon = Icona -Ignore gamepads when not focused = Ignora i gamepad se non attivi +Ignore gamepads when not focused = Ignora i controller se non attiva Ignore Windows Key = Ignora il tasto "Windows" Invert Axes = Inverti gli Assi Invert Tilt along X axis = Inverti Inclinazione sull'asse X @@ -139,14 +139,14 @@ OnScreen = Controlli Touchscreen Portrait = Ritratto Portrait Reversed = Ritratto invertito PSP Action Buttons = Pulsanti Azione PSP (△◯✕☐) -Rapid fire interval = Rapid fire interval +Rapid fire interval = Intervallo fuoco rapido Raw input = Input grezzo -Reset to defaults = Reset ai predefiniti +Reset to defaults = Reimposta su valori predefiniti Screen Rotation = Rotazione Schermo Sensitivity (scale) = Sensibilità (scala) Sensitivity = Sensibilità Shape = Forma -Show Touch 
Pause Menu Button = Mostra Tasto di Pausa +Show Touch Pause Menu Button = Mostra Tasto Menu di Pausa Sticky D-Pad (easier sweeping movements) = Sticky D-Pad (easier sweeping movements) Swipe = Scorrimento Swipe sensitivity = Sensibilità Scorrimento @@ -159,7 +159,7 @@ Tilt Sensitivity along Y axis = Inverti Sensibilità sull'asse Y To Calibrate = Tieni il dispositivo all'angolazione che preferisci e premi "Calibra". Toggle mode = Scambia modalità Repeat mode = Modalità ripeti -Touch Control Visibility = Visibilità controlli Touch +Touch Control Visibility = Visibilità Controlli Touch Use custom right analog = Usa analogico destro personalizzato Use Mouse Control = Usa il controllo con il mouse Visibility = Visibilità @@ -282,32 +282,32 @@ Backspace = Backspace Block address = Blocca indirizzo By Address = Per indirizzo Copy savestates to memstick root = Copia gli stati salvati nella root della Memory Stick -Create frame dump = Create frame dump +Create frame dump = Crea dump frame Create/Open textures.ini file for current game = Crea/Apri il file textures.ini per il gioco corrente Current = Corrente -Debug overlay = Debug overlay -Debug stats = Debug stats +Debug overlay = Overlay debug +Debug stats = Statistiche debug Dev Tools = Strumenti di sviluppo -DevMenu = DevMenu -Disabled JIT functionality = Disattivata Funzionalità JIT -Draw Frametimes Graph = Disegna grafica dei frametimes +DevMenu = MenuSvil +Disabled JIT functionality = Funzionalità JIT Disattivata +Draw Frametimes Graph = Disegna Grafico dei Frametime Dump Decrypted Eboot = Crea EBOOT.BIN decriptato all'avvio del gioco Dump next frame to log = Crea Log del Frame Successivo Enable driver bug workarounds = Abilita espediente per superare i bug dei driver Enable Logging = Attiva Log del Debug Enter address = Inserire indirizzo FPU = FPU -Fragment = Fragment +Fragment = Frammento Frame timing = Frame timing Framedump tests = Test del framedump -Frame Profiler = Profilo Frame -GPU Allocator Viewer = GPU 
Allocator Viewer +Frame Profiler = Profilatore di Frame +GPU Allocator Viewer = Visualizzatore dell'Allocatore GPU GPU Driver Test = Test dei driver GPU -GPU log profiler = GPU log profiler +GPU log profiler = Profilatore dei registri GPU GPU Profile = Profilo GPU Jit Compare = Confronto Jit JIT debug tools = Strumenti di debug JIT -Log Dropped Frame Statistics = Statistiche dei Frame persi +Log Dropped Frame Statistics = Statistiche dei Frame Persi Log Level = Livello del Log Log View = Visualizza Log Logging Channels = Registra Canali @@ -320,7 +320,7 @@ Random = Casuale Replace textures = Sostituisci texture Reset = Reset Reset limited logging = Reset del logging limitato -RestoreDefaultSettings = Si desidera davvero ripristinare le impostazioni?\n\n\nQuest'azione non può essere annullata.\nRiavviare PPSSPP per caricare i cambiamenti. +RestoreDefaultSettings = Si desidera davvero ripristinare le impostazioni?\nQuest'azione non può essere annullata.\nRiavviare PPSSPP per caricare i cambiamenti. RestoreGameDefaultSettings = Si desidera davvero ripristinare le impostazioni specifiche per il gioco\nai valori predefiniti? Resume = Ripristina Run CPU Tests = Fai Test CPU @@ -332,10 +332,10 @@ Stats = Statistiche System Information = Informazioni Sistema Texture ini file created = Creato file ini delle texture Texture Replacement = Sostituzione Texture -Audio Debug = Audio debug -Control Debug = Controllo debug -Toggle Freeze = Attiva/Disattiva congelamento -Touchscreen Test = Test del touchscreen +Audio Debug = Debug Audio +Control Debug = Debug Controlli +Toggle Freeze = Attiva/Disattiva Congelamento +Touchscreen Test = Test del Touchscreen Ubershaders = Ubershaders Vertex = Vertex VFPU = VFPU @@ -392,17 +392,17 @@ Load = Carica Load completed = Caricamento completato. Loading = Caricamento in corso.\nAttendere, prego... LoadingFailed = Impossibile caricare i dati. -Log in = Log in -Log out = Log out -Logged in! = Logged in! -Logging in... = Logging in... 
+Log in = Accedi +Log out = Esci +Logged in! = Accesso eseguito! +Logging in... = Accesso in corso... Move = Sposta Move Down = Sposta giù Move Up = Sposta su Network Connection = Connessione di Rete NEW DATA = NUOVI DATI No = No -None = None +None = Nessuno ObtainingIP = Cerco di ottenere l'indirizzo IP.\nAttendere, prego... OK = OK Old savedata detected = Rilevati vecchi dati salvati @@ -473,14 +473,14 @@ MsgErrorSavedataDataBroken = Dati del salvataggio corrotti. MsgErrorSavedataMSFull = Memory Stick piena. Controllare lo spazio a disposizione. MsgErrorSavedataNoData = Attenzione: dati del salvataggio non trovati. MsgErrorSavedataNoMS = Memory Stick non inserita. -No EBOOT.PBP, misidentified game = No EBOOT.PBP, gioco identificato erroneamente. +No EBOOT.PBP, misidentified game = Nessun EBOOT.PBP, gioco identificato erroneamente. Not a valid disc image. = Immagine disco non valida. OpenGLDriverError = Errore Driver OpenGL PPSSPP doesn't support UMD Music. = PPSSPP non supporta Musica UMD. PPSSPP doesn't support UMD Video. = PPSSPP non supporta Video UMD. -PPSSPP plays PSP games, not PlayStation 1 or 2 games. = PPSSPP per giocare ai giochi PSP, non ai giochi PlayStation 1 o 2. +PPSSPP plays PSP games, not PlayStation 1 or 2 games. = PPSSPP esegue giochi PSP, non giochi PlayStation 1 o 2. PPSSPPDoesNotSupportInternet = PPSSPP attualmente non supporta connessioni Internet per DLC, PSN o aggiornamenti. -PS1 EBOOTs are not supported by PPSSPP. = PS1 EBOOT non supportati da PPSSPP. +PS1 EBOOTs are not supported by PPSSPP. = Gli EBOOT PS1 non sono supportati da PPSSPP. PSX game image detected. = Il file è un'immagine MODE2. PPSSPP non supporta giochi per PS1. RAR file detected (Require UnRAR) = Il file è compresso (RAR).\nPrima si deve decomprimere (prova UnRAR). RAR file detected (Require WINRAR) = Il file è compresso (RAR).\nPrima si deve decomprimere (prova WinRAR). 
@@ -488,11 +488,11 @@ Running slow: try frameskip, sound is choppy when slow = Emulazione lenta: prova Running slow: Try turning off Software Rendering = Emulazione lenta: prova a disattivare "rendering software" Save encryption failed. This save won't work on real PSP = Criptazione salvataggio fallita. Questo salvataggio non funzionerà su una PSP reale textures.ini filenames may not be cross-platform = I nomi dei file "textures.ini" potrebbero non essere multi-piattaforma. -This is a saved state, not a game. = Questo è un salvataggio di stato, non un gioco. -This is save data, not a game. = Questi sono dei dati salvati, non un gioco. +This is a saved state, not a game. = Questo è uno stato salvato, non un gioco. +This is save data, not a game. = Questi sono dati salvati, non un gioco. Unable to create cheat file, disk may be full = Impossibile creare il file cheat, il disco potrebbe essere pieno. -Unable to initialize rendering engine. = Impossibile inizializzare il renderer. -Unable to write savedata, disk may be full = Impossibile sovrascrivere i dati del salvataggio, il disco potrebbe essere pieno. +Unable to initialize rendering engine. = Impossibile inizializzare il motore di rendering. +Unable to write savedata, disk may be full = Impossibile scrivere i dati del salvataggio, il disco potrebbe essere pieno. Warning: Video memory FULL, reducing upscaling and switching to slow caching mode = Attenzione: Memoria Video PIENA, ridurre l'upscaling e passare in modalità caching lenta. Warning: Video memory FULL, switching to slow caching mode = Attenzione: Memoria Video PIENA, passare in modalità caching lenta. ZIP file detected (Require UnRAR) = Il file è compresso (ZIP).\nPrima si deve decomprimere (prova UnRAR). 
@@ -502,11 +502,11 @@ ZIP file detected (Require WINRAR) = Il file è compresso (ZIP).\nPrima si deve Asia = Asia Calculate CRC = Calcola CRC ConfirmDelete = Elimina -Create Game Config = Crea game config +Create Game Config = Crea Configurazione di Gioco Create Shortcut = Crea Scorciatoia Delete Game = Elimina Gioco -Delete Game Config = Elimina game config -Delete Save Data = Elimina dati salvataggio +Delete Game Config = Elimina Configurazione di Gioco +Delete Save Data = Elimina Dati Salvataggio Europe = Europa Game = Gioco Game Settings = Impostazioni Gioco @@ -680,23 +680,23 @@ Zip archive corrupt = Archivio ZIP corrotto Zip file does not contain PSP software = Il file ZIP non contiene software PSP [KeyMapping] -Allow combo mappings = Allow combo mappings +Allow combo mappings = Consenti mappature combinate Autoconfigure = Configurazione automatica Autoconfigure for device = Configurazione Automatica per il Dispositivo -Bind All = Mappa tutto -Clear All = Pulisci Tutto -Combo mappings are not enabled = Combo mappings are not enabled -Default All = Ripristina Tutto -Map a new key for = Seleziona Tasto di Controllo per: -Map Key = Mappatura tasto -Map Mouse = Mappatura mouse +Bind All = Associa Tutti +Clear All = Pulisci Tutti +Combo mappings are not enabled = Le mappature combinate non sono abilitate +Default All = Ripristina Tutti a Valori Predefiniti +Map a new key for = Mappa un nuovo tasto per: +Map Key = Mappa Tasto +Map Mouse = Mappa Mouse Replace = Sostituisci Show PSP = Mostra PSP You can press ESC to cancel. = Puoi premere Esc per annullare. [MainMenu] Browse = Sfoglia... -Buy PPSSPP Gold = Acquista PPSSPP Gold +Buy PPSSPP Gold = Compra PPSSPP Gold Choose folder = Scegli cartella Credits = Riconoscimenti Exit = Esci @@ -708,14 +708,14 @@ How to get games = Come ottenere giochi? How to get homebrew & demos = Come ottenere homebrew o demo? Load = Carica... Loading... = Caricamento in corso... 
-PPSSPP Homebrew Store = PPSSPP Homebrew Store -PinPath = Pin -PPSSPP can't load games or save right now = PPSSPP non può caricare i giochi o salvarli, in questo momento +PPSSPP Homebrew Store = Store degli Homebrew di PPSSPP +PinPath = Fissa +PPSSPP can't load games or save right now = PPSSPP non può caricare i giochi o salvare in questo momento Recent = Recenti SavesAreTemporary = PPSSPP sta salvando in una cartella temporanea -SavesAreTemporaryGuidance = Estrae PPSSPP da qualche parte per salvarlo permanentemente +SavesAreTemporaryGuidance = Estrai PPSSPP da qualche parte per salvare permanentemente SavesAreTemporaryIgnore = Ignora l'avviso -UnpinPath = Stacca +UnpinPath = Rimuovi dai fissati UseBrowseOrLoad = Usa Sfoglia per scegliere una cartella, oppure Carica per scegliere un file. www.ppsspp.org = www.ppsspp.org @@ -872,22 +872,22 @@ Chinese (traditional) = Chinese (traditional) Dutch = Dutch English = English French = French -Game language = Game language +Game language = Lingua del gioco German = German Italian = Italian Japanese = Japanese Korean = Korean -Games often don't support all languages = Games often don't support all languages +Games often don't support all languages = I giochi spesso non supportano tutte le lingue Portuguese = Portuguese Russian = Russian Spanish = Spanish [Pause] -Cheats = Cheat +Cheats = Trucchi Continue = Continua -Create Game Config = Crea game config -Delete Game Config = Elimina game config -Exit to menu = Vai al Menu +Create Game Config = Crea Configurazione di Gioco +Delete Game Config = Elimina Configurazione di Gioco +Exit to menu = Vai al menu Game Settings = Impostazioni Gioco Load State = Carica Stato Rewind = Riavvolgimento @@ -898,7 +898,7 @@ Undo last load = Annulla ultimo caricamento Undo last save = Annulla ultimo salvataggio [PostShaders] -(duplicated setting, previous slider will be used) = (parametri duplicati, verrà usata la precedente regolazione) +(duplicated setting, previous slider will be used) = 
(parametro duplicato, verrà usata la precedente regolazione) 4xHqGLSL = 4xHQ GLSL 5xBR = Upscaler 5xBR in pixel art 5xBR-lv2 = Upscaler 5xBR-lv2 in pixel art @@ -908,7 +908,7 @@ Animation speed (0 -> disable) = Velocità animazione (0 -> disabilita) Aspect = Aspetto Black border = Bordo nero Bloom = Sfocatura luminosa -BloomNoBlur = Bloom (no blur) +BloomNoBlur = Bloom (senza sfocatura) Brightness = Luminosità Cartoon = Disegno CatmullRom = Upscaler bicubico (Catmull-Rom) @@ -916,7 +916,7 @@ ColorCorrection = Correzione dei colori ColorPreservation = Preservazione colore Contrast = Contrasto CRT = Linee di scansione CRT -FakeReflections = FakeReflections +FakeReflections = RiflessiFinti FXAA = Antialiasing FXAA Gamma = Gamma GreenLevel = Livello del verde @@ -945,16 +945,16 @@ Vignette = Miniatura [PSPCredits] all the forum mods = tutte le mod del forum build server = crea server -Buy Gold = Versione Gold -check = Prova anche il Dolphin, il miglior emulatore per Wii/GC: -CheckOutPPSSPP = Da' un'occhiata a PPSSPP, il magnifico emulatore PSP: http://www.ppsspp.org/ +Buy Gold = Compra Gold +check = Da' anche un'occhiata a Dolphin, il miglior emulatore per Wii/GC sulla piazza: +CheckOutPPSSPP = Da' un'occhiata a PPSSPP, il meraviglioso emulatore PSP: http://www.ppsspp.org/ contributors = Collaboratori: created = Realizzato da Discord = Discord info1 = PPSSPP è realizzato esclusivamente a scopo didattico. info2 = Assicurarsi di avere i diritti di utilizzo dei propri giochi info3 = possedendo una copia UMD autentica o acquistando una copia -info4 = digitale dal negozio di PlayStation Network sulla propria PSP. +info4 = digitale dal PlayStation Store sulla propria PSP. info5 = PSP è un marchio di Sony, Inc. 
iOS builds = versione iOS license = Software Libero GPL 2.0+ @@ -979,12 +979,12 @@ translators4 = papel translators5 = translators6 = Twitter @PPSSPP_emu = Twitter -website = Visita il sito web: +website = Da' un'occhiata al sito web: written = Scritto in C++ per velocità e portabilità [MemStick] Already contains PSP data = Contiene già dati PSP -Cancelled - try again = Cancelled - try again +Cancelled - try again = Annullato - prova di nuovo Create or Choose a PSP folder = Scegli o crea una cartella PSP Current = Corrente DataCanBeShared = I dati possono essere condivisi tra PPSSPP normale/Gold @@ -1061,7 +1061,7 @@ Perfect Description = Emulazione perfetta per tutto il gioco - magnifico! Plays = Giocabile Plays Description = Completamente giocabile ma sono presenti dei glitch ReportButton = Rapporto feedback -Show disc CRC = Show disc CRC +Show disc CRC = Mostra CRC disco Speed = Velocità Submit Feedback = Invia feedback SuggestionConfig = Guarda i rapporti sul sito web per le impostazioni migliori. @@ -1073,7 +1073,7 @@ SuggestionsWaiting = Invia e consulta i feedback degli altri utenti... SuggestionUpgrade = Aggiorna alla nuova build del PPSSPP. SuggestionVerifyDisc = Verifica se l'ISO in tuo possesso è una copia funzionante del tuo disco. Unselected Overall Description = Quanto è stata precisa l'emulazione del gioco? -View Feedback = Mostra i Feedback +View Feedback = Visualizza tutti i Feedback [Savedata] Date = Data @@ -1341,6 +1341,7 @@ Choices: = Scelte: List: = Lista: Progress: %1% = Avanzamento: %1% Screen representation = Rappresentazione su schermo + [Upgrade] Details = Dettagli Dismiss = Ignora diff --git a/assets/lang/ru_RU.ini b/assets/lang/ru_RU.ini index 72ad38dd65c6..9db13ad47333 100644 --- a/assets/lang/ru_RU.ini +++ b/assets/lang/ru_RU.ini @@ -17,7 +17,7 @@ Challenge Mode = Режим испытания Challenge Mode (no savestates) = Режим испытания (без сохранений состояния) Contacting RetroAchievements server... 
= Подключение к серверу RetroAchievements... Customize = Настроить -Earned = Вы открыли %d из %d достижений и %d из %d очков +Earned = Вы разблокировали %d из %d достижений и получили %d из %d очков Encore Mode = Режим повтора Failed logging in to RetroAchievements = Не удалось войти в RetroAchievements Failed to connect to RetroAchievements. Achievements will not unlock. = Не удалось подключиться к RetroAchievements. Достижения не будут разблокированы. @@ -510,7 +510,7 @@ Delete Save Data = Удалить сохранения Europe = Европа Game = Игра Game Settings = Настройки игры -Homebrew = Хоумбрю +Homebrew = Homebrew Hong Kong = Гонконг InstallData = Установить данные Japan = Япония @@ -698,14 +698,14 @@ Browse = Обзор... Buy PPSSPP Gold = PPSSPP Gold Choose folder = Выберите папку Credits = Авторы -PPSSPP Homebrew Store = Магазин хоумбрю PPSSPP +PPSSPP Homebrew Store = Магазин homebrew для PPSSPP Exit = Выход Game Settings = Настройки Games = Игры Give PPSSPP permission to access storage = Дать доступ к хранилищу данных -Homebrew & Demos = Хоумбрю и демо +Homebrew & Demos = Homebrew и демо How to get games = Как получить игры? -How to get homebrew & demos = Как получить хоумбрю и демо? +How to get homebrew & demos = Как получить homebrew и демо? Load = Открыть... Loading... = Загрузка... 
PinPath = Закрепить @@ -1233,10 +1233,10 @@ Day Light Saving = Летнее время DDMMYYYY = ДДММГГГГ Decrease size = Уменьшить размер Developer Tools = Инструменты разработчика -Display Extra Info = Отображать дополнительную ниформацию -Display Games on a grid = Отображать "Игры" в сетке -Display Homebrew on a grid = Отображать "Хоумбрю и демо" в сетке -Display Recent on a grid = Отображать "Недавние" в сетке +Display Extra Info = Показывать дополнительную ниформацию +Display Games on a grid = Показывать "Игры" в виде сетки +Display Homebrew on a grid = Показывать "Homebrew и демо" в виде сетки +Display Recent on a grid = Показывать "Недавние" в виде сетки Dynarec (JIT) = Динамическая рекомпиляция (JIT) Emulation = Эмуляция Enable Cheats = Включить коды @@ -1251,7 +1251,7 @@ Floating symbols = Парящие символы Force real clock sync (slower, less lag) = Принудительная синхронизация реальной частоты ЦП (медленнее, меньше лагов) Games list settings = Настройки списка игр General = Основные -Grid icon size = Размер ярлыков в режиме сетки +Grid icon size = Размер ярлыков в виде сетки Help the PPSSPP team = Помочь команде PPSSPP Host (bugs, less lag) = Хост (возможны баги, меньше лагов) Ignore bad memory accesses = Игнорировать ошибки доступа к памяти diff --git a/ext/CMakeLists.txt b/ext/CMakeLists.txt index 9bbb9b7a7452..1031f6edf91c 100644 --- a/ext/CMakeLists.txt +++ b/ext/CMakeLists.txt @@ -37,3 +37,5 @@ endif() if(USE_DISCORD AND NOT IOS AND NOT LIBRETRO) add_subdirectory(discord-rpc-build) endif() + +add_subdirectory(libchdr-build) diff --git a/ext/libchdr b/ext/libchdr new file mode 160000 index 000000000000..9108f34a8922 --- /dev/null +++ b/ext/libchdr @@ -0,0 +1 @@ +Subproject commit 9108f34a892272f61c3ed3bff4bee728d4c1dd57 diff --git a/ext/libchdr-build/CMakeLists.txt b/ext/libchdr-build/CMakeLists.txt new file mode 100644 index 000000000000..8b93aa42fe4d --- /dev/null +++ b/ext/libchdr-build/CMakeLists.txt @@ -0,0 +1,38 @@ +cmake_minimum_required (VERSION 3.2.0) 
+project (chdr) + +set(LZMA_DIR ../libchdr/deps/lzma-22.01/src) +set(SRC_DIR ../libchdr/src) + +include_directories(../libchdr/deps/lzma-22.01/include) +include_directories(../libchdr/include) + +add_definitions(-D_7ZIP_ST) + +# Hack - CpuArch.c has a workaround that we reuse. +add_definitions(-D__SWITCH__) + +set(ALL_SOURCE_FILES + ${LZMA_DIR}/Alloc.c + ${LZMA_DIR}/Bcj2.c + ${LZMA_DIR}/Bcj2Enc.c + ${LZMA_DIR}/Bra.c + ${LZMA_DIR}/Bra86.c + ${LZMA_DIR}/CpuArch.c + ${LZMA_DIR}/Delta.c + ${LZMA_DIR}/LzFind.c + ${LZMA_DIR}/LzFindOpt.c + ${LZMA_DIR}/LzmaDec.c + ${LZMA_DIR}/LzmaEnc.c + ${LZMA_DIR}/Lzma86Dec.c + ${LZMA_DIR}/Lzma86Enc.c + ${LZMA_DIR}/LzmaLib.c + ${LZMA_DIR}/Sort.c + ${SRC_DIR}/libchdr_bitstream.c + ${SRC_DIR}/libchdr_cdrom.c + ${SRC_DIR}/libchdr_chd.c + ${SRC_DIR}/libchdr_flac.c + ${SRC_DIR}/libchdr_huffman.c + ) + +add_library(chdr STATIC ${ALL_SOURCE_FILES}) diff --git a/ext/libchdr.vcxproj b/ext/libchdr.vcxproj new file mode 100644 index 000000000000..89d56f3fbe2c --- /dev/null +++ b/ext/libchdr.vcxproj @@ -0,0 +1,374 @@ + + + + + Debug + ARM + + + Debug + ARM64 + + + Debug + Win32 + + + Debug + x64 + + + Release + ARM + + + Release + ARM64 + + + Release + Win32 + + + Release + x64 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {956F1F48-B612-46D8-89EE-96996DCD9383} + Win32Proj + libchdr + libchdr_static + Unicode + StaticLibrary + $(SolutionDir)..\ext\libchdr\bin\$(Platform)_$(Configuration)\ + $(SolutionDir)..\ext\libchdr\bin\obj\$(RootNamespace)_$(Platform)_$(Configuration)\ + 10.0 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + true + $(DefaultPlatformToolset) + + + true + $(DefaultPlatformToolset) + + + true + $(DefaultPlatformToolset) + + + true + $(DefaultPlatformToolset) + + + false + true + $(DefaultPlatformToolset) + + + false + true + $(DefaultPlatformToolset) + + + false + true + $(DefaultPlatformToolset) + + + false + true + $(DefaultPlatformToolset) + + + + + + + + + + + + + + + + + + + true + 
$(IncludePath);$(SolutionDir)..\ext\libchdr\include;$(UniversalCRT_IncludePath); + false + + + true + $(IncludePath);$(SolutionDir)..\ext\libchdr\include;$(UniversalCRT_IncludePath); + false + + + false + $(IncludePath);$(SolutionDir)..\ext\libchdr\include;$(UniversalCRT_IncludePath); + false + + + false + $(IncludePath);$(SolutionDir)..\ext\libchdr\include;$(UniversalCRT_IncludePath); + false + + + + $(OutDir)$(TargetName).pdb + + + + + + + Level4 + _7ZIP_ST;WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebug + true + ProgramDatabase + false + libchdr\include;libchdr\deps\lzma-22.01\include;zlib; + + + Windows + true + MachineX86 + + + + + + + Level4 + _7ZIP_ST;WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + EnableFastChecks + MultiThreadedDebug + true + ProgramDatabase + false + libchdr\include;libchdr\deps\lzma-22.01\include;zlib; + + + Windows + true + + + + + + + Level4 + _7ZIP_ST;WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + EnableFastChecks + MultiThreadedDebug + true + ProgramDatabase + false + libchdr\include;libchdr\deps\lzma-22.01\include;zlib; + + + Windows + true + + + + + + + Level4 + _7ZIP_ST;WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + EnableFastChecks + MultiThreadedDebug + true + ProgramDatabase + false + libchdr\include;libchdr\deps\lzma-22.01\include;zlib; + + + Windows + true + + + + + Level4 + + + MaxSpeed + true + true + _7ZIP_ST;WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + false + MultiThreaded + true + ProgramDatabase + AnySuitable + Speed + true + false + StreamingSIMDExtensions2 + libchdr\include;libchdr\deps\lzma-22.01\include;zlib; + + + Windows + true + true + true + MachineX86 + + + + + Level4 + + + MaxSpeed + true + true + _7ZIP_ST;WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + false + false + MultiThreaded + true + 
ProgramDatabase + false + true + AnySuitable + Speed + libchdr\include;libchdr\deps\lzma-22.01\include;zlib; + + + Windows + true + true + true + + + + + Level4 + + + MaxSpeed + true + true + _7ZIP_ST;WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + false + false + MultiThreaded + true + ProgramDatabase + false + true + AnySuitable + Speed + libchdr\include;libchdr\deps\lzma-22.01\include;zlib; + + + Windows + true + true + true + + + + + Level4 + + + MaxSpeed + true + true + _7ZIP_ST;WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + false + false + MultiThreaded + true + ProgramDatabase + false + true + AnySuitable + Speed + libchdr\include;libchdr\deps\lzma-22.01\include;zlib; + + + Windows + true + true + true + + + + + + \ No newline at end of file diff --git a/ext/libchdr.vcxproj.filters b/ext/libchdr.vcxproj.filters new file mode 100644 index 000000000000..d35dd7e48fb3 --- /dev/null +++ b/ext/libchdr.vcxproj.filters @@ -0,0 +1,72 @@ + + + + + + + + + + LZMA + + + LZMA + + + LZMA + + + LZMA + + + LZMA + + + LZMA + + + LZMA + + + LZMA + + + LZMA + + + LZMA + + + LZMA + + + LZMA + + + LZMA + + + LZMA + + + LZMA + + + LZMA + + + + + + + + + + + + + + {ff31581b-3ff9-4e39-bb96-2f662896cf70} + + + \ No newline at end of file diff --git a/headless/Headless.vcxproj b/headless/Headless.vcxproj index 65cc85d5b46c..b500d26de4eb 100644 --- a/headless/Headless.vcxproj +++ b/headless/Headless.vcxproj @@ -502,6 +502,9 @@ {edfa2e87-8ac1-4853-95d4-d7594ff81947} + + {956f1f48-b612-46d8-89ee-96996dcd9383} + {3baae095-e0ab-4b0e-b5df-ce39c8ae31de} diff --git a/libretro/Makefile.common b/libretro/Makefile.common index 0168cd21e510..e8c6ed2d4113 100644 --- a/libretro/Makefile.common +++ b/libretro/Makefile.common @@ -226,6 +226,35 @@ SOURCES_C += \ COREFLAGS += -DSTACK_LINE_READER_BUFFER_SIZE=1024 COREFLAGS += -DHTTPS_NOT_AVAILABLE +COREFLAGS += -D_7ZIP_ST +INCFLAGS += -I$(EXTDIR)/libchdr/deps/lzma-22.01/include + +SOURCES_C += \ + 
$(EXTDIR)/libchdr/deps/lzma-22.01/src/Alloc.c \ + $(EXTDIR)/libchdr/deps/lzma-22.01/src/Bcj2.c \ + $(EXTDIR)/libchdr/deps/lzma-22.01/src/Bcj2Enc.c \ + $(EXTDIR)/libchdr/deps/lzma-22.01/src/Bra.c \ + $(EXTDIR)/libchdr/deps/lzma-22.01/src/Bra86.c \ + $(EXTDIR)/libchdr/deps/lzma-22.01/src/CpuArch.c \ + $(EXTDIR)/libchdr/deps/lzma-22.01/src/Delta.c \ + $(EXTDIR)/libchdr/deps/lzma-22.01/src/LzFind.c \ + $(EXTDIR)/libchdr/deps/lzma-22.01/src/LzFindOpt.c \ + $(EXTDIR)/libchdr/deps/lzma-22.01/src/LzmaDec.c \ + $(EXTDIR)/libchdr/deps/lzma-22.01/src/LzmaEnc.c \ + $(EXTDIR)/libchdr/deps/lzma-22.01/src/Lzma86Dec.c \ + $(EXTDIR)/libchdr/deps/lzma-22.01/src/Lzma86Enc.c \ + $(EXTDIR)/libchdr/deps/lzma-22.01/src/LzmaLib.c \ + $(EXTDIR)/libchdr/deps/lzma-22.01/src/Sort.c + +INCFLAGS += -I$(EXTDIR)/libchdr/include + +SOURCES_C += \ + $(EXTDIR)/libchdr/src/libchdr_bitstream.c \ + $(EXTDIR)/libchdr/src/libchdr_cdrom.c \ + $(EXTDIR)/libchdr/src/libchdr_chd.c \ + $(EXTDIR)/libchdr/src/libchdr_flac.c \ + $(EXTDIR)/libchdr/src/libchdr_huffman.c + ifeq ($(PLATFORM_EXT), android) COREFLAGS += -DHAVE_DLFCN_H else ifneq ($(PLATFORM_EXT), win32) diff --git a/unittest/JitHarness.cpp b/unittest/JitHarness.cpp index 4ad53fc65c1a..088416e2bcd8 100644 --- a/unittest/JitHarness.cpp +++ b/unittest/JitHarness.cpp @@ -34,6 +34,7 @@ #include "Core/MemMap.h" #include "Core/Core.h" #include "Core/CoreTiming.h" +#include "Core/Config.h" #include "Core/HLE/HLE.h" // Temporary hacks around annoying linking errors. Copied from Headless. 
@@ -55,9 +56,15 @@ HLEFunction UnitTestFakeSyscalls[] = { {0x1234BEEF, &UnitTestTerminator, "UnitTestTerminator"}, }; -double ExecCPUTest() { +double ExecCPUTest(bool clearCache = true) { int blockTicks = 1000000; int total = 0; + + if (MIPSComp::jit) { + currentMIPS->pc = PSP_GetUserMemoryBase(); + MIPSComp::JitAt(); + } + double st = time_now_d(); do { for (int j = 0; j < 1000; ++j) { @@ -73,6 +80,17 @@ double ExecCPUTest() { while (time_now_d() - st < 0.5); double elapsed = time_now_d() - st; + if (MIPSComp::jit) { + JitBlockCacheDebugInterface *cache = MIPSComp::jit->GetBlockCacheDebugInterface(); + if (cache) { + JitBlockDebugInfo block = cache->GetBlockDebugInfo(0); + WARN_LOG(JIT, "Executed %d target instrs, %d IR, for %d orig", (int)block.targetDisasm.size(), (int)block.irDisasm.size(), (int)block.origDisasm.size()); + } + + if (clearCache) + MIPSComp::jit->ClearCache(); + } + return total / elapsed; } @@ -108,6 +126,7 @@ static void DestroyJitHarness() { bool TestJit() { SetupJitHarness(); + g_Config.bFastMemory = true; currentMIPS->pc = PSP_GetUserMemoryBase(); u32 *p = (u32 *)Memory::GetPointer(currentMIPS->pc); @@ -158,6 +177,7 @@ bool TestJit() { *p++ = MIPS_MAKE_SYSCALL("UnitTestFakeSyscalls", "UnitTestTerminator"); *p++ = MIPS_MAKE_BREAK(1); + *p++ = MIPS_MAKE_JR_RA(); // Dogfood. 
addr = currentMIPS->pc; @@ -170,11 +190,15 @@ bool TestJit() { printf("\n"); - double jit_speed = 0.0, interp_speed = 0.0; + double jit_speed = 0.0, jit_ir_speed = 0.0, ir_speed = 0.0, interp_speed = 0.0; if (compileSuccess) { interp_speed = ExecCPUTest(); + mipsr4k.UpdateCore(CPUCore::IR_INTERPRETER); + ir_speed = ExecCPUTest(); mipsr4k.UpdateCore(CPUCore::JIT); jit_speed = ExecCPUTest(); + mipsr4k.UpdateCore(CPUCore::JIT_IR); + jit_ir_speed = ExecCPUTest(false); // Disassemble JitBlockCacheDebugInterface *cache = MIPSComp::jit->GetBlockCacheDebugInterface(); @@ -182,14 +206,14 @@ bool TestJit() { JitBlockDebugInfo block = cache->GetBlockDebugInfo(0); // Should only be one block. std::vector &lines = block.targetDisasm; // Cut off at 25 due to the repetition above. Might need tweaking for large instructions. - const int cutoff = 25; + const int cutoff = 50; for (int i = 0; i < std::min((int)lines.size(), cutoff); i++) { printf("%s\n", lines[i].c_str()); } if (lines.size() > cutoff) printf("...\n"); } - printf("Jit was %fx faster than interp.\n\n", jit_speed / interp_speed); + printf("Jit was %fx faster than interp, IR was %fx faster, JIT IR %fx.\n\n", jit_speed / interp_speed, ir_speed / interp_speed, jit_ir_speed / interp_speed); } printf("\n"); diff --git a/unittest/UnitTest.cpp b/unittest/UnitTest.cpp index ac7b5ba76339..92ed78760e4b 100644 --- a/unittest/UnitTest.cpp +++ b/unittest/UnitTest.cpp @@ -58,6 +58,7 @@ #include "Common/Render/DrawBuffer.h" #include "Common/System/NativeApp.h" #include "Common/System/System.h" +#include "Common/Thread/ThreadUtil.h" #include "Common/ArmEmitter.h" #include "Common/BitScan.h" @@ -1038,6 +1039,8 @@ TestItem availableTests[] = { }; int main(int argc, const char *argv[]) { + SetCurrentThreadName("UnitTest"); + cpu_info.bNEON = true; cpu_info.bVFP = true; cpu_info.bVFPv3 = true; diff --git a/unittest/UnitTests.vcxproj b/unittest/UnitTests.vcxproj index 7417f4f38b63..76ce39678097 100644 --- a/unittest/UnitTests.vcxproj +++ 
b/unittest/UnitTests.vcxproj @@ -420,6 +420,9 @@ {edfa2e87-8ac1-4853-95d4-d7594ff81947} + + {956f1f48-b612-46d8-89ee-96996dcd9383} + {3baae095-e0ab-4b0e-b5df-ce39c8ae31de}