From b72865f1101c3d0e869854a38b3c7355ac01c353 Mon Sep 17 00:00:00 2001 From: Tillmann Karras Date: Sun, 15 May 2022 05:41:48 +0100 Subject: [PATCH 1/6] AX: add support for biquad filtering This fixes e.g. the overly loud wind in "I SPY: Spooky Mansion". --- Source/Core/Core/HW/DSPHLE/UCodes/AX.cpp | 2 +- Source/Core/Core/HW/DSPHLE/UCodes/AXStructs.h | 22 ++++----- Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h | 49 ++++++++++++++++--- Source/Core/Core/HW/DSPHLE/UCodes/AXWii.cpp | 5 +- Source/Core/Core/HW/DSPHLE/UCodes/AXWii.h | 3 ++ 5 files changed, 60 insertions(+), 21 deletions(-) diff --git a/Source/Core/Core/HW/DSPHLE/UCodes/AX.cpp b/Source/Core/Core/HW/DSPHLE/UCodes/AX.cpp index 2dc000dbfe05..3b9161a52df8 100644 --- a/Source/Core/Core/HW/DSPHLE/UCodes/AX.cpp +++ b/Source/Core/Core/HW/DSPHLE/UCodes/AX.cpp @@ -438,7 +438,7 @@ void AXUCode::ProcessPBList(u32 pb_addr) ProcessVoice(static_cast(m_accelerator.get()), pb, buffers, spms, ConvertMixerControl(pb.mixer_control), - m_coeffs_checksum ? m_coeffs.data() : nullptr); + m_coeffs_checksum ? m_coeffs.data() : nullptr, false); // Forward the buffers for (auto& ptr : buffers.ptrs) diff --git a/Source/Core/Core/HW/DSPHLE/UCodes/AXStructs.h b/Source/Core/Core/HW/DSPHLE/UCodes/AXStructs.h index 8c6c6aad90a9..8d224c6f574a 100644 --- a/Source/Core/Core/HW/DSPHLE/UCodes/AXStructs.h +++ b/Source/Core/Core/HW/DSPHLE/UCodes/AXStructs.h @@ -177,7 +177,7 @@ struct PBADPCMLoopInfo struct PBLowPassFilter { - u16 enabled; + u16 on; s16 yn1; u16 a0; u16 b0; @@ -215,16 +215,16 @@ struct AXPB struct PBBiquadFilter { - u16 on; // on = 2, off = 0 - u16 xn1; // History data - u16 xn2; - u16 yn1; - u16 yn2; - u16 b0; // Filter coefficients - u16 b1; - u16 b2; - u16 a1; - u16 a2; + u16 on; + s16 xn1; // History data + s16 xn2; + s16 yn1; + s16 yn2; + s16 b0; // Filter coefficients + s16 b1; + s16 b2; + s16 a1; + s16 a2; }; union PBInfImpulseResponseWM diff --git a/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h b/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h index 4c9eec6df1b7..10d93ad7cca3 100644 --- a/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h +++ b/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h @@ -400,17 +400,45 @@ void MixAdd(int* out, const s16* input, u32 count, VolumeData* vd, s16* dpop, bo // Execute a low pass filter on the samples using one history value. Returns // the new history value. -s16 LowPassFilter(s16* samples, u32 count, s16 yn1, u16 a0, u16 b0) +static void LowPassFilter(s16* samples, u32 count, PBLowPassFilter& f) { for (u32 i = 0; i < count; ++i) - yn1 = samples[i] = (a0 * (s32)samples[i] + b0 * (s32)yn1) >> 15; - return yn1; + f.yn1 = samples[i] = (f.a0 * (s32)samples[i] + f.b0 * (s32)f.yn1) >> 15; } +#ifdef AX_WII +static void BiquadFilter(s16* samples, u32 count, PBBiquadFilter& f) +{ + for (u32 i = 0; i < count; ++i) + { + s16 xn0 = samples[i]; + s64 tmp = 0; + tmp += f.b0 * s32(xn0); + tmp += f.b1 * s32(f.xn1); + tmp += f.b2 * s32(f.xn2); + tmp += f.a1 * s32(f.yn1); + tmp += f.a2 * s32(f.yn2); + tmp <<= 2; + // CLRL + if (tmp & 0x10000) + tmp += 0x8000; + else + tmp += 0x7FFF; + tmp >>= 16; + s16 yn0 = s16(tmp); + f.xn2 = f.xn1; + f.yn2 = f.yn1; + f.xn1 = xn0; + f.yn1 = yn0; + samples[i] = yn0; + } +} +#endif + // Process 1ms of audio (for AX GC) or 3ms of audio (for AX Wii) from a PB and // mix it to the output buffers. void ProcessVoice(HLEAccelerator* accelerator, PB_TYPE& pb, const AXBuffers& buffers, u16 count, - AXMixControl mctrl, const s16* coeffs) + AXMixControl mctrl, const s16* coeffs, bool new_filter) { // If the voice is not running, nothing to do. if (pb.running != 1) @@ -435,12 +463,19 @@ void ProcessVoice(HLEAccelerator* accelerator, PB_TYPE& pb, const AXBuffers& buf pb.vol_env.cur_volume += pb.vol_env.cur_volume_delta; } - // Optionally, execute a low pass filter - if (pb.lpf.enabled) + // Optionally, execute a low-pass and/or biquad filter. + if (pb.lpf.on != 0) { - pb.lpf.yn1 = LowPassFilter(samples, count, pb.lpf.yn1, pb.lpf.a0, pb.lpf.b0); + LowPassFilter(samples, count, pb.lpf); } +#ifdef AX_WII + if (new_filter && pb.biquad.on != 0) + { + BiquadFilter(samples, count, pb.biquad); + } +#endif + // Mix LRS, AUXA and AUXB depending on mixer_control // TODO: Handle DPL2 on AUXB. diff --git a/Source/Core/Core/HW/DSPHLE/UCodes/AXWii.cpp b/Source/Core/Core/HW/DSPHLE/UCodes/AXWii.cpp index 36cd154f9ba7..19ed9dd90e83 100644 --- a/Source/Core/Core/HW/DSPHLE/UCodes/AXWii.cpp +++ b/Source/Core/Core/HW/DSPHLE/UCodes/AXWii.cpp @@ -29,6 +29,7 @@ AXWiiUCode::AXWiiUCode(DSPHLE* dsphle, u32 crc) volume = 0x8000; m_old_axwii = (crc == 0xfa450138) || (crc == 0x7699af32); + m_new_filter = crc == 0x347112ba || crc == 0x4cc52064; m_accelerator = std::make_unique(dsphle->GetSystem().GetDSP()); } @@ -450,7 +451,7 @@ void AXWiiUCode::ProcessPBList(u32 pb_addr) ApplyUpdatesForMs(curr_ms, pb, num_updates, updates); ProcessVoice(static_cast(m_accelerator.get()), pb, buffers, spms, ConvertMixerControl(HILO_TO_32(pb.mixer_control)), - m_coeffs_checksum ? m_coeffs.data() : nullptr); + m_coeffs_checksum ? m_coeffs.data() : nullptr, m_new_filter); // Forward the buffers for (auto& ptr : buffers.ptrs) @@ -462,7 +463,7 @@ void AXWiiUCode::ProcessPBList(u32 pb_addr) { ProcessVoice(static_cast(m_accelerator.get()), pb, buffers, 96, ConvertMixerControl(HILO_TO_32(pb.mixer_control)), - m_coeffs_checksum ? m_coeffs.data() : nullptr); + m_coeffs_checksum ? m_coeffs.data() : nullptr, m_new_filter); } WritePB(memory, pb_addr, pb, m_crc); diff --git a/Source/Core/Core/HW/DSPHLE/UCodes/AXWii.h b/Source/Core/Core/HW/DSPHLE/UCodes/AXWii.h index aa69ae96372d..85d330038467 100644 --- a/Source/Core/Core/HW/DSPHLE/UCodes/AXWii.h +++ b/Source/Core/Core/HW/DSPHLE/UCodes/AXWii.h @@ -38,6 +38,9 @@ class AXWiiUCode final : public AXUCode // Are we implementing an old version of AXWii which still has updates? bool m_old_axwii = false; + // Late AXWii versions support Wiimote filtering and a biquad filter. + bool m_new_filter = false; + // Last volume values for MAIN and AUX. Used to generate volume ramps to // interpolate nicely between old and new volume values. u16 m_last_main_volume = 0; From 72153dd978ef81c113546e01e3cce23fa5418cf7 Mon Sep 17 00:00:00 2001 From: Tillmann Karras Date: Sun, 15 May 2022 05:48:23 +0100 Subject: [PATCH 2/6] AX: enable low-pass/biquad filtering of Wiimote audio --- Source/Core/Core/HW/DSPHLE/UCodes/AXStructs.h | 1 + Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h | 13 +++++++++++++ 2 files changed, 14 insertions(+) diff --git a/Source/Core/Core/HW/DSPHLE/UCodes/AXStructs.h b/Source/Core/Core/HW/DSPHLE/UCodes/AXStructs.h index 8d224c6f574a..f819c066aad7 100644 --- a/Source/Core/Core/HW/DSPHLE/UCodes/AXStructs.h +++ b/Source/Core/Core/HW/DSPHLE/UCodes/AXStructs.h @@ -229,6 +229,7 @@ struct PBBiquadFilter union PBInfImpulseResponseWM { + u16 on; // 0: off, 2: biquad, other: low-pass PBLowPassFilter lpf; PBBiquadFilter biquad; }; diff --git a/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h b/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h index 10d93ad7cca3..59813ac22929 100644 --- a/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h +++ b/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h @@ -562,6 +562,19 @@ void ProcessVoice(HLEAccelerator* accelerator, PB_TYPE& pb, const AXBuffers& buf // Wiimote mixing. if (pb.remote) { + if (new_filter && pb.remote_iir.on != 0) + { + // Only one filter at most for Wiimotes. + if (pb.remote_iir.on == 2) + { + BiquadFilter(samples, count, pb.remote_iir.biquad); + } + else + { + LowPassFilter(samples, count, pb.remote_iir.lpf); + } + } + // Old AXWii versions process ms per ms. u16 wm_count = count == 96 ? 18 : 6; From 2a7511f58b7315e9bec40ff5f5f44826f2a99b5d Mon Sep 17 00:00:00 2001 From: Tillmann Karras Date: Sat, 27 Apr 2024 18:59:10 +0100 Subject: [PATCH 3/6] AX: make volume_delta signed NFC because the current code always masks the volume + volume_delta sum to 16 bits. --- Source/Core/Core/HW/DSPHLE/UCodes/AXStructs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Core/Core/HW/DSPHLE/UCodes/AXStructs.h b/Source/Core/Core/HW/DSPHLE/UCodes/AXStructs.h index f819c066aad7..26dd0d20e074 100644 --- a/Source/Core/Core/HW/DSPHLE/UCodes/AXStructs.h +++ b/Source/Core/Core/HW/DSPHLE/UCodes/AXStructs.h @@ -10,7 +10,7 @@ namespace DSP::HLE struct VolumeData { u16 volume; - u16 volume_delta; + s16 volume_delta; }; struct PBMixer From 55f71e4358888240376ae6641f15c04732337468 Mon Sep 17 00:00:00 2001 From: Tillmann Karras Date: Sat, 27 Apr 2024 15:53:41 +0100 Subject: [PATCH 4/6] AX: avoid unnecessary CRC check on Wii --- Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h b/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h index 59813ac22929..9255f1ed1e4e 100644 --- a/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h +++ b/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h @@ -90,13 +90,11 @@ union AXBuffers // Determines if this version of the UCode has a PBLowPassFilter in its AXPB layout. bool HasLpf(u32 crc) { - switch (crc) - { - case 0x4E8A8B21: - return false; - default: - return true; - } +#ifdef AX_GC + return crc != 0x4E8A8B21; +#else + return true; +#endif } // Read a PB from MRAM/ARAM From ea57952148e98f3c4dd936c8134ca8a57be8bc5f Mon Sep 17 00:00:00 2001 From: Tillmann Karras Date: Sat, 27 Apr 2024 15:44:48 +0100 Subject: [PATCH 5/6] AX: use AVX2 for byte-swapping parameter blocks --- Source/Core/Core/HW/DSPHLE/UCodes/AXStructs.h | 4 +-- Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h | 26 +++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/Source/Core/Core/HW/DSPHLE/UCodes/AXStructs.h b/Source/Core/Core/HW/DSPHLE/UCodes/AXStructs.h index 26dd0d20e074..87ea5630ea3f 100644 --- a/Source/Core/Core/HW/DSPHLE/UCodes/AXStructs.h +++ b/Source/Core/Core/HW/DSPHLE/UCodes/AXStructs.h @@ -183,7 +183,7 @@ struct PBLowPassFilter u16 b0; }; -struct AXPB +struct alignas(32) AXPB { u16 next_pb_hi; u16 next_pb_lo; @@ -234,7 +234,7 @@ union PBInfImpulseResponseWM PBBiquadFilter biquad; }; -struct AXPBWii +struct alignas(32) AXPBWii { u16 next_pb_hi; u16 next_pb_lo; diff --git a/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h b/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h index 9255f1ed1e4e..16622826219c 100644 --- a/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h +++ b/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h @@ -11,6 +11,10 @@ #error AXVoice.h included without specifying version #endif +#ifdef __AVX2__ +#include +#endif + #include #include #include @@ -97,13 +101,31 @@ bool HasLpf(u32 crc) #endif } +#ifdef __AVX2__ +void CopyPBSwapped(void* dst, const void* src) +{ + static_assert((sizeof(PB_TYPE) & 31) == 0, "PB size must be a multiple of 32"); + const __m256i swap = _mm256_set_epi64x(0x0e0f0c0d0a0b0809, 0x0607040502030001, 0x0e0f0c0d0a0b0809, + 0x0607040502030001); + for (size_t i = 0; i < sizeof(PB_TYPE) / 32; ++i) + { + _mm256_storeu_si256((__m256i*)dst + i, + _mm256_shuffle_epi8(_mm256_loadu_si256((__m256i*)src + i), swap)); + } +} +#endif + // Read a PB from MRAM/ARAM void ReadPB(Memory::MemoryManager& memory, u32 addr, PB_TYPE& pb, u32 crc) { if (HasLpf(crc)) { +#ifdef __AVX2__ + CopyPBSwapped(&pb, memory.GetPointerForRange(addr, sizeof(pb))); +#else u16* dst = (u16*)&pb; memory.CopyFromEmuSwapped(dst, addr, sizeof(pb)); +#endif } else { @@ -126,8 +148,12 @@ void WritePB(Memory::MemoryManager& memory, u32 addr, const PB_TYPE& pb, u32 crc { if (HasLpf(crc)) { +#ifdef __AVX2__ + CopyPBSwapped(memory.GetPointerForRange(addr, sizeof(pb)), &pb); +#else const u16* src = (const u16*)&pb; memory.CopyToEmuSwapped(addr, src, sizeof(pb)); +#endif } else { From ba657977fc6332805ec399c26efe8e975f2d2c1e Mon Sep 17 00:00:00 2001 From: Tillmann Karras Date: Sat, 27 Apr 2024 19:02:15 +0100 Subject: [PATCH 6/6] AX: use AVX2 for mixing --- Source/Core/Core/HW/DSPHLE/UCodes/AX.cpp | 6 +- Source/Core/Core/HW/DSPHLE/UCodes/AX.h | 18 +-- Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h | 155 +++++++++++++------- Source/Core/Core/HW/DSPHLE/UCodes/AXWii.cpp | 12 +- Source/Core/Core/HW/DSPHLE/UCodes/AXWii.h | 22 +-- 5 files changed, 129 insertions(+), 84 deletions(-) diff --git a/Source/Core/Core/HW/DSPHLE/UCodes/AX.cpp b/Source/Core/Core/HW/DSPHLE/UCodes/AX.cpp index 3b9161a52df8..b36af3b7c4a7 100644 --- a/Source/Core/Core/HW/DSPHLE/UCodes/AX.cpp +++ b/Source/Core/Core/HW/DSPHLE/UCodes/AX.cpp @@ -436,9 +436,9 @@ void AXUCode::ProcessPBList(u32 pb_addr) { ApplyUpdatesForMs(curr_ms, pb, pb.updates.num_updates, updates); - ProcessVoice(static_cast(m_accelerator.get()), pb, buffers, spms, - ConvertMixerControl(pb.mixer_control), - m_coeffs_checksum ? m_coeffs.data() : nullptr, false); + ProcessVoice(static_cast(m_accelerator.get()), pb, buffers, + ConvertMixerControl(pb.mixer_control), + m_coeffs_checksum ? m_coeffs.data() : nullptr, false); // Forward the buffers for (auto& ptr : buffers.ptrs) diff --git a/Source/Core/Core/HW/DSPHLE/UCodes/AX.h b/Source/Core/Core/HW/DSPHLE/UCodes/AX.h index a4d8cb05f40a..129c6927259f 100644 --- a/Source/Core/Core/HW/DSPHLE/UCodes/AX.h +++ b/Source/Core/Core/HW/DSPHLE/UCodes/AX.h @@ -87,15 +87,15 @@ class AXUCode /* not final: subclassed by AXWiiUCode */ : public UCodeInterface static constexpr u32 MAIL_CMDLIST_MASK = 0xFFFF0000; // 32 * 5 because 32 samples per millisecond, for max 5 milliseconds. - int m_samples_main_left[32 * 5]{}; - int m_samples_main_right[32 * 5]{}; - int m_samples_main_surround[32 * 5]{}; - int m_samples_auxA_left[32 * 5]{}; - int m_samples_auxA_right[32 * 5]{}; - int m_samples_auxA_surround[32 * 5]{}; - int m_samples_auxB_left[32 * 5]{}; - int m_samples_auxB_right[32 * 5]{}; - int m_samples_auxB_surround[32 * 5]{}; + alignas(32) int m_samples_main_left[32 * 5]{}; + alignas(32) int m_samples_main_right[32 * 5]{}; + alignas(32) int m_samples_main_surround[32 * 5]{}; + alignas(32) int m_samples_auxA_left[32 * 5]{}; + alignas(32) int m_samples_auxA_right[32 * 5]{}; + alignas(32) int m_samples_auxA_surround[32 * 5]{}; + alignas(32) int m_samples_auxB_left[32 * 5]{}; + alignas(32) int m_samples_auxB_right[32 * 5]{}; + alignas(32) int m_samples_auxB_surround[32 * 5]{}; u16 m_cmdlist[512]{}; u32 m_cmdlist_size = 0; diff --git a/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h b/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h index 16622826219c..e8e76328231d 100644 --- a/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h +++ b/Source/Core/Core/HW/DSPHLE/UCodes/AXVoice.h @@ -397,10 +397,11 @@ void GetInputSamples(HLEAccelerator* accelerator, PB_TYPE& pb, s16* samples, u16 } // Add samples to an output buffer, with optional volume ramping. -void MixAdd(int* out, const s16* input, u32 count, VolumeData* vd, s16* dpop, bool ramp) +template +static void MixAdd(int* out, const s16* input, VolumeData* vd, s16* dpop, bool ramp) { - u16& volume = vd->volume; - u16 volume_delta = vd->volume_delta; + u16 volume = vd->volume; + s16 volume_delta = vd->volume_delta; // If volume ramping is disabled, set volume_delta to 0. That way, the // mixing loop can avoid testing if volume ramping is enabled at each step, @@ -408,18 +409,61 @@ void MixAdd(int* out, const s16* input, u32 count, VolumeData* vd, s16* dpop, bo if (!ramp) volume_delta = 0; - for (u32 i = 0; i < count; ++i) +#ifdef __AVX2__ + if constexpr ((count & 15) == 0) { - s64 sample = input[i]; - sample *= volume; - sample >>= 15; - sample = std::clamp((s32)sample, -32767, 32767); // -32768 ? + out = std::assume_aligned<32>(out); + input = std::assume_aligned<32>(input); + + auto vol = _mm256_set1_epi16(volume); + const auto delta = _mm256_set1_epi16(volume_delta); + + // Vectorize the volume. + const auto iota = _mm256_set_epi16(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + vol = _mm256_add_epi16(vol, _mm256_mullo_epi16(delta, iota)); + + // Each loop iteration processes 16 samples. + const auto delta16 = _mm256_slli_epi16(delta, 4); - out[i] += (s16)sample; - volume += volume_delta; + for (u32 i = 0; i < count; i += 16) + { + const auto val = *(__m256i*)&input[i]; + const auto dst = (__m256i*)&out[i]; + + // mulhrs is signed * signed but we need signed * unsigned, + // so drop the top bit and adjust the product if it was set. + const auto mul = _mm256_mulhrs_epi16(val, _mm256_and_si256(vol, _mm256_set1_epi16(0x7FFF))); + const auto add = _mm256_adds_epi16(mul, _mm256_and_si256(val, _mm256_srai_epi16(vol, 15))); + + // Sign-extend to 32-bit. + const auto lo = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(add, 0)); + const auto hi = _mm256_cvtepi16_epi32(_mm256_extracti128_si256(add, 1)); - *dpop = (s16)sample; + // Add to the output. + dst[0] = _mm256_add_epi32(dst[0], lo); + dst[1] = _mm256_add_epi32(dst[1], hi); + + // Update the volume for the next iteration. + vol = _mm256_add_epi16(vol, delta16); + } } + else +#endif + { + for (u32 i = 0; i < count; ++i) + { + s32 sample = input[i]; + sample *= volume; + sample >>= 15; + sample = std::clamp(sample, -32767, 32767); // -32768 ? + + out[i] += (s16)sample; + volume += volume_delta; + } + } + + vd->volume += volume_delta * count; + *dpop = out[count - 1]; } // Execute a low pass filter on the samples using one history value. Returns @@ -461,7 +505,8 @@ static void BiquadFilter(s16* samples, u32 count, PBBiquadFilter& f) // Process 1ms of audio (for AX GC) or 3ms of audio (for AX Wii) from a PB and // mix it to the output buffers. -void ProcessVoice(HLEAccelerator* accelerator, PB_TYPE& pb, const AXBuffers& buffers, u16 count, +template +void ProcessVoice(HLEAccelerator* accelerator, PB_TYPE& pb, const AXBuffers& buffers, AXMixControl mctrl, const s16* coeffs, bool new_filter) { // If the voice is not running, nothing to do. @@ -469,7 +514,7 @@ void ProcessVoice(HLEAccelerator* accelerator, PB_TYPE& pb, const AXBuffers& buf return; // Read input samples, performing sample rate conversion if needed. - s16 samples[MAX_SAMPLES_PER_FRAME]; + alignas(32) s16 samples[MAX_SAMPLES_PER_FRAME]; GetInputSamples(accelerator, pb, samples, count, coeffs); // Apply a global volume ramp using the volume envelope parameters. @@ -508,67 +553,67 @@ void ProcessVoice(HLEAccelerator* accelerator, PB_TYPE& pb, const AXBuffers& buf if (MIX_ON(MAIN_L)) { - MixAdd(buffers.main_left, samples, count, &pb.mixer.main_left, &pb.dpop.main_left, - RAMP_ON(MAIN_L)); + MixAdd(buffers.main_left, samples, &pb.mixer.main_left, &pb.dpop.main_left, + RAMP_ON(MAIN_L)); } if (MIX_ON(MAIN_R)) { - MixAdd(buffers.main_right, samples, count, &pb.mixer.main_right, &pb.dpop.main_right, - RAMP_ON(MAIN_R)); + MixAdd(buffers.main_right, samples, &pb.mixer.main_right, &pb.dpop.main_right, + RAMP_ON(MAIN_R)); } if (MIX_ON(MAIN_S)) { - MixAdd(buffers.main_surround, samples, count, &pb.mixer.main_surround, &pb.dpop.main_surround, - RAMP_ON(MAIN_S)); + MixAdd(buffers.main_surround, samples, &pb.mixer.main_surround, &pb.dpop.main_surround, + RAMP_ON(MAIN_S)); } if (MIX_ON(AUXA_L)) { - MixAdd(buffers.auxA_left, samples, count, &pb.mixer.auxA_left, &pb.dpop.auxA_left, - RAMP_ON(AUXA_L)); + MixAdd(buffers.auxA_left, samples, &pb.mixer.auxA_left, &pb.dpop.auxA_left, + RAMP_ON(AUXA_L)); } if (MIX_ON(AUXA_R)) { - MixAdd(buffers.auxA_right, samples, count, &pb.mixer.auxA_right, &pb.dpop.auxA_right, - RAMP_ON(AUXA_R)); + MixAdd(buffers.auxA_right, samples, &pb.mixer.auxA_right, &pb.dpop.auxA_right, + RAMP_ON(AUXA_R)); } if (MIX_ON(AUXA_S)) { - MixAdd(buffers.auxA_surround, samples, count, &pb.mixer.auxA_surround, &pb.dpop.auxA_surround, - RAMP_ON(AUXA_S)); + MixAdd(buffers.auxA_surround, samples, &pb.mixer.auxA_surround, &pb.dpop.auxA_surround, + RAMP_ON(AUXA_S)); } if (MIX_ON(AUXB_L)) { - MixAdd(buffers.auxB_left, samples, count, &pb.mixer.auxB_left, &pb.dpop.auxB_left, - RAMP_ON(AUXB_L)); + MixAdd(buffers.auxB_left, samples, &pb.mixer.auxB_left, &pb.dpop.auxB_left, + RAMP_ON(AUXB_L)); } if (MIX_ON(AUXB_R)) { - MixAdd(buffers.auxB_right, samples, count, &pb.mixer.auxB_right, &pb.dpop.auxB_right, - RAMP_ON(AUXB_R)); + MixAdd(buffers.auxB_right, samples, &pb.mixer.auxB_right, &pb.dpop.auxB_right, + RAMP_ON(AUXB_R)); } if (MIX_ON(AUXB_S)) { - MixAdd(buffers.auxB_surround, samples, count, &pb.mixer.auxB_surround, &pb.dpop.auxB_surround, - RAMP_ON(AUXB_S)); + MixAdd(buffers.auxB_surround, samples, &pb.mixer.auxB_surround, &pb.dpop.auxB_surround, + RAMP_ON(AUXB_S)); } #ifdef AX_WII if (MIX_ON(AUXC_L)) { - MixAdd(buffers.auxC_left, samples, count, &pb.mixer.auxC_left, &pb.dpop.auxC_left, - RAMP_ON(AUXC_L)); + MixAdd(buffers.auxC_left, samples, &pb.mixer.auxC_left, &pb.dpop.auxC_left, + RAMP_ON(AUXC_L)); } if (MIX_ON(AUXC_R)) { - MixAdd(buffers.auxC_right, samples, count, &pb.mixer.auxC_right, &pb.dpop.auxC_right, - RAMP_ON(AUXC_R)); + MixAdd(buffers.auxC_right, samples, &pb.mixer.auxC_right, &pb.dpop.auxC_right, + RAMP_ON(AUXC_R)); } if (MIX_ON(AUXC_S)) { - MixAdd(buffers.auxC_surround, samples, count, &pb.mixer.auxC_surround, &pb.dpop.auxC_surround, - RAMP_ON(AUXC_S)); + MixAdd(buffers.auxC_surround, samples, &pb.mixer.auxC_surround, &pb.dpop.auxC_surround, + RAMP_ON(AUXC_S)); } #endif @@ -600,10 +645,10 @@ void ProcessVoice(HLEAccelerator* accelerator, PB_TYPE& pb, const AXBuffers& buf } // Old AXWii versions process ms per ms. - u16 wm_count = count == 96 ? 18 : 6; + constexpr u16 wm_count = count == 96 ? 18 : 6; // Interpolate at most 18 samples from the 96 samples we read before. - s16 wm_samples[18]; + alignas(32) s16 wm_samples[18]; // We use ratio 0x55555 == (5 * 65536 + 21845) / 65536 == 5.3333 which // is the nearest we can get to 96/18 @@ -617,29 +662,29 @@ void ProcessVoice(HLEAccelerator* accelerator, PB_TYPE& pb, const AXBuffers& buf #define WMCHAN_MIX_RAMP(n) (0 != ((pb.remote_mixer_control >> (2 * n)) & 2)) if (WMCHAN_MIX_ON(0)) - MixAdd(buffers.wm_main0, wm_samples, wm_count, &pb.remote_mixer.main0, &pb.remote_dpop.main0, - WMCHAN_MIX_RAMP(0)); + MixAdd(buffers.wm_main0, wm_samples, &pb.remote_mixer.main0, &pb.remote_dpop.main0, + WMCHAN_MIX_RAMP(0)); if (WMCHAN_MIX_ON(1)) - MixAdd(buffers.wm_aux0, wm_samples, wm_count, &pb.remote_mixer.aux0, &pb.remote_dpop.aux0, - WMCHAN_MIX_RAMP(1)); + MixAdd(buffers.wm_aux0, wm_samples, &pb.remote_mixer.aux0, &pb.remote_dpop.aux0, + WMCHAN_MIX_RAMP(1)); if (WMCHAN_MIX_ON(2)) - MixAdd(buffers.wm_main1, wm_samples, wm_count, &pb.remote_mixer.main1, &pb.remote_dpop.main1, - WMCHAN_MIX_RAMP(2)); + MixAdd(buffers.wm_main1, wm_samples, &pb.remote_mixer.main1, &pb.remote_dpop.main1, + WMCHAN_MIX_RAMP(2)); if (WMCHAN_MIX_ON(3)) - MixAdd(buffers.wm_aux1, wm_samples, wm_count, &pb.remote_mixer.aux1, &pb.remote_dpop.aux1, - WMCHAN_MIX_RAMP(3)); + MixAdd(buffers.wm_aux1, wm_samples, &pb.remote_mixer.aux1, &pb.remote_dpop.aux1, + WMCHAN_MIX_RAMP(3)); if (WMCHAN_MIX_ON(4)) - MixAdd(buffers.wm_main2, wm_samples, wm_count, &pb.remote_mixer.main2, &pb.remote_dpop.main2, - WMCHAN_MIX_RAMP(4)); + MixAdd(buffers.wm_main2, wm_samples, &pb.remote_mixer.main2, &pb.remote_dpop.main2, + WMCHAN_MIX_RAMP(4)); if (WMCHAN_MIX_ON(5)) - MixAdd(buffers.wm_aux2, wm_samples, wm_count, &pb.remote_mixer.aux2, &pb.remote_dpop.aux2, - WMCHAN_MIX_RAMP(5)); + MixAdd(buffers.wm_aux2, wm_samples, &pb.remote_mixer.aux2, &pb.remote_dpop.aux2, + WMCHAN_MIX_RAMP(5)); if (WMCHAN_MIX_ON(6)) - MixAdd(buffers.wm_main3, wm_samples, wm_count, &pb.remote_mixer.main3, &pb.remote_dpop.main3, - WMCHAN_MIX_RAMP(6)); + MixAdd(buffers.wm_main3, wm_samples, &pb.remote_mixer.main3, &pb.remote_dpop.main3, + WMCHAN_MIX_RAMP(6)); if (WMCHAN_MIX_ON(7)) - MixAdd(buffers.wm_aux3, wm_samples, wm_count, &pb.remote_mixer.aux3, &pb.remote_dpop.aux3, - WMCHAN_MIX_RAMP(7)); + MixAdd(buffers.wm_aux3, wm_samples, &pb.remote_mixer.aux3, &pb.remote_dpop.aux3, + WMCHAN_MIX_RAMP(7)); } #undef WMCHAN_MIX_RAMP #undef WMCHAN_MIX_ON diff --git a/Source/Core/Core/HW/DSPHLE/UCodes/AXWii.cpp b/Source/Core/Core/HW/DSPHLE/UCodes/AXWii.cpp index 19ed9dd90e83..426eacc24443 100644 --- a/Source/Core/Core/HW/DSPHLE/UCodes/AXWii.cpp +++ b/Source/Core/Core/HW/DSPHLE/UCodes/AXWii.cpp @@ -449,9 +449,9 @@ void AXWiiUCode::ProcessPBList(u32 pb_addr) for (int curr_ms = 0; curr_ms < 3; ++curr_ms) { ApplyUpdatesForMs(curr_ms, pb, num_updates, updates); - ProcessVoice(static_cast(m_accelerator.get()), pb, buffers, spms, - ConvertMixerControl(HILO_TO_32(pb.mixer_control)), - m_coeffs_checksum ? m_coeffs.data() : nullptr, m_new_filter); + ProcessVoice(static_cast(m_accelerator.get()), pb, buffers, + ConvertMixerControl(HILO_TO_32(pb.mixer_control)), + m_coeffs_checksum ? m_coeffs.data() : nullptr, m_new_filter); // Forward the buffers for (auto& ptr : buffers.ptrs) @@ -461,9 +461,9 @@ void AXWiiUCode::ProcessPBList(u32 pb_addr) } else { - ProcessVoice(static_cast(m_accelerator.get()), pb, buffers, 96, - ConvertMixerControl(HILO_TO_32(pb.mixer_control)), - m_coeffs_checksum ? m_coeffs.data() : nullptr, m_new_filter); + ProcessVoice<96>(static_cast(m_accelerator.get()), pb, buffers, + ConvertMixerControl(HILO_TO_32(pb.mixer_control)), + m_coeffs_checksum ? m_coeffs.data() : nullptr, m_new_filter); } WritePB(memory, pb_addr, pb, m_crc); diff --git a/Source/Core/Core/HW/DSPHLE/UCodes/AXWii.h b/Source/Core/Core/HW/DSPHLE/UCodes/AXWii.h index 85d330038467..85461910ee46 100644 --- a/Source/Core/Core/HW/DSPHLE/UCodes/AXWii.h +++ b/Source/Core/Core/HW/DSPHLE/UCodes/AXWii.h @@ -21,19 +21,19 @@ class AXWiiUCode final : public AXUCode protected: // Additional AUX buffers - int m_samples_auxC_left[32 * 3]{}; - int m_samples_auxC_right[32 * 3]{}; - int m_samples_auxC_surround[32 * 3]{}; + alignas(32) int m_samples_auxC_left[32 * 3]{}; + alignas(32) int m_samples_auxC_right[32 * 3]{}; + alignas(32) int m_samples_auxC_surround[32 * 3]{}; // Wiimote buffers - int m_samples_wm0[6 * 3]{}; - int m_samples_aux0[6 * 3]{}; - int m_samples_wm1[6 * 3]{}; - int m_samples_aux1[6 * 3]{}; - int m_samples_wm2[6 * 3]{}; - int m_samples_aux2[6 * 3]{}; - int m_samples_wm3[6 * 3]{}; - int m_samples_aux3[6 * 3]{}; + alignas(32) int m_samples_wm0[6 * 3]{}; + alignas(32) int m_samples_aux0[6 * 3]{}; + alignas(32) int m_samples_wm1[6 * 3]{}; + alignas(32) int m_samples_aux1[6 * 3]{}; + alignas(32) int m_samples_wm2[6 * 3]{}; + alignas(32) int m_samples_aux2[6 * 3]{}; + alignas(32) int m_samples_wm3[6 * 3]{}; + alignas(32) int m_samples_aux3[6 * 3]{}; // Are we implementing an old version of AXWii which still has updates? bool m_old_axwii = false;