Skip to content
Permalink
Browse files
Merge pull request #6077 from leoetlino/dsp-fixes
Small DSP accelerator fixes
  • Loading branch information
leoetlino committed Sep 24, 2017
2 parents 8ebdd62 + 6484776 commit 38a8d04
Show file tree
Hide file tree
Showing 15 changed files with 570 additions and 179 deletions.
@@ -4,77 +4,65 @@

#include "Core/DSP/DSPAccelerator.h"

#include "Common/ChunkFile.h"
#include "Common/CommonTypes.h"
#include "Common/Logging/Log.h"
#include "Common/MathUtil.h"

#include "Core/DSP/DSPCore.h"
#include "Core/DSP/DSPHWInterface.h"
#include "Core/DSP/DSPHost.h"

namespace DSP
{
// NOTE(review): this block comes from a rendered diff, so two versions are interleaved here:
// the free function dsp_read_aram_d3() (lines touching g_dsp.ifx_regs / Host::ReadHostMemory)
// and the member function Accelerator::ReadD3() (lines touching m_* members / ReadMemory).
// The free function looks like the removed side of the diff -- confirm against the repository.
//
// Both variants read one value at the current address in the selected format, advance the
// current address by one, and reset it to the start address once it reaches the end address.
// An unknown format logs an error and returns 0 without advancing the address.
u16 dsp_read_aram_d3()
u16 Accelerator::ReadD3()
{
// Zelda ucode reads ARAM through 0xffd3.
const u32 EndAddress = (g_dsp.ifx_regs[DSP_ACEAH] << 16) | g_dsp.ifx_regs[DSP_ACEAL];
u32 Address = (g_dsp.ifx_regs[DSP_ACCAH] << 16) | g_dsp.ifx_regs[DSP_ACCAL];
u16 val = 0;

switch (g_dsp.ifx_regs[DSP_FORMAT])
switch (m_sample_format)
{
case 0x5: // u8 reads
// The address is used directly as a byte address.
val = Host::ReadHostMemory(Address);
Address++;
val = ReadMemory(m_current_address);
m_current_address++;
break;
case 0x6: // u16 reads
// The address counts 16-bit units: byte 2 * address is the high half, 2 * address + 1 the low.
val = (Host::ReadHostMemory(Address * 2) << 8) | Host::ReadHostMemory(Address * 2 + 1);
Address++;
val = (ReadMemory(m_current_address * 2) << 8) | ReadMemory(m_current_address * 2 + 1);
m_current_address++;
break;
default:
ERROR_LOG(DSPLLE, "dsp_read_aram_d3() - unknown format 0x%x", g_dsp.ifx_regs[DSP_FORMAT]);
ERROR_LOG(DSPLLE, "dsp_read_aram_d3() - unknown format 0x%x", m_sample_format);
break;
}

if (Address >= EndAddress)
if (m_current_address >= m_end_address)
{
// Set address back to start address. (never seen this here!)
Address = (g_dsp.ifx_regs[DSP_ACSAH] << 16) | g_dsp.ifx_regs[DSP_ACSAL];
m_current_address = m_start_address;
}

// The register-based variant writes the updated address back as high/low 16-bit halves.
g_dsp.ifx_regs[DSP_ACCAH] = Address >> 16;
g_dsp.ifx_regs[DSP_ACCAL] = Address & 0xffff;
return val;
}

// NOTE(review): this block comes from a rendered diff, so two versions are interleaved here:
// the free function dsp_write_aram_d3() (lines touching g_dsp.ifx_regs / Host::WriteHostMemory)
// and the member function Accelerator::WriteD3() (lines touching m_* members / WriteMemory).
// The free function looks like the removed side of the diff -- confirm against the repository.
//
// Both variants handle only format 0xA: the 16-bit value is stored as two bytes at the current
// address and the address is advanced by one. Any other format is logged and nothing is written.
// Neither variant compares the address against the end address.
void dsp_write_aram_d3(u16 value)
void Accelerator::WriteD3(u16 value)
{
// Zelda ucode writes a bunch of zeros to ARAM through d3 during
// initialization. Don't know if it ever does it later, too.
// Pikmin 2 Wii writes non-stop to 0x10008000-0x1000801f (non-zero values too)
// Zelda TP Wii writes non-stop to 0x10000000-0x1000001f (non-zero values too)
u32 Address = (g_dsp.ifx_regs[DSP_ACCAH] << 16) | g_dsp.ifx_regs[DSP_ACCAL];

switch (g_dsp.ifx_regs[DSP_FORMAT])
switch (m_sample_format)
{
case 0xA: // u16 writes
// High byte goes to 2 * address, low byte to 2 * address + 1. Note the argument order
// differs: Host::WriteHostMemory takes (value, address), WriteMemory takes (address, value).
Host::WriteHostMemory(value >> 8, Address * 2);
Host::WriteHostMemory(value & 0xFF, Address * 2 + 1);
Address++;
WriteMemory(m_current_address * 2, value >> 8);
WriteMemory(m_current_address * 2 + 1, value & 0xFF);
m_current_address++;
break;
default:
ERROR_LOG(DSPLLE, "dsp_write_aram_d3() - unknown format 0x%x", g_dsp.ifx_regs[DSP_FORMAT]);
ERROR_LOG(DSPLLE, "dsp_write_aram_d3() - unknown format 0x%x", m_sample_format);
break;
}

// The register-based variant writes the updated address back as high/low 16-bit halves.
g_dsp.ifx_regs[DSP_ACCAH] = Address >> 16;
g_dsp.ifx_regs[DSP_ACCAL] = Address & 0xffff;
}

u16 ReadAccelerator(u32 start_address, u32 end_address, u32* current_address, u16 sample_format,
s16* yn1, s16* yn2, u16* pred_scale, s16* coefs,
std::function<void()> end_exception)
u16 Accelerator::Read(s16* coefs)
{
if (m_reads_stopped)
return 0x0000;

u16 val;
u8 step_size_bytes = 0;

@@ -84,69 +72,69 @@ u16 ReadAccelerator(u32 start_address, u32 end_address, u32* current_address, u1
// extension and do/do not use ADPCM. It also remains to be figured out
// whether there's a difference between the usual accelerator "read
// address" and 0xd3.
switch (sample_format)
switch (m_sample_format)
{
case 0x00: // ADPCM audio
{
// ADPCM decoding, not much to explain here.
if ((*current_address & 15) == 0)
{
*pred_scale = Host::ReadHostMemory((*current_address & ~15) >> 1);
*current_address += 2;
}

switch (end_address & 15)
{
case 0: // Tom and Jerry
step_size_bytes = 1;
break;
case 1: // Blazing Angels
step_size_bytes = 0;
break;
default:
step_size_bytes = 2;
break;
}

int scale = 1 << (*pred_scale & 0xF);
int coef_idx = (*pred_scale >> 4) & 0x7;
int scale = 1 << (m_pred_scale & 0xF);
int coef_idx = (m_pred_scale >> 4) & 0x7;

s32 coef1 = coefs[coef_idx * 2 + 0];
s32 coef2 = coefs[coef_idx * 2 + 1];

int temp = (*current_address & 1) ? (Host::ReadHostMemory(*current_address >> 1) & 0xF) :
(Host::ReadHostMemory(*current_address >> 1) >> 4);
int temp = (m_current_address & 1) ? (ReadMemory(m_current_address >> 1) & 0xF) :
(ReadMemory(m_current_address >> 1) >> 4);

if (temp >= 8)
temp -= 16;

s32 val32 = (scale * temp) + ((0x400 + coef1 * *yn1 + coef2 * *yn2) >> 11);
s32 val32 = (scale * temp) + ((0x400 + coef1 * m_yn1 + coef2 * m_yn2) >> 11);
val = static_cast<s16>(MathUtil::Clamp<s32>(val32, -0x7FFF, 0x7FFF));
step_size_bytes = 2;

*yn2 = *yn1;
*yn1 = val;
*current_address += 1;
m_yn2 = m_yn1;
m_yn1 = val;
m_current_address += 1;

// These two cases are handled in a special way, separate from normal overflow handling:
// the ACCOV exception does not fire at all, the predscale register is not updated,
// and if the end address is 16-byte aligned, the DSP loops to start_address + 1
// instead of start_address.
if ((m_end_address & 0xf) == 0x0 && m_current_address == m_end_address)
{
m_current_address = m_start_address + 1;
}
else if ((m_end_address & 0xf) == 0x1 && m_current_address == m_end_address - 1)
{
m_current_address = m_start_address;
}
// If any of these special cases were hit, the DSP does not update the predscale register.
else if ((m_current_address & 15) == 0)
{
m_pred_scale = ReadMemory((m_current_address & ~15) >> 1);
m_current_address += 2;
step_size_bytes += 2;
}
break;
}
case 0x0A: // 16-bit PCM audio
val = (Host::ReadHostMemory(*current_address * 2) << 8) |
Host::ReadHostMemory(*current_address * 2 + 1);
*yn2 = *yn1;
*yn1 = val;
val = (ReadMemory(m_current_address * 2) << 8) | ReadMemory(m_current_address * 2 + 1);
m_yn2 = m_yn1;
m_yn1 = val;
step_size_bytes = 2;
*current_address += 1;
m_current_address += 1;
break;
case 0x19: // 8-bit PCM audio
val = Host::ReadHostMemory(*current_address) << 8;
*yn2 = *yn1;
*yn1 = val;
val = ReadMemory(m_current_address) << 8;
m_yn2 = m_yn1;
m_yn1 = val;
step_size_bytes = 2;
*current_address += 1;
m_current_address += 1;
break;
default:
ERROR_LOG(DSPLLE, "dsp_read_accelerator() - unknown format 0x%x", g_dsp.ifx_regs[DSP_FORMAT]);
ERROR_LOG(DSPLLE, "dsp_read_accelerator() - unknown format 0x%x", m_sample_format);
step_size_bytes = 2;
*current_address += 1;
m_current_address += 1;
val = 0;
break;
}
@@ -160,30 +148,66 @@ u16 ReadAccelerator(u32 start_address, u32 end_address, u32* current_address, u1
// Somehow, YN1 and YN2 must be initialized with their "loop" values,
// so yeah, it seems likely that we should raise an exception to let
// the DSP program do that, at least if DSP_FORMAT == 0x0A.
if (*current_address == (end_address + step_size_bytes - 1))
if (m_current_address == (m_end_address + step_size_bytes - 1))
{
// Set address back to start address.
*current_address = start_address;
end_exception();
m_current_address = m_start_address;
m_reads_stopped = true;
OnEndException();
}

SetCurrentAddress(m_current_address);
return val;
}

// NOTE(review): this block comes from a rendered diff, so two unrelated bodies are interleaved:
// the free function dsp_read_accelerator() (everything up to and including `return val;`) and
// the member function Accelerator::DoState() (the p.Do(...) lines). The free function looks
// like the removed side of the diff -- confirm against the repository.
u16 dsp_read_accelerator()
void Accelerator::DoState(PointerWrap& p)
{
// dsp_read_accelerator(): assemble the 32-bit start/end/current addresses from their
// high/low 16-bit interface registers.
const u32 start_address = (g_dsp.ifx_regs[DSP_ACSAH] << 16) | g_dsp.ifx_regs[DSP_ACSAL];
const u32 end_address = (g_dsp.ifx_regs[DSP_ACEAH] << 16) | g_dsp.ifx_regs[DSP_ACEAL];
u32 current_address = (g_dsp.ifx_regs[DSP_ACCAH] << 16) | g_dsp.ifx_regs[DSP_ACCAL];

// The callback handed to ReadAccelerator raises the EXP_ACCOV exception.
auto end_address_reached = [] { DSPCore_SetException(EXP_ACCOV); };
const u16 val = ReadAccelerator(
start_address, end_address, &current_address, g_dsp.ifx_regs[DSP_FORMAT],
reinterpret_cast<s16*>(&g_dsp.ifx_regs[DSP_YN1]),
reinterpret_cast<s16*>(&g_dsp.ifx_regs[DSP_YN2]), &g_dsp.ifx_regs[DSP_PRED_SCALE],
reinterpret_cast<s16*>(&g_dsp.ifx_regs[DSP_COEF_A1_0]), end_address_reached);

// Store the (possibly updated) current address back through gdsp_ifx_write.
gdsp_ifx_write(DSP_ACCAH, current_address >> 16);
gdsp_ifx_write(DSP_ACCAL, current_address & 0xffff);
return val;
// DoState(): pass every accelerator register, plus the internal m_reads_stopped flag,
// through PointerWrap::Do (presumably save-state serialization -- confirm against PointerWrap).
p.Do(m_start_address);
p.Do(m_end_address);
p.Do(m_current_address);
p.Do(m_sample_format);
p.Do(m_yn1);
p.Do(m_yn2);
p.Do(m_pred_scale);
p.Do(m_reads_stopped);
}

// Bit masks applied by the address setters before a value is stored.
// Start and end addresses keep only their low 30 bits.
constexpr u32 START_END_ADDRESS_MASK = 0x3FFFFFFF;
// The current address keeps every bit except bit 30.
constexpr u32 CURRENT_ADDRESS_MASK = 0xBFFFFFFF;

// Stores the start address, keeping only the bits selected by START_END_ADDRESS_MASK.
void Accelerator::SetStartAddress(u32 address)
{
  const u32 masked_address = address & START_END_ADDRESS_MASK;
  m_start_address = masked_address;
}

// Stores the end address, keeping only the bits selected by START_END_ADDRESS_MASK.
void Accelerator::SetEndAddress(u32 address)
{
  const u32 masked_address = address & START_END_ADDRESS_MASK;
  m_end_address = masked_address;
}

// Stores the current address, keeping only the bits selected by CURRENT_ADDRESS_MASK.
void Accelerator::SetCurrentAddress(u32 address)
{
  const u32 masked_address = address & CURRENT_ADDRESS_MASK;
  m_current_address = masked_address;
}

// Stores the sample format value unchanged; no validation or masking is applied here.
void Accelerator::SetSampleFormat(u16 format)
{
  this->m_sample_format = format;
}

// Stores the YN1 value unchanged. Unlike SetYn2, this has no other effect.
void Accelerator::SetYn1(s16 yn1)
{
  this->m_yn1 = yn1;
}

// Stores the YN2 value and re-enables accelerator reads: a write to YN2 clears the
// internal "reads stopped" flag that Read() sets when it reaches the end address.
void Accelerator::SetYn2(s16 yn2)
{
  m_reads_stopped = false;
  m_yn2 = yn2;
}

// Stores the predictor/scale value, keeping only its low 7 bits (mask 0x7f).
void Accelerator::SetPredScale(u16 pred_scale)
{
  const u16 masked_pred_scale = pred_scale & 0x7f;
  m_pred_scale = masked_pred_scale;
}
} // namespace DSP
@@ -4,18 +4,56 @@

#pragma once

#include <functional>

#include "Common/CommonTypes.h"

class PointerWrap;

namespace DSP
{
// Free-function accelerator read. The caller supplies the register values; current_address,
// yn1, yn2 and pred_scale are pointers so the function can update them, and end_exception is
// the callback invoked when the end address is reached.
// NOTE(review): in this rendered diff the declaration appears to be the removed predecessor of
// Accelerator::Read -- confirm against the repository.
u16 ReadAccelerator(u32 start_address, u32 end_address, u32* current_address, u16 sample_format,
s16* yn1, s16* yn2, u16* pred_scale, s16* coefs,
std::function<void()> end_exception);
// Holds the accelerator register state (addresses, sample format, YN1/YN2, predictor/scale)
// together with the read/write logic that operates on it. Memory access and the end-of-buffer
// notification are delegated to subclasses through the protected pure virtual hooks.
// NOTE(review): this listing is a rendered diff; the unqualified dsp_* declarations inside the
// class body look like removed lines of the old free-function API rather than real members --
// confirm against the repository.
class Accelerator
{
public:
virtual ~Accelerator() = default;

// Reads one value in the current sample format and advances the current address.
// coefs is the coefficient table; the ADPCM path indexes it in pairs (coef_idx * 2 + 0/1).
// Returns 0 without reading while reads are stopped (see m_reads_stopped).
u16 Read(s16* coefs);
// Zelda ucode reads ARAM through 0xffd3.
u16 ReadD3();
// Writes a 16-bit value at the current address (format 0xA only) and advances the address.
void WriteD3(u16 value);

// Plain accessors for the register values.
u32 GetStartAddress() const { return m_start_address; }
u32 GetEndAddress() const { return m_end_address; }
u32 GetCurrentAddress() const { return m_current_address; }
u16 GetSampleFormat() const { return m_sample_format; }
s16 GetYn1() const { return m_yn1; }
s16 GetYn2() const { return m_yn2; }
u16 GetPredScale() const { return m_pred_scale; }
// Setters. The address and pred-scale setters mask their argument before storing it, and
// SetYn2 additionally clears m_reads_stopped.
void SetStartAddress(u32 address);
void SetEndAddress(u32 address);
void SetCurrentAddress(u32 address);
void SetSampleFormat(u16 format);
void SetYn1(s16 yn1);
void SetYn2(s16 yn2);
void SetPredScale(u16 pred_scale);

// Passes all register members and m_reads_stopped through PointerWrap::Do.
void DoState(PointerWrap& p);

protected:
// Called by Read() when the current address reaches the end address.
virtual void OnEndException() = 0;
// Byte-granular memory access used by Read(), ReadD3() and WriteD3().
virtual u8 ReadMemory(u32 address) = 0;
virtual void WriteMemory(u32 address, u8 value) = 0;

u16 dsp_read_accelerator();
// DSP accelerator registers.
u32 m_start_address = 0;
u32 m_end_address = 0;
u32 m_current_address = 0;
u16 m_sample_format = 0;
s16 m_yn1 = 0;
s16 m_yn2 = 0;
u16 m_pred_scale = 0;

u16 dsp_read_aram_d3();
void dsp_write_aram_d3(u16 value);
// When an ACCOV is triggered, the accelerator stops reading back anything
// and updating the current address register, unless the YN2 register is written to.
// This is kept track of internally; this state is not exposed via any register.
bool m_reads_stopped = false;
};
} // namespace DSP
@@ -15,12 +15,14 @@
#include "Common/MemoryUtil.h"
#include "Common/MsgHandler.h"

#include "Core/DSP/DSPAccelerator.h"
#include "Core/DSP/DSPAnalyzer.h"
#include "Core/DSP/DSPHWInterface.h"
#include "Core/DSP/DSPHost.h"
#include "Core/DSP/Interpreter/DSPIntUtil.h"
#include "Core/DSP/Interpreter/DSPInterpreter.h"
#include "Core/DSP/Jit/DSPEmitter.h"
#include "Core/HW/DSP.h"

namespace DSP
{
@@ -111,11 +113,21 @@ static void DSPCore_FreeMemoryPages()
g_dsp.irom = g_dsp.iram = g_dsp.dram = g_dsp.coef = nullptr;
}

// Accelerator implementation installed by DSPCore_Init: memory accesses are forwarded to the
// Host interface, and reaching the end address raises the EXP_ACCOV DSP exception.
class LLEAccelerator final : public Accelerator
{
protected:
  // Raise the accelerator-overflow exception on the DSP core.
  void OnEndException() override
  {
    DSPCore_SetException(EXP_ACCOV);
  }

  // Fetch a single byte through the host.
  u8 ReadMemory(u32 address) override
  {
    const u8 byte = Host::ReadHostMemory(address);
    return byte;
  }

  // Store a single byte through the host. The host call takes the value first, then the address.
  void WriteMemory(u32 address, u8 value) override
  {
    Host::WriteHostMemory(value, address);
  }
};

bool DSPCore_Init(const DSPInitOptions& opts)
{
g_dsp.step_counter = 0;
g_init_hax = false;

g_dsp.accelerator = std::make_unique<LLEAccelerator>();

g_dsp.irom = static_cast<u16*>(Common::AllocateMemoryPages(DSP_IROM_BYTE_SIZE));
g_dsp.iram = static_cast<u16*>(Common::AllocateMemoryPages(DSP_IRAM_BYTE_SIZE));
g_dsp.dram = static_cast<u16*>(Common::AllocateMemoryPages(DSP_DRAM_BYTE_SIZE));

0 comments on commit 38a8d04

Please sign in to comment.