@@ -12,6 +12,7 @@ TEST_ADDR: equ 0xFFA0 ; 0x0000
TEST_MEM: equ 0x00A0 ; 0x0000
TEST_MEM_2: equ 0x01A0 ; 0x0100

test_main:
LRI $AC0.L, #0xf00f
SR @TEST_REG, $AC0.L
SR @TEST_MEM, $AC0.L
@@ -27,6 +27,7 @@ include "dsp_base.inc"
; And thus it's tricky to implement in software too :p

; test using indexing register 1 - 0 is used in send_back
test_main:
lri $AR1, #16
lri $IX1, #32
lri $WR1, #0
@@ -1,9 +1,3 @@
; This is the trojan program we send to the DSP from DSPSpy to figure it out.
REGS_BASE: equ 0x0f80
MEM_HI: equ 0x0f7E
MEM_LO: equ 0x0f7F


; Interrupt vectors 8 vectors, 2 opcodes each
jmp irq0
jmp irq1
@@ -14,276 +8,5 @@ MEM_LO: equ 0x0f7F
jmp irq6
jmp irq7

; Main code (and normal entrypoint) at 0x10
sbset #0x02
sbset #0x03
sbclr #0x04
sbset #0x05
sbset #0x06

s16
lri $CR, #0x00ff

clr $acc1
clr $acc0

; get address of memory dump and copy it to DRAM
call 0x807e
si @DMBH, #0x8888
si @DMBL, #0xdead
si @DIRQ, #0x0001

call 0x8078
andi $ac0.m, #0x7fff
lrs $ac1.m, @CMBL

sr @MEM_HI, $ac0.m
sr @MEM_LO, $ac1.m

lri $ax0.l, #0
lri $ax1.l, #0 ;(DSP_CR_IMEM | DSP_CR_TO_CPU)
lri $ax0.h, #0x2000
lr $ac0.l, @MEM_HI
lr $ac0.m, @MEM_LO
call do_dma


; get address of registers and DMA them to ram
call 0x807e
si @DMBH, #0x8888
si @DMBL, #0xbeef
si @DIRQ, #0x0001

call 0x8078
andi $ac0.m, #0x7fff
lrs $ac1.m, @CMBL

sr @MEM_HI, $ac0.m
sr @MEM_LO, $ac1.m

lri $ax0.l, #REGS_BASE
lri $ax1.l, #0 ;(DSP_CR_IMEM | DSP_CR_TO_CPU)
lri $ax0.h, #0x80
lr $ac0.l, @MEM_HI
lr $ac0.m, @MEM_LO
call do_dma

; Read in all the registers from RAM
lri $ar0, #REGS_BASE+1
lrri $ar1, @$ar0
lrri $ar2, @$ar0
lrri $ar3, @$ar0
lrri $ix0, @$ar0
lrri $ix1, @$ar0
lrri $ix2, @$ar0
lrri $ix3, @$ar0
lrri $wr0, @$ar0
lrri $wr1, @$ar0
lrri $wr2, @$ar0
lrri $wr3, @$ar0
lrri $st0, @$ar0
lrri $st1, @$ar0
lrri $st2, @$ar0
lrri $st3, @$ar0
lrri $ac0.h, @$ar0
lrri $ac1.h, @$ar0
lrri $cr, @$ar0
lrri $sr, @$ar0
lrri $prod.l, @$ar0
lrri $prod.m1, @$ar0
lrri $prod.h, @$ar0
lrri $prod.m2, @$ar0
lrri $ax0.l, @$ar0
lrri $ax1.l, @$ar0
lrri $ax0.h, @$ar0
lrri $ax1.h, @$ar0
lrri $ac0.l, @$ar0
lrri $ac1.l, @$ar0
lrri $ac0.m, @$ar0
lrri $ac1.m, @$ar0
lr $ar0, @REGS_BASE

jmp start_of_test

; This is where we jump when we're done testing, see above.
; We just fall into a loop, playing dead until someone resets the DSP.
end_of_test:
nop
jmp end_of_test

; Utility function to do DMA.
do_dma:
sr @DSMAH, $ac0.l
sr @DSMAL, $ac0.m
sr @DSPA, $ax0.l
sr @DSCR, $ax1.l
sr @DSBL, $ax0.h ; This kicks off the DMA.
call 0x863d ; Wait for DMA to complete by watching a bit in DSCR.
ret

; IRQ handlers. Just send back exception# and die
irq0:
lri $ac0.m, #0x0000
jmp irq
irq1:
lri $ac0.m, #0x0001
jmp irq
irq2:
lri $ac0.m, #0x0002
jmp irq
irq3:
lri $ac0.m, #0x0003
jmp irq
irq4:
lri $ac0.m, #0x0004
jmp irq
irq5:
lrs $ac0.m, @DMBH
andcf $ac0.m, #0x8000
jlz irq5
si @DMBH, #0x8005
si @DMBL, #0x0000
si @DIRQ, #0x0001
lri $ac0.m, #0xbbbb
sr @0xffda, $ac0.m ; pred scale
sr @0xffdb, $ac0.m ; yn1
lr $ix2, @ARAM
sr @0xffdc, $ac0.m ; yn2
rti
irq6:
lri $ac0.m, #0x0006
jmp irq
irq7:
lri $ac0.m, #0x0007

irq:
lrs $ac1.m, @DMBH
andcf $ac1.m, #0x8000
jlz irq
si @DMBH, #0x8bad
;sr @DMBL, $wr3 ; ???
sr @DMBL, $ac0.m ; Exception number
si @DIRQ, #0x0001
halt ; Through some magic this allows us to properly ack the exception in dspspy
;rti ; allow dumping of ucodes which cause exceptions...probably not safe at all

; DMA:s the current state of the registers back to the PowerPC. To do this,
; it must write the contents of all regs to DRAM.
send_back:
; first, store $sr so we can modify it
sr @(REGS_BASE + 19), $sr
set16
; Now store $wr0, as it must be 0xffff for srri to work as we expect
sr @(REGS_BASE + 8), $wr0
lri $wr0, #0xffff
; store registers to reg table
sr @REGS_BASE, $ar0
lri $ar0, #(REGS_BASE + 1)
srri @$ar0, $ar1
srri @$ar0, $ar2
srri @$ar0, $ar3
srri @$ar0, $ix0
srri @$ar0, $ix1
srri @$ar0, $ix2
srri @$ar0, $ix3
; skip $wr0 since we already stored and modified it
iar $ar0
srri @$ar0, $wr1
srri @$ar0, $wr2
srri @$ar0, $wr3
srri @$ar0, $st0
srri @$ar0, $st1
srri @$ar0, $st2
srri @$ar0, $st3
srri @$ar0, $ac0.h
srri @$ar0, $ac1.h
srri @$ar0, $cr
; skip $sr since we already stored and modified it
iar $ar0
srri @$ar0, $prod.l
srri @$ar0, $prod.m1
srri @$ar0, $prod.h
srri @$ar0, $prod.m2
srri @$ar0, $ax0.l
srri @$ar0, $ax1.l
srri @$ar0, $ax0.h
srri @$ar0, $ax1.h
srri @$ar0, $ac0.l
srri @$ar0, $ac1.l
srri @$ar0, $ac0.m
srri @$ar0, $ac1.m

; Regs are stored. Prepare DMA.
; $cr must be 0x00ff because the ROM uses lrs and srs with the assumption that
; they will modify hardware registers.
lri $cr, #0x00ff
lri $ax0.l, #0x0000
lri $ax1.l, #1 ;(DSP_CR_IMEM | DSP_CR_TO_CPU)
lri $ax0.h, #0x200
lr $ac0.l, @MEM_HI
lr $ac0.m, @MEM_LO

; Now, why are we looping here?
lri $ar1, #8+8
bloop $ar1, dma_copy
call do_dma
addi $ac0.m, #0x200
mrr $ac1.m, $ax0.l
addi $ac1.m, #0x100
dma_copy:
mrr $ax0.l, $ac1.m

; Wait for the CPU to send us a mail.
call 0x807e
si @DMBH, #0x8888
si @DMBL, #0xfeeb
si @DIRQ, #0x0001

; wait for the CPU to receive our response before we execute the next op
call 0x8078
andi $ac0.m, #0x7fff
lrs $ac1.m, @CMBL

; Restore all regs again so we're ready to execute another op.
lri $ar0, #REGS_BASE+1
lrri $ar1, @$ar0
lrri $ar2, @$ar0
lrri $ar3, @$ar0
lrri $ix0, @$ar0
lrri $ix1, @$ar0
lrri $ix2, @$ar0
lrri $ix3, @$ar0
; leave $wr for later
iar $ar0
lrri $wr1, @$ar0
lrri $wr2, @$ar0
lrri $wr3, @$ar0
lrri $st0, @$ar0
lrri $st1, @$ar0
lrri $st2, @$ar0
lrri $st3, @$ar0
lrri $ac0.h, @$ar0
lrri $ac1.h, @$ar0
lrri $cr, @$ar0
; leave $sr for later
iar $ar0
lrri $prod.l, @$ar0
lrri $prod.m1, @$ar0
lrri $prod.h, @$ar0
lrri $prod.m2, @$ar0
lrri $ax0.l, @$ar0
lrri $ax1.l, @$ar0
lrri $ax0.h, @$ar0
lrri $ax1.h, @$ar0
lrri $ac0.l, @$ar0
lrri $ac1.l, @$ar0
lrri $ac0.m, @$ar0
lrri $ac1.m, @$ar0
lr $ar0, @REGS_BASE
lr $wr0, @(REGS_BASE+8)
lr $sr, @(REGS_BASE+19)

ret ; from send_back

; Obviously this must be included directly before your test code
start_of_test:
; The rest is in dsp_base_noirq.inc
include "dsp_base_noirq.inc"
@@ -0,0 +1,283 @@
; This is the trojan program we send to the DSP from DSPSpy to figure it out.
REGS_BASE: equ 0x0f80
MEM_HI: equ 0x0f7E
MEM_LO: equ 0x0f7F

WARNPC 0x10
ORG 0x10

; Main code (and normal entrypoint) at 0x10
; It is expected that IRQs were listed beforehand
; (e.g. by including dsp_base.inc instead of dsp_base_noirq.inc)
;
; Startup sequence implemented below:
;   1. configure $sr bits and $cr
;   2. mail 0x8888dead to the CPU, store the reply in MEM_HI/MEM_LO and run
;      one DMA with that address
;   3. mail 0x8888beef to the CPU, store the reply in MEM_HI/MEM_LO (this
;      overwrites the values from step 2) and DMA to REGS_BASE
;   4. load every DSP register from the table at REGS_BASE
;   5. jump to test_main, which the including test file must define

; Set/clear individual status bits; the meaning of each bit is not
; documented in this file.
sbset #0x02
sbset #0x03
sbclr #0x04
sbset #0x05
sbset #0x06

s16
; $cr = 0x00ff so that lrs/srs address the hardware registers.
lri $CR, #0x00ff

clr $acc1
clr $acc0

; get address of memory dump and copy it to DRAM
; ROM routine 0x807e is called before every write to DMBH/DMBL; presumably it
; waits until the DSP->CPU mailbox is free - TODO confirm against the ROM.
call 0x807e
si @DMBH, #0x8888
si @DMBL, #0xdead
si @DIRQ, #0x0001

; ROM routine 0x8078 is called before reading the CPU's reply; presumably it
; waits for CPU mail and leaves the high word in $ac0.m - TODO confirm.
; The top bit of $ac0.m is masked off and the low word is read from CMBL.
call 0x8078
andi $ac0.m, #0x7fff
lrs $ac1.m, @CMBL

sr @MEM_HI, $ac0.m
sr @MEM_LO, $ac1.m

; do_dma arguments: $ax0.l -> DSPA, $ax1.l -> DSCR, $ax0.h -> DSBL,
; $ac0.l -> DSMAH, $ac0.m -> DSMAL.
; NOTE(review): the trailing flag comment below is attached to the value 0
; here but to the value 1 in send_back - confirm which flags are meant.
lri $ax0.l, #0
lri $ax1.l, #0 ;(DSP_CR_IMEM | DSP_CR_TO_CPU)
lri $ax0.h, #0x2000
lr $ac0.l, @MEM_HI
lr $ac0.m, @MEM_LO
call do_dma


; get address of registers and DMA them to ram
; Same handshake as above, with 0xbeef as the low mail word.
call 0x807e
si @DMBH, #0x8888
si @DMBL, #0xbeef
si @DIRQ, #0x0001

call 0x8078
andi $ac0.m, #0x7fff
lrs $ac1.m, @CMBL

; MEM_HI/MEM_LO now hold this second reply; send_back later uses them as its
; DMA address.
sr @MEM_HI, $ac0.m
sr @MEM_LO, $ac1.m

; This DMA targets REGS_BASE on the DSP side, with 0x80 written to DSBL.
lri $ax0.l, #REGS_BASE
lri $ax1.l, #0 ;(DSP_CR_IMEM | DSP_CR_TO_CPU)
lri $ax0.h, #0x80
lr $ac0.l, @MEM_HI
lr $ac0.m, @MEM_LO
call do_dma

; Read in all the registers from RAM
; $ar0 walks the table starting at slot 1; slot 0 ($ar0 itself) is loaded
; last because $ar0 is the pointer being used.
; NOTE(review): unlike the restore in send_back, $wr0 and $sr are loaded
; in-line here while $ar0 is still being used as the pointer - confirm that
; the values supplied by the CPU cannot disturb the remaining loads.
lri $ar0, #REGS_BASE+1
lrri $ar1, @$ar0
lrri $ar2, @$ar0
lrri $ar3, @$ar0
lrri $ix0, @$ar0
lrri $ix1, @$ar0
lrri $ix2, @$ar0
lrri $ix3, @$ar0
lrri $wr0, @$ar0
lrri $wr1, @$ar0
lrri $wr2, @$ar0
lrri $wr3, @$ar0
lrri $st0, @$ar0
lrri $st1, @$ar0
lrri $st2, @$ar0
lrri $st3, @$ar0
lrri $ac0.h, @$ar0
lrri $ac1.h, @$ar0
lrri $cr, @$ar0
lrri $sr, @$ar0
lrri $prod.l, @$ar0
lrri $prod.m1, @$ar0
lrri $prod.h, @$ar0
lrri $prod.m2, @$ar0
lrri $ax0.l, @$ar0
lrri $ax1.l, @$ar0
lrri $ax0.h, @$ar0
lrri $ax1.h, @$ar0
lrri $ac0.l, @$ar0
lrri $ac1.l, @$ar0
lrri $ac0.m, @$ar0
lrri $ac1.m, @$ar0
lr $ar0, @REGS_BASE

; Hand control to the test; the including file provides test_main.
jmp test_main

; end_of_test: tests jump here when they are finished.
; The loop never exits on its own; it just keeps the DSP busy (playing dead)
; until someone resets the DSP.
end_of_test:
nop
jmp end_of_test

; Utility function to do DMA.
; Inputs (copied verbatim into the DMA hardware registers):
;   $ac0.l -> DSMAH
;   $ac0.m -> DSMAL
;   $ax0.l -> DSPA
;   $ax1.l -> DSCR
;   $ax0.h -> DSBL (written last; this starts the transfer)
; Clobbers: $ac1.m (used to poll DSCR) and the flags changed by andcf.
; Returns once the 0x4 bit of DSCR is no longer set.
do_dma:
sr @DSMAH, $ac0.l
sr @DSMAL, $ac0.m
sr @DSPA, $ax0.l
sr @DSCR, $ax1.l
sr @DSBL, $ax0.h ; This kicks off the DMA.
; Poll DSCR here instead of calling into the ROM; spin while bit 0x4 is set.
wait_dma_finish:
lr $ac1.m, @DSCR
andcf $ac1.m, #0x4
jlz wait_dma_finish
ret

; IRQ handlers. Just send back exception# and die
; irq0..irq4 each load their own exception number into $ac0.m and jump to
; the shared `irq` routine, which mails that number to the CPU and halts.
; $ac0.m is overwritten without being saved.
irq0:
lri $ac0.m, #0x0000
jmp irq
irq1:
lri $ac0.m, #0x0001
jmp irq
irq2:
lri $ac0.m, #0x0002
jmp irq
irq3:
lri $ac0.m, #0x0003
jmp irq
irq4:
lri $ac0.m, #0x0004
jmp irq
; irq5: unlike the other handlers this one returns to the interrupted code.
; It mails 0x8005:0x0000 to the CPU, rewrites 0xffda/0xffdb/0xffdc with
; 0xbbbb, reads @ARAM once into $ix2, and then executes rti.
; Clobbers $ac0.m and $ix2 (neither is saved or restored here).
irq5:
; Spin while the top bit of DMBH is set, i.e. until the previous mail is gone.
lrs $ac0.m, @DMBH
andcf $ac0.m, #0x8000
jlz irq5
si @DMBH, #0x8005
si @DMBL, #0x0000
si @DIRQ, #0x0001
lri $ac0.m, #0xbbbb
sr @0xffda, $ac0.m ; pred scale
sr @0xffdb, $ac0.m ; yn1
lr $ix2, @ARAM
sr @0xffdc, $ac0.m ; yn2
rti
; irq6/irq7 load their exception number into $ac0.m like irq0..irq4.
; irq7 has no jmp: it falls straight through into `irq` below.
irq6:
lri $ac0.m, #0x0006
jmp irq
irq7:
lri $ac0.m, #0x0007

; Shared tail for the fatal handlers.
; In:  $ac0.m = exception number.
; Sends the mail 0x8bad:<exception number> to the CPU, raises DIRQ and halts.
; Clobbers $ac1.m. Never returns.
irq:
; Spin while the top bit of DMBH is set before writing a new mail.
lrs $ac1.m, @DMBH
andcf $ac1.m, #0x8000
jlz irq
si @DMBH, #0x8bad
;sr @DMBL, $wr3 ; ???
sr @DMBL, $ac0.m ; Exception number
si @DIRQ, #0x0001
halt ; Through some magic this allows us to properly ack the exception in dspspy
;rti ; allow dumping of ucodes which cause exceptions...probably not safe at all

; DMA:s the current state of the registers back to the PowerPC. To do this,
; it must write the contents of all regs to DRAM.
;
; Register table layout (word offsets from REGS_BASE), as used below:
;   0 $ar0, 1-3 $ar1-$ar3, 4-7 $ix0-$ix3, 8-11 $wr0-$wr3, 12-15 $st0-$st3,
;   16 $ac0.h, 17 $ac1.h, 18 $cr, 19 $sr, 20-23 $prod.l/.m1/.h/.m2,
;   24 $ax0.l, 25 $ax1.l, 26 $ax0.h, 27 $ax1.h,
;   28 $ac0.l, 29 $ac1.l, 30 $ac0.m, 31 $ac1.m
;
; After the DMA and the mail handshake every register is reloaded from the
; same table before returning.
send_back:
; first, store $sr so we can modify it
sr @(REGS_BASE + 19), $sr
set16
; Now store $wr0, as it must be 0xffff for srri to work as we expect
sr @(REGS_BASE + 8), $wr0
lri $wr0, #0xffff
; store registers to reg table
; $ar0 is saved to slot 0 first because it becomes the table pointer.
sr @REGS_BASE, $ar0
lri $ar0, #(REGS_BASE + 1)
srri @$ar0, $ar1
srri @$ar0, $ar2
srri @$ar0, $ar3
srri @$ar0, $ix0
srri @$ar0, $ix1
srri @$ar0, $ix2
srri @$ar0, $ix3
; skip $wr0 since we already stored and modified it
iar $ar0
srri @$ar0, $wr1
srri @$ar0, $wr2
srri @$ar0, $wr3
srri @$ar0, $st0
srri @$ar0, $st1
srri @$ar0, $st2
srri @$ar0, $st3
srri @$ar0, $ac0.h
srri @$ar0, $ac1.h
srri @$ar0, $cr
; skip $sr since we already stored and modified it
iar $ar0
srri @$ar0, $prod.l
srri @$ar0, $prod.m1
srri @$ar0, $prod.h
srri @$ar0, $prod.m2
srri @$ar0, $ax0.l
srri @$ar0, $ax1.l
srri @$ar0, $ax0.h
srri @$ar0, $ax1.h
srri @$ar0, $ac0.l
srri @$ar0, $ac1.l
srri @$ar0, $ac0.m
srri @$ar0, $ac1.m

; Regs are stored. Prepare DMA.
; $cr must be 0x00ff because the ROM uses lrs and srs with the assumption that
; they will modify hardware registers.
; do_dma arguments: $ax0.l -> DSPA, $ax1.l -> DSCR, $ax0.h -> DSBL,
; $ac0.l -> DSMAH, $ac0.m -> DSMAL.
lri $cr, #0x00ff
lri $ax0.l, #0x0000
lri $ax1.l, #1 ;(DSP_CR_IMEM | DSP_CR_TO_CPU)
lri $ax0.h, #0x200
lr $ac0.l, @MEM_HI
lr $ac0.m, @MEM_LO

; The transfer is split into 8+8 = 16 DMAs, each with 0x200 in DSBL.
; After every DMA the value destined for DSMAL ($ac0.m) grows by 0x200 and
; the value destined for DSPA ($ax0.l) grows by 0x100.
; $ar1 is free to use as the loop counter because it was saved above and is
; reloaded below. The loop body ends with the instruction at dma_copy.
lri $ar1, #8+8
bloop $ar1, dma_copy
call do_dma
addi $ac0.m, #0x200
mrr $ac1.m, $ax0.l
addi $ac1.m, #0x100
dma_copy:
mrr $ax0.l, $ac1.m

; Tell the CPU we are done: mail 0x8888feeb plus a DSP interrupt.
; ROM routine 0x807e is called before writing the mailbox; presumably it
; waits until the DSP->CPU mailbox is free - TODO confirm against the ROM.
call 0x807e
si @DMBH, #0x8888
si @DMBL, #0xfeeb
si @DIRQ, #0x0001

; wait for the CPU to receive our response before we execute the next op
; ROM routine 0x8078 presumably blocks until the CPU writes its mailbox -
; TODO confirm. The values read into $ac0.m/$ac1.m are not used; both
; registers are overwritten by the restore below.
call 0x8078
andi $ac0.m, #0x7fff
lrs $ac1.m, @CMBL

; Restore all regs again so we're ready to execute another op.
lri $ar0, #REGS_BASE+1
lrri $ar1, @$ar0
lrri $ar2, @$ar0
lrri $ar3, @$ar0
lrri $ix0, @$ar0
lrri $ix1, @$ar0
lrri $ix2, @$ar0
lrri $ix3, @$ar0
; leave $wr for later
; ($wr0 is only restored once $ar0 is no longer used as the table pointer)
iar $ar0
lrri $wr1, @$ar0
lrri $wr2, @$ar0
lrri $wr3, @$ar0
lrri $st0, @$ar0
lrri $st1, @$ar0
lrri $st2, @$ar0
lrri $st3, @$ar0
lrri $ac0.h, @$ar0
lrri $ac1.h, @$ar0
lrri $cr, @$ar0
; leave $sr for later
iar $ar0
lrri $prod.l, @$ar0
lrri $prod.m1, @$ar0
lrri $prod.h, @$ar0
lrri $prod.m2, @$ar0
lrri $ax0.l, @$ar0
lrri $ax1.l, @$ar0
lrri $ax0.h, @$ar0
lrri $ax1.h, @$ar0
lrri $ac0.l, @$ar0
lrri $ac1.l, @$ar0
lrri $ac0.m, @$ar0
lrri $ac1.m, @$ar0
; Finally restore the three registers that were modified for the dump itself.
lr $ar0, @REGS_BASE
lr $wr0, @(REGS_BASE+8)
lr $sr, @(REGS_BASE+19)

ret ; from send_back
@@ -6,13 +6,14 @@ include "dsp_base.inc"

; We can call send_back at any time to send data back to the PowerPC.

lri $AC0.M, #0x1000
call send_back
test_main:
lri $AC0.M, #0x1000
call send_back

set40
lri $AC0.M, #0x1000
set16
call send_back
set40
lri $AC0.M, #0x1000
set16
call send_back

; We're done, DO NOT DELETE THIS LINE
jmp end_of_test
jmp end_of_test
@@ -30,7 +30,7 @@ include "dsp_base.inc"

; I really don't know how the above could possibly be efficiently implemented in hardware.
; And thus it's tricky to implement in software too :p

test_main:
; test using indexing register 1 - 0 is used in send_back
lri $AR1, #16
lri $IX1, #32
@@ -2,6 +2,7 @@
incdir "tests"
include "dsp_base.inc"

test_main:
lri $AR0, #0x001c
lri $AR1, #0x001d
lri $AR2, #0x001e
@@ -1,6 +1,7 @@
incdir "tests"
include "dsp_base.inc"

test_main:
CLR $acc0
CLR $acc1
LRI $ac0.h, #0x0050
@@ -2,8 +2,9 @@
incdir "tests"
include "dsp_base.inc"

; Results is in capitails like this: UNSIGNED
; Results are in capitals like this: UNSIGNED

test_main:
CLR15

; Test MULXMVZ - SET15
@@ -2,6 +2,7 @@
incdir "tests"
include "dsp_base.inc"

test_main:
clr $ACC0
neg $ACC0

@@ -10,6 +10,7 @@ include "dsp_base.inc"
// 0x02cb is the same opcode, but arithmetic.
// We'll call it asrn, no arguments.

test_main:
clr $ACC0
clr $ACC1
lri $AC0.H, #0
@@ -1,6 +1,7 @@
incdir "tests"
include "dsp_base.inc"

test_main:
; Test what happens when various values are written to every register
LRI $ar0, #0xffff
CALL set_all_regs
@@ -0,0 +1,55 @@
; This test needs to manually specify IRQs
; Eight interrupt vectors, one jmp each, placed before the include of
; dsp_base_noirq.inc (which starts the main code at 0x10).
; Vector 5 goes to this file's own accov_irq handler; every other vector uses
; the default irqN handler from dsp_base_noirq.inc.
jmp irq0
jmp irq1
jmp irq2
jmp irq3
jmp irq4
jmp accov_irq
jmp irq6
jmp irq7

incdir "tests"
include "dsp_base_noirq.inc"

; Test body: triggers the accelerator interrupt three times, with $AX1.H set
; to 0, 1 and 0 in turn, and sends the register state back to the CPU after
; each one. accov_irq tests $AX1.H and writes a marker value to $AX1.L.
test_main:
; Use the accelerator to generate an IRQ by setting the start and end address to 0
; This will result in an interrupt on every read
SI @0xffda, #0 ; pred_scale
SI @0xffdb, #0 ; yn1
SI @0xffdc, #0 ; yn2
SI @0xffd1, #0 ; SampleFormat
SI @ACSAH, #0
SI @ACCAH, #0
SI @ACSAL, #0
SI @ACCAL, #0
SI @ACEAH, #0
SI @ACEAL, #0


; Case 1: $AX1.H = 0 when the interrupt is taken.
LRI $AX1.H, #0x0000
LRS $AX0.L, @ARAM ; Trigger interrupt
CALL send_back

; Case 2: $AX1.H = 1 when the interrupt is taken.
LRI $AX1.H, #0x0001
LRS $AX0.L, @ARAM ; Trigger interrupt
CALL send_back

; Case 3: $AX1.H = 0 again.
LRI $AX1.H, #0x0000
LRS $AX0.L, @ARAM ; Trigger interrupt
CALL send_back

jmp end_of_test

; Handler for interrupt vector 5 in this test.
; Probes two raw opcodes that may be conditional returns-from-interrupt.
; $AX1.L is used as a marker: it holds the value loaded just before whichever
; return instruction actually left the handler.
;   0x1111 - left via code word 0x02f4 (RTINZ, if that opcode exists)
;   0x2222 - left via code word 0x02f5 (RTIZ, if that opcode exists)
;   0x3333 - neither code word returned; left via the plain RTI
; Clobbers $AX1.L and the flags set by TSTAXH.
accov_irq:
; Restore registers, otherwise no new interrupt will be generated
SI @0xffda, #0 ; pred_scale
SI @0xffdb, #0 ; yn1
SI @0xffdc, #0 ; yn2

; Set the flags from $AX1.H, which test_main loads with 0 or 1 beforehand.
TSTAXH $AX1.H
LRI $AX1.L, #0x1111
cw 0x02f4 ; RTINZ if it exists
LRI $AX1.L, #0x2222
cw 0x02f5 ; RTIZ if it exists
LRI $AX1.L, #0x3333
RTI
@@ -0,0 +1,131 @@
incdir "tests"
include "dsp_base.inc"

; Test body: checks all four combinations of SR/SRS stores with LR/LRS loads
; on memory addresses 0x0000-0x0007.
; Each case builds a distinct register pattern, stores it, sends the state
; back, clears the registers, loads from memory and sends the state again.
test_main:
; Test registers used by LRS and SRS
; $CR is set to 0; memory 0x0000-0x0007 is then zeroed by storing the
; cleared registers.
LRI $CR, #0x0000
CALL clear_regs
CALL store_mem_sr

; Write with SR, read with LR
LRI $AR0, #0xA00A
CALL create_pattern
CALL store_mem_sr
CALL send_back
CALL clear_regs
CALL read_mem_lr
CALL send_back

; Write with SR, read with LRS
LRI $AR0, #0xB00B
CALL create_pattern
CALL store_mem_sr
CALL send_back
CALL clear_regs
CALL read_mem_lrs
CALL send_back

; Write with SRS, read with LR
LRI $AR0, #0xC00C
CALL create_pattern
CALL store_mem_srs
CALL send_back
CALL clear_regs
CALL read_mem_lr
CALL send_back

; Write with SRS, read with LRS
LRI $AR0, #0xD00D
CALL create_pattern
CALL store_mem_srs
CALL send_back
CALL clear_regs
CALL read_mem_lrs
CALL send_back

; We're done, DO NOT DELETE THIS LINE
JMP end_of_test

; Fills the AX and AC sub-registers with a recognisable sequence derived
; from $AR0.
; In:  $AR0 = seed value.
; Out: $AX0.L, $AX1.L, $AX0.H, $AX1.H, $AC0.L, $AC1.L, $AC0.M, $AC1.M,
;      $AC0.H, $AC1.H receive successive copies of $AR0, with $IX0 (0x0110)
;      added to $AR0 between copies.
;      (Assumes the addition is not affected by wrapping - TODO confirm.)
; Clobbers: $AR0, $IX0.
create_pattern:
LRI $IX0, #0x0110
MRR $AX0.L, $AR0
ADDARN $AR0, $IX0
MRR $AX1.L, $AR0
ADDARN $AR0, $IX0
MRR $AX0.H, $AR0
ADDARN $AR0, $IX0
MRR $AX1.H, $AR0
ADDARN $AR0, $IX0
MRR $AC0.L, $AR0
ADDARN $AR0, $IX0
MRR $AC1.L, $AR0
ADDARN $AR0, $IX0
MRR $AC0.M, $AR0
ADDARN $AR0, $IX0
MRR $AC1.M, $AR0
ADDARN $AR0, $IX0
; AC0.H and AC1.H have odd results since they're 8-bit sign-extended, but that's fine.
MRR $AC0.H, $AR0
ADDARN $AR0, $IX0
MRR $AC1.H, $AR0
RET

; Zeroes every AX and AC sub-register so a following load test starts from a
; known state. Uses plain immediate loads only.
; Out: $AX0, $AX1 (L/H) and $AC0, $AC1 (L/M/H) all read as 0.
clear_regs:
; extra accumulators
lri $ax0.l, #0
lri $ax0.h, #0
lri $ax1.l, #0
lri $ax1.h, #0
; accumulator 0
lri $ac0.l, #0
lri $ac0.m, #0
lri $ac0.h, #0
; accumulator 1
lri $ac1.l, #0
lri $ac1.m, #0
lri $ac1.h, #0
ret

; Loads memory words 0x0000-0x0007 with LR (full 16-bit address operand).
; Out: $AX0.L, $AX1.L, $AX0.H, $AX1.H, $AC0.L, $AC1.L, $AC0.M, $AC1.M
;      <- @0x0000 .. @0x0007, in that order.
; $AC0.H and $AC1.H are not loaded by this routine.
read_mem_lr:
LR $AX0.L, @0x0000
LR $AX1.L, @0x0001
LR $AX0.H, @0x0002
LR $AX1.H, @0x0003
LR $AC0.L, @0x0004
LR $AC1.L, @0x0005
LR $AC0.M, @0x0006
LR $AC1.M, @0x0007
RET

; Same register/offset mapping as read_mem_lr, but using LRS with 8-bit
; offsets 0x00-0x07. test_main sets $CR to 0 before running these cases.
; Out: $AX0.L, $AX1.L, $AX0.H, $AX1.H, $AC0.L, $AC1.L, $AC0.M, $AC1.M.
read_mem_lrs:
LRS $AX0.L, @0x00
LRS $AX1.L, @0x01
LRS $AX0.H, @0x02
LRS $AX1.H, @0x03
LRS $AC0.L, @0x04
LRS $AC1.L, @0x05
LRS $AC0.M, @0x06
LRS $AC1.M, @0x07
RET

; Stores the eight test registers to memory words 0x0000-0x0007 with SR
; (full 16-bit address operand).
; In: $AX0.L, $AX1.L, $AX0.H, $AX1.H, $AC0.L, $AC1.L, $AC0.M, $AC1.M
;     -> @0x0000 .. @0x0007, in that order. No registers are modified.
store_mem_sr:
SR @0x0000, $AX0.L
SR @0x0001, $AX1.L
SR @0x0002, $AX0.H
SR @0x0003, $AX1.H
SR @0x0004, $AC0.L
SR @0x0005, $AC1.L
SR @0x0006, $AC0.M
SR @0x0007, $AC1.M
RET

; SRS counterpart of store_mem_sr, emitted as raw code words 0x2800-0x2F07.
; The low byte of each word is the offset (0x00-0x07). Per the notes below,
; the first four words do not behave like stores of the AX registers.
store_mem_srs:
; For future compatibility these have been changed to cw.
; The way the instructions were originally encoded is commented,
; but this does not match their behavior.
cw 0x2800 ; SRS @0x00, $AX0.L - actually SRSH @0x00, $AC0.H
cw 0x2901 ; SRS @0x01, $AX1.L - actually SRSH @0x01, $AC1.H
cw 0x2A02 ; SRS @0x02, $AX0.H - actually unknown, no store performed
cw 0x2B03 ; SRS @0x03, $AX1.H - actually unknown, no store performed
cw 0x2C04 ; SRS @0x04, $AC0.L
cw 0x2D05 ; SRS @0x05, $AC1.L
cw 0x2E06 ; SRS @0x06, $AC0.M
cw 0x2F07 ; SRS @0x07, $AC1.M
RET
@@ -2,7 +2,7 @@ incdir "tests"
include "dsp_base.inc"

; Reads regs from 0xFF80 to 0xFF8D and sends them back

test_main:
lr $AC0.M, @0xff80
call send_back

@@ -86,11 +86,11 @@ WARNPC 0x8b
ORG 0x8b
; called by GBA ucode
dram_to_cpu:
srs @DSMAH, $AX0.H
srs @DSMAL, $AX0.L
sr @DSMAH, $AX0.H
sr @DSMAL, $AX0.L
si @DSCR, #0x1
srs @DSPA, $AX1.H
srs @DSBL, $AX1.L
sr @DSPA, $AX1.H
sr @DSBL, $AX1.L
call wait_dma+#IROM_BASE
ret

@@ -108,11 +108,11 @@ ORG 0xbc
; called by GBA ucode
bootucode_ax:
lris $AC0.M, #0
srs @DSCR, $AC0.M
srs @DSMAH, $AX0.H
srs @DSMAL, $AX0.L
srs @DSPA, $AX1.H
srs @DSBL, $AX1.L
srs @DSCR, $AC0.M
sr @DSMAH, $AX0.H
sr @DSMAL, $AX0.L
sr @DSPA, $AX1.H
sr @DSBL, $AX1.L
call wait_dma+#IROM_BASE

bootucode_ix:
@@ -145,9 +145,15 @@ ORG 0xe7
; Returns:
; AX0.L is the value of the last sample from input 1
; AX1.H is the value of the last sample from input 2
;
; for i = 0..31:
; ar3[i] = ((ar2[i] << 16) + ar0[i] * ar1[0]) >> 16
; for i = 0..31:
; ix1[i] = ((ix1[i] << 16) + ix0[i] * ar1[2]) >> 16
; ax0.l = ar0[31] * ar1[0]
; ax1.h = ix0[31] * ar1[2]
mix_two_add:
call mix_add+#IROM_BASE
iar $AR1
mrr $AR0, $IX0
mrr $AR2, $IX1
mrr $AR3, $IX1
@@ -183,8 +189,14 @@ ORG 0x1f9
; Returns:
; AX0.L is the value of the last sample
; AX1.H is the first address after the output
;
; for i = 0..31:
; ar3[i] = ((ar2[i] << 16) + ar0[i] * ar1[0]) >> 16
; ax0.l = ar0[31] * ar1[0]
; ax1.h = ar3 + 32 // assuming ar3 is a s32 pointer
mix_add:
lrri $AX1.L, @$AR1
iar $AR1
bloopi #32, ____mix_add_end_loop+#IROM_BASE
lrri $AC0.M, @$AR2
lrri $AC0.L, @$AR2
@@ -203,6 +215,13 @@ ____mix_add_end_loop:

WARNPC 0x282
ORG 0x282
; for i = 0..31:
; ar3[i] = ar1[0] + i * ar1[1]
; ar2[i] = ((ar2[i] << 16) + ar0[i] * ar3[i]) >> 16
; ar3[i+32] = ar1[2] + i * ar1[3]
; ix1[i] = ((ix1[i] << 16) + ix0[i] * ar3[i+32]) >> 16
; ax0.l = ar0[31] * ar3[31]
; ax1.h = ix0[31] * ar3[63]
mix_two_add_ramp:
call mix_add_ramp+#IROM_BASE
mrr $AR0, $IX0
@@ -227,6 +246,10 @@ sub_8458:

WARNPC 0x45d
ORG 0x45d
; for i = 0..31:
; ar3[i] = ar1[0] + i * ar1[1]
; ar2[i] = ((ar2[i] << 16) + ar0[i] * ar3[i]) >> 16
; ax0.l = ar0[31] * ar3[31]
mix_add_ramp:
clr $ACC0
clr $ACC1
@@ -259,7 +282,6 @@ ____mix_add_ramp_end_loop:
srri @$AR3, $AC0.L
movp $ACC0
mrr $AX0.L, $AC0.M
mrr $AX1.H, $AR3
mrr $AR1, $IX3
mrr $AR3, $IX2
ret
@@ -1,64 +1,75 @@
Legal GC/WII DSP IROM replacement (v0.3.1)
-------------------------------------------------------

- irom: When running from the ROM entrypoint, skip the bootucode_ax branch
of the bootucode procedure. Since the ROM doesn't set any of the AX
registers, it could cause bad DMA transfers and crashes.

ligfx
10/aug/2017

Legal GC/WII DSP IROM replacement (v0.3)
-------------------------------------------------------

- coef: Explicitly set 23 different values that are used by GBA UCode, and
tweaked overall parameters to more closely match those 23 values.
- irom: Moved a few functions to their proper places, updated BootUCode to
configure DMA transfers using AX registers as well as IX registers (the GBA
UCode uses this to do two sequential transfers in one call), and added
partial functions used by GBA UCode.

ligfx
2/june/2017

Legal GC/WII DSP IROM replacement (v0.2.1)
-------------------------------------------------------

- coef: 4-tap polyphase FIR filters
- irom: unchanged

Coefficients are roughly equivalent to those in the official DROM.
Improves resampling quality greatly over linear interpolation.
See generate_coefs.py for details.

stgn
29/june/2015

Legal GC/WII DSP IROM replacement (v0.2)
-------------------------------------------------------

- coef: crafted to use a linear interpolation when resampling (instead of
having a real 4 TAP FIR filter)
- irom: added all the mixing functions, some functions not used by AX/Zelda are
still missing

Should work with all AX, AXWii and Zelda UCode games. Card/IPL/GBA are most
likely still broken with it and require a real DSP ROM.

delroth
16/march/2013

Legal GC/WII DSP IROM replacement (v0.1)
-------------------------------------------------------
- coef: fake (zeroes)
- irom: reversed and rewrote ucode loading/reset part, everything else is missing

Good enough for Zelda ucode games (and maybe some AX too):
- WII: SMG 1/2, Pikmin 1/2 WII, Zelda TP WII, Donkey Kong Jungle Beat (WII), ...
- GC: Mario Kart Double Dash, Luigi Mansion, Super Mario Sunshine, Pikmin 1/2, Zelda WW, Zelda TP, ...

Basically... If game is not using coef and irom mixing functions it will work ok.
Dolphin emulator will report wrong CRCs, but it will work ok with mentioned games.

LM
31/july/2011
Legal GC/WII DSP IROM replacement (v0.4) (0xe789b5a5, 0xa4a575f5)
-------------------------------------------------------

- irom: Minor accuracy and documentation improvements
- irom: Remove use of SRS instruction with AX registers, as those instructions
do not actually exist

Tilka, Pokechu22
17/aug/2021

Legal GC/WII DSP IROM replacement (v0.3.1) (0x128ea7a2, 0xa4a575f5)
-------------------------------------------------------

- irom: When running from the ROM entrypoint, skip the bootucode_ax branch
of the bootucode procedure. Since the ROM doesn't set any of the AX
registers, it could cause bad DMA transfers and crashes.

ligfx
10/aug/2017

Legal GC/WII DSP IROM replacement (v0.3) (0x3aa4a793, 0xa4a575f5)
-------------------------------------------------------

- coef: Explicitly set 23 different values that are used by GBA UCode, and
tweaked overall parameters to more closely match those 23 values.
- irom: Moved a few functions to their proper places, updated BootUCode to
configure DMA transfers using AX registers as well as IX registers (the GBA
UCode uses this to do two sequential transfers in one call), and added
partial functions used by GBA UCode.

ligfx
2/june/2017

Legal GC/WII DSP IROM replacement (v0.2.1) (0xd9907f71, 0xdb6880c1)
-------------------------------------------------------

- coef: 4-tap polyphase FIR filters
- irom: unchanged

Coefficients are roughly equivalent to those in the official DROM.
Improves resampling quality greatly over linear interpolation.
See generate_coefs.py for details.

stgn
29/june/2015

Legal GC/WII DSP IROM replacement (v0.2) (0xd9907f71, 0xb019c2fb)
-------------------------------------------------------

- coef: crafted to use a linear interpolation when resampling (instead of
having a real 4 TAP FIR filter)
- irom: added all the mixing functions, some functions not used by AX/Zelda are
still missing

Should work with all AX, AXWii and Zelda UCode games. Card/IPL/GBA are most
likely still broken with it and require a real DSP ROM.

delroth
16/march/2013

Legal GC/WII DSP IROM replacement (v0.1) (0x9c8f593c, 0x10000001)
-------------------------------------------------------

- coef: fake (zeroes)
- irom: reversed and rewrote ucode loading/reset part, everything else is missing

Good enough for Zelda ucode games (and maybe some AX too):
- WII: SMG 1/2, Pikmin 1/2 WII, Zelda TP WII, Donkey Kong Jungle Beat (WII), ...
- GC: Mario Kart Double Dash, Luigi Mansion, Super Mario Sunshine, Pikmin 1/2, Zelda WW, Zelda TP, ...

Basically, if a game does not use the coef table or the irom mixing functions, it will work fine.
The Dolphin emulator will report wrong CRCs, but the games mentioned above will still work.

LM
31/july/2011