Skip to content
This repository has been archived by the owner on Aug 30, 2021. It is now read-only.

Commit

Permalink
First pass at setting sz flags from result reg, not result
Browse files Browse the repository at this point in the history
While digging in mednafen's source code to find any clues why we have
bugs in Vertical Force and a couple other titles, I noticed that many
arithmetic instructions will store the result of an operation to a
register, then set the sign/zero flags from the value of the register.
This is subtly different than much of our code, which sets _both_ the
register value and the sign/zero flags based on the result of the
operations directly, without going through the target register in the
case of the sign/zero flags.

This has a subtle implication that whenever r0 is used as the target of
an operation, regardless of what its value gets set to, the sign/zero
flags would be set/cleared based on the value of the _register_ (not the
result that was just stored into the register necessarily), which would
mean they would always be cleared.

This commit changes our logic to match this, and even affects the bugs
in Vertical Force that we're trying to nail down (powerups moving on a
sine wave disappear for half the sine curve, and some other similar
bugs), however, it didn't fix them. Now, when they disappear, they don't
come back!

After further digging in mednafen's source, I found that their ops will
store to a register (even r0, incorrectly overwriting its always-0
value). To compensate for this, they set r0 to 0 before each
instruction. This means their logic for these flags should match what we
have _without_ the changes in this commit, so I don't think this code is
valid, unless I've missed yet another detail here. :)

What's particularly interesting, though, is that it _did_ affect the
game logic we're trying to fix in some way, so we may still be on to
something here, but I'm just going to stuff this on a branch for now
until we have more info.

Another interesting thing; while digging I _also_ found evidence that
6bdcae8 may be correct in mednafen's
source, although their code is again subtly different. They do the -0 ->
0 rounding _only when setting flags_ for fp ops, and don't change the
value they store in the target register. Also, they seem to be doing it
for many more ops than just cmpf.s, addf.s, and subf.s. Strange!
  • Loading branch information
yupferris committed Feb 11, 2017
1 parent 5524ca1 commit 9e1ad57
Showing 1 changed file with 54 additions and 77 deletions.
131 changes: 54 additions & 77 deletions rustual-boy-core/src/nvc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,12 @@ impl Nvc {
}
}

fn set_reg_gpr_and_zero_sign_flags(&mut self, index: usize, value: u32) {
self.set_reg_gpr(index, value);
let reg_value = self.reg_gpr(index);
self.set_zero_sign_flags(reg_value);
}

// TODO: Come up with a more portable way to do this conversion
fn reg_gpr_float(&self, index: usize) -> f32 {
unsafe {
Expand All @@ -214,6 +220,12 @@ impl Nvc {
}
}

fn set_reg_gpr_float_and_fp_flags(&mut self, index: usize, value: f32) {
self.set_reg_gpr_float(index, value);
let reg_value = self.reg_gpr_float(index);
self.set_fp_flags(reg_value);
}

pub fn reg_eipc(&self) -> u32 {
self.reg_eipc
}
Expand Down Expand Up @@ -365,35 +377,30 @@ impl Nvc {
OPCODE_BITS_SUB => format_i!(|reg1, reg2| {
let lhs = self.reg_gpr(reg2);
let rhs = self.reg_gpr(reg1);
let res = self.sub_and_set_flags(lhs, rhs);
self.set_reg_gpr(reg2, res);
let res = self.sub_and_set_ov_cy(lhs, rhs);
self.set_reg_gpr_and_zero_sign_flags(reg2, res);
}),
OPCODE_BITS_CMP_REG => format_i!(|reg1, reg2| {
let lhs = self.reg_gpr(reg2);
let rhs = self.reg_gpr(reg1);
self.sub_and_set_flags(lhs, rhs);
let res = self.sub_and_set_ov_cy(lhs, rhs);
self.set_zero_sign_flags(res);
}),
OPCODE_BITS_SHL_REG => format_i!(|reg1, reg2| {
let lhs = self.reg_gpr(reg2);
let rhs = self.reg_gpr(reg1);
let res = self.shl_and_set_flags(lhs, rhs);
self.set_reg_gpr(reg2, res);
self.shl(rhs, reg2);
}),
OPCODE_BITS_SHR_REG => format_i!(|reg1, reg2| {
let lhs = self.reg_gpr(reg2);
let rhs = self.reg_gpr(reg1);
let res = self.shr_and_set_flags(lhs, rhs);
self.set_reg_gpr(reg2, res);
self.shr(rhs, reg2);
}),
OPCODE_BITS_JMP => format_i!(|reg1, _| {
next_pc = self.reg_gpr(reg1) & 0xfffffffe;
num_cycles = 3;
}),
OPCODE_BITS_SAR_REG => format_i!(|reg1, reg2| {
let lhs = self.reg_gpr(reg2);
let rhs = self.reg_gpr(reg1);
let res = self.sar_and_set_flags(lhs, rhs);
self.set_reg_gpr(reg2, res);
self.sar(rhs, reg2);
}),
OPCODE_BITS_MUL => format_i!(|reg1, reg2| {
let lhs = self.reg_gpr(reg2) as i64;
Expand All @@ -403,8 +410,7 @@ impl Nvc {
let res_high = (res >> 32) as u32;
let overflow = res != ((res_low as i32) as u64);
self.set_reg_gpr(30, res_high);
self.set_reg_gpr(reg2, res_low);
self.set_zero_sign_flags(res_low);
self.set_reg_gpr_and_zero_sign_flags(reg2, res_low);
self.psw_overflow = overflow;
num_cycles = 13;
}),
Expand All @@ -421,8 +427,7 @@ impl Nvc {
(res, r30, false)
};
self.set_reg_gpr(30, r30);
self.set_reg_gpr(reg2, res);
self.set_zero_sign_flags(res);
self.set_reg_gpr_and_zero_sign_flags(reg2, res);
self.psw_overflow = overflow;
num_cycles = 38;
}),
Expand All @@ -434,8 +439,7 @@ impl Nvc {
let res_high = (res >> 32) as u32;
let overflow = res != (res_low as u64);
self.set_reg_gpr(30, res_high);
self.set_reg_gpr(reg2, res_low);
self.set_zero_sign_flags(res_low);
self.set_reg_gpr_and_zero_sign_flags(reg2, res_low);
self.psw_overflow = overflow;
num_cycles = 13;
}),
Expand All @@ -445,39 +449,34 @@ impl Nvc {
let res = lhs / rhs;
let r30 = lhs % rhs;
self.set_reg_gpr(30, r30);
self.set_reg_gpr(reg2, res);
self.set_zero_sign_flags(res);
self.set_reg_gpr_and_zero_sign_flags(reg2, res);
self.psw_overflow = false;
num_cycles = 36;
}),
OPCODE_BITS_OR => format_i!(|reg1, reg2| {
let lhs = self.reg_gpr(reg2);
let rhs = self.reg_gpr(reg1);
let res = lhs | rhs;
self.set_reg_gpr(reg2, res);
self.set_zero_sign_flags(res);
self.set_reg_gpr_and_zero_sign_flags(reg2, res);
self.psw_overflow = false;
}),
OPCODE_BITS_AND => format_i!(|reg1, reg2| {
let lhs = self.reg_gpr(reg2);
let rhs = self.reg_gpr(reg1);
let res = lhs & rhs;
self.set_reg_gpr(reg2, res);
self.set_zero_sign_flags(res);
self.set_reg_gpr_and_zero_sign_flags(reg2, res);
self.psw_overflow = false;
}),
OPCODE_BITS_XOR => format_i!(|reg1, reg2| {
let lhs = self.reg_gpr(reg2);
let rhs = self.reg_gpr(reg1);
let res = lhs ^ rhs;
self.set_reg_gpr(reg2, res);
self.set_zero_sign_flags(res);
self.set_reg_gpr_and_zero_sign_flags(reg2, res);
self.psw_overflow = false;
}),
OPCODE_BITS_NOT => format_i!(|reg1, reg2| {
let res = !self.reg_gpr(reg1);
self.set_reg_gpr(reg2, res);
self.set_zero_sign_flags(res);
self.set_reg_gpr_and_zero_sign_flags(reg2, res);
self.psw_overflow = false;
}),
OPCODE_BITS_MOV_IMM => format_ii!(|imm5, reg2| {
Expand Down Expand Up @@ -514,30 +513,25 @@ impl Nvc {
OPCODE_BITS_CMP_IMM => format_ii!(|imm5, reg2| {
let lhs = self.reg_gpr(reg2);
let rhs = sign_extend_imm5(imm5);
self.sub_and_set_flags(lhs, rhs);
let res = self.sub_and_set_ov_cy(lhs, rhs);
self.set_zero_sign_flags(res);
}),
OPCODE_BITS_SHL_IMM => format_ii!(|imm5, reg2| {
let lhs = self.reg_gpr(reg2);
let rhs = imm5 as u32;
let res = self.shl_and_set_flags(lhs, rhs);
self.set_reg_gpr(reg2, res);
self.shl(rhs, reg2);
}),
OPCODE_BITS_SHR_IMM => format_ii!(|imm5, reg2| {
let lhs = self.reg_gpr(reg2);
let rhs = sign_extend_imm5(imm5);
let res = self.shr_and_set_flags(lhs, rhs);
self.set_reg_gpr(reg2, res);
let rhs = imm5 as u32;
self.shr(rhs, reg2);
}),
OPCODE_BITS_CLI => format_ii!(|_, _| {
self.psw_interrupt_disable = false;

num_cycles = 12;
}),
OPCODE_BITS_SAR_IMM => format_ii!(|imm5, reg2| {
let lhs = self.reg_gpr(reg2);
let rhs = imm5 as u32;
let res = self.sar_and_set_flags(lhs, rhs);
self.set_reg_gpr(reg2, res);
self.sar(rhs, reg2);
}),
OPCODE_BITS_RETI => format_ii!(|_, _| {
next_pc = self.return_from_exception();
Expand Down Expand Up @@ -646,24 +640,21 @@ impl Nvc {
let lhs = self.reg_gpr(reg1);
let rhs = imm16 as u32;
let res = lhs | rhs;
self.set_reg_gpr(reg2, res);
self.set_zero_sign_flags(res);
self.set_reg_gpr_and_zero_sign_flags(reg2, res);
self.psw_overflow = false;
}),
OPCODE_BITS_AND_I => format_v!(|reg1, reg2, imm16| {
let lhs = self.reg_gpr(reg1);
let rhs = imm16 as u32;
let res = lhs & rhs;
self.set_reg_gpr(reg2, res);
self.set_zero_sign_flags(res);
self.set_reg_gpr_and_zero_sign_flags(reg2, res);
self.psw_overflow = false;
}),
OPCODE_BITS_XOR_I => format_v!(|reg1, reg2, imm16| {
let lhs = self.reg_gpr(reg1);
let rhs = imm16 as u32;
let res = lhs ^ rhs;
self.set_reg_gpr(reg2, res);
self.set_zero_sign_flags(res);
self.set_reg_gpr_and_zero_sign_flags(reg2, res);
self.psw_overflow = false;
}),
OPCODE_BITS_MOVHI => format_v!(|reg1, reg2, imm16| {
Expand Down Expand Up @@ -755,58 +746,47 @@ impl Nvc {
}
OPCODE_BITS_SUB_OP_CVT_WS => {
let value = (self.reg_gpr(reg1) as i32) as f32;
self.set_reg_gpr_float(reg2, value);

self.set_fp_flags(value);
self.set_reg_gpr_float_and_fp_flags(reg2, value);

num_cycles = 16;
}
OPCODE_BITS_SUB_OP_CVT_SW => {
let value = (self.reg_gpr_float(reg1).round() as i32) as u32;
self.set_reg_gpr(reg2, value);
self.set_reg_gpr_and_zero_sign_flags(reg2, value);

self.psw_overflow = false;
self.set_zero_sign_flags(value);

num_cycles = 14;
}
OPCODE_BITS_SUB_OP_ADDF_S => {
let lhs = self.reg_gpr_float(reg2);
let rhs = self.reg_gpr_float(reg1);
let value = lhs + rhs;
self.set_reg_gpr_float(reg2, value);

self.set_fp_flags(value);
self.set_reg_gpr_float_and_fp_flags(reg2, value);

num_cycles = 28;
}
OPCODE_BITS_SUB_OP_SUBF_S => {
let lhs = self.reg_gpr_float(reg2);
let rhs = self.reg_gpr_float(reg1);
let value = lhs - rhs;
self.set_reg_gpr_float(reg2, value);

self.set_fp_flags(value);
self.set_reg_gpr_float_and_fp_flags(reg2, value);

num_cycles = 28;
}
OPCODE_BITS_SUB_OP_MULF_S => {
let lhs = self.reg_gpr_float(reg2);
let rhs = self.reg_gpr_float(reg1);
let value = lhs * rhs;
self.set_reg_gpr_float(reg2, value);

self.set_fp_flags(value);
self.set_reg_gpr_float_and_fp_flags(reg2, value);

num_cycles = 30;
}
OPCODE_BITS_SUB_OP_DIVF_S => {
let lhs = self.reg_gpr_float(reg2);
let rhs = self.reg_gpr_float(reg1);
let value = lhs / rhs;
self.set_reg_gpr_float(reg2, value);

self.set_fp_flags(value);
self.set_reg_gpr_float_and_fp_flags(reg2, value);

num_cycles = 44;
}
Expand Down Expand Up @@ -834,10 +814,9 @@ impl Nvc {
}
OPCODE_BITS_SUB_OP_TRNC_SW => {
let value = (self.reg_gpr_float(reg1).trunc() as i32) as u32;
self.set_reg_gpr(reg2, value);
self.set_reg_gpr_and_zero_sign_flags(reg2, value);

self.psw_overflow = false;
self.set_zero_sign_flags(value);

num_cycles = 14;
}
Expand Down Expand Up @@ -867,49 +846,48 @@ impl Nvc {

fn add(&mut self, lhs: u32, rhs: u32, reg2: usize) {
let (res, carry) = lhs.overflowing_add(rhs);
self.set_reg_gpr(reg2, res);
self.set_zero_sign_flags(res);
self.set_reg_gpr_and_zero_sign_flags(reg2, res);
self.psw_overflow = ((!(lhs ^ rhs) & (rhs ^ res)) & 0x80000000) != 0;
self.psw_carry = carry;
}

fn sub_and_set_flags(&mut self, lhs: u32, rhs: u32) -> u32 {
fn sub_and_set_ov_cy(&mut self, lhs: u32, rhs: u32) -> u32 {
let (res, carry) = lhs.overflowing_sub(rhs);
self.set_zero_sign_flags(res);
self.psw_overflow = (((lhs ^ rhs) & !(rhs ^ res)) & 0x80000000) != 0;
self.psw_carry = carry;
res
}

fn shl_and_set_flags(&mut self, lhs: u32, rhs: u32) -> u32 {
fn shl(&mut self, rhs: u32, reg2: usize) {
let lhs = self.reg_gpr(reg2);
let mut res = lhs;
let mut carry = false;
let shift = (rhs as usize) & 0x1f;
for _ in 0..shift {
carry = (res & 0x80000000) != 0;
res = res.wrapping_shl(1);
}
self.set_zero_sign_flags(res);
self.psw_overflow = false;
self.psw_carry = carry;
res
self.set_reg_gpr_and_zero_sign_flags(reg2, res);
}

fn shr_and_set_flags(&mut self, lhs: u32, rhs: u32) -> u32 {
fn shr(&mut self, rhs: u32, reg2: usize) {
let lhs = self.reg_gpr(reg2);
let mut res = lhs;
let mut carry = false;
let shift = (rhs as usize) & 0x1f;
for _ in 0..shift {
carry = (res & 0x00000001) != 0;
res = res.wrapping_shr(1);
}
self.set_zero_sign_flags(res);
self.psw_overflow = false;
self.psw_carry = carry;
res
self.set_reg_gpr_and_zero_sign_flags(reg2, res);
}

fn sar_and_set_flags(&mut self, lhs: u32, rhs: u32) -> u32 {
fn sar(&mut self, rhs: u32, reg2: usize) {
let lhs = self.reg_gpr(reg2);
let mut res = lhs;
let mut carry = false;
let shift = (rhs as usize) & 0x1f;
Expand All @@ -918,10 +896,9 @@ impl Nvc {
carry = (res & 0x00000001) != 0;
res = sign | res.wrapping_shr(1);
}
self.set_zero_sign_flags(res);
self.psw_overflow = false;
self.psw_carry = carry;
res
self.set_reg_gpr_and_zero_sign_flags(reg2, res);
}

fn set_zero_sign_flags(&mut self, value: u32) {
Expand Down

0 comments on commit 9e1ad57

Please sign in to comment.