Skip to content

Commit

Permalink
legalize instructions
Browse files Browse the repository at this point in the history
Add mov instructions for packed floating-point values; single-precision min, max, and sqrt; movntq and movntdq
  • Loading branch information
jason-conway committed Oct 16, 2022
1 parent 5e5263b commit 5bd020b
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 10 deletions.
32 changes: 26 additions & 6 deletions emu/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -268,11 +268,18 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; VMOV(xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x11: TRACEI("movupd xmm, xmm:modrm");
READMODRM; VMOV(xmm_modrm_reg, xmm_modrm_val,128); break;
case 0x12: TRACEI("movlpd xmm, modrm");
READMODRM; V_OP(movl_p, modrm_val, xmm_modrm_reg,64); break;
case 0x13: TRACEI("movlpd modrm, xmm");
READMODRM; V_OP(movl_pm, xmm_modrm_reg, modrm_val,64); break;
case 0x14: TRACEI("unpcklpd xmm, xmm:modrm");
READMODRM; V_OP(unpackl_pd, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x15: TRACEI("unpckhpd xmm, xmm:modrm");
READMODRM; V_OP(unpackh_pd, xmm_modrm_val, xmm_modrm_reg,128); break;

case 0x16: TRACEI("movhpd xmm, modrm");
READMODRM; V_OP(movh_p, modrm_val, xmm_modrm_reg,64); break;
case 0x17: TRACEI("movhpd modrm, xmm");
READMODRM; V_OP(movh_pm, xmm_modrm_reg, modrm_val,64); break;
case 0x2e: TRACEI("ucomisd xmm, xmm:modrm");
READMODRM; V_OP(single_ucomi, xmm_modrm_val, xmm_modrm_reg,64); break;
case 0x2f: TRACEI("comisd xmm, xmm:modrm");
Expand Down Expand Up @@ -422,6 +429,8 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; V_OP(mulu, xmm_modrm_val, xmm_modrm_reg, 128); break;
case 0xe6: TRACEI("cvttpd2dq xmm:modrm, xmm");
READMODRM; V_OP(cvttpd2dq, xmm_modrm_val, xmm_modrm_reg,64); break;
case 0xe7: TRACEI("movntdq xmm, xmm:modrm");
READMODRM; VMOV(xmm_modrm_reg, xmm_modrm_val,128); break;
case 0xe8: TRACEI("psubsb xmm:modrm, xmm");
READMODRM; V_OP(subss_b, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0xe9: TRACEI("psubsw xmm:modrm, xmm");
Expand Down Expand Up @@ -465,14 +474,18 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; VMOV(xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x11: TRACEI("movups xmm, xmm:modrm");
READMODRM; VMOV(xmm_modrm_reg, xmm_modrm_val,128); break;

case 0x12: TRACEI("movlps xmm, modrm");
READMODRM; V_OP(movl_p, modrm_val, xmm_modrm_reg,64); break;
case 0x13: TRACEI("movlps modrm, xmm");
READMODRM; V_OP(movl_pm, xmm_modrm_reg, modrm_val,64); break;
case 0x14: TRACEI("unpcklps xmm, xmm:modrm");
READMODRM; V_OP(unpackl_ps, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x15: TRACEI("unpckhps xmm, xmm:modrm");
READMODRM; V_OP(unpackh_ps, xmm_modrm_val, xmm_modrm_reg,128); break;
case 0x16: TRACEI("movlhps xmm, xmm:modrm");
READMODRM; V_OP(movlh_ps, xmm_modrm_val, xmm_modrm_reg,128); break;

case 0x16: TRACEI("movhps xmm, modrm");
READMODRM; V_OP(movh_p, modrm_val, xmm_modrm_reg,64); break;
case 0x17: TRACEI("movhps modrm, xmm");
READMODRM; V_OP(movh_pm, xmm_modrm_reg, modrm_val,64); break;
case 0x2e: TRACEI("ucomiss xmm, xmm:modrm");
READMODRM; V_OP(single_ucomi, xmm_modrm_val, xmm_modrm_reg,32); break;
case 0x2f: TRACEI("comiss xmm, xmm:modrm");
Expand Down Expand Up @@ -530,7 +543,8 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {

case 0xe5: TRACEI("pmulhw mm:modrm, mm");
READMODRM; V_OP(mulu, mm_modrm_val, mm_modrm_reg,64); break;

case 0xe7: TRACEI("movntq mm, mm:modrm");
READMODRM_MEM; VMOV(mm_modrm_reg, mm_modrm_val,64); break;
case 0xef: TRACEI("pxor mm:modrm, mm");
READMODRM; V_OP(xor, mm_modrm_val, mm_modrm_reg,64); break;

Expand Down Expand Up @@ -1186,6 +1200,8 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; V_OP(cvtsi2ss, modrm_val, xmm_modrm_reg,32); break;
case 0x2c: TRACEI("cvttss2si reg, xmm:modrm");
READMODRM; V_OP(cvttss2si, xmm_modrm_val, modrm_reg,32); break;
case 0x51: TRACEI("sqrtss xmm:modrm, xmm");
READMODRM; V_OP(single_fsqrt, xmm_modrm_val, xmm_modrm_reg,32); break;
case 0x5a: TRACEI("cvtss2sd xmm:modrm, xmm");
READMODRM; V_OP(cvtss2sd, xmm_modrm_val, xmm_modrm_reg,32); break;
case 0x5b: TRACEI("cvttps2dq xmm:modrm, xmm");
Expand All @@ -1197,8 +1213,12 @@ __no_instrument DECODER_RET glue(DECODER_NAME, OP_SIZE)(DECODER_ARGS) {
READMODRM; V_OP(single_fmul, xmm_modrm_val, xmm_modrm_reg,32); break;
case 0x5c: TRACEI("subss xmm:modrm, xmm");
READMODRM; V_OP(single_fsub, xmm_modrm_val, xmm_modrm_reg,32); break;
case 0x5d: TRACEI("minss xmm:modrm, xmm");
READMODRM; V_OP(single_fmin, xmm_modrm_val, xmm_modrm_reg,32); break;
case 0x5e: TRACEI("divss xmm:modrm, xmm");
READMODRM; V_OP(single_fdiv, xmm_modrm_val, xmm_modrm_reg,32); break;
case 0x5f: TRACEI("maxss xmm:modrm, xmm");
READMODRM; V_OP(single_fmax, xmm_modrm_val, xmm_modrm_reg,32); break;
case 0x6f: TRACEI("movdqu xmm:modrm, xmm");
READMODRM; VMOV(xmm_modrm_val, xmm_modrm_reg,128); break;

Expand Down
23 changes: 20 additions & 3 deletions emu/vec.c
Original file line number Diff line number Diff line change
Expand Up @@ -381,13 +381,20 @@ void vec_single_fdiv64(NO_CPU, const double *src, double *dst) { *dst /= *src; }
/* Scalar single-precision divide: replaces *dst with the quotient *dst / *src. */
void vec_single_fdiv32(NO_CPU, const float *src, float *dst) {
    *dst = *dst / *src;
}

/* Scalar double-precision square root: stores sqrt(*src) in *dst. */
void vec_single_fsqrt64(NO_CPU, const double *src, double *dst) {
    const double root = sqrt(*src);
    *dst = root;
}
/* Scalar single-precision square root: stores sqrtf(*src) in *dst. */
void vec_single_fsqrt32(NO_CPU, const float *src, float *dst) {
    const float root = sqrtf(*src);
    *dst = root;
}

/*
 * Scalar double-precision maximum with x86 MAXSD operand rules.
 *
 * src: second (source) operand.
 * dst: first (destination) operand; receives the result.
 *
 * MAXSD is not symmetric: the destination is kept only when it is strictly
 * greater than the source. If either operand is NaN, or the operands compare
 * equal (notably +0.0 vs -0.0), the source operand is the result.
 */
void vec_single_fmax64(NO_CPU, const double *src, double *dst) {
    /* Any comparison involving NaN is false, so NaNs select *src too. */
    if (!(*dst > *src)) {
        *dst = *src;
    }
}
/*
 * Scalar double-precision minimum with x86 MINSD operand rules.
 *
 * src: second (source) operand.
 * dst: first (destination) operand; receives the result.
 *
 * MINSD is not symmetric: the destination is kept only when it is strictly
 * less than the source. If either operand is NaN, or the operands compare
 * equal (notably +0.0 vs -0.0), the source operand is the result.
 */
void vec_single_fmin64(NO_CPU, const double *src, double *dst) {
    /* Any comparison involving NaN is false, so NaNs select *src too. */
    if (!(*dst < *src)) {
        *dst = *src;
    }
}
/*
 * Scalar single-precision maximum with x86 MAXSS operand rules.
 *
 * src: second (source) operand.
 * dst: first (destination) operand; receives the result.
 *
 * MAXSS is not symmetric: the destination is kept only when it is strictly
 * greater than the source. If either operand is NaN, or the operands compare
 * equal (notably +0.0 vs -0.0), the source operand is the result.
 */
void vec_single_fmax32(NO_CPU, const float *src, float *dst) {
    /* Any comparison involving NaN is false, so NaNs select *src too. */
    if (!(*dst > *src)) {
        *dst = *src;
    }
}
/*
 * Scalar single-precision minimum with x86 MINSS operand rules.
 *
 * src: second (source) operand.
 * dst: first (destination) operand; receives the result.
 *
 * MINSS is not symmetric: the destination is kept only when it is strictly
 * less than the source. If either operand is NaN, or the operands compare
 * equal (notably +0.0 vs -0.0), the source operand is the result.
 */
void vec_single_fmin32(NO_CPU, const float *src, float *dst) {
    /* Any comparison involving NaN is false, so NaNs select *src too. */
    if (!(*dst < *src)) {
        *dst = *src;
    }
}

void vec_single_ucomi32(struct cpu_state *cpu, const float *src, const float *dst) {
cpu->zf_res = cpu->pf_res = 0;
Expand Down Expand Up @@ -531,9 +538,6 @@ void vec_unpackh_pd128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
dst->f64[0] = dst->f64[1];
dst->f64[1] = src->f64[1];
}
/* Copies quadword 0 of *src into quadword 1 of *dst; quadword 0 of *dst is not written. */
void vec_movlh_ps128(NO_CPU, union xmm_reg *src, union xmm_reg *dst)
{
    dst->qw[1] = src->qw[0];
}

void vec_packss_w128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst) {
dst->u32[0] = (satsw(dst->u16[0]) << 0x00) | (satsw(dst->u16[1]) << 0x08) |
Expand Down Expand Up @@ -622,6 +626,19 @@ void vec_fmovmask_d128(NO_CPU, const union xmm_reg *src, uint32_t *dst) {
}
}

/* Loads the 64-bit value at src into quadword 0 of *dst; quadword 1 is not written. */
void vec_movl_p64(NO_CPU, const uint64_t *src, union xmm_reg *dst)
{
    dst->qw[0] = src[0];
}
/* Stores quadword 0 of *src into the 64-bit location at dst. */
void vec_movl_pm64(NO_CPU, const union xmm_reg *src, uint64_t *dst)
{
    dst[0] = src->qw[0];
}
/* Loads the 64-bit value at src into quadword 1 of *dst; quadword 0 is not written. */
void vec_movh_p64(NO_CPU, const uint64_t *src, union xmm_reg *dst)
{
    dst->qw[1] = src[0];
}
/* Stores quadword 1 of *src into the 64-bit location at dst. */
void vec_movh_pm64(NO_CPU, const union xmm_reg *src, uint64_t *dst)
{
    dst[0] = src->qw[1];
}

/*
 * Reads one 16-bit lane of *src into *dst.
 * The lane number is index reduced modulo 8, so any index value is accepted.
 */
void vec_extract_w128(NO_CPU, const union xmm_reg *src, uint32_t *dst, uint8_t index)
{
    const unsigned lane = index & 7u; /* same as index % 8 for an unsigned byte */
    *dst = src->u16[lane];
}
Expand Down
10 changes: 9 additions & 1 deletion emu/vec.h
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,12 @@ void vec_single_fsub32(NO_CPU, const float *src, float *dst);
void vec_single_fdiv64(NO_CPU, const double *src, double *dst);
void vec_single_fdiv32(NO_CPU, const float *src, float *dst);
/* Scalar square root: *dst = sqrt(*src) (64-bit) or sqrtf(*src) (32-bit). */
void vec_single_fsqrt64(NO_CPU, const double *src, double *dst);
void vec_single_fsqrt32(NO_CPU, const float *src, float *dst);

/*
 * Scalar max/min on a single value. *src is written to *dst when it compares
 * greater (fmax) or less (fmin) than *dst, or when either operand is NaN.
 */
void vec_single_fmax64(NO_CPU, const double *src, double *dst);
void vec_single_fmax32(NO_CPU, const float *src, float *dst);
void vec_single_fmin64(NO_CPU, const double *src, double *dst);
void vec_single_fmin32(NO_CPU, const float *src, float *dst);
void vec_single_ucomi32(struct cpu_state *cpu, const float *src, const float *dst);
void vec_single_ucomi64(struct cpu_state *cpu, const double *src, const double *dst);
void vec_single_fcmp64(NO_CPU, const double *src, union xmm_reg *dst, uint8_t type);
Expand Down Expand Up @@ -124,7 +127,7 @@ void vec_unpackh_d128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_unpackh_dq128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_unpackh_ps128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_unpackh_pd128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_movlh_ps128(NO_CPU, union xmm_reg *src, union xmm_reg *dst);

void vec_shuffle_lw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst, uint8_t encoding);
void vec_shuffle_hw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst, uint8_t encoding);
void vec_shuffle_d128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst, uint8_t encoding);
Expand All @@ -136,6 +139,11 @@ void vec_compares_gtb128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_compares_gtw128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);
void vec_compares_gtd128(NO_CPU, const union xmm_reg *src, union xmm_reg *dst);

/*
 * 64-bit moves between a uint64_t and one quadword of a union xmm_reg:
 *   vec_movl_p64  / vec_movh_p64   copy *src into dst->qw[0] / dst->qw[1];
 *   vec_movl_pm64 / vec_movh_pm64  copy src->qw[0] / src->qw[1] into *dst.
 * The other quadword of the register is not written.
 */
void vec_movl_p64(NO_CPU, const uint64_t *src, union xmm_reg *dst);
void vec_movl_pm64(NO_CPU, const union xmm_reg *src, uint64_t *dst);
void vec_movh_p64(NO_CPU, const uint64_t *src, union xmm_reg *dst);
void vec_movh_pm64(NO_CPU, const union xmm_reg *src, uint64_t *dst);

void vec_movmask_b128(NO_CPU, const union xmm_reg *src, uint32_t *dst);
void vec_fmovmask_d128(NO_CPU, const union xmm_reg *src, uint32_t *dst);
void vec_extract_w128(NO_CPU, const union xmm_reg *src, uint32_t *dst, uint8_t index);
Expand Down

0 comments on commit 5bd020b

Please sign in to comment.