diff --git a/src/broadcom/.gitignore b/src/broadcom/.gitignore index 5442872127..92edd58f8c 100644 --- a/src/broadcom/.gitignore +++ b/src/broadcom/.gitignore @@ -1,2 +1,3 @@ cle/v3d_xml.h cle/*_pack.h +qpu/tests/qpu_disasm diff --git a/src/broadcom/Makefile.am b/src/broadcom/Makefile.am index 9ebfe4584b..ce2fd7df41 100644 --- a/src/broadcom/Makefile.am +++ b/src/broadcom/Makefile.am @@ -24,6 +24,8 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/include \ -I$(top_builddir)/src \ -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/broadcom/ \ + -I$(top_srcdir)/src/broadcom/include \ $(VALGRIND_CFLAGS) \ $(DEFINES) @@ -49,5 +51,6 @@ PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) include Makefile.genxml.am include Makefile.cle.am +include Makefile.vc5.am CLEANFILES += $(BUILT_SOURCES) diff --git a/src/broadcom/Makefile.sources b/src/broadcom/Makefile.sources index 92f972754c..d40febae67 100644 --- a/src/broadcom/Makefile.sources +++ b/src/broadcom/Makefile.sources @@ -14,6 +14,11 @@ BROADCOM_FILES = \ common/v3d_debug.c \ common/v3d_debug.h \ common/v3d_device_info.h \ + qpu/qpu_disasm.c \ + qpu/qpu_disasm.h \ + qpu/qpu_instr.c \ + qpu/qpu_instr.h \ + qpu/qpu_pack.c \ $() BROADCOM_DECODER_FILES = \ diff --git a/src/broadcom/Makefile.vc5.am b/src/broadcom/Makefile.vc5.am new file mode 100644 index 0000000000..e88afc2042 --- /dev/null +++ b/src/broadcom/Makefile.vc5.am @@ -0,0 +1,19 @@ +noinst_LTLIBRARIES += libbroadcom.la + +if USE_VC5_SIMULATOR +AM_CFLAGS += $(VC5_SIMULATOR_CFLAGS) +libbroadcom_la_LDFLAGS = $(VC5_SIMULATOR_LIBS) +endif + +libbroadcom_la_SOURCES = $(BROADCOM_FILES) + +check_PROGRAMS += \ + qpu/tests/qpu_disasm \ + $(NULL) + +LDADD = \ + libbroadcom.la \ + $(top_builddir)/src/util/libmesautil.la \ + $(NULL) + +TESTS += $(check_PROGRAMS) diff --git a/src/broadcom/qpu/qpu_disasm.c b/src/broadcom/qpu/qpu_disasm.c new file mode 100644 index 0000000000..5ee834852b --- /dev/null +++ b/src/broadcom/qpu/qpu_disasm.c @@ -0,0 +1,298 @@ +/* + * Copyright © 2016 Broadcom + * + * 
Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include "util/ralloc.h" + +#include "broadcom/common/v3d_device_info.h" +#include "qpu_instr.h" +#include "qpu_disasm.h" + +struct disasm_state { + const struct v3d_device_info *devinfo; + char *string; + size_t offset; +}; + +static void +append(struct disasm_state *disasm, const char *fmt, ...) +{ + va_list args; + va_start(args, fmt); + ralloc_vasprintf_rewrite_tail(&disasm->string, + &disasm->offset, + fmt, args); + va_end(args); +} + +static void +pad_to(struct disasm_state *disasm, int n) +{ + /* FIXME: Do a single append somehow. 
*/ + while (disasm->offset < n) + append(disasm, " "); +} + + +/* Appends the source operand selected by mux: "rf<raddr_a>" for MUX_A, "rf<raddr_b>" for MUX_B, otherwise "r<mux>". */ +static void +v3d_qpu_disasm_raddr(struct disasm_state *disasm, + const struct v3d_qpu_instr *instr, uint8_t mux) +{ + if (mux == V3D_QPU_MUX_A) { + append(disasm, "rf%d", instr->raddr_a); + } else if (mux == V3D_QPU_MUX_B) { + append(disasm, "rf%d", instr->raddr_b); + } else { + append(disasm, "r%d", mux); + } +} + +/* Appends a write destination: "rf<waddr>" for a non-magic write, otherwise the magic register name, or "waddr UNKNOWN <n>" when the name lookup returns NULL. */ +static void +v3d_qpu_disasm_waddr(struct disasm_state *disasm, uint32_t waddr, bool magic) +{ + if (!magic) { + append(disasm, "rf%d", waddr); + return; + } + + const char *name = v3d_qpu_magic_waddr_name(waddr); + if (name) + append(disasm, "%s", name); + else + append(disasm, "waddr UNKNOWN %d", waddr); +} + +/* Appends the add-ALU half: op name with cond/pf/uf suffixes, then the destination (when the op has one) and up to two sources. */ +static void +v3d_qpu_disasm_add(struct disasm_state *disasm, + const struct v3d_qpu_instr *instr) +{ + bool has_dst = v3d_qpu_add_op_has_dst(instr->alu.add.op); + int num_src = v3d_qpu_add_op_num_src(instr->alu.add.op); + + append(disasm, "%s", v3d_qpu_add_op_name(instr->alu.add.op)); + append(disasm, "%s", v3d_qpu_cond_name(instr->flags.ac)); + append(disasm, "%s", v3d_qpu_pf_name(instr->flags.apf)); + append(disasm, "%s", v3d_qpu_uf_name(instr->flags.auf)); + + append(disasm, " "); + + if (has_dst) { + v3d_qpu_disasm_waddr(disasm, instr->alu.add.waddr, + instr->alu.add.magic_write); + /* The pack suffix is data, not a format: pass it through "%s". */ + append(disasm, "%s", + v3d_qpu_pack_name(instr->alu.add.output_pack)); + } + + if (num_src >= 1) { + if (has_dst) + append(disasm, ", "); + v3d_qpu_disasm_raddr(disasm, instr, instr->alu.add.a); + append(disasm, "%s", + v3d_qpu_unpack_name(instr->alu.add.a_unpack)); + } + + if (num_src >= 2) { + append(disasm, ", "); + v3d_qpu_disasm_raddr(disasm, instr, instr->alu.add.b); + append(disasm, "%s", + v3d_qpu_unpack_name(instr->alu.add.b_unpack)); + } +} + +/* Appends the mul-ALU half after padding the string to offset 21: "; ", the op name with cond/pf/uf suffixes, then (unless the op is NOP) the destination and up to two sources. */ +static void +v3d_qpu_disasm_mul(struct disasm_state *disasm, + const struct v3d_qpu_instr *instr) +{ + bool has_dst = v3d_qpu_mul_op_has_dst(instr->alu.mul.op); + int num_src = v3d_qpu_mul_op_num_src(instr->alu.mul.op); + + pad_to(disasm, 21); + append(disasm,
"; "); + + append(disasm, "%s", v3d_qpu_mul_op_name(instr->alu.mul.op)); + append(disasm, "%s", v3d_qpu_cond_name(instr->flags.mc)); + append(disasm, "%s", v3d_qpu_pf_name(instr->flags.mpf)); + append(disasm, "%s", v3d_qpu_uf_name(instr->flags.muf)); + + if (instr->alu.mul.op == V3D_QPU_M_NOP) + return; + + append(disasm, " "); + + if (has_dst) { + v3d_qpu_disasm_waddr(disasm, instr->alu.mul.waddr, + instr->alu.mul.magic_write); + /* The pack suffix is data, not a format: pass it through "%s". */ + append(disasm, "%s", + v3d_qpu_pack_name(instr->alu.mul.output_pack)); + } + + if (num_src >= 1) { + if (has_dst) + append(disasm, ", "); + v3d_qpu_disasm_raddr(disasm, instr, instr->alu.mul.a); + append(disasm, "%s", + v3d_qpu_unpack_name(instr->alu.mul.a_unpack)); + } + + if (num_src >= 2) { + append(disasm, ", "); + v3d_qpu_disasm_raddr(disasm, instr, instr->alu.mul.b); + append(disasm, "%s", + v3d_qpu_unpack_name(instr->alu.mul.b_unpack)); + } +} + +/* Appends "; <name>" for each of the thrsw/ldvary/ldvpm/ldtmu/ldunif/wrtmuc signals that is set, after padding the string to offset 41; appends nothing when none of them is set. */ +static void +v3d_qpu_disasm_sig(struct disasm_state *disasm, + const struct v3d_qpu_instr *instr) +{ + const struct v3d_qpu_sig *sig = &instr->sig; + + if (!sig->thrsw && + !sig->ldvary && + !sig->ldvpm && + !sig->ldtmu && + !sig->ldunif && + !sig->wrtmuc) { + return; + } + + pad_to(disasm, 41); + + if (sig->thrsw) + append(disasm, "; thrsw"); + if (sig->ldvary) + append(disasm, "; ldvary"); + if (sig->ldvpm) + append(disasm, "; ldvpm"); + if (sig->ldtmu) + append(disasm, "; ldtmu"); + if (sig->ldunif) + append(disasm, "; ldunif"); + if (sig->wrtmuc) + append(disasm, "; wrtmuc"); +} + +/* Appends an ALU instruction: the add half, the mul half and the signals, in that order. */ +static void +v3d_qpu_disasm_alu(struct disasm_state *disasm, + const struct v3d_qpu_instr *instr) +{ + v3d_qpu_disasm_add(disasm, instr); + v3d_qpu_disasm_mul(disasm, instr); + v3d_qpu_disasm_sig(disasm, instr); +} + +/* Appends a branch: "b" (plus "u" when branch.ub is set), the condition and msfign suffixes, then the destination operand(s). */ +static void +v3d_qpu_disasm_branch(struct disasm_state *disasm, + const struct v3d_qpu_instr *instr) +{ + append(disasm, "b"); + if (instr->branch.ub) + append(disasm, "u"); + append(disasm, "%s", v3d_qpu_branch_cond_name(instr->branch.cond)); + append(disasm, "%s",
v3d_qpu_msfign_name(instr->branch.msfign)); + /* Instruction-pointer destination, selected by branch.bdi. */ + switch (instr->branch.bdi) { + case V3D_QPU_BRANCH_DEST_ABS: + append(disasm, " zero_addr+0x%08x", instr->branch.offset); + break; + + case V3D_QPU_BRANCH_DEST_REL: + append(disasm, " %d", instr->branch.offset); + break; + + case V3D_QPU_BRANCH_DEST_LINK_REG: + append(disasm, " lri"); + break; + + case V3D_QPU_BRANCH_DEST_REGFILE: + append(disasm, " rf%d", instr->branch.raddr_a); + break; + } + /* Uniform-stream destination (branch.bdu), printed only when branch.ub is set. */ + if (instr->branch.ub) { + switch (instr->branch.bdu) { + case V3D_QPU_BRANCH_DEST_ABS: + append(disasm, ", a:unif"); + break; + + case V3D_QPU_BRANCH_DEST_REL: + append(disasm, ", r:unif"); + break; + + case V3D_QPU_BRANCH_DEST_LINK_REG: + append(disasm, ", lri"); + break; + + case V3D_QPU_BRANCH_DEST_REGFILE: + append(disasm, ", rf%d", instr->branch.raddr_a); + break; + } + } +} + /* Disassembles an already-unpacked instruction by dispatching on instr->type and returns the string that was allocated with rzalloc_size(NULL, 1) and extended by append(). */ +const char * +v3d_qpu_decode(const struct v3d_device_info *devinfo, + const struct v3d_qpu_instr *instr) +{ + struct disasm_state disasm = { + .string = rzalloc_size(NULL, 1), + .offset = 0, + .devinfo = devinfo, + }; + /* No default case: any other type value appends nothing. */ + switch (instr->type) { + case V3D_QPU_INSTR_TYPE_ALU: + v3d_qpu_disasm_alu(&disasm, instr); + break; + + case V3D_QPU_INSTR_TYPE_BRANCH: + v3d_qpu_disasm_branch(&disasm, instr); + break; + } + + return disasm.string; +} + +/** + * Returns a string containing the disassembled representation of the QPU + * instruction. It is the caller's responsibility to free the return value + * with ralloc_free().
+ */ +const char * +v3d_qpu_disasm(const struct v3d_device_info *devinfo, uint64_t inst) +{ + struct v3d_qpu_instr instr; + bool ok = v3d_qpu_instr_unpack(devinfo, inst, &instr); + assert(ok); + + /* With assert() compiled out, a failed unpack must not fall through: instr may be only partly initialized, and decoding it would read indeterminate fields. The placeholder can be released with ralloc_free() like any other result. */ + if (!ok) + return ralloc_strdup(NULL, "<invalid QPU instruction>"); + + return v3d_qpu_decode(devinfo, &instr); +} + +/* Writes the disassembly of instr to stderr (no trailing newline is added) and frees the temporary string. */ +void +v3d_qpu_dump(const struct v3d_device_info *devinfo, + const struct v3d_qpu_instr *instr) +{ + const char *decoded = v3d_qpu_decode(devinfo, instr); + fprintf(stderr, "%s", decoded); + ralloc_free((char *)decoded); +} diff --git a/src/broadcom/qpu/qpu_disasm.h b/src/broadcom/qpu/qpu_disasm.h new file mode 100644 index 0000000000..efdf8ddb58 --- /dev/null +++ b/src/broadcom/qpu/qpu_disasm.h @@ -0,0 +1,39 @@ +/* + * Copyright © 2016 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE.
+ */ + +#ifndef VC5_QPU_DISASM_H +#define VC5_QPU_DISASM_H + +#include "broadcom/common/v3d_device_info.h" + +struct v3d_qpu_instr; + +const char *v3d_qpu_decode(const struct v3d_device_info *devinfo, const + struct v3d_qpu_instr *instr); + +const char *v3d_qpu_disasm(const struct v3d_device_info *devinfo, uint64_t inst); + +void v3d_qpu_dump(const struct v3d_device_info *devinfo, const + struct v3d_qpu_instr *instr); + +#endif /* VC5_QPU_DISASM_H */ diff --git a/src/broadcom/qpu/qpu_instr.c b/src/broadcom/qpu/qpu_instr.c new file mode 100644 index 0000000000..7499170de3 --- /dev/null +++ b/src/broadcom/qpu/qpu_instr.c @@ -0,0 +1,645 @@ +/* + * Copyright © 2016 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include +#include "util/macros.h" +#include "qpu_instr.h" + +#ifndef QPU_MASK +#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low)) +/* Using the GNU statement expression extension */ +#define QPU_SET_FIELD(value, field) \ + ({ \ + uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \ + assert((fieldval & ~ field ## _MASK) == 0); \ + fieldval & field ## _MASK; \ + }) + +#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT)) + +#define QPU_UPDATE_FIELD(inst, value, field) \ + (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field)) +#endif /* QPU_MASK */ + +#define VC5_QPU_OP_MUL_SHIFT 58 +#define VC5_QPU_OP_MUL_MASK QPU_MASK(63, 58) + +#define VC5_QPU_SIG_SHIFT 53 +#define VC5_QPU_SIG_MASK QPU_MASK(57, 53) +# define VC5_QPU_SIG_THRSW_BIT 0x1 +# define VC5_QPU_SIG_LDUNIF_BIT 0x2 +# define VC5_QPU_SIG_LDTMU_BIT 0x4 +# define VC5_QPU_SIG_LDVARY_BIT 0x8 + +#define VC5_QPU_COND_SHIFT 46 +#define VC5_QPU_COND_MASK QPU_MASK(52, 46) + +#define VC5_QPU_COND_IFA 0 +#define VC5_QPU_COND_IFB 1 +#define VC5_QPU_COND_IFNA 2 +#define VC5_QPU_COND_IFNB 3 + +#define VC5_QPU_MM QPU_MASK(45, 45) +#define VC5_QPU_MA QPU_MASK(44, 44) + +#define V3D_QPU_WADDR_M_SHIFT 38 +#define V3D_QPU_WADDR_M_MASK QPU_MASK(43, 38) + +#define VC5_QPU_BRANCH_ADDR_LOW_SHIFT 35 +#define VC5_QPU_BRANCH_ADDR_LOW_MASK QPU_MASK(55, 35) + +#define V3D_QPU_WADDR_A_SHIFT 32 +#define V3D_QPU_WADDR_A_MASK QPU_MASK(37, 32) + +#define VC5_QPU_BRANCH_COND_SHIFT 32 +#define VC5_QPU_BRANCH_COND_MASK QPU_MASK(34, 32) + +#define VC5_QPU_BRANCH_ADDR_HIGH_SHIFT 24 +#define VC5_QPU_BRANCH_ADDR_HIGH_MASK QPU_MASK(31, 24) + +#define VC5_QPU_OP_ADD_SHIFT 24 +#define VC5_QPU_OP_ADD_MASK QPU_MASK(31, 24) + +#define VC5_QPU_MUL_B_SHIFT 21 +#define VC5_QPU_MUL_B_MASK QPU_MASK(23, 21) + +#define VC5_QPU_BRANCH_MSFIGN_SHIFT 21 +#define VC5_QPU_BRANCH_MSFIGN_MASK QPU_MASK(22, 21) + +#define VC5_QPU_MUL_A_SHIFT 18 +#define VC5_QPU_MUL_A_MASK QPU_MASK(20, 18) 
+ +#define VC5_QPU_ADD_B_SHIFT 15 +#define VC5_QPU_ADD_B_MASK QPU_MASK(17, 15) + +#define VC5_QPU_BRANCH_BDU_SHIFT 15 +#define VC5_QPU_BRANCH_BDU_MASK QPU_MASK(17, 15) + +#define VC5_QPU_BRANCH_UB QPU_MASK(14, 14) + +#define VC5_QPU_ADD_A_SHIFT 12 +#define VC5_QPU_ADD_A_MASK QPU_MASK(14, 12) + +#define VC5_QPU_BRANCH_BDI_SHIFT 12 +#define VC5_QPU_BRANCH_BDI_MASK QPU_MASK(13, 12) + +#define VC5_QPU_RADDR_A_SHIFT 6 +#define VC5_QPU_RADDR_A_MASK QPU_MASK(11, 6) + +#define VC5_QPU_RADDR_B_SHIFT 0 +#define VC5_QPU_RADDR_B_MASK QPU_MASK(5, 0) + +/* Returns the assembly name of a magic write address, or NULL when the address has no name (a gap in the table or a value past its end). */ +const char * +v3d_qpu_magic_waddr_name(enum v3d_qpu_waddr waddr) +{ + static const char *waddr_magic[] = { + [V3D_QPU_WADDR_R0] = "r0", + [V3D_QPU_WADDR_R1] = "r1", + [V3D_QPU_WADDR_R2] = "r2", + [V3D_QPU_WADDR_R3] = "r3", + [V3D_QPU_WADDR_R4] = "r4", + [V3D_QPU_WADDR_R5] = "r5", + [V3D_QPU_WADDR_NOP] = "-", + [V3D_QPU_WADDR_TLB] = "tlb", + [V3D_QPU_WADDR_TLBU] = "tlbu", + [V3D_QPU_WADDR_TMU] = "tmu", + [V3D_QPU_WADDR_TMUL] = "tmul", + [V3D_QPU_WADDR_TMUD] = "tmud", + [V3D_QPU_WADDR_TMUA] = "tmua", + [V3D_QPU_WADDR_TMUAU] = "tmuau", + [V3D_QPU_WADDR_VPM] = "vpm", + [V3D_QPU_WADDR_VPMU] = "vpmu", + [V3D_QPU_WADDR_SYNC] = "sync", + [V3D_QPU_WADDR_SYNCU] = "syncu", + [V3D_QPU_WADDR_RECIP] = "recip", + [V3D_QPU_WADDR_RSQRT] = "rsqrt", + [V3D_QPU_WADDR_EXP] = "exp", + [V3D_QPU_WADDR_LOG] = "log", + [V3D_QPU_WADDR_SIN] = "sin", + [V3D_QPU_WADDR_RSQRT2] = "rsqrt2", + }; + + /* Callers pass raw waddr values (the disassembler hands in a uint32_t), which may lie past the last table entry; report those as unnamed instead of reading out of bounds. */ + if (waddr >= ARRAY_SIZE(waddr_magic)) + return NULL; + + return waddr_magic[waddr]; +} + +const char * +v3d_qpu_add_op_name(enum v3d_qpu_add_op op) +{ + static const char *op_names[] = { + [V3D_QPU_A_FADD] = "fadd", + [V3D_QPU_A_FADDNF] = "faddnf", + [V3D_QPU_A_VFPACK] = "vfpack", + [V3D_QPU_A_ADD] = "add", + [V3D_QPU_A_SUB] = "sub", + [V3D_QPU_A_FSUB] = "fsub", + [V3D_QPU_A_MIN] = "min", + [V3D_QPU_A_MAX] = "max", + [V3D_QPU_A_UMIN] = "umin", + [V3D_QPU_A_UMAX] = "umax", + [V3D_QPU_A_SHL] = "shl", + [V3D_QPU_A_SHR] = "shr", + [V3D_QPU_A_ASR] = "asr", + [V3D_QPU_A_ROR] = "ror", + [V3D_QPU_A_FMIN] = "fmin", +
[V3D_QPU_A_FMAX] = "fmax", + [V3D_QPU_A_VFMIN] = "vfmin", + [V3D_QPU_A_AND] = "and", + [V3D_QPU_A_OR] = "or", + [V3D_QPU_A_XOR] = "xor", + [V3D_QPU_A_VADD] = "vadd", + [V3D_QPU_A_VSUB] = "vsub", + [V3D_QPU_A_NOT] = "not", + [V3D_QPU_A_NEG] = "neg", + [V3D_QPU_A_FLAPUSH] = "flapush", + [V3D_QPU_A_FLBPUSH] = "flbpush", + [V3D_QPU_A_FLBPOP] = "flbpop", + [V3D_QPU_A_SETMSF] = "setmsf", + [V3D_QPU_A_SETREVF] = "setrevf", + [V3D_QPU_A_NOP] = "nop", + [V3D_QPU_A_TIDX] = "tidx", + [V3D_QPU_A_EIDX] = "eidx", + [V3D_QPU_A_LR] = "lr", + [V3D_QPU_A_VFLA] = "vfla", + [V3D_QPU_A_VFLNA] = "vflna", + [V3D_QPU_A_VFLB] = "vflb", + [V3D_QPU_A_VFLNB] = "vflnb", + [V3D_QPU_A_FXCD] = "fxcd", + [V3D_QPU_A_XCD] = "xcd", + [V3D_QPU_A_FYCD] = "fycd", + [V3D_QPU_A_YCD] = "ycd", + [V3D_QPU_A_MSF] = "msf", + [V3D_QPU_A_REVF] = "revf", + [V3D_QPU_A_VDWWT] = "vdwwt", + [V3D_QPU_A_IID] = "iid", + [V3D_QPU_A_SAMPID] = "sampid", + [V3D_QPU_A_PATCHID] = "patchid", + [V3D_QPU_A_TMUWT] = "tmuwt", + [V3D_QPU_A_VPMSETUP] = "vpmsetup", + [V3D_QPU_A_VPMWT] = "vpmwt", + [V3D_QPU_A_LDVPMV] = "ldvpmv", + [V3D_QPU_A_LDVPMD] = "ldvpmd", + [V3D_QPU_A_LDVPMP] = "ldvpmp", + [V3D_QPU_A_LDVPMG] = "ldvpmg", + [V3D_QPU_A_FCMP] = "fcmp", + [V3D_QPU_A_VFMAX] = "vfmax", + [V3D_QPU_A_FROUND] = "fround", + [V3D_QPU_A_FTOIN] = "ftoin", + [V3D_QPU_A_FTRUNC] = "ftrunc", + [V3D_QPU_A_FTOIZ] = "ftoiz", + [V3D_QPU_A_FFLOOR] = "ffloor", + [V3D_QPU_A_FTOUZ] = "ftouz", + [V3D_QPU_A_FCEIL] = "fceil", + [V3D_QPU_A_FTOC] = "ftoc", + [V3D_QPU_A_FDX] = "fdx", + [V3D_QPU_A_FDY] = "fdy", + [V3D_QPU_A_STVPMV] = "stvpmv", + [V3D_QPU_A_STVPMD] = "stvpmd", + [V3D_QPU_A_STVPMP] = "stvpmp", + [V3D_QPU_A_ITOF] = "itof", + [V3D_QPU_A_CLZ] = "clz", + [V3D_QPU_A_UTOF] = "utof", + }; + + if (op >= ARRAY_SIZE(op_names)) + return NULL; + + return op_names[op]; +} + +const char * +v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op) +{ + static const char *op_names[] = { + [V3D_QPU_M_ADD] = "add", + [V3D_QPU_M_SUB] = "sub", + [V3D_QPU_M_UMUL24] = "umul24", + 
[V3D_QPU_M_VFMUL] = "vfmul", + [V3D_QPU_M_SMUL24] = "smul24", + [V3D_QPU_M_MULTOP] = "multop", + [V3D_QPU_M_FMOV] = "fmov", + [V3D_QPU_M_MOV] = "mov", + [V3D_QPU_M_NOP] = "nop", + [V3D_QPU_M_FMUL] = "fmul", + }; + + if (op >= ARRAY_SIZE(op_names)) + return NULL; + + return op_names[op]; +} + +const char * +v3d_qpu_cond_name(enum v3d_qpu_cond cond) +{ + switch (cond) { + case V3D_QPU_COND_NONE: + return ""; + case V3D_QPU_COND_IFA: + return ".ifa"; + case V3D_QPU_COND_IFB: + return ".ifb"; + case V3D_QPU_COND_IFNA: + return ".ifna"; + case V3D_QPU_COND_IFNB: + return ".ifnb"; + default: + unreachable("bad cond value"); + } +} + +const char * +v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond) +{ + switch (cond) { + case V3D_QPU_BRANCH_COND_ALWAYS: + return ""; + case V3D_QPU_BRANCH_COND_A0: + return ".a0"; + case V3D_QPU_BRANCH_COND_NA0: + return ".na0"; + case V3D_QPU_BRANCH_COND_ALLA: + return ".alla"; + case V3D_QPU_BRANCH_COND_ANYNA: + return ".anyna"; + case V3D_QPU_BRANCH_COND_ANYA: + return ".anya"; + case V3D_QPU_BRANCH_COND_ALLNA: + return ".allna"; + default: + unreachable("bad branch cond value"); + } +} + +const char * +v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign) +{ + switch (msfign) { + case V3D_QPU_MSFIGN_NONE: + return ""; + case V3D_QPU_MSFIGN_P: + return "p"; + case V3D_QPU_MSFIGN_Q: + return "q"; + default: + unreachable("bad branch cond value"); + } +} + +const char * +v3d_qpu_pf_name(enum v3d_qpu_pf pf) +{ + switch (pf) { + case V3D_QPU_PF_NONE: + return ""; + case V3D_QPU_PF_PUSHZ: + return ".pushz"; + case V3D_QPU_PF_PUSHN: + return ".pushn"; + case V3D_QPU_PF_PUSHC: + return ".pushc"; + default: + unreachable("bad pf value"); + } +} + +const char * +v3d_qpu_uf_name(enum v3d_qpu_uf uf) +{ + switch (uf) { + case V3D_QPU_UF_NONE: + return ""; + case V3D_QPU_UF_ANDZ: + return ".andz"; + case V3D_QPU_UF_ANDNZ: + return ".andnz"; + case V3D_QPU_UF_NORZ: + return ".norz"; + case V3D_QPU_UF_NORNZ: + return ".nornz"; + case V3D_QPU_UF_ANDN: + 
return ".andn"; + case V3D_QPU_UF_ANDNN: + return ".andnn"; + case V3D_QPU_UF_NORN: + return ".norn"; + case V3D_QPU_UF_NORNN: + return ".nornn"; + case V3D_QPU_UF_ANDC: + return ".andc"; + case V3D_QPU_UF_ANDNC: + return ".andnc"; + case V3D_QPU_UF_NORC: + return ".norc"; + case V3D_QPU_UF_NORNC: + return ".nornc"; + default: + unreachable("bad pf value"); + } +} + +const char * +v3d_qpu_pack_name(enum v3d_qpu_output_pack pack) +{ + switch (pack) { + case V3D_QPU_PACK_NONE: + return ""; + case V3D_QPU_PACK_L: + return ".l"; + case V3D_QPU_PACK_H: + return ".h"; + default: + unreachable("bad pack value"); + } +} + +const char * +v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack) +{ + switch (unpack) { + case V3D_QPU_UNPACK_NONE: + return ""; + case V3D_QPU_UNPACK_L: + return ".l"; + case V3D_QPU_UNPACK_H: + return ".h"; + case V3D_QPU_UNPACK_ABS: + return ".abs"; + case V3D_QPU_UNPACK_REPLICATE_32F_16: + return ".ff"; + case V3D_QPU_UNPACK_REPLICATE_L_16: + return ".ll"; + case V3D_QPU_UNPACK_REPLICATE_H_16: + return ".hh"; + case V3D_QPU_UNPACK_SWAP_16: + return ".swp"; + default: + unreachable("bad unpack value"); + } +} + +#define D 1 +#define A 2 +#define B 4 +static const uint8_t add_op_args[] = { + [V3D_QPU_A_FADD] = D | A | B, + [V3D_QPU_A_FADDNF] = D | A | B, + [V3D_QPU_A_VFPACK] = D | A | B, + [V3D_QPU_A_ADD] = D | A | B, + [V3D_QPU_A_VFPACK] = D | A | B, + [V3D_QPU_A_SUB] = D | A | B, + [V3D_QPU_A_VFPACK] = D | A | B, + [V3D_QPU_A_FSUB] = D | A | B, + [V3D_QPU_A_MIN] = D | A | B, + [V3D_QPU_A_MAX] = D | A | B, + [V3D_QPU_A_UMIN] = D | A | B, + [V3D_QPU_A_UMAX] = D | A | B, + [V3D_QPU_A_SHL] = D | A | B, + [V3D_QPU_A_SHR] = D | A | B, + [V3D_QPU_A_ASR] = D | A | B, + [V3D_QPU_A_ROR] = D | A | B, + [V3D_QPU_A_FMIN] = D | A | B, + [V3D_QPU_A_FMAX] = D | A | B, + [V3D_QPU_A_VFMIN] = D | A | B, + + [V3D_QPU_A_AND] = D | A | B, + [V3D_QPU_A_OR] = D | A | B, + [V3D_QPU_A_XOR] = D | A | B, + + [V3D_QPU_A_VADD] = D | A | B, + [V3D_QPU_A_VSUB] = D | A | B, + 
[V3D_QPU_A_NOT] = D | A, + [V3D_QPU_A_NEG] = D | A, + [V3D_QPU_A_FLAPUSH] = D | A, + [V3D_QPU_A_FLBPUSH] = D | A, + [V3D_QPU_A_FLBPOP] = D | A, + [V3D_QPU_A_SETMSF] = D | A, + [V3D_QPU_A_SETREVF] = D | A, + [V3D_QPU_A_NOP] = 0, + [V3D_QPU_A_TIDX] = D, + [V3D_QPU_A_EIDX] = D, + [V3D_QPU_A_LR] = D, + [V3D_QPU_A_VFLA] = D, + [V3D_QPU_A_VFLNA] = D, + [V3D_QPU_A_VFLB] = D, + [V3D_QPU_A_VFLNB] = D, + + [V3D_QPU_A_FXCD] = D, + [V3D_QPU_A_XCD] = D, + [V3D_QPU_A_FYCD] = D, + [V3D_QPU_A_YCD] = D, + + [V3D_QPU_A_MSF] = D, + [V3D_QPU_A_REVF] = D, + [V3D_QPU_A_VDWWT] = D, + [V3D_QPU_A_IID] = D, + [V3D_QPU_A_SAMPID] = D, + [V3D_QPU_A_PATCHID] = D, + [V3D_QPU_A_TMUWT] = D, + [V3D_QPU_A_VPMWT] = D, + + [V3D_QPU_A_VPMSETUP] = D | A, + + [V3D_QPU_A_LDVPMV] = D | A, + [V3D_QPU_A_LDVPMD] = D | A, + [V3D_QPU_A_LDVPMP] = D | A, + [V3D_QPU_A_LDVPMG] = D | A | B, + + /* FIXME: MOVABSNEG */ + + [V3D_QPU_A_FCMP] = D | A | B, + [V3D_QPU_A_VFMAX] = D | A | B, + + [V3D_QPU_A_FROUND] = D | A, + [V3D_QPU_A_FTOIN] = D | A, + [V3D_QPU_A_FTRUNC] = D | A, + [V3D_QPU_A_FTOIZ] = D | A, + [V3D_QPU_A_FFLOOR] = D | A, + [V3D_QPU_A_FTOUZ] = D | A, + [V3D_QPU_A_FCEIL] = D | A, + [V3D_QPU_A_FTOC] = D | A, + + [V3D_QPU_A_FDX] = D | A, + [V3D_QPU_A_FDY] = D | A, + + [V3D_QPU_A_STVPMV] = A | B, + [V3D_QPU_A_STVPMD] = A | B, + [V3D_QPU_A_STVPMP] = A | B, + + [V3D_QPU_A_ITOF] = D | A, + [V3D_QPU_A_CLZ] = D | A, + [V3D_QPU_A_UTOF] = D | A, +}; + /* Operand usage of each mul-ALU op, as a mask of D (has a destination) and A / B (reads source a / b). */ +static const uint8_t mul_op_args[] = { + [V3D_QPU_M_ADD] = D | A | B, + [V3D_QPU_M_SUB] = D | A | B, + [V3D_QPU_M_UMUL24] = D | A | B, + [V3D_QPU_M_VFMUL] = D | A | B, + [V3D_QPU_M_SMUL24] = D | A | B, + [V3D_QPU_M_MULTOP] = D | A | B, + [V3D_QPU_M_FMOV] = D | A, + [V3D_QPU_M_NOP] = 0, + [V3D_QPU_M_MOV] = D | A, + [V3D_QPU_M_FMUL] = D | A | B, +}; + /* Returns whether the add op has a destination (D bit of add_op_args[op]); op is range-checked only by assert(). */ +bool +v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op) +{ + assert(op < ARRAY_SIZE(add_op_args)); + + return add_op_args[op] & D; +} + /* Returns whether the mul op has a destination (D bit of mul_op_args[op]); op is range-checked only by assert(). */ +bool +v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op) +{ + assert(op <
ARRAY_SIZE(mul_op_args)); + + return mul_op_args[op] & D; +} + /* Returns the number of source operands of an add op: 2 if its B bit is set, else 1 if its A bit is set, else 0. */ +int +v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op) +{ + assert(op < ARRAY_SIZE(add_op_args)); + + uint8_t args = add_op_args[op]; + if (args & B) + return 2; + else if (args & A) + return 1; + else + return 0; +} + /* Returns the number of source operands of a mul op: 2 if its B bit is set, else 1 if its A bit is set, else 0. */ +int +v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op) +{ + assert(op < ARRAY_SIZE(mul_op_args)); + + uint8_t args = mul_op_args[op]; + if (args & B) + return 2; + else if (args & A) + return 1; + else + return 0; +} + /* True for the RECIP, RSQRT, EXP, LOG, SIN and RSQRT2 magic write addresses. */ +bool +v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr) +{ + switch (waddr) { + case V3D_QPU_WADDR_RECIP: + case V3D_QPU_WADDR_RSQRT: + case V3D_QPU_WADDR_EXP: + case V3D_QPU_WADDR_LOG: + case V3D_QPU_WADDR_SIN: + case V3D_QPU_WADDR_RSQRT2: + return true; + default: + return false; + } +} + /* True for the TMU, TMUL, TMUD, TMUA and TMUAU magic write addresses. */ +bool +v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr) +{ + switch (waddr) { + case V3D_QPU_WADDR_TMU: + case V3D_QPU_WADDR_TMUL: + case V3D_QPU_WADDR_TMUD: + case V3D_QPU_WADDR_TMUA: + case V3D_QPU_WADDR_TMUAU: + return true; + default: + return false; + } +} + /* True for the TLB and TLBU magic write addresses. */ +bool +v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr) +{ + return (waddr == V3D_QPU_WADDR_TLB || + waddr == V3D_QPU_WADDR_TLBU); +} + /* True for the VPM and VPMU magic write addresses. */ +bool +v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) +{ + return (waddr == V3D_QPU_WADDR_VPM || + waddr == V3D_QPU_WADDR_VPMU); +} + /* True for the SYNC and SYNCU magic write addresses. */ +bool +v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) +{ + return (waddr == V3D_QPU_WADDR_SYNC || + waddr == V3D_QPU_WADDR_SYNCU); +} + /* True when the ldvary or ldvpm signal is set. */ +bool +v3d_qpu_writes_r3(const struct v3d_qpu_instr *inst) +{ + return inst->sig.ldvary || inst->sig.ldvpm; +} + /* True when ldtmu is set, or when an ALU instruction's add or mul half does a magic write to an address accepted by v3d_qpu_magic_waddr_is_sfu(). */ +bool +v3d_qpu_writes_r4(const struct v3d_qpu_instr *inst) +{ + if (inst->sig.ldtmu) + return true; + + if (inst->type == V3D_QPU_INSTR_TYPE_ALU) { + if (inst->alu.add.magic_write && + v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr)) { + return true; + } + + if (inst->alu.mul.magic_write && + v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr)) { + return true; + } + } + + return false; +} + /* True when the ldvary or ldunif signal is set. */ +bool
+v3d_qpu_writes_r5(const struct v3d_qpu_instr *inst) +{ + return inst->sig.ldvary || inst->sig.ldunif; +} + /* Returns true when a source operand that the add or mul op actually consumes selects mux. NOTE(review): inst->alu is read without checking inst->type; presumably callers only pass ALU instructions, confirm against the call sites. */ +bool +v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux) +{ + int add_nsrc = v3d_qpu_add_op_num_src(inst->alu.add.op); + int mul_nsrc = v3d_qpu_mul_op_num_src(inst->alu.mul.op); + /* An operand's mux only counts when the op's source count says that operand is read. */ + return ((add_nsrc > 0 && inst->alu.add.a == mux) || + (add_nsrc > 1 && inst->alu.add.b == mux) || + (mul_nsrc > 0 && inst->alu.mul.a == mux) || + (mul_nsrc > 1 && inst->alu.mul.b == mux)); +} diff --git a/src/broadcom/qpu/qpu_instr.h b/src/broadcom/qpu/qpu_instr.h new file mode 100644 index 0000000000..a425fae8b2 --- /dev/null +++ b/src/broadcom/qpu/qpu_instr.h @@ -0,0 +1,411 @@ +/* + * Copyright © 2016 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/** + * @file qpu_instr.h + * + * Definitions of the unpacked form of QPU instructions.
Assembly and + * disassembly will use this for talking about instructions, with qpu_encode.c + * and qpu_decode.c handling the pack and unpack of the actual 64-bit QPU + * instruction. + */ + +#ifndef QPU_INSTR_H +#define QPU_INSTR_H + +#include +#include +#include "util/macros.h" + +struct v3d_device_info; + +struct v3d_qpu_sig { + bool thrsw:1; + bool ldunif:1; + bool ldtmu:1; + bool ldvary:1; + bool ldvpm:1; + bool ldtlb:1; + bool ldtlbu:1; + bool small_imm:1; + bool ucb:1; + bool rotate:1; + bool wrtmuc:1; +}; + +enum v3d_qpu_cond { + V3D_QPU_COND_NONE, + V3D_QPU_COND_IFA, + V3D_QPU_COND_IFB, + V3D_QPU_COND_IFNA, + V3D_QPU_COND_IFNB, +}; + +enum v3d_qpu_pf { + V3D_QPU_PF_NONE, + V3D_QPU_PF_PUSHZ, + V3D_QPU_PF_PUSHN, + V3D_QPU_PF_PUSHC, +}; + +enum v3d_qpu_uf { + V3D_QPU_UF_NONE, + V3D_QPU_UF_ANDZ, + V3D_QPU_UF_ANDNZ, + V3D_QPU_UF_NORNZ, + V3D_QPU_UF_NORZ, + V3D_QPU_UF_ANDN, + V3D_QPU_UF_ANDNN, + V3D_QPU_UF_NORNN, + V3D_QPU_UF_NORN, + V3D_QPU_UF_ANDC, + V3D_QPU_UF_ANDNC, + V3D_QPU_UF_NORNC, + V3D_QPU_UF_NORC, +}; + +enum v3d_qpu_waddr { + V3D_QPU_WADDR_R0 = 0, + V3D_QPU_WADDR_R1 = 1, + V3D_QPU_WADDR_R2 = 2, + V3D_QPU_WADDR_R3 = 3, + V3D_QPU_WADDR_R4 = 4, + V3D_QPU_WADDR_R5 = 5, + /* 6 is reserved, but note 3.2.2.8: "Result Writes" */ + V3D_QPU_WADDR_NOP = 6, + V3D_QPU_WADDR_TLB = 7, + V3D_QPU_WADDR_TLBU = 8, + V3D_QPU_WADDR_TMU = 9, + V3D_QPU_WADDR_TMUL = 10, + V3D_QPU_WADDR_TMUD = 11, + V3D_QPU_WADDR_TMUA = 12, + V3D_QPU_WADDR_TMUAU = 13, + V3D_QPU_WADDR_VPM = 14, + V3D_QPU_WADDR_VPMU = 15, + V3D_QPU_WADDR_SYNC = 16, + V3D_QPU_WADDR_SYNCU = 17, + /* reserved */ + V3D_QPU_WADDR_RECIP = 19, + V3D_QPU_WADDR_RSQRT = 20, + V3D_QPU_WADDR_EXP = 21, + V3D_QPU_WADDR_LOG = 22, + V3D_QPU_WADDR_SIN = 23, + V3D_QPU_WADDR_RSQRT2 = 24, +}; + +struct v3d_qpu_flags { + enum v3d_qpu_cond ac, mc; + enum v3d_qpu_pf apf, mpf; + enum v3d_qpu_uf auf, muf; +}; + +enum v3d_qpu_add_op { + V3D_QPU_A_FADD, + V3D_QPU_A_FADDNF, + V3D_QPU_A_VFPACK, + V3D_QPU_A_ADD, + V3D_QPU_A_SUB, + 
V3D_QPU_A_FSUB, + V3D_QPU_A_MIN, + V3D_QPU_A_MAX, + V3D_QPU_A_UMIN, + V3D_QPU_A_UMAX, + V3D_QPU_A_SHL, + V3D_QPU_A_SHR, + V3D_QPU_A_ASR, + V3D_QPU_A_ROR, + V3D_QPU_A_FMIN, + V3D_QPU_A_FMAX, + V3D_QPU_A_VFMIN, + V3D_QPU_A_AND, + V3D_QPU_A_OR, + V3D_QPU_A_XOR, + V3D_QPU_A_VADD, + V3D_QPU_A_VSUB, + V3D_QPU_A_NOT, + V3D_QPU_A_NEG, + V3D_QPU_A_FLAPUSH, + V3D_QPU_A_FLBPUSH, + V3D_QPU_A_FLBPOP, + V3D_QPU_A_SETMSF, + V3D_QPU_A_SETREVF, + V3D_QPU_A_NOP, + V3D_QPU_A_TIDX, + V3D_QPU_A_EIDX, + V3D_QPU_A_LR, + V3D_QPU_A_VFLA, + V3D_QPU_A_VFLNA, + V3D_QPU_A_VFLB, + V3D_QPU_A_VFLNB, + V3D_QPU_A_FXCD, + V3D_QPU_A_XCD, + V3D_QPU_A_FYCD, + V3D_QPU_A_YCD, + V3D_QPU_A_MSF, + V3D_QPU_A_REVF, + V3D_QPU_A_VDWWT, + V3D_QPU_A_IID, + V3D_QPU_A_SAMPID, + V3D_QPU_A_PATCHID, + V3D_QPU_A_TMUWT, + V3D_QPU_A_VPMSETUP, + V3D_QPU_A_VPMWT, + V3D_QPU_A_LDVPMV, + V3D_QPU_A_LDVPMD, + V3D_QPU_A_LDVPMP, + V3D_QPU_A_LDVPMG, + V3D_QPU_A_FCMP, + V3D_QPU_A_VFMAX, + V3D_QPU_A_FROUND, + V3D_QPU_A_FTOIN, + V3D_QPU_A_FTRUNC, + V3D_QPU_A_FTOIZ, + V3D_QPU_A_FFLOOR, + V3D_QPU_A_FTOUZ, + V3D_QPU_A_FCEIL, + V3D_QPU_A_FTOC, + V3D_QPU_A_FDX, + V3D_QPU_A_FDY, + V3D_QPU_A_STVPMV, + V3D_QPU_A_STVPMD, + V3D_QPU_A_STVPMP, + V3D_QPU_A_ITOF, + V3D_QPU_A_CLZ, + V3D_QPU_A_UTOF, +}; + +enum v3d_qpu_mul_op { + V3D_QPU_M_ADD, + V3D_QPU_M_SUB, + V3D_QPU_M_UMUL24, + V3D_QPU_M_VFMUL, + V3D_QPU_M_SMUL24, + V3D_QPU_M_MULTOP, + V3D_QPU_M_FMOV, + V3D_QPU_M_MOV, + V3D_QPU_M_NOP, + V3D_QPU_M_FMUL, +}; + +enum v3d_qpu_output_pack { + V3D_QPU_PACK_NONE, + /** + * Convert to 16-bit float, put in low 16 bits of destination leaving + * high unmodified. + */ + V3D_QPU_PACK_L, + /** + * Convert to 16-bit float, put in high 16 bits of destination leaving + * low unmodified. + */ + V3D_QPU_PACK_H, +}; + +enum v3d_qpu_input_unpack { + /** + * No-op input unpacking. Note that this enum's value doesn't match + * the packed QPU instruction value of the field (we use 0 so that the + * default on new instruction creation is no-op). 
+ */ + V3D_QPU_UNPACK_NONE, + /** Absolute value. Only available for some operations. */ + V3D_QPU_UNPACK_ABS, + /** Convert low 16 bits from 16-bit float to 32-bit float. */ + V3D_QPU_UNPACK_L, + /** Convert high 16 bits from 16-bit float to 32-bit float. */ + V3D_QPU_UNPACK_H, + + /** Convert to 16f and replicate it to the high bits. */ + V3D_QPU_UNPACK_REPLICATE_32F_16, + + /** Replicate low 16 bits to high */ + V3D_QPU_UNPACK_REPLICATE_L_16, + + /** Replicate high 16 bits to low */ + V3D_QPU_UNPACK_REPLICATE_H_16, + + /** Swap high and low 16 bits */ + V3D_QPU_UNPACK_SWAP_16, +}; + +enum v3d_qpu_mux { + V3D_QPU_MUX_R0, + V3D_QPU_MUX_R1, + V3D_QPU_MUX_R2, + V3D_QPU_MUX_R3, + V3D_QPU_MUX_R4, + V3D_QPU_MUX_R5, + V3D_QPU_MUX_A, + V3D_QPU_MUX_B, +}; + +struct v3d_qpu_alu_instr { + struct { + enum v3d_qpu_add_op op; + enum v3d_qpu_mux a, b; + uint8_t waddr; + bool magic_write; + enum v3d_qpu_output_pack output_pack; + enum v3d_qpu_input_unpack a_unpack; + enum v3d_qpu_input_unpack b_unpack; + } add; + + struct { + enum v3d_qpu_mul_op op; + enum v3d_qpu_mux a, b; + uint8_t waddr; + bool magic_write; + enum v3d_qpu_output_pack output_pack; + enum v3d_qpu_input_unpack a_unpack; + enum v3d_qpu_input_unpack b_unpack; + } mul; +}; + +enum v3d_qpu_branch_cond { + V3D_QPU_BRANCH_COND_ALWAYS, + V3D_QPU_BRANCH_COND_A0, + V3D_QPU_BRANCH_COND_NA0, + V3D_QPU_BRANCH_COND_ALLA, + V3D_QPU_BRANCH_COND_ANYNA, + V3D_QPU_BRANCH_COND_ANYA, + V3D_QPU_BRANCH_COND_ALLNA, +}; + +enum v3d_qpu_msfign { + /** Ignore multisample flags when determining branch condition. */ + V3D_QPU_MSFIGN_NONE, + /** + * If no multisample flags are set in the lane (a pixel in the FS, a + * vertex in the VS), ignore the lane's condition when computing the + * branch condition. + */ + V3D_QPU_MSFIGN_P, + /** + * If no multisample flags are set in a 2x2 quad in the FS, ignore the + * quad's a/b conditions. 
+ */ + V3D_QPU_MSFIGN_Q, +}; + +enum v3d_qpu_branch_dest { + V3D_QPU_BRANCH_DEST_ABS, + V3D_QPU_BRANCH_DEST_REL, + V3D_QPU_BRANCH_DEST_LINK_REG, + V3D_QPU_BRANCH_DEST_REGFILE, +}; + +struct v3d_qpu_branch_instr { + enum v3d_qpu_branch_cond cond; + enum v3d_qpu_msfign msfign; + + /** Selects how to compute the new IP if the branch is taken. */ + enum v3d_qpu_branch_dest bdi; + + /** + * Selects how to compute the new uniforms pointer if the branch is + * taken. (ABS/REL implicitly load a uniform and use that) + */ + enum v3d_qpu_branch_dest bdu; + + /** + * If set, then udest determines how the uniform stream will branch, + * otherwise the uniform stream is left as is. + */ + bool ub; + + uint8_t raddr_a; + + uint32_t offset; +}; + +enum v3d_qpu_instr_type { + V3D_QPU_INSTR_TYPE_ALU, + V3D_QPU_INSTR_TYPE_BRANCH, +}; + +struct v3d_qpu_instr { + enum v3d_qpu_instr_type type; + + struct v3d_qpu_sig sig; + uint8_t raddr_a; + uint8_t raddr_b; + struct v3d_qpu_flags flags; + + union { + struct v3d_qpu_alu_instr alu; + struct v3d_qpu_branch_instr branch; + }; +}; + +const char *v3d_qpu_magic_waddr_name(enum v3d_qpu_waddr waddr); +const char *v3d_qpu_add_op_name(enum v3d_qpu_add_op op); +const char *v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op); +const char *v3d_qpu_cond_name(enum v3d_qpu_cond cond); +const char *v3d_qpu_pf_name(enum v3d_qpu_pf pf); +const char *v3d_qpu_uf_name(enum v3d_qpu_uf uf); +const char *v3d_qpu_pack_name(enum v3d_qpu_output_pack pack); +const char *v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack); +const char *v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond); +const char *v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign); + +bool v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op); +bool v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op); +int v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op); +int v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op); + +bool v3d_qpu_sig_pack(const struct v3d_device_info *devinfo, + const struct v3d_qpu_sig *sig, + uint32_t 
*packed_sig); +bool v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo, + uint32_t packed_sig, + struct v3d_qpu_sig *sig); + +bool +v3d_qpu_flags_pack(const struct v3d_device_info *devinfo, + const struct v3d_qpu_flags *cond, + uint32_t *packed_cond); +bool +v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo, + uint32_t packed_cond, + struct v3d_qpu_flags *cond); + +bool +v3d_qpu_instr_pack(const struct v3d_device_info *devinfo, + const struct v3d_qpu_instr *instr, + uint64_t *packed_instr); +bool +v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo, + uint64_t packed_instr, + struct v3d_qpu_instr *instr); + +bool v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; +bool v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; +bool v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; +bool v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; +bool v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr) ATTRIBUTE_CONST; +bool v3d_qpu_writes_r3(const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; +bool v3d_qpu_writes_r4(const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; +bool v3d_qpu_writes_r5(const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST; +bool v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux); + +#endif diff --git a/src/broadcom/qpu/qpu_pack.c b/src/broadcom/qpu/qpu_pack.c new file mode 100644 index 0000000000..0ecce86662 --- /dev/null +++ b/src/broadcom/qpu/qpu_pack.c @@ -0,0 +1,1206 @@ +/* + * Copyright © 2016 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the 
following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include "util/macros.h" + +#include "broadcom/common/v3d_device_info.h" +#include "qpu_instr.h" + +#ifndef QPU_MASK +#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low)) +/* Using the GNU statement expression extension */ +#define QPU_SET_FIELD(value, field) \ + ({ \ + uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \ + assert((fieldval & ~ field ## _MASK) == 0); \ + fieldval & field ## _MASK; \ + }) + +#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT)) + +#define QPU_UPDATE_FIELD(inst, value, field) \ + (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field)) +#endif /* QPU_MASK */ + +#define VC5_QPU_OP_MUL_SHIFT 58 +#define VC5_QPU_OP_MUL_MASK QPU_MASK(63, 58) + +#define VC5_QPU_SIG_SHIFT 53 +#define VC5_QPU_SIG_MASK QPU_MASK(57, 53) +# define VC5_QPU_SIG_THRSW_BIT 0x1 +# define VC5_QPU_SIG_LDUNIF_BIT 0x2 +# define VC5_QPU_SIG_LDTMU_BIT 0x4 +# define VC5_QPU_SIG_LDVARY_BIT 0x8 + +#define VC5_QPU_COND_SHIFT 46 +#define VC5_QPU_COND_MASK QPU_MASK(52, 46) + +#define VC5_QPU_COND_IFA 0 +#define VC5_QPU_COND_IFB 1 +#define VC5_QPU_COND_IFNA 2 +#define VC5_QPU_COND_IFNB 3 + +#define VC5_QPU_MM QPU_MASK(45, 45) +#define VC5_QPU_MA QPU_MASK(44, 44) + +#define V3D_QPU_WADDR_M_SHIFT 38 +#define 
V3D_QPU_WADDR_M_MASK QPU_MASK(43, 38) + +#define VC5_QPU_BRANCH_ADDR_LOW_SHIFT 35 +#define VC5_QPU_BRANCH_ADDR_LOW_MASK QPU_MASK(55, 35) + +#define V3D_QPU_WADDR_A_SHIFT 32 +#define V3D_QPU_WADDR_A_MASK QPU_MASK(37, 32) + +#define VC5_QPU_BRANCH_COND_SHIFT 32 +#define VC5_QPU_BRANCH_COND_MASK QPU_MASK(34, 32) + +#define VC5_QPU_BRANCH_ADDR_HIGH_SHIFT 24 +#define VC5_QPU_BRANCH_ADDR_HIGH_MASK QPU_MASK(31, 24) + +#define VC5_QPU_OP_ADD_SHIFT 24 +#define VC5_QPU_OP_ADD_MASK QPU_MASK(31, 24) + +#define VC5_QPU_MUL_B_SHIFT 21 +#define VC5_QPU_MUL_B_MASK QPU_MASK(23, 21) + +#define VC5_QPU_BRANCH_MSFIGN_SHIFT 21 +#define VC5_QPU_BRANCH_MSFIGN_MASK QPU_MASK(22, 21) + +#define VC5_QPU_MUL_A_SHIFT 18 +#define VC5_QPU_MUL_A_MASK QPU_MASK(20, 18) + +#define VC5_QPU_ADD_B_SHIFT 15 +#define VC5_QPU_ADD_B_MASK QPU_MASK(17, 15) + +#define VC5_QPU_BRANCH_BDU_SHIFT 15 +#define VC5_QPU_BRANCH_BDU_MASK QPU_MASK(17, 15) + +#define VC5_QPU_BRANCH_UB QPU_MASK(14, 14) + +#define VC5_QPU_ADD_A_SHIFT 12 +#define VC5_QPU_ADD_A_MASK QPU_MASK(14, 12) + +#define VC5_QPU_BRANCH_BDI_SHIFT 12 +#define VC5_QPU_BRANCH_BDI_MASK QPU_MASK(13, 12) + +#define VC5_QPU_RADDR_A_SHIFT 6 +#define VC5_QPU_RADDR_A_MASK QPU_MASK(11, 6) + +#define VC5_QPU_RADDR_B_SHIFT 0 +#define VC5_QPU_RADDR_B_MASK QPU_MASK(5, 0) + +#define THRSW .thrsw = true +#define LDUNIF .ldunif = true +#define LDTMU .ldtmu = true +#define LDVARY .ldvary = true +#define LDVPM .ldvpm = true +#define SMIMM .small_imm = true +#define LDTLB .ldtlb = true +#define LDTLBU .ldtlbu = true +#define UCB .ucb = true +#define ROT .rotate = true +#define WRTMUC .wrtmuc = true + +static const struct v3d_qpu_sig v33_sig_map[] = { + /* MISC R3 R4 R5 */ + [0] = { }, + [1] = { THRSW, }, + [2] = { LDUNIF }, + [3] = { THRSW, LDUNIF }, + [4] = { LDTMU, }, + [5] = { THRSW, LDTMU, }, + [6] = { LDTMU, LDUNIF }, + [7] = { THRSW, LDTMU, LDUNIF }, + [8] = { LDVARY, }, + [9] = { THRSW, LDVARY, }, + [10] = { LDVARY, LDUNIF }, + [11] = { THRSW, LDVARY, LDUNIF }, + 
[12] = { LDVARY, LDTMU, }, + [13] = { THRSW, LDVARY, LDTMU, }, + [14] = { SMIMM, LDVARY, }, + [15] = { SMIMM, }, + [16] = { LDTLB, }, + [17] = { LDTLBU, }, + /* 18-21 reserved */ + [22] = { UCB, }, + [23] = { ROT, }, + [24] = { LDVPM, }, + [25] = { THRSW, LDVPM, }, + [26] = { LDVPM, LDUNIF }, + [27] = { THRSW, LDVPM, LDUNIF }, + [28] = { LDVPM, LDTMU, }, + [29] = { THRSW, LDVPM, LDTMU, }, + [30] = { SMIMM, LDVPM, }, + [31] = { SMIMM, }, +}; + +/** + * Decodes a packed signal field through v33_sig_map. + * + * Returns false when packed_sig is outside the table or selects a reserved + * entry (all-zero contents at a non-zero index); otherwise stores the + * decoded signals in *sig and returns true. + */ +bool +v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo, + uint32_t packed_sig, + struct v3d_qpu_sig *sig) +{ + if (packed_sig >= ARRAY_SIZE(v33_sig_map)) + return false; + + *sig = v33_sig_map[packed_sig]; + + /* Signals with zeroed unpacked contents after element 0 are reserved. + * The "!= 0" has to sit outside the memcmp() call: as part of the + * size argument it evaluates to 1 and only the first byte of the + * struct gets compared. + */ + return (packed_sig == 0 || + memcmp(sig, &v33_sig_map[0], sizeof(*sig)) != 0); +} + +bool +v3d_qpu_sig_pack(const struct v3d_device_info *devinfo, + const struct v3d_qpu_sig *sig, + uint32_t *packed_sig) +{ + static const struct v3d_qpu_sig *map; + + map = v33_sig_map; + + for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) { + if (memcmp(&map[i], sig, sizeof(*sig)) == 0) { + *packed_sig = i; + return true; + } + } + + return false; +} + +bool +v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo, + uint32_t packed_cond, + struct v3d_qpu_flags *cond) +{ + static const enum v3d_qpu_cond cond_map[4] = { + [0] = V3D_QPU_COND_IFA, + [1] = V3D_QPU_COND_IFB, + [2] = V3D_QPU_COND_IFNA, + [3] = V3D_QPU_COND_IFNB, + }; + + cond->ac = V3D_QPU_COND_NONE; + cond->mc = V3D_QPU_COND_NONE; + cond->apf = V3D_QPU_PF_NONE; + cond->mpf = V3D_QPU_PF_NONE; + cond->auf = V3D_QPU_UF_NONE; + cond->muf = V3D_QPU_UF_NONE; + + if (packed_cond == 0) { + return true; + } else if (packed_cond >> 2 == 0) { + cond->apf = packed_cond & 0x3; + } else if (packed_cond >> 4 == 0) { + cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ; + } else if (packed_cond == 0x10) { + return false; + } else if (packed_cond >> 2 == 0x4) { + cond->mpf = packed_cond & 0x3; + } else 
if (packed_cond >> 4 == 0x1) { + cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ; + } else if (packed_cond >> 4 == 0x2) { + cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA; + cond->mpf = packed_cond & 0x3; + } else if (packed_cond >> 4 == 0x3) { + cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA; + cond->apf = packed_cond & 0x3; + } else if (packed_cond >> 6) { + cond->mc = cond_map[(packed_cond >> 4) & 0x3]; + if (((packed_cond >> 2) & 0x3) == 0) { + cond->ac = cond_map[packed_cond & 0x3]; + } else { + cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ; + } + } + + return true; +} + +bool +v3d_qpu_flags_pack(const struct v3d_device_info *devinfo, + const struct v3d_qpu_flags *cond, + uint32_t *packed_cond) +{ +#define AC (1 << 0) +#define MC (1 << 1) +#define APF (1 << 2) +#define MPF (1 << 3) +#define AUF (1 << 4) +#define MUF (1 << 5) + static const struct { + uint8_t flags_present; + uint8_t bits; + } flags_table[] = { + { 0, 0 }, + { APF, 0 }, + { AUF, 0 }, + { MPF, (1 << 4) }, + { MUF, (1 << 4) }, + { AC, (1 << 5) }, + { AC | MPF, (1 << 5) }, + { MC, (1 << 5) | (1 << 4) }, + { MC | APF, (1 << 5) | (1 << 4) }, + { MC | AC, (1 << 6) }, + { MC | AUF, (1 << 6) }, + }; + + uint8_t flags_present = 0; + if (cond->ac != V3D_QPU_COND_NONE) + flags_present |= AC; + if (cond->mc != V3D_QPU_COND_NONE) + flags_present |= MC; + if (cond->apf != V3D_QPU_PF_NONE) + flags_present |= APF; + if (cond->mpf != V3D_QPU_PF_NONE) + flags_present |= MPF; + if (cond->auf != V3D_QPU_UF_NONE) + flags_present |= AUF; + if (cond->muf != V3D_QPU_UF_NONE) + flags_present |= MUF; + + for (int i = 0; i < ARRAY_SIZE(flags_table); i++) { + if (flags_table[i].flags_present != flags_present) + continue; + + *packed_cond = flags_table[i].bits; + + *packed_cond |= cond->apf; + *packed_cond |= cond->mpf; + + if (flags_present & AUF) + *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4; + if (flags_present & MUF) + *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4; + + if 
(flags_present & AC) + *packed_cond |= (cond->ac - V3D_QPU_COND_IFA) << 2; + + if (flags_present & MC) { + if (*packed_cond & (1 << 6)) + *packed_cond |= (cond->mc - + V3D_QPU_COND_IFA) << 4; + else + *packed_cond |= (cond->mc - + V3D_QPU_COND_IFA) << 2; + } + + return true; + } + + return false; +} + +/* Make a mapping of the table of opcodes in the spec. The opcode is + * determined by a combination of the opcode field, and in the case of 0 or + * 1-arg opcodes, the mux_b field as well. + */ +#define MUX_MASK(bot, top) (((1 << (top + 1)) - 1) - ((1 << (bot)) - 1)) +#define ANYMUX MUX_MASK(0, 7) + +struct opcode_desc { + uint8_t opcode_first; + uint8_t opcode_last; + uint8_t mux_b_mask; + uint8_t mux_a_mask; + uint8_t op; + /* 0 if it's the same across V3D versions, or a specific V3D version. */ + uint8_t ver; +}; + +static const struct opcode_desc add_ops[] = { + /* FADD is FADDNF depending on the order of the mux_a/mux_b. */ + { 0, 47, ANYMUX, ANYMUX, V3D_QPU_A_FADD }, + { 0, 47, ANYMUX, ANYMUX, V3D_QPU_A_FADDNF }, + { 53, 55, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK }, + { 56, 56, ANYMUX, ANYMUX, V3D_QPU_A_ADD }, + { 57, 59, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK }, + { 60, 60, ANYMUX, ANYMUX, V3D_QPU_A_SUB }, + { 61, 63, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK }, + { 64, 111, ANYMUX, ANYMUX, V3D_QPU_A_FSUB }, + { 120, 120, ANYMUX, ANYMUX, V3D_QPU_A_MIN }, + { 121, 121, ANYMUX, ANYMUX, V3D_QPU_A_MAX }, + { 122, 122, ANYMUX, ANYMUX, V3D_QPU_A_UMIN }, + { 123, 123, ANYMUX, ANYMUX, V3D_QPU_A_UMAX }, + { 124, 124, ANYMUX, ANYMUX, V3D_QPU_A_SHL }, + { 125, 125, ANYMUX, ANYMUX, V3D_QPU_A_SHR }, + { 126, 126, ANYMUX, ANYMUX, V3D_QPU_A_ASR }, + { 127, 127, ANYMUX, ANYMUX, V3D_QPU_A_ROR }, + /* FMIN is instead FMAX depending on the order of the mux_a/mux_b. 
*/ + { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMIN }, + { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMAX }, + { 176, 180, ANYMUX, ANYMUX, V3D_QPU_A_VFMIN }, + + { 181, 181, ANYMUX, ANYMUX, V3D_QPU_A_AND }, + { 182, 182, ANYMUX, ANYMUX, V3D_QPU_A_OR }, + { 183, 183, ANYMUX, ANYMUX, V3D_QPU_A_XOR }, + + { 184, 184, ANYMUX, ANYMUX, V3D_QPU_A_VADD }, + { 185, 185, ANYMUX, ANYMUX, V3D_QPU_A_VSUB }, + { 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT }, + { 186, 186, 1 << 1, ANYMUX, V3D_QPU_A_NEG }, + { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH }, + { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH }, + { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLBPOP }, + { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF }, + { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF }, + { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 }, + { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX }, + { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX }, + { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR }, + { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA }, + { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA }, + { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB }, + { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB }, + + { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD }, + { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD }, + { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD }, + { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD }, + + { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF }, + { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF }, + { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT }, + { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT }, + { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT }, + + { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP }, + + /* FIXME: MORE COMPLICATED */ + /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */ + + { 192, 239, ANYMUX, ANYMUX, V3D_QPU_A_FCMP }, + { 240, 244, ANYMUX, ANYMUX, V3D_QPU_A_VFMAX }, + + { 245, 245, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FROUND }, + { 245, 245, 1 << 3, ANYMUX, V3D_QPU_A_FTOIN }, + { 245, 245, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FTRUNC }, + { 245, 
245, 1 << 7, ANYMUX, V3D_QPU_A_FTOIZ }, + { 246, 246, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FFLOOR }, + { 246, 246, 1 << 3, ANYMUX, V3D_QPU_A_FTOUZ }, + { 246, 246, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FCEIL }, + { 246, 246, 1 << 7, ANYMUX, V3D_QPU_A_FTOC }, + + { 247, 247, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FDX }, + { 247, 247, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FDY }, + + /* The stvpms are distinguished by the waddr field. */ + { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV }, + { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD }, + { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP }, + + { 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF }, + { 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ }, + { 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF }, +}; + +static const struct opcode_desc mul_ops[] = { + { 1, 1, ANYMUX, ANYMUX, V3D_QPU_M_ADD }, + { 2, 2, ANYMUX, ANYMUX, V3D_QPU_M_SUB }, + { 3, 3, ANYMUX, ANYMUX, V3D_QPU_M_UMUL24 }, + { 4, 8, ANYMUX, ANYMUX, V3D_QPU_M_VFMUL }, + { 9, 9, ANYMUX, ANYMUX, V3D_QPU_M_SMUL24 }, + { 10, 10, ANYMUX, ANYMUX, V3D_QPU_M_MULTOP }, + { 14, 14, ANYMUX, ANYMUX, V3D_QPU_M_FMOV }, + { 15, 15, MUX_MASK(0, 3), ANYMUX, V3D_QPU_M_FMOV }, + { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP, 0 }, + { 15, 15, 1 << 7, ANYMUX, V3D_QPU_M_MOV }, + { 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL }, +}; + +static const struct opcode_desc * +lookup_opcode(const struct opcode_desc *opcodes, size_t num_opcodes, + uint32_t opcode, uint32_t mux_a, uint32_t mux_b) +{ + for (int i = 0; i < num_opcodes; i++) { + const struct opcode_desc *op_desc = &opcodes[i]; + + if (opcode < op_desc->opcode_first || + opcode > op_desc->opcode_last) + continue; + + if (!(op_desc->mux_b_mask & (1 << mux_b))) + continue; + + if (!(op_desc->mux_a_mask & (1 << mux_a))) + continue; + + return op_desc; + } + + return NULL; +} + +static bool +v3d_qpu_float32_unpack_unpack(uint32_t packed, + enum v3d_qpu_input_unpack *unpacked) +{ + switch (packed) { + case 0: + *unpacked = V3D_QPU_UNPACK_ABS; + return true; + case 1: + 
*unpacked = V3D_QPU_UNPACK_NONE; + return true; + case 2: + *unpacked = V3D_QPU_UNPACK_L; + return true; + case 3: + *unpacked = V3D_QPU_UNPACK_H; + return true; + default: + return false; + } +} + +static bool +v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked, + uint32_t *packed) +{ + switch (unpacked) { + case V3D_QPU_UNPACK_ABS: + *packed = 0; + return true; + case V3D_QPU_UNPACK_NONE: + *packed = 1; + return true; + case V3D_QPU_UNPACK_L: + *packed = 2; + return true; + case V3D_QPU_UNPACK_H: + *packed = 3; + return true; + default: + return false; + } +} + +static bool +v3d_qpu_float16_unpack_unpack(uint32_t packed, + enum v3d_qpu_input_unpack *unpacked) +{ + switch (packed) { + case 0: + *unpacked = V3D_QPU_UNPACK_NONE; + return true; + case 1: + *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16; + return true; + case 2: + *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16; + return true; + case 3: + *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16; + return true; + case 4: + *unpacked = V3D_QPU_UNPACK_SWAP_16; + return true; + default: + return false; + } +} + +static bool +v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked, + uint32_t *packed) +{ + switch (unpacked) { + case V3D_QPU_UNPACK_NONE: + *packed = 0; + return true; + case V3D_QPU_UNPACK_REPLICATE_32F_16: + *packed = 1; + return true; + case V3D_QPU_UNPACK_REPLICATE_L_16: + *packed = 2; + return true; + case V3D_QPU_UNPACK_REPLICATE_H_16: + *packed = 3; + return true; + case V3D_QPU_UNPACK_SWAP_16: + *packed = 4; + return true; + default: + return false; + } +} + +/** + * Encodes a float32 output pack mode: NONE -> 0, L -> 1, H -> 2. + * + * Takes the output-pack enum (the one the case labels belong to), not the + * input-unpack enum. Returns false for any other value. + */ +static bool +v3d_qpu_float32_pack_pack(enum v3d_qpu_output_pack unpacked, + uint32_t *packed) +{ + switch (unpacked) { + case V3D_QPU_PACK_NONE: + *packed = 0; + return true; + case V3D_QPU_PACK_L: + *packed = 1; + return true; + case V3D_QPU_PACK_H: + *packed = 2; + return true; + default: + return false; + } +} + +static bool +v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, + struct v3d_qpu_instr 
*instr) +{ + uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_ADD); + uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_A); + uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_B); + uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A); + + uint32_t map_op = op; + /* Some big clusters of opcodes are replicated with unpack + * flags + */ + if (map_op >= 249 && map_op <= 251) + map_op = (map_op - 249 + 245); + if (map_op >= 253 && map_op <= 255) + map_op = (map_op - 253 + 245); + + const struct opcode_desc *desc = + lookup_opcode(add_ops, ARRAY_SIZE(add_ops), + map_op, mux_a, mux_b); + if (!desc) + return false; + + instr->alu.add.op = desc->op; + + /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the + * operands. + */ + if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) { + if (instr->alu.add.op == V3D_QPU_A_FMIN) + instr->alu.add.op = V3D_QPU_A_FMAX; + if (instr->alu.add.op == V3D_QPU_A_FADD) + instr->alu.add.op = V3D_QPU_A_FADDNF; + } + + /* Some QPU ops require a bit more than just basic opcode and mux a/b + * comparisons to distinguish them. 
+ */ + switch (instr->alu.add.op) { + case V3D_QPU_A_STVPMV: + case V3D_QPU_A_STVPMD: + case V3D_QPU_A_STVPMP: + switch (waddr) { + case 0: + instr->alu.add.op = V3D_QPU_A_STVPMV; + break; + case 1: + instr->alu.add.op = V3D_QPU_A_STVPMD; + break; + case 2: + instr->alu.add.op = V3D_QPU_A_STVPMP; + break; + default: + return false; + } + break; + default: + break; + } + + switch (instr->alu.add.op) { + case V3D_QPU_A_FADD: + case V3D_QPU_A_FADDNF: + case V3D_QPU_A_FSUB: + case V3D_QPU_A_FMIN: + case V3D_QPU_A_FMAX: + case V3D_QPU_A_FCMP: + instr->alu.add.output_pack = (op >> 4) & 0x3; + + if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, + &instr->alu.add.a_unpack)) { + return false; + } + + if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3, + &instr->alu.add.b_unpack)) { + return false; + } + break; + + case V3D_QPU_A_FFLOOR: + case V3D_QPU_A_FROUND: + case V3D_QPU_A_FTRUNC: + case V3D_QPU_A_FCEIL: + case V3D_QPU_A_FDX: + case V3D_QPU_A_FDY: + instr->alu.add.output_pack = mux_b & 0x3; + + if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, + &instr->alu.add.a_unpack)) { + return false; + } + break; + + case V3D_QPU_A_FTOIN: + case V3D_QPU_A_FTOIZ: + case V3D_QPU_A_FTOUZ: + case V3D_QPU_A_FTOC: + instr->alu.add.output_pack = V3D_QPU_PACK_NONE; + + if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, + &instr->alu.add.a_unpack)) { + return false; + } + break; + + case V3D_QPU_A_VFMIN: + case V3D_QPU_A_VFMAX: + if (!v3d_qpu_float16_unpack_unpack(op & 0x7, + &instr->alu.add.a_unpack)) { + return false; + } + + instr->alu.add.output_pack = V3D_QPU_PACK_NONE; + instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE; + break; + + default: + instr->alu.add.output_pack = V3D_QPU_PACK_NONE; + instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE; + instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE; + break; + } + + instr->alu.add.a = mux_a; + instr->alu.add.b = mux_b; + instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A); + instr->alu.add.magic_write = packed_inst & VC5_QPU_MA; 
+ + return true; +} + +static bool +v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst, + struct v3d_qpu_instr *instr) +{ + uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_MUL); + uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_A); + uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_B); + + { + const struct opcode_desc *desc = + lookup_opcode(mul_ops, ARRAY_SIZE(mul_ops), + op, mux_a, mux_b); + if (!desc) + return false; + + instr->alu.mul.op = desc->op; + } + + switch (instr->alu.mul.op) { + case V3D_QPU_M_FMUL: + instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1; + + if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3, + &instr->alu.mul.a_unpack)) { + return false; + } + + if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3, + &instr->alu.mul.b_unpack)) { + return false; + } + + break; + + case V3D_QPU_M_FMOV: + instr->alu.mul.output_pack = (((op & 1) << 1) + + ((mux_b >> 2) & 1)); + + if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3, + &instr->alu.mul.a_unpack)) { + return false; + } + + break; + default: + instr->alu.mul.output_pack = V3D_QPU_PACK_NONE; + instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE; + instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE; + break; + } + + instr->alu.mul.a = mux_a; + instr->alu.mul.b = mux_b; + instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M); + instr->alu.mul.magic_write = packed_inst & VC5_QPU_MM; + + return true; +} + +static bool +v3d_qpu_add_pack(const struct v3d_device_info *devinfo, + const struct v3d_qpu_instr *instr, uint64_t *packed_instr) +{ + uint32_t waddr = instr->alu.add.waddr; + uint32_t mux_a = instr->alu.add.a; + uint32_t mux_b = instr->alu.add.b; + int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op); + const struct opcode_desc *desc; + + int opcode; + for (desc = add_ops; desc != &add_ops[ARRAY_SIZE(add_ops)]; + desc++) { + if (desc->op == instr->alu.add.op) + break; + } + if (desc == &add_ops[ARRAY_SIZE(add_ops)]) + return false; + + opcode = 
desc->opcode_first; + + /* If an operation doesn't use an arg, its mux values may be used to + * identify the operation type. + */ + if (nsrc < 2) + mux_b = ffs(desc->mux_b_mask) - 1; + + if (nsrc < 1) + mux_a = ffs(desc->mux_a_mask) - 1; + + switch (instr->alu.add.op) { + case V3D_QPU_A_STVPMV: + waddr = 0; + break; + case V3D_QPU_A_STVPMD: + waddr = 1; + break; + case V3D_QPU_A_STVPMP: + waddr = 2; + break; + default: + break; + } + + switch (instr->alu.add.op) { + case V3D_QPU_A_FADD: + case V3D_QPU_A_FADDNF: + case V3D_QPU_A_FSUB: + case V3D_QPU_A_FMIN: + case V3D_QPU_A_FMAX: + case V3D_QPU_A_FCMP: { + uint32_t output_pack; + uint32_t a_unpack; + uint32_t b_unpack; + + if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack, + &output_pack)) { + return false; + } + opcode |= output_pack << 4; + + if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack, + &a_unpack)) { + return false; + } + + if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack, + &b_unpack)) { + return false; + } + + /* These operations with commutative operands are + * distinguished by which order their operands come in. 
+ */ + bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b; + if (((instr->alu.add.op == V3D_QPU_A_FMIN || + instr->alu.add.op == V3D_QPU_A_FADD) && ordering) || + ((instr->alu.add.op == V3D_QPU_A_FMAX || + instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) { + uint32_t temp; + + temp = a_unpack; + a_unpack = b_unpack; + b_unpack = temp; + + temp = mux_a; + mux_a = mux_b; + mux_b = temp; + } + + opcode |= a_unpack << 2; + opcode |= b_unpack << 0; + break; + } + + case V3D_QPU_A_FFLOOR: + case V3D_QPU_A_FROUND: + case V3D_QPU_A_FTRUNC: + case V3D_QPU_A_FCEIL: + case V3D_QPU_A_FDX: + case V3D_QPU_A_FDY: { + uint32_t packed; + + if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack, + &packed)) { + return false; + } + mux_b |= packed; + + if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack, + &packed)) { + return false; + } + if (packed == 0) + return false; + opcode |= packed << 2; + break; + } + + case V3D_QPU_A_FTOIN: + case V3D_QPU_A_FTOIZ: + case V3D_QPU_A_FTOUZ: + case V3D_QPU_A_FTOC: + if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE) + return false; + + uint32_t packed; + if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack, + &packed)) { + return false; + } + if (packed == 0) + return false; + opcode |= packed << 2; + + break; + + case V3D_QPU_A_VFMIN: + case V3D_QPU_A_VFMAX: + if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE || + instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) { + return false; + } + + if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack, + &packed)) { + return false; + } + opcode |= packed; + break; + + default: + if (instr->alu.add.op != V3D_QPU_A_NOP && + (instr->alu.add.output_pack != V3D_QPU_PACK_NONE || + instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE || + instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) { + return false; + } + break; + } + + *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_ADD_A); + *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_ADD_B); + *packed_instr |= QPU_SET_FIELD(opcode, 
VC5_QPU_OP_ADD);
+        *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
+        if (instr->alu.add.magic_write)
+                *packed_instr |= VC5_QPU_MA;
+
+        return true;
+}
+
+/**
+ * Packs the mul-ALU half of an ALU instruction.
+ *
+ * ORs the mul opcode, both input muxes, the write address and the
+ * magic-write bit into *packed_instr; bits that are already set are kept.
+ *
+ * Returns false if instr->alu.mul.op is not listed in mul_ops[] or if one of
+ * the output-pack / input-unpack modes cannot be encoded.
+ */
+static bool
+v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
+                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
+{
+        uint32_t mux_a = instr->alu.mul.a;
+        uint32_t mux_b = instr->alu.mul.b;
+        int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);
+        const struct opcode_desc *desc;
+
+        /* Linear search for the descriptor of this op. */
+        for (desc = mul_ops; desc != &mul_ops[ARRAY_SIZE(mul_ops)];
+             desc++) {
+                if (desc->op == instr->alu.mul.op)
+                        break;
+        }
+        if (desc == &mul_ops[ARRAY_SIZE(mul_ops)])
+                return false;
+
+        uint32_t opcode = desc->opcode_first;
+
+        /* Some opcodes have a single valid value for their mux a/b, so set
+         * that here. If mux a/b determine packing, it will be set below.
+         */
+        if (nsrc < 2)
+                mux_b = ffs(desc->mux_b_mask) - 1;
+
+        if (nsrc < 1)
+                mux_a = ffs(desc->mux_a_mask) - 1;
+
+        switch (instr->alu.mul.op) {
+        case V3D_QPU_M_FMUL: {
+                uint32_t packed;
+
+                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
+                                               &packed)) {
+                        return false;
+                }
+                /* No need for a +1 because desc->opcode_first has a 1 in this
+                 * field.
+                 */
+                opcode += packed << 4;
+
+                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
+                                                 &packed)) {
+                        return false;
+                }
+                opcode |= packed << 2;
+
+                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack,
+                                                 &packed)) {
+                        return false;
+                }
+                opcode |= packed << 0;
+                break;
+        }
+
+        case V3D_QPU_M_FMOV: {
+                uint32_t packed;
+
+                /* The output pack is split: its high bit goes into the
+                 * opcode, its low bit into bit 2 of mux_b.
+                 */
+                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
+                                               &packed)) {
+                        return false;
+                }
+                opcode |= (packed >> 1) & 1;
+                mux_b = (packed & 1) << 2;
+
+                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
+                                                 &packed)) {
+                        return false;
+                }
+                mux_b |= packed;
+                break;
+        }
+
+        default:
+                break;
+        }
+
+        *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_MUL_A);
+        *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_MUL_B);
+
+        *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_MUL);
+        *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
+        if (instr->alu.mul.magic_write)
+                *packed_instr |= VC5_QPU_MM;
+
+        return true;
+}
+
+/**
+ * Decodes a packed ALU instruction into *instr.
+ *
+ * Fills in the signal bits, the flags, both read addresses and the add and
+ * mul halves.  Returns false as soon as one of the sub-decoders rejects its
+ * field; *instr may then be partially written.
+ */
+static bool
+v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
+                         uint64_t packed_instr,
+                         struct v3d_qpu_instr *instr)
+{
+        instr->type = V3D_QPU_INSTR_TYPE_ALU;
+
+        if (!v3d_qpu_sig_unpack(devinfo,
+                                QPU_GET_FIELD(packed_instr, VC5_QPU_SIG),
+                                &instr->sig))
+                return false;
+
+        if (!v3d_qpu_flags_unpack(devinfo,
+                                  QPU_GET_FIELD(packed_instr, VC5_QPU_COND),
+                                  &instr->flags))
+                return false;
+
+        instr->raddr_a = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_A);
+        instr->raddr_b = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_B);
+
+        if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
+                return false;
+
+        if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr))
+                return false;
+
+        return true;
+}
+
+/**
+ * Decodes a packed branch instruction into *instr.
+ *
+ * Returns false for a condition or msfign encoding that has no decoded
+ * counterpart.  instr->branch.bdu is only written when the UB bit is set.
+ */
+static bool
+v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
+                            uint64_t packed_instr,
+                            struct v3d_qpu_instr *instr)
+{
+        instr->type = V3D_QPU_INSTR_TYPE_BRANCH;
+
+        /* Encoding 0 is "always"; encodings from 2 upwards map onto the
+         * conditions starting at V3D_QPU_BRANCH_COND_A0.  Encoding 1 is never
+         * produced by v3d_qpu_instr_pack_branch(), and without the explicit
+         * lower bound the unsigned (cond - 2) would wrap around before the
+         * range check, so reject it here.
+         */
+        uint32_t cond = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_COND);
+        if (cond == 0)
+                instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;
+        else if (cond >= 2 &&
+                 V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=
+                 V3D_QPU_BRANCH_COND_ALLNA)
+                instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2);
+        else
+                return false;
+
+        uint32_t msfign = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_MSFIGN);
+        if (msfign == 3)
+                return false;
+        instr->branch.msfign = msfign;
+
+        instr->branch.bdi = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_BDI);
+
+        instr->branch.ub = packed_instr & VC5_QPU_BRANCH_UB;
+        if (instr->branch.ub) {
+                instr->branch.bdu = QPU_GET_FIELD(packed_instr,
+                                                  VC5_QPU_BRANCH_BDU);
+        }
+
+        instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,
+                                              VC5_QPU_RADDR_A);
+
+        /* The offset is stored in two pieces: the low field holds the bits
+         * from bit 3 upwards, the high field the bits from bit 24 upwards.
+         */
+        instr->branch.offset = 0;
+
+        instr->branch.offset +=
+                QPU_GET_FIELD(packed_instr,
+                              VC5_QPU_BRANCH_ADDR_LOW) << 3;
+
+        instr->branch.offset +=
+                QPU_GET_FIELD(packed_instr,
+                              VC5_QPU_BRANCH_ADDR_HIGH) << 24;
+
+        return true;
+}
+
+/**
+ * Decodes a packed 64-bit QPU instruction into *instr.
+ *
+ * A non-zero mul opcode field selects the ALU decoder.  Otherwise the
+ * instruction is only accepted as a branch, which is recognised by
+ * (sig & 24) == 16; anything else returns false.
+ */
+bool
+v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
+                     uint64_t packed_instr,
+                     struct v3d_qpu_instr *instr)
+{
+        if (QPU_GET_FIELD(packed_instr, VC5_QPU_OP_MUL) != 0) {
+                return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);
+        } else {
+                uint32_t sig = QPU_GET_FIELD(packed_instr, VC5_QPU_SIG);
+
+                if ((sig & 24) == 16) {
+                        return v3d_qpu_instr_unpack_branch(devinfo, packed_instr,
+                                                           instr);
+                } else {
+                        return false;
+                }
+        }
+}
+
+/**
+ * Packs an ALU instruction: signal bits, read addresses, add and mul halves
+ * and the flags field are ORed into *packed_instr.
+ *
+ * Returns false if any of the sub-packers rejects its part.
+ */
+static bool
+v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
+                       const struct v3d_qpu_instr *instr,
+                       uint64_t *packed_instr)
+{
+        uint32_t sig;
+        if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))
+                return false;
+        *packed_instr |= QPU_SET_FIELD(sig, VC5_QPU_SIG);
+
+        if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {
+                *packed_instr |= QPU_SET_FIELD(instr->raddr_a, VC5_QPU_RADDR_A);
+                *packed_instr |= QPU_SET_FIELD(instr->raddr_b, VC5_QPU_RADDR_B);
+
+                if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
+                        return false;
+                if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr))
+                        return false;
+
+                uint32_t flags;
+                if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
+                        return false;
+                *packed_instr |= QPU_SET_FIELD(flags, VC5_QPU_COND);
+        }
+
+        return true;
+}
+
+/**
+ * Packs a branch instruction into *packed_instr.
+ *
+ * The signal field is set to 16, which is the pattern
+ * v3d_qpu_instr_unpack() uses to recognise a branch.  The condition field is
+ * left at 0 for V3D_QPU_BRANCH_COND_ALWAYS and is 2 + (cond - A0) otherwise.
+ * Always returns true.
+ */
+static bool
+v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
+                          const struct v3d_qpu_instr *instr,
+                          uint64_t *packed_instr)
+{
+        *packed_instr |= QPU_SET_FIELD(16, VC5_QPU_SIG);
+
+        if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
+                *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -
+                                                    V3D_QPU_BRANCH_COND_A0),
+                                               VC5_QPU_BRANCH_COND);
+        }
+
+        *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
+                                       VC5_QPU_BRANCH_MSFIGN);
+
+        *packed_instr |= QPU_SET_FIELD(instr->branch.bdi,
+                                       VC5_QPU_BRANCH_BDI);
+
+        if (instr->branch.ub) {
+                *packed_instr |= VC5_QPU_BRANCH_UB;
+                *packed_instr |= QPU_SET_FIELD(instr->branch.bdu,
+                                               VC5_QPU_BRANCH_BDU);
+        }
+
+        switch (instr->branch.bdi) {
+        case V3D_QPU_BRANCH_DEST_ABS:
+        case V3D_QPU_BRANCH_DEST_REL:
+                /* Mirror of the split done in the unpacker: bits below 24
+                 * (shifted down by 3) go to ADDR_LOW, the rest to ADDR_HIGH.
+                 */
+                *packed_instr |= QPU_SET_FIELD((instr->branch.offset &
+                                                ~0xff000000) >> 3,
+                                               VC5_QPU_BRANCH_ADDR_LOW);
+
+                *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,
+                                               VC5_QPU_BRANCH_ADDR_HIGH);
+                /* fallthrough: raddr_a is packed for these as well, which
+                 * keeps unpack -> pack lossless since the unpacker always
+                 * reads that field.
+                 */
+        case V3D_QPU_BRANCH_DEST_REGFILE:
+                *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
+                                               VC5_QPU_RADDR_A);
+                break;
+
+        default:
+                /* A uniform branch can still name the register file through
+                 * bdu; pack raddr_a for it too, otherwise unpack -> pack
+                 * drops the register.  bdu is only meaningful when ub is
+                 * set, so it is not read otherwise.
+                 * NOTE(review): confirm against the hardware encoding.
+                 */
+                if (instr->branch.ub &&
+                    instr->branch.bdu == V3D_QPU_BRANCH_DEST_REGFILE) {
+                        *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
+                                                       VC5_QPU_RADDR_A);
+                }
+                break;
+        }
+
+        return true;
+}
+
+/**
+ * Encodes *instr into a 64-bit QPU instruction.
+ *
+ * *packed_instr is cleared first and then filled in by the ALU or branch
+ * packer according to instr->type.  Returns false for any other type or if
+ * the selected packer fails.
+ */
+bool
+v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
+                   const struct v3d_qpu_instr *instr,
+                   uint64_t *packed_instr)
+{
+        *packed_instr = 0;
+
+        switch (instr->type) {
+        case V3D_QPU_INSTR_TYPE_ALU:
+                return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr);
+        case V3D_QPU_INSTR_TYPE_BRANCH:
+                return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr);
+        default:
+                return false;
+        }
+}
diff --git a/src/broadcom/qpu/qpu_validate.c b/src/broadcom/qpu/qpu_validate.c
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/src/broadcom/qpu/tests/.gitignore b/src/broadcom/qpu/tests/.gitignore
new file mode 100644
index
0000000000..d2cf70a7ca
--- /dev/null
+++ b/src/broadcom/qpu/tests/.gitignore
@@ -0,0 +1 @@
+qpu_disasm
diff --git a/src/broadcom/qpu/tests/qpu_disasm.c b/src/broadcom/qpu/tests/qpu_disasm.c
new file mode 100644
index 0000000000..c7f6476def
--- /dev/null
+++ b/src/broadcom/qpu/tests/qpu_disasm.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright © 2016 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include "util/macros.h"
+#include "broadcom/common/v3d_device_info.h"
+#include "broadcom/qpu/qpu_disasm.h"
+#include "broadcom/qpu/qpu_instr.h"
+
+/* Known-good encodings: device version, packed 64-bit instruction and the
+ * exact string v3d_qpu_disasm() is expected to produce for it.
+ */
+static const struct {
+        int ver;
+        uint64_t inst;
+        const char *expected;
+} tests[] = {
+        { 33, 0x3d003186bb800000ull, "nop ; nop ; ldvary" },
+        { 33, 0x3c20318105829000ull, "fadd r1, r1, r5 ; nop ; thrsw" },
+        { 33, 0x3c403186bb81d000ull, "vpmsetup -, r5 ; nop ; ldunif" },
+        { 33, 0x3f003186bb800000ull, "nop ; nop ; ldvpm" },
+        { 33, 0x3c002380b6edb000ull, "or rf0, r3, r3 ; mov vpm, r3" },
+        { 33, 0x57403006bbb80000ull, "nop ; fmul r0, rf0, r5 ; ldvpm; ldunif" },
+
+        /* branch conditions */
+        { 33, 0x02000006002034c0ull, "b.anyap rf19" },
+        { 33, 0x02679356b4201000ull, "b.anyap -1268280496" },
+        { 33, 0x02b76a2dd0400000ull, "b.anynaq zero_addr+0xd0b76a28" },
+        { 33, 0x0200000500402000ull, "b.anynaq lri" },
+        { 33, 0x0216fe167301c8c0ull, "bu.anya zero_addr+0x7316fe10, rf35" },
+        { 33, 0x020000050040e000ull, "bu.anynaq lri, r:unif" },
+        { 33, 0x0200000300006000ull, "bu.na0 lri, a:unif" },
+
+        /* Special waddr names */
+        { 33, 0x3c00318735808000ull, "vfpack tlb, r0, r1 ; nop" },
+        { 33, 0xe0571c938e8d5000ull, "fmax.andc recip, r5.h, r2.l; fmul.ifb rf50.h, r3.l, r4.abs; ldunif" },
+        { 33, 0xc04098d4382c9000ull, "add.pushn rsqrt, r1, r1; fmul rf35.h, r3.abs, r1.abs; ldunif" },
+        { 33, 0x481edcd6b3184500ull, "vfmin.norn log, r4.hh, r0; fmul.ifnb rf51, rf20.abs, r0.l" },
+        { 33, 0x041618d57c453000ull, "shl.andn exp, r3, r2; add.ifb rf35, r1, r2" },
+        { 33, 0x7048e5da49272800ull, "fsub.ifa rf26, r2.l, rf32; fmul.pushc sin, r1.h, r1.abs; ldunif" },
+
+};
+
+/* Exchanges the two mux selectors pointed to by a and b. */
+static void
+swap_mux(enum v3d_qpu_mux *a, enum v3d_qpu_mux *b)
+{
+        enum v3d_qpu_mux t = *a;
+        *a = *b;
+        *b = t;
+}
+
+/* Exchanges the two input-unpack modes pointed to by a and b. */
+static void
+swap_pack(enum v3d_qpu_input_unpack *a, enum v3d_qpu_input_unpack *b)
+{
+        enum v3d_qpu_input_unpack t = *a;
+        *a = *b;
+        *b = t;
+}
+
+/* For every entry in tests[]: disassemble the packed instruction and compare
+ * against the expected text, then unpack it, pack it again and require the
+ * result to be bit-identical to the input.  Returns 0 if all entries pass,
+ * 1 otherwise.
+ */
+int
+main(int argc, char **argv)
+{
+        struct v3d_device_info devinfo = { };
+        int retval = 0;
+
+        for (size_t i = 0; i < ARRAY_SIZE(tests); i++) {
+                devinfo.ver = tests[i].ver;
+
+                printf("Testing v%d.%d 0x%016llx... ",
+                       devinfo.ver / 10, devinfo.ver % 10,
+                       (unsigned long long)tests[i].inst);
+
+                const char *disasm_output = v3d_qpu_disasm(&devinfo,
+                                                           tests[i].inst);
+
+                if (strcmp(disasm_output, tests[i].expected) != 0) {
+                        printf("FAIL\n");
+                        printf(" Expected: \"%s\"\n", tests[i].expected);
+                        printf(" Got: \"%s\"\n", disasm_output);
+                        retval = 1;
+                        continue;
+                }
+
+                struct v3d_qpu_instr instr;
+                if (!v3d_qpu_instr_unpack(&devinfo, tests[i].inst, &instr)) {
+                        printf("FAIL (unpack) %s\n", tests[i].expected);
+                        retval = 1;
+                        continue;
+                }
+
+                if (instr.type == V3D_QPU_INSTR_TYPE_ALU) {
+                        switch (instr.alu.add.op) {
+                        case V3D_QPU_A_FADD:
+                        case V3D_QPU_A_FADDNF:
+                        case V3D_QPU_A_FMIN:
+                        case V3D_QPU_A_FMAX:
+                                /* Swap the operands to be sure that we test
+                                 * how the QPUs distinguish between these ops.
+                                 */
+                                swap_mux(&instr.alu.add.a,
+                                         &instr.alu.add.b);
+                                swap_pack(&instr.alu.add.a_unpack,
+                                          &instr.alu.add.b_unpack);
+                                break;
+                        default:
+                                break;
+                        }
+                }
+
+                uint64_t repack;
+                if (!v3d_qpu_instr_pack(&devinfo, &instr, &repack)) {
+                        printf("FAIL (pack) %s\n", tests[i].expected);
+                        retval = 1;
+                        continue;
+                }
+
+                if (repack != tests[i].inst) {
+                        printf("FAIL (repack) 0x%016llx\n",
+                               (unsigned long long)repack);
+                        printf(" Expected: \"%s\"\n", tests[i].expected);
+                        const char *redisasm = v3d_qpu_disasm(&devinfo, repack);
+                        printf(" Got: \"%s\"\n", redisasm);
+                        retval = 1;
+                        /* Skip the PASS line below for a failed entry. */
+                        continue;
+                }
+
+                printf("PASS\n");
+        }
+
+        return retval;
+}