Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

demmt: parse USER.DMA_PUT for pre-nv50 cards

  • Loading branch information...
commit b3d396c1a38cfda8cd759dadd0df065f4bf6c9a7 1 parent f07c3d6
Marcin Ślusarz mslusarz authored
Showing with 234 additions and 66 deletions.
  1. +81 −30 rnn/demmt.c
  2. +7 −5 rnn/demmt.h
  3. +134 −30 rnn/demmt_pushbuf.c
  4. +12 −1 rnn/demmt_pushbuf.h
111 rnn/demmt.c
View
@@ -47,17 +47,18 @@ static int writes_since_last_full_dump = 0; // NOTE: you cannot rely too much on
static int writes_since_last_dump = 0;
static int last_wreg_id = -1;
static int compress_clears = 1;
-static int ib_buffer = -1;
+static int pb_pointer_buffer = -1;
static int dump_ioctls = 0;
static int print_gpu_addresses = 0;
struct rnndomain *domain;
struct rnndb *rnndb;
int chipset;
+int ib_supported;
int guess_invalid_pushbuf = 1;
int invalid_pushbufs_visible = 1;
int decode_invalid_buffers = 1;
-int find_ib_buffer = 0;
+int find_pb_pointer = 0;
int quiet = 0;
int disassemble_shaders = 1;
const struct envy_colors *colors = NULL;
@@ -87,29 +88,54 @@ static void dump_writes(struct buffer *buf)
struct region *cur = buf->written_regions;
struct pushbuf_decode_state *state = &buf->state.pushbuf;
struct ib_decode_state *ibstate = &buf->state.ib;
+ struct user_decode_state *ustate = &buf->state.user;
char pushbuf_desc[1024];
char comment[2][50];
comment[0][0] = 0;
comment[1][0] = 0;
- if (find_ib_buffer)
+ if (find_pb_pointer)
{
- if (cur->start != 0 || cur->end < 8)
- return;
- uint32_t *data = (uint32_t *)buf->data;
- if (!data[0] || !data[1])
- return;
- if (data[0] & 0x3)
- return;
- uint64_t gpu_addr = (((uint64_t)(data[1] & 0xff)) << 32) | (data[0] & 0xfffffffc);
- struct buffer *buf2;
- for (buf2 = buffers_list; buf2 != NULL; buf2 = buf2->next)
+ if (ib_supported)
+ {
+ if (cur->start != 0 || cur->end < 8)
+ return;
+ uint32_t *data = (uint32_t *)buf->data;
+ if (!data[0] || !data[1])
+ return;
+ if (data[0] & 0x3)
+ return;
+ uint64_t gpu_addr = (((uint64_t)(data[1] & 0xff)) << 32) | (data[0] & 0xfffffffc);
+ struct buffer *buf2;
+ for (buf2 = buffers_list; buf2 != NULL; buf2 = buf2->next)
+ {
+ if (buf2->gpu_start == gpu_addr &&
+ buf2->length >= 4 * ((data[1] & 0x7fffffff) >> 10))
+ {
+ fprintf(stdout, "possible IB buffer: %d\n", buf->id);
+ break;
+ }
+ }
+ }
+ else
{
- if (buf2->gpu_start == gpu_addr &&
- buf2->length >= 4 * ((data[1] & 0x7fffffff) >> 10))
+ if (buf->type == USER) // already checked
+ return;
+ if (cur->start != 0x40 || cur->end < 0x44)
+ return;
+ uint32_t gpu_addr = *(uint32_t *)&(buf->data[0x40]);
+ if (!gpu_addr)
+ return;
+
+ struct buffer *buf2;
+ for (buf2 = buffers_list; buf2 != NULL; buf2 = buf2->next)
{
- fprintf(stdout, "possible IB buffer: %d\n", buf->id);
- break;
+ if (gpu_addr >= buf2->gpu_start && gpu_addr < buf2->gpu_start + buf2->length)
+ {
+ fprintf(stdout, "possible USER buffer: %d\n", buf->id);
+ buf->type = USER;
+ break;
+ }
}
}
@@ -174,7 +200,7 @@ static void dump_writes(struct buffer *buf)
if (buf->type == IB)
ib_decode_start(ibstate);
- else
+ else if (buf->type == PUSH)
{
if (addr != state->next_command_offset)
{
@@ -188,6 +214,11 @@ static void dump_writes(struct buffer *buf)
pushbuf_decode_start(state);
}
}
+ else if (buf->type == USER)
+ {
+ if (0)
+ user_decode_start(ustate);
+ }
while (addr < cur->end)
{
@@ -202,9 +233,9 @@ static void dump_writes(struct buffer *buf)
if (!quiet)
fprintf(stdout, "w %d:0x%04x%s, 0x%08x %s\n", buf->id, addr, comment[0], *(uint32_t *)(data + addr), pushbuf_desc);
}
- else
+ else if (buf->type == PUSH)
{
- if (ib_buffer != -1 || quiet)
+ if (pb_pointer_buffer != -1 || quiet)
pushbuf_desc[0] = 0;
else if (state->pushbuf_invalid == 0 || decode_invalid_buffers)
pushbuf_decode(state, *(uint32_t *)(data + addr), pushbuf_desc, NULL, 0);
@@ -218,6 +249,12 @@ static void dump_writes(struct buffer *buf)
if (!quiet)
fprintf(stdout, "w %d:0x%04x%s, 0x%08x %s%s\n", buf->id, addr, comment[0], *(uint32_t *)(data + addr), state->pushbuf_invalid ? "INVALID " : "", pushbuf_desc);
}
+ else if (buf->type == USER)
+ {
+ user_decode(ustate, addr, *(uint32_t *)(data + addr), pushbuf_desc);
+ if (!quiet)
+ fprintf(stdout, "w %d:0x%04x%s, 0x%08x %s\n", buf->id, addr, comment[0], *(uint32_t *)(data + addr), pushbuf_desc);
+ }
addr += 4;
left -= 4;
@@ -240,8 +277,10 @@ static void dump_writes(struct buffer *buf)
if (buf->type == IB)
ib_decode_end(ibstate);
- else
+ else if (buf->type == PUSH)
pushbuf_decode_end(state);
+ else if (buf->type == USER)
+ user_decode_end(ustate);
cur = cur->next;
}
@@ -562,7 +601,7 @@ static void clear_buffered_writes()
static void demmt_memread(struct mmt_read *w, void *state)
{
- if (find_ib_buffer)
+ if (find_pb_pointer)
return;
char comment[50];
@@ -663,8 +702,13 @@ static void demmt_mmap(struct mmt_mmap *mm, void *state)
buf->cpu_start = mm->start;
buf->length = mm->len;
buf->mmap_offset = mm->offset;
- if (mm->id == ib_buffer)
- buf->type = IB;
+ if (mm->id == pb_pointer_buffer)
+ {
+ if (ib_supported)
+ buf->type = IB;
+ else
+ buf->type = USER;
+ }
if (buffers_list)
buffers_list->prev = buf;
buf->next = buffers_list;
@@ -1003,8 +1047,13 @@ static void demmt_nv_mmap(struct mmt_nvidia_mmap *mm, void *state)
buf->mmap_offset = mm->offset;
buf->data1 = mm->data1;
buf->data2 = mm->data2;
- if (mm->id == ib_buffer)
- buf->type = IB;
+ if (mm->id == pb_pointer_buffer)
+ {
+ if (ib_supported)
+ buf->type = IB;
+ else
+ buf->type = USER;
+ }
if (buffers_list)
buffers_list->prev = buf;
buf->next = buffers_list;
@@ -1096,8 +1145,8 @@ static void usage()
fprintf(stderr, "Usage: demmt [OPTION]\n"
"Decodes binary trace files generated by Valgrind MMT. Reads standard input.\n\n"
" -m 'chipset'\tset chipset version\n"
- " -f\t\tfind possible IB(s)\n"
- " -n id\t\tset buffer \"id\" as IB\n"
+ " -f\t\tfind possible IB(s) on >= tesla or USER on < tesla\n"
+ " -n id\t\tset buffer \"id\" as IB on >= tesla or USER on < tesla\n"
" -g\t\tprint gpu addresses\n"
" -o\t\tdump ioctl data\n"
" -q\t\t(quiet) print only the most important data (for now only pushbufs from IB's)\n"
@@ -1143,14 +1192,14 @@ int main(int argc, char *argv[])
{
if (i + 1 >= argc)
usage();
- ib_buffer = strtoul(argv[++i], NULL, 10);
+ pb_pointer_buffer = strtoul(argv[++i], NULL, 10);
}
else if (!strcmp(argv[i], "-o"))
dump_ioctls = 1;
else if (!strcmp(argv[i], "-g"))
print_gpu_addresses = 1;
else if (!strcmp(argv[i], "-f"))
- find_ib_buffer = 1;
+ find_pb_pointer = 1;
else if (!strcmp(argv[i], "-q"))
quiet = 1;
else if (!strcmp(argv[i], "-a"))
@@ -1169,6 +1218,8 @@ int main(int argc, char *argv[])
if (chipset == 0)
usage();
+ ib_supported = chipset >= 0x80 || chipset == 0x50;
+
demmt_object_init_chipset(chipset);
if (!colors)
colors = &envy_null_colors;
12 rnn/demmt.h
View
@@ -4,14 +4,14 @@
#include "demmt_pushbuf.h"
#define MMT_DEBUG 0
-extern int find_ib_buffer;
+extern int find_pb_pointer;
extern int quiet;
extern int disassemble_shaders;
extern const struct envy_colors *colors;
-#define mmt_debug(fmt, ...) do { if (MMT_DEBUG) fprintf(stderr, fmt, __VA_ARGS__); } while (0)
-#define mmt_log(fmt, ...) do { if (!find_ib_buffer && !quiet) fprintf(stdout, "%64s" fmt, " ", __VA_ARGS__); } while (0)
-#define mmt_log_cont(fmt, ...) do { if (!find_ib_buffer && !quiet) fprintf(stdout, fmt, __VA_ARGS__); } while (0)
+#define mmt_debug(fmt, ...) do { if (MMT_DEBUG) fprintf(stderr, fmt, __VA_ARGS__); } while (0)
+#define mmt_log(fmt, ...) do { if (!find_pb_pointer && !quiet) fprintf(stdout, "%64s" fmt, " ", __VA_ARGS__); } while (0)
+#define mmt_log_cont(fmt, ...) do { if (!find_pb_pointer && !quiet) fprintf(stdout, fmt, __VA_ARGS__); } while (0)
#define mmt_error(fmt, ...) do { fprintf(stderr, fmt, __VA_ARGS__); } while (0)
struct region
@@ -32,11 +32,12 @@ struct buffer
uint64_t data1;
uint64_t data2;
uint64_t gpu_start;
- enum BUFTYPE { PUSH, IB } type;
+ enum BUFTYPE { PUSH, IB, USER } type;
union
{
struct pushbuf_decode_state pushbuf;
struct ib_decode_state ib;
+ struct user_decode_state user;
} state;
struct region *written_regions;
struct region *written_region_last;
@@ -50,6 +51,7 @@ extern struct buffer *gpu_only_buffers_list;
extern struct rnndomain *domain;
extern struct rnndb *rnndb;
extern int chipset;
+extern int ib_supported;
extern int guess_invalid_pushbuf;
void buffer_register_write(struct buffer *buf, uint32_t offset, uint8_t len, const void *data);
164 rnn/demmt_pushbuf.c
View
@@ -164,15 +164,16 @@ static void decode_method(struct pushbuf_decode_state *state, uint32_t data, cha
free(dec_obj);
}
-void pushbuf_decode(struct pushbuf_decode_state *state, uint32_t data, char *output, int *addr, int safe)
+/* returns 0 when decoding should continue, anything else: next command gpu address */
+uint64_t pushbuf_decode(struct pushbuf_decode_state *state, uint32_t data, char *output, int *mthd, int safe)
{
- if (addr)
- *addr = -1;
+ if (mthd)
+ *mthd = -1;
if (state->skip)
{
strcpy(output, "SKIP");
state->skip--;
- return;
+ return 0;
}
if (state->size == 0)
@@ -180,7 +181,7 @@ void pushbuf_decode(struct pushbuf_decode_state *state, uint32_t data, char *out
if (data == 0 && !state->long_command)
{
strcpy(output, "NOP");
- return;
+ return 0;
}
if (chipset >= 0xc0)
@@ -200,9 +201,9 @@ void pushbuf_decode(struct pushbuf_decode_state *state, uint32_t data, char *out
{
decode_method(state, state->size, output);
state->size = 0;
- if (addr)
- *addr = state->addr;
- return;
+ if (mthd)
+ *mthd = state->addr;
+ return 0;
}
else if (mode == 0)
{
@@ -218,7 +219,7 @@ void pushbuf_decode(struct pushbuf_decode_state *state, uint32_t data, char *out
sprintf(output, "SLI user mask store: 0x%x", (data & 0xfff0) >> 4);
else if (type == 3)
sprintf(output, "SLI cond from user mask");
- return;
+ return 0;
}
if (!state->pushbuf_invalid)
@@ -233,7 +234,7 @@ void pushbuf_decode(struct pushbuf_decode_state *state, uint32_t data, char *out
{
state->size = 0;
sprintf(output, "invalid old-style non-inc mthd, type: %d", type);
- return;
+ return 0;
}
if (!state->pushbuf_invalid)
@@ -243,7 +244,7 @@ void pushbuf_decode(struct pushbuf_decode_state *state, uint32_t data, char *out
{
state->size = 0;
sprintf(output, "unknown mode %d", mode);
- return;
+ return 0;
}
}
else
@@ -253,7 +254,7 @@ void pushbuf_decode(struct pushbuf_decode_state *state, uint32_t data, char *out
state->size = data & 0xffffff;
state->long_command = 0;
sprintf(output, "size %d", state->size);
- return;
+ return 0;
}
int mode = (data & 0xe0000000) >> 29;
@@ -273,13 +274,13 @@ void pushbuf_decode(struct pushbuf_decode_state *state, uint32_t data, char *out
{
state->size = 0;
sprintf(output, "SLI cond, mask: 0x%x", (data & 0xfff0) >> 4);
- return;
+ return 0;
}
else if (type == 2)
{
state->size = 0;
sprintf(output, "return");
- return;
+ return 0;
}
else if (type == 3)
{
@@ -291,7 +292,7 @@ void pushbuf_decode(struct pushbuf_decode_state *state, uint32_t data, char *out
else if (mode == 1)
{
sprintf(output, "jump (old) to 0x%x", data & 0x1ffffffc);
- return;
+ return 1;
}
else if (mode == 2)
state->incr = 0;
@@ -299,26 +300,27 @@ void pushbuf_decode(struct pushbuf_decode_state *state, uint32_t data, char *out
{
sprintf(output, "unknown mode, top 3 bits: %d", mode);
state->size = 0;
- return;
+ return 0;
}
}
else if (type == 1)
{
- sprintf(output, "jump to 0x%x", data & 0xfffffffc);
+ uint32_t addr = data & 0xfffffffc;
+ sprintf(output, "jump to 0x%x", addr);
state->size = 0;
- return;
+ return addr;
}
else if (type == 2)
{
sprintf(output, "call 0x%x", data & 0xfffffffc);
state->size = 0;
- return;
+ return 0; // XXX
}
else
{
sprintf(output, "unknown type, bottom 2 bits: %d", type);
state->size = 0;
- return;
+ return 0;
}
}
@@ -359,8 +361,8 @@ void pushbuf_decode(struct pushbuf_decode_state *state, uint32_t data, char *out
}
decode_method(state, data, output);
- if (addr)
- *addr = state->addr;
+ if (mthd)
+ *mthd = state->addr;
if (state->incr)
{
@@ -370,6 +372,8 @@ void pushbuf_decode(struct pushbuf_decode_state *state, uint32_t data, char *out
state->size--;
}
+
+ return 0;
}
void pushbuf_decode_end(struct pushbuf_decode_state *state)
@@ -383,25 +387,38 @@ void ib_decode_start(struct ib_decode_state *state)
pushbuf_decode_start(&state->pstate);
}
-static void ib_print(struct ib_decode_state *state)
+static uint64_t pushbuf_print(struct pushbuf_decode_state *pstate, struct buffer *buffer, uint64_t gpu_address, int commands)
{
char cmdoutput[1024];
- uint64_t cur = state->address - state->last_buffer->gpu_start;
- uint64_t end = cur + state->size * 4;
+ uint64_t cur = gpu_address - buffer->gpu_start;
+ uint64_t end = cur + commands * 4;
+ uint64_t nextaddr;
while (cur < end)
{
- uint32_t cmd = *(uint32_t *)&state->last_buffer->data[cur];
- int curaddr;
- pushbuf_decode(&state->pstate, cmd, cmdoutput, &curaddr, 1);
+ uint32_t cmd = *(uint32_t *)&buffer->data[cur];
+ int mthd;
+ nextaddr = pushbuf_decode(pstate, cmd, cmdoutput, &mthd, 1);
+ if (nextaddr)
+ {
+ mmt_log("decoding aborted, cmd: \"%s\", nextaddr: 0x%08lx\n", cmdoutput, nextaddr);
+ return nextaddr;
+ }
fprintf(stdout, "PB: 0x%08x %s\n", cmd, cmdoutput);
- struct obj *obj = subchans[state->pstate.subchan];
+ struct obj *obj = subchans[pstate->subchan];
if (obj)
- demmt_parse_command(obj->class, curaddr, cmd);
+ demmt_parse_command(obj->class, mthd, cmd);
cur += 4;
}
+
+ return gpu_address + commands * 4;
+}
+
+static void ib_print(struct ib_decode_state *state)
+{
+ pushbuf_print(&state->pstate, state->last_buffer, state->address, state->size);
}
void ib_decode(struct ib_decode_state *state, uint32_t data, char *output)
@@ -466,3 +483,90 @@ void ib_decode_end(struct ib_decode_state *state)
pushbuf_decode_end(&state->pstate);
}
+
+void user_decode_start(struct user_decode_state *state)
+{
+ memset(state, 0, sizeof(*state));
+ pushbuf_decode_start(&state->pstate);
+}
+
+static void user_print(struct user_decode_state *state)
+{
+ struct buffer *buf = state->last_buffer;
+ if (state->dma_put < state->prev_dma_put)
+ {
+ uint64_t nextaddr = pushbuf_print(&state->pstate, buf, state->prev_dma_put,
+ (buf->gpu_start + buf->length - state->prev_dma_put) / 4);
+ if (state->dma_put >= buf->gpu_start && state->dma_put < buf->gpu_start + buf->length &&
+ nextaddr >= buf->gpu_start && nextaddr < buf->gpu_start + buf->length &&
+ nextaddr <= state->dma_put &&
+ nextaddr <= 0xffffffff)
+ {
+ mmt_log("pushbuffer wraparound%s\n", "\n");
+ state->prev_dma_put = nextaddr;
+ }
+ else
+ {
+ mmt_log("confused, dma_put: 0x%x, nextaddr: 0x%lx, buffer: <0x%08lx,0x%08lx>, resetting state\n",
+ state->dma_put, nextaddr, buf->gpu_start, buf->gpu_start + buf->length);
+ state->last_buffer = NULL;
+ state->prev_dma_put = state->dma_put;
+ return;
+ }
+ }
+
+ pushbuf_print(&state->pstate, buf, state->prev_dma_put, (state->dma_put - state->prev_dma_put) / 4);
+ state->prev_dma_put = state->dma_put;
+}
+
+void user_decode(struct user_decode_state *state, uint32_t addr, uint32_t data, char *output)
+{
+ struct buffer *buf = state->last_buffer;
+ if (buf && state->prev_dma_put != state->dma_put)
+ user_print(state);
+
+ if (addr != 0x40) // DMA_PUT
+ {
+ output[0] = 0;
+ return;
+ }
+
+ if (buf)
+ if (data < buf->gpu_start || data >= buf->gpu_start + buf->length)
+ buf = NULL;
+
+ if (!buf)
+ {
+ for (buf = buffers_list; buf != NULL; buf = buf->next)
+ {
+ if (!buf->gpu_start)
+ continue;
+ if (data >= buf->gpu_start && data < buf->gpu_start + buf->length)
+ {
+ state->prev_dma_put = buf->gpu_start;
+ break;
+ }
+ }
+ }
+
+ state->last_buffer = buf;
+ if (buf)
+ state->dma_put = data;
+
+ sprintf(output, "DMA_PUT: 0x%08x", data);
+ if (buf)
+ {
+ char cmdoutput[32];
+
+ sprintf(cmdoutput, ", buffer id: %d", buf->id);
+ strcat(output, cmdoutput);
+ }
+}
+
+void user_decode_end(struct user_decode_state *state)
+{
+ if (state->last_buffer && state->prev_dma_put != state->dma_put)
+ user_print(state);
+
+ pushbuf_decode_end(&state->pstate);
+}
13 rnn/demmt_pushbuf.h
View
@@ -29,14 +29,25 @@ struct ib_decode_state
struct buffer *last_buffer;
};
+struct user_decode_state
+{
+ uint32_t prev_dma_put;
+ uint32_t dma_put;
+ struct buffer *last_buffer;
+ struct pushbuf_decode_state pstate;
+};
void pushbuf_add_object(uint32_t handle, uint32_t class);
void pushbuf_decode_start(struct pushbuf_decode_state *state);
-void pushbuf_decode(struct pushbuf_decode_state *state, uint32_t data, char *output, int *addr, int safe);
+uint64_t pushbuf_decode(struct pushbuf_decode_state *state, uint32_t data, char *output, int *addr, int safe);
void pushbuf_decode_end(struct pushbuf_decode_state *state);
void ib_decode_start(struct ib_decode_state *state);
void ib_decode(struct ib_decode_state *state, uint32_t data, char *output);
void ib_decode_end(struct ib_decode_state *state);
+void user_decode_start(struct user_decode_state *state);
+void user_decode(struct user_decode_state *state, uint32_t addr, uint32_t data, char *output);
+void user_decode_end(struct user_decode_state *state);
+
#endif
Please sign in to comment.
Something went wrong with that request. Please try again.