From 3f270663266c2f62d661bf4b19e69a6574ec560d Mon Sep 17 00:00:00 2001
From: CrisXian
Date: Wed, 19 Mar 2025 09:32:46 +0800
Subject: [PATCH] check invalid values, add debug_hook function and gdb shell

---
 debug_check.gdb              | 34 ++++++++++++++
 ggml/src/ggml-cpu/ggml-cpu.c | 87 ++++++++++++++++++++++++++++++++++++
 2 files changed, 121 insertions(+)
 create mode 100644 debug_check.gdb

diff --git a/debug_check.gdb b/debug_check.gdb
new file mode 100644
index 0000000000000..98fb091117109
--- /dev/null
+++ b/debug_check.gdb
@@ -0,0 +1,34 @@
+# 1) Set program arguments
+set args -m ~/dev/llm/DeepSeek-R1-Distill-Qwen-1.5B-Q4_0-GGUF/deepseek-r1-distill-qwen-1.5b-q4_0.gguf -b 16 -ngl 0 -c 1024 -t 4 -p "Hello"
+
+# 2) Redirect GDB output to a log file
+set logging file gdb_output.log
+set logging on
+
+# 3) Place a breakpoint at the debug_hook() function in ggml-cpu.c
+break ggml-cpu.c:debug_hook
+
+# 4) Commands to execute once the breakpoint is hit
+commands
+  # Prevent GDB from printing its usual breakpoint messages
+  silent
+
+  # (a) Select the caller's frame, check_invalid_values(), without resuming the program.
+  #     Do not use 'finish' here: GDB ignores every command that follows it in a command list.
+  up
+
+  # (b) Now that check_invalid_values() is selected, print variables of interest
+  p *src0
+  p (*src0).data
+  x/128fw (*src0).data
+
+  # (c) If you only want to trigger once, disable the breakpoint afterwards
+  disable $bpnum
+
+  # If you would rather keep hitting this breakpoint repeatedly, comment out
+  # the disable command above and uncomment the following 'continue' command:
+  # continue
+end
+
+# 5) Automatically run the program (remove or comment out if you want to run manually)
+run
diff --git a/ggml/src/ggml-cpu/ggml-cpu.c b/ggml/src/ggml-cpu/ggml-cpu.c
index 75dc96b478655..b850243beabd1 100644
--- a/ggml/src/ggml-cpu/ggml-cpu.c
+++ b/ggml/src/ggml-cpu/ggml-cpu.c
@@ -10217,13 +10217,53 @@ static void ggml_compute_forward_diag_mask_zero(
     }
 }
 
+__attribute__((noinline)) static void debug_hook(void) {
+    __asm__ volatile(""); // empty asm: keeps calls to this breakpoint anchor from being optimized away
+}
+
 // ggml_compute_forward_soft_max
+// Debug aid: scans an F32 tensor for NaN/Inf. When any is found, calls debug_hook() (the
+// breakpoint anchor used by debug_check.gdb) and returns true. Returns false for NULL,
+// data-less or non-F32 tensors and when every value is finite. Rows are read at
+// data + row*nb[1], which assumes evenly strided rows across dims 1..3 (e.g. contiguous).
+static bool check_invalid_values(const struct ggml_tensor * src0) {
+    if (!src0) {
+        printf("Error: src0 is NULL!\n");
+        return false;
+    }
+    if (!src0->data || src0->type != GGML_TYPE_F32) {
+        return false; // nothing that can be read as float
+    }
+
+    const int64_t nc = src0->ne[0];      // elements per row
+    const int64_t nr = ggml_nrows(src0); // number of rows
+    int64_t nan_count = 0, inf_count = 0; // kept as counters so they can be inspected from gdb
+
+    for (int64_t i1 = 0; i1 < nr; i1++) {
+        const float * sp = (const float *)((const char *) src0->data + i1 * src0->nb[1]);
+        for (int64_t i = 0; i < nc; ++i) {
+            if (isnan(sp[i])) {
+                nan_count++;
+            } else if (isinf(sp[i])) {
+                inf_count++;
+            }
+        }
+    }
+
+    if (nan_count == 0 && inf_count == 0) {
+        return false; // every value is finite
+    }
+    debug_hook();
+    return true;
+}
 
 static void ggml_compute_forward_soft_max_f32(
         const struct ggml_compute_params * params,
               struct ggml_tensor * dst) {
 
     const struct ggml_tensor * src0 = dst->src[0];
+    // src0 is scanned for NaN/Inf by check_invalid_values() in ggml_graph_compute_thread()
+
     const struct ggml_tensor * src1 = dst->src[1];
 
     assert(ggml_is_contiguous(dst));
@@ -10266,6 +10306,12 @@ static void ggml_compute_forward_soft_max_f32(
 
     const bool use_f16 = (src1 && src1->type == GGML_TYPE_F16);
 
+    // clamp scale to avoid overflow
+    if (!isfinite(scale) || scale > 1e6) {
+        // printf("Warning: scale is invalid (%f), resetting to 1.0\n", scale);
+        scale = 1.0f;
+    }
+
     for (int i1 = ir0; i1 < ir1; i1++) {
         // ALiBi
         const uint32_t h = (i1/ne01)%ne02; // head
@@ -10278,6 +10324,27 @@ static void ggml_compute_forward_soft_max_f32(
         ggml_fp16_t * mp_f16 = src1 ? (ggml_fp16_t *)((char *) src1->data) + (i1%ne01)*ne00 : NULL;
         float       * mp_f32 = src1 ? (float       *)((char *) src1->data) + (i1%ne01)*ne00 : NULL;
 
+        // Debug guard on the input row:
+        //  - +/-Inf is clamped to +/-FLT_MAX in place (note: this writes to src0's data)
+        //  - any NaN aborts the process after reporting the counts on stderr
+        int nan_count = 0;
+        int inf_count = 0;
+        for (int i = 0; i < nc; ++i) {
+            if (isnan(sp[i])) {
+                nan_count++;
+            } else if (isinf(sp[i])) {
+                inf_count++;
+                // keep the sign: -Inf has to stay the smallest value of the row
+                sp[i] = copysignf(FLT_MAX, sp[i]);
+            }
+        }
+
+        if (nan_count) {
+            fprintf(stderr, "Error: sp contains %d NaN values (%d Inf values clamped), aborting!\n",
+                    nan_count, inf_count);
+            exit(1);
+        }
+
         ggml_vec_cpy_f32  (nc, wp, sp);
         ggml_vec_scale_f32(nc, wp, scale);
         if (mp_f32) {
@@ -10302,6 +10369,10 @@ static void ggml_compute_forward_soft_max_f32(
         float max = -INFINITY;
         ggml_vec_max_f32(nc, &max, wp);
 
+        if (!isfinite(max)) {
+            max = FLT_MAX;
+        }
+
         ggml_float sum = ggml_vec_soft_max_f32(nc, dp, wp, max);
         assert(sum > 0.0);
 
@@ -15431,6 +15502,9 @@ struct ggml_cplan ggml_graph_plan(
     return cplan;
 }
 
+// debug call chain into this function and on to the NaN/Inf check:
+//   ggml_graph_compute_with_ctx -> ggml_graph_compute -> ggml_graph_compute_thread
+//   -> check_invalid_values
 static thread_ret_t ggml_graph_compute_thread(void * data) {
     struct ggml_compute_state * state = (struct ggml_compute_state *) data;
     struct ggml_threadpool    * tp    = state->threadpool;
@@ -15450,6 +15524,17 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
 
     for (int node_n = 0; node_n < cgraph->n_nodes && atomic_load_explicit(&tp->abort, memory_order_relaxed) != node_n; node_n++) {
         struct ggml_tensor * node = cgraph->nodes[node_n];
+
+        // Debug aid: scan the input of each SOFT_MAX node for NaN/Inf before the op runs
+        // (the node's own data has not been computed yet, so src[0] is what gets checked).
+        // Do not probe ggml_cpu_extra_compute_forward() here: ggml_compute_forward() below
+        // already dispatches to it, and a second call would execute extra-buffer ops twice.
+        // NOTE(review): every worker thread performs this scan over the whole tensor.
+        // Path of the op itself: ggml_compute_forward -> ggml_compute_forward_soft_max
+        // -> ggml_compute_forward_soft_max_f32.
+        if (node->op == GGML_OP_SOFT_MAX && !ggml_is_empty(node)) {
+            check_invalid_values(node->src[0]);
+        }
 
         ggml_compute_forward(&params, node);
 
@@ -15726,6 +15811,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
                 atomic_store_explicit(&threadpool->n_threads_cur, n_threads, memory_order_relaxed);
             }
 
+            // printf("GGML_USE_OPENMP->ggml_graph_compute_thread: %d\n", omp_get_thread_num());
             ggml_graph_compute_thread(&threadpool->workers[omp_get_thread_num()]);
         }
     } else {
@@ -15757,6 +15843,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
     return ret;
 }
 
+// TODO cgraph
 enum ggml_status ggml_graph_compute_with_ctx(struct ggml_context * ctx, struct ggml_cgraph * cgraph, int n_threads) {
     struct ggml_cplan cplan = ggml_graph_plan(cgraph, n_threads, NULL);