Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ggml/src/ggml-cuda/ggml-cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -3274,14 +3274,14 @@ static void evaluate_and_capture_cuda_graph(ggml_backend_cuda_context * cuda_ctx
GGML_LOG_DEBUG("Setting stream no to %d for node %s\n", cuda_ctx->curr_stream_no, node->name);
}
}
- prev_i = i;

#ifdef GGML_CUDA_DEBUG
const int nodes_fused = i - prev_i - 1;
if (nodes_fused > 0) {
GGML_LOG_INFO("nodes_fused: %d\n", nodes_fused);
}
#endif
+ prev_i = i;

if (ggml_is_empty(node) || node->op == GGML_OP_RESHAPE || node->op == GGML_OP_TRANSPOSE || node->op == GGML_OP_VIEW || node->op == GGML_OP_PERMUTE || node->op == GGML_OP_NONE) {
continue;
Expand Down
6 changes: 0 additions & 6 deletions src/llama-graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -810,9 +810,6 @@ ggml_tensor * llm_graph_context::build_ffn(
GGML_ABORT("fatal error");
}

- //expand here so that we can fuse ffn gate
- ggml_build_forward_expand(gf, cur);
-
if (gate && type_gate == LLM_FFN_PAR) {
cur = ggml_mul(ctx0, cur, tmp);
cb(cur, "ffn_gate_par", il);
Expand Down Expand Up @@ -1093,9 +1090,6 @@ ggml_tensor * llm_graph_context::build_moe_ffn(
GGML_ABORT("fatal error");
}

- //expand here so that we can fuse ffn gate
- ggml_build_forward_expand(gf, cur);
-
experts = build_lora_mm_id(down_exps, cur, selected_experts); // [n_embd, n_expert_used, n_tokens]
cb(experts, "ffn_moe_down", il);

Expand Down
Loading