Revert "Fix memory allocation issues and seg faults"
This reverts commit 4870e45.

Will provide the correct fix later
ggerganov committed Mar 24, 2023
1 parent 4870e45 commit 3cd8dde
Showing 1 changed file with 18 additions and 16 deletions.

llama.cpp
@@ -102,9 +102,6 @@ struct llama_context {
     // decode output (2-dimensional array: [n_tokens][n_vocab])
     std::vector<float> logits;
     bool logits_all = false;
-
-    // work buffer for transformer evaluation
-    std::vector<uint8_t> buf_eval;
 };
 
 struct llama_context_params llama_context_default_params() {
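(The member deleted above is the per-context evaluation scratch buffer that 4870e45 introduced. After the revert, scratch space is no longer owned by llama_context; it returns to a function-local static inside llama_eval_internal, shared by every context in the process, as the next hunk shows.)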
@@ -630,19 +627,27 @@ static bool llama_eval_internal(
     const int n_rot = hparams.n_embd/hparams.n_head;
 
     auto & mem_per_token = lctx.mem_per_token;
-    auto & buf_eval = lctx.buf_eval;
 
-    if (mem_per_token*(n_past + N + 16) > buf_eval.size()) {
-        const size_t buf_size_new = 1.618*buf_eval.size();
+    // TODO: fix this hardcoded size
+    static size_t buf_size = 512u*1024*1024;
+    static void * buf = malloc(buf_size);
 
-        //fprintf(stderr, "\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_eval.size(), buf_size_new);
+    if (mem_per_token > 0 && mem_per_token*N > buf_size) {
+        const size_t buf_size_new = 1.3*(mem_per_token*N); // add 30% to account for ggml object overhead
+        //fprintf(stderr, "\n%s: reallocating buffer from %zu to %zu bytes\n", __func__, buf_size, buf_size_new);
 
-        buf_eval.resize(buf_size_new);
+        // reallocate
+        buf_size = buf_size_new;
+        buf = realloc(buf, buf_size);
+        if (buf == nullptr) {
+            fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size);
+            return false;
+        }
     }
 
     struct ggml_init_params params = {
-        /*.mem_size =*/ buf_eval.size(),
-        /*.mem_buffer =*/ buf_eval.data(),
+        /*.mem_size =*/ buf_size,
+        /*.mem_buffer =*/ buf,
     };
 
     struct ggml_context * ctx0 = ggml_init(params);
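Taken together, this hunk swaps the removed policy (a per-context std::vector grown by a golden-ratio factor of 1.618 whenever mem_per_token*(n_past + N + 16) outgrows it) back for the old one: a single function-local static buffer, grown with realloc to 1.3x the predicted requirement. Below is a standalone sketch of the restored policy, with the inline logic pulled into a hypothetical ensure_eval_buffer helper and an invented mem_per_token value (in llama.cpp it is measured at runtime; see the next hunk):

    #include <cstdio>
    #include <cstdlib>

    // Illustrative value only: llama.cpp measures this after the first evaluation.
    static size_t mem_per_token = 14u*1024*1024; // 14 MiB of ggml scratch per token

    // Hypothetical wrapper around the logic llama_eval_internal inlines.
    static bool ensure_eval_buffer(int N, void ** out_buf, size_t * out_size) {
        // TODO in the source: hardcoded initial size
        static size_t buf_size = 512u*1024*1024;
        static void * buf = malloc(buf_size);

        if (mem_per_token > 0 && mem_per_token*N > buf_size) {
            // grow to the predicted need plus 30% for ggml object overhead
            const size_t buf_size_new = 1.3*(mem_per_token*N);

            buf_size = buf_size_new;
            buf = realloc(buf, buf_size); // as in the source: old block leaks if realloc fails
            if (buf == nullptr) {
                fprintf(stderr, "%s: failed to allocate %zu bytes\n", __func__, buf_size);
                return false;
            }
        }

        *out_buf = buf;
        *out_size = buf_size;
        return true;
    }

    int main() {
        void * buf = nullptr;
        size_t size = 0;

        // A 64-token batch predicts 64*14 MiB = 896 MiB, which exceeds the
        // 512 MiB default, so the buffer grows to 1.3*896 MiB (about 1165 MiB).
        if (!ensure_eval_buffer(64, &buf, &size)) {
            return 1;
        }
        printf("eval buffer: %zu bytes\n", size);
        return 0;
    }

One consequence of the static is that the buffer is allocated once per process and never freed; the removed std::vector variant tied the allocation to the context's lifetime instead.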
@@ -827,11 +832,10 @@ static bool llama_eval_internal(
         memcpy(logits_out.data(), (float *) ggml_get_data(inpL) + (n_vocab*(N-1)), sizeof(float)*n_vocab);
     }
 
-    if (N == 1) {
-        mem_per_token = ggml_used_mem(ctx0)/(n_past + N);
+    if (mem_per_token == 0) {
+        mem_per_token = ggml_used_mem(ctx0)/N;
     }
 
-    //fprintf(stderr, "\nused_mem = %zu, %zu MB\n", ggml_used_mem(ctx0), ggml_used_mem(ctx0)/1024/1024);
-
+    //fprintf(stderr, "used_mem = %zu\n", ggml_used_mem(ctx0));
 
     ggml_free(ctx0);
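The calibration is reverted too: mem_per_token is now computed once, on the first call (while it is still zero), as the total ggml memory used divided by the batch size N, instead of being refreshed on every single-token call as used_mem/(n_past + N). With invented numbers for illustration: if the first evaluation of an 8-token prompt reports ggml_used_mem(ctx0) = 112 MiB, mem_per_token becomes 14 MiB, and a later 64-token batch is predicted to need 64*14 = 896 MiB, which is what triggers the realloc path sketched above.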

@@ -1412,8 +1416,6 @@ struct llama_context * llama_init_from_file(
         return nullptr;
     }
 
-    ctx->buf_eval.resize(512u*1024u*1024u);
-
     return ctx;
 }

