Skip to content

Commit 4bf7336

Browse files
committed
talk-llama : sync llama.cpp
1 parent 18162bc commit 4bf7336

144 files changed

Lines changed: 3675 additions & 5535 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

examples/talk-llama/llama-adapter.cpp

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -294,7 +294,7 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
294294
}
295295

296296
// get extra buffer types of the CPU
297-
// TODO: a more general solution for non-CPU extra buft should be imlpemented in the future
297+
// TODO: a more general solution for non-CPU extra buft should be implemented in the future
298298
// ref: https://github.com/ggml-org/llama.cpp/pull/12593#pullrequestreview-2718659948
299299
std::vector<ggml_backend_buffer_type_t> buft_extra;
300300
{
@@ -418,7 +418,7 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
418418
}
419419

420420
llama_adapter_lora * llama_adapter_lora_init(llama_model * model, const char * path_lora) {
421-
llama_adapter_lora * adapter = new llama_adapter_lora();
421+
llama_adapter_lora * adapter = new llama_adapter_lora(model);
422422

423423
try {
424424
llama_adapter_lora_init_impl(*model, path_lora, *adapter);
@@ -471,8 +471,17 @@ int32_t llama_adapter_meta_val_str_by_index(const llama_adapter_lora * adapter,
471471
return snprintf(buf, buf_size, "%s", it->second.c_str());
472472
}
473473

474-
void llama_adapter_lora_free(llama_adapter_lora *) {
475-
// deprecated: adapters are freed by llama_model's destructor
474+
void llama_adapter_lora_free(llama_adapter_lora * adapter) {
475+
if (adapter == nullptr) {
476+
return;
477+
}
478+
479+
if (adapter->model != nullptr) {
480+
adapter->model->loras.erase(adapter);
481+
adapter->model = nullptr;
482+
}
483+
484+
delete adapter;
476485
}
477486

478487
uint64_t llama_adapter_get_alora_n_invocation_tokens(const struct llama_adapter_lora * adapter) {

examples/talk-llama/llama-adapter.h

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,8 @@ struct llama_adapter_lora_weight {
6161
};
6262

6363
struct llama_adapter_lora {
64+
llama_model * model = nullptr;
65+
6466
// map tensor name to lora_a_b
6567
std::unordered_map<std::string, llama_adapter_lora_weight> ab_map;
6668

@@ -75,7 +77,7 @@ struct llama_adapter_lora {
7577
// activated lora (aLoRA)
7678
std::vector<llama_token> alora_invocation_tokens;
7779

78-
llama_adapter_lora() = default;
80+
explicit llama_adapter_lora(llama_model * model) : model(model) {}
7981
~llama_adapter_lora() = default;
8082

8183
llama_adapter_lora_weight * get_weight(ggml_tensor * w);

0 commit comments

Comments
 (0)