@@ -294,7 +294,7 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
294294 }
295295
296296 // get extra buffer types of the CPU
297- // TODO: a more general solution for non-CPU extra buft should be imlpemented in the future
297+ // TODO: a more general solution for non-CPU extra buft should be implemented in the future
298298 // ref: https://github.com/ggml-org/llama.cpp/pull/12593#pullrequestreview-2718659948
299299 std::vector<ggml_backend_buffer_type_t > buft_extra;
300300 {
@@ -418,7 +418,7 @@ static void llama_adapter_lora_init_impl(llama_model & model, const char * path_
418418}
419419
420420llama_adapter_lora * llama_adapter_lora_init (llama_model * model, const char * path_lora) {
421- llama_adapter_lora * adapter = new llama_adapter_lora ();
421+ llama_adapter_lora * adapter = new llama_adapter_lora (model );
422422
423423 try {
424424 llama_adapter_lora_init_impl (*model, path_lora, *adapter);
@@ -471,8 +471,17 @@ int32_t llama_adapter_meta_val_str_by_index(const llama_adapter_lora * adapter,
471471 return snprintf (buf, buf_size, " %s" , it->second .c_str ());
472472}
473473
474- void llama_adapter_lora_free (llama_adapter_lora *) {
475- // deprecated: adapters are freed by llama_model's destructor
474+ void llama_adapter_lora_free (llama_adapter_lora * adapter) {
475+ if (adapter == nullptr ) {
476+ return ;
477+ }
478+
479+ if (adapter->model != nullptr ) {
480+ adapter->model ->loras .erase (adapter);
481+ adapter->model = nullptr ;
482+ }
483+
484+ delete adapter;
476485}
477486
478487uint64_t llama_adapter_get_alora_n_invocation_tokens (const struct llama_adapter_lora * adapter) {
0 commit comments