Add support for full CUDA GPU offloading (#105)
Signed-off-by: mudler <mudler@mocaccino.org>
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
1 parent a796025, commit 35a3c99
Showing 8 changed files with 155 additions and 24 deletions.
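The heart of the change is the vendored patch below, which makes llama_init_from_file take its llama_context_params by pointer so the Go bindings can hand the struct across cgo reliably. As a minimal sketch of what full GPU offloading then looks like from the C++ side (assuming the June-2023 llama.cpp API, where n_gpu_layers controls how many layers are offloaded to CUDA; the model path and layer count here are placeholders):

// Minimal sketch, not part of this commit: initialize a context with CUDA
// offloading through the patched pointer-based entry point.
#include "llama.h"

int main() {
    struct llama_context_params lparams = llama_context_default_params();
    lparams.n_ctx        = 512;
    lparams.n_gpu_layers = 40;  // offload up to 40 layers to the GPU; 0 = CPU only

    // After this patch, the params struct is passed by pointer, not by value.
    struct llama_context * ctx = llama_init_from_file("model.bin", &lparams);
    if (ctx == NULL) {
        return 1;
    }

    llama_free(ctx);
    return 0;
}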
@@ -0,0 +1,120 @@
From 200892a3a54323eb65ca9c8d8afb6043ca2d8944 Mon Sep 17 00:00:00 2001
From: mudler <mudler@mocaccino.org>
Date: Fri, 16 Jun 2023 23:43:36 +0200
Subject: [PATCH] Pass pointer to params in llama_init_from_file

Especially with golang bindings, calling by value has the side-effect of
values not being copied correctly. This has been observed with the
bindings in https://github.com/go-skynet/go-llama.cpp/pull/105.
---
 examples/common.cpp                                          | 2 +-
 examples/quantize-stats/quantize-stats.cpp                   | 2 +-
 examples/save-load-state/save-load-state.cpp                 | 4 ++--
 examples/train-text-from-scratch/train-text-from-scratch.cpp | 2 +-
 llama.cpp                                                    | 3 ++-
 llama.h                                                      | 2 +-
 tests/test-tokenizer-0.cpp                                   | 2 +-
 7 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/examples/common.cpp b/examples/common.cpp
index 055383beff9..7cf48e82158 100644
--- a/examples/common.cpp
+++ b/examples/common.cpp
@@ -555,7 +555,7 @@ struct llama_context * llama_init_from_gpt_params(const gpt_params & params) {
     lparams.logits_all = params.perplexity;
     lparams.embedding  = params.embedding;

-    llama_context * lctx = llama_init_from_file(params.model.c_str(), lparams);
+    llama_context * lctx = llama_init_from_file(params.model.c_str(), &lparams);

     if (lctx == NULL) {
         fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp
index 6b8018ee284..a7c1e873a92 100644
--- a/examples/quantize-stats/quantize-stats.cpp
+++ b/examples/quantize-stats/quantize-stats.cpp
@@ -330,7 +330,7 @@ int main(int argc, char ** argv) {
     lparams.f16_kv    = false;
     lparams.use_mlock = false;

-    ctx = llama_init_from_file(params.model.c_str(), lparams);
+    ctx = llama_init_from_file(params.model.c_str(), &lparams);

     if (ctx == NULL) {
         fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
diff --git a/examples/save-load-state/save-load-state.cpp b/examples/save-load-state/save-load-state.cpp
index da4d37ad03d..07ee6750d4c 100644
--- a/examples/save-load-state/save-load-state.cpp
+++ b/examples/save-load-state/save-load-state.cpp
@@ -35,7 +35,7 @@ int main(int argc, char ** argv) {
     auto last_n_tokens_data = std::vector<llama_token>(params.repeat_last_n, 0);

     // init
-    auto ctx = llama_init_from_file(params.model.c_str(), lparams);
+    auto ctx = llama_init_from_file(params.model.c_str(), &lparams);
     auto tokens = std::vector<llama_token>(params.n_ctx);
     auto n_prompt_tokens = llama_tokenize(ctx, params.prompt.c_str(), tokens.data(), int(tokens.size()), true);

@@ -95,7 +95,7 @@ int main(int argc, char ** argv) {
     llama_free(ctx);

     // load new model
-    auto ctx2 = llama_init_from_file(params.model.c_str(), lparams);
+    auto ctx2 = llama_init_from_file(params.model.c_str(), &lparams);

     // Load state (rng, logits, embedding and kv_cache) from file
     {
diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index 7ec85951adc..1c7a06c21be 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -3054,7 +3054,7 @@ int main(int argc, char ** argv) {
     struct llama_context_params llama_params = llama_context_default_params();
     llama_params.vocab_only = true;

-    struct llama_context * lctx = llama_init_from_file(params.fn_vocab_model, llama_params);
+    struct llama_context * lctx = llama_init_from_file(params.fn_vocab_model, &llama_params);

     struct llama_vocab vocab;
     {
diff --git a/llama.cpp b/llama.cpp
index 81f047ed298..0629e873886 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2618,8 +2618,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s

 struct llama_context * llama_init_from_file(
         const char * path_model,
-        struct llama_context_params params) {
+        const struct llama_context_params * params_ptr) {
     ggml_time_init();
+    struct llama_context_params params = *params_ptr;

     llama_context * ctx = new llama_context;

diff --git a/llama.h b/llama.h
index 1241ba6c0ec..faf2675f125 100644
--- a/llama.h
+++ b/llama.h
@@ -142,7 +142,7 @@ extern "C" {
     // Return NULL on failure
     LLAMA_API struct llama_context * llama_init_from_file(
             const char * path_model,
-            struct llama_context_params params);
+            const struct llama_context_params * params);

     // Frees all allocated memory
     LLAMA_API void llama_free(struct llama_context * ctx);
diff --git a/tests/test-tokenizer-0.cpp b/tests/test-tokenizer-0.cpp
index ab1538a0cf3..b405df8e687 100644
--- a/tests/test-tokenizer-0.cpp
+++ b/tests/test-tokenizer-0.cpp
@@ -36,7 +36,7 @@ int main(int argc, char **argv) {

     lparams.vocab_only = true;

-    ctx = llama_init_from_file(fname.c_str(), lparams);
+    ctx = llama_init_from_file(fname.c_str(), &lparams);

     if (ctx == NULL) {
         fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
Submodule llama.cpp updated 25 files:

+1     −0    .gitignore
+14    −3    CMakeLists.txt
+5     −2    Makefile
+1     −0    Package.swift
+1     −0    examples/CMakeLists.txt
+11    −6    examples/baby-llama/baby-llama.cpp
+41    −0    examples/chat-vicuna.sh
+16    −0    examples/common.cpp
+9     −8    examples/common.h
+1     −0    examples/main/README.md
+5     −0    examples/server/README.md
+9     −0    examples/server/server.cpp
+4     −0    examples/train-text-from-scratch/CMakeLists.txt
+22    −0    examples/train-text-from-scratch/README.md
+3,399 −0    examples/train-text-from-scratch/train-text-from-scratch.cpp
+690   −109  ggml-cuda.cu
+2     −0    ggml-cuda.h
+1     −0    ggml-metal.h
+539   −516  ggml-metal.m
+1,889 −300  ggml.c
+126   −2    ggml.h
+151   −33   llama.cpp
+12    −3    llama.h
+1     −0    spm-headers/ggml.h
+59    −1    tests/test-grad0.c