Skip to content

Commit

Permalink
expose more params
Browse files Browse the repository at this point in the history
  • Loading branch information
hazelnutcloud committed Mar 10, 2024
1 parent 436fa6c commit 54f78c3
Show file tree
Hide file tree
Showing 8 changed files with 131 additions and 31 deletions.
10 changes: 8 additions & 2 deletions build.zig
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ pub fn build(b: *std.Build) !void {
const lib_godot_gen_sources = try findFilesRecursive(b, "godot_cpp/gen/src", &cfiles_exts);
lib_godot.addCSourceFiles(.{ .files = lib_godot_gen_sources, .flags = &.{ "-std=c++17", "-fno-exceptions" } });
lib_godot.addCSourceFiles(.{ .files = lib_godot_sources, .flags = &.{ "-std=c++17", "-fno-exceptions" } });
// try objs.append(lib_godot);

// llama.cpp
const commit_hash = try std.ChildProcess.run(.{ .allocator = b.allocator, .argv = &.{ "git", "rev-parse", "HEAD" }, .cwd = b.pathFromRoot("llama.cpp") });
Expand All @@ -54,7 +53,13 @@ pub fn build(b: *std.Build) !void {

var flags = std.ArrayList([]const u8).init(b.allocator);
if (target.result.abi != .msvc) try flags.append("-D_GNU_SOURCE");
if (target.result.os.tag == .macos) try flags.appendSlice(&.{ "-D_DARWIN_C_SOURCE", "-DGGML_USE_METAL", "-DGGML_USE_ACCELERATE", "-DACCELERATE_USE_LAPACK", "-DACCELERATE_LAPACK_ILP64" }) else try flags.append("-DGGML_USE_VULKAN");
if (target.result.os.tag == .macos) try flags.appendSlice(&.{
"-D_DARWIN_C_SOURCE",
"-DGGML_USE_METAL",
"-DGGML_USE_ACCELERATE",
"-DACCELERATE_USE_LAPACK",
"-DACCELERATE_LAPACK_ILP64",
}) else try flags.append("-DGGML_USE_VULKAN");
try flags.append("-D_XOPEN_SOURCE=600");

var cflags = std.ArrayList([]const u8).init(b.allocator);
Expand Down Expand Up @@ -225,6 +230,7 @@ pub fn build(b: *std.Build) !void {
extension.linkFramework("Foundation");
extension.linkFramework("Accelerate");
b.installFile("llama.cpp/ggml-metal.metal", b.pathJoin(&.{ std.fs.path.basename(b.lib_dir), "ggml-metal.metal" }));
b.installFile("llama.cpp/ggml-common.h", b.pathJoin(&.{ std.fs.path.basename(b.lib_dir), "ggml-common.h" }));
} else {
if (target.result.os.tag == .windows) {
const vk_path = b.graph.env_map.get("VK_SDK_PATH") orelse @panic("VK_SDK_PATH not set");
Expand Down
4 changes: 0 additions & 4 deletions godot/main.gd
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,6 @@ extends Node
func _on_button_pressed():
handle_submit()

#func _unhandled_key_input(event: InputEvent) -> void:
#if (event.is_action_released("submit_form") and input.has_focus()):
#handle_submit()

func handle_submit():
print(input.text)
Llama.request_completion(input.text)
Expand Down
5 changes: 3 additions & 2 deletions godot/main.tscn
Original file line number Diff line number Diff line change
Expand Up @@ -57,11 +57,12 @@ texture = ExtResource("1_ojdoj")
expand_mode = 4

[node name="ScrollContainer" type="ScrollContainer" parent="CenterContainer/VBoxContainer"]
custom_minimum_size = Vector2(2.08165e-12, 150)
layout_mode = 2
horizontal_scroll_mode = 0

[node name="Panel" type="PanelContainer" parent="CenterContainer/VBoxContainer/ScrollContainer"]
custom_minimum_size = Vector2(2.08165e-12, 2.08165e-12)
layout_mode = 2
size_flags_horizontal = 3
size_flags_vertical = 3
Expand All @@ -74,7 +75,7 @@ theme_override_constants/margin_right = 20

[node name="Output" type="Label" parent="CenterContainer/VBoxContainer/ScrollContainer/Panel/MarginContainer"]
unique_name_in_owner = true
custom_minimum_size = Vector2(200, 2.08165e-12)
layout_mode = 2
theme_override_colors/font_color = Color(0.101961, 0.0823529, 0.0627451, 1)
text = "Ask me anything!"
Expand Down
2 changes: 1 addition & 1 deletion llama.cpp
Submodule llama.cpp updated 50 files
+2 −1 .github/workflows/server.yml
+2 −1 CMakeLists.txt
+8 −5 Makefile
+8 −5 README.md
+16 −0 common/common.cpp
+7 −0 common/common.h
+16 −0 common/grammar-parser.cpp
+4 −4 common/log.h
+118 −0 convert-hf-to-gguf.py
+8 −5 examples/batched-bench/batched-bench.cpp
+2 −1 examples/batched/batched.cpp
+2 −4 examples/benchmark/benchmark-matmult.cpp
+1 −13 examples/embedding/embedding.cpp
+27 −3 examples/llama-bench/llama-bench.cpp
+1 −54 examples/llava/clip.cpp
+13 −7 examples/parallel/parallel.cpp
+97 −51 examples/perplexity/perplexity.cpp
+1 −1 examples/server-embd.py
+8 −2 examples/server/CMakeLists.txt
+35 −5 examples/server/README.md
+88 −0 examples/server/bench/README.md
+120 −0 examples/server/bench/script.js
+0 −225 examples/server/oai.hpp
+2,041 −2,074 examples/server/server.cpp
+94 −0 examples/server/tests/features/embeddings.feature
+3 −48 examples/server/tests/features/parallel.feature
+3 −2 examples/server/tests/features/security.feature
+30 −38 examples/server/tests/features/server.feature
+108 −27 examples/server/tests/features/steps/steps.py
+1 −0 examples/server/tests/requirements.txt
+307 −396 examples/server/utils.hpp
+3 −3 flake.lock
+779 −0 ggml-common.h
+3 −740 ggml-cuda.cu
+3 −704 ggml-metal.metal
+51 −808 ggml-quants.c
+25 −21 ggml-quants.h
+2 −382 ggml-sycl.cpp
+1 −39 ggml-vulkan.cpp
+426 −297 ggml.c
+27 −15 ggml.h
+41 −0 gguf-py/gguf/constants.py
+12 −0 gguf-py/gguf/gguf_writer.py
+44 −2 gguf-py/gguf/tensor_mapping.py
+702 −89 llama.cpp
+3 −1 llama.h
+2 −0 scripts/sync-ggml-am.sh
+1 −0 scripts/sync-ggml.sh
+1 −0 tests/.gitignore
+1 −2 tests/test-backend-ops.cpp
73 changes: 66 additions & 7 deletions src/llama_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,41 @@ void LlamaContext::_bind_methods() {
ClassDB::bind_method(D_METHOD("set_model", "model"), &LlamaContext::set_model);
ClassDB::bind_method(D_METHOD("get_model"), &LlamaContext::get_model);
ClassDB::add_property("LlamaContext", PropertyInfo(Variant::OBJECT, "model", PROPERTY_HINT_RESOURCE_TYPE, "LlamaModel"), "set_model", "get_model");

ClassDB::bind_method(D_METHOD("get_seed"), &LlamaContext::get_seed);
ClassDB::bind_method(D_METHOD("set_seed", "seed"), &LlamaContext::set_seed);
ClassDB::add_property("LlamaContext", PropertyInfo(Variant::INT, "seed"), "set_seed", "get_seed");

ClassDB::bind_method(D_METHOD("get_n_ctx"), &LlamaContext::get_n_ctx);
ClassDB::bind_method(D_METHOD("set_n_ctx", "n_ctx"), &LlamaContext::set_n_ctx);
ClassDB::add_property("LlamaContext", PropertyInfo(Variant::INT, "n_ctx"), "set_n_ctx", "get_n_ctx");

ClassDB::bind_method(D_METHOD("get_n_threads"), &LlamaContext::get_n_threads);
ClassDB::bind_method(D_METHOD("set_n_threads", "n_threads"), &LlamaContext::set_n_threads);
ClassDB::add_property("LlamaContext", PropertyInfo(Variant::INT, "n_threads"), "set_n_threads", "get_n_threads");

ClassDB::bind_method(D_METHOD("get_n_threads_batch"), &LlamaContext::get_n_threads_batch);
ClassDB::bind_method(D_METHOD("set_n_threads_batch", "n_threads_batch"), &LlamaContext::set_n_threads_batch);
ClassDB::add_property("LlamaContext", PropertyInfo(Variant::INT, "n_threads_batch"), "set_n_threads_batch", "get_n_threads_batch");

ClassDB::bind_method(D_METHOD("request_completion", "prompt"), &LlamaContext::request_completion);
ClassDB::bind_method(D_METHOD("_fulfill_completion", "prompt"), &LlamaContext::_fulfill_completion);

ADD_SIGNAL(MethodInfo("completion_generated", PropertyInfo(Variant::STRING, "completion"), PropertyInfo(Variant::BOOL, "is_final")));
}

// Sets up default context parameters at construction time so the exported
// properties (seed, n_ctx, n_threads, n_threads_batch) can be adjusted in the
// editor/scripts before the llama context is actually created in _ready().
LlamaContext::LlamaContext() {
	// NOTE(review): batch size is hard-coded to 4096 and does not track
	// n_ctx if that is changed via the property — confirm this is intended.
	batch = llama_batch_init(4096, 0, 1);

	ctx_params = llama_context_default_params();
	ctx_params.seed = -1; // presumably -1 requests a random seed — confirm against llama.cpp
	ctx_params.n_ctx = 4096;

	// Default both generation and batch (prompt) processing to all cores.
	int32_t n_threads = OS::get_singleton()->get_processor_count();
	ctx_params.n_threads = n_threads;
	ctx_params.n_threads_batch = n_threads;
}

void LlamaContext::_ready() {
// TODO: remove this and use runtime classes once godot 4.3 lands, see https://github.com/godotengine/godot/pull/82554
if (Engine::get_singleton()->is_editor_hint()) {
Expand All @@ -30,12 +60,6 @@ void LlamaContext::_ready() {
return;
}

ctx_params.seed = -1;
ctx_params.n_ctx = 4096;
int32_t n_threads = OS::get_singleton()->get_processor_count();
ctx_params.n_threads = n_threads;
ctx_params.n_threads_batch = n_threads;

ctx = llama_new_context_with_model(model->model, ctx_params);
if (ctx == NULL) {
UtilityFunctions::printerr(vformat("%s: Failed to initialize llama context, null ctx", __func__));
Expand All @@ -44,6 +68,14 @@ void LlamaContext::_ready() {
UtilityFunctions::print(vformat("%s: Context initialized", __func__));
}

// Editor diagnostics: warn when this node has no LlamaModel resource assigned.
PackedStringArray LlamaContext::_get_configuration_warnings() const {
	PackedStringArray problems;
	if (model != NULL) {
		return problems;
	}
	problems.push_back("Model resource property not defined");
	return problems;
}

Variant LlamaContext::request_completion(const String &prompt) {
UtilityFunctions::print(vformat("%s: Requesting completion for prompt: %s", __func__, prompt));
if (task_id) {
Expand Down Expand Up @@ -134,11 +166,38 @@ void LlamaContext::_fulfill_completion(const String &prompt) {
// Assigns the LlamaModel resource whose loaded model backs this context.
void LlamaContext::set_model(const Ref<LlamaModel> p_model) {
	model = p_model;
}

// Returns the currently assigned LlamaModel resource (may be null).
Ref<LlamaModel> LlamaContext::get_model() {
	return model;
}

// Seed stored in ctx_params and applied when the context is created in _ready().
int LlamaContext::get_seed() {
	return ctx_params.seed;
}
void LlamaContext::set_seed(int seed) {
	ctx_params.seed = seed;
}

// Context window size (tokens); takes effect when the context is created.
int LlamaContext::get_n_ctx() {
	return ctx_params.n_ctx;
}
void LlamaContext::set_n_ctx(int n_ctx) {
	ctx_params.n_ctx = n_ctx;
}

// Thread count used for generation (defaults to processor count, see constructor).
int LlamaContext::get_n_threads() {
	return ctx_params.n_threads;
}
void LlamaContext::set_n_threads(int n_threads) {
	ctx_params.n_threads = n_threads;
}

// Thread count used for batch/prompt processing.
int LlamaContext::get_n_threads_batch() {
	return ctx_params.n_threads_batch;
}
void LlamaContext::set_n_threads_batch(int n_threads_batch) {
	ctx_params.n_threads_batch = n_threads_batch;
}

LlamaContext::~LlamaContext() {
if (ctx) {
llama_free(ctx);
Expand Down
17 changes: 15 additions & 2 deletions src/llama_context.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@ class LlamaContext : public Node {
private:
Ref<LlamaModel> model;
llama_context *ctx = nullptr;
llama_context_params ctx_params = llama_context_default_params();
llama_batch batch = llama_batch_init(4096, 0, 1);
llama_context_params ctx_params;
llama_batch batch;
int task_id;

protected:
Expand All @@ -22,9 +22,22 @@ class LlamaContext : public Node {
public:
void set_model(const Ref<LlamaModel> model);
Ref<LlamaModel> get_model();

Variant request_completion(const String &prompt);
void _fulfill_completion(const String &prompt);

int get_seed();
void set_seed(int seed);
int get_n_ctx();
void set_n_ctx(int n_ctx);
int get_n_threads();
void set_n_threads(int n_threads);
int get_n_threads_batch();
void set_n_threads_batch(int n_threads_batch);

virtual PackedStringArray _get_configuration_warnings() const override;
virtual void _ready() override;
LlamaContext();
~LlamaContext();
};
} //namespace godot
Expand Down
25 changes: 21 additions & 4 deletions src/llama_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,22 +5,39 @@

using namespace godot;

// Registers LlamaModel's script-visible API with Godot's ClassDB:
// the load_model() method and the n_gpu_layers int property.
void LlamaModel::_bind_methods() {
	ClassDB::bind_method(D_METHOD("load_model", "path"), &LlamaModel::load_model);

	ClassDB::bind_method(D_METHOD("get_n_gpu_layers"), &LlamaModel::get_n_gpu_layers);
	ClassDB::bind_method(D_METHOD("set_n_gpu_layers", "n"), &LlamaModel::set_n_gpu_layers);
	ClassDB::add_property("LlamaModel", PropertyInfo(Variant::INT, "n_gpu_layers"), "set_n_gpu_layers", "get_n_gpu_layers");
}

// Initializes model_params with llama.cpp defaults so n_gpu_layers can be
// overridden via the exported property before load_model() is called.
LlamaModel::LlamaModel() {
	model_params = llama_model_default_params();
}

// Loads a model from `path` using the member model_params (configured via the
// exported n_gpu_layers property). Frees any previously loaded model first.
// On failure, prints an error and leaves `model` NULL.
void LlamaModel::load_model(const String &path) {
	if (model) {
		llama_free_model(model);
	}

	// Fix: removed the stale local `llama_model_params model_params` (with its
	// hard-coded n_gpu_layers = 99) that shadowed the member and silently
	// discarded any value set through the n_gpu_layers property.
	model = llama_load_model_from_file(path.utf8().get_data(), model_params);

	if (model == NULL) {
		UtilityFunctions::printerr(vformat("%s: Unable to load model from %s", __func__, path));
		return;
	}

	UtilityFunctions::print(vformat("%s: Model loaded from %s", __func__, path));
}

void LlamaModel::_bind_methods() {
ClassDB::bind_method(D_METHOD("load_model", "path"), &LlamaModel::load_model);
// Number of model layers to offload to the GPU; stored in model_params and
// applied on the next load_model() call.
int LlamaModel::get_n_gpu_layers() {
	return model_params.n_gpu_layers;
}

void LlamaModel::set_n_gpu_layers(int n) {
	model_params.n_gpu_layers = n;
}

LlamaModel::~LlamaModel() {
Expand Down
26 changes: 17 additions & 9 deletions src/llama_model.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,25 @@

namespace godot {

class LlamaModel : public Resource {
GDCLASS(LlamaModel, Resource)
class LlamaModel : public Resource {
GDCLASS(LlamaModel, Resource)

protected:
static void _bind_methods();
private:
llama_model_params model_params;

public:
llama_model *model = nullptr;
void load_model( const String &path );
~LlamaModel();
};
protected:
static void _bind_methods();

public:
llama_model *model = nullptr;
void load_model(const String &path);

int get_n_gpu_layers();
void set_n_gpu_layers(int n);

LlamaModel();
~LlamaModel();
};

} //namespace godot

Expand Down

0 comments on commit 54f78c3

Please sign in to comment.