Skip to content

Commit 7ffcd05

Browse files
ruby : Make context accept initial parameters, API to retrieve a segment and more (#2749)
* Fix type signature for Whisper.log_set * Use cache file for model when offline * Extract ruby_whisper_transcribe() into a file * Extract Whisper::Error * Use FileList for ext/*.{c,cpp,h} * Extract Whisper::Segment * Extract Whisper::Model * Extract Whisper::Params * Extract Whisper::Context * Extract log_callback function * Write base code in C rather than C++ * Use chdir instead of Dir.chdir in Rakefile * Define alloc func for Whisper::Model * Define Whisper::Params' callback and user data reader * Add test for Whisper::Params.new with keyword arguments * Make Whisper::Params.new accept keyword arguments * Update type signatures * Update README * Update CLEAN targets * Fix document comment for Whisper::Params#new_segment_callback= * Use macro to define params * Fix dependency of build task * Set Whisper.finalize_log_callback visibility to private * Make Whisper::Context#full and full_parallel return self * Add test for Whisper::Context#full_get_segment * Add Whisper::Context#full_get_segment * Update signatures * Update README * Fix signature * Replace #initialize with .new in signature file [skip ci] * Fix potential overflow
1 parent 7a423f1 commit 7ffcd05

17 files changed

+2610
-2021
lines changed

bindings/ruby/README.md

Lines changed: 26 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,15 @@ require "whisper"
2424

2525
whisper = Whisper::Context.new("base")
2626

27-
params = Whisper::Params.new
28-
params.language = "en"
29-
params.offset = 10_000
30-
params.duration = 60_000
31-
params.max_text_tokens = 300
32-
params.translate = true
33-
params.print_timestamps = false
34-
params.initial_prompt = "Initial prompt here."
27+
params = Whisper::Params.new(
28+
language: "en",
29+
offset: 10_000,
30+
duration: 60_000,
31+
max_text_tokens: 300,
32+
translate: true,
33+
print_timestamps: false,
34+
initial_prompt: "Initial prompt here."
35+
)
3536

3637
whisper.transcribe("path/to/audio.wav", params) do |whole_text|
3738
puts whole_text
@@ -113,18 +114,18 @@ def format_time(time_ms)
113114
"%02d:%02d:%02d.%03d" % [hour, min, sec, decimal_part]
114115
end
115116

116-
whisper.transcribe("path/to/audio.wav", params)
117-
118-
whisper.each_segment.with_index do |segment, index|
119-
line = "[%{nth}: %{st} --> %{ed}] %{text}" % {
120-
nth: index + 1,
121-
st: format_time(segment.start_time),
122-
ed: format_time(segment.end_time),
123-
text: segment.text
124-
}
125-
line << " (speaker turned)" if segment.speaker_next_turn?
126-
puts line
127-
end
117+
whisper
118+
.transcribe("path/to/audio.wav", params)
119+
.each_segment.with_index do |segment, index|
120+
line = "[%{nth}: %{st} --> %{ed}] %{text}" % {
121+
nth: index + 1,
122+
st: format_time(segment.start_time),
123+
ed: format_time(segment.end_time),
124+
text: segment.text
125+
}
126+
line << " (speaker turned)" if segment.speaker_next_turn?
127+
puts line
128+
end
128129

129130
```
130131

@@ -215,10 +216,11 @@ reader = WaveFile::Reader.new("path/to/audio.wav", WaveFile::Format.new(:mono, :
215216
samples = reader.enum_for(:each_buffer).map(&:samples).flatten
216217

217218
whisper = Whisper::Context.new("base")
218-
whisper.full(Whisper::Params.new, samples)
219-
whisper.each_segment do |segment|
220-
puts segment.text
221-
end
219+
whisper
220+
.full(Whisper::Params.new, samples)
221+
.each_segment do |segment|
222+
puts segment.text
223+
end
222224
```
223225

224226
The second argument `samples` may be an array, an object with `length` and `each` methods, or a MemoryView. If you can prepare audio data as a C array and export it as a MemoryView, whispercpp accepts it and works with it with zero copy.

bindings/ruby/Rakefile

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,11 @@ EXTSOURCES.each do |src|
1818
end
1919

2020
CLEAN.include SOURCES
21-
CLEAN.include FileList["ext/*.o", "ext/*.metal", "ext/whisper.{so,bundle,dll}"]
21+
CLEAN.include FileList["ext/**/*.o", "ext/**/*.metal", "ext/**/*.tmp", "ext/whisper.{so,bundle,dll}"]
2222

23-
task build: ["ext/Makefile", "ext/ruby_whisper.h", "ext/ruby_whisper.cpp", "whispercpp.gemspec"]
23+
SRC = FileList["ext/*.{c,cpp,h}"]
24+
25+
task build: SOURCES
2426

2527
directory "pkg"
2628
CLOBBER.include "pkg"
@@ -29,14 +31,14 @@ LIB_NAME = "whisper".ext(RbConfig::CONFIG["DLEXT"])
2931
SO_FILE = File.join("ext", LIB_NAME)
3032
LIB_FILE = File.join("lib", LIB_NAME)
3133

32-
file "ext/Makefile" => ["ext/extconf.rb", "ext/ruby_whisper.h", "ext/ruby_whisper.cpp"] + SOURCES do |t|
33-
Dir.chdir "ext" do
34+
file "ext/Makefile" => SRC + ["ext/extconf.rb"] + SOURCES do |t|
35+
chdir "ext" do
3436
ruby "extconf.rb"
3537
end
3638
end
3739

3840
file SO_FILE => "ext/Makefile" do |t|
39-
Dir.chdir "ext" do
41+
chdir "ext" do
4042
sh "make"
4143
end
4244
end
@@ -54,7 +56,7 @@ end
5456

5557
TEST_MEMORY_VIEW = "tests/jfk_reader/jfk_reader.#{RbConfig::CONFIG['DLEXT']}"
5658
file TEST_MEMORY_VIEW => "tests/jfk_reader/jfk_reader.c" do |t|
57-
Dir.chdir "tests/jfk_reader" do
59+
chdir "tests/jfk_reader" do
5860
ruby "extconf.rb"
5961
sh "make"
6062
end

bindings/ruby/ext/.gitignore

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,8 @@ whisper.bundle
44
whisper.dll
55
scripts/get-flags.mk
66
*.o
7-
*.c
8-
*.cpp
9-
*.h
10-
*.m
11-
*.metal
12-
!ruby_whisper.cpp
13-
!ruby_whisper.h
7+
/*/**/*.c
8+
/*/**/*.cpp
9+
/*/**/*.h
10+
/*/**/*.m
11+
/*/**/*.metal

bindings/ruby/ext/extconf.rb

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,14 @@
174174
'src/whisper.o'
175175

176176
$objs = $OBJ_GGML + $OBJ_WHISPER + $OBJ_COMMON + $OBJ_SDL
177-
$objs << "ruby_whisper.o"
177+
$objs <<
178+
"ruby_whisper.o" <<
179+
"ruby_whisper_context.o" <<
180+
"ruby_whisper_transcribe.o" <<
181+
"ruby_whisper_params.o" <<
182+
"ruby_whisper_error.o" <<
183+
"ruby_whisper_segment.o" <<
184+
"ruby_whisper_model.o"
178185

179186
$CPPFLAGS = "#{$MK_CPPFLAGS} #{$CPPFLAGS}"
180187
$CFLAGS = "#{$CPPFLAGS} #{$MK_CFLAGS} #{$GF_CFLAGS} #{$CFLAGS}"

bindings/ruby/ext/ruby_whisper.c

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
#include <ruby.h>
2+
#include <ruby/memory_view.h>
3+
#include "ruby_whisper.h"
4+
5+
VALUE mWhisper;
6+
VALUE cContext;
7+
VALUE cParams;
8+
VALUE eError;
9+
10+
VALUE cSegment;
11+
VALUE cModel;
12+
13+
ID id_to_s;
14+
ID id_call;
15+
ID id___method__;
16+
ID id_to_enum;
17+
ID id_length;
18+
ID id_next;
19+
ID id_new;
20+
ID id_to_path;
21+
ID id_URI;
22+
ID id_pre_converted_models;
23+
24+
static bool is_log_callback_finalized = false;
25+
26+
// High level API
27+
extern VALUE ruby_whisper_segment_allocate(VALUE klass);
28+
29+
extern void init_ruby_whisper_context(VALUE *mWhisper);
30+
extern void init_ruby_whisper_params(VALUE *mWhisper);
31+
extern void init_ruby_whisper_error(VALUE *mWhisper);
32+
extern void init_ruby_whisper_segment(VALUE *mWhisper, VALUE *cSegment);
33+
extern void init_ruby_whisper_model(VALUE *mWhisper);
34+
extern void register_callbacks(ruby_whisper_params *rwp, VALUE *context);
35+
36+
/*
37+
* call-seq:
38+
* lang_max_id -> Integer
39+
*/
40+
static VALUE ruby_whisper_s_lang_max_id(VALUE self) {
41+
return INT2NUM(whisper_lang_max_id());
42+
}
43+
44+
/*
45+
* call-seq:
46+
* lang_id(lang_name) -> Integer
47+
*/
48+
static VALUE ruby_whisper_s_lang_id(VALUE self, VALUE lang) {
49+
const char * lang_str = StringValueCStr(lang);
50+
const int id = whisper_lang_id(lang_str);
51+
if (-1 == id) {
52+
rb_raise(rb_eArgError, "language not found: %s", lang_str);
53+
}
54+
return INT2NUM(id);
55+
}
56+
57+
/*
58+
* call-seq:
59+
* lang_str(lang_id) -> String
60+
*/
61+
static VALUE ruby_whisper_s_lang_str(VALUE self, VALUE id) {
62+
const int lang_id = NUM2INT(id);
63+
const char * str = whisper_lang_str(lang_id);
64+
if (NULL == str) {
65+
rb_raise(rb_eIndexError, "id %d outside of language id", lang_id);
66+
}
67+
return rb_str_new2(str);
68+
}
69+
70+
/*
71+
* call-seq:
72+
* lang_str_full(lang_id) -> String
73+
*/
74+
static VALUE ruby_whisper_s_lang_str_full(VALUE self, VALUE id) {
75+
const int lang_id = NUM2INT(id);
76+
const char * str_full = whisper_lang_str_full(lang_id);
77+
if (NULL == str_full) {
78+
rb_raise(rb_eIndexError, "id %d outside of language id", lang_id);
79+
}
80+
return rb_str_new2(str_full);
81+
}
82+
83+
static VALUE ruby_whisper_s_finalize_log_callback(VALUE self, VALUE id) {
84+
is_log_callback_finalized = true;
85+
return Qnil;
86+
}
87+
88+
static void
89+
ruby_whisper_log_callback(enum ggml_log_level level, const char * buffer, void * user_data) {
90+
if (is_log_callback_finalized) {
91+
return;
92+
}
93+
VALUE log_callback = rb_iv_get(mWhisper, "log_callback");
94+
VALUE udata = rb_iv_get(mWhisper, "user_data");
95+
rb_funcall(log_callback, id_call, 3, INT2NUM(level), rb_str_new2(buffer), udata);
96+
}
97+
98+
/*
99+
* call-seq:
100+
* log_set ->(level, buffer, user_data) { ... }, user_data -> nil
101+
*/
102+
static VALUE ruby_whisper_s_log_set(VALUE self, VALUE log_callback, VALUE user_data) {
103+
VALUE old_callback = rb_iv_get(self, "log_callback");
104+
if (!NIL_P(old_callback)) {
105+
rb_undefine_finalizer(old_callback);
106+
}
107+
108+
rb_iv_set(self, "log_callback", log_callback);
109+
rb_iv_set(self, "user_data", user_data);
110+
111+
VALUE finalize_log_callback = rb_funcall(mWhisper, rb_intern("method"), 1, rb_str_new2("finalize_log_callback"));
112+
rb_define_finalizer(log_callback, finalize_log_callback);
113+
114+
whisper_log_set(ruby_whisper_log_callback, NULL);
115+
116+
return Qnil;
117+
}
118+
119+
static void rb_whisper_model_mark(ruby_whisper_model *rwm) {
120+
rb_gc_mark(rwm->context);
121+
}
122+
123+
static VALUE ruby_whisper_model_allocate(VALUE klass) {
124+
ruby_whisper_model *rwm;
125+
rwm = ALLOC(ruby_whisper_model);
126+
return Data_Wrap_Struct(klass, rb_whisper_model_mark, RUBY_DEFAULT_FREE, rwm);
127+
}
128+
129+
void Init_whisper() {
130+
id_to_s = rb_intern("to_s");
131+
id_call = rb_intern("call");
132+
id___method__ = rb_intern("__method__");
133+
id_to_enum = rb_intern("to_enum");
134+
id_length = rb_intern("length");
135+
id_next = rb_intern("next");
136+
id_new = rb_intern("new");
137+
id_to_path = rb_intern("to_path");
138+
id_URI = rb_intern("URI");
139+
id_pre_converted_models = rb_intern("pre_converted_models");
140+
141+
mWhisper = rb_define_module("Whisper");
142+
143+
rb_define_const(mWhisper, "LOG_LEVEL_NONE", INT2NUM(GGML_LOG_LEVEL_NONE));
144+
rb_define_const(mWhisper, "LOG_LEVEL_INFO", INT2NUM(GGML_LOG_LEVEL_INFO));
145+
rb_define_const(mWhisper, "LOG_LEVEL_WARN", INT2NUM(GGML_LOG_LEVEL_WARN));
146+
rb_define_const(mWhisper, "LOG_LEVEL_ERROR", INT2NUM(GGML_LOG_LEVEL_ERROR));
147+
rb_define_const(mWhisper, "LOG_LEVEL_DEBUG", INT2NUM(GGML_LOG_LEVEL_DEBUG));
148+
rb_define_const(mWhisper, "LOG_LEVEL_CONT", INT2NUM(GGML_LOG_LEVEL_CONT));
149+
150+
rb_define_singleton_method(mWhisper, "lang_max_id", ruby_whisper_s_lang_max_id, 0);
151+
rb_define_singleton_method(mWhisper, "lang_id", ruby_whisper_s_lang_id, 1);
152+
rb_define_singleton_method(mWhisper, "lang_str", ruby_whisper_s_lang_str, 1);
153+
rb_define_singleton_method(mWhisper, "lang_str_full", ruby_whisper_s_lang_str_full, 1);
154+
rb_define_singleton_method(mWhisper, "log_set", ruby_whisper_s_log_set, 2);
155+
rb_define_private_method(rb_singleton_class(mWhisper), "finalize_log_callback", ruby_whisper_s_finalize_log_callback, 1);
156+
157+
init_ruby_whisper_context(&mWhisper);
158+
init_ruby_whisper_params(&mWhisper);
159+
init_ruby_whisper_error(&mWhisper);
160+
init_ruby_whisper_segment(&mWhisper, &cContext);
161+
init_ruby_whisper_model(&mWhisper);
162+
163+
rb_require("whisper/model/uri");
164+
}

0 commit comments

Comments
 (0)